Merge tag 'kvm-s390-next-5.17-1' of git://git.kernel.org/pub/scm/linux/kernel/git...
author Paolo Bonzini <pbonzini@redhat.com>
Tue, 21 Dec 2021 17:59:53 +0000 (12:59 -0500)
committer Paolo Bonzini <pbonzini@redhat.com>
Tue, 21 Dec 2021 17:59:53 +0000 (12:59 -0500)
KVM: s390: Fix and cleanup

- fix sigp sense/start/stop/inconsistency
- cleanups

744 files changed:
Documentation/admin-guide/kernel-parameters.txt
Documentation/arm64/pointer-authentication.rst
Documentation/cpu-freq/core.rst
Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.yaml
Documentation/filesystems/cifs/ksmbd.rst
Documentation/filesystems/netfs_library.rst
Documentation/i2c/smbus-protocol.rst
Documentation/networking/ipvs-sysctl.rst
Documentation/networking/timestamping.rst
Documentation/virt/kvm/mmu.rst
MAINTAINERS
Makefile
arch/Kconfig
arch/alpha/kernel/syscalls/syscall.tbl
arch/arc/include/asm/cacheflush.h
arch/arm/boot/dts/bcm2711.dtsi
arch/arm/boot/dts/bcm5301x.dtsi
arch/arm/include/asm/cacheflush.h
arch/arm/mach-socfpga/core.h
arch/arm/mach-socfpga/platsmp.c
arch/arm64/boot/dts/exynos/exynosautov9.dtsi
arch/arm64/include/asm/ftrace.h
arch/arm64/include/asm/kvm_arm.h
arch/arm64/include/asm/kvm_emulate.h
arch/arm64/include/asm/kvm_host.h
arch/arm64/include/asm/pgalloc.h
arch/arm64/include/asm/stacktrace.h
arch/arm64/include/asm/uaccess.h
arch/arm64/kernel/entry-ftrace.S
arch/arm64/kernel/ftrace.c
arch/arm64/kernel/machine_kexec.c
arch/arm64/kernel/stacktrace.c
arch/arm64/kvm/Kconfig
arch/arm64/kvm/Makefile
arch/arm64/kvm/arch_timer.c
arch/arm64/kvm/arm.c
arch/arm64/kvm/handle_exit.c
arch/arm64/kvm/hyp/include/hyp/switch.h
arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
arch/arm64/kvm/hyp/nvhe/switch.c
arch/arm64/kvm/hyp/vhe/switch.c
arch/arm64/kvm/mmu.c
arch/arm64/kvm/pmu-emul.c
arch/arm64/kvm/psci.c
arch/arm64/kvm/reset.c
arch/arm64/kvm/vgic/vgic-init.c
arch/arm64/kvm/vgic/vgic-kvm-device.c
arch/arm64/kvm/vgic/vgic-mmio-v2.c
arch/arm64/kvm/vgic/vgic-mmio-v3.c
arch/arm64/kvm/vgic/vgic-v3.c
arch/arm64/kvm/vgic/vgic-v4.c
arch/arm64/kvm/vgic/vgic.c
arch/ia64/kernel/syscalls/syscall.tbl
arch/m68k/include/asm/cacheflush_mm.h
arch/m68k/kernel/syscalls/syscall.tbl
arch/microblaze/kernel/syscalls/syscall.tbl
arch/mips/Kconfig
arch/mips/boot/compressed/Makefile
arch/mips/include/asm/cacheflush.h
arch/mips/include/asm/kvm_host.h
arch/mips/kernel/cpu-probe.c
arch/mips/kernel/proc.c
arch/mips/kvm/Kconfig
arch/mips/kvm/Makefile
arch/mips/kvm/emulate.c
arch/mips/kvm/loongson_ipi.c
arch/mips/kvm/mips.c
arch/nds32/include/asm/cacheflush.h
arch/nios2/include/asm/cacheflush.h
arch/parisc/Makefile
arch/parisc/configs/generic-64bit_defconfig
arch/parisc/include/asm/assembly.h
arch/parisc/include/asm/cacheflush.h
arch/parisc/install.sh
arch/parisc/kernel/entry.S
arch/parisc/kernel/syscall.S
arch/parisc/kernel/time.c
arch/parisc/kernel/vmlinux.lds.S
arch/powerpc/include/asm/asm-prototypes.h
arch/powerpc/include/asm/kvm_asm.h
arch/powerpc/include/asm/kvm_book3s.h
arch/powerpc/include/asm/kvm_book3s_64.h
arch/powerpc/include/asm/kvm_host.h
arch/powerpc/include/asm/kvm_ppc.h
arch/powerpc/include/asm/switch_to.h
arch/powerpc/include/asm/time.h
arch/powerpc/kernel/cpu_setup_power.c
arch/powerpc/kernel/dt_cpu_ftrs.c
arch/powerpc/kernel/head_32.h
arch/powerpc/kernel/process.c
arch/powerpc/kernel/syscalls/syscall.tbl
arch/powerpc/kernel/time.c
arch/powerpc/kvm/Kconfig
arch/powerpc/kvm/Makefile
arch/powerpc/kvm/book3s.c
arch/powerpc/kvm/book3s_32_mmu.c
arch/powerpc/kvm/book3s_64_entry.S
arch/powerpc/kvm/book3s_64_mmu.c
arch/powerpc/kvm/book3s_64_mmu_hv.c
arch/powerpc/kvm/book3s_64_mmu_radix.c
arch/powerpc/kvm/book3s_hv.c
arch/powerpc/kvm/book3s_hv.h [new file with mode: 0644]
arch/powerpc/kvm/book3s_hv_builtin.c
arch/powerpc/kvm/book3s_hv_hmi.c
arch/powerpc/kvm/book3s_hv_interrupts.S
arch/powerpc/kvm/book3s_hv_nested.c
arch/powerpc/kvm/book3s_hv_p9_entry.c
arch/powerpc/kvm/book3s_hv_ras.c
arch/powerpc/kvm/book3s_hv_rm_mmu.c
arch/powerpc/kvm/book3s_hv_rmhandlers.S
arch/powerpc/kvm/book3s_hv_uvmem.c
arch/powerpc/kvm/book3s_pr.c
arch/powerpc/kvm/book3s_pr_papr.c
arch/powerpc/kvm/book3s_xics.c
arch/powerpc/kvm/book3s_xics.h
arch/powerpc/kvm/book3s_xive.c
arch/powerpc/kvm/book3s_xive.h
arch/powerpc/kvm/book3s_xive_native.c
arch/powerpc/kvm/booke.c
arch/powerpc/kvm/e500_emulate.c
arch/powerpc/kvm/powerpc.c
arch/powerpc/mm/book3s64/radix_pgtable.c
arch/powerpc/perf/core-book3s.c
arch/powerpc/platforms/powernv/idle.c
arch/powerpc/xmon/xmon.c
arch/riscv/include/asm/kvm_host.h
arch/riscv/kvm/Makefile
arch/riscv/kvm/mmu.c
arch/riscv/kvm/vcpu_exit.c
arch/riscv/kvm/vcpu_sbi.c
arch/riscv/kvm/vm.c
arch/riscv/kvm/vmid.c
arch/s390/configs/debug_defconfig
arch/s390/configs/defconfig
arch/s390/configs/zfcpdump_defconfig
arch/s390/include/asm/kvm_host.h
arch/s390/include/asm/pci_io.h
arch/s390/kvm/Kconfig
arch/s390/kvm/Makefile
arch/s390/kvm/interrupt.c
arch/s390/kvm/kvm-s390.c
arch/s390/kvm/kvm-s390.h
arch/s390/kvm/pv.c
arch/s390/lib/test_unwind.c
arch/sh/include/asm/cacheflush.h
arch/sh/kernel/syscalls/syscall.tbl
arch/sparc/kernel/syscalls/syscall.tbl
arch/x86/entry/entry_64.S
arch/x86/include/asm/fpu/api.h
arch/x86/include/asm/intel-family.h
arch/x86/include/asm/kvm-x86-ops.h
arch/x86/include/asm/kvm_host.h
arch/x86/include/asm/kvm_page_track.h
arch/x86/include/asm/sev-common.h
arch/x86/include/asm/xen/hypercall.h
arch/x86/include/asm/xen/hypervisor.h
arch/x86/kernel/fpu/signal.c
arch/x86/kernel/setup.c
arch/x86/kernel/sev.c
arch/x86/kernel/tsc.c
arch/x86/kernel/tsc_sync.c
arch/x86/kvm/Kconfig
arch/x86/kvm/Makefile
arch/x86/kvm/cpuid.c
arch/x86/kvm/debugfs.c
arch/x86/kvm/hyperv.c
arch/x86/kvm/i8254.c
arch/x86/kvm/i8259.c
arch/x86/kvm/ioapic.c
arch/x86/kvm/ioapic.h
arch/x86/kvm/irq.h
arch/x86/kvm/irq_comm.c
arch/x86/kvm/kvm_cache_regs.h
arch/x86/kvm/kvm_onhyperv.c
arch/x86/kvm/lapic.c
arch/x86/kvm/mmu.h
arch/x86/kvm/mmu/mmu.c
arch/x86/kvm/mmu/mmu_internal.h
arch/x86/kvm/mmu/mmutrace.h
arch/x86/kvm/mmu/page_track.c
arch/x86/kvm/mmu/paging_tmpl.h
arch/x86/kvm/mmu/spte.c
arch/x86/kvm/mmu/spte.h
arch/x86/kvm/mmu/tdp_iter.c
arch/x86/kvm/mmu/tdp_iter.h
arch/x86/kvm/mmu/tdp_mmu.c
arch/x86/kvm/mmu/tdp_mmu.h
arch/x86/kvm/svm/avic.c
arch/x86/kvm/svm/nested.c
arch/x86/kvm/svm/pmu.c
arch/x86/kvm/svm/sev.c
arch/x86/kvm/svm/svm.c
arch/x86/kvm/svm/svm.h
arch/x86/kvm/trace.h
arch/x86/kvm/vmx/capabilities.h
arch/x86/kvm/vmx/nested.c
arch/x86/kvm/vmx/posted_intr.c
arch/x86/kvm/vmx/posted_intr.h
arch/x86/kvm/vmx/vmx.c
arch/x86/kvm/vmx/vmx.h
arch/x86/kvm/x86.c
arch/x86/kvm/x86.h
arch/x86/realmode/init.c
arch/x86/xen/xen-asm.S
arch/xtensa/include/asm/cacheflush.h
arch/xtensa/kernel/syscalls/syscall.tbl
block/bdev.c
block/blk-core.c
block/blk-mq.c
drivers/acpi/cppc_acpi.c
drivers/acpi/property.c
drivers/android/binder.c
drivers/ata/libata-sata.c
drivers/ata/pata_falcon.c
drivers/ata/sata_fsl.c
drivers/block/loop.c
drivers/block/virtio_blk.c
drivers/block/zram/zram_drv.c
drivers/char/agp/parisc-agp.c
drivers/char/ipmi/ipmi_msghandler.c
drivers/cpufreq/cpufreq.c
drivers/cpufreq/intel_pstate.c
drivers/dma-buf/heaps/system_heap.c
drivers/firmware/arm_scmi/base.c
drivers/firmware/arm_scmi/scmi_pm_domain.c
drivers/firmware/arm_scmi/sensors.c
drivers/firmware/arm_scmi/virtio.c
drivers/firmware/arm_scmi/voltage.c
drivers/firmware/smccc/soc_id.c
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
drivers/gpu/drm/amd/amdgpu/navi10_ih.c
drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c
drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
drivers/gpu/drm/amd/amdgpu/nv.c
drivers/gpu/drm/amd/amdgpu/soc15.c
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
drivers/gpu/drm/amd/amdkfd/kfd_svm.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
drivers/gpu/drm/amd/display/dc/core/dc_link.c
drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
drivers/gpu/drm/amd/display/dc/core/dc_resource.c
drivers/gpu/drm/amd/display/dc/dc.h
drivers/gpu/drm/amd/display/dc/dc_link.h
drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c
drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c
drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu8_hwmgr.c
drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c
drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c
drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c
drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
drivers/gpu/drm/aspeed/aspeed_gfx_drv.c
drivers/gpu/drm/hyperv/hyperv_drm_drv.c
drivers/gpu/drm/i915/display/intel_display_types.h
drivers/gpu/drm/i915/display/intel_dp.c
drivers/gpu/drm/i915/display/intel_dp.h
drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c
drivers/gpu/drm/i915/gt/intel_gt_pm.c
drivers/gpu/drm/i915/gt/intel_workarounds.c
drivers/gpu/drm/i915/pxp/intel_pxp_pm.c
drivers/gpu/drm/i915/pxp/intel_pxp_pm.h
drivers/gpu/drm/msm/Kconfig
drivers/gpu/drm/msm/Makefile
drivers/gpu/drm/msm/adreno/a6xx_gpu.c
drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
drivers/gpu/drm/msm/dp/dp_aux.c
drivers/gpu/drm/msm/dsi/dsi_host.c
drivers/gpu/drm/msm/msm_debugfs.c
drivers/gpu/drm/msm/msm_drv.c
drivers/gpu/drm/msm/msm_gem.c
drivers/gpu/drm/msm/msm_gem_submit.c
drivers/gpu/drm/msm/msm_gpu.h
drivers/gpu/drm/msm/msm_gpu_devfreq.c
drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
drivers/gpu/drm/nouveau/nvkm/subdev/acr/gm200.c
drivers/gpu/drm/nouveau/nvkm/subdev/acr/gp102.c
drivers/gpu/drm/vc4/vc4_bo.c
drivers/gpu/drm/vc4/vc4_kms.c
drivers/gpu/drm/virtio/virtgpu_drv.c
drivers/gpu/drm/virtio/virtgpu_drv.h
drivers/gpu/drm/virtio/virtgpu_ioctl.c
drivers/gpu/drm/xen/xen_drm_front.c
drivers/hid/hid-asus.c
drivers/hid/hid-ft260.c
drivers/hid/hid-ids.h
drivers/hid/hid-input.c
drivers/hid/hid-magicmouse.c
drivers/hid/hid-multitouch.c
drivers/hid/hid-nintendo.c
drivers/hid/hid-thrustmaster.c
drivers/hid/intel-ish-hid/ishtp-fw-loader.c
drivers/hid/intel-ish-hid/ishtp-hid-client.c
drivers/hid/intel-ish-hid/ishtp/bus.c
drivers/hid/wacom_wac.c
drivers/hid/wacom_wac.h
drivers/i2c/busses/i2c-cbus-gpio.c
drivers/i2c/busses/i2c-i801.c
drivers/i2c/busses/i2c-rk3x.c
drivers/i2c/busses/i2c-stm32f7.c
drivers/i2c/busses/i2c-virtio.c
drivers/input/misc/xen-kbdfront.c
drivers/iommu/amd/iommu_v2.c
drivers/iommu/intel/cap_audit.c
drivers/iommu/intel/iommu.c
drivers/iommu/rockchip-iommu.c
drivers/media/cec/core/cec-adap.c
drivers/media/common/videobuf2/videobuf2-dma-sg.c
drivers/media/i2c/hi846.c
drivers/media/v4l2-core/v4l2-compat-ioctl32.c
drivers/memory/mtk-smi.c
drivers/mmc/host/mmc_spi.c
drivers/mmc/host/sdhci-esdhc-imx.c
drivers/mmc/host/sdhci.c
drivers/mmc/host/sdhci.h
drivers/net/Kconfig
drivers/net/dsa/b53/b53_spi.c
drivers/net/dsa/microchip/ksz8795.c
drivers/net/dsa/microchip/ksz9477.c
drivers/net/dsa/microchip/ksz_common.c
drivers/net/dsa/microchip/ksz_common.h
drivers/net/dsa/mv88e6xxx/serdes.c
drivers/net/dsa/mv88e6xxx/serdes.h
drivers/net/dsa/qca8k.c
drivers/net/dsa/rtl8365mb.c
drivers/net/ethernet/aquantia/atlantic/aq_common.h
drivers/net/ethernet/aquantia/atlantic/aq_hw.h
drivers/net/ethernet/aquantia/atlantic/aq_nic.c
drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
drivers/net/ethernet/aquantia/atlantic/aq_ring.c
drivers/net/ethernet/aquantia/atlantic/aq_vec.c
drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c
drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c
drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.h
drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils.h
drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c
drivers/net/ethernet/asix/ax88796c_spi.c
drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
drivers/net/ethernet/ibm/ibmvnic.c
drivers/net/ethernet/intel/iavf/iavf.h
drivers/net/ethernet/intel/iavf/iavf_ethtool.c
drivers/net/ethernet/intel/iavf/iavf_main.c
drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
drivers/net/ethernet/intel/ice/ice_lib.c
drivers/net/ethernet/intel/ice/ice_main.c
drivers/net/ethernet/intel/ice/ice_xsk.c
drivers/net/ethernet/intel/igb/igb_main.c
drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
drivers/net/ethernet/marvell/octeontx2/af/rvu.c
drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
drivers/net/ethernet/mellanox/mlx4/en_netdev.c
drivers/net/ethernet/mellanox/mlx5/core/cmd.c
drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c
drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h
drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
drivers/net/ethernet/mellanox/mlx5/core/health.c
drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c
drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h
drivers/net/ethernet/mellanox/mlx5/core/main.c
drivers/net/ethernet/mellanox/mlxsw/spectrum.c
drivers/net/ethernet/microchip/lan743x_main.c
drivers/net/ethernet/mscc/ocelot.c
drivers/net/ethernet/mscc/ocelot_vcap.c
drivers/net/ethernet/natsemi/xtsonic.c
drivers/net/ethernet/netronome/nfp/nfp_net.h
drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
drivers/net/ethernet/ni/nixge.c
drivers/net/ethernet/qlogic/qed/qed_int.c
drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
drivers/net/ethernet/realtek/r8169_main.c
drivers/net/ethernet/stmicro/stmmac/stmmac.h
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
drivers/net/hamradio/mkiss.c
drivers/net/ipa/ipa_cmd.c
drivers/net/ipa/ipa_cmd.h
drivers/net/ipa/ipa_endpoint.c
drivers/net/ipa/ipa_main.c
drivers/net/ipa/ipa_modem.c
drivers/net/ipa/ipa_smp2p.c
drivers/net/ipa/ipa_smp2p.h
drivers/net/mdio/mdio-aspeed.c
drivers/net/phy/phylink.c
drivers/net/slip/slip.h
drivers/net/usb/lan78xx.c
drivers/net/usb/smsc95xx.c
drivers/net/virtio_net.c
drivers/net/vrf.c
drivers/net/wireguard/allowedips.c
drivers/net/wireguard/device.c
drivers/net/wireguard/device.h
drivers/net/wireguard/main.c
drivers/net/wireguard/queueing.c
drivers/net/wireguard/queueing.h
drivers/net/wireguard/ratelimiter.c
drivers/net/wireguard/receive.c
drivers/net/wireguard/socket.c
drivers/net/wireless/intel/iwlwifi/fw/uefi.c
drivers/net/wireless/intel/iwlwifi/iwl-drv.c
drivers/net/wireless/intel/iwlwifi/iwl-drv.h
drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
drivers/net/wireless/intel/iwlwifi/mvm/ops.c
drivers/net/wireless/intel/iwlwifi/pcie/drv.c
drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c
drivers/net/wireless/mediatek/mt76/mt7615/usb_sdio.c
drivers/net/wireless/mediatek/mt76/mt76x02_usb_core.c
drivers/net/wireless/mediatek/mt76/mt7915/mac.c
drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
drivers/net/wireless/mediatek/mt76/mt7921/sdio_mac.c
drivers/net/wireless/mediatek/mt76/tx.c
drivers/net/wireless/ralink/rt2x00/rt2x00usb.c
drivers/net/wireless/realtek/rtw89/fw.c
drivers/net/wireless/realtek/rtw89/fw.h
drivers/nfc/virtual_ncidev.c
drivers/nvme/host/core.c
drivers/nvme/host/fabrics.c
drivers/nvme/host/tcp.c
drivers/nvme/target/io-cmd-file.c
drivers/nvme/target/tcp.c
drivers/platform/chrome/cros_ec_ishtp.c
drivers/platform/x86/intel/ishtp_eclite.c
drivers/powercap/dtpm.c
drivers/scsi/lpfc/lpfc_els.c
drivers/scsi/mpt3sas/mpt3sas_base.c
drivers/scsi/mpt3sas/mpt3sas_base.h
drivers/scsi/mpt3sas/mpt3sas_scsih.c
drivers/scsi/qla2xxx/qla_edif.c
drivers/scsi/scsi_debug.c
drivers/scsi/scsi_sysfs.c
drivers/scsi/ufs/ufs-mediatek.c
drivers/scsi/ufs/ufshcd-pci.c
drivers/scsi/ufs/ufshpb.c
drivers/scsi/virtio_scsi.c
drivers/staging/Kconfig
drivers/staging/Makefile
drivers/staging/fbtft/fb_ssd1351.c
drivers/staging/fbtft/fbtft-core.c
drivers/staging/greybus/audio_helper.c
drivers/staging/netlogic/Kconfig [deleted file]
drivers/staging/netlogic/Makefile [deleted file]
drivers/staging/netlogic/TODO [deleted file]
drivers/staging/netlogic/platform_net.c [deleted file]
drivers/staging/netlogic/platform_net.h [deleted file]
drivers/staging/netlogic/xlr_net.c [deleted file]
drivers/staging/netlogic/xlr_net.h [deleted file]
drivers/staging/r8188eu/core/rtw_mlme_ext.c
drivers/staging/r8188eu/os_dep/ioctl_linux.c
drivers/staging/r8188eu/os_dep/mlme_linux.c
drivers/staging/rtl8192e/rtl8192e/rtl_core.c
drivers/target/target_core_fabric_configfs.c
drivers/target/target_core_spc.c
drivers/tee/optee/ffa_abi.c
drivers/tty/hvc/hvc_xen.c
drivers/tty/serial/8250/8250_bcm7271.c
drivers/tty/serial/8250/8250_pci.c
drivers/tty/serial/8250/8250_port.c
drivers/tty/serial/Kconfig
drivers/tty/serial/amba-pl011.c
drivers/tty/serial/fsl_lpuart.c
drivers/tty/serial/liteuart.c
drivers/tty/serial/msm_serial.c
drivers/tty/serial/serial-tegra.c
drivers/tty/serial/serial_core.c
drivers/usb/cdns3/cdns3-gadget.c
drivers/usb/cdns3/cdnsp-mem.c
drivers/usb/chipidea/ci_hdrc_imx.c
drivers/usb/core/hub.c
drivers/usb/core/quirks.c
drivers/usb/dwc2/gadget.c
drivers/usb/dwc2/hcd_queue.c
drivers/usb/dwc3/core.c
drivers/usb/dwc3/core.h
drivers/usb/dwc3/gadget.c
drivers/usb/gadget/udc/udc-xilinx.c
drivers/usb/host/xhci-ring.c
drivers/usb/host/xhci-tegra.c
drivers/usb/serial/option.c
drivers/usb/serial/pl2303.c
drivers/usb/typec/tcpm/fusb302.c
drivers/usb/typec/tcpm/tcpm.c
drivers/usb/typec/tipd/core.c
drivers/vdpa/vdpa_sim/vdpa_sim.c
drivers/vfio/pci/vfio_pci_igd.c
drivers/vfio/vfio.c
drivers/vhost/vdpa.c
drivers/vhost/vsock.c
drivers/video/console/vgacon.c
drivers/video/fbdev/xen-fbfront.c
drivers/virtio/virtio_ring.c
drivers/xen/Kconfig
drivers/xen/pvcalls-front.c
drivers/xen/xenbus/xenbus_probe.c
drivers/xen/xenbus/xenbus_probe_frontend.c
fs/btrfs/lzo.c
fs/cifs/cifsfs.h
fs/cifs/connect.c
fs/cifs/fscache.c
fs/cifs/inode.c
fs/cifs/sess.c
fs/cifs/smb2pdu.c
fs/erofs/utils.c
fs/file.c
fs/fuse/dev.c
fs/gfs2/glock.c
fs/gfs2/inode.c
fs/inode.c
fs/io-wq.c
fs/io_uring.c
fs/iomap/buffered-io.c
fs/ksmbd/smb2pdu.c
fs/netfs/read_helper.c
fs/nfs/inode.c
fs/nfs/nfs42proc.c
fs/nfs/nfs42xdr.c
fs/nfs/nfs4state.c
fs/nfs/nfstrace.h
fs/ntfs/Kconfig
fs/xfs/libxfs/xfs_attr.c
fs/xfs/xfs_icache.c
fs/xfs/xfs_inode.c
fs/xfs/xfs_inode.h
include/asm-generic/cacheflush.h
include/linux/acpi.h
include/linux/cacheflush.h [new file with mode: 0644]
include/linux/fs.h
include/linux/highmem.h
include/linux/intel-ish-client-if.h
include/linux/kprobes.h
include/linux/kvm_dirty_ring.h
include/linux/kvm_host.h
include/linux/kvm_types.h
include/linux/mlx5/mlx5_ifc.h
include/linux/mod_devicetable.h
include/linux/netdevice.h
include/linux/page-flags.h
include/linux/pagemap.h
include/linux/ptp_classify.h
include/linux/sched/cputime.h
include/linux/siphash.h
include/linux/virtio.h
include/net/busy_poll.h
include/net/dst_cache.h
include/net/fib_rules.h
include/net/ip6_fib.h
include/net/ip_fib.h
include/net/ipv6_stubs.h
include/net/netns/ipv4.h
include/net/nl802154.h
include/net/sock.h
include/soc/mscc/ocelot_vcap.h
include/sound/soc-acpi.h
include/trace/events/rpcgss.h
include/uapi/drm/virtgpu_drm.h
include/uapi/linux/if_ether.h
include/xen/xenbus.h
kernel/cpu.c
kernel/events/core.c
kernel/kprobes.c
kernel/locking/rwsem.c
kernel/power/hibernate.c
kernel/power/user.c
kernel/sched/core.c
kernel/sched/cputime.c
kernel/softirq.c
kernel/time/tick-sched.c
kernel/trace/trace.h
kernel/trace/trace_events.c
kernel/trace/trace_events_hist.c
kernel/trace/trace_uprobe.c
kernel/trace/tracing_map.c
lib/Kconfig.debug
lib/siphash.c
mm/highmem.c
mm/hugetlb.c
mm/memcontrol.c
mm/shmem.c
mm/util.c
net/8021q/vlan.c
net/8021q/vlan_dev.c
net/core/dev.c
net/core/dst_cache.c
net/core/fib_rules.c
net/core/neighbour.c
net/ethtool/ioctl.c
net/ipv4/fib_frontend.c
net/ipv4/fib_rules.c
net/ipv4/fib_semantics.c
net/ipv4/nexthop.c
net/ipv4/tcp_cubic.c
net/ipv6/af_inet6.c
net/ipv6/fib6_rules.c
net/ipv6/ip6_offload.c
net/ipv6/ip6_output.c
net/ipv6/route.c
net/mctp/route.c
net/mctp/test/utils.c
net/mpls/af_mpls.c
net/mpls/internal.h
net/mptcp/options.c
net/mptcp/protocol.c
net/mptcp/protocol.h
net/ncsi/ncsi-cmd.c
net/netfilter/ipvs/ip_vs_core.c
net/netfilter/nf_conntrack_netlink.c
net/netfilter/nf_flow_table_offload.c
net/netfilter/nft_payload.c
net/netfilter/xt_IDLETIMER.c
net/netlink/af_netlink.c
net/rds/tcp.c
net/rxrpc/conn_client.c
net/rxrpc/peer_object.c
net/sched/sch_ets.c
net/smc/af_smc.c
net/smc/smc_close.c
net/smc/smc_core.c
net/sunrpc/xprtsock.c
net/tls/tls_main.c
net/tls/tls_sw.c
net/unix/af_unix.c
scripts/mod/devicetable-offsets.c
scripts/mod/file2alias.c
security/selinux/ss/hashtab.c
sound/hda/intel-dsp-config.c
sound/pci/cmipci.c
sound/pci/ctxfi/ctamixer.c
sound/pci/ctxfi/ctdaio.c
sound/pci/ctxfi/ctresource.c
sound/pci/ctxfi/ctresource.h
sound/pci/ctxfi/ctsrc.c
sound/pci/hda/hda_intel.c
sound/pci/hda/hda_local.h
sound/pci/hda/patch_cs8409.c
sound/pci/hda/patch_hdmi.c
sound/pci/hda/patch_realtek.c
sound/soc/codecs/cs35l41-spi.c
sound/soc/codecs/cs35l41.c
sound/soc/codecs/cs35l41.h
sound/soc/codecs/lpass-rx-macro.c
sound/soc/codecs/rk817_codec.c
sound/soc/codecs/rt1011.c
sound/soc/codecs/rt1011.h
sound/soc/codecs/rt5682-i2c.c
sound/soc/codecs/rt5682.c
sound/soc/codecs/rt5682.h
sound/soc/codecs/rt9120.c
sound/soc/codecs/wcd934x.c
sound/soc/codecs/wcd938x.c
sound/soc/codecs/wm_adsp.c
sound/soc/intel/boards/sof_sdw.c
sound/soc/intel/common/soc-acpi-intel-adl-match.c
sound/soc/intel/common/soc-acpi-intel-cml-match.c
sound/soc/mediatek/mt8173/mt8173-afe-pcm.c
sound/soc/mediatek/mt8173/mt8173-rt5650.c
sound/soc/qcom/qdsp6/audioreach.h
sound/soc/qcom/qdsp6/q6adm.c
sound/soc/qcom/qdsp6/q6asm-dai.c
sound/soc/qcom/qdsp6/q6prm.c
sound/soc/qcom/qdsp6/q6routing.c
sound/soc/sh/rcar/dma.c
sound/soc/soc-acpi.c
sound/soc/soc-dapm.c
sound/soc/soc-topology.c
sound/soc/sof/Kconfig
sound/soc/sof/control.c
sound/soc/sof/intel/hda-bus.c
sound/soc/sof/intel/hda-dsp.c
sound/soc/sof/intel/hda.c
sound/soc/stm/stm32_i2s.c
sound/soc/tegra/tegra186_dspk.c
sound/soc/tegra/tegra210_admaif.c
sound/soc/tegra/tegra210_adx.c
sound/soc/tegra/tegra210_ahub.c
sound/soc/tegra/tegra210_amx.c
sound/soc/tegra/tegra210_dmic.c
sound/soc/tegra/tegra210_i2s.c
sound/soc/tegra/tegra210_mixer.c
sound/soc/tegra/tegra210_mvc.c
sound/soc/tegra/tegra210_sfc.c
sound/usb/pcm.c
sound/xen/xen_snd_front.c
tools/include/linux/kernel.h
tools/include/linux/math.h [new file with mode: 0644]
tools/include/uapi/linux/if_link.h
tools/objtool/elf.c
tools/objtool/objtool.c
tools/testing/radix-tree/linux/lockdep.h
tools/testing/selftests/kvm/.gitignore
tools/testing/selftests/kvm/Makefile
tools/testing/selftests/kvm/include/kvm_util.h
tools/testing/selftests/kvm/kvm_create_max_vcpus.c
tools/testing/selftests/kvm/kvm_page_table_test.c
tools/testing/selftests/kvm/lib/kvm_util.c
tools/testing/selftests/kvm/lib/x86_64/processor.c
tools/testing/selftests/kvm/x86_64/hyperv_features.c
tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c
tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c
tools/testing/selftests/kvm/x86_64/userspace_io_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86_64/vmx_pmu_msrs_test.c
tools/testing/selftests/net/Makefile
tools/testing/selftests/net/fcnal-test.sh
tools/testing/selftests/net/fib_nexthops.sh
tools/testing/selftests/net/tls.c
tools/testing/selftests/netfilter/Makefile
tools/testing/selftests/netfilter/conntrack_vrf.sh [new file with mode: 0755]
tools/testing/selftests/netfilter/nft_nat.sh
tools/testing/selftests/netfilter/nft_queue.sh
tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json
tools/testing/selftests/wireguard/netns.sh
tools/testing/selftests/wireguard/qemu/debug.config
tools/testing/selftests/wireguard/qemu/kernel.config
virt/kvm/Kconfig
virt/kvm/Makefile.kvm [new file with mode: 0644]
virt/kvm/async_pf.c
virt/kvm/kvm_main.c

index 9725c54..4eaefb0 100644 (file)
                        Default is 1 (enabled)
 
        kvm-intel.emulate_invalid_guest_state=
-                       [KVM,Intel] Enable emulation of invalid guest states
-                       Default is 0 (disabled)
+                       [KVM,Intel] Disable emulation of invalid guest state.
+                       Ignored if kvm-intel.enable_unrestricted_guest=1, as
+                       guest state is never invalid for unrestricted guests.
+                       This param doesn't apply to nested guests (L2), as KVM
+                       never emulates invalid L2 guest state.
+                       Default is 1 (enabled)
 
        kvm-intel.flexpriority=
                        [KVM,Intel] Disable FlexPriority feature (TPR shadow).
                        Override pmtimer IOPort with a hex value.
                        e.g. pmtmr=0x508
 
+       pmu_override=   [PPC] Override the PMU.
+                       This option takes over the PMU facility, so it is no
+                       longer usable by perf. Setting this option starts the
+                       PMU counters by setting MMCR0 to 0 (the FC bit is
+                       cleared). If a number is given, then MMCR1 is set to
+                       that number, otherwise (e.g., 'pmu_override=on'), MMCR1
+                       remains 0.
+
        pm_debug_messages       [SUSPEND,KNL]
                        Enable suspend/resume debug messages during boot up.
 
index f127666..e5dad2e 100644 (file)
@@ -53,11 +53,10 @@ The number of bits that the PAC occupies in a pointer is 55 minus the
 virtual address size configured by the kernel. For example, with a
 virtual address size of 48, the PAC is 7 bits wide.
 
-Recent versions of GCC can compile code with APIAKey-based return
-address protection when passed the -msign-return-address option. This
-uses instructions in the HINT space (unless -march=armv8.3-a or higher
-is also passed), and such code can run on systems without the pointer
-authentication extension.
+When ARM64_PTR_AUTH_KERNEL is selected, the kernel will be compiled
+with HINT space pointer authentication instructions protecting
+function returns. Kernels built with this option will work on hardware
+with or without pointer authentication support.
 
 In addition to exec(), keys can also be reinitialized to random values
 using the PR_PAC_RESET_KEYS prctl. A bitmask of PR_PAC_APIAKEY,
index 33cb90b..4ceef8e 100644 (file)
@@ -73,12 +73,12 @@ CPUFREQ_POSTCHANGE.
 The third argument is a struct cpufreq_freqs with the following
 values:
 
-=====  ===========================
-cpu    number of the affected CPU
+====== ======================================
+policy a pointer to the struct cpufreq_policy
 old    old frequency
 new    new frequency
 flags  flags of the cpufreq driver
-=====  ===========================
+====== ======================================
 
 3. CPUFreq Table Generation with Operating Performance Point (OPP)
 ==================================================================
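The hunk above reflects that cpufreq transition notifiers are now handed a pointer to the governing struct cpufreq_policy in struct cpufreq_freqs instead of a bare CPU number. As a rough sketch (not taken from this tree), a transition notifier consuming the updated field could look like the following; the standard cpufreq_register_notifier() interface is assumed and the rest is module boilerplate::

  #include <linux/cpufreq.h>
  #include <linux/module.h>
  #include <linux/notifier.h>
  #include <linux/printk.h>

  /* Runs around every frequency transition (CPUFREQ_PRECHANGE/POSTCHANGE). */
  static int freq_trans_notify(struct notifier_block *nb, unsigned long action,
                               void *data)
  {
          struct cpufreq_freqs *freqs = data;

          if (action == CPUFREQ_POSTCHANGE)
                  pr_info("policy of CPU%u switched %u -> %u kHz\n",
                          freqs->policy->cpu, freqs->old, freqs->new);

          return NOTIFY_OK;
  }

  static struct notifier_block freq_trans_nb = {
          .notifier_call = freq_trans_notify,
  };

  static int __init freq_trans_init(void)
  {
          return cpufreq_register_notifier(&freq_trans_nb,
                                           CPUFREQ_TRANSITION_NOTIFIER);
  }

  static void __exit freq_trans_exit(void)
  {
          cpufreq_unregister_notifier(&freq_trans_nb,
                                      CPUFREQ_TRANSITION_NOTIFIER);
  }

  module_init(freq_trans_init);
  module_exit(freq_trans_exit);
  MODULE_LICENSE("GPL");
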
index 29b9447..fe0c89e 100644 (file)
@@ -17,9 +17,10 @@ properties:
     oneOf:
       - enum:
           - fsl,imx7ulp-lpi2c
-          - fsl,imx8qm-lpi2c
       - items:
-          - const: fsl,imx8qxp-lpi2c
+          - enum:
+              - fsl,imx8qxp-lpi2c
+              - fsl,imx8qm-lpi2c
           - const: fsl,imx7ulp-lpi2c
 
   reg:
index a132615..b0d354f 100644 (file)
@@ -50,11 +50,11 @@ ksmbd.mountd (user space daemon)
 --------------------------------
 
 ksmbd.mountd is userspace process to, transfer user account and password that
-are registered using ksmbd.adduser(part of utils for user space). Further it
+are registered using ksmbd.adduser (part of utils for user space). Further it
 allows sharing information parameters that parsed from smb.conf to ksmbd in
 kernel. For the execution part it has a daemon which is continuously running
 and connected to the kernel interface using netlink socket, it waits for the
-requests(dcerpc and share/user info). It handles RPC calls (at a minimum few
+requests (dcerpc and share/user info). It handles RPC calls (at a minimum few
 dozen) that are most important for file server from NetShareEnum and
 NetServerGetInfo. Complete DCE/RPC response is prepared from the user space
 and passed over to the associated kernel thread for the client.
@@ -154,11 +154,11 @@ Each layer
 1. Enable all component prints
        # sudo ksmbd.control -d "all"
 
-2. Enable one of components(smb, auth, vfs, oplock, ipc, conn, rdma)
+2. Enable one of components (smb, auth, vfs, oplock, ipc, conn, rdma)
        # sudo ksmbd.control -d "smb"
 
-3. Show what prints are enable.
-       # cat/sys/class/ksmbd-control/debug
+3. Show what prints are enabled.
+       # cat /sys/class/ksmbd-control/debug
          [smb] auth vfs oplock ipc conn [rdma]
 
 4. Disable prints:
index bb68d39..375baca 100644 (file)
@@ -1,7 +1,7 @@
 .. SPDX-License-Identifier: GPL-2.0
 
 =================================
-NETWORK FILESYSTEM HELPER LIBRARY
+Network Filesystem Helper Library
 =================================
 
 .. Contents:
@@ -37,22 +37,22 @@ into a common call framework.
 
 The following services are provided:
 
- * Handles transparent huge pages (THPs).
+ * Handle folios that span multiple pages.
 
- * Insulates the netfs from VM interface changes.
+ * Insulate the netfs from VM interface changes.
 
- * Allows the netfs to arbitrarily split reads up into pieces, even ones that
-   don't match page sizes or page alignments and that may cross pages.
+ * Allow the netfs to arbitrarily split reads up into pieces, even ones that
+   don't match folio sizes or folio alignments and that may cross folios.
 
- * Allows the netfs to expand a readahead request in both directions to meet
-   its needs.
+ * Allow the netfs to expand a readahead request in both directions to meet its
+   needs.
 
- * Allows the netfs to partially fulfil a read, which will then be resubmitted.
+ * Allow the netfs to partially fulfil a read, which will then be resubmitted.
 
- * Handles local caching, allowing cached data and server-read data to be
+ * Handle local caching, allowing cached data and server-read data to be
    interleaved for a single request.
 
- * Handles clearing of bufferage that aren't on the server.
+ * Handle clearing of bufferage that aren't on the server.
 
  * Handle retrying of reads that failed, switching reads from the cache to the
    server as necessary.
@@ -70,22 +70,22 @@ Read Helper Functions
 
 Three read helpers are provided::
 
- * void netfs_readahead(struct readahead_control *ractl,
-                       const struct netfs_read_request_ops *ops,
-                       void *netfs_priv);``
- * int netfs_readpage(struct file *file,
-                     struct page *page,
-                     const struct netfs_read_request_ops *ops,
-                     void *netfs_priv);
- * int netfs_write_begin(struct file *file,
-                        struct address_space *mapping,
-                        loff_t pos,
-                        unsigned int len,
-                        unsigned int flags,
-                        struct page **_page,
-                        void **_fsdata,
-                        const struct netfs_read_request_ops *ops,
-                        void *netfs_priv);
+      void netfs_readahead(struct readahead_control *ractl,
+                            const struct netfs_read_request_ops *ops,
+                            void *netfs_priv);
+      int netfs_readpage(struct file *file,
+                          struct folio *folio,
+                          const struct netfs_read_request_ops *ops,
+                          void *netfs_priv);
+      int netfs_write_begin(struct file *file,
+                             struct address_space *mapping,
+                             loff_t pos,
+                             unsigned int len,
+                             unsigned int flags,
+                             struct folio **_folio,
+                             void **_fsdata,
+                             const struct netfs_read_request_ops *ops,
+                             void *netfs_priv);
 
 Each corresponds to a VM operation, with the addition of a couple of parameters
 for the use of the read helpers:
@@ -103,8 +103,8 @@ Both of these values will be stored into the read request structure.
 For ->readahead() and ->readpage(), the network filesystem should just jump
 into the corresponding read helper; whereas for ->write_begin(), it may be a
 little more complicated as the network filesystem might want to flush
-conflicting writes or track dirty data and needs to put the acquired page if an
-error occurs after calling the helper.
+conflicting writes or track dirty data and needs to put the acquired folio if
+an error occurs after calling the helper.
 
 The helpers manage the read request, calling back into the network filesystem
 through the suppplied table of operations.  Waits will be performed as
@@ -253,7 +253,7 @@ through which it can issue requests and negotiate::
                void (*issue_op)(struct netfs_read_subrequest *subreq);
                bool (*is_still_valid)(struct netfs_read_request *rreq);
                int (*check_write_begin)(struct file *file, loff_t pos, unsigned len,
-                                        struct page *page, void **_fsdata);
+                                        struct folio *folio, void **_fsdata);
                void (*done)(struct netfs_read_request *rreq);
                void (*cleanup)(struct address_space *mapping, void *netfs_priv);
        };
@@ -313,13 +313,14 @@ The operations are as follows:
 
    There is no return value; the netfs_subreq_terminated() function should be
    called to indicate whether or not the operation succeeded and how much data
-   it transferred.  The filesystem also should not deal with setting pages
+   it transferred.  The filesystem also should not deal with setting folios
    uptodate, unlocking them or dropping their refs - the helpers need to deal
    with this as they have to coordinate with copying to the local cache.
 
-   Note that the helpers have the pages locked, but not pinned.  It is possible
-   to use the ITER_XARRAY iov iterator to refer to the range of the inode that
-   is being operated upon without the need to allocate large bvec tables.
+   Note that the helpers have the folios locked, but not pinned.  It is
+   possible to use the ITER_XARRAY iov iterator to refer to the range of the
+   inode that is being operated upon without the need to allocate large bvec
+   tables.
 
  * ``is_still_valid()``
 
@@ -330,15 +331,15 @@ The operations are as follows:
  * ``check_write_begin()``
 
    [Optional] This is called from the netfs_write_begin() helper once it has
-   allocated/grabbed the page to be modified to allow the filesystem to flush
+   allocated/grabbed the folio to be modified to allow the filesystem to flush
    conflicting state before allowing it to be modified.
 
-   It should return 0 if everything is now fine, -EAGAIN if the page should be
+   It should return 0 if everything is now fine, -EAGAIN if the folio should be
    regrabbed and any other error code to abort the operation.
 
  * ``done``
 
-   [Optional] This is called after the pages in the request have all been
+   [Optional] This is called after the folios in the request have all been
    unlocked (and marked uptodate if applicable).
 
  * ``cleanup``
@@ -390,7 +391,7 @@ The read helpers work by the following general procedure:
      * If NETFS_SREQ_CLEAR_TAIL was set, a short read will be cleared to the
        end of the slice instead of reissuing.
 
- * Once the data is read, the pages that have been fully read/cleared:
+ * Once the data is read, the folios that have been fully read/cleared:
 
    * Will be marked uptodate.
 
@@ -398,11 +399,11 @@ The read helpers work by the following general procedure:
 
    * Unlocked
 
- * Any pages that need writing to the cache will then have DIO writes issued.
+ * Any folios that need writing to the cache will then have DIO writes issued.
 
  * Synchronous operations will wait for reading to be complete.
 
- * Writes to the cache will proceed asynchronously and the pages will have the
+ * Writes to the cache will proceed asynchronously and the folios will have the
    PG_fscache mark removed when that completes.
 
  * The request structures will be cleaned up when everything has completed.
@@ -452,6 +453,9 @@ operation table looks like the following::
                            netfs_io_terminated_t term_func,
                            void *term_func_priv);
 
+               int (*prepare_write)(struct netfs_cache_resources *cres,
+                                    loff_t *_start, size_t *_len, loff_t i_size);
+
                int (*write)(struct netfs_cache_resources *cres,
                             loff_t start_pos,
                             struct iov_iter *iter,
@@ -509,6 +513,14 @@ The methods defined in the table are:
    indicating whether the termination is definitely happening in the caller's
    context.
 
+ * ``prepare_write()``
+
+   [Required] Called to adjust a write to the cache and check that there is
+   sufficient space in the cache.  The start and length values indicate the
+   size of the write that netfslib is proposing, and this can be adjusted by
+   the cache to respect DIO boundaries.  The file size is passed for
+   information.
+
  * ``write()``
 
    [Required] Called to write to the cache.  The start file offset is given
@@ -525,4 +537,9 @@ not the read request structure as they could be used in other situations where
 there isn't a read request structure as well, such as writing dirty data to the
 cache.
 
+
+API Function Reference
+======================
+
 .. kernel-doc:: include/linux/netfs.h
+.. kernel-doc:: fs/netfs/read_helper.c
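As a point of reference for the folio conversion above, a network filesystem wires these hooks up through its netfs_read_request_ops table. The sketch below is hypothetical (the myfs_* names are invented) and fills in only the two optional hooks whose signatures appear in this patch; a real filesystem must also supply the mandatory operations such as issue_op()::

  #include <linux/fs.h>
  #include <linux/netfs.h>

  /* Hypothetical hook: flush conflicting state before a buffered write. */
  static int myfs_check_write_begin(struct file *file, loff_t pos, unsigned len,
                                    struct folio *folio, void **_fsdata)
  {
          /* 0 = proceed, -EAGAIN = ask netfslib to regrab the folio,
           * any other negative errno = abort the operation. */
          return 0;
  }

  static void myfs_cleanup(struct address_space *mapping, void *netfs_priv)
  {
          /* Drop whatever the filesystem stashed in netfs_priv, if anything. */
  }

  static const struct netfs_read_request_ops myfs_req_ops = {
          .check_write_begin      = myfs_check_write_begin,
          .cleanup                = myfs_cleanup,
  };
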
index 9e07e6b..00d8e17 100644 (file)
@@ -36,6 +36,8 @@ Key to symbols
 
 =============== =============================================================
 S               Start condition
+Sr              Repeated start condition, used to switch from write to
+                read mode.
 P               Stop condition
 Rd/Wr (1 bit)   Read/Write bit. Rd equals 1, Wr equals 0.
 A, NA (1 bit)   Acknowledge (ACK) and Not Acknowledge (NACK) bit
@@ -100,7 +102,7 @@ Implemented by i2c_smbus_read_byte_data()
 This reads a single byte from a device, from a designated register.
 The register is specified through the Comm byte::
 
-  S Addr Wr [A] Comm [A] S Addr Rd [A] [Data] NA P
+  S Addr Wr [A] Comm [A] Sr Addr Rd [A] [Data] NA P
 
 Functionality flag: I2C_FUNC_SMBUS_READ_BYTE_DATA
 
@@ -114,7 +116,7 @@ This operation is very like Read Byte; again, data is read from a
 device, from a designated register that is specified through the Comm
 byte. But this time, the data is a complete word (16 bits)::
 
-  S Addr Wr [A] Comm [A] S Addr Rd [A] [DataLow] A [DataHigh] NA P
+  S Addr Wr [A] Comm [A] Sr Addr Rd [A] [DataLow] A [DataHigh] NA P
 
 Functionality flag: I2C_FUNC_SMBUS_READ_WORD_DATA
 
@@ -164,7 +166,7 @@ This command selects a device register (through the Comm byte), sends
 16 bits of data to it, and reads 16 bits of data in return::
 
   S Addr Wr [A] Comm [A] DataLow [A] DataHigh [A]
-                               S Addr Rd [A] [DataLow] A [DataHigh] NA P
+                              Sr Addr Rd [A] [DataLow] A [DataHigh] NA P
 
 Functionality flag: I2C_FUNC_SMBUS_PROC_CALL
 
@@ -181,7 +183,7 @@ of data is specified by the device in the Count byte.
 ::
 
   S Addr Wr [A] Comm [A]
-             S Addr Rd [A] [Count] A [Data] A [Data] A ... A [Data] NA P
+            Sr Addr Rd [A] [Count] A [Data] A [Data] A ... A [Data] NA P
 
 Functionality flag: I2C_FUNC_SMBUS_READ_BLOCK_DATA
 
@@ -212,7 +214,7 @@ This command selects a device register (through the Comm byte), sends
 1 to 31 bytes of data to it, and reads 1 to 31 bytes of data in return::
 
   S Addr Wr [A] Comm [A] Count [A] Data [A] ...
-                               S Addr Rd [A] [Count] A [Data] ... A P
+                              Sr Addr Rd [A] [Count] A [Data] ... A P
 
 Functionality flag: I2C_FUNC_SMBUS_BLOCK_PROC_CALL
 
@@ -300,7 +302,7 @@ This command reads a block of bytes from a device, from a
 designated register that is specified through the Comm byte::
 
   S Addr Wr [A] Comm [A]
-             S Addr Rd [A] [Data] A [Data] A ... A [Data] NA P
+            Sr Addr Rd [A] [Data] A [Data] A ... A [Data] NA P
 
 Functionality flag: I2C_FUNC_SMBUS_READ_I2C_BLOCK
 
index 95ef56d..387fda8 100644 (file)
@@ -37,8 +37,7 @@ conn_reuse_mode - INTEGER
 
        0: disable any special handling on port reuse. The new
        connection will be delivered to the same real server that was
-       servicing the previous connection. This will effectively
-       disable expire_nodest_conn.
+       servicing the previous connection.
 
        bit 1: enable rescheduling of new connections when it is safe.
        That is, whenever expire_nodest_conn and for TCP sockets, when
index a722eb3..80b1335 100644 (file)
@@ -486,8 +486,8 @@ of packets.
 Drivers are free to use a more permissive configuration than the requested
 configuration. It is expected that drivers should only implement directly the
 most generic mode that can be supported. For example if the hardware can
-support HWTSTAMP_FILTER_V2_EVENT, then it should generally always upscale
-HWTSTAMP_FILTER_V2_L2_SYNC_MESSAGE, and so forth, as HWTSTAMP_FILTER_V2_EVENT
+support HWTSTAMP_FILTER_PTP_V2_EVENT, then it should generally always upscale
+HWTSTAMP_FILTER_PTP_V2_L2_SYNC, and so forth, as HWTSTAMP_FILTER_PTP_V2_EVENT
 is more generic (and more useful to applications).
 
 A driver which supports hardware time stamping shall update the struct
index f60f548..5b1ebad 100644 (file)
@@ -161,7 +161,7 @@ Shadow pages contain the following information:
     If clear, this page corresponds to a guest page table denoted by the gfn
     field.
   role.quadrant:
-    When role.gpte_is_8_bytes=0, the guest uses 32-bit gptes while the host uses 64-bit
+    When role.has_4_byte_gpte=1, the guest uses 32-bit gptes while the host uses 64-bit
     sptes.  That means a guest page table contains more ptes than the host,
     so multiple shadow pages are needed to shadow one guest page.
     For first-level shadow pages, role.quadrant can be 0 or 1 and denotes the
@@ -177,9 +177,9 @@ Shadow pages contain the following information:
     The page is invalid and should not be used.  It is a root page that is
     currently pinned (by a cpu hardware register pointing to it); once it is
     unpinned it will be destroyed.
-  role.gpte_is_8_bytes:
-    Reflects the size of the guest PTE for which the page is valid, i.e. '1'
-    if 64-bit gptes are in use, '0' if 32-bit gptes are in use.
+  role.has_4_byte_gpte:
+    Reflects the size of the guest PTE for which the page is valid, i.e. '0'
+    if direct map or 64-bit gptes are in use, '1' if 32-bit gptes are in use.
   role.efer_nx:
     Contains the value of efer.nx for which the page is valid.
   role.cr0_wp:
index 5250298..43007f2 100644 (file)
@@ -2263,6 +2263,15 @@ L:       linux-iio@vger.kernel.org
 S:     Maintained
 F:     drivers/counter/microchip-tcb-capture.c
 
+ARM/MILBEAUT ARCHITECTURE
+M:     Taichi Sugaya <sugaya.taichi@socionext.com>
+M:     Takao Orito <orito.takao@socionext.com>
+L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+S:     Maintained
+F:     arch/arm/boot/dts/milbeaut*
+F:     arch/arm/mach-milbeaut/
+N:     milbeaut
+
 ARM/MIOA701 MACHINE SUPPORT
 M:     Robert Jarzmik <robert.jarzmik@free.fr>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
@@ -2729,10 +2738,11 @@ S:      Maintained
 F:     drivers/memory/*emif*
 
 ARM/TEXAS INSTRUMENT KEYSTONE ARCHITECTURE
+M:     Nishanth Menon <nm@ti.com>
 M:     Santosh Shilimkar <ssantosh@kernel.org>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/ssantosh/linux-keystone.git
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/ti/linux.git
 F:     arch/arm/boot/dts/keystone-*
 F:     arch/arm/mach-keystone/
 
@@ -3570,13 +3580,14 @@ L:      netdev@vger.kernel.org
 S:     Supported
 F:     drivers/net/ethernet/broadcom/b44.*
 
-BROADCOM B53 ETHERNET SWITCH DRIVER
+BROADCOM B53/SF2 ETHERNET SWITCH DRIVER
 M:     Florian Fainelli <f.fainelli@gmail.com>
 L:     netdev@vger.kernel.org
 L:     openwrt-devel@lists.openwrt.org (subscribers-only)
 S:     Supported
 F:     Documentation/devicetree/bindings/net/dsa/brcm,b53.yaml
 F:     drivers/net/dsa/b53/*
+F:     drivers/net/dsa/bcm_sf2*
 F:     include/linux/dsa/brcm.h
 F:     include/linux/platform_data/b53.h
 
@@ -15968,6 +15979,7 @@ F:      arch/mips/generic/board-ranchu.c
 
 RANDOM NUMBER DRIVER
 M:     "Theodore Ts'o" <tytso@mit.edu>
+M:     Jason A. Donenfeld <Jason@zx2c4.com>
 S:     Maintained
 F:     drivers/char/random.c
 
@@ -16490,6 +16502,12 @@ T:     git git://linuxtv.org/media_tree.git
 F:     Documentation/devicetree/bindings/media/allwinner,sun8i-a83t-de2-rotate.yaml
 F:     drivers/media/platform/sunxi/sun8i-rotate/
 
+RPMSG TTY DRIVER
+M:     Arnaud Pouliquen <arnaud.pouliquen@foss.st.com>
+L:     linux-remoteproc@vger.kernel.org
+S:     Maintained
+F:     drivers/tty/rpmsg_tty.c
+
 RTL2830 MEDIA DRIVER
 M:     Antti Palosaari <crope@iki.fi>
 L:     linux-media@vger.kernel.org
@@ -16612,7 +16630,8 @@ F:      drivers/iommu/s390-iommu.c
 
 S390 IUCV NETWORK LAYER
 M:     Julian Wiedmann <jwi@linux.ibm.com>
-M:     Karsten Graul <kgraul@linux.ibm.com>
+M:     Alexandra Winter <wintera@linux.ibm.com>
+M:     Wenjia Zhang <wenjia@linux.ibm.com>
 L:     linux-s390@vger.kernel.org
 L:     netdev@vger.kernel.org
 S:     Supported
@@ -16623,7 +16642,8 @@ F:      net/iucv/
 
 S390 NETWORK DRIVERS
 M:     Julian Wiedmann <jwi@linux.ibm.com>
-M:     Karsten Graul <kgraul@linux.ibm.com>
+M:     Alexandra Winter <wintera@linux.ibm.com>
+M:     Wenjia Zhang <wenjia@linux.ibm.com>
 L:     linux-s390@vger.kernel.org
 L:     netdev@vger.kernel.org
 S:     Supported
@@ -18483,6 +18503,7 @@ F:      include/uapi/linux/pkt_sched.h
 F:     include/uapi/linux/tc_act/
 F:     include/uapi/linux/tc_ematch/
 F:     net/sched/
+F:     tools/testing/selftests/tc-testing
 
 TC90522 MEDIA DRIVER
 M:     Akihiro Tsukada <tskd08@gmail.com>
@@ -19031,11 +19052,12 @@ F:    drivers/mmc/host/tifm_sd.c
 F:     include/linux/tifm.h
 
 TI KEYSTONE MULTICORE NAVIGATOR DRIVERS
+M:     Nishanth Menon <nm@ti.com>
 M:     Santosh Shilimkar <ssantosh@kernel.org>
 L:     linux-kernel@vger.kernel.org
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/ssantosh/linux-keystone.git
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/ti/linux.git
 F:     drivers/soc/ti/*
 
 TI LM49xxx FAMILY ASoC CODEC DRIVERS
index daf95a5..8e35d78 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -2,8 +2,8 @@
 VERSION = 5
 PATCHLEVEL = 16
 SUBLEVEL = 0
-EXTRAVERSION = -rc2
-NAME = Trick or Treat
+EXTRAVERSION = -rc4
+NAME = Gobble Gobble
 
 # *DOCUMENTATION*
 # To see a list of typical targets execute "make help"
index 26b8ed1..d3c4ab2 100644 (file)
@@ -991,6 +991,16 @@ config HAVE_ARCH_COMPAT_MMAP_BASES
          and vice-versa 32-bit applications to call 64-bit mmap().
          Required for applications doing different bitness syscalls.
 
+config PAGE_SIZE_LESS_THAN_64KB
+       def_bool y
+       depends on !ARM64_64K_PAGES
+       depends on !IA64_PAGE_SIZE_64KB
+       depends on !PAGE_SIZE_64KB
+       depends on !PARISC_PAGE_SIZE_64KB
+       depends on !PPC_64K_PAGES
+       depends on !PPC_256K_PAGES
+       depends on !PAGE_SIZE_256KB
+
 # This allows to use a set of generic functions to determine mmap base
 # address by giving priority to top-down scheme only if the process
 # is not in legacy mode (compat task, unlimited stack size or
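The new helper symbol lets code state "the base page size is below 64 KiB" once, instead of enumerating every per-architecture page-size option. A Kconfig entry can simply depend on PAGE_SIZE_LESS_THAN_64KB; from C the same condition is reachable through IS_ENABLED(), as in this small, purely illustrative build-time check::

  #include <linux/build_bug.h>
  #include <linux/kconfig.h>
  #include <linux/mm.h>
  #include <linux/sizes.h>

  static inline void page_size_sanity_check(void)
  {
          /* Both sides are compile-time constants, so a mismatch between the
           * Kconfig symbol and PAGE_SIZE fails the build rather than booting. */
          BUILD_BUG_ON(IS_ENABLED(CONFIG_PAGE_SIZE_LESS_THAN_64KB) &&
                       PAGE_SIZE >= SZ_64K);
  }
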
index e4a041c..ca5a322 100644 (file)
 556    common  landlock_restrict_self          sys_landlock_restrict_self
 # 557 reserved for memfd_secret
 558    common  process_mrelease                sys_process_mrelease
+559    common  futex_waitv                     sys_futex_waitv
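futex_waitv() is the new multi-wait futex call being given slot 559 on alpha here (449 on most other architectures). There is no glibc wrapper yet, so userspace reaches it through syscall(2); the following is a rough userspace sketch, assuming headers new enough to provide SYS_futex_waitv and struct futex_waitv, and it expects an immediate EAGAIN because the futex word does not hold the expected value::

  #include <errno.h>
  #include <linux/futex.h>        /* struct futex_waitv, FUTEX_32 */
  #include <stdint.h>
  #include <stdio.h>
  #include <string.h>
  #include <sys/syscall.h>
  #include <time.h>
  #include <unistd.h>

  static uint32_t futex_word = 1;         /* deliberately != the expected value */

  int main(void)
  {
          struct futex_waitv waiter = {
                  .val    = 0,                    /* value we expect the word to hold */
                  .uaddr  = (uintptr_t)&futex_word,
                  .flags  = FUTEX_32,
          };

          long ret = syscall(SYS_futex_waitv, &waiter, 1, 0, NULL, CLOCK_MONOTONIC);

          if (ret < 0)
                  printf("futex_waitv: %s (EAGAIN expected here)\n", strerror(errno));
          else
                  printf("woken waiter index: %ld\n", ret);
          return 0;
  }
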
index e8c2c74..e201b4b 100644 (file)
@@ -36,7 +36,6 @@ void __flush_dcache_page(phys_addr_t paddr, unsigned long vaddr);
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 
 void flush_dcache_page(struct page *page);
-void flush_dcache_folio(struct folio *folio);
 
 void dma_cache_wback_inv(phys_addr_t start, unsigned long sz);
 void dma_cache_inv(phys_addr_t start, unsigned long sz);
index 3b60297..9e01dbc 100644 (file)
                        #address-cells = <3>;
                        #interrupt-cells = <1>;
                        #size-cells = <2>;
-                       interrupts = <GIC_SPI 148 IRQ_TYPE_LEVEL_HIGH>,
+                       interrupts = <GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>,
                                     <GIC_SPI 148 IRQ_TYPE_LEVEL_HIGH>;
                        interrupt-names = "pcie", "msi";
                        interrupt-map-mask = <0x0 0x0 0x0 0x7>;
                        interrupt-map = <0 0 0 1 &gicv2 GIC_SPI 143
+                                                       IRQ_TYPE_LEVEL_HIGH>,
+                                       <0 0 0 2 &gicv2 GIC_SPI 144
+                                                       IRQ_TYPE_LEVEL_HIGH>,
+                                       <0 0 0 3 &gicv2 GIC_SPI 145
+                                                       IRQ_TYPE_LEVEL_HIGH>,
+                                       <0 0 0 4 &gicv2 GIC_SPI 146
                                                        IRQ_TYPE_LEVEL_HIGH>;
                        msi-controller;
                        msi-parent = <&pcie0>;
index d4f3550..f69d2af 100644 (file)
 
                        gpio-controller;
                        #gpio-cells = <2>;
+                       interrupt-controller;
+                       #interrupt-cells = <2>;
                };
 
                pcie0: pcie@12000 {
        i2c0: i2c@18009000 {
                compatible = "brcm,iproc-i2c";
                reg = <0x18009000 0x50>;
-               interrupts = <GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>;
+               interrupts = <GIC_SPI 89 IRQ_TYPE_LEVEL_HIGH>;
                #address-cells = <1>;
                #size-cells = <0>;
                clock-frequency = <100000>;
index e68fb87..5e56288 100644 (file)
@@ -290,7 +290,6 @@ extern void flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr
  */
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 extern void flush_dcache_page(struct page *);
-void flush_dcache_folio(struct folio *folio);
 
 #define ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE 1
 static inline void flush_kernel_vmap_range(void *addr, int size)
index fc2608b..18f0119 100644 (file)
@@ -33,7 +33,7 @@ extern void __iomem *sdr_ctl_base_addr;
 u32 socfpga_sdram_self_refresh(u32 sdr_base);
 extern unsigned int socfpga_sdram_self_refresh_sz;
 
-extern char secondary_trampoline, secondary_trampoline_end;
+extern char secondary_trampoline[], secondary_trampoline_end[];
 
 extern unsigned long socfpga_cpu1start_addr;
 
index fbb80b8..201191c 100644 (file)
 
 static int socfpga_boot_secondary(unsigned int cpu, struct task_struct *idle)
 {
-       int trampoline_size = &secondary_trampoline_end - &secondary_trampoline;
+       int trampoline_size = secondary_trampoline_end - secondary_trampoline;
 
        if (socfpga_cpu1start_addr) {
                /* This will put CPU #1 into reset. */
                writel(RSTMGR_MPUMODRST_CPU1,
                       rst_manager_base_addr + SOCFPGA_RSTMGR_MODMPURST);
 
-               memcpy(phys_to_virt(0), &secondary_trampoline, trampoline_size);
+               memcpy(phys_to_virt(0), secondary_trampoline, trampoline_size);
 
                writel(__pa_symbol(secondary_startup),
                       sys_manager_base_addr + (socfpga_cpu1start_addr & 0x000000ff));
@@ -45,12 +45,12 @@ static int socfpga_boot_secondary(unsigned int cpu, struct task_struct *idle)
 
 static int socfpga_a10_boot_secondary(unsigned int cpu, struct task_struct *idle)
 {
-       int trampoline_size = &secondary_trampoline_end - &secondary_trampoline;
+       int trampoline_size = secondary_trampoline_end - secondary_trampoline;
 
        if (socfpga_cpu1start_addr) {
                writel(RSTMGR_MPUMODRST_CPU1, rst_manager_base_addr +
                       SOCFPGA_A10_RSTMGR_MODMPURST);
-               memcpy(phys_to_virt(0), &secondary_trampoline, trampoline_size);
+               memcpy(phys_to_virt(0), secondary_trampoline, trampoline_size);
 
                writel(__pa_symbol(secondary_startup),
                       sys_manager_base_addr + (socfpga_cpu1start_addr & 0x00000fff));
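The substance of the two socfpga hunks is the declaration change: secondary_trampoline and secondary_trampoline_end only mark positions, and declaring them as single chars so their difference can be taken with &end - &start is pointer arithmetic between distinct objects, which newer compilers warn about or miscompile. Declaring them as incomplete arrays lets them decay to addresses directly. A generic sketch of the idiom, with invented symbol names::

  #include <linux/string.h>
  #include <linux/types.h>

  /* Markers provided by assembly or the linker script; names are illustrative. */
  extern char __blob_start[], __blob_end[];

  static void copy_blob(void *dst)
  {
          size_t size = __blob_end - __blob_start;  /* arrays decay to their addresses */

          memcpy(dst, __blob_start, size);
  }
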
index 3e47273..a960c0b 100644 (file)
                        pinctrl-0 = <&ufs_rst_n &ufs_refclk_out>;
                        phys = <&ufs_0_phy>;
                        phy-names = "ufs-phy";
-                       samsung,sysreg = <&syscon_fsys2>;
-                       samsung,ufs-shareability-reg-offset = <0x710>;
+                       samsung,sysreg = <&syscon_fsys2 0x710>;
                        status = "disabled";
                };
        };
index 347b0cc..1494cfa 100644 (file)
 
 #define HAVE_FUNCTION_GRAPH_FP_TEST
 
+/*
+ * HAVE_FUNCTION_GRAPH_RET_ADDR_PTR means that the architecture can provide a
+ * "return address pointer" which can be used to uniquely identify a return
+ * address which has been overwritten.
+ *
+ * On arm64 we use the address of the caller's frame record, which remains the
+ * same for the lifetime of the instrumented function, unlike the return
+ * address in the LR.
+ */
+#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
+
 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
 #define ARCH_SUPPORTS_FTRACE_OPS 1
 #else
index a39fcf3..01d47c5 100644 (file)
@@ -91,7 +91,7 @@
 #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
 
 /* TCR_EL2 Registers bits */
-#define TCR_EL2_RES1           ((1 << 31) | (1 << 23))
+#define TCR_EL2_RES1           ((1U << 31) | (1 << 23))
 #define TCR_EL2_TBI            (1 << 20)
 #define TCR_EL2_PS_SHIFT       16
 #define TCR_EL2_PS_MASK                (7 << TCR_EL2_PS_SHIFT)
 #define CPTR_EL2_TFP_SHIFT 10
 
 /* Hyp Coprocessor Trap Register */
-#define CPTR_EL2_TCPAC (1 << 31)
+#define CPTR_EL2_TCPAC (1U << 31)
 #define CPTR_EL2_TAM   (1 << 30)
 #define CPTR_EL2_TTA   (1 << 20)
 #define CPTR_EL2_TFP   (1 << CPTR_EL2_TFP_SHIFT)
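Both defines touched here move bit 31 from 1 << 31 to 1U << 31. With a 32-bit int, shifting a 1 into the sign bit is undefined in C, and on the usual implementations the result is negative, so storing it into a wider unsigned field sign-extends and sets the upper 32 bits as well. A small host-side illustration of the difference::

  #include <stdint.h>
  #include <stdio.h>

  int main(void)
  {
          /* Shifting into the sign bit of a 32-bit int is undefined; in practice
           * it yields INT_MIN, which sign-extends when widened to 64 bits. */
          uint64_t from_signed   = (1 << 31);
          /* Well defined: stays 0x0000000080000000 after widening. */
          uint64_t from_unsigned = (1U << 31);

          printf("1  << 31 widened: %#018llx\n", (unsigned long long)from_signed);
          printf("1U << 31 widened: %#018llx\n", (unsigned long long)from_unsigned);
          return 0;
  }
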
index f4871e4..28acc65 100644 (file)
@@ -41,6 +41,8 @@ void kvm_inject_vabt(struct kvm_vcpu *vcpu);
 void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
 void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
 
+void kvm_vcpu_wfi(struct kvm_vcpu *vcpu);
+
 static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
 {
        return !(vcpu->arch.hcr_el2 & HCR_RW);
index 2a5f7f3..0e75277 100644 (file)
@@ -717,7 +717,6 @@ void kvm_arm_vcpu_ptrauth_trap(struct kvm_vcpu *vcpu);
 static inline void kvm_arch_hardware_unsetup(void) {}
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
-static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
 
 void kvm_arm_init_debug(void);
 void kvm_arm_vcpu_init_debug(struct kvm_vcpu *vcpu);
index 8433a20..2372244 100644 (file)
@@ -76,7 +76,7 @@ static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t ptep,
 static inline void
 pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep)
 {
-       VM_BUG_ON(mm != &init_mm);
+       VM_BUG_ON(mm && mm != &init_mm);
        __pmd_populate(pmdp, __pa(ptep), PMD_TYPE_TABLE | PMD_TABLE_UXN);
 }
 
index a4e046e..6564a01 100644 (file)
@@ -47,9 +47,6 @@ struct stack_info {
  * @prev_type:   The type of stack this frame record was on, or a synthetic
  *               value of STACK_TYPE_UNKNOWN. This is used to detect a
  *               transition from one stack to another.
- *
- * @graph:       When FUNCTION_GRAPH_TRACER is selected, holds the index of a
- *               replacement lr value in the ftrace graph stack.
  */
 struct stackframe {
        unsigned long fp;
@@ -57,9 +54,6 @@ struct stackframe {
        DECLARE_BITMAP(stacks_done, __NR_STACK_TYPES);
        unsigned long prev_fp;
        enum stack_type prev_type;
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-       int graph;
-#endif
 #ifdef CONFIG_KRETPROBES
        struct llist_node *kr_cur;
 #endif
index 6e2e0b7..3a5ff5e 100644 (file)
@@ -281,12 +281,22 @@ do {                                                                      \
        (x) = (__force __typeof__(*(ptr)))__gu_val;                     \
 } while (0)
 
+/*
+ * We must not call into the scheduler between uaccess_ttbr0_enable() and
+ * uaccess_ttbr0_disable(). As `x` and `ptr` could contain blocking functions,
+ * we must evaluate these outside of the critical section.
+ */
 #define __raw_get_user(x, ptr, err)                                    \
 do {                                                                   \
+       __typeof__(*(ptr)) __user *__rgu_ptr = (ptr);                   \
+       __typeof__(x) __rgu_val;                                        \
        __chk_user_ptr(ptr);                                            \
+                                                                       \
        uaccess_ttbr0_enable();                                         \
-       __raw_get_mem("ldtr", x, ptr, err);                             \
+       __raw_get_mem("ldtr", __rgu_val, __rgu_ptr, err);               \
        uaccess_ttbr0_disable();                                        \
+                                                                       \
+       (x) = __rgu_val;                                                \
 } while (0)
 
 #define __get_user_error(x, ptr, err)                                  \
@@ -310,14 +320,22 @@ do {                                                                      \
 
 #define get_user       __get_user
 
+/*
+ * We must not call into the scheduler between __uaccess_enable_tco_async() and
+ * __uaccess_disable_tco_async(). As `dst` and `src` may contain blocking
+ * functions, we must evaluate these outside of the critical section.
+ */
 #define __get_kernel_nofault(dst, src, type, err_label)                        \
 do {                                                                   \
+       __typeof__(dst) __gkn_dst = (dst);                              \
+       __typeof__(src) __gkn_src = (src);                              \
        int __gkn_err = 0;                                              \
                                                                        \
        __uaccess_enable_tco_async();                                   \
-       __raw_get_mem("ldr", *((type *)(dst)),                          \
-                     (__force type *)(src), __gkn_err);                \
+       __raw_get_mem("ldr", *((type *)(__gkn_dst)),                    \
+                     (__force type *)(__gkn_src), __gkn_err);          \
        __uaccess_disable_tco_async();                                  \
+                                                                       \
        if (unlikely(__gkn_err))                                        \
                goto err_label;                                         \
 } while (0)
@@ -351,11 +369,19 @@ do {                                                                      \
        }                                                               \
 } while (0)
 
+/*
+ * We must not call into the scheduler between uaccess_ttbr0_enable() and
+ * uaccess_ttbr0_disable(). As `x` and `ptr` could contain blocking functions,
+ * we must evaluate these outside of the critical section.
+ */
 #define __raw_put_user(x, ptr, err)                                    \
 do {                                                                   \
-       __chk_user_ptr(ptr);                                            \
+       __typeof__(*(ptr)) __user *__rpu_ptr = (ptr);                   \
+       __typeof__(*(ptr)) __rpu_val = (x);                             \
+       __chk_user_ptr(__rpu_ptr);                                      \
+                                                                       \
        uaccess_ttbr0_enable();                                         \
-       __raw_put_mem("sttr", x, ptr, err);                             \
+       __raw_put_mem("sttr", __rpu_val, __rpu_ptr, err);               \
        uaccess_ttbr0_disable();                                        \
 } while (0)
 
@@ -380,14 +406,22 @@ do {                                                                      \
 
 #define put_user       __put_user
 
+/*
+ * We must not call into the scheduler between __uaccess_enable_tco_async() and
+ * __uaccess_disable_tco_async(). As `dst` and `src` may contain blocking
+ * functions, we must evaluate these outside of the critical section.
+ */
 #define __put_kernel_nofault(dst, src, type, err_label)                        \
 do {                                                                   \
+       __typeof__(dst) __pkn_dst = (dst);                              \
+       __typeof__(src) __pkn_src = (src);                              \
        int __pkn_err = 0;                                              \
                                                                        \
        __uaccess_enable_tco_async();                                   \
-       __raw_put_mem("str", *((type *)(src)),                          \
-                     (__force type *)(dst), __pkn_err);                \
+       __raw_put_mem("str", *((type *)(__pkn_src)),                    \
+                     (__force type *)(__pkn_dst), __pkn_err);          \
        __uaccess_disable_tco_async();                                  \
+                                                                       \
        if (unlikely(__pkn_err))                                        \
                goto err_label;                                         \
 } while(0)
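
The new __rgu_*/__rpu_*/__gkn_*/__pkn_* locals make every macro argument
evaluate exactly once, before the uaccess or TCO window is opened. A
stand-alone toy showing the same single-evaluation pattern; critical_enter(),
critical_exit() and slow_lookup() are made-up stand-ins, not kernel APIs:

        #include <stdio.h>

        static void critical_enter(void) { /* think uaccess_ttbr0_enable()  */ }
        static void critical_exit(void)  { /* think uaccess_ttbr0_disable() */ }

        /* Evaluate 'ptr' once, outside the window, as __raw_get_user() now does. */
        #define GET_OUTSIDE_WINDOW(x, ptr)                                      \
        do {                                                                    \
                __typeof__(ptr) __p = (ptr);  /* may block, still safe here */  \
                critical_enter();                                               \
                (x) = *__p;                   /* nothing here may sleep */      \
                critical_exit();                                                \
        } while (0)

        static int value = 42;
        static int *slow_lookup(void) { return &value; } /* stands in for code that may sleep */

        int main(void)
        {
                int x;

                GET_OUTSIDE_WINDOW(x, slow_lookup());
                printf("%d\n", x);
                return 0;
        }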
index b3e4f9a..8cf970d 100644 (file)
        .endm
 
 SYM_CODE_START(ftrace_regs_caller)
+#ifdef BTI_C
+       BTI_C
+#endif
        ftrace_regs_entry       1
        b       ftrace_common
 SYM_CODE_END(ftrace_regs_caller)
 
 SYM_CODE_START(ftrace_caller)
+#ifdef BTI_C
+       BTI_C
+#endif
        ftrace_regs_entry       0
        b       ftrace_common
 SYM_CODE_END(ftrace_caller)
index fc62dfe..4506c4a 100644 (file)
@@ -244,8 +244,6 @@ void arch_ftrace_update_code(int command)
  * on the way back to parent. For this purpose, this function is called
  * in _mcount() or ftrace_caller() to replace return address (*parent) on
  * the call stack to return_to_handler.
- *
- * Note that @frame_pointer is used only for sanity check later.
  */
 void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
                           unsigned long frame_pointer)
@@ -263,8 +261,10 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
         */
        old = *parent;
 
-       if (!function_graph_enter(old, self_addr, frame_pointer, NULL))
+       if (!function_graph_enter(old, self_addr, frame_pointer,
+           (void *)frame_pointer)) {
                *parent = return_hooker;
+       }
 }
 
 #ifdef CONFIG_DYNAMIC_FTRACE
index 1038494..6fb31c1 100644 (file)
@@ -147,7 +147,7 @@ int machine_kexec_post_load(struct kimage *kimage)
        if (rc)
                return rc;
        kimage->arch.ttbr1 = __pa(trans_pgd);
-       kimage->arch.zero_page = __pa(empty_zero_page);
+       kimage->arch.zero_page = __pa_symbol(empty_zero_page);
 
        reloc_size = __relocate_new_kernel_end - __relocate_new_kernel_start;
        memcpy(reloc_code, __relocate_new_kernel_start, reloc_size);
index c30624f..94f83cd 100644 (file)
@@ -38,9 +38,6 @@ void start_backtrace(struct stackframe *frame, unsigned long fp,
 {
        frame->fp = fp;
        frame->pc = pc;
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-       frame->graph = 0;
-#endif
 #ifdef CONFIG_KRETPROBES
        frame->kr_cur = NULL;
 #endif
@@ -116,20 +113,23 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
        frame->prev_fp = fp;
        frame->prev_type = info.type;
 
+       frame->pc = ptrauth_strip_insn_pac(frame->pc);
+
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
        if (tsk->ret_stack &&
-               (ptrauth_strip_insn_pac(frame->pc) == (unsigned long)return_to_handler)) {
-               struct ftrace_ret_stack *ret_stack;
+               (frame->pc == (unsigned long)return_to_handler)) {
+               unsigned long orig_pc;
                /*
                 * This is a case where function graph tracer has
                 * modified a return address (LR) in a stack frame
                 * to hook a function return.
                 * So replace it to an original value.
                 */
-               ret_stack = ftrace_graph_get_ret_stack(tsk, frame->graph++);
-               if (WARN_ON_ONCE(!ret_stack))
+               orig_pc = ftrace_graph_ret_addr(tsk, NULL, frame->pc,
+                                               (void *)frame->fp);
+               if (WARN_ON_ONCE(frame->pc == orig_pc))
                        return -EINVAL;
-               frame->pc = ret_stack->ret;
+               frame->pc = orig_pc;
        }
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
 #ifdef CONFIG_KRETPROBES
@@ -137,8 +137,6 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
                frame->pc = kretprobe_find_ret_addr(tsk, (void *)frame->fp, &frame->kr_cur);
 #endif
 
-       frame->pc = ptrauth_strip_insn_pac(frame->pc);
-
        return 0;
 }
 NOKPROBE_SYMBOL(unwind_frame);
index 8ffcbe2..f1f8fc0 100644 (file)
@@ -39,6 +39,7 @@ menuconfig KVM
        select HAVE_KVM_IRQ_BYPASS
        select HAVE_KVM_VCPU_RUN_PID_CHANGE
        select SCHED_INFO
+       select INTERVAL_TREE
        help
          Support hosting virtualized guest machines.
 
index 989bb5d..04a53f7 100644 (file)
@@ -5,14 +5,12 @@
 
 ccflags-y += -I $(srctree)/$(src)
 
-KVM=../../../virt/kvm
+include $(srctree)/virt/kvm/Makefile.kvm
 
 obj-$(CONFIG_KVM) += kvm.o
 obj-$(CONFIG_KVM) += hyp/
 
-kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \
-        $(KVM)/vfio.o $(KVM)/irqchip.o $(KVM)/binary_stats.o \
-        arm.o mmu.o mmio.o psci.o perf.o hypercalls.o pvtime.o \
+kvm-y += arm.o mmu.o mmio.o psci.o perf.o hypercalls.o pvtime.o \
         inject_fault.o va_layout.o handle_exit.o \
         guest.o debug.o reset.o sys_regs.o \
         vgic-sys-reg-v3.o fpsimd.o pmu.o \
index 3df67c1..6e542e2 100644 (file)
@@ -467,7 +467,7 @@ out:
 }
 
 /*
- * Schedule the background timer before calling kvm_vcpu_block, so that this
+ * Schedule the background timer before calling kvm_vcpu_halt, so that this
  * thread is removed from its waitqueue and made runnable when there's a timer
  * interrupt to handle.
  */
@@ -649,7 +649,6 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
 {
        struct arch_timer_cpu *timer = vcpu_timer(vcpu);
        struct timer_map map;
-       struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu);
 
        if (unlikely(!timer->enabled))
                return;
@@ -672,7 +671,7 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
        if (map.emul_ptimer)
                soft_timer_cancel(&map.emul_ptimer->hrtimer);
 
-       if (rcuwait_active(wait))
+       if (kvm_vcpu_is_blocking(vcpu))
                kvm_timer_blocking(vcpu);
 
        /*
@@ -750,7 +749,7 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
 /* Make the updates of cntvoff for all vtimer contexts atomic */
 static void update_vtimer_cntvoff(struct kvm_vcpu *vcpu, u64 cntvoff)
 {
-       int i;
+       unsigned long i;
        struct kvm *kvm = vcpu->kvm;
        struct kvm_vcpu *tmp;
 
@@ -1189,8 +1188,8 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
 
 static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu)
 {
-       int vtimer_irq, ptimer_irq;
-       int i, ret;
+       int vtimer_irq, ptimer_irq, ret;
+       unsigned long i;
 
        vtimer_irq = vcpu_vtimer(vcpu)->irq.irq;
        ret = kvm_vgic_set_owner(vcpu, vtimer_irq, vcpu_vtimer(vcpu));
@@ -1297,7 +1296,7 @@ void kvm_timer_init_vhe(void)
 static void set_timer_irqs(struct kvm *kvm, int vtimer_irq, int ptimer_irq)
 {
        struct kvm_vcpu *vcpu;
-       int i;
+       unsigned long i;
 
        kvm_for_each_vcpu(i, vcpu, kvm) {
                vcpu_vtimer(vcpu)->irq.irq = vtimer_irq;
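
The int to unsigned long change of the kvm_for_each_vcpu() index recurs
throughout this merge, presumably because the generic vCPU storage becomes an
xarray (that change is outside this excerpt). A loop is now expected to look
roughly like:

        struct kvm_vcpu *vcpu;
        unsigned long i;                /* xarray-style index, no longer int */

        kvm_for_each_vcpu(i, vcpu, kvm) {
                vcpu_vtimer(vcpu)->irq.irq = vtimer_irq;   /* per-vCPU work, as above */
        }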
index e4727dc..14106a7 100644 (file)
@@ -175,19 +175,11 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
  */
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
-       int i;
-
        bitmap_free(kvm->arch.pmu_filter);
 
        kvm_vgic_destroy(kvm);
 
-       for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-               if (kvm->vcpus[i]) {
-                       kvm_vcpu_destroy(kvm->vcpus[i]);
-                       kvm->vcpus[i] = NULL;
-               }
-       }
-       atomic_set(&kvm->online_vcpus, 0);
+       kvm_destroy_vcpus(kvm);
 }
 
 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
@@ -368,27 +360,12 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 
 void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
 {
-       /*
-        * If we're about to block (most likely because we've just hit a
-        * WFI), we need to sync back the state of the GIC CPU interface
-        * so that we have the latest PMR and group enables. This ensures
-        * that kvm_arch_vcpu_runnable has up-to-date data to decide
-        * whether we have pending interrupts.
-        *
-        * For the same reason, we want to tell GICv4 that we need
-        * doorbells to be signalled, should an interrupt become pending.
-        */
-       preempt_disable();
-       kvm_vgic_vmcr_sync(vcpu);
-       vgic_v4_put(vcpu, true);
-       preempt_enable();
+
 }
 
 void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
 {
-       preempt_disable();
-       vgic_v4_load(vcpu);
-       preempt_enable();
+
 }
 
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -639,7 +616,7 @@ bool kvm_arch_intc_initialized(struct kvm *kvm)
 
 void kvm_arm_halt_guest(struct kvm *kvm)
 {
-       int i;
+       unsigned long i;
        struct kvm_vcpu *vcpu;
 
        kvm_for_each_vcpu(i, vcpu, kvm)
@@ -649,12 +626,12 @@ void kvm_arm_halt_guest(struct kvm *kvm)
 
 void kvm_arm_resume_guest(struct kvm *kvm)
 {
-       int i;
+       unsigned long i;
        struct kvm_vcpu *vcpu;
 
        kvm_for_each_vcpu(i, vcpu, kvm) {
                vcpu->arch.pause = false;
-               rcuwait_wake_up(kvm_arch_vcpu_get_wait(vcpu));
+               __kvm_vcpu_wake_up(vcpu);
        }
 }
 
@@ -679,6 +656,39 @@ static void vcpu_req_sleep(struct kvm_vcpu *vcpu)
        smp_rmb();
 }
 
+/**
+ * kvm_vcpu_wfi - emulate Wait-For-Interrupt behavior
+ * @vcpu:      The VCPU pointer
+ *
+ * Suspend execution of a vCPU until a valid wake event is detected, i.e. until
+ * the vCPU is runnable.  The vCPU may or may not be scheduled out, depending
+ * on when a wake event arrives, e.g. there may already be a pending wake event.
+ */
+void kvm_vcpu_wfi(struct kvm_vcpu *vcpu)
+{
+       /*
+        * Sync back the state of the GIC CPU interface so that we have
+        * the latest PMR and group enables. This ensures that
+        * kvm_arch_vcpu_runnable has up-to-date data to decide whether
+        * we have pending interrupts, e.g. when determining if the
+        * vCPU should block.
+        *
+        * For the same reason, we want to tell GICv4 that we need
+        * doorbells to be signalled, should an interrupt become pending.
+        */
+       preempt_disable();
+       kvm_vgic_vmcr_sync(vcpu);
+       vgic_v4_put(vcpu, true);
+       preempt_enable();
+
+       kvm_vcpu_halt(vcpu);
+       kvm_clear_request(KVM_REQ_UNHALT, vcpu);
+
+       preempt_disable();
+       vgic_v4_load(vcpu);
+       preempt_enable();
+}
+
 static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
 {
        return vcpu->arch.target >= 0;
@@ -2035,7 +2045,7 @@ static int finalize_hyp_mode(void)
 struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr)
 {
        struct kvm_vcpu *vcpu;
-       int i;
+       unsigned long i;
 
        mpidr &= MPIDR_HWID_BITMASK;
        kvm_for_each_vcpu(i, vcpu, kvm) {
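
The open-coded rcuwait handling (rcuwait_active()/rcuwait_wake_up() on
kvm_arch_vcpu_get_wait()) is replaced by generic helpers in the hunks above.
Their presumed shape, inferred from how the call sites changed; the real
definitions live in the generic KVM headers, which are not part of this
excerpt:

        static inline bool kvm_vcpu_is_blocking(struct kvm_vcpu *vcpu)
        {
                return rcuwait_active(kvm_arch_vcpu_get_wait(vcpu));
        }

        static inline bool __kvm_vcpu_wake_up(struct kvm_vcpu *vcpu)
        {
                return !!rcuwait_wake_up(kvm_arch_vcpu_get_wait(vcpu));
        }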
index 275a273..6d0baf7 100644 (file)
@@ -82,7 +82,7 @@ static int handle_no_fpsimd(struct kvm_vcpu *vcpu)
  *
  * WFE: Yield the CPU and come back to this vcpu when the scheduler
  * decides to.
- * WFI: Simply call kvm_vcpu_block(), which will halt execution of
+ * WFI: Simply call kvm_vcpu_halt(), which will halt execution of
  * world-switches and schedule other host processes until there is an
  * incoming IRQ or FIQ to the VM.
  */
@@ -95,8 +95,7 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu)
        } else {
                trace_kvm_wfx_arm64(*vcpu_pc(vcpu), false);
                vcpu->stat.wfi_exit_stat++;
-               kvm_vcpu_block(vcpu);
-               kvm_clear_request(KVM_REQ_UNHALT, vcpu);
+               kvm_vcpu_wfi(vcpu);
        }
 
        kvm_incr_pc(vcpu);
index 7a0af1d..96c5f3f 100644 (file)
@@ -403,6 +403,8 @@ typedef bool (*exit_handler_fn)(struct kvm_vcpu *, u64 *);
 
 static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu);
 
+static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code);
+
 /*
  * Allow the hypervisor to handle the exit with an exit handler if it has one.
  *
@@ -429,6 +431,18 @@ static inline bool kvm_hyp_handle_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
  */
 static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
 {
+       /*
+        * Save PSTATE early so that we can evaluate the vcpu mode
+        * early on.
+        */
+       vcpu->arch.ctxt.regs.pstate = read_sysreg_el2(SYS_SPSR);
+
+       /*
+        * Check whether we want to repaint the state one way or
+        * another.
+        */
+       early_exit_filter(vcpu, exit_code);
+
        if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ)
                vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR);
 
index de7e14c..7ecca8b 100644 (file)
@@ -70,7 +70,12 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
 static inline void __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt)
 {
        ctxt->regs.pc                   = read_sysreg_el2(SYS_ELR);
-       ctxt->regs.pstate               = read_sysreg_el2(SYS_SPSR);
+       /*
+        * Guest PSTATE gets saved at guest fixup time in all
+        * cases. We still need to handle the nVHE host side here.
+        */
+       if (!has_vhe() && ctxt->__hyp_running_vcpu)
+               ctxt->regs.pstate       = read_sysreg_el2(SYS_SPSR);
 
        if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN))
                ctxt_sys_reg(ctxt, DISR_EL1) = read_sysreg_s(SYS_VDISR_EL2);
index c0e3fed..d13115a 100644 (file)
@@ -233,7 +233,7 @@ static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu)
  * Returns false if the guest ran in AArch32 when it shouldn't have, and
  * thus should exit to the host, or true if the guest run loop can continue.
  */
-static bool handle_aarch32_guest(struct kvm_vcpu *vcpu, u64 *exit_code)
+static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code)
 {
        struct kvm *kvm = kern_hyp_va(vcpu->kvm);
 
@@ -248,10 +248,7 @@ static bool handle_aarch32_guest(struct kvm_vcpu *vcpu, u64 *exit_code)
                vcpu->arch.target = -1;
                *exit_code &= BIT(ARM_EXIT_WITH_SERROR_BIT);
                *exit_code |= ARM_EXCEPTION_IL;
-               return false;
        }
-
-       return true;
 }
 
 /* Switch to the guest for legacy non-VHE systems */
@@ -316,9 +313,6 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
                /* Jump in the fire! */
                exit_code = __guest_enter(vcpu);
 
-               if (unlikely(!handle_aarch32_guest(vcpu, &exit_code)))
-                       break;
-
                /* And we're baaack! */
        } while (fixup_guest_exit(vcpu, &exit_code));
 
index 5a2cb5d..fbb26b9 100644 (file)
@@ -112,6 +112,10 @@ static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu)
        return hyp_exit_handlers;
 }
 
+static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+}
+
 /* Switch to the guest for VHE systems running in EL2 */
 static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
 {
index 326cdfe..e65acf3 100644 (file)
@@ -210,13 +210,13 @@ static void stage2_flush_vm(struct kvm *kvm)
 {
        struct kvm_memslots *slots;
        struct kvm_memory_slot *memslot;
-       int idx;
+       int idx, bkt;
 
        idx = srcu_read_lock(&kvm->srcu);
        spin_lock(&kvm->mmu_lock);
 
        slots = kvm_memslots(kvm);
-       kvm_for_each_memslot(memslot, slots)
+       kvm_for_each_memslot(memslot, bkt, slots)
                stage2_flush_memslot(kvm, memslot);
 
        spin_unlock(&kvm->mmu_lock);
@@ -595,14 +595,14 @@ void stage2_unmap_vm(struct kvm *kvm)
 {
        struct kvm_memslots *slots;
        struct kvm_memory_slot *memslot;
-       int idx;
+       int idx, bkt;
 
        idx = srcu_read_lock(&kvm->srcu);
        mmap_read_lock(current->mm);
        spin_lock(&kvm->mmu_lock);
 
        slots = kvm_memslots(kvm);
-       kvm_for_each_memslot(memslot, slots)
+       kvm_for_each_memslot(memslot, bkt, slots)
                stage2_unmap_memslot(kvm, memslot);
 
        spin_unlock(&kvm->mmu_lock);
@@ -1463,7 +1463,6 @@ out:
 }
 
 void kvm_arch_commit_memory_region(struct kvm *kvm,
-                                  const struct kvm_userspace_memory_region *mem,
                                   struct kvm_memory_slot *old,
                                   const struct kvm_memory_slot *new,
                                   enum kvm_mr_change change)
@@ -1473,25 +1472,24 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
         * allocated dirty_bitmap[], dirty pages will be tracked while the
         * memory slot is write protected.
         */
-       if (change != KVM_MR_DELETE && mem->flags & KVM_MEM_LOG_DIRTY_PAGES) {
+       if (change != KVM_MR_DELETE && new->flags & KVM_MEM_LOG_DIRTY_PAGES) {
                /*
                 * If we're with initial-all-set, we don't need to write
                 * protect any pages because they're all reported as dirty.
                 * Huge pages and normal pages will be write protect gradually.
                 */
                if (!kvm_dirty_log_manual_protect_and_init_set(kvm)) {
-                       kvm_mmu_wp_memory_region(kvm, mem->slot);
+                       kvm_mmu_wp_memory_region(kvm, new->id);
                }
        }
 }
 
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
-                                  struct kvm_memory_slot *memslot,
-                                  const struct kvm_userspace_memory_region *mem,
+                                  const struct kvm_memory_slot *old,
+                                  struct kvm_memory_slot *new,
                                   enum kvm_mr_change change)
 {
-       hva_t hva = mem->userspace_addr;
-       hva_t reg_end = hva + mem->memory_size;
+       hva_t hva, reg_end;
        int ret = 0;
 
        if (change != KVM_MR_CREATE && change != KVM_MR_MOVE &&
@@ -1502,9 +1500,12 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
         * Prevent userspace from creating a memory region outside of the IPA
         * space addressable by the KVM guest IPA space.
         */
-       if ((memslot->base_gfn + memslot->npages) > (kvm_phys_size(kvm) >> PAGE_SHIFT))
+       if ((new->base_gfn + new->npages) > (kvm_phys_size(kvm) >> PAGE_SHIFT))
                return -EFAULT;
 
+       hva = new->userspace_addr;
+       reg_end = hva + (new->npages << PAGE_SHIFT);
+
        mmap_read_lock(current->mm);
        /*
         * A memory region could potentially cover multiple VMAs, and any holes
@@ -1536,7 +1537,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 
                if (vma->vm_flags & VM_PFNMAP) {
                        /* IO region dirty page logging not allowed */
-                       if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) {
+                       if (new->flags & KVM_MEM_LOG_DIRTY_PAGES) {
                                ret = -EINVAL;
                                break;
                        }
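
For reference, the memslot hooks across this merge settle on slot-based
signatures; everything previously taken from the userspace region (hva, size,
flags, slot id) is now derived from the new slot itself. Collected from the
arm64 and mips hunks:

        int  kvm_arch_prepare_memory_region(struct kvm *kvm,
                                            const struct kvm_memory_slot *old,
                                            struct kvm_memory_slot *new,
                                            enum kvm_mr_change change);

        void kvm_arch_commit_memory_region(struct kvm *kvm,
                                           struct kvm_memory_slot *old,
                                           const struct kvm_memory_slot *new,
                                           enum kvm_mr_change change);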
index a5e4bbf..0404357 100644 (file)
@@ -900,7 +900,7 @@ static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
  */
 static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
 {
-       int i;
+       unsigned long i;
        struct kvm_vcpu *vcpu;
 
        kvm_for_each_vcpu(i, vcpu, kvm) {
index 74c47d4..ad6c9ef 100644 (file)
@@ -46,7 +46,7 @@ static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu)
         * specification (ARM DEN 0022A). This means all suspend states
         * for KVM will preserve the register state.
         */
-       kvm_vcpu_block(vcpu);
+       kvm_vcpu_halt(vcpu);
        kvm_clear_request(KVM_REQ_UNHALT, vcpu);
 
        return PSCI_RET_SUCCESS;
@@ -121,8 +121,8 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 
 static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
 {
-       int i, matching_cpus = 0;
-       unsigned long mpidr;
+       int matching_cpus = 0;
+       unsigned long i, mpidr;
        unsigned long target_affinity;
        unsigned long target_affinity_mask;
        unsigned long lowest_affinity_level;
@@ -164,7 +164,7 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
 
 static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type)
 {
-       int i;
+       unsigned long i;
        struct kvm_vcpu *tmp;
 
        /*
index 426bd7f..97de30a 100644 (file)
@@ -170,7 +170,7 @@ static bool vcpu_allowed_register_width(struct kvm_vcpu *vcpu)
 {
        struct kvm_vcpu *tmp;
        bool is32bit;
-       int i;
+       unsigned long i;
 
        is32bit = vcpu_has_feature(vcpu, KVM_ARM_VCPU_EL1_32BIT);
        if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1) && is32bit)
index 0a06d06..a7382bd 100644 (file)
@@ -70,8 +70,9 @@ void kvm_vgic_early_init(struct kvm *kvm)
  */
 int kvm_vgic_create(struct kvm *kvm, u32 type)
 {
-       int i, ret;
        struct kvm_vcpu *vcpu;
+       unsigned long i;
+       int ret;
 
        if (irqchip_in_kernel(kvm))
                return -EEXIST;
@@ -255,7 +256,8 @@ int vgic_init(struct kvm *kvm)
 {
        struct vgic_dist *dist = &kvm->arch.vgic;
        struct kvm_vcpu *vcpu;
-       int ret = 0, i, idx;
+       int ret = 0, i;
+       unsigned long idx;
 
        if (vgic_initialized(kvm))
                return 0;
@@ -308,7 +310,7 @@ int vgic_init(struct kvm *kvm)
                        goto out;
        }
 
-       kvm_for_each_vcpu(i, vcpu, kvm)
+       kvm_for_each_vcpu(idx, vcpu, kvm)
                kvm_vgic_vcpu_enable(vcpu);
 
        ret = kvm_vgic_setup_default_irq_routing(kvm);
@@ -370,7 +372,7 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
 static void __kvm_vgic_destroy(struct kvm *kvm)
 {
        struct kvm_vcpu *vcpu;
-       int i;
+       unsigned long i;
 
        vgic_debug_destroy(kvm);
 
index 0d000d2..c6d52a1 100644 (file)
@@ -325,7 +325,7 @@ void unlock_all_vcpus(struct kvm *kvm)
 bool lock_all_vcpus(struct kvm *kvm)
 {
        struct kvm_vcpu *tmp_vcpu;
-       int c;
+       unsigned long c;
 
        /*
         * Any time a vcpu is run, vcpu_load is called which tries to grab the
index 5f9014a..12e4c22 100644 (file)
@@ -113,9 +113,8 @@ static void vgic_mmio_write_sgir(struct kvm_vcpu *source_vcpu,
        int intid = val & 0xf;
        int targets = (val >> 16) & 0xff;
        int mode = (val >> 24) & 0x03;
-       int c;
        struct kvm_vcpu *vcpu;
-       unsigned long flags;
+       unsigned long flags, c;
 
        switch (mode) {
        case 0x0:               /* as specified by targets */
index bf7ec4a..82906cb 100644 (file)
@@ -754,7 +754,8 @@ static void vgic_unregister_redist_iodev(struct kvm_vcpu *vcpu)
 static int vgic_register_all_redist_iodevs(struct kvm *kvm)
 {
        struct kvm_vcpu *vcpu;
-       int c, ret = 0;
+       unsigned long c;
+       int ret = 0;
 
        kvm_for_each_vcpu(c, vcpu, kvm) {
                ret = vgic_register_redist_iodev(vcpu);
@@ -995,10 +996,10 @@ void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg, bool allow_group1)
        struct kvm_vcpu *c_vcpu;
        u16 target_cpus;
        u64 mpidr;
-       int sgi, c;
+       int sgi;
        int vcpu_id = vcpu->vcpu_id;
        bool broadcast;
-       unsigned long flags;
+       unsigned long c, flags;
 
        sgi = (reg & ICC_SGI1R_SGI_ID_MASK) >> ICC_SGI1R_SGI_ID_SHIFT;
        broadcast = reg & BIT_ULL(ICC_SGI1R_IRQ_ROUTING_MODE_BIT);
index 04f62c4..5fedaee 100644 (file)
@@ -542,13 +542,13 @@ int vgic_v3_map_resources(struct kvm *kvm)
        struct vgic_dist *dist = &kvm->arch.vgic;
        struct kvm_vcpu *vcpu;
        int ret = 0;
-       int c;
+       unsigned long c;
 
        kvm_for_each_vcpu(c, vcpu, kvm) {
                struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 
                if (IS_VGIC_ADDR_UNDEF(vgic_cpu->rd_iodev.base_addr)) {
-                       kvm_debug("vcpu %d redistributor base not set\n", c);
+                       kvm_debug("vcpu %ld redistributor base not set\n", c);
                        return -ENXIO;
                }
        }
index 772dd15..ad06ba6 100644 (file)
@@ -189,7 +189,7 @@ void vgic_v4_configure_vsgis(struct kvm *kvm)
 {
        struct vgic_dist *dist = &kvm->arch.vgic;
        struct kvm_vcpu *vcpu;
-       int i;
+       unsigned long i;
 
        kvm_arm_halt_guest(kvm);
 
@@ -235,7 +235,8 @@ int vgic_v4_init(struct kvm *kvm)
 {
        struct vgic_dist *dist = &kvm->arch.vgic;
        struct kvm_vcpu *vcpu;
-       int i, nr_vcpus, ret;
+       int nr_vcpus, ret;
+       unsigned long i;
 
        if (!kvm_vgic_global_state.has_gicv4)
                return 0; /* Nothing to see here... move along. */
index 5dad499..9b98876 100644 (file)
@@ -990,7 +990,7 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
 void vgic_kick_vcpus(struct kvm *kvm)
 {
        struct kvm_vcpu *vcpu;
-       int c;
+       unsigned long c;
 
        /*
         * We've injected an interrupt, time to find out who deserves
index 6fea184..707ae12 100644 (file)
 446    common  landlock_restrict_self          sys_landlock_restrict_self
 # 447 reserved for memfd_secret
 448    common  process_mrelease                sys_process_mrelease
+449    common  futex_waitv                     sys_futex_waitv
index 8ab4662..1ac55e7 100644 (file)
@@ -250,7 +250,6 @@ static inline void __flush_page_to_ram(void *vaddr)
 
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 #define flush_dcache_page(page)                __flush_page_to_ram(page_address(page))
-void flush_dcache_folio(struct folio *folio);
 #define flush_dcache_mmap_lock(mapping)                do { } while (0)
 #define flush_dcache_mmap_unlock(mapping)      do { } while (0)
 #define flush_icache_page(vma, page)   __flush_page_to_ram(page_address(page))
index 7976dff..45bc32a 100644 (file)
 446    common  landlock_restrict_self          sys_landlock_restrict_self
 # 447 reserved for memfd_secret
 448    common  process_mrelease                sys_process_mrelease
+449    common  futex_waitv                     sys_futex_waitv
index 6b0e113..2204bde 100644 (file)
 446    common  landlock_restrict_self          sys_landlock_restrict_self
 # 447 reserved for memfd_secret
 448    common  process_mrelease                sys_process_mrelease
+449    common  futex_waitv                     sys_futex_waitv
index de60ad1..0215dc1 100644 (file)
@@ -3097,7 +3097,7 @@ config STACKTRACE_SUPPORT
 config PGTABLE_LEVELS
        int
        default 4 if PAGE_SIZE_4KB && MIPS_VA_BITS_48
-       default 3 if 64BIT && !PAGE_SIZE_64KB
+       default 3 if 64BIT && (!PAGE_SIZE_64KB || MIPS_VA_BITS_48)
        default 2
 
 config MIPS_AUTO_PFN_OFFSET
index 2861a05..f27cf31 100644 (file)
@@ -52,7 +52,7 @@ endif
 
 vmlinuzobjs-$(CONFIG_KERNEL_XZ) += $(obj)/ashldi3.o
 
-vmlinuzobjs-$(CONFIG_KERNEL_ZSTD) += $(obj)/bswapdi.o
+vmlinuzobjs-$(CONFIG_KERNEL_ZSTD) += $(obj)/bswapdi.o $(obj)/ashldi3.o
 
 targets := $(notdir $(vmlinuzobjs-y))
 
index f207388..b3dc9c5 100644 (file)
@@ -61,8 +61,6 @@ static inline void flush_dcache_page(struct page *page)
                SetPageDcacheDirty(page);
 }
 
-void flush_dcache_folio(struct folio *folio);
-
 #define flush_dcache_mmap_lock(mapping)                do { } while (0)
 #define flush_dcache_mmap_unlock(mapping)      do { } while (0)
 
index 696f6b0..72b90d4 100644 (file)
@@ -897,7 +897,6 @@ static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {}
 static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
 static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
-static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
 
 #define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
 int kvm_arch_flush_remote_tlb(struct kvm *kvm);
index ac0e2cf..24a529c 100644 (file)
@@ -1734,8 +1734,6 @@ static inline void decode_cpucfg(struct cpuinfo_mips *c)
 
 static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu)
 {
-       decode_configs(c);
-
        /* All Loongson processors covered here define ExcCode 16 as GSExc. */
        c->options |= MIPS_CPU_GSEXCEX;
 
@@ -1796,6 +1794,8 @@ static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu)
                panic("Unknown Loongson Processor ID!");
                break;
        }
+
+       decode_configs(c);
 }
 #else
 static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu) { }
index 376a6e2..9f47a88 100644 (file)
@@ -185,7 +185,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
                seq_puts(m, " tx39_cache");
        if (cpu_has_octeon_cache)
                seq_puts(m, " octeon_cache");
-       if (cpu_has_fpu)
+       if (raw_cpu_has_fpu)
                seq_puts(m, " fpu");
        if (cpu_has_32fpr)
                seq_puts(m, " 32fpr");
index a772974..91d197b 100644 (file)
@@ -27,6 +27,7 @@ config KVM
        select KVM_MMIO
        select MMU_NOTIFIER
        select SRCU
+       select INTERVAL_TREE
        help
          Support for hosting Guest kernels.
 
index d371095..21ff75b 100644 (file)
@@ -2,9 +2,10 @@
 # Makefile for KVM support for MIPS
 #
 
+include $(srctree)/virt/kvm/Makefile.kvm
+
 ccflags-y += -Ivirt/kvm -Iarch/mips/kvm
 
-kvm-y := $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o eventfd.o binary_stats.o)
 kvm-$(CONFIG_CPU_HAS_MSA) += msa.o
 
 kvm-y +=    mips.o emulate.o entry.o \
index 22e745e..b494d8d 100644 (file)
@@ -952,7 +952,7 @@ enum emulation_result kvm_mips_emul_wait(struct kvm_vcpu *vcpu)
        if (!vcpu->arch.pending_exceptions) {
                kvm_vz_lose_htimer(vcpu);
                vcpu->arch.wait = 1;
-               kvm_vcpu_block(vcpu);
+               kvm_vcpu_halt(vcpu);
 
                /*
                 * If we are runnable, then definitely go off to user space to
index 3681fc8..5d53f32 100644 (file)
@@ -120,7 +120,7 @@ static int loongson_vipi_write(struct loongson_kvm_ipi *ipi,
                s->status |= data;
                irq.cpu = id;
                irq.irq = 6;
-               kvm_vcpu_ioctl_interrupt(kvm->vcpus[id], &irq);
+               kvm_vcpu_ioctl_interrupt(kvm_get_vcpu(kvm, id), &irq);
                break;
 
        case CORE0_CLEAR_OFF:
@@ -128,7 +128,7 @@ static int loongson_vipi_write(struct loongson_kvm_ipi *ipi,
                if (!s->status) {
                        irq.cpu = id;
                        irq.irq = -6;
-                       kvm_vcpu_ioctl_interrupt(kvm->vcpus[id], &irq);
+                       kvm_vcpu_ioctl_interrupt(kvm_get_vcpu(kvm, id), &irq);
                }
                break;
 
index aa20d07..e59cb62 100644 (file)
@@ -171,25 +171,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
        return 0;
 }
 
-void kvm_mips_free_vcpus(struct kvm *kvm)
-{
-       unsigned int i;
-       struct kvm_vcpu *vcpu;
-
-       kvm_for_each_vcpu(i, vcpu, kvm) {
-               kvm_vcpu_destroy(vcpu);
-       }
-
-       mutex_lock(&kvm->lock);
-
-       for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
-               kvm->vcpus[i] = NULL;
-
-       atomic_set(&kvm->online_vcpus, 0);
-
-       mutex_unlock(&kvm->lock);
-}
-
 static void kvm_mips_free_gpa_pt(struct kvm *kvm)
 {
        /* It should always be safe to remove after flushing the whole range */
@@ -199,7 +180,7 @@ static void kvm_mips_free_gpa_pt(struct kvm *kvm)
 
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
-       kvm_mips_free_vcpus(kvm);
+       kvm_destroy_vcpus(kvm);
        kvm_mips_free_gpa_pt(kvm);
 }
 
@@ -233,25 +214,20 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
 }
 
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
-                                  struct kvm_memory_slot *memslot,
-                                  const struct kvm_userspace_memory_region *mem,
+                                  const struct kvm_memory_slot *old,
+                                  struct kvm_memory_slot *new,
                                   enum kvm_mr_change change)
 {
        return 0;
 }
 
 void kvm_arch_commit_memory_region(struct kvm *kvm,
-                                  const struct kvm_userspace_memory_region *mem,
                                   struct kvm_memory_slot *old,
                                   const struct kvm_memory_slot *new,
                                   enum kvm_mr_change change)
 {
        int needs_flush;
 
-       kvm_debug("%s: kvm: %p slot: %d, GPA: %llx, size: %llx, QVA: %llx\n",
-                 __func__, kvm, mem->slot, mem->guest_phys_addr,
-                 mem->memory_size, mem->userspace_addr);
-
        /*
         * If dirty page logging is enabled, write protect all pages in the slot
         * ready for dirty logging.
@@ -498,7 +474,7 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
        if (irq->cpu == -1)
                dvcpu = vcpu;
        else
-               dvcpu = vcpu->kvm->vcpus[irq->cpu];
+               dvcpu = kvm_get_vcpu(vcpu->kvm, irq->cpu);
 
        if (intr == 2 || intr == 3 || intr == 4 || intr == 6) {
                kvm_mips_callbacks->queue_io_int(dvcpu, irq);
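
Both the arm64 and mips hunks drop their open-coded vCPU teardown loops in
favour of kvm_destroy_vcpus(), and direct kvm->vcpus[id] dereferences become
kvm_get_vcpu() calls. A presumed shape of the common helper, reconstructed
from the removed per-arch loops (kvm->vcpu_array and xa_erase() are an
assumption about the new generic storage, which is outside this excerpt):

        void kvm_destroy_vcpus(struct kvm *kvm)
        {
                unsigned long i;
                struct kvm_vcpu *vcpu;

                kvm_for_each_vcpu(i, vcpu, kvm) {
                        kvm_vcpu_destroy(vcpu);
                        xa_erase(&kvm->vcpu_array, i);
                }

                atomic_set(&kvm->online_vcpus, 0);
        }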
index 3fc0bb7..c2a222e 100644 (file)
@@ -27,7 +27,6 @@ void flush_cache_vunmap(unsigned long start, unsigned long end);
 
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 void flush_dcache_page(struct page *page);
-void flush_dcache_folio(struct folio *folio);
 void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
                       unsigned long vaddr, void *dst, void *src, int len);
 void copy_from_user_page(struct vm_area_struct *vma, struct page *page,
index 1999561..d0b71dd 100644 (file)
@@ -29,7 +29,6 @@ extern void flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr,
        unsigned long pfn);
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 void flush_dcache_page(struct page *page);
-void flush_dcache_folio(struct folio *folio);
 
 extern void flush_icache_range(unsigned long start, unsigned long end);
 extern void flush_icache_page(struct vm_area_struct *vma, struct page *page);
index 8db4af4..82d77f4 100644 (file)
 # Mike Shaver, Helge Deller and Martin K. Petersen
 #
 
+ifdef CONFIG_PARISC_SELF_EXTRACT
+boot := arch/parisc/boot
+KBUILD_IMAGE := $(boot)/bzImage
+else
 KBUILD_IMAGE := vmlinuz
+endif
 
 NM             = sh $(srctree)/arch/parisc/nm
 CHECKFLAGS     += -D__hppa__=1
index d2daeac..1b8fd80 100644 (file)
@@ -1,7 +1,9 @@
 CONFIG_LOCALVERSION="-64bit"
 # CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_KERNEL_LZ4=y
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
+CONFIG_AUDIT=y
 CONFIG_BSD_PROCESS_ACCT=y
 CONFIG_BSD_PROCESS_ACCT_V3=y
 CONFIG_TASKSTATS=y
@@ -35,6 +37,7 @@ CONFIG_MODVERSIONS=y
 CONFIG_BLK_DEV_INTEGRITY=y
 CONFIG_BINFMT_MISC=m
 # CONFIG_COMPACTION is not set
+CONFIG_MEMORY_FAILURE=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
@@ -65,12 +68,15 @@ CONFIG_SCSI_ISCSI_ATTRS=y
 CONFIG_SCSI_SRP_ATTRS=y
 CONFIG_ISCSI_BOOT_SYSFS=y
 CONFIG_SCSI_MPT2SAS=y
-CONFIG_SCSI_LASI700=m
+CONFIG_SCSI_LASI700=y
 CONFIG_SCSI_SYM53C8XX_2=y
 CONFIG_SCSI_ZALON=y
 CONFIG_SCSI_QLA_ISCSI=m
 CONFIG_SCSI_DH=y
 CONFIG_ATA=y
+CONFIG_SATA_SIL=y
+CONFIG_SATA_SIS=y
+CONFIG_SATA_VIA=y
 CONFIG_PATA_NS87415=y
 CONFIG_PATA_SIL680=y
 CONFIG_ATA_GENERIC=y
@@ -79,6 +85,7 @@ CONFIG_MD_LINEAR=m
 CONFIG_BLK_DEV_DM=m
 CONFIG_DM_RAID=m
 CONFIG_DM_UEVENT=y
+CONFIG_DM_AUDIT=y
 CONFIG_FUSION=y
 CONFIG_FUSION_SPI=y
 CONFIG_FUSION_SAS=y
@@ -196,10 +203,15 @@ CONFIG_FB_MATROX_G=y
 CONFIG_FB_MATROX_I2C=y
 CONFIG_FB_MATROX_MAVEN=y
 CONFIG_FB_RADEON=y
+CONFIG_LOGO=y
+# CONFIG_LOGO_LINUX_CLUT224 is not set
 CONFIG_HIDRAW=y
 CONFIG_HID_PID=y
 CONFIG_USB_HIDDEV=y
 CONFIG_USB=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_OHCI_HCD_PLATFORM=y
 CONFIG_UIO=y
 CONFIG_UIO_PDRV_GENIRQ=m
 CONFIG_UIO_AEC=m
index 39e7985..6d13ae2 100644 (file)
        extrd,u \r, 63-(\sa), 64-(\sa), \t
        .endm
 
+       /* Extract unsigned for 32- and 64-bit
+        * The extru instruction leaves the most significant 32 bits of the
+        * target register in an undefined state on PA 2.0 systems. */
+       .macro extru_safe r, p, len, t
+#ifdef CONFIG_64BIT
+       extrd,u \r, 32+(\p), \len, \t
+#else
+       extru   \r, \p, \len, \t
+#endif
+       .endm
+
        /* load 32-bit 'value' into 'reg' compensating for the ldil
         * sign-extension when running in wide mode.
         * WARNING!! neither 'value' nor 'reg' can be expressions
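
A rough C model of the PA-RISC extract may help when reading the converted
call sites below: extru r,p,len,t takes the len-bit field of r whose rightmost
bit sits at position p, with bit 0 being the most significant bit of the
32-bit word, and zero-extends it. Sketch only, assuming that bit numbering:

        #include <stdint.h>
        #include <stdio.h>

        /* Rough model of "extru r,p,len,t" (bit 0 = MSB of the 32-bit word). */
        static uint32_t extru_model(uint32_t r, unsigned int p, unsigned int len)
        {
                uint32_t mask = (len >= 32) ? ~0u : ((1u << len) - 1);

                return (r >> (31 - p)) & mask;
        }

        int main(void)
        {
                /* e.g. the LWS hash below: extru_safe %r26, 28, 8, %r20 */
                printf("%x\n", extru_model(0x12345678, 28, 8));  /* prints cf */
                return 0;
        }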
index da0cd4b..859b8a3 100644 (file)
@@ -50,7 +50,6 @@ void invalidate_kernel_vmap_range(void *vaddr, int size);
 
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 void flush_dcache_page(struct page *page);
-void flush_dcache_folio(struct folio *folio);
 
 #define flush_dcache_mmap_lock(mapping)                xa_lock_irq(&mapping->i_pages)
 #define flush_dcache_mmap_unlock(mapping)      xa_unlock_irq(&mapping->i_pages)
index 056d588..70d3cff 100644 (file)
@@ -39,6 +39,7 @@ verify "$3"
 if [ -n "${INSTALLKERNEL}" ]; then
   if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
   if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi
+  if [ -x /usr/sbin/${INSTALLKERNEL} ]; then exec /usr/sbin/${INSTALLKERNEL} "$@"; fi
 fi
 
 # Default install
index 88c188a..6e9cdb2 100644 (file)
         */
        .macro          L2_ptep pmd,pte,index,va,fault
 #if CONFIG_PGTABLE_LEVELS == 3
-       extru           \va,31-ASM_PMD_SHIFT,ASM_BITS_PER_PMD,\index
+       extru_safe      \va,31-ASM_PMD_SHIFT,ASM_BITS_PER_PMD,\index
 #else
-# if defined(CONFIG_64BIT)
-       extrd,u         \va,63-ASM_PGDIR_SHIFT,ASM_BITS_PER_PGD,\index
-  #else
-  # if PAGE_SIZE > 4096
-       extru           \va,31-ASM_PGDIR_SHIFT,32-ASM_PGDIR_SHIFT,\index
-  # else
-       extru           \va,31-ASM_PGDIR_SHIFT,ASM_BITS_PER_PGD,\index
-  # endif
-# endif
+       extru_safe      \va,31-ASM_PGDIR_SHIFT,ASM_BITS_PER_PGD,\index
 #endif
        dep             %r0,31,PAGE_SHIFT,\pmd  /* clear offset */
 #if CONFIG_PGTABLE_LEVELS < 3
        bb,>=,n         \pmd,_PxD_PRESENT_BIT,\fault
        dep             %r0,31,PxD_FLAG_SHIFT,\pmd /* clear flags */
        SHLREG          \pmd,PxD_VALUE_SHIFT,\pmd
-       extru           \va,31-PAGE_SHIFT,ASM_BITS_PER_PTE,\index
+       extru_safe      \va,31-PAGE_SHIFT,ASM_BITS_PER_PTE,\index
        dep             %r0,31,PAGE_SHIFT,\pmd  /* clear offset */
        shladd          \index,BITS_PER_PTE_ENTRY,\pmd,\pmd /* pmd is now pte */
        .endm
index 4fb3b6a..d2497b3 100644 (file)
@@ -566,7 +566,7 @@ lws_compare_and_swap:
        ldo     R%lws_lock_start(%r20), %r28
 
        /* Extract eight bits from r26 and hash lock (Bits 3-11) */
-       extru  %r26, 28, 8, %r20
+       extru_safe  %r26, 28, 8, %r20
 
        /* Find lock to use, the hash is either one of 0 to
           15, multiplied by 16 (keep it 16-byte aligned)
@@ -751,7 +751,7 @@ cas2_lock_start:
        ldo     R%lws_lock_start(%r20), %r28
 
        /* Extract eight bits from r26 and hash lock (Bits 3-11) */
-       extru  %r26, 28, 8, %r20
+       extru_safe  %r26, 28, 8, %r20
 
        /* Find lock to use, the hash is either one of 0 to
           15, multiplied by 16 (keep it 16-byte aligned)
index 9fb1e79..061119a 100644 (file)
@@ -249,30 +249,16 @@ void __init time_init(void)
 static int __init init_cr16_clocksource(void)
 {
        /*
-        * The cr16 interval timers are not syncronized across CPUs on
-        * different sockets, so mark them unstable and lower rating on
-        * multi-socket SMP systems.
+        * The cr16 interval timers are not synchronized across CPUs, even if
+        * they share the same socket.
         */
        if (num_online_cpus() > 1 && !running_on_qemu) {
-               int cpu;
-               unsigned long cpu0_loc;
-               cpu0_loc = per_cpu(cpu_data, 0).cpu_loc;
-
-               for_each_online_cpu(cpu) {
-                       if (cpu == 0)
-                               continue;
-                       if ((cpu0_loc != 0) &&
-                           (cpu0_loc == per_cpu(cpu_data, cpu).cpu_loc))
-                               continue;
-
-                       /* mark sched_clock unstable */
-                       clear_sched_clock_stable();
-
-                       clocksource_cr16.name = "cr16_unstable";
-                       clocksource_cr16.flags = CLOCK_SOURCE_UNSTABLE;
-                       clocksource_cr16.rating = 0;
-                       break;
-               }
+               /* mark sched_clock unstable */
+               clear_sched_clock_stable();
+
+               clocksource_cr16.name = "cr16_unstable";
+               clocksource_cr16.flags = CLOCK_SOURCE_UNSTABLE;
+               clocksource_cr16.rating = 0;
        }
 
        /* register at clocksource framework */
index 3d208af..2769eb9 100644 (file)
@@ -57,8 +57,6 @@ SECTIONS
 {
        . = KERNEL_BINARY_TEXT_START;
 
-       _stext = .;     /* start of kernel text, includes init code & data */
-
        __init_begin = .;
        HEAD_TEXT_SECTION
        MLONGCALL_DISCARD(INIT_TEXT_SECTION(8))
@@ -82,6 +80,7 @@ SECTIONS
        /* freed after init ends here */
 
        _text = .;              /* Text and read-only data */
+       _stext = .;
        MLONGCALL_KEEP(INIT_TEXT_SECTION(8))
        .text ALIGN(PAGE_SIZE) : {
                TEXT_TEXT
index 2228238..41b8a1e 100644 (file)
@@ -141,11 +141,6 @@ static inline void kvmppc_restore_tm_hv(struct kvm_vcpu *vcpu, u64 msr,
                                        bool preserve_nv) { }
 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
 
-void kvmhv_save_host_pmu(void);
-void kvmhv_load_host_pmu(void);
-void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use);
-void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu);
-
 void kvmppc_p9_enter_guest(struct kvm_vcpu *vcpu);
 
 long kvmppc_h_set_dabr(struct kvm_vcpu *vcpu, unsigned long dabr);
index fbbf3ce..d68d719 100644 (file)
@@ -79,6 +79,7 @@
 #define BOOK3S_INTERRUPT_FP_UNAVAIL    0x800
 #define BOOK3S_INTERRUPT_DECREMENTER   0x900
 #define BOOK3S_INTERRUPT_HV_DECREMENTER        0x980
+#define BOOK3S_INTERRUPT_NESTED_HV_DECREMENTER 0x1980
 #define BOOK3S_INTERRUPT_DOORBELL      0xa00
 #define BOOK3S_INTERRUPT_SYSCALL       0xc00
 #define BOOK3S_INTERRUPT_TRACE         0xd00
index 3d31f2c..91c9f93 100644 (file)
@@ -406,6 +406,12 @@ static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu)
        return vcpu->arch.fault_dar;
 }
 
+/* Expiry time of vcpu DEC relative to host TB */
+static inline u64 kvmppc_dec_expires_host_tb(struct kvm_vcpu *vcpu)
+{
+       return vcpu->arch.dec_expires - vcpu->arch.vcore->tb_offset;
+}
+
 static inline bool is_kvmppc_resume_guest(int r)
 {
        return (r == RESUME_GUEST || r == RESUME_GUEST_NV);
index fff391b..fe07558 100644 (file)
@@ -44,7 +44,6 @@ struct kvm_nested_guest {
        struct mutex tlb_lock;          /* serialize page faults and tlbies */
        struct kvm_nested_guest *next;
        cpumask_t need_tlb_flush;
-       cpumask_t cpu_in_guest;
        short prev_cpu[NR_CPUS];
        u8 radix;                       /* is this nested guest radix */
 };
@@ -154,7 +153,9 @@ static inline bool kvmhv_vcpu_is_radix(struct kvm_vcpu *vcpu)
        return radix;
 }
 
-int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr);
+unsigned long kvmppc_msr_hard_disable_set_facilities(struct kvm_vcpu *vcpu, unsigned long msr);
+
+int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr, u64 *tb);
 
 #define KVM_DEFAULT_HPT_ORDER  24      /* 16MB HPT by default */
 #endif
index e4d2319..a770443 100644 (file)
@@ -287,7 +287,6 @@ struct kvm_arch {
        u32 online_vcores;
        atomic_t hpte_mod_interest;
        cpumask_t need_tlb_flush;
-       cpumask_t cpu_in_guest;
        u8 radix;
        u8 fwnmi_enabled;
        u8 secure_guest;
@@ -579,6 +578,10 @@ struct kvm_vcpu_arch {
        ulong cfar;
        ulong ppr;
        u32 pspb;
+       u8 load_ebb;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+       u8 load_tm;
+#endif
        ulong fscr;
        ulong shadow_fscr;
        ulong ebbhr;
@@ -741,7 +744,7 @@ struct kvm_vcpu_arch {
 
        struct hrtimer dec_timer;
        u64 dec_jiffies;
-       u64 dec_expires;
+       u64 dec_expires;        /* Relative to guest timebase. */
        unsigned long pending_exceptions;
        u8 ceded;
        u8 prodded;
@@ -749,6 +752,7 @@ struct kvm_vcpu_arch {
        u8 irq_pending; /* Used by XIVE to signal pending guest irqs */
        u32 last_inst;
 
+       struct rcuwait wait;
        struct rcuwait *waitp;
        struct kvmppc_vcore *vcore;
        int ret;
@@ -864,6 +868,5 @@ static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
 static inline void kvm_arch_exit(void) {}
 static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
-static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
 
 #endif /* __POWERPC_KVM_HOST_H__ */
index 671fbd1..a14dbcd 100644 (file)
@@ -200,12 +200,11 @@ extern void kvmppc_core_destroy_vm(struct kvm *kvm);
 extern void kvmppc_core_free_memslot(struct kvm *kvm,
                                     struct kvm_memory_slot *slot);
 extern int kvmppc_core_prepare_memory_region(struct kvm *kvm,
-                               struct kvm_memory_slot *memslot,
-                               const struct kvm_userspace_memory_region *mem,
+                               const struct kvm_memory_slot *old,
+                               struct kvm_memory_slot *new,
                                enum kvm_mr_change change);
 extern void kvmppc_core_commit_memory_region(struct kvm *kvm,
-                               const struct kvm_userspace_memory_region *mem,
-                               const struct kvm_memory_slot *old,
+                               struct kvm_memory_slot *old,
                                const struct kvm_memory_slot *new,
                                enum kvm_mr_change change);
 extern int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm,
@@ -274,12 +273,11 @@ struct kvmppc_ops {
        int (*get_dirty_log)(struct kvm *kvm, struct kvm_dirty_log *log);
        void (*flush_memslot)(struct kvm *kvm, struct kvm_memory_slot *memslot);
        int (*prepare_memory_region)(struct kvm *kvm,
-                                    struct kvm_memory_slot *memslot,
-                                    const struct kvm_userspace_memory_region *mem,
+                                    const struct kvm_memory_slot *old,
+                                    struct kvm_memory_slot *new,
                                     enum kvm_mr_change change);
        void (*commit_memory_region)(struct kvm *kvm,
-                                    const struct kvm_userspace_memory_region *mem,
-                                    const struct kvm_memory_slot *old,
+                                    struct kvm_memory_slot *old,
                                     const struct kvm_memory_slot *new,
                                     enum kvm_mr_change change);
        bool (*unmap_gfn_range)(struct kvm *kvm, struct kvm_gfn_range *range);
@@ -552,8 +550,7 @@ extern void kvm_hv_vm_activated(void);
 extern void kvm_hv_vm_deactivated(void);
 extern bool kvm_hv_mode_active(void);
 
-extern void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu,
-                                       struct kvm_nested_guest *nested);
+extern void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu);
 
 #else
 static inline void __init kvm_cma_reserve(void)
@@ -760,6 +757,7 @@ void kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu);
 void kvmppc_subcore_enter_guest(void);
 void kvmppc_subcore_exit_guest(void);
 long kvmppc_realmode_hmi_handler(void);
+long kvmppc_p9_realmode_hmi_handler(struct kvm_vcpu *vcpu);
 long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
                     long pte_index, unsigned long pteh, unsigned long ptel);
 long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
index 9d1fbd8..1f43ef6 100644 (file)
@@ -112,6 +112,9 @@ static inline void clear_task_ebb(struct task_struct *t)
 #endif
 }
 
+void kvmppc_save_user_regs(void);
+void kvmppc_save_current_sprs(void);
+
 extern int set_thread_tidr(struct task_struct *t);
 
 #endif /* _ASM_POWERPC_SWITCH_TO_H */
index 8c2c3dd..924b215 100644 (file)
@@ -18,6 +18,8 @@
 #include <asm/vdso/timebase.h>
 
 /* time.c */
+extern u64 decrementer_max;
+
 extern unsigned long tb_ticks_per_jiffy;
 extern unsigned long tb_ticks_per_usec;
 extern unsigned long tb_ticks_per_sec;
@@ -97,19 +99,16 @@ extern void div128_by_32(u64 dividend_high, u64 dividend_low,
 extern void secondary_cpu_time_init(void);
 extern void __init time_init(void);
 
-#ifdef CONFIG_PPC64
-static inline unsigned long test_irq_work_pending(void)
-{
-       unsigned long x;
+DECLARE_PER_CPU(u64, decrementers_next_tb);
 
-       asm volatile("lbz %0,%1(13)"
-               : "=r" (x)
-               : "i" (offsetof(struct paca_struct, irq_work_pending)));
-       return x;
+static inline u64 timer_get_next_tb(void)
+{
+       return __this_cpu_read(decrementers_next_tb);
 }
-#endif
 
-DECLARE_PER_CPU(u64, decrementers_next_tb);
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+void timer_rearm_host_dec(u64 now);
+#endif
 
 /* Convert timebase ticks to nanoseconds */
 unsigned long long tb_to_ns(unsigned long long tb_ticks);
index 3cca88e..3dc61e2 100644 (file)
@@ -109,7 +109,7 @@ static void init_PMU_HV_ISA207(void)
 static void init_PMU(void)
 {
        mtspr(SPRN_MMCRA, 0);
-       mtspr(SPRN_MMCR0, 0);
+       mtspr(SPRN_MMCR0, MMCR0_FC);
        mtspr(SPRN_MMCR1, 0);
        mtspr(SPRN_MMCR2, 0);
 }
@@ -123,7 +123,7 @@ static void init_PMU_ISA31(void)
 {
        mtspr(SPRN_MMCR3, 0);
        mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE);
-       mtspr(SPRN_MMCR0, MMCR0_PMCCEXT);
+       mtspr(SPRN_MMCR0, MMCR0_FC | MMCR0_PMCCEXT);
 }
 
 /*
@@ -137,6 +137,7 @@ void __setup_cpu_power7(unsigned long offset, struct cpu_spec *t)
                return;
 
        mtspr(SPRN_LPID, 0);
+       mtspr(SPRN_AMOR, ~0);
        mtspr(SPRN_PCR, PCR_MASK);
        init_LPCR_ISA206(mfspr(SPRN_LPCR), LPCR_LPES1 >> LPCR_LPES_SH);
 }
@@ -150,6 +151,7 @@ void __restore_cpu_power7(void)
                return;
 
        mtspr(SPRN_LPID, 0);
+       mtspr(SPRN_AMOR, ~0);
        mtspr(SPRN_PCR, PCR_MASK);
        init_LPCR_ISA206(mfspr(SPRN_LPCR), LPCR_LPES1 >> LPCR_LPES_SH);
 }
@@ -164,6 +166,7 @@ void __setup_cpu_power8(unsigned long offset, struct cpu_spec *t)
                return;
 
        mtspr(SPRN_LPID, 0);
+       mtspr(SPRN_AMOR, ~0);
        mtspr(SPRN_PCR, PCR_MASK);
        init_LPCR_ISA206(mfspr(SPRN_LPCR) | LPCR_PECEDH, 0); /* LPES = 0 */
        init_HFSCR();
@@ -184,6 +187,7 @@ void __restore_cpu_power8(void)
                return;
 
        mtspr(SPRN_LPID, 0);
+       mtspr(SPRN_AMOR, ~0);
        mtspr(SPRN_PCR, PCR_MASK);
        init_LPCR_ISA206(mfspr(SPRN_LPCR) | LPCR_PECEDH, 0); /* LPES = 0 */
        init_HFSCR();
@@ -202,6 +206,7 @@ void __setup_cpu_power9(unsigned long offset, struct cpu_spec *t)
        mtspr(SPRN_PSSCR, 0);
        mtspr(SPRN_LPID, 0);
        mtspr(SPRN_PID, 0);
+       mtspr(SPRN_AMOR, ~0);
        mtspr(SPRN_PCR, PCR_MASK);
        init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\
                         LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0);
@@ -223,6 +228,7 @@ void __restore_cpu_power9(void)
        mtspr(SPRN_PSSCR, 0);
        mtspr(SPRN_LPID, 0);
        mtspr(SPRN_PID, 0);
+       mtspr(SPRN_AMOR, ~0);
        mtspr(SPRN_PCR, PCR_MASK);
        init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\
                         LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0);
@@ -242,6 +248,7 @@ void __setup_cpu_power10(unsigned long offset, struct cpu_spec *t)
        mtspr(SPRN_PSSCR, 0);
        mtspr(SPRN_LPID, 0);
        mtspr(SPRN_PID, 0);
+       mtspr(SPRN_AMOR, ~0);
        mtspr(SPRN_PCR, PCR_MASK);
        init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\
                         LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0);
@@ -264,6 +271,7 @@ void __restore_cpu_power10(void)
        mtspr(SPRN_PSSCR, 0);
        mtspr(SPRN_LPID, 0);
        mtspr(SPRN_PID, 0);
+       mtspr(SPRN_AMOR, ~0);
        mtspr(SPRN_PCR, PCR_MASK);
        init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\
                         LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0);
index ba527fb..d2b35fb 100644 (file)
@@ -80,6 +80,7 @@ static void __restore_cpu_cpufeatures(void)
        mtspr(SPRN_LPCR, system_registers.lpcr);
        if (hv_mode) {
                mtspr(SPRN_LPID, 0);
+               mtspr(SPRN_AMOR, ~0);
                mtspr(SPRN_HFSCR, system_registers.hfscr);
                mtspr(SPRN_PCR, system_registers.pcr);
        }
@@ -216,6 +217,7 @@ static int __init feat_enable_hv(struct dt_cpu_feature *f)
        }
 
        mtspr(SPRN_LPID, 0);
+       mtspr(SPRN_AMOR, ~0);
 
        lpcr = mfspr(SPRN_LPCR);
        lpcr &=  ~LPCR_LPES0; /* HV external interrupts */
@@ -351,7 +353,7 @@ static void init_pmu_power8(void)
        }
 
        mtspr(SPRN_MMCRA, 0);
-       mtspr(SPRN_MMCR0, 0);
+       mtspr(SPRN_MMCR0, MMCR0_FC);
        mtspr(SPRN_MMCR1, 0);
        mtspr(SPRN_MMCR2, 0);
        mtspr(SPRN_MMCRS, 0);
@@ -390,7 +392,7 @@ static void init_pmu_power9(void)
                mtspr(SPRN_MMCRC, 0);
 
        mtspr(SPRN_MMCRA, 0);
-       mtspr(SPRN_MMCR0, 0);
+       mtspr(SPRN_MMCR0, MMCR0_FC);
        mtspr(SPRN_MMCR1, 0);
        mtspr(SPRN_MMCR2, 0);
 }
@@ -426,7 +428,7 @@ static void init_pmu_power10(void)
 
        mtspr(SPRN_MMCR3, 0);
        mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE);
-       mtspr(SPRN_MMCR0, MMCR0_PMCCEXT);
+       mtspr(SPRN_MMCR0, MMCR0_FC | MMCR0_PMCCEXT);
 }
 
 static int __init feat_enable_pmu_power10(struct dt_cpu_feature *f)
index 6b1ec9e..349c4a8 100644 (file)
@@ -202,11 +202,11 @@ vmap_stack_overflow:
        mfspr   r1, SPRN_SPRG_THREAD
        lwz     r1, TASK_CPU - THREAD(r1)
        slwi    r1, r1, 3
-       addis   r1, r1, emergency_ctx@ha
+       addis   r1, r1, emergency_ctx-PAGE_OFFSET@ha
 #else
-       lis     r1, emergency_ctx@ha
+       lis     r1, emergency_ctx-PAGE_OFFSET@ha
 #endif
-       lwz     r1, emergency_ctx@l(r1)
+       lwz     r1, emergency_ctx-PAGE_OFFSET@l(r1)
        addi    r1, r1, THREAD_SIZE - INT_FRAME_SIZE
        EXCEPTION_PROLOG_2 0 vmap_stack_overflow
        prepare_transfer_to_handler
index 406d7ee..5d2333d 100644 (file)
@@ -1156,6 +1156,40 @@ static inline void save_sprs(struct thread_struct *t)
 #endif
 }
 
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+void kvmppc_save_user_regs(void)
+{
+       unsigned long usermsr;
+
+       if (!current->thread.regs)
+               return;
+
+       usermsr = current->thread.regs->msr;
+
+       if (usermsr & MSR_FP)
+               save_fpu(current);
+
+       if (usermsr & MSR_VEC)
+               save_altivec(current);
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+       if (usermsr & MSR_TM) {
+               current->thread.tm_tfhar = mfspr(SPRN_TFHAR);
+               current->thread.tm_tfiar = mfspr(SPRN_TFIAR);
+               current->thread.tm_texasr = mfspr(SPRN_TEXASR);
+               current->thread.regs->msr &= ~MSR_TM;
+       }
+#endif
+}
+EXPORT_SYMBOL_GPL(kvmppc_save_user_regs);
+
+void kvmppc_save_current_sprs(void)
+{
+       save_sprs(&current->thread);
+}
+EXPORT_SYMBOL_GPL(kvmppc_save_current_sprs);
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
+
 static inline void restore_sprs(struct thread_struct *old_thread,
                                struct thread_struct *new_thread)
 {
index 7bef917..15109af 100644 (file)
 446    common  landlock_restrict_self          sys_landlock_restrict_self
 # 447 reserved for memfd_secret
 448    common  process_mrelease                sys_process_mrelease
+449    common  futex_waitv                     sys_futex_waitv
index cae8f03..f7cddb8 100644 (file)
@@ -88,6 +88,7 @@ static struct clocksource clocksource_timebase = {
 
 #define DECREMENTER_DEFAULT_MAX 0x7FFFFFFF
 u64 decrementer_max = DECREMENTER_DEFAULT_MAX;
+EXPORT_SYMBOL_GPL(decrementer_max); /* for KVM HDEC */
 
 static int decrementer_set_next_event(unsigned long evt,
                                      struct clock_event_device *dev);
@@ -107,6 +108,7 @@ struct clock_event_device decrementer_clockevent = {
 EXPORT_SYMBOL(decrementer_clockevent);
 
 DEFINE_PER_CPU(u64, decrementers_next_tb);
+EXPORT_SYMBOL_GPL(decrementers_next_tb);
 static DEFINE_PER_CPU(struct clock_event_device, decrementers);
 
 #define XSEC_PER_SEC (1024*1024)
@@ -496,6 +498,16 @@ EXPORT_SYMBOL(profile_pc);
  * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable...
  */
 #ifdef CONFIG_PPC64
+static inline unsigned long test_irq_work_pending(void)
+{
+       unsigned long x;
+
+       asm volatile("lbz %0,%1(13)"
+               : "=r" (x)
+               : "i" (offsetof(struct paca_struct, irq_work_pending)));
+       return x;
+}
+
 static inline void set_irq_work_pending_flag(void)
 {
        asm volatile("stb %0,%1(13)" : :
@@ -539,13 +551,44 @@ void arch_irq_work_raise(void)
        preempt_enable();
 }
 
+static void set_dec_or_work(u64 val)
+{
+       set_dec(val);
+       /* We may have raced with new irq work */
+       if (unlikely(test_irq_work_pending()))
+               set_dec(1);
+}
+
 #else  /* CONFIG_IRQ_WORK */
 
 #define test_irq_work_pending()        0
 #define clear_irq_work_pending()
 
+static void set_dec_or_work(u64 val)
+{
+       set_dec(val);
+}
 #endif /* CONFIG_IRQ_WORK */
 
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+void timer_rearm_host_dec(u64 now)
+{
+       u64 *next_tb = this_cpu_ptr(&decrementers_next_tb);
+
+       WARN_ON_ONCE(!arch_irqs_disabled());
+       WARN_ON_ONCE(mfmsr() & MSR_EE);
+
+       if (now >= *next_tb) {
+               local_paca->irq_happened |= PACA_IRQ_DEC;
+       } else {
+               now = *next_tb - now;
+               if (now <= decrementer_max)
+                       set_dec_or_work(now);
+       }
+}
+EXPORT_SYMBOL_GPL(timer_rearm_host_dec);
+#endif
+
 /*
  * timer_interrupt - gets called when the decrementer overflows,
  * with interrupts disabled.
@@ -606,10 +649,7 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt)
        } else {
                now = *next_tb - now;
                if (now <= decrementer_max)
-                       set_dec(now);
-               /* We may have raced with new irq work */
-               if (test_irq_work_pending())
-                       set_dec(1);
+                       set_dec_or_work(now);
                __this_cpu_inc(irq_stat.timer_irqs_others);
        }
 
@@ -843,11 +883,7 @@ static int decrementer_set_next_event(unsigned long evt,
                                      struct clock_event_device *dev)
 {
        __this_cpu_write(decrementers_next_tb, get_tb() + evt);
-       set_dec(evt);
-
-       /* We may have raced with new irq work */
-       if (test_irq_work_pending())
-               set_dec(1);
+       set_dec_or_work(evt);
 
        return 0;
 }
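
The new set_dec_or_work() helper centralizes a pattern the call sites above previously open-coded: program the decrementer, then re-check whether irq work was raised in the meantime and, if so, force an almost-immediate expiry so the work is not lost. A minimal user-space sketch of that re-check pattern follows; set_dec() and the pending flag here are illustrative stand-ins, not kernel interfaces.

/* Illustrative model of "program timer, then re-check for racing irq work". */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

static _Atomic uint64_t mock_dec;      /* models the DEC register */
static atomic_bool irq_work_pending;   /* set from an "NMI" context */

static void set_dec(uint64_t val)
{
	atomic_store(&mock_dec, val);
}

static void set_dec_or_work(uint64_t val)
{
	set_dec(val);
	/* An NMI may have raised irq work after val was computed;
	 * make the timer fire almost immediately in that case. */
	if (atomic_load(&irq_work_pending))
		set_dec(1);
}
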
index ff581d7..c8e1560 100644 (file)
@@ -26,6 +26,7 @@ config KVM
        select KVM_VFIO
        select IRQ_BYPASS_MANAGER
        select HAVE_KVM_IRQ_BYPASS
+       select INTERVAL_TREE
 
 config KVM_BOOK3S_HANDLER
        bool
@@ -130,6 +131,21 @@ config KVM_BOOK3S_HV_EXIT_TIMING
 
          If unsure, say N.
 
+config KVM_BOOK3S_HV_NESTED_PMU_WORKAROUND
+       bool "Nested L0 host workaround for L1 KVM host PMU handling bug" if EXPERT
+       depends on KVM_BOOK3S_HV_POSSIBLE
+       default !EXPERT
+       help
+         Old nested HV capable Linux guests have a bug where they don't
+         reflect the PMU in-use status of their L2 guest to the L0 host
+         while the L2 PMU registers are live. This can result in loss
+         of L2 PMU register state, causing perf not to work correctly in
+         L2 guests.
+
+         Selecting this option on the L0 host implements a workaround for
+         those buggy L1s by saving the L2 PMU state itself, at the cost of
+         extra overhead on every nested-capable guest entry/exit.
+
 config KVM_BOOKE_HV
        bool
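
A Kconfig bool like KVM_BOOK3S_HV_NESTED_PMU_WORKAROUND is typically consulted at run time via IS_ENABLED(). Below is a hedged sketch of how the L0 could use it to decide whether L2 PMU state must be saved; the helper name and its argument are hypothetical and not part of this patch.

/* Hypothetical helper, for illustration only. */
static bool l0_should_save_l2_pmu(bool l2_pmcregs_in_use)
{
	if (l2_pmcregs_in_use)
		return true;
	/*
	 * Old L1 kernels may fail to report that their L2 has the PMU in
	 * use; with the workaround enabled, save the state regardless.
	 */
	return IS_ENABLED(CONFIG_KVM_BOOK3S_HV_NESTED_PMU_WORKAROUND);
}
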
 
index 583c14e..9bdfc8b 100644 (file)
@@ -4,11 +4,8 @@
 #
 
 ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm
-KVM := ../../../virt/kvm
 
-common-objs-y = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/binary_stats.o
-common-objs-$(CONFIG_KVM_VFIO) += $(KVM)/vfio.o
-common-objs-$(CONFIG_KVM_MMIO) += $(KVM)/coalesced_mmio.o
+include $(srctree)/virt/kvm/Makefile.kvm
 
 common-objs-y += powerpc.o emulate_loadstore.o
 obj-$(CONFIG_KVM_EXIT_TIMING) += timing.o
@@ -125,9 +122,8 @@ kvm-book3s_32-objs := \
 kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs)
 
 kvm-objs-$(CONFIG_KVM_MPIC) += mpic.o
-kvm-objs-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(KVM)/irqchip.o
 
-kvm-objs := $(kvm-objs-m) $(kvm-objs-y)
+kvm-y += $(kvm-objs-m) $(kvm-objs-y)
 
 obj-$(CONFIG_KVM_E500V2) += kvm.o
 obj-$(CONFIG_KVM_E500MC) += kvm.o
index b785f67..6d52528 100644 (file)
@@ -847,21 +847,19 @@ void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot)
 }
 
 int kvmppc_core_prepare_memory_region(struct kvm *kvm,
-                               struct kvm_memory_slot *memslot,
-                               const struct kvm_userspace_memory_region *mem,
-                               enum kvm_mr_change change)
+                                     const struct kvm_memory_slot *old,
+                                     struct kvm_memory_slot *new,
+                                     enum kvm_mr_change change)
 {
-       return kvm->arch.kvm_ops->prepare_memory_region(kvm, memslot, mem,
-                                                       change);
+       return kvm->arch.kvm_ops->prepare_memory_region(kvm, old, new, change);
 }
 
 void kvmppc_core_commit_memory_region(struct kvm *kvm,
-                               const struct kvm_userspace_memory_region *mem,
-                               const struct kvm_memory_slot *old,
+                               struct kvm_memory_slot *old,
                                const struct kvm_memory_slot *new,
                                enum kvm_mr_change change)
 {
-       kvm->arch.kvm_ops->commit_memory_region(kvm, mem, old, new, change);
+       kvm->arch.kvm_ops->commit_memory_region(kvm, old, new, change);
 }
 
 bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
index 3fbd570..0215f32 100644 (file)
@@ -337,7 +337,7 @@ static void kvmppc_mmu_book3s_32_mtsrin(struct kvm_vcpu *vcpu, u32 srnum,
 
 static void kvmppc_mmu_book3s_32_tlbie(struct kvm_vcpu *vcpu, ulong ea, bool large)
 {
-       int i;
+       unsigned long i;
        struct kvm_vcpu *v;
 
        /* flush this VA on all cpus */
index 983b8c1..05e003e 100644 (file)
@@ -374,11 +374,16 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
 BEGIN_FTR_SECTION
        mtspr   SPRN_DAWRX1,r10
 END_FTR_SECTION_IFSET(CPU_FTR_DAWR1)
-       mtspr   SPRN_PID,r10
 
        /*
-        * Switch to host MMU mode
+        * Switch to host MMU mode (we don't have the real host PID, but we
+        * aren't going back to userspace).
         */
+       hwsync
+       isync
+
+       mtspr   SPRN_PID,r10
+
        ld      r10, HSTATE_KVM_VCPU(r13)
        ld      r10, VCPU_KVM(r10)
        lwz     r10, KVM_HOST_LPID(r10)
@@ -389,6 +394,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_DAWR1)
        ld      r10, KVM_HOST_LPCR(r10)
        mtspr   SPRN_LPCR,r10
 
+       isync
+
        /*
         * Set GUEST_MODE_NONE so the handler won't branch to KVM, and clear
         * MSR_RI in r12 ([H]SRR1) so the handler won't try to return.
index feee40c..6129028 100644 (file)
@@ -530,7 +530,7 @@ static void kvmppc_mmu_book3s_64_tlbie(struct kvm_vcpu *vcpu, ulong va,
                                       bool large)
 {
        u64 mask = 0xFFFFFFFFFULL;
-       long i;
+       unsigned long i;
        struct kvm_vcpu *v;
 
        dprintk("KVM MMU: tlbie(0x%lx)\n", va);
index c63e263..2132329 100644 (file)
@@ -734,11 +734,11 @@ void kvmppc_rmap_reset(struct kvm *kvm)
 {
        struct kvm_memslots *slots;
        struct kvm_memory_slot *memslot;
-       int srcu_idx;
+       int srcu_idx, bkt;
 
        srcu_idx = srcu_read_lock(&kvm->srcu);
        slots = kvm_memslots(kvm);
-       kvm_for_each_memslot(memslot, slots) {
+       kvm_for_each_memslot(memslot, bkt, slots) {
                /* Mutual exclusion with kvm_unmap_hva_range etc. */
                spin_lock(&kvm->mmu_lock);
                /*
index 1635952..8cebe55 100644 (file)
@@ -57,6 +57,8 @@ unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid,
 
        preempt_disable();
 
+       asm volatile("hwsync" ::: "memory");
+       isync();
        /* switch the lpid first to avoid running host with unallocated pid */
        old_lpid = mfspr(SPRN_LPID);
        if (old_lpid != lpid)
@@ -75,6 +77,8 @@ unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid,
                ret = __copy_to_user_inatomic((void __user *)to, from, n);
        pagefault_enable();
 
+       asm volatile("hwsync" ::: "memory");
+       isync();
        /* switch the pid first to avoid running host with unallocated pid */
        if (quadrant == 1 && pid != old_pid)
                mtspr(SPRN_PID, old_pid);
index 7b74fc0..d1817cd 100644 (file)
@@ -80,6 +80,7 @@
 #include <asm/plpar_wrappers.h>
 
 #include "book3s.h"
+#include "book3s_hv.h"
 
 #define CREATE_TRACE_POINTS
 #include "trace_hv.h"
@@ -127,11 +128,6 @@ static bool nested = true;
 module_param(nested, bool, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(nested, "Enable nested virtualization (only on POWER9)");
 
-static inline bool nesting_enabled(struct kvm *kvm)
-{
-       return kvm->arch.nested_enable && kvm_is_radix(kvm);
-}
-
 static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
 
 /*
@@ -276,22 +272,26 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
  * they should never fail.)
  */
 
-static void kvmppc_core_start_stolen(struct kvmppc_vcore *vc)
+static void kvmppc_core_start_stolen(struct kvmppc_vcore *vc, u64 tb)
 {
        unsigned long flags;
 
+       WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300));
+
        spin_lock_irqsave(&vc->stoltb_lock, flags);
-       vc->preempt_tb = mftb();
+       vc->preempt_tb = tb;
        spin_unlock_irqrestore(&vc->stoltb_lock, flags);
 }
 
-static void kvmppc_core_end_stolen(struct kvmppc_vcore *vc)
+static void kvmppc_core_end_stolen(struct kvmppc_vcore *vc, u64 tb)
 {
        unsigned long flags;
 
+       WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300));
+
        spin_lock_irqsave(&vc->stoltb_lock, flags);
        if (vc->preempt_tb != TB_NIL) {
-               vc->stolen_tb += mftb() - vc->preempt_tb;
+               vc->stolen_tb += tb - vc->preempt_tb;
                vc->preempt_tb = TB_NIL;
        }
        spin_unlock_irqrestore(&vc->stoltb_lock, flags);
@@ -301,6 +301,12 @@ static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu)
 {
        struct kvmppc_vcore *vc = vcpu->arch.vcore;
        unsigned long flags;
+       u64 now;
+
+       if (cpu_has_feature(CPU_FTR_ARCH_300))
+               return;
+
+       now = mftb();
 
        /*
         * We can test vc->runner without taking the vcore lock,
@@ -309,12 +315,12 @@ static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu)
         * ever sets it to NULL.
         */
        if (vc->runner == vcpu && vc->vcore_state >= VCORE_SLEEPING)
-               kvmppc_core_end_stolen(vc);
+               kvmppc_core_end_stolen(vc, now);
 
        spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
        if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST &&
            vcpu->arch.busy_preempt != TB_NIL) {
-               vcpu->arch.busy_stolen += mftb() - vcpu->arch.busy_preempt;
+               vcpu->arch.busy_stolen += now - vcpu->arch.busy_preempt;
                vcpu->arch.busy_preempt = TB_NIL;
        }
        spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
@@ -324,13 +330,19 @@ static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu)
 {
        struct kvmppc_vcore *vc = vcpu->arch.vcore;
        unsigned long flags;
+       u64 now;
+
+       if (cpu_has_feature(CPU_FTR_ARCH_300))
+               return;
+
+       now = mftb();
 
        if (vc->runner == vcpu && vc->vcore_state >= VCORE_SLEEPING)
-               kvmppc_core_start_stolen(vc);
+               kvmppc_core_start_stolen(vc, now);
 
        spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
        if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST)
-               vcpu->arch.busy_preempt = mftb();
+               vcpu->arch.busy_preempt = now;
        spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
 }
 
@@ -675,6 +687,8 @@ static u64 vcore_stolen_time(struct kvmppc_vcore *vc, u64 now)
        u64 p;
        unsigned long flags;
 
+       WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300));
+
        spin_lock_irqsave(&vc->stoltb_lock, flags);
        p = vc->stolen_tb;
        if (vc->vcore_state != VCORE_INACTIVE &&
@@ -684,35 +698,30 @@ static u64 vcore_stolen_time(struct kvmppc_vcore *vc, u64 now)
        return p;
 }
 
-static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
-                                   struct kvmppc_vcore *vc)
+static void __kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
+                                       unsigned int pcpu, u64 now,
+                                       unsigned long stolen)
 {
        struct dtl_entry *dt;
        struct lppaca *vpa;
-       unsigned long stolen;
-       unsigned long core_stolen;
-       u64 now;
-       unsigned long flags;
 
        dt = vcpu->arch.dtl_ptr;
        vpa = vcpu->arch.vpa.pinned_addr;
-       now = mftb();
-       core_stolen = vcore_stolen_time(vc, now);
-       stolen = core_stolen - vcpu->arch.stolen_logged;
-       vcpu->arch.stolen_logged = core_stolen;
-       spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
-       stolen += vcpu->arch.busy_stolen;
-       vcpu->arch.busy_stolen = 0;
-       spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
+
        if (!dt || !vpa)
                return;
-       memset(dt, 0, sizeof(struct dtl_entry));
+
        dt->dispatch_reason = 7;
-       dt->processor_id = cpu_to_be16(vc->pcpu + vcpu->arch.ptid);
-       dt->timebase = cpu_to_be64(now + vc->tb_offset);
+       dt->preempt_reason = 0;
+       dt->processor_id = cpu_to_be16(pcpu + vcpu->arch.ptid);
        dt->enqueue_to_dispatch_time = cpu_to_be32(stolen);
+       dt->ready_to_enqueue_time = 0;
+       dt->waiting_to_ready_time = 0;
+       dt->timebase = cpu_to_be64(now);
+       dt->fault_addr = 0;
        dt->srr0 = cpu_to_be64(kvmppc_get_pc(vcpu));
        dt->srr1 = cpu_to_be64(vcpu->arch.shregs.msr);
+
        ++dt;
        if (dt == vcpu->arch.dtl.pinned_end)
                dt = vcpu->arch.dtl.pinned_addr;
@@ -723,6 +732,27 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
        vcpu->arch.dtl.dirty = true;
 }
 
+static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
+                                   struct kvmppc_vcore *vc)
+{
+       unsigned long stolen;
+       unsigned long core_stolen;
+       u64 now;
+       unsigned long flags;
+
+       now = mftb();
+
+       core_stolen = vcore_stolen_time(vc, now);
+       stolen = core_stolen - vcpu->arch.stolen_logged;
+       vcpu->arch.stolen_logged = core_stolen;
+       spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
+       stolen += vcpu->arch.busy_stolen;
+       vcpu->arch.busy_stolen = 0;
+       spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
+
+       __kvmppc_create_dtl_entry(vcpu, vc->pcpu, now + vc->tb_offset, stolen);
+}
+
 /* See if there is a doorbell interrupt pending for a vcpu */
 static bool kvmppc_doorbell_pending(struct kvm_vcpu *vcpu)
 {
@@ -731,6 +761,8 @@ static bool kvmppc_doorbell_pending(struct kvm_vcpu *vcpu)
 
        if (vcpu->arch.doorbell_request)
                return true;
+       if (cpu_has_feature(CPU_FTR_ARCH_300))
+               return false;
        /*
         * Ensure that the read of vcore->dpdes comes after the read
         * of vcpu->doorbell_request.  This barrier matches the
@@ -900,13 +932,14 @@ static int kvm_arch_vcpu_yield_to(struct kvm_vcpu *target)
         * mode handler is not called but no other threads are in the
         * source vcore.
         */
-
-       spin_lock(&vcore->lock);
-       if (target->arch.state == KVMPPC_VCPU_RUNNABLE &&
-           vcore->vcore_state != VCORE_INACTIVE &&
-           vcore->runner)
-               target = vcore->runner;
-       spin_unlock(&vcore->lock);
+       if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
+               spin_lock(&vcore->lock);
+               if (target->arch.state == KVMPPC_VCPU_RUNNABLE &&
+                   vcore->vcore_state != VCORE_INACTIVE &&
+                   vcore->runner)
+                       target = vcore->runner;
+               spin_unlock(&vcore->lock);
+       }
 
        return kvm_vcpu_yield_to(target);
 }
@@ -1421,6 +1454,43 @@ static int kvmppc_emulate_doorbell_instr(struct kvm_vcpu *vcpu)
        return RESUME_GUEST;
 }
 
+/*
+ * If the lppaca had pmcregs_in_use clear when we exited the guest, then
+ * HFSCR_PM is cleared for the next entry. If the guest then tries to access
+ * the PMU SPRs, we get this facility-unavailable interrupt. Putting HFSCR_PM
+ * back in the guest HFSCR will cause the next entry to load the PMU SPRs and
+ * allow the guest access to continue.
+ */
+static int kvmppc_pmu_unavailable(struct kvm_vcpu *vcpu)
+{
+       if (!(vcpu->arch.hfscr_permitted & HFSCR_PM))
+               return EMULATE_FAIL;
+
+       vcpu->arch.hfscr |= HFSCR_PM;
+
+       return RESUME_GUEST;
+}
+
+static int kvmppc_ebb_unavailable(struct kvm_vcpu *vcpu)
+{
+       if (!(vcpu->arch.hfscr_permitted & HFSCR_EBB))
+               return EMULATE_FAIL;
+
+       vcpu->arch.hfscr |= HFSCR_EBB;
+
+       return RESUME_GUEST;
+}
+
+static int kvmppc_tm_unavailable(struct kvm_vcpu *vcpu)
+{
+       if (!(vcpu->arch.hfscr_permitted & HFSCR_TM))
+               return EMULATE_FAIL;
+
+       vcpu->arch.hfscr |= HFSCR_TM;
+
+       return RESUME_GUEST;
+}
+
 static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
                                 struct task_struct *tsk)
 {
@@ -1451,6 +1521,10 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
        run->ready_for_interrupt_injection = 1;
        switch (vcpu->arch.trap) {
        /* We're good on these - the host merely wanted to get our attention */
+       case BOOK3S_INTERRUPT_NESTED_HV_DECREMENTER:
+               WARN_ON_ONCE(1); /* Should never happen */
+               vcpu->arch.trap = BOOK3S_INTERRUPT_HV_DECREMENTER;
+               fallthrough;
        case BOOK3S_INTERRUPT_HV_DECREMENTER:
                vcpu->stat.dec_exits++;
                r = RESUME_GUEST;
@@ -1575,7 +1649,8 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
                unsigned long vsid;
                long err;
 
-               if (vcpu->arch.fault_dsisr == HDSISR_CANARY) {
+               if (cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG) &&
+                   unlikely(vcpu->arch.fault_dsisr == HDSISR_CANARY)) {
                        r = RESUME_GUEST; /* Just retry if it's the canary */
                        break;
                }
@@ -1702,16 +1777,26 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
         * to emulate.
         * Otherwise, we just generate a program interrupt to the guest.
         */
-       case BOOK3S_INTERRUPT_H_FAC_UNAVAIL:
+       case BOOK3S_INTERRUPT_H_FAC_UNAVAIL: {
+               u64 cause = vcpu->arch.hfscr >> 56;
+
                r = EMULATE_FAIL;
-               if (((vcpu->arch.hfscr >> 56) == FSCR_MSGP_LG) &&
-                   cpu_has_feature(CPU_FTR_ARCH_300))
-                       r = kvmppc_emulate_doorbell_instr(vcpu);
+               if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+                       if (cause == FSCR_MSGP_LG)
+                               r = kvmppc_emulate_doorbell_instr(vcpu);
+                       if (cause == FSCR_PM_LG)
+                               r = kvmppc_pmu_unavailable(vcpu);
+                       if (cause == FSCR_EBB_LG)
+                               r = kvmppc_ebb_unavailable(vcpu);
+                       if (cause == FSCR_TM_LG)
+                               r = kvmppc_tm_unavailable(vcpu);
+               }
                if (r == EMULATE_FAIL) {
                        kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
                        r = RESUME_GUEST;
                }
                break;
+       }
 
        case BOOK3S_INTERRUPT_HV_RM_HARD:
                r = RESUME_PASSTHROUGH;
@@ -1768,6 +1853,12 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu)
                vcpu->stat.ext_intr_exits++;
                r = RESUME_GUEST;
                break;
+       /* These need to go to the nested HV */
+       case BOOK3S_INTERRUPT_NESTED_HV_DECREMENTER:
+               vcpu->arch.trap = BOOK3S_INTERRUPT_HV_DECREMENTER;
+               vcpu->stat.dec_exits++;
+               r = RESUME_HOST;
+               break;
        /* SR/HMI/PMI are HV interrupts that host has handled. Resume guest.*/
        case BOOK3S_INTERRUPT_HMI:
        case BOOK3S_INTERRUPT_PERFMON:
@@ -1993,7 +2084,7 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr,
         */
        if ((new_lpcr & LPCR_ILE) != (vc->lpcr & LPCR_ILE)) {
                struct kvm_vcpu *vcpu;
-               int i;
+               unsigned long i;
 
                kvm_for_each_vcpu(i, vcpu, kvm) {
                        if (vcpu->arch.vcore != vc)
@@ -2096,8 +2187,10 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
                 * either vcore->dpdes or doorbell_request.
                 * On POWER8, doorbell_request is 0.
                 */
-               *val = get_reg_val(id, vcpu->arch.vcore->dpdes |
-                                  vcpu->arch.doorbell_request);
+               if (cpu_has_feature(CPU_FTR_ARCH_300))
+                       *val = get_reg_val(id, vcpu->arch.doorbell_request);
+               else
+                       *val = get_reg_val(id, vcpu->arch.vcore->dpdes);
                break;
        case KVM_REG_PPC_VTB:
                *val = get_reg_val(id, vcpu->arch.vcore->vtb);
@@ -2238,8 +2331,7 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
                *val = get_reg_val(id, vcpu->arch.vcore->arch_compat);
                break;
        case KVM_REG_PPC_DEC_EXPIRY:
-               *val = get_reg_val(id, vcpu->arch.dec_expires +
-                                  vcpu->arch.vcore->tb_offset);
+               *val = get_reg_val(id, vcpu->arch.dec_expires);
                break;
        case KVM_REG_PPC_ONLINE:
                *val = get_reg_val(id, vcpu->arch.online);
@@ -2335,7 +2427,10 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
                vcpu->arch.pspb = set_reg_val(id, *val);
                break;
        case KVM_REG_PPC_DPDES:
-               vcpu->arch.vcore->dpdes = set_reg_val(id, *val);
+               if (cpu_has_feature(CPU_FTR_ARCH_300))
+                       vcpu->arch.doorbell_request = set_reg_val(id, *val) & 1;
+               else
+                       vcpu->arch.vcore->dpdes = set_reg_val(id, *val);
                break;
        case KVM_REG_PPC_VTB:
                vcpu->arch.vcore->vtb = set_reg_val(id, *val);
@@ -2491,8 +2586,7 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
                r = kvmppc_set_arch_compat(vcpu, set_reg_val(id, *val));
                break;
        case KVM_REG_PPC_DEC_EXPIRY:
-               vcpu->arch.dec_expires = set_reg_val(id, *val) -
-                       vcpu->arch.vcore->tb_offset;
+               vcpu->arch.dec_expires = set_reg_val(id, *val);
                break;
        case KVM_REG_PPC_ONLINE:
                i = set_reg_val(id, *val);
@@ -2715,6 +2809,11 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu)
 #endif
 #endif
        vcpu->arch.mmcr[0] = MMCR0_FC;
+       if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+               vcpu->arch.mmcr[0] |= MMCR0_PMCCEXT;
+               vcpu->arch.mmcra = MMCRA_BHRB_DISABLE;
+       }
+
        vcpu->arch.ctrl = CTRL_RUNLATCH;
        /* default to host PVR, since we can't spoof it */
        kvmppc_set_pvr_hv(vcpu, mfspr(SPRN_PVR));
@@ -2745,6 +2844,11 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu)
 
        vcpu->arch.hfscr_permitted = vcpu->arch.hfscr;
 
+       /*
+        * PM, EBB and TM are demand-faulted, so start with them clear.
+        */
+       vcpu->arch.hfscr &= ~(HFSCR_PM | HFSCR_EBB | HFSCR_TM);
+
        kvmppc_mmu_book3s_hv_init(vcpu);
 
        vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
@@ -2869,13 +2973,13 @@ static void kvmppc_set_timer(struct kvm_vcpu *vcpu)
        unsigned long dec_nsec, now;
 
        now = get_tb();
-       if (now > vcpu->arch.dec_expires) {
+       if (now > kvmppc_dec_expires_host_tb(vcpu)) {
                /* decrementer has already gone negative */
                kvmppc_core_queue_dec(vcpu);
                kvmppc_core_prepare_to_enter(vcpu);
                return;
        }
-       dec_nsec = tb_to_ns(vcpu->arch.dec_expires - now);
+       dec_nsec = tb_to_ns(kvmppc_dec_expires_host_tb(vcpu) - now);
        hrtimer_start(&vcpu->arch.dec_timer, dec_nsec, HRTIMER_MODE_REL);
        vcpu->arch.timer_running = 1;
 }
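
kvmppc_dec_expires_host_tb() comes from the newly included book3s_hv.h and is not visible in this diff. Since the exit path further down now stores dec_expires in guest timebase (dec + *tb + vc->tb_offset), the conversion back to host timebase is presumably the inverse offset; the sketch below is written under that assumption and is not the patch's actual code.

/* Assumed shape of the helper declared in book3s_hv.h; the real
 * implementation may differ. dec_expires is kept in guest timebase,
 * so subtracting the vcore's tb_offset yields host timebase.
 */
static inline u64 kvmppc_dec_expires_host_tb(struct kvm_vcpu *vcpu)
{
	return vcpu->arch.dec_expires - vcpu->arch.vcore->tb_offset;
}
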
@@ -2883,14 +2987,14 @@ static void kvmppc_set_timer(struct kvm_vcpu *vcpu)
 extern int __kvmppc_vcore_entry(void);
 
 static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
-                                  struct kvm_vcpu *vcpu)
+                                  struct kvm_vcpu *vcpu, u64 tb)
 {
        u64 now;
 
        if (vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
                return;
        spin_lock_irq(&vcpu->arch.tbacct_lock);
-       now = mftb();
+       now = tb;
        vcpu->arch.busy_stolen += vcore_stolen_time(vc, now) -
                vcpu->arch.stolen_logged;
        vcpu->arch.busy_preempt = now;
@@ -2945,30 +3049,59 @@ static void kvmppc_release_hwthread(int cpu)
        tpaca->kvm_hstate.kvm_split_mode = NULL;
 }
 
+static DEFINE_PER_CPU(struct kvm *, cpu_in_guest);
+
 static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
 {
        struct kvm_nested_guest *nested = vcpu->arch.nested;
-       cpumask_t *cpu_in_guest;
+       cpumask_t *need_tlb_flush;
        int i;
 
+       if (nested)
+               need_tlb_flush = &nested->need_tlb_flush;
+       else
+               need_tlb_flush = &kvm->arch.need_tlb_flush;
+
        cpu = cpu_first_tlb_thread_sibling(cpu);
-       if (nested) {
-               cpumask_set_cpu(cpu, &nested->need_tlb_flush);
-               cpu_in_guest = &nested->cpu_in_guest;
-       } else {
-               cpumask_set_cpu(cpu, &kvm->arch.need_tlb_flush);
-               cpu_in_guest = &kvm->arch.cpu_in_guest;
-       }
+       for (i = cpu; i <= cpu_last_tlb_thread_sibling(cpu);
+                                       i += cpu_tlb_thread_sibling_step())
+               cpumask_set_cpu(i, need_tlb_flush);
+
        /*
-        * Make sure setting of bit in need_tlb_flush precedes
-        * testing of cpu_in_guest bits.  The matching barrier on
-        * the other side is the first smp_mb() in kvmppc_run_core().
+        * Make sure the setting of the need_tlb_flush bit precedes the test
+        * of cpu_in_guest. The matching barrier on the other side is the
+        * hwsync executed when switching to guest MMU mode, which happens
+        * between cpu_in_guest being set to the guest kvm and the
+        * need_tlb_flush bit being tested.
         */
        smp_mb();
+
        for (i = cpu; i <= cpu_last_tlb_thread_sibling(cpu);
-                                       i += cpu_tlb_thread_sibling_step())
-               if (cpumask_test_cpu(i, cpu_in_guest))
+                                       i += cpu_tlb_thread_sibling_step()) {
+               struct kvm *running = *per_cpu_ptr(&cpu_in_guest, i);
+
+               if (running == kvm)
                        smp_call_function_single(i, do_nothing, NULL, 1);
+       }
+}
+
+static void do_migrate_away_vcpu(void *arg)
+{
+       struct kvm_vcpu *vcpu = arg;
+       struct kvm *kvm = vcpu->kvm;
+
+       /*
+        * If the guest has GTSE, it may execute tlbie, so issue an eieio;
+        * tlbsync; ptesync sequence on the old CPU before migrating to a new
+        * one, in case we interrupted the guest in the middle of a tlbie;
+        * eieio; tlbsync; ptesync sequence.
+        *
+        * Otherwise, ptesync is sufficient for ordering tlbiel sequences.
+        */
+       if (kvm->arch.lpcr & LPCR_GTSE)
+               asm volatile("eieio; tlbsync; ptesync");
+       else
+               asm volatile("ptesync");
 }
 
 static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu)
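
The ordering comment in radix_flush_cpu() describes a classic store/load pairing: the flusher publishes need_tlb_flush and then checks cpu_in_guest, while the guest-entry path publishes cpu_in_guest and then tests need_tlb_flush, each side separated by a full barrier (smp_mb() here, hwsync on entry) so that at least one side observes the other. A self-contained user-space model of that pairing is below; the names mirror the kernel variables, but the types and helpers are illustrative only.

#include <stdatomic.h>
#include <stdbool.h>

static atomic_bool need_tlb_flush;	/* set by the flusher */
static atomic_bool cpu_in_guest;	/* set by the guest-entry path */

/* Flusher: publish the request, then decide whether an IPI is needed. */
static bool flusher_needs_ipi(void)
{
	atomic_store(&need_tlb_flush, true);
	atomic_thread_fence(memory_order_seq_cst);	/* smp_mb() */
	return atomic_load(&cpu_in_guest);
}

/* Guest entry: announce we are in the guest, then honour any request. */
static bool entry_must_flush(void)
{
	atomic_store(&cpu_in_guest, true);
	atomic_thread_fence(memory_order_seq_cst);	/* hwsync */
	return atomic_exchange(&need_tlb_flush, false);
}
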
@@ -2994,14 +3127,17 @@ static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu)
         * can move around between pcpus.  To cope with this, when
         * a vcpu moves from one pcpu to another, we need to tell
         * any vcpus running on the same core as this vcpu previously
-        * ran to flush the TLB.  The TLB is shared between threads,
-        * so we use a single bit in .need_tlb_flush for all 4 threads.
+        * ran to flush the TLB.
         */
        if (prev_cpu != pcpu) {
-               if (prev_cpu >= 0 &&
-                   cpu_first_tlb_thread_sibling(prev_cpu) !=
-                   cpu_first_tlb_thread_sibling(pcpu))
-                       radix_flush_cpu(kvm, prev_cpu, vcpu);
+               if (prev_cpu >= 0) {
+                       if (cpu_first_tlb_thread_sibling(prev_cpu) !=
+                           cpu_first_tlb_thread_sibling(pcpu))
+                               radix_flush_cpu(kvm, prev_cpu, vcpu);
+
+                       smp_call_function_single(prev_cpu,
+                                       do_migrate_away_vcpu, vcpu, 1);
+               }
                if (nested)
                        nested->prev_cpu[vcpu->arch.nested_vcpu_id] = pcpu;
                else
@@ -3013,7 +3149,6 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
 {
        int cpu;
        struct paca_struct *tpaca;
-       struct kvm *kvm = vc->kvm;
 
        cpu = vc->pcpu;
        if (vcpu) {
@@ -3024,7 +3159,6 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
                cpu += vcpu->arch.ptid;
                vcpu->cpu = vc->pcpu;
                vcpu->arch.thread_cpu = cpu;
-               cpumask_set_cpu(cpu, &kvm->arch.cpu_in_guest);
        }
        tpaca = paca_ptrs[cpu];
        tpaca->kvm_hstate.kvm_vcpu = vcpu;
@@ -3125,6 +3259,8 @@ static void kvmppc_vcore_preempt(struct kvmppc_vcore *vc)
 {
        struct preempted_vcore_list *lp = this_cpu_ptr(&preempted_vcores);
 
+       WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300));
+
        vc->vcore_state = VCORE_PREEMPT;
        vc->pcpu = smp_processor_id();
        if (vc->num_threads < threads_per_vcore(vc->kvm)) {
@@ -3134,14 +3270,16 @@ static void kvmppc_vcore_preempt(struct kvmppc_vcore *vc)
        }
 
        /* Start accumulating stolen time */
-       kvmppc_core_start_stolen(vc);
+       kvmppc_core_start_stolen(vc, mftb());
 }
 
 static void kvmppc_vcore_end_preempt(struct kvmppc_vcore *vc)
 {
        struct preempted_vcore_list *lp;
 
-       kvmppc_core_end_stolen(vc);
+       WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300));
+
+       kvmppc_core_end_stolen(vc, mftb());
        if (!list_empty(&vc->preempt_list)) {
                lp = &per_cpu(preempted_vcores, vc->pcpu);
                spin_lock(&lp->lock);
@@ -3268,7 +3406,7 @@ static void prepare_threads(struct kvmppc_vcore *vc)
                        vcpu->arch.ret = RESUME_GUEST;
                else
                        continue;
-               kvmppc_remove_runnable(vc, vcpu);
+               kvmppc_remove_runnable(vc, vcpu, mftb());
                wake_up(&vcpu->arch.cpu_run);
        }
 }
@@ -3287,7 +3425,7 @@ static void collect_piggybacks(struct core_info *cip, int target_threads)
                        list_del_init(&pvc->preempt_list);
                        if (pvc->runner == NULL) {
                                pvc->vcore_state = VCORE_INACTIVE;
-                               kvmppc_core_end_stolen(pvc);
+                               kvmppc_core_end_stolen(pvc, mftb());
                        }
                        spin_unlock(&pvc->lock);
                        continue;
@@ -3296,7 +3434,7 @@ static void collect_piggybacks(struct core_info *cip, int target_threads)
                        spin_unlock(&pvc->lock);
                        continue;
                }
-               kvmppc_core_end_stolen(pvc);
+               kvmppc_core_end_stolen(pvc, mftb());
                pvc->vcore_state = VCORE_PIGGYBACK;
                if (cip->total_threads >= target_threads)
                        break;
@@ -3340,7 +3478,7 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
                 */
                spin_unlock(&vc->lock);
                /* cancel pending dec exception if dec is positive */
-               if (now < vcpu->arch.dec_expires &&
+               if (now < kvmppc_dec_expires_host_tb(vcpu) &&
                    kvmppc_core_pending_dec(vcpu))
                        kvmppc_core_dequeue_dec(vcpu);
 
@@ -3363,7 +3501,7 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
                        else
                                ++still_running;
                } else {
-                       kvmppc_remove_runnable(vc, vcpu);
+                       kvmppc_remove_runnable(vc, vcpu, mftb());
                        wake_up(&vcpu->arch.cpu_run);
                }
        }
@@ -3372,7 +3510,7 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
                        kvmppc_vcore_preempt(vc);
                } else if (vc->runner) {
                        vc->vcore_state = VCORE_PREEMPT;
-                       kvmppc_core_start_stolen(vc);
+                       kvmppc_core_start_stolen(vc, mftb());
                } else {
                        vc->vcore_state = VCORE_INACTIVE;
                }
@@ -3503,7 +3641,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
            ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) {
                for_each_runnable_thread(i, vcpu, vc) {
                        vcpu->arch.ret = -EBUSY;
-                       kvmppc_remove_runnable(vc, vcpu);
+                       kvmppc_remove_runnable(vc, vcpu, mftb());
                        wake_up(&vcpu->arch.cpu_run);
                }
                goto out;
@@ -3748,7 +3886,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
                kvmppc_release_hwthread(pcpu + i);
                if (sip && sip->napped[i])
                        kvmppc_ipi_thread(pcpu + i);
-               cpumask_clear_cpu(pcpu + i, &vc->kvm->arch.cpu_in_guest);
        }
 
        spin_unlock(&vc->lock);
@@ -3770,211 +3907,137 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
        trace_kvmppc_run_core(vc, 1);
 }
 
-static void load_spr_state(struct kvm_vcpu *vcpu)
-{
-       mtspr(SPRN_DSCR, vcpu->arch.dscr);
-       mtspr(SPRN_IAMR, vcpu->arch.iamr);
-       mtspr(SPRN_PSPB, vcpu->arch.pspb);
-       mtspr(SPRN_FSCR, vcpu->arch.fscr);
-       mtspr(SPRN_TAR, vcpu->arch.tar);
-       mtspr(SPRN_EBBHR, vcpu->arch.ebbhr);
-       mtspr(SPRN_EBBRR, vcpu->arch.ebbrr);
-       mtspr(SPRN_BESCR, vcpu->arch.bescr);
-       mtspr(SPRN_TIDR, vcpu->arch.tid);
-       mtspr(SPRN_AMR, vcpu->arch.amr);
-       mtspr(SPRN_UAMOR, vcpu->arch.uamor);
-
-       /*
-        * DAR, DSISR, and for nested HV, SPRGs must be set with MSR[RI]
-        * clear (or hstate set appropriately to catch those registers
-        * being clobbered if we take a MCE or SRESET), so those are done
-        * later.
-        */
-
-       if (!(vcpu->arch.ctrl & 1))
-               mtspr(SPRN_CTRLT, mfspr(SPRN_CTRLF) & ~1);
-}
-
-static void store_spr_state(struct kvm_vcpu *vcpu)
-{
-       vcpu->arch.ctrl = mfspr(SPRN_CTRLF);
-
-       vcpu->arch.iamr = mfspr(SPRN_IAMR);
-       vcpu->arch.pspb = mfspr(SPRN_PSPB);
-       vcpu->arch.fscr = mfspr(SPRN_FSCR);
-       vcpu->arch.tar = mfspr(SPRN_TAR);
-       vcpu->arch.ebbhr = mfspr(SPRN_EBBHR);
-       vcpu->arch.ebbrr = mfspr(SPRN_EBBRR);
-       vcpu->arch.bescr = mfspr(SPRN_BESCR);
-       vcpu->arch.tid = mfspr(SPRN_TIDR);
-       vcpu->arch.amr = mfspr(SPRN_AMR);
-       vcpu->arch.uamor = mfspr(SPRN_UAMOR);
-       vcpu->arch.dscr = mfspr(SPRN_DSCR);
-}
-
-/*
- * Privileged (non-hypervisor) host registers to save.
- */
-struct p9_host_os_sprs {
-       unsigned long dscr;
-       unsigned long tidr;
-       unsigned long iamr;
-       unsigned long amr;
-       unsigned long fscr;
-};
-
-static void save_p9_host_os_sprs(struct p9_host_os_sprs *host_os_sprs)
-{
-       host_os_sprs->dscr = mfspr(SPRN_DSCR);
-       host_os_sprs->tidr = mfspr(SPRN_TIDR);
-       host_os_sprs->iamr = mfspr(SPRN_IAMR);
-       host_os_sprs->amr = mfspr(SPRN_AMR);
-       host_os_sprs->fscr = mfspr(SPRN_FSCR);
-}
-
-/* vcpu guest regs must already be saved */
-static void restore_p9_host_os_sprs(struct kvm_vcpu *vcpu,
-                                   struct p9_host_os_sprs *host_os_sprs)
-{
-       mtspr(SPRN_PSPB, 0);
-       mtspr(SPRN_UAMOR, 0);
-
-       mtspr(SPRN_DSCR, host_os_sprs->dscr);
-       mtspr(SPRN_TIDR, host_os_sprs->tidr);
-       mtspr(SPRN_IAMR, host_os_sprs->iamr);
-
-       if (host_os_sprs->amr != vcpu->arch.amr)
-               mtspr(SPRN_AMR, host_os_sprs->amr);
-
-       if (host_os_sprs->fscr != vcpu->arch.fscr)
-               mtspr(SPRN_FSCR, host_os_sprs->fscr);
-
-       /* Save guest CTRL register, set runlatch to 1 */
-       if (!(vcpu->arch.ctrl & 1))
-               mtspr(SPRN_CTRLT, 1);
-}
-
 static inline bool hcall_is_xics(unsigned long req)
 {
        return req == H_EOI || req == H_CPPR || req == H_IPI ||
                req == H_IPOLL || req == H_XIRR || req == H_XIRR_X;
 }
 
-/*
- * Guest entry for POWER9 and later CPUs.
- */
-static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
-                        unsigned long lpcr)
+static void vcpu_vpa_increment_dispatch(struct kvm_vcpu *vcpu)
+{
+       struct lppaca *lp = vcpu->arch.vpa.pinned_addr;
+       if (lp) {
+               u32 yield_count = be32_to_cpu(lp->yield_count) + 1;
+               lp->yield_count = cpu_to_be32(yield_count);
+               vcpu->arch.vpa.dirty = 1;
+       }
+}
+
+/* call our hypervisor to load up HV regs and go */
+static int kvmhv_vcpu_entry_p9_nested(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr, u64 *tb)
 {
        struct kvmppc_vcore *vc = vcpu->arch.vcore;
+       unsigned long host_psscr;
+       unsigned long msr;
+       struct hv_guest_state hvregs;
        struct p9_host_os_sprs host_os_sprs;
        s64 dec;
-       u64 tb;
-       int trap, save_pmu;
+       int trap;
 
-       WARN_ON_ONCE(vcpu->arch.ceded);
-
-       dec = mfspr(SPRN_DEC);
-       tb = mftb();
-       if (dec < 0)
-               return BOOK3S_INTERRUPT_HV_DECREMENTER;
-       local_paca->kvm_hstate.dec_expires = dec + tb;
-       if (local_paca->kvm_hstate.dec_expires < time_limit)
-               time_limit = local_paca->kvm_hstate.dec_expires;
+       msr = mfmsr();
 
        save_p9_host_os_sprs(&host_os_sprs);
 
-       kvmhv_save_host_pmu();          /* saves it to PACA kvm_hstate */
+       /*
+        * We need to save and restore the guest visible part of the
+        * psscr (i.e. using SPRN_PSSCR_PR) since the hypervisor
+        * doesn't do this for us. Note this is only required on pseries;
+        * otherwise it is handled in kvmhv_vcpu_entry_p9() below.
+        */
+       host_psscr = mfspr(SPRN_PSSCR_PR);
 
-       kvmppc_subcore_enter_guest();
+       kvmppc_msr_hard_disable_set_facilities(vcpu, msr);
+       if (lazy_irq_pending())
+               return 0;
 
-       vc->entry_exit_map = 1;
-       vc->in_guest = 1;
+       if (unlikely(load_vcpu_state(vcpu, &host_os_sprs)))
+               msr = mfmsr(); /* TM restore can update msr */
 
-       if (vcpu->arch.vpa.pinned_addr) {
-               struct lppaca *lp = vcpu->arch.vpa.pinned_addr;
-               u32 yield_count = be32_to_cpu(lp->yield_count) + 1;
-               lp->yield_count = cpu_to_be32(yield_count);
-               vcpu->arch.vpa.dirty = 1;
-       }
-
-       if (cpu_has_feature(CPU_FTR_TM) ||
-           cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
-               kvmppc_restore_tm_hv(vcpu, vcpu->arch.shregs.msr, true);
+       if (vcpu->arch.psscr != host_psscr)
+               mtspr(SPRN_PSSCR_PR, vcpu->arch.psscr);
 
-#ifdef CONFIG_PPC_PSERIES
-       if (kvmhv_on_pseries()) {
-               barrier();
-               if (vcpu->arch.vpa.pinned_addr) {
-                       struct lppaca *lp = vcpu->arch.vpa.pinned_addr;
-                       get_lppaca()->pmcregs_in_use = lp->pmcregs_in_use;
-               } else {
-                       get_lppaca()->pmcregs_in_use = 1;
-               }
-               barrier();
+       kvmhv_save_hv_regs(vcpu, &hvregs);
+       hvregs.lpcr = lpcr;
+       vcpu->arch.regs.msr = vcpu->arch.shregs.msr;
+       hvregs.version = HV_GUEST_STATE_VERSION;
+       if (vcpu->arch.nested) {
+               hvregs.lpid = vcpu->arch.nested->shadow_lpid;
+               hvregs.vcpu_token = vcpu->arch.nested_vcpu_id;
+       } else {
+               hvregs.lpid = vcpu->kvm->arch.lpid;
+               hvregs.vcpu_token = vcpu->vcpu_id;
        }
-#endif
-       kvmhv_load_guest_pmu(vcpu);
-
-       msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX);
-       load_fp_state(&vcpu->arch.fp);
-#ifdef CONFIG_ALTIVEC
-       load_vr_state(&vcpu->arch.vr);
-#endif
-       mtspr(SPRN_VRSAVE, vcpu->arch.vrsave);
-
-       load_spr_state(vcpu);
+       hvregs.hdec_expiry = time_limit;
 
        /*
-        * When setting DEC, we must always deal with irq_work_raise via NMI vs
-        * setting DEC. The problem occurs right as we switch into guest mode
-        * if a NMI hits and sets pending work and sets DEC, then that will
-        * apply to the guest and not bring us back to the host.
+        * When setting DEC, we must always deal with irq_work_raise
+        * via NMI vs setting DEC. The problem occurs right as we
+        * switch into guest mode if a NMI hits and sets pending work
+        * and sets DEC, then that will apply to the guest and not
+        * bring us back to the host.
         *
-        * irq_work_raise could check a flag (or possibly LPCR[HDICE] for
-        * example) and set HDEC to 1? That wouldn't solve the nested hv
-        * case which needs to abort the hcall or zero the time limit.
+        * irq_work_raise could check a flag (or possibly LPCR[HDICE]
+        * for example) and set HDEC to 1? That wouldn't solve the
+        * nested hv case which needs to abort the hcall or zero the
+        * time limit.
         *
         * XXX: Another day's problem.
         */
-       mtspr(SPRN_DEC, vcpu->arch.dec_expires - mftb());
+       mtspr(SPRN_DEC, kvmppc_dec_expires_host_tb(vcpu) - *tb);
+
+       mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
+       mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
+       switch_pmu_to_guest(vcpu, &host_os_sprs);
+       trap = plpar_hcall_norets(H_ENTER_NESTED, __pa(&hvregs),
+                                 __pa(&vcpu->arch.regs));
+       kvmhv_restore_hv_return_state(vcpu, &hvregs);
+       switch_pmu_to_host(vcpu, &host_os_sprs);
+       vcpu->arch.shregs.msr = vcpu->arch.regs.msr;
+       vcpu->arch.shregs.dar = mfspr(SPRN_DAR);
+       vcpu->arch.shregs.dsisr = mfspr(SPRN_DSISR);
+       vcpu->arch.psscr = mfspr(SPRN_PSSCR_PR);
+
+       store_vcpu_state(vcpu);
 
-       if (kvmhv_on_pseries()) {
-               /*
-                * We need to save and restore the guest visible part of the
-                * psscr (i.e. using SPRN_PSSCR_PR) since the hypervisor
-                * doesn't do this for us. Note only required if pseries since
-                * this is done in kvmhv_vcpu_entry_p9() below otherwise.
-                */
-               unsigned long host_psscr;
-               /* call our hypervisor to load up HV regs and go */
-               struct hv_guest_state hvregs;
+       dec = mfspr(SPRN_DEC);
+       if (!(lpcr & LPCR_LD)) /* Sign extend if not using large decrementer */
+               dec = (s32) dec;
+       *tb = mftb();
+       vcpu->arch.dec_expires = dec + (*tb + vc->tb_offset);
 
-               host_psscr = mfspr(SPRN_PSSCR_PR);
-               mtspr(SPRN_PSSCR_PR, vcpu->arch.psscr);
-               kvmhv_save_hv_regs(vcpu, &hvregs);
-               hvregs.lpcr = lpcr;
-               vcpu->arch.regs.msr = vcpu->arch.shregs.msr;
-               hvregs.version = HV_GUEST_STATE_VERSION;
-               if (vcpu->arch.nested) {
-                       hvregs.lpid = vcpu->arch.nested->shadow_lpid;
-                       hvregs.vcpu_token = vcpu->arch.nested_vcpu_id;
-               } else {
-                       hvregs.lpid = vcpu->kvm->arch.lpid;
-                       hvregs.vcpu_token = vcpu->vcpu_id;
-               }
-               hvregs.hdec_expiry = time_limit;
-               mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
-               mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
-               trap = plpar_hcall_norets(H_ENTER_NESTED, __pa(&hvregs),
-                                         __pa(&vcpu->arch.regs));
-               kvmhv_restore_hv_return_state(vcpu, &hvregs);
-               vcpu->arch.shregs.msr = vcpu->arch.regs.msr;
-               vcpu->arch.shregs.dar = mfspr(SPRN_DAR);
-               vcpu->arch.shregs.dsisr = mfspr(SPRN_DSISR);
-               vcpu->arch.psscr = mfspr(SPRN_PSSCR_PR);
+       timer_rearm_host_dec(*tb);
+
+       restore_p9_host_os_sprs(vcpu, &host_os_sprs);
+       if (vcpu->arch.psscr != host_psscr)
                mtspr(SPRN_PSSCR_PR, host_psscr);
 
+       return trap;
+}
+
+/*
+ * Guest entry for POWER9 and later CPUs.
+ */
+static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
+                        unsigned long lpcr, u64 *tb)
+{
+       u64 next_timer;
+       int trap;
+
+       next_timer = timer_get_next_tb();
+       if (*tb >= next_timer)
+               return BOOK3S_INTERRUPT_HV_DECREMENTER;
+       if (next_timer < time_limit)
+               time_limit = next_timer;
+       else if (*tb >= time_limit) /* nested time limit */
+               return BOOK3S_INTERRUPT_NESTED_HV_DECREMENTER;
+
+       vcpu->arch.ceded = 0;
+
+       vcpu_vpa_increment_dispatch(vcpu);
+
+       if (kvmhv_on_pseries()) {
+               trap = kvmhv_vcpu_entry_p9_nested(vcpu, time_limit, lpcr, tb);
+
                /* H_CEDE has to be handled now, not later */
                if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested &&
                    kvmppc_get_gpr(vcpu, 3) == H_CEDE) {
@@ -3982,9 +4045,16 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
                        kvmppc_set_gpr(vcpu, 3, 0);
                        trap = 0;
                }
+
        } else {
+               struct kvm *kvm = vcpu->kvm;
+
                kvmppc_xive_push_vcpu(vcpu);
-               trap = kvmhv_vcpu_entry_p9(vcpu, time_limit, lpcr);
+
+               __this_cpu_write(cpu_in_guest, kvm);
+               trap = kvmhv_vcpu_entry_p9(vcpu, time_limit, lpcr, tb);
+               __this_cpu_write(cpu_in_guest, NULL);
+
                if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested &&
                    !(vcpu->arch.shregs.msr & MSR_PR)) {
                        unsigned long req = kvmppc_get_gpr(vcpu, 3);
@@ -4009,65 +4079,11 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
                }
                kvmppc_xive_pull_vcpu(vcpu);
 
-               if (kvm_is_radix(vcpu->kvm))
+               if (kvm_is_radix(kvm))
                        vcpu->arch.slb_max = 0;
        }
 
-       dec = mfspr(SPRN_DEC);
-       if (!(lpcr & LPCR_LD)) /* Sign extend if not using large decrementer */
-               dec = (s32) dec;
-       tb = mftb();
-       vcpu->arch.dec_expires = dec + tb;
-       vcpu->cpu = -1;
-       vcpu->arch.thread_cpu = -1;
-
-       store_spr_state(vcpu);
-
-       restore_p9_host_os_sprs(vcpu, &host_os_sprs);
-
-       msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX);
-       store_fp_state(&vcpu->arch.fp);
-#ifdef CONFIG_ALTIVEC
-       store_vr_state(&vcpu->arch.vr);
-#endif
-       vcpu->arch.vrsave = mfspr(SPRN_VRSAVE);
-
-       if (cpu_has_feature(CPU_FTR_TM) ||
-           cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
-               kvmppc_save_tm_hv(vcpu, vcpu->arch.shregs.msr, true);
-
-       save_pmu = 1;
-       if (vcpu->arch.vpa.pinned_addr) {
-               struct lppaca *lp = vcpu->arch.vpa.pinned_addr;
-               u32 yield_count = be32_to_cpu(lp->yield_count) + 1;
-               lp->yield_count = cpu_to_be32(yield_count);
-               vcpu->arch.vpa.dirty = 1;
-               save_pmu = lp->pmcregs_in_use;
-       }
-       /* Must save pmu if this guest is capable of running nested guests */
-       save_pmu |= nesting_enabled(vcpu->kvm);
-
-       kvmhv_save_guest_pmu(vcpu, save_pmu);
-#ifdef CONFIG_PPC_PSERIES
-       if (kvmhv_on_pseries()) {
-               barrier();
-               get_lppaca()->pmcregs_in_use = ppc_get_pmu_inuse();
-               barrier();
-       }
-#endif
-
-       vc->entry_exit_map = 0x101;
-       vc->in_guest = 0;
-
-       mtspr(SPRN_DEC, local_paca->kvm_hstate.dec_expires - mftb());
-       /* We may have raced with new irq work */
-       if (test_irq_work_pending())
-               set_dec(1);
-       mtspr(SPRN_SPRG_VDSO_WRITE, local_paca->sprg_vdso);
-
-       kvmhv_load_host_pmu();
-
-       kvmppc_subcore_exit_guest();
+       vcpu_vpa_increment_dispatch(vcpu);
 
        return trap;
 }
@@ -4132,6 +4148,13 @@ static bool kvmppc_vcpu_woken(struct kvm_vcpu *vcpu)
        return false;
 }
 
+static bool kvmppc_vcpu_check_block(struct kvm_vcpu *vcpu)
+{
+       if (!vcpu->arch.ceded || kvmppc_vcpu_woken(vcpu))
+               return true;
+       return false;
+}
+
 /*
  * Check to see if any of the runnable vcpus on the vcore have pending
  * exceptions or are no longer ceded
@@ -4142,7 +4165,7 @@ static int kvmppc_vcore_check_block(struct kvmppc_vcore *vc)
        int i;
 
        for_each_runnable_thread(i, vcpu, vc) {
-               if (!vcpu->arch.ceded || kvmppc_vcpu_woken(vcpu))
+               if (kvmppc_vcpu_check_block(vcpu))
                        return 1;
        }
 
@@ -4159,6 +4182,8 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
        int do_sleep = 1;
        u64 block_ns;
 
+       WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300));
+
        /* Poll for pending exceptions and ceded state */
        cur = start_poll = ktime_get();
        if (vc->halt_poll_ns) {
@@ -4355,7 +4380,7 @@ static int kvmppc_run_vcpu(struct kvm_vcpu *vcpu)
                for_each_runnable_thread(i, v, vc) {
                        kvmppc_core_prepare_to_enter(v);
                        if (signal_pending(v->arch.run_task)) {
-                               kvmppc_remove_runnable(vc, v);
+                               kvmppc_remove_runnable(vc, v, mftb());
                                v->stat.signal_exits++;
                                v->run->exit_reason = KVM_EXIT_INTR;
                                v->arch.ret = -EINTR;
@@ -4396,7 +4421,7 @@ static int kvmppc_run_vcpu(struct kvm_vcpu *vcpu)
                kvmppc_vcore_end_preempt(vc);
 
        if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
-               kvmppc_remove_runnable(vc, vcpu);
+               kvmppc_remove_runnable(vc, vcpu, mftb());
                vcpu->stat.signal_exits++;
                run->exit_reason = KVM_EXIT_INTR;
                vcpu->arch.ret = -EINTR;
@@ -4417,12 +4442,15 @@ static int kvmppc_run_vcpu(struct kvm_vcpu *vcpu)
 int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
                          unsigned long lpcr)
 {
+       struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu);
        struct kvm_run *run = vcpu->run;
        int trap, r, pcpu;
        int srcu_idx;
        struct kvmppc_vcore *vc;
        struct kvm *kvm = vcpu->kvm;
        struct kvm_nested_guest *nested = vcpu->arch.nested;
+       unsigned long flags;
+       u64 tb;
 
        trace_kvmppc_run_vcpu_enter(vcpu);
 
@@ -4433,16 +4461,11 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
        vc = vcpu->arch.vcore;
        vcpu->arch.ceded = 0;
        vcpu->arch.run_task = current;
-       vcpu->arch.stolen_logged = vcore_stolen_time(vc, mftb());
        vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
-       vcpu->arch.busy_preempt = TB_NIL;
        vcpu->arch.last_inst = KVM_INST_FETCH_FAILED;
-       vc->runnable_threads[0] = vcpu;
-       vc->n_runnable = 1;
-       vc->runner = vcpu;
 
        /* See if the MMU is ready to go */
-       if (!kvm->arch.mmu_ready) {
+       if (unlikely(!kvm->arch.mmu_ready)) {
                r = kvmhv_setup_mmu(vcpu);
                if (r) {
                        run->exit_reason = KVM_EXIT_FAIL_ENTRY;
@@ -4457,29 +4480,21 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
 
        kvmppc_update_vpas(vcpu);
 
-       init_vcore_to_run(vc);
-       vc->preempt_tb = TB_NIL;
-
        preempt_disable();
        pcpu = smp_processor_id();
-       vc->pcpu = pcpu;
        if (kvm_is_radix(kvm))
                kvmppc_prepare_radix_vcpu(vcpu, pcpu);
 
-       local_irq_disable();
-       hard_irq_disable();
+       /* flags save not required, but irq_pmu has no disable/enable API */
+       powerpc_local_irq_pmu_save(flags);
+
        if (signal_pending(current))
                goto sigpend;
-       if (lazy_irq_pending() || need_resched() || !kvm->arch.mmu_ready)
+       if (need_resched() || !kvm->arch.mmu_ready)
                goto out;
 
        if (!nested) {
                kvmppc_core_prepare_to_enter(vcpu);
-               if (vcpu->arch.doorbell_request) {
-                       vc->dpdes = 1;
-                       smp_wmb();
-                       vcpu->arch.doorbell_request = 0;
-               }
                if (test_bit(BOOK3S_IRQPRIO_EXTERNAL,
                             &vcpu->arch.pending_exceptions))
                        lpcr |= LPCR_MER;
@@ -4490,16 +4505,23 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
                goto out;
        }
 
-       kvmppc_clear_host_core(pcpu);
+       if (vcpu->arch.timer_running) {
+               hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
+               vcpu->arch.timer_running = 0;
+       }
 
-       local_paca->kvm_hstate.napping = 0;
-       local_paca->kvm_hstate.kvm_split_mode = NULL;
-       kvmppc_start_thread(vcpu, vc);
-       kvmppc_create_dtl_entry(vcpu, vc);
-       trace_kvm_guest_enter(vcpu);
+       tb = mftb();
 
-       vc->vcore_state = VCORE_RUNNING;
-       trace_kvmppc_run_core(vc, 0);
+       vcpu->cpu = pcpu;
+       vcpu->arch.thread_cpu = pcpu;
+       vc->pcpu = pcpu;
+       local_paca->kvm_hstate.kvm_vcpu = vcpu;
+       local_paca->kvm_hstate.ptid = 0;
+       local_paca->kvm_hstate.fake_suspend = 0;
+
+       __kvmppc_create_dtl_entry(vcpu, pcpu, tb + vc->tb_offset, 0);
+
+       trace_kvm_guest_enter(vcpu);
 
        guest_enter_irqoff();
 
@@ -4510,7 +4532,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
        /* Tell lockdep that we're about to enable interrupts */
        trace_hardirqs_on();
 
-       trap = kvmhv_p9_guest_entry(vcpu, time_limit, lpcr);
+       trap = kvmhv_p9_guest_entry(vcpu, time_limit, lpcr, &tb);
        vcpu->arch.trap = trap;
 
        trace_hardirqs_off();
@@ -4521,8 +4543,6 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
 
        set_irq_happened(trap);
 
-       kvmppc_set_host_core(pcpu);
-
        context_tracking_guest_exit();
        if (!vtime_accounting_enabled_this_cpu()) {
                local_irq_enable();
@@ -4538,9 +4558,10 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
        }
        vtime_account_guest_exit();
 
-       local_irq_enable();
+       vcpu->cpu = -1;
+       vcpu->arch.thread_cpu = -1;
 
-       cpumask_clear_cpu(pcpu, &kvm->arch.cpu_in_guest);
+       powerpc_local_irq_pmu_restore(flags);
 
        preempt_enable();
 
@@ -4550,7 +4571,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
         * by L2 and the L1 decrementer is provided in hdec_expires
         */
        if (kvmppc_core_pending_dec(vcpu) &&
-                       ((get_tb() < vcpu->arch.dec_expires) ||
+                       ((tb < kvmppc_dec_expires_host_tb(vcpu)) ||
                         (trap == BOOK3S_INTERRUPT_SYSCALL &&
                          kvmppc_get_gpr(vcpu, 3) == H_ENTER_NESTED)))
                kvmppc_core_dequeue_dec(vcpu);
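vcpu->arch.dec_expires is now tracked relative to the guest timebase, so comparisons against the host timebase go through kvmppc_dec_expires_host_tb() as above. Assuming that helper simply removes the vcore's applied timebase offset, it amounts to this sketch:

	/*
	 * Sketch, assuming dec_expires is kept in guest timebase units and
	 * vcore->tb_offset is the guest-minus-host timebase delta.
	 */
	static inline u64 kvmppc_dec_expires_host_tb(struct kvm_vcpu *vcpu)
	{
		return vcpu->arch.dec_expires - vcpu->arch.vcore->tb_offset;
	}
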
@@ -4565,28 +4586,31 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
        }
        vcpu->arch.ret = r;
 
-       if (is_kvmppc_resume_guest(r) && vcpu->arch.ceded &&
-           !kvmppc_vcpu_woken(vcpu)) {
+       if (is_kvmppc_resume_guest(r) && !kvmppc_vcpu_check_block(vcpu)) {
                kvmppc_set_timer(vcpu);
-               while (vcpu->arch.ceded && !kvmppc_vcpu_woken(vcpu)) {
+
+               prepare_to_rcuwait(wait);
+               for (;;) {
+                       set_current_state(TASK_INTERRUPTIBLE);
                        if (signal_pending(current)) {
                                vcpu->stat.signal_exits++;
                                run->exit_reason = KVM_EXIT_INTR;
                                vcpu->arch.ret = -EINTR;
                                break;
                        }
-                       spin_lock(&vc->lock);
-                       kvmppc_vcore_blocked(vc);
-                       spin_unlock(&vc->lock);
+
+                       if (kvmppc_vcpu_check_block(vcpu))
+                               break;
+
+                       trace_kvmppc_vcore_blocked(vc, 0);
+                       schedule();
+                       trace_kvmppc_vcore_blocked(vc, 1);
                }
+               finish_rcuwait(wait);
        }
        vcpu->arch.ceded = 0;
 
-       vc->vcore_state = VCORE_INACTIVE;
-       trace_kvmppc_run_core(vc, 1);
-
  done:
-       kvmppc_remove_runnable(vc, vcpu);
        trace_kvmppc_run_vcpu_exit(vcpu);
 
        return vcpu->arch.ret;
@@ -4596,7 +4620,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
        run->exit_reason = KVM_EXIT_INTR;
        vcpu->arch.ret = -EINTR;
  out:
-       local_irq_enable();
+       powerpc_local_irq_pmu_restore(flags);
        preempt_enable();
        goto done;
 }
@@ -4606,23 +4630,25 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
        struct kvm_run *run = vcpu->run;
        int r;
        int srcu_idx;
-       unsigned long ebb_regs[3] = {}; /* shut up GCC */
-       unsigned long user_tar = 0;
-       unsigned int user_vrsave;
        struct kvm *kvm;
+       unsigned long msr;
 
        if (!vcpu->arch.sane) {
                run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
                return -EINVAL;
        }
 
+       /* No need to go into the guest when all we'll do is come back out */
+       if (signal_pending(current)) {
+               run->exit_reason = KVM_EXIT_INTR;
+               return -EINTR;
+       }
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
        /*
         * Don't allow entry with a suspended transaction, because
         * the guest entry/exit code will lose it.
-        * If the guest has TM enabled, save away their TM-related SPRs
-        * (they will get restored by the TM unavailable interrupt).
         */
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
        if (cpu_has_feature(CPU_FTR_TM) && current->thread.regs &&
            (current->thread.regs->msr & MSR_TM)) {
                if (MSR_TM_ACTIVE(current->thread.regs->msr)) {
@@ -4630,12 +4656,6 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
                        run->fail_entry.hardware_entry_failure_reason = 0;
                        return -EINVAL;
                }
-               /* Enable TM so we can read the TM SPRs */
-               mtmsr(mfmsr() | MSR_TM);
-               current->thread.tm_tfhar = mfspr(SPRN_TFHAR);
-               current->thread.tm_tfiar = mfspr(SPRN_TFIAR);
-               current->thread.tm_texasr = mfspr(SPRN_TEXASR);
-               current->thread.regs->msr &= ~MSR_TM;
        }
 #endif
 
@@ -4650,29 +4670,30 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
 
        kvmppc_core_prepare_to_enter(vcpu);
 
-       /* No need to go into the guest when all we'll do is come back out */
-       if (signal_pending(current)) {
-               run->exit_reason = KVM_EXIT_INTR;
-               return -EINTR;
-       }
-
        kvm = vcpu->kvm;
        atomic_inc(&kvm->arch.vcpus_running);
        /* Order vcpus_running vs. mmu_ready, see kvmppc_alloc_reset_hpt */
        smp_mb();
 
-       flush_all_to_thread(current);
+       msr = 0;
+       if (IS_ENABLED(CONFIG_PPC_FPU))
+               msr |= MSR_FP;
+       if (cpu_has_feature(CPU_FTR_ALTIVEC))
+               msr |= MSR_VEC;
+       if (cpu_has_feature(CPU_FTR_VSX))
+               msr |= MSR_VSX;
+       if ((cpu_has_feature(CPU_FTR_TM) ||
+           cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) &&
+                       (vcpu->arch.hfscr & HFSCR_TM))
+               msr |= MSR_TM;
+       msr = msr_check_and_set(msr);
 
-       /* Save userspace EBB and other register values */
-       if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
-               ebb_regs[0] = mfspr(SPRN_EBBHR);
-               ebb_regs[1] = mfspr(SPRN_EBBRR);
-               ebb_regs[2] = mfspr(SPRN_BESCR);
-               user_tar = mfspr(SPRN_TAR);
-       }
-       user_vrsave = mfspr(SPRN_VRSAVE);
+       kvmppc_save_user_regs();
+
+       kvmppc_save_current_sprs();
 
-       vcpu->arch.waitp = &vcpu->arch.vcore->wait;
+       if (!cpu_has_feature(CPU_FTR_ARCH_300))
+               vcpu->arch.waitp = &vcpu->arch.vcore->wait;
        vcpu->arch.pgdir = kvm->mm->pgd;
        vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
 
@@ -4711,15 +4732,6 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
                }
        } while (is_kvmppc_resume_guest(r));
 
-       /* Restore userspace EBB and other register values */
-       if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
-               mtspr(SPRN_EBBHR, ebb_regs[0]);
-               mtspr(SPRN_EBBRR, ebb_regs[1]);
-               mtspr(SPRN_BESCR, ebb_regs[2]);
-               mtspr(SPRN_TAR, user_tar);
-       }
-       mtspr(SPRN_VRSAVE, user_vrsave);
-
        vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
        atomic_dec(&kvm->arch.vcpus_running);
 
@@ -4786,8 +4798,8 @@ static int kvm_vm_ioctl_get_dirty_log_hv(struct kvm *kvm,
 {
        struct kvm_memslots *slots;
        struct kvm_memory_slot *memslot;
-       int i, r;
-       unsigned long n;
+       int r;
+       unsigned long n, i;
        unsigned long *buf, *p;
        struct kvm_vcpu *vcpu;
 
@@ -4854,37 +4866,38 @@ static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *slot)
 }
 
 static int kvmppc_core_prepare_memory_region_hv(struct kvm *kvm,
-                                       struct kvm_memory_slot *slot,
-                                       const struct kvm_userspace_memory_region *mem,
-                                       enum kvm_mr_change change)
+                               const struct kvm_memory_slot *old,
+                               struct kvm_memory_slot *new,
+                               enum kvm_mr_change change)
 {
-       unsigned long npages = mem->memory_size >> PAGE_SHIFT;
-
        if (change == KVM_MR_CREATE) {
-               slot->arch.rmap = vzalloc(array_size(npages,
-                                         sizeof(*slot->arch.rmap)));
-               if (!slot->arch.rmap)
+               unsigned long size = array_size(new->npages, sizeof(*new->arch.rmap));
+
+               if ((size >> PAGE_SHIFT) > totalram_pages())
+                       return -ENOMEM;
+
+               new->arch.rmap = vzalloc(size);
+               if (!new->arch.rmap)
                        return -ENOMEM;
+       } else if (change != KVM_MR_DELETE) {
+               new->arch.rmap = old->arch.rmap;
        }
 
        return 0;
 }
 
 static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm,
-                               const struct kvm_userspace_memory_region *mem,
-                               const struct kvm_memory_slot *old,
+                               struct kvm_memory_slot *old,
                                const struct kvm_memory_slot *new,
                                enum kvm_mr_change change)
 {
-       unsigned long npages = mem->memory_size >> PAGE_SHIFT;
-
        /*
-        * If we are making a new memslot, it might make
+        * If we are creating or modifying a memslot, it might make
         * some address that was previously cached as emulated
         * MMIO be no longer emulated MMIO, so invalidate
         * all the caches of emulated MMIO translations.
         */
-       if (npages)
+       if (change != KVM_MR_DELETE)
                atomic64_inc(&kvm->arch.mmio_update);
 
        /*
@@ -5072,6 +5085,8 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
  */
 int kvmppc_switch_mmu_to_hpt(struct kvm *kvm)
 {
+       unsigned long lpcr, lpcr_mask;
+
        if (nesting_enabled(kvm))
                kvmhv_release_all_nested(kvm);
        kvmppc_rmap_reset(kvm);
@@ -5081,8 +5096,13 @@ int kvmppc_switch_mmu_to_hpt(struct kvm *kvm)
        kvm->arch.radix = 0;
        spin_unlock(&kvm->mmu_lock);
        kvmppc_free_radix(kvm);
-       kvmppc_update_lpcr(kvm, LPCR_VPM1,
-                          LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR);
+
+       lpcr = LPCR_VPM1;
+       lpcr_mask = LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR;
+       if (cpu_has_feature(CPU_FTR_ARCH_31))
+               lpcr_mask |= LPCR_HAIL;
+       kvmppc_update_lpcr(kvm, lpcr, lpcr_mask);
+
        return 0;
 }
 
@@ -5092,6 +5112,7 @@ int kvmppc_switch_mmu_to_hpt(struct kvm *kvm)
  */
 int kvmppc_switch_mmu_to_radix(struct kvm *kvm)
 {
+       unsigned long lpcr, lpcr_mask;
        int err;
 
        err = kvmppc_init_vm_radix(kvm);
@@ -5103,8 +5124,17 @@ int kvmppc_switch_mmu_to_radix(struct kvm *kvm)
        kvm->arch.radix = 1;
        spin_unlock(&kvm->mmu_lock);
        kvmppc_free_hpt(&kvm->arch.hpt);
-       kvmppc_update_lpcr(kvm, LPCR_UPRT | LPCR_GTSE | LPCR_HR,
-                          LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR);
+
+       lpcr = LPCR_UPRT | LPCR_GTSE | LPCR_HR;
+       lpcr_mask = LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR;
+       if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+               lpcr_mask |= LPCR_HAIL;
+               if (cpu_has_feature(CPU_FTR_HVMODE) &&
+                               (kvm->arch.host_lpcr & LPCR_HAIL))
+                       lpcr |= LPCR_HAIL;
+       }
+       kvmppc_update_lpcr(kvm, lpcr, lpcr_mask);
+
        return 0;
 }
 
@@ -5126,6 +5156,9 @@ void kvmppc_alloc_host_rm_ops(void)
        int cpu, core;
        int size;
 
+       if (cpu_has_feature(CPU_FTR_ARCH_300))
+               return;
+
        /* Not the first time here ? */
        if (kvmppc_host_rm_ops_hv != NULL)
                return;
@@ -5268,6 +5301,10 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
                kvm->arch.mmu_ready = 1;
                lpcr &= ~LPCR_VPM1;
                lpcr |= LPCR_UPRT | LPCR_GTSE | LPCR_HR;
+               if (cpu_has_feature(CPU_FTR_HVMODE) &&
+                   cpu_has_feature(CPU_FTR_ARCH_31) &&
+                   (kvm->arch.host_lpcr & LPCR_HAIL))
+                       lpcr |= LPCR_HAIL;
                ret = kvmppc_init_vm_radix(kvm);
                if (ret) {
                        kvmppc_free_lpid(kvm->arch.lpid);
@@ -5861,7 +5898,7 @@ static int kvmhv_svm_off(struct kvm *kvm)
        int mmu_was_ready;
        int srcu_idx;
        int ret = 0;
-       int i;
+       unsigned long i;
 
        if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
                return ret;
@@ -5883,11 +5920,12 @@ static int kvmhv_svm_off(struct kvm *kvm)
        for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
                struct kvm_memory_slot *memslot;
                struct kvm_memslots *slots = __kvm_memslots(kvm, i);
+               int bkt;
 
                if (!slots)
                        continue;
 
-               kvm_for_each_memslot(memslot, slots) {
+               kvm_for_each_memslot(memslot, bkt, slots) {
                        kvmppc_uvmem_drop_pages(memslot, kvm, true);
                        uv_unregister_mem_slot(kvm->arch.lpid, memslot->id);
                }
@@ -6063,9 +6101,11 @@ static int kvmppc_book3s_init_hv(void)
        if (r)
                return r;
 
-       r = kvm_init_subcore_bitmap();
-       if (r)
-               return r;
+       if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
+               r = kvm_init_subcore_bitmap();
+               if (r)
+                       return r;
+       }
 
        /*
         * We need a way of accessing the XICS interrupt controller,
diff --git a/arch/powerpc/kvm/book3s_hv.h b/arch/powerpc/kvm/book3s_hv.h
new file mode 100644 (file)
index 0000000..6b7f07d
--- /dev/null
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Privileged (non-hypervisor) host registers to save.
+ */
+struct p9_host_os_sprs {
+       unsigned long iamr;
+       unsigned long amr;
+
+       unsigned int pmc1;
+       unsigned int pmc2;
+       unsigned int pmc3;
+       unsigned int pmc4;
+       unsigned int pmc5;
+       unsigned int pmc6;
+       unsigned long mmcr0;
+       unsigned long mmcr1;
+       unsigned long mmcr2;
+       unsigned long mmcr3;
+       unsigned long mmcra;
+       unsigned long siar;
+       unsigned long sier1;
+       unsigned long sier2;
+       unsigned long sier3;
+       unsigned long sdar;
+};
+
+static inline bool nesting_enabled(struct kvm *kvm)
+{
+       return kvm->arch.nested_enable && kvm_is_radix(kvm);
+}
+
+bool load_vcpu_state(struct kvm_vcpu *vcpu,
+                          struct p9_host_os_sprs *host_os_sprs);
+void store_vcpu_state(struct kvm_vcpu *vcpu);
+void save_p9_host_os_sprs(struct p9_host_os_sprs *host_os_sprs);
+void restore_p9_host_os_sprs(struct kvm_vcpu *vcpu,
+                                   struct p9_host_os_sprs *host_os_sprs);
+void switch_pmu_to_guest(struct kvm_vcpu *vcpu,
+                           struct p9_host_os_sprs *host_os_sprs);
+void switch_pmu_to_host(struct kvm_vcpu *vcpu,
+                           struct p9_host_os_sprs *host_os_sprs);
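The new book3s_hv.h above only declares the split-out P9 state-switching helpers. A rough sketch of how they are intended to pair up around a guest entry (illustrative ordering only, matching the calls made later in this patch; the real entry path interleaves MMU, timebase and interrupt handling between these steps):

	static void p9_partition_switch_sketch(struct kvm_vcpu *vcpu)
	{
		struct p9_host_os_sprs host_os_sprs;

		save_p9_host_os_sprs(&host_os_sprs);		/* host IAMR/AMR */
		load_vcpu_state(vcpu, &host_os_sprs);		/* guest FP/VEC/TM and SPRs */
		switch_pmu_to_guest(vcpu, &host_os_sprs);	/* host PMU out, guest PMU in */

		/* ... low-level guest entry/exit runs here ... */

		switch_pmu_to_host(vcpu, &host_os_sprs);	/* guest PMU out, host PMU in */
		store_vcpu_state(vcpu);				/* write guest state back */
		restore_p9_host_os_sprs(vcpu, &host_os_sprs);	/* host SPR values back */
	}
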
index fcf4760..7d6d913 100644 (file)
@@ -649,6 +649,8 @@ void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu)
        int ext;
        unsigned long lpcr;
 
+       WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300));
+
        /* Insert EXTERNAL bit into LPCR at the MER bit position */
        ext = (vcpu->arch.pending_exceptions >> BOOK3S_IRQPRIO_EXTERNAL) & 1;
        lpcr = mfspr(SPRN_LPCR);
@@ -682,57 +684,23 @@ static void flush_guest_tlb(struct kvm *kvm)
        unsigned long rb, set;
 
        rb = PPC_BIT(52);       /* IS = 2 */
-       if (kvm_is_radix(kvm)) {
-               /* R=1 PRS=1 RIC=2 */
+       for (set = 0; set < kvm->arch.tlb_sets; ++set) {
+               /* R=0 PRS=0 RIC=0 */
                asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
-                            : : "r" (rb), "i" (1), "i" (1), "i" (2),
+                            : : "r" (rb), "i" (0), "i" (0), "i" (0),
                               "r" (0) : "memory");
-               for (set = 1; set < kvm->arch.tlb_sets; ++set) {
-                       rb += PPC_BIT(51);      /* increment set number */
-                       /* R=1 PRS=1 RIC=0 */
-                       asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
-                                    : : "r" (rb), "i" (1), "i" (1), "i" (0),
-                                      "r" (0) : "memory");
-               }
-               asm volatile("ptesync": : :"memory");
-               asm volatile(PPC_RADIX_INVALIDATE_ERAT_GUEST : : :"memory");
-       } else {
-               for (set = 0; set < kvm->arch.tlb_sets; ++set) {
-                       /* R=0 PRS=0 RIC=0 */
-                       asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
-                                    : : "r" (rb), "i" (0), "i" (0), "i" (0),
-                                      "r" (0) : "memory");
-                       rb += PPC_BIT(51);      /* increment set number */
-               }
-               asm volatile("ptesync": : :"memory");
-               asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT : : :"memory");
+               rb += PPC_BIT(51);      /* increment set number */
        }
+       asm volatile("ptesync": : :"memory");
 }
 
-void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu,
-                                struct kvm_nested_guest *nested)
+void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu)
 {
-       cpumask_t *need_tlb_flush;
-
-       /*
-        * On POWER9, individual threads can come in here, but the
-        * TLB is shared between the 4 threads in a core, hence
-        * invalidating on one thread invalidates for all.
-        * Thus we make all 4 threads use the same bit.
-        */
-       if (cpu_has_feature(CPU_FTR_ARCH_300))
-               pcpu = cpu_first_tlb_thread_sibling(pcpu);
-
-       if (nested)
-               need_tlb_flush = &nested->need_tlb_flush;
-       else
-               need_tlb_flush = &kvm->arch.need_tlb_flush;
-
-       if (cpumask_test_cpu(pcpu, need_tlb_flush)) {
+       if (cpumask_test_cpu(pcpu, &kvm->arch.need_tlb_flush)) {
                flush_guest_tlb(kvm);
 
                /* Clear the bit after the TLB flush */
-               cpumask_clear_cpu(pcpu, need_tlb_flush);
+               cpumask_clear_cpu(pcpu, &kvm->arch.need_tlb_flush);
        }
 }
 EXPORT_SYMBOL_GPL(kvmppc_check_need_tlb_flush);
index 9af6604..1ec50c6 100644 (file)
@@ -20,10 +20,15 @@ void wait_for_subcore_guest_exit(void)
 
        /*
         * NULL bitmap pointer indicates that KVM module hasn't
-        * been loaded yet and hence no guests are running.
+        * been loaded yet and hence no guests are running, or that we are
+        * running on a POWER9 or newer CPU.
+        *
         * If no KVM is in use, no need to co-ordinate among threads
         * as all of them will always be in host and no one is going
         * to modify TB other than the opal hmi handler.
+        *
+        * POWER9 and newer don't need this synchronisation.
+        *
         * Hence, just return from here.
         */
        if (!local_paca->sibling_subcore_state)
index 4444f83..59d89e4 100644 (file)
@@ -104,7 +104,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
        mtlr    r0
        blr
 
-_GLOBAL(kvmhv_save_host_pmu)
+/*
+ * void kvmhv_save_host_pmu(void)
+ */
+kvmhv_save_host_pmu:
 BEGIN_FTR_SECTION
        /* Work around P8 PMAE bug */
        li      r3, -1
@@ -138,14 +141,6 @@ BEGIN_FTR_SECTION
        std     r8, HSTATE_MMCR2(r13)
        std     r9, HSTATE_SIER(r13)
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
-BEGIN_FTR_SECTION
-       mfspr   r5, SPRN_MMCR3
-       mfspr   r6, SPRN_SIER2
-       mfspr   r7, SPRN_SIER3
-       std     r5, HSTATE_MMCR3(r13)
-       std     r6, HSTATE_SIER2(r13)
-       std     r7, HSTATE_SIER3(r13)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
        mfspr   r3, SPRN_PMC1
        mfspr   r5, SPRN_PMC2
        mfspr   r6, SPRN_PMC3
index ed8a2c9..8f8daae 100644 (file)
@@ -358,6 +358,7 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
        /* convert TB values/offsets to host (L0) values */
        hdec_exp = l2_hv.hdec_expiry - vc->tb_offset;
        vc->tb_offset += l2_hv.tb_offset;
+       vcpu->arch.dec_expires += l2_hv.tb_offset;
 
        /* set L1 state to L2 state */
        vcpu->arch.nested = l2;
@@ -374,11 +375,6 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
        vcpu->arch.ret = RESUME_GUEST;
        vcpu->arch.trap = 0;
        do {
-               if (mftb() >= hdec_exp) {
-                       vcpu->arch.trap = BOOK3S_INTERRUPT_HV_DECREMENTER;
-                       r = RESUME_HOST;
-                       break;
-               }
                r = kvmhv_run_single_vcpu(vcpu, hdec_exp, lpcr);
        } while (is_kvmppc_resume_guest(r));
 
@@ -399,6 +395,8 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
        if (l2_regs.msr & MSR_TS_MASK)
                vcpu->arch.shregs.msr |= MSR_TS_S;
        vc->tb_offset = saved_l1_hv.tb_offset;
+       /* XXX: is this always the same delta as saved_l1_hv.tb_offset? */
+       vcpu->arch.dec_expires -= l2_hv.tb_offset;
        restore_hv_regs(vcpu, &saved_l1_hv);
        vcpu->arch.purr += delta_purr;
        vcpu->arch.spurr += delta_spurr;
@@ -582,7 +580,7 @@ long kvmhv_copy_tofrom_guest_nested(struct kvm_vcpu *vcpu)
        if (eaddr & (0xFFFUL << 52))
                return H_PARAMETER;
 
-       buf = kzalloc(n, GFP_KERNEL);
+       buf = kzalloc(n, GFP_KERNEL | __GFP_NOWARN);
        if (!buf)
                return H_NO_MEM;
 
@@ -749,7 +747,7 @@ void kvmhv_release_all_nested(struct kvm *kvm)
        struct kvm_nested_guest *gp;
        struct kvm_nested_guest *freelist = NULL;
        struct kvm_memory_slot *memslot;
-       int srcu_idx;
+       int srcu_idx, bkt;
 
        spin_lock(&kvm->mmu_lock);
        for (i = 0; i <= kvm->arch.max_nested_lpid; i++) {
@@ -770,7 +768,7 @@ void kvmhv_release_all_nested(struct kvm *kvm)
        }
 
        srcu_idx = srcu_read_lock(&kvm->srcu);
-       kvm_for_each_memslot(memslot, kvm_memslots(kvm))
+       kvm_for_each_memslot(memslot, bkt, kvm_memslots(kvm))
                kvmhv_free_memslot_nest_rmap(memslot);
        srcu_read_unlock(&kvm->srcu, srcu_idx);
 }
index 961b3d7..a28e5b3 100644 (file)
@@ -4,8 +4,439 @@
 #include <asm/asm-prototypes.h>
 #include <asm/dbell.h>
 #include <asm/kvm_ppc.h>
+#include <asm/pmc.h>
 #include <asm/ppc-opcode.h>
 
+#include "book3s_hv.h"
+
+static void freeze_pmu(unsigned long mmcr0, unsigned long mmcra)
+{
+       if (!(mmcr0 & MMCR0_FC))
+               goto do_freeze;
+       if (mmcra & MMCRA_SAMPLE_ENABLE)
+               goto do_freeze;
+       if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+               if (!(mmcr0 & MMCR0_PMCCEXT))
+                       goto do_freeze;
+               if (!(mmcra & MMCRA_BHRB_DISABLE))
+                       goto do_freeze;
+       }
+       return;
+
+do_freeze:
+       mmcr0 = MMCR0_FC;
+       mmcra = 0;
+       if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+               mmcr0 |= MMCR0_PMCCEXT;
+               mmcra = MMCRA_BHRB_DISABLE;
+       }
+
+       mtspr(SPRN_MMCR0, mmcr0);
+       mtspr(SPRN_MMCRA, mmcra);
+       isync();
+}
+
+void switch_pmu_to_guest(struct kvm_vcpu *vcpu,
+                        struct p9_host_os_sprs *host_os_sprs)
+{
+       struct lppaca *lp;
+       int load_pmu = 1;
+
+       lp = vcpu->arch.vpa.pinned_addr;
+       if (lp)
+               load_pmu = lp->pmcregs_in_use;
+
+       /* Save host */
+       if (ppc_get_pmu_inuse()) {
+               /*
+                * It might be better to put PMU handling (at least for the
+                * host) in the perf subsystem because it knows more about what
+                * is being used.
+                */
+
+               /* POWER9, POWER10 do not implement HPMC or SPMC */
+
+               host_os_sprs->mmcr0 = mfspr(SPRN_MMCR0);
+               host_os_sprs->mmcra = mfspr(SPRN_MMCRA);
+
+               freeze_pmu(host_os_sprs->mmcr0, host_os_sprs->mmcra);
+
+               host_os_sprs->pmc1 = mfspr(SPRN_PMC1);
+               host_os_sprs->pmc2 = mfspr(SPRN_PMC2);
+               host_os_sprs->pmc3 = mfspr(SPRN_PMC3);
+               host_os_sprs->pmc4 = mfspr(SPRN_PMC4);
+               host_os_sprs->pmc5 = mfspr(SPRN_PMC5);
+               host_os_sprs->pmc6 = mfspr(SPRN_PMC6);
+               host_os_sprs->mmcr1 = mfspr(SPRN_MMCR1);
+               host_os_sprs->mmcr2 = mfspr(SPRN_MMCR2);
+               host_os_sprs->sdar = mfspr(SPRN_SDAR);
+               host_os_sprs->siar = mfspr(SPRN_SIAR);
+               host_os_sprs->sier1 = mfspr(SPRN_SIER);
+
+               if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+                       host_os_sprs->mmcr3 = mfspr(SPRN_MMCR3);
+                       host_os_sprs->sier2 = mfspr(SPRN_SIER2);
+                       host_os_sprs->sier3 = mfspr(SPRN_SIER3);
+               }
+       }
+
+#ifdef CONFIG_PPC_PSERIES
+       /* After saving PMU, before loading guest PMU, flip pmcregs_in_use */
+       if (kvmhv_on_pseries()) {
+               barrier();
+               get_lppaca()->pmcregs_in_use = load_pmu;
+               barrier();
+       }
+#endif
+
+       /*
+        * Load guest. If the VPA said the PMCs are not in use but the guest
+        * tried to access them anyway, HFSCR[PM] will be set by the HFAC
+        * fault so we can make forward progress.
+        */
+       if (load_pmu || (vcpu->arch.hfscr & HFSCR_PM)) {
+               mtspr(SPRN_PMC1, vcpu->arch.pmc[0]);
+               mtspr(SPRN_PMC2, vcpu->arch.pmc[1]);
+               mtspr(SPRN_PMC3, vcpu->arch.pmc[2]);
+               mtspr(SPRN_PMC4, vcpu->arch.pmc[3]);
+               mtspr(SPRN_PMC5, vcpu->arch.pmc[4]);
+               mtspr(SPRN_PMC6, vcpu->arch.pmc[5]);
+               mtspr(SPRN_MMCR1, vcpu->arch.mmcr[1]);
+               mtspr(SPRN_MMCR2, vcpu->arch.mmcr[2]);
+               mtspr(SPRN_SDAR, vcpu->arch.sdar);
+               mtspr(SPRN_SIAR, vcpu->arch.siar);
+               mtspr(SPRN_SIER, vcpu->arch.sier[0]);
+
+               if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+                       mtspr(SPRN_MMCR3, vcpu->arch.mmcr[3]);
+                       mtspr(SPRN_SIER2, vcpu->arch.sier[1]);
+                       mtspr(SPRN_SIER3, vcpu->arch.sier[2]);
+               }
+
+               /* Set MMCRA then MMCR0 last */
+               mtspr(SPRN_MMCRA, vcpu->arch.mmcra);
+               mtspr(SPRN_MMCR0, vcpu->arch.mmcr[0]);
+               /* No isync necessary because we're starting counters */
+
+               if (!vcpu->arch.nested &&
+                               (vcpu->arch.hfscr_permitted & HFSCR_PM))
+                       vcpu->arch.hfscr |= HFSCR_PM;
+       }
+}
+EXPORT_SYMBOL_GPL(switch_pmu_to_guest);
+
+void switch_pmu_to_host(struct kvm_vcpu *vcpu,
+                       struct p9_host_os_sprs *host_os_sprs)
+{
+       struct lppaca *lp;
+       int save_pmu = 1;
+
+       lp = vcpu->arch.vpa.pinned_addr;
+       if (lp)
+               save_pmu = lp->pmcregs_in_use;
+       if (IS_ENABLED(CONFIG_KVM_BOOK3S_HV_NESTED_PMU_WORKAROUND)) {
+               /*
+                * Save pmu if this guest is capable of running nested guests.
+                * This option is for old L1s that do not set their
+                * lppaca->pmcregs_in_use properly when entering their L2.
+                */
+               save_pmu |= nesting_enabled(vcpu->kvm);
+       }
+
+       if (save_pmu) {
+               vcpu->arch.mmcr[0] = mfspr(SPRN_MMCR0);
+               vcpu->arch.mmcra = mfspr(SPRN_MMCRA);
+
+               freeze_pmu(vcpu->arch.mmcr[0], vcpu->arch.mmcra);
+
+               vcpu->arch.pmc[0] = mfspr(SPRN_PMC1);
+               vcpu->arch.pmc[1] = mfspr(SPRN_PMC2);
+               vcpu->arch.pmc[2] = mfspr(SPRN_PMC3);
+               vcpu->arch.pmc[3] = mfspr(SPRN_PMC4);
+               vcpu->arch.pmc[4] = mfspr(SPRN_PMC5);
+               vcpu->arch.pmc[5] = mfspr(SPRN_PMC6);
+               vcpu->arch.mmcr[1] = mfspr(SPRN_MMCR1);
+               vcpu->arch.mmcr[2] = mfspr(SPRN_MMCR2);
+               vcpu->arch.sdar = mfspr(SPRN_SDAR);
+               vcpu->arch.siar = mfspr(SPRN_SIAR);
+               vcpu->arch.sier[0] = mfspr(SPRN_SIER);
+
+               if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+                       vcpu->arch.mmcr[3] = mfspr(SPRN_MMCR3);
+                       vcpu->arch.sier[1] = mfspr(SPRN_SIER2);
+                       vcpu->arch.sier[2] = mfspr(SPRN_SIER3);
+               }
+
+       } else if (vcpu->arch.hfscr & HFSCR_PM) {
+               /*
+                * The guest accessed PMC SPRs without specifying they should
+                * be preserved, or it cleared pmcregs_in_use after the last
+                * access. Just ensure they are frozen.
+                */
+               freeze_pmu(mfspr(SPRN_MMCR0), mfspr(SPRN_MMCRA));
+
+               /*
+                * Demand-fault PMU register access in the guest.
+                *
+                * This is used to grab the guest's VPA pmcregs_in_use value
+                * and reflect it into the host's VPA in the case of a nested
+                * hypervisor.
+                *
+                * It also avoids having to zero-out SPRs after each guest
+                * exit to avoid side-channels.
+                *
+                * This is cleared here when we exit the guest, so later HFSCR
+                * interrupt handling can add it back to run the guest with
+                * PM enabled next time.
+                */
+               if (!vcpu->arch.nested)
+                       vcpu->arch.hfscr &= ~HFSCR_PM;
+       } /* otherwise the PMU should still be frozen */
+
+#ifdef CONFIG_PPC_PSERIES
+       if (kvmhv_on_pseries()) {
+               barrier();
+               get_lppaca()->pmcregs_in_use = ppc_get_pmu_inuse();
+               barrier();
+       }
+#endif
+
+       if (ppc_get_pmu_inuse()) {
+               mtspr(SPRN_PMC1, host_os_sprs->pmc1);
+               mtspr(SPRN_PMC2, host_os_sprs->pmc2);
+               mtspr(SPRN_PMC3, host_os_sprs->pmc3);
+               mtspr(SPRN_PMC4, host_os_sprs->pmc4);
+               mtspr(SPRN_PMC5, host_os_sprs->pmc5);
+               mtspr(SPRN_PMC6, host_os_sprs->pmc6);
+               mtspr(SPRN_MMCR1, host_os_sprs->mmcr1);
+               mtspr(SPRN_MMCR2, host_os_sprs->mmcr2);
+               mtspr(SPRN_SDAR, host_os_sprs->sdar);
+               mtspr(SPRN_SIAR, host_os_sprs->siar);
+               mtspr(SPRN_SIER, host_os_sprs->sier1);
+
+               if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+                       mtspr(SPRN_MMCR3, host_os_sprs->mmcr3);
+                       mtspr(SPRN_SIER2, host_os_sprs->sier2);
+                       mtspr(SPRN_SIER3, host_os_sprs->sier3);
+               }
+
+               /* Set MMCRA then MMCR0 last */
+               mtspr(SPRN_MMCRA, host_os_sprs->mmcra);
+               mtspr(SPRN_MMCR0, host_os_sprs->mmcr0);
+               isync();
+       }
+}
+EXPORT_SYMBOL_GPL(switch_pmu_to_host);
+
+static void load_spr_state(struct kvm_vcpu *vcpu,
+                               struct p9_host_os_sprs *host_os_sprs)
+{
+       /* TAR is very fast */
+       mtspr(SPRN_TAR, vcpu->arch.tar);
+
+#ifdef CONFIG_ALTIVEC
+       if (cpu_has_feature(CPU_FTR_ALTIVEC) &&
+           current->thread.vrsave != vcpu->arch.vrsave)
+               mtspr(SPRN_VRSAVE, vcpu->arch.vrsave);
+#endif
+
+       if (vcpu->arch.hfscr & HFSCR_EBB) {
+               if (current->thread.ebbhr != vcpu->arch.ebbhr)
+                       mtspr(SPRN_EBBHR, vcpu->arch.ebbhr);
+               if (current->thread.ebbrr != vcpu->arch.ebbrr)
+                       mtspr(SPRN_EBBRR, vcpu->arch.ebbrr);
+               if (current->thread.bescr != vcpu->arch.bescr)
+                       mtspr(SPRN_BESCR, vcpu->arch.bescr);
+       }
+
+       if (cpu_has_feature(CPU_FTR_P9_TIDR) &&
+                       current->thread.tidr != vcpu->arch.tid)
+               mtspr(SPRN_TIDR, vcpu->arch.tid);
+       if (host_os_sprs->iamr != vcpu->arch.iamr)
+               mtspr(SPRN_IAMR, vcpu->arch.iamr);
+       if (host_os_sprs->amr != vcpu->arch.amr)
+               mtspr(SPRN_AMR, vcpu->arch.amr);
+       if (vcpu->arch.uamor != 0)
+               mtspr(SPRN_UAMOR, vcpu->arch.uamor);
+       if (current->thread.fscr != vcpu->arch.fscr)
+               mtspr(SPRN_FSCR, vcpu->arch.fscr);
+       if (current->thread.dscr != vcpu->arch.dscr)
+               mtspr(SPRN_DSCR, vcpu->arch.dscr);
+       if (vcpu->arch.pspb != 0)
+               mtspr(SPRN_PSPB, vcpu->arch.pspb);
+
+       /*
+        * DAR, DSISR, and for nested HV, SPRGs must be set with MSR[RI]
+        * clear (or hstate set appropriately to catch those registers
+        * being clobbered if we take a MCE or SRESET), so those are done
+        * later.
+        */
+
+       if (!(vcpu->arch.ctrl & 1))
+               mtspr(SPRN_CTRLT, 0);
+}
+
+static void store_spr_state(struct kvm_vcpu *vcpu)
+{
+       vcpu->arch.tar = mfspr(SPRN_TAR);
+
+#ifdef CONFIG_ALTIVEC
+       if (cpu_has_feature(CPU_FTR_ALTIVEC))
+               vcpu->arch.vrsave = mfspr(SPRN_VRSAVE);
+#endif
+
+       if (vcpu->arch.hfscr & HFSCR_EBB) {
+               vcpu->arch.ebbhr = mfspr(SPRN_EBBHR);
+               vcpu->arch.ebbrr = mfspr(SPRN_EBBRR);
+               vcpu->arch.bescr = mfspr(SPRN_BESCR);
+       }
+
+       if (cpu_has_feature(CPU_FTR_P9_TIDR))
+               vcpu->arch.tid = mfspr(SPRN_TIDR);
+       vcpu->arch.iamr = mfspr(SPRN_IAMR);
+       vcpu->arch.amr = mfspr(SPRN_AMR);
+       vcpu->arch.uamor = mfspr(SPRN_UAMOR);
+       vcpu->arch.fscr = mfspr(SPRN_FSCR);
+       vcpu->arch.dscr = mfspr(SPRN_DSCR);
+       vcpu->arch.pspb = mfspr(SPRN_PSPB);
+
+       vcpu->arch.ctrl = mfspr(SPRN_CTRLF);
+}
+
+/* Returns true if current MSR and/or guest MSR may have changed */
+bool load_vcpu_state(struct kvm_vcpu *vcpu,
+                    struct p9_host_os_sprs *host_os_sprs)
+{
+       bool ret = false;
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+       if (cpu_has_feature(CPU_FTR_TM) ||
+           cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) {
+               unsigned long guest_msr = vcpu->arch.shregs.msr;
+               if (MSR_TM_ACTIVE(guest_msr)) {
+                       kvmppc_restore_tm_hv(vcpu, guest_msr, true);
+                       ret = true;
+               } else if (vcpu->arch.hfscr & HFSCR_TM) {
+                       mtspr(SPRN_TEXASR, vcpu->arch.texasr);
+                       mtspr(SPRN_TFHAR, vcpu->arch.tfhar);
+                       mtspr(SPRN_TFIAR, vcpu->arch.tfiar);
+               }
+       }
+#endif
+
+       load_spr_state(vcpu, host_os_sprs);
+
+       load_fp_state(&vcpu->arch.fp);
+#ifdef CONFIG_ALTIVEC
+       load_vr_state(&vcpu->arch.vr);
+#endif
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(load_vcpu_state);
+
+void store_vcpu_state(struct kvm_vcpu *vcpu)
+{
+       store_spr_state(vcpu);
+
+       store_fp_state(&vcpu->arch.fp);
+#ifdef CONFIG_ALTIVEC
+       store_vr_state(&vcpu->arch.vr);
+#endif
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+       if (cpu_has_feature(CPU_FTR_TM) ||
+           cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) {
+               unsigned long guest_msr = vcpu->arch.shregs.msr;
+               if (MSR_TM_ACTIVE(guest_msr)) {
+                       kvmppc_save_tm_hv(vcpu, guest_msr, true);
+               } else if (vcpu->arch.hfscr & HFSCR_TM) {
+                       vcpu->arch.texasr = mfspr(SPRN_TEXASR);
+                       vcpu->arch.tfhar = mfspr(SPRN_TFHAR);
+                       vcpu->arch.tfiar = mfspr(SPRN_TFIAR);
+
+                       if (!vcpu->arch.nested) {
+                               vcpu->arch.load_tm++; /* see load_ebb comment */
+                               if (!vcpu->arch.load_tm)
+                                       vcpu->arch.hfscr &= ~HFSCR_TM;
+                       }
+               }
+       }
+#endif
+}
+EXPORT_SYMBOL_GPL(store_vcpu_state);
+
+void save_p9_host_os_sprs(struct p9_host_os_sprs *host_os_sprs)
+{
+       host_os_sprs->iamr = mfspr(SPRN_IAMR);
+       host_os_sprs->amr = mfspr(SPRN_AMR);
+}
+EXPORT_SYMBOL_GPL(save_p9_host_os_sprs);
+
+/* vcpu guest regs must already be saved */
+void restore_p9_host_os_sprs(struct kvm_vcpu *vcpu,
+                            struct p9_host_os_sprs *host_os_sprs)
+{
+       /*
+        * current->thread.xxx registers must all be restored to host
+        * values before a potential context switch, otherwise the context
+        * switch itself will overwrite current->thread.xxx with the values
+        * from the guest SPRs.
+        */
+
+       mtspr(SPRN_SPRG_VDSO_WRITE, local_paca->sprg_vdso);
+
+       if (cpu_has_feature(CPU_FTR_P9_TIDR) &&
+                       current->thread.tidr != vcpu->arch.tid)
+               mtspr(SPRN_TIDR, current->thread.tidr);
+       if (host_os_sprs->iamr != vcpu->arch.iamr)
+               mtspr(SPRN_IAMR, host_os_sprs->iamr);
+       if (vcpu->arch.uamor != 0)
+               mtspr(SPRN_UAMOR, 0);
+       if (host_os_sprs->amr != vcpu->arch.amr)
+               mtspr(SPRN_AMR, host_os_sprs->amr);
+       if (current->thread.fscr != vcpu->arch.fscr)
+               mtspr(SPRN_FSCR, current->thread.fscr);
+       if (current->thread.dscr != vcpu->arch.dscr)
+               mtspr(SPRN_DSCR, current->thread.dscr);
+       if (vcpu->arch.pspb != 0)
+               mtspr(SPRN_PSPB, 0);
+
+       /* Save guest CTRL register, set runlatch to 1 */
+       if (!(vcpu->arch.ctrl & 1))
+               mtspr(SPRN_CTRLT, 1);
+
+#ifdef CONFIG_ALTIVEC
+       if (cpu_has_feature(CPU_FTR_ALTIVEC) &&
+           vcpu->arch.vrsave != current->thread.vrsave)
+               mtspr(SPRN_VRSAVE, current->thread.vrsave);
+#endif
+       if (vcpu->arch.hfscr & HFSCR_EBB) {
+               if (vcpu->arch.bescr != current->thread.bescr)
+                       mtspr(SPRN_BESCR, current->thread.bescr);
+               if (vcpu->arch.ebbhr != current->thread.ebbhr)
+                       mtspr(SPRN_EBBHR, current->thread.ebbhr);
+               if (vcpu->arch.ebbrr != current->thread.ebbrr)
+                       mtspr(SPRN_EBBRR, current->thread.ebbrr);
+
+               if (!vcpu->arch.nested) {
+                       /*
+                        * This is like load_fp in context switching: turn the
+                        * facility off once the u8 counter wraps, to try to
+                        * avoid saving and restoring the registers on every
+                        * partition switch.
+                        */
+                       vcpu->arch.load_ebb++;
+                       if (!vcpu->arch.load_ebb)
+                               vcpu->arch.hfscr &= ~HFSCR_EBB;
+               }
+       }
+
+       if (vcpu->arch.tar != current->thread.tar)
+               mtspr(SPRN_TAR, current->thread.tar);
+}
+EXPORT_SYMBOL_GPL(restore_p9_host_os_sprs);
+
 #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
 static void __start_timing(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator *next)
 {
@@ -56,10 +487,22 @@ static void __accumulate_time(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator
 #define accumulate_time(vcpu, next) do {} while (0)
 #endif
 
-static inline void mfslb(unsigned int idx, u64 *slbee, u64 *slbev)
+static inline u64 mfslbv(unsigned int idx)
 {
-       asm volatile("slbmfev  %0,%1" : "=r" (*slbev) : "r" (idx));
-       asm volatile("slbmfee  %0,%1" : "=r" (*slbee) : "r" (idx));
+       u64 slbev;
+
+       asm volatile("slbmfev  %0,%1" : "=r" (slbev) : "r" (idx));
+
+       return slbev;
+}
+
+static inline u64 mfslbe(unsigned int idx)
+{
+       u64 slbee;
+
+       asm volatile("slbmfee  %0,%1" : "=r" (slbee) : "r" (idx));
+
+       return slbee;
 }
 
 static inline void mtslb(u64 slbee, u64 slbev)
@@ -100,17 +543,19 @@ static void switch_mmu_to_guest_radix(struct kvm *kvm, struct kvm_vcpu *vcpu, u6
        lpid = nested ? nested->shadow_lpid : kvm->arch.lpid;
 
        /*
-        * All the isync()s are overkill but trivially follow the ISA
-        * requirements. Some can likely be replaced with justification
-        * comment for why they are not needed.
+        * Prior memory accesses to host PID Q3 must be completed before we
+        * start switching, and stores must be drained to avoid not-my-LPAR
+        * logic (see switch_mmu_to_host).
         */
+       asm volatile("hwsync" ::: "memory");
        isync();
        mtspr(SPRN_LPID, lpid);
-       isync();
        mtspr(SPRN_LPCR, lpcr);
-       isync();
        mtspr(SPRN_PID, vcpu->arch.pid);
-       isync();
+       /*
+        * isync not required here because we are HRFID'ing to guest before
+        * any guest context access, which is context synchronising.
+        */
 }
 
 static void switch_mmu_to_guest_hpt(struct kvm *kvm, struct kvm_vcpu *vcpu, u64 lpcr)
@@ -120,25 +565,41 @@ static void switch_mmu_to_guest_hpt(struct kvm *kvm, struct kvm_vcpu *vcpu, u64
 
        lpid = kvm->arch.lpid;
 
+       /*
+        * See switch_mmu_to_guest_radix. ptesync should not be required here
+        * even if the host is in HPT mode because speculative accesses would
+        * not cause RC updates (we are in real mode).
+        */
+       asm volatile("hwsync" ::: "memory");
+       isync();
        mtspr(SPRN_LPID, lpid);
        mtspr(SPRN_LPCR, lpcr);
        mtspr(SPRN_PID, vcpu->arch.pid);
 
        for (i = 0; i < vcpu->arch.slb_max; i++)
                mtslb(vcpu->arch.slb[i].orige, vcpu->arch.slb[i].origv);
-
-       isync();
+       /*
+        * isync not required here, see switch_mmu_to_guest_radix.
+        */
 }
 
 static void switch_mmu_to_host(struct kvm *kvm, u32 pid)
 {
+       /*
+        * The guest has exited, so guest MMU context is no longer being
+        * non-speculatively accessed, but a hwsync is needed before the
+        * mtLPIDR / mtPIDR switch, in order to ensure all stores are drained,
+        * so the not-my-LPAR tlbie logic does not overlook them.
+        */
+       asm volatile("hwsync" ::: "memory");
        isync();
        mtspr(SPRN_PID, pid);
-       isync();
        mtspr(SPRN_LPID, kvm->arch.host_lpid);
-       isync();
        mtspr(SPRN_LPCR, kvm->arch.host_lpcr);
-       isync();
+       /*
+        * isync is not required after the switch, because mtmsrd with L=0
+        * is performed after this switch, which is context synchronising.
+        */
 
        if (!radix_enabled())
                slb_restore_bolted_realmode();
@@ -171,8 +632,10 @@ static void save_clear_guest_mmu(struct kvm *kvm, struct kvm_vcpu *vcpu)
                 */
                for (i = 0; i < vcpu->arch.slb_nr; i++) {
                        u64 slbee, slbev;
-                       mfslb(i, &slbee, &slbev);
+
+                       slbee = mfslbe(i);
                        if (slbee & SLB_ESID_V) {
+                               slbev = mfslbv(i);
                                vcpu->arch.slb[nr].orige = slbee | i;
                                vcpu->arch.slb[nr].origv = slbev;
                                nr++;
@@ -183,15 +646,128 @@ static void save_clear_guest_mmu(struct kvm *kvm, struct kvm_vcpu *vcpu)
        }
 }
 
-int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr)
+static void flush_guest_tlb(struct kvm *kvm)
 {
+       unsigned long rb, set;
+
+       rb = PPC_BIT(52);       /* IS = 2 */
+       if (kvm_is_radix(kvm)) {
+               /* R=1 PRS=1 RIC=2 */
+               asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
+                            : : "r" (rb), "i" (1), "i" (1), "i" (2),
+                              "r" (0) : "memory");
+               for (set = 1; set < kvm->arch.tlb_sets; ++set) {
+                       rb += PPC_BIT(51);      /* increment set number */
+                       /* R=1 PRS=1 RIC=0 */
+                       asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
+                                    : : "r" (rb), "i" (1), "i" (1), "i" (0),
+                                      "r" (0) : "memory");
+               }
+               asm volatile("ptesync": : :"memory");
+               // POWER9 congruence-class TLBIEL leaves ERAT. Flush it now.
+               asm volatile(PPC_RADIX_INVALIDATE_ERAT_GUEST : : :"memory");
+       } else {
+               for (set = 0; set < kvm->arch.tlb_sets; ++set) {
+                       /* R=0 PRS=0 RIC=0 */
+                       asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
+                                    : : "r" (rb), "i" (0), "i" (0), "i" (0),
+                                      "r" (0) : "memory");
+                       rb += PPC_BIT(51);      /* increment set number */
+               }
+               asm volatile("ptesync": : :"memory");
+               // POWER9 congruence-class TLBIEL leaves ERAT. Flush it now.
+               asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT : : :"memory");
+       }
+}
+
+static void check_need_tlb_flush(struct kvm *kvm, int pcpu,
+                                struct kvm_nested_guest *nested)
+{
+       cpumask_t *need_tlb_flush;
+       bool all_set = true;
+       int i;
+
+       if (nested)
+               need_tlb_flush = &nested->need_tlb_flush;
+       else
+               need_tlb_flush = &kvm->arch.need_tlb_flush;
+
+       if (likely(!cpumask_test_cpu(pcpu, need_tlb_flush)))
+               return;
+
+       /*
+        * Individual threads can come in here, but the TLB is shared between
+        * the 4 threads in a core, hence invalidating on one thread
+        * invalidates for all, so only invalidate the first time (if all bits
+        * were set).  The others must still execute a ptesync.
+        *
+        * If a race occurs and two threads do the TLB flush, that is not a
+        * problem, just sub-optimal.
+        */
+       for (i = cpu_first_tlb_thread_sibling(pcpu);
+                       i <= cpu_last_tlb_thread_sibling(pcpu);
+                       i += cpu_tlb_thread_sibling_step()) {
+               if (!cpumask_test_cpu(i, need_tlb_flush)) {
+                       all_set = false;
+                       break;
+               }
+       }
+       if (all_set)
+               flush_guest_tlb(kvm);
+       else
+               asm volatile("ptesync" ::: "memory");
+
+       /* Clear the bit after the TLB flush */
+       cpumask_clear_cpu(pcpu, need_tlb_flush);
+}
+
+unsigned long kvmppc_msr_hard_disable_set_facilities(struct kvm_vcpu *vcpu, unsigned long msr)
+{
+       unsigned long msr_needed = 0;
+
+       msr &= ~MSR_EE;
+
+       /* MSR bits may have been cleared by context switch so must recheck */
+       if (IS_ENABLED(CONFIG_PPC_FPU))
+               msr_needed |= MSR_FP;
+       if (cpu_has_feature(CPU_FTR_ALTIVEC))
+               msr_needed |= MSR_VEC;
+       if (cpu_has_feature(CPU_FTR_VSX))
+               msr_needed |= MSR_VSX;
+       if ((cpu_has_feature(CPU_FTR_TM) ||
+           cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) &&
+                       (vcpu->arch.hfscr & HFSCR_TM))
+               msr_needed |= MSR_TM;
+
+       /*
+        * This could be combined with MSR[RI] clearing, but that expands
+        * the unrecoverable window. It would be better to cover the
+        * unrecoverable window with KVM bad interrupt handling rather than
+        * rely on MSR[RI] at all.
+        *
+        * Much more difficult and less worthwhile to combine with IR/DR
+        * disable.
+        */
+       if ((msr & msr_needed) != msr_needed) {
+               msr |= msr_needed;
+               __mtmsrd(msr, 0);
+       } else {
+               __hard_irq_disable();
+       }
+       local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
+       return msr;
+}
+EXPORT_SYMBOL_GPL(kvmppc_msr_hard_disable_set_facilities);
+
+int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr, u64 *tb)
+{
+       struct p9_host_os_sprs host_os_sprs;
        struct kvm *kvm = vcpu->kvm;
        struct kvm_nested_guest *nested = vcpu->arch.nested;
        struct kvmppc_vcore *vc = vcpu->arch.vcore;
-       s64 hdec;
-       u64 tb, purr, spurr;
+       s64 hdec, dec;
+       u64 purr, spurr;
        u64 *exsave;
-       bool ri_set;
        int trap;
        unsigned long msr;
        unsigned long host_hfscr;
@@ -199,11 +775,13 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
        unsigned long host_dawr0;
        unsigned long host_dawrx0;
        unsigned long host_psscr;
+       unsigned long host_hpsscr;
        unsigned long host_pidr;
        unsigned long host_dawr1;
        unsigned long host_dawrx1;
+       unsigned long dpdes;
 
-       hdec = time_limit - mftb();
+       hdec = time_limit - *tb;
        if (hdec < 0)
                return BOOK3S_INTERRUPT_HV_DECREMENTER;
 
@@ -214,51 +792,84 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
 
        vcpu->arch.ceded = 0;
 
-       if (vc->tb_offset) {
-               u64 new_tb = mftb() + vc->tb_offset;
-               mtspr(SPRN_TBU40, new_tb);
-               tb = mftb();
-               if ((tb & 0xffffff) < (new_tb & 0xffffff))
-                       mtspr(SPRN_TBU40, new_tb + 0x1000000);
-               vc->tb_offset_applied = vc->tb_offset;
-       }
-
-       msr = mfmsr();
+       /* Save MSR for restore, with EE clear. */
+       msr = mfmsr() & ~MSR_EE;
 
        host_hfscr = mfspr(SPRN_HFSCR);
        host_ciabr = mfspr(SPRN_CIABR);
-       host_dawr0 = mfspr(SPRN_DAWR0);
-       host_dawrx0 = mfspr(SPRN_DAWRX0);
-       host_psscr = mfspr(SPRN_PSSCR);
+       host_psscr = mfspr(SPRN_PSSCR_PR);
+       if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
+               host_hpsscr = mfspr(SPRN_PSSCR);
        host_pidr = mfspr(SPRN_PID);
-       if (cpu_has_feature(CPU_FTR_DAWR1)) {
-               host_dawr1 = mfspr(SPRN_DAWR1);
-               host_dawrx1 = mfspr(SPRN_DAWRX1);
-       }
 
-       if (vc->pcr)
-               mtspr(SPRN_PCR, vc->pcr | PCR_MASK);
-       mtspr(SPRN_DPDES, vc->dpdes);
-       mtspr(SPRN_VTB, vc->vtb);
+       if (dawr_enabled()) {
+               host_dawr0 = mfspr(SPRN_DAWR0);
+               host_dawrx0 = mfspr(SPRN_DAWRX0);
+               if (cpu_has_feature(CPU_FTR_DAWR1)) {
+                       host_dawr1 = mfspr(SPRN_DAWR1);
+                       host_dawrx1 = mfspr(SPRN_DAWRX1);
+               }
+       }
 
        local_paca->kvm_hstate.host_purr = mfspr(SPRN_PURR);
        local_paca->kvm_hstate.host_spurr = mfspr(SPRN_SPURR);
+
+       save_p9_host_os_sprs(&host_os_sprs);
+
+       msr = kvmppc_msr_hard_disable_set_facilities(vcpu, msr);
+       if (lazy_irq_pending()) {
+               trap = 0;
+               goto out;
+       }
+
+       if (unlikely(load_vcpu_state(vcpu, &host_os_sprs)))
+               msr = mfmsr(); /* MSR may have been updated */
+
+       if (vc->tb_offset) {
+               u64 new_tb = *tb + vc->tb_offset;
+               mtspr(SPRN_TBU40, new_tb);
+               if ((mftb() & 0xffffff) < (new_tb & 0xffffff)) {
+                       new_tb += 0x1000000;
+                       mtspr(SPRN_TBU40, new_tb);
+               }
+               *tb = new_tb;
+               vc->tb_offset_applied = vc->tb_offset;
+       }
+
+       mtspr(SPRN_VTB, vc->vtb);
        mtspr(SPRN_PURR, vcpu->arch.purr);
        mtspr(SPRN_SPURR, vcpu->arch.spurr);
 
+       if (vc->pcr)
+               mtspr(SPRN_PCR, vc->pcr | PCR_MASK);
+       if (vcpu->arch.doorbell_request) {
+               vcpu->arch.doorbell_request = 0;
+               mtspr(SPRN_DPDES, 1);
+       }
+
        if (dawr_enabled()) {
-               mtspr(SPRN_DAWR0, vcpu->arch.dawr0);
-               mtspr(SPRN_DAWRX0, vcpu->arch.dawrx0);
+               if (vcpu->arch.dawr0 != host_dawr0)
+                       mtspr(SPRN_DAWR0, vcpu->arch.dawr0);
+               if (vcpu->arch.dawrx0 != host_dawrx0)
+                       mtspr(SPRN_DAWRX0, vcpu->arch.dawrx0);
                if (cpu_has_feature(CPU_FTR_DAWR1)) {
-                       mtspr(SPRN_DAWR1, vcpu->arch.dawr1);
-                       mtspr(SPRN_DAWRX1, vcpu->arch.dawrx1);
+                       if (vcpu->arch.dawr1 != host_dawr1)
+                               mtspr(SPRN_DAWR1, vcpu->arch.dawr1);
+                       if (vcpu->arch.dawrx1 != host_dawrx1)
+                               mtspr(SPRN_DAWRX1, vcpu->arch.dawrx1);
                }
        }
-       mtspr(SPRN_CIABR, vcpu->arch.ciabr);
-       mtspr(SPRN_IC, vcpu->arch.ic);
+       if (vcpu->arch.ciabr != host_ciabr)
+               mtspr(SPRN_CIABR, vcpu->arch.ciabr);
 
-       mtspr(SPRN_PSSCR, vcpu->arch.psscr | PSSCR_EC |
-             (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
+
+       if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) {
+               mtspr(SPRN_PSSCR, vcpu->arch.psscr | PSSCR_EC |
+                     (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
+       } else {
+               if (vcpu->arch.psscr != host_psscr)
+                       mtspr(SPRN_PSSCR_PR, vcpu->arch.psscr);
+       }
 
        mtspr(SPRN_HFSCR, vcpu->arch.hfscr);
 
@@ -276,18 +887,34 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
         * HDSI which should correctly update the HDSISR the second time HDSI
         * entry.
         *
-        * Just do this on all p9 processors for now.
+        * The "radix prefetch bug" test can be used to test for this bug, as
+        * it also exists for DD2.1 and below.
         */
-       mtspr(SPRN_HDSISR, HDSISR_CANARY);
+       if (cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
+               mtspr(SPRN_HDSISR, HDSISR_CANARY);
 
        mtspr(SPRN_SPRG0, vcpu->arch.shregs.sprg0);
        mtspr(SPRN_SPRG1, vcpu->arch.shregs.sprg1);
        mtspr(SPRN_SPRG2, vcpu->arch.shregs.sprg2);
        mtspr(SPRN_SPRG3, vcpu->arch.shregs.sprg3);
 
-       mtspr(SPRN_AMOR, ~0UL);
+       /*
+        * It might be preferable to call load_vcpu_state here, in order to get the
+        * GPR/FP register loads executing in parallel with the previous mtSPR
+        * instructions, but for now that can't be done because the TM handling
+        * in load_vcpu_state can change some SPRs and vcpu state (nip, msr).
+        * But TM could be split out if this would be a significant benefit.
+        */
 
-       local_paca->kvm_hstate.in_guest = KVM_GUEST_MODE_HV_P9;
+       /*
+        * MSR[RI] does not need to be cleared (and is not, for radix guests
+        * with no prefetch bug), because in_guest is set. If we take a SRESET
+        * or MCE with in_guest set but still in HV mode, then
+        * kvmppc_p9_bad_interrupt handles the interrupt, which effectively
+        * clears MSR[RI] and doesn't return.
+        */
+       WRITE_ONCE(local_paca->kvm_hstate.in_guest, KVM_GUEST_MODE_HV_P9);
+       barrier(); /* Open in_guest critical section */
 
        /*
         * Hash host, hash guest, or radix guest with prefetch bug, all have
@@ -299,17 +926,13 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
 
        save_clear_host_mmu(kvm);
 
-       if (kvm_is_radix(kvm)) {
+       if (kvm_is_radix(kvm))
                switch_mmu_to_guest_radix(kvm, vcpu, lpcr);
-               if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
-                       __mtmsrd(0, 1); /* clear RI */
-
-       } else {
+       else
                switch_mmu_to_guest_hpt(kvm, vcpu, lpcr);
-       }
 
        /* TLBIEL uses LPID=LPIDR, so run this after setting guest LPID */
-       kvmppc_check_need_tlb_flush(kvm, vc->pcpu, nested);
+       check_need_tlb_flush(kvm, vc->pcpu, nested);
 
        /*
         * P9 suppresses the HDEC exception when LPCR[HDICE] = 0,
@@ -317,6 +940,8 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
         */
        mtspr(SPRN_HDEC, hdec);
 
+       mtspr(SPRN_DEC, vcpu->arch.dec_expires - *tb);
+
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 tm_return_to_guest:
 #endif
@@ -327,7 +952,9 @@ tm_return_to_guest:
 
        accumulate_time(vcpu, &vcpu->arch.guest_time);
 
+       switch_pmu_to_guest(vcpu, &host_os_sprs);
        kvmppc_p9_enter_guest(vcpu);
+       switch_pmu_to_host(vcpu, &host_os_sprs);
 
        accumulate_time(vcpu, &vcpu->arch.rm_intr);
 
@@ -340,36 +967,27 @@ tm_return_to_guest:
        /* 0x2 bit for HSRR is only used by PR and P7/8 HV paths, clear it */
        trap = local_paca->kvm_hstate.scratch0 & ~0x2;
 
-       /* HSRR interrupts leave MSR[RI] unchanged, SRR interrupts clear it. */
-       ri_set = false;
-       if (likely(trap > BOOK3S_INTERRUPT_MACHINE_CHECK)) {
-               if (trap != BOOK3S_INTERRUPT_SYSCALL &&
-                               (vcpu->arch.shregs.msr & MSR_RI))
-                       ri_set = true;
+       if (likely(trap > BOOK3S_INTERRUPT_MACHINE_CHECK))
                exsave = local_paca->exgen;
-       } else if (trap == BOOK3S_INTERRUPT_SYSTEM_RESET) {
+       else if (trap == BOOK3S_INTERRUPT_SYSTEM_RESET)
                exsave = local_paca->exnmi;
-       } else { /* trap == 0x200 */
+       else /* trap == 0x200 */
                exsave = local_paca->exmc;
-       }
 
        vcpu->arch.regs.gpr[1] = local_paca->kvm_hstate.scratch1;
        vcpu->arch.regs.gpr[3] = local_paca->kvm_hstate.scratch2;
 
        /*
-        * Only set RI after reading machine check regs (DAR, DSISR, SRR0/1)
-        * and hstate scratch (which we need to move into exsave to make
-        * re-entrant vs SRESET/MCE)
+        * After reading machine check regs (DAR, DSISR, SRR0/1) and hstate
+        * scratch (which we need to move into exsave to make re-entrant vs
+        * SRESET/MCE), register state is protected from reentrancy. However,
+        * the timebase, MMU and other state are still set up for the guest, so don't
+        * enable MSR[RI] here. It gets enabled at the end, after in_guest
+        * is cleared.
+        *
+        * It is possible an NMI could come in here, which is why it is
+        * important to save the above state early so it can be debugged.
         */
-       if (ri_set) {
-               if (unlikely(!(mfmsr() & MSR_RI))) {
-                       __mtmsrd(MSR_RI, 1);
-                       WARN_ON_ONCE(1);
-               }
-       } else {
-               WARN_ON_ONCE(mfmsr() & MSR_RI);
-               __mtmsrd(MSR_RI, 1);
-       }
 
        vcpu->arch.regs.gpr[9] = exsave[EX_R9/sizeof(u64)];
        vcpu->arch.regs.gpr[10] = exsave[EX_R10/sizeof(u64)];
@@ -388,7 +1006,7 @@ tm_return_to_guest:
                kvmppc_realmode_machine_check(vcpu);
 
        } else if (unlikely(trap == BOOK3S_INTERRUPT_HMI)) {
-               kvmppc_realmode_hmi_handler();
+               kvmppc_p9_realmode_hmi_handler(vcpu);
 
        } else if (trap == BOOK3S_INTERRUPT_H_EMUL_ASSIST) {
                vcpu->arch.emul_inst = mfspr(SPRN_HEIR);
@@ -427,13 +1045,6 @@ tm_return_to_guest:
                                 */
                                mtspr(SPRN_HSRR0, vcpu->arch.regs.nip);
                                mtspr(SPRN_HSRR1, vcpu->arch.shregs.msr);
-
-                               /*
-                                * tm_return_to_guest re-loads SRR0/1, DAR,
-                                * DSISR after RI is cleared, in case they had
-                                * been clobbered by a MCE.
-                                */
-                               __mtmsrd(0, 1); /* clear RI */
                                goto tm_return_to_guest;
                        }
                }
@@ -445,81 +1056,109 @@ tm_return_to_guest:
        /* Advance host PURR/SPURR by the amount used by guest */
        purr = mfspr(SPRN_PURR);
        spurr = mfspr(SPRN_SPURR);
-       mtspr(SPRN_PURR, local_paca->kvm_hstate.host_purr +
-             purr - vcpu->arch.purr);
-       mtspr(SPRN_SPURR, local_paca->kvm_hstate.host_spurr +
-             spurr - vcpu->arch.spurr);
+       local_paca->kvm_hstate.host_purr += purr - vcpu->arch.purr;
+       local_paca->kvm_hstate.host_spurr += spurr - vcpu->arch.spurr;
        vcpu->arch.purr = purr;
        vcpu->arch.spurr = spurr;
 
        vcpu->arch.ic = mfspr(SPRN_IC);
        vcpu->arch.pid = mfspr(SPRN_PID);
-       vcpu->arch.psscr = mfspr(SPRN_PSSCR) & PSSCR_GUEST_VIS;
+       vcpu->arch.psscr = mfspr(SPRN_PSSCR_PR);
 
        vcpu->arch.shregs.sprg0 = mfspr(SPRN_SPRG0);
        vcpu->arch.shregs.sprg1 = mfspr(SPRN_SPRG1);
        vcpu->arch.shregs.sprg2 = mfspr(SPRN_SPRG2);
        vcpu->arch.shregs.sprg3 = mfspr(SPRN_SPRG3);
 
-       /* Preserve PSSCR[FAKE_SUSPEND] until we've called kvmppc_save_tm_hv */
-       mtspr(SPRN_PSSCR, host_psscr |
-             (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
-       mtspr(SPRN_HFSCR, host_hfscr);
-       mtspr(SPRN_CIABR, host_ciabr);
-       mtspr(SPRN_DAWR0, host_dawr0);
-       mtspr(SPRN_DAWRX0, host_dawrx0);
-       if (cpu_has_feature(CPU_FTR_DAWR1)) {
-               mtspr(SPRN_DAWR1, host_dawr1);
-               mtspr(SPRN_DAWRX1, host_dawrx1);
-       }
-
-       if (kvm_is_radix(kvm)) {
-               /*
-                * Since this is radix, do a eieio; tlbsync; ptesync sequence
-                * in case we interrupted the guest between a tlbie and a
-                * ptesync.
-                */
-               asm volatile("eieio; tlbsync; ptesync");
-       }
+       dpdes = mfspr(SPRN_DPDES);
+       if (dpdes)
+               vcpu->arch.doorbell_request = 1;
 
-       /*
-        * cp_abort is required if the processor supports local copy-paste
-        * to clear the copy buffer that was under control of the guest.
-        */
-       if (cpu_has_feature(CPU_FTR_ARCH_31))
-               asm volatile(PPC_CP_ABORT);
-
-       vc->dpdes = mfspr(SPRN_DPDES);
        vc->vtb = mfspr(SPRN_VTB);
-       mtspr(SPRN_DPDES, 0);
-       if (vc->pcr)
-               mtspr(SPRN_PCR, PCR_MASK);
+
+       dec = mfspr(SPRN_DEC);
+       if (!(lpcr & LPCR_LD)) /* Sign extend if not using large decrementer */
+               dec = (s32) dec;
+       *tb = mftb();
+       vcpu->arch.dec_expires = dec + *tb;
 
        if (vc->tb_offset_applied) {
-               u64 new_tb = mftb() - vc->tb_offset_applied;
+               u64 new_tb = *tb - vc->tb_offset_applied;
                mtspr(SPRN_TBU40, new_tb);
-               tb = mftb();
-               if ((tb & 0xffffff) < (new_tb & 0xffffff))
-                       mtspr(SPRN_TBU40, new_tb + 0x1000000);
+               if ((mftb() & 0xffffff) < (new_tb & 0xffffff)) {
+                       new_tb += 0x1000000;
+                       mtspr(SPRN_TBU40, new_tb);
+               }
+               *tb = new_tb;
                vc->tb_offset_applied = 0;
        }
 
-       mtspr(SPRN_HDEC, 0x7fffffff);
-
        save_clear_guest_mmu(kvm, vcpu);
        switch_mmu_to_host(kvm, host_pidr);
-       local_paca->kvm_hstate.in_guest = KVM_GUEST_MODE_NONE;
 
        /*
-        * If we are in real mode, only switch MMU on after the MMU is
-        * switched to host, to avoid the P9_RADIX_PREFETCH_BUG.
+        * Enable MSR here in order to have facilities enabled to save
+        * guest registers. This enables the MMU (if we were in real mode),
+        * so it is only done after the MMU has been switched back to the
+        * host, to avoid the P9_RADIX_PREFETCH_BUG or hash guest context.
         */
        if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
-           vcpu->arch.shregs.msr & MSR_TS_MASK)
+                       vcpu->arch.shregs.msr & MSR_TS_MASK)
                msr |= MSR_TS_S;
-
        __mtmsrd(msr, 0);
 
+       store_vcpu_state(vcpu);
+
+       mtspr(SPRN_PURR, local_paca->kvm_hstate.host_purr);
+       mtspr(SPRN_SPURR, local_paca->kvm_hstate.host_spurr);
+
+       if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) {
+               /* Preserve PSSCR[FAKE_SUSPEND] until we've called kvmppc_save_tm_hv */
+               mtspr(SPRN_PSSCR, host_hpsscr |
+                     (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
+       }
+
+       mtspr(SPRN_HFSCR, host_hfscr);
+       if (vcpu->arch.ciabr != host_ciabr)
+               mtspr(SPRN_CIABR, host_ciabr);
+
+       if (dawr_enabled()) {
+               if (vcpu->arch.dawr0 != host_dawr0)
+                       mtspr(SPRN_DAWR0, host_dawr0);
+               if (vcpu->arch.dawrx0 != host_dawrx0)
+                       mtspr(SPRN_DAWRX0, host_dawrx0);
+               if (cpu_has_feature(CPU_FTR_DAWR1)) {
+                       if (vcpu->arch.dawr1 != host_dawr1)
+                               mtspr(SPRN_DAWR1, host_dawr1);
+                       if (vcpu->arch.dawrx1 != host_dawrx1)
+                               mtspr(SPRN_DAWRX1, host_dawrx1);
+               }
+       }
+
+       if (dpdes)
+               mtspr(SPRN_DPDES, 0);
+       if (vc->pcr)
+               mtspr(SPRN_PCR, PCR_MASK);
+
+       /* HDEC must be at least as large as DEC, so decrementer_max fits */
+       mtspr(SPRN_HDEC, decrementer_max);
+
+       timer_rearm_host_dec(*tb);
+
+       restore_p9_host_os_sprs(vcpu, &host_os_sprs);
+
+       barrier(); /* Close in_guest critical section */
+       WRITE_ONCE(local_paca->kvm_hstate.in_guest, KVM_GUEST_MODE_NONE);
+       /* Interrupts are recoverable at this point */
+
+       /*
+        * cp_abort is required if the processor supports local copy-paste
+        * to clear the copy buffer that was under control of the guest.
+        */
+       if (cpu_has_feature(CPU_FTR_ARCH_31))
+               asm volatile(PPC_CP_ABORT);
+
+out:
        end_timing(vcpu);
 
        return trap;
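
In the hunks above, the guest SPR loads on entry and the host restores on exit are now guarded by compare-before-write checks, so an mtspr is only issued when the guest and host values actually differ, evidently to skip needless SPR traffic. A minimal sketch of that pattern, with read_spr()/write_spr() standing in for mfspr/mtspr on one fixed SPR (illustrative helpers, not kernel API):

	/* Sketch only: skip the relatively expensive SPR write when the value is unchanged. */
	static unsigned long spr_shadow;                          /* stands in for the hardware SPR */
	static unsigned long read_spr(void)    { return spr_shadow; }
	static void write_spr(unsigned long v) { spr_shadow = v; }

	static unsigned long spr_switch_to_guest(unsigned long guest_val)
	{
		unsigned long host_val = read_spr();              /* save host value on entry */

		if (guest_val != host_val)
			write_spr(guest_val);                     /* write only if it differs */
		return host_val;
	}

	static void spr_switch_to_host(unsigned long host_val, unsigned long guest_val)
	{
		if (guest_val != host_val)                        /* mirror of the entry check */
			write_spr(host_val);
	}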
index d4bca93..ccfd969 100644 (file)
@@ -136,6 +136,60 @@ void kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu)
        vcpu->arch.mce_evt = mce_evt;
 }
 
+
+long kvmppc_p9_realmode_hmi_handler(struct kvm_vcpu *vcpu)
+{
+       struct kvmppc_vcore *vc = vcpu->arch.vcore;
+       long ret = 0;
+
+       /*
+        * Unapply and clear the offset first. That way, if the TB was not
+        * resynced then it will remain in host-offset, and if it was resynced
+        * then it is brought into host-offset. Then the tb offset is
+        * re-applied before continuing with the KVM exit.
+        *
+        * This way, we don't need to actually know whether or not OPAL resynced
+        * the timebase or do any of the complicated dance that the P7/8
+        * path requires.
+        */
+       if (vc->tb_offset_applied) {
+               u64 new_tb = mftb() - vc->tb_offset_applied;
+               mtspr(SPRN_TBU40, new_tb);
+               if ((mftb() & 0xffffff) < (new_tb & 0xffffff)) {
+                       new_tb += 0x1000000;
+                       mtspr(SPRN_TBU40, new_tb);
+               }
+               vc->tb_offset_applied = 0;
+       }
+
+       local_paca->hmi_irqs++;
+
+       if (hmi_handle_debugtrig(NULL) >= 0) {
+               ret = 1;
+               goto out;
+       }
+
+       if (ppc_md.hmi_exception_early)
+               ppc_md.hmi_exception_early(NULL);
+
+out:
+       if (vc->tb_offset) {
+               u64 new_tb = mftb() + vc->tb_offset;
+               mtspr(SPRN_TBU40, new_tb);
+               if ((mftb() & 0xffffff) < (new_tb & 0xffffff)) {
+                       new_tb += 0x1000000;
+                       mtspr(SPRN_TBU40, new_tb);
+               }
+               vc->tb_offset_applied = vc->tb_offset;
+       }
+
+       return ret;
+}
+
+/*
+ * The following subcore HMI handling applies only to pre-POWER9 CPUs.
+ */
+
 /* Check if dynamic split is in force and return subcore size accordingly. */
 static inline int kvmppc_cur_subcore_size(void)
 {
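
Both the vcpu entry/exit path and kvmppc_p9_realmode_hmi_handler() above apply or unapply a timebase offset by writing SPRN_TBU40 (which replaces only the upper 40 bits of the timebase) and then checking whether the free-running low 24 bits wrapped past the intended value. A hedged sketch of just that wrap check, with tb_now standing in for a fresh mftb() read (not kernel API):

	#include <stdint.h>

	/*
	 * After mtspr(SPRN_TBU40, new_tb) the low 24 bits keep ticking. If they
	 * have wrapped below new_tb's low bits, the resulting timebase is about
	 * 16M ticks short, so TBU40 must be rewritten with new_tb + 0x1000000.
	 */
	static uint64_t tbu40_fixup(uint64_t new_tb, uint64_t tb_now)
	{
		if ((tb_now & 0xffffff) < (new_tb & 0xffffff))
			new_tb += 0x1000000;    /* caller writes TBU40 again with this value */
		return new_tb;
	}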
index 2c1f3c6..2257fb1 100644 (file)
@@ -55,12 +55,6 @@ static int global_invalidates(struct kvm *kvm)
                smp_wmb();
                cpumask_setall(&kvm->arch.need_tlb_flush);
                cpu = local_paca->kvm_hstate.kvm_vcore->pcpu;
-               /*
-                * On POWER9, threads are independent but the TLB is shared,
-                * so use the bit for the first thread to represent the core.
-                */
-               if (cpu_has_feature(CPU_FTR_ARCH_300))
-                       cpu = cpu_first_tlb_thread_sibling(cpu);
                cpumask_clear_cpu(cpu, &kvm->arch.need_tlb_flush);
        }
 
index 32a4b4d..3f1aeff 100644 (file)
@@ -778,17 +778,14 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
        /* Restore AMR and UAMOR, set AMOR to all 1s */
        ld      r5,VCPU_AMR(r4)
        ld      r6,VCPU_UAMOR(r4)
-       li      r7,-1
        mtspr   SPRN_AMR,r5
        mtspr   SPRN_UAMOR,r6
-       mtspr   SPRN_AMOR,r7
 
-       /* Restore state of CTRL run bit; assume 1 on entry */
+       /* Restore state of CTRL run bit; the host currently has it set to 1 */
        lwz     r5,VCPU_CTRL(r4)
        andi.   r5,r5,1
        bne     4f
-       mfspr   r6,SPRN_CTRLF
-       clrrdi  r6,r6,1
+       li      r6,0
        mtspr   SPRN_CTRLT,r6
 4:
        /* Secondary threads wait for primary to have done partition switch */
@@ -817,10 +814,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
         * Set the decrementer to the guest decrementer.
         */
        ld      r8,VCPU_DEC_EXPIRES(r4)
-       /* r8 is a host timebase value here, convert to guest TB */
-       ld      r5,HSTATE_KVM_VCORE(r13)
-       ld      r6,VCORE_TB_OFFSET_APPL(r5)
-       add     r8,r8,r6
        mftb    r7
        subf    r3,r7,r8
        mtspr   SPRN_DEC,r3
@@ -1195,9 +1188,6 @@ guest_bypass:
        mftb    r6
        extsw   r5,r5
 16:    add     r5,r5,r6
-       /* r5 is a guest timebase value here, convert to host TB */
-       ld      r4,VCORE_TB_OFFSET_APPL(r3)
-       subf    r5,r4,r5
        std     r5,VCPU_DEC_EXPIRES(r9)
 
        /* Increment exit count, poke other threads to exit */
@@ -1211,12 +1201,12 @@ guest_bypass:
        stw     r0, VCPU_CPU(r9)
        stw     r0, VCPU_THREAD_CPU(r9)
 
-       /* Save guest CTRL register, set runlatch to 1 */
+       /* Save guest CTRL register, set runlatch to 1 if it was clear */
        mfspr   r6,SPRN_CTRLF
        stw     r6,VCPU_CTRL(r9)
        andi.   r0,r6,1
        bne     4f
-       ori     r6,r6,1
+       li      r6,1
        mtspr   SPRN_CTRLT,r6
 4:
        /*
@@ -2163,9 +2153,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_TM)
        /* save expiry time of guest decrementer */
        add     r3, r3, r5
        ld      r4, HSTATE_KVM_VCPU(r13)
-       ld      r5, HSTATE_KVM_VCORE(r13)
-       ld      r6, VCORE_TB_OFFSET_APPL(r5)
-       subf    r3, r6, r3      /* convert to host TB value */
        std     r3, VCPU_DEC_EXPIRES(r4)
 
 #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
@@ -2186,8 +2173,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_TM)
         * Also clear the runlatch bit before napping.
         */
 kvm_do_nap:
-       mfspr   r0, SPRN_CTRLF
-       clrrdi  r0, r0, 1
+       li      r0,0
        mtspr   SPRN_CTRLT, r0
 
        li      r0,1
@@ -2206,8 +2192,7 @@ kvm_nap_sequence:         /* desired LPCR value in r5 */
 
        bl      isa206_idle_insn_mayloss
 
-       mfspr   r0, SPRN_CTRLF
-       ori     r0, r0, 1
+       li      r0,1
        mtspr   SPRN_CTRLT, r0
 
        mtspr   SPRN_SRR1, r3
@@ -2264,9 +2249,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_TM)
 
        /* Restore guest decrementer */
        ld      r3, VCPU_DEC_EXPIRES(r4)
-       ld      r5, HSTATE_KVM_VCORE(r13)
-       ld      r6, VCORE_TB_OFFSET_APPL(r5)
-       add     r3, r3, r6      /* convert host TB to guest TB value */
        mftb    r7
        subf    r3, r7, r3
        mtspr   SPRN_DEC, r3
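
With the VCORE_TB_OFFSET_APPL conversions deleted above, vcpu->arch.dec_expires is kept in guest-timebase units on both the C and assembly paths, so programming DEC is a plain subtraction from the current (already offset) timebase and the exit path adds the remaining DEC back on. A sketch of that pair, mirroring the C entry/exit code earlier in this patch (assumption: dec_expires and tb are both guest-TB values):

	#include <stdint.h>

	/* Value to program into SPRN_DEC on guest entry. */
	static int64_t dec_on_entry(uint64_t dec_expires, uint64_t tb)
	{
		return (int64_t)(dec_expires - tb);
	}

	/* Recompute the guest-TB expiry on exit; a 32-bit DEC must be sign extended. */
	static uint64_t dec_expires_on_exit(int64_t dec, uint64_t tb, int large_dec)
	{
		if (!large_dec)
			dec = (int32_t)dec;
		return (uint64_t)dec + tb;
	}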
@@ -2778,10 +2760,11 @@ kvmppc_msr_interrupt:
        blr
 
 /*
+ * void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu)
+ *
  * Load up guest PMU state.  R3 points to the vcpu struct.
  */
-_GLOBAL(kvmhv_load_guest_pmu)
-EXPORT_SYMBOL_GPL(kvmhv_load_guest_pmu)
+kvmhv_load_guest_pmu:
        mr      r4, r3
        mflr    r0
        li      r3, 1
@@ -2815,27 +2798,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
        mtspr   SPRN_MMCRA, r6
        mtspr   SPRN_SIAR, r7
        mtspr   SPRN_SDAR, r8
-BEGIN_FTR_SECTION
-       ld      r5, VCPU_MMCR + 24(r4)
-       ld      r6, VCPU_SIER + 8(r4)
-       ld      r7, VCPU_SIER + 16(r4)
-       mtspr   SPRN_MMCR3, r5
-       mtspr   SPRN_SIER2, r6
-       mtspr   SPRN_SIER3, r7
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
 BEGIN_FTR_SECTION
        ld      r5, VCPU_MMCR + 16(r4)
        ld      r6, VCPU_SIER(r4)
        mtspr   SPRN_MMCR2, r5
        mtspr   SPRN_SIER, r6
-BEGIN_FTR_SECTION_NESTED(96)
        lwz     r7, VCPU_PMC + 24(r4)
        lwz     r8, VCPU_PMC + 28(r4)
        ld      r9, VCPU_MMCRS(r4)
        mtspr   SPRN_SPMC1, r7
        mtspr   SPRN_SPMC2, r8
        mtspr   SPRN_MMCRS, r9
-END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
        mtspr   SPRN_MMCR0, r3
        isync
@@ -2843,10 +2816,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
        blr
 
 /*
+ * void kvmhv_load_host_pmu(void)
+ *
  * Reload host PMU state saved in the PACA by kvmhv_save_host_pmu.
  */
-_GLOBAL(kvmhv_load_host_pmu)
-EXPORT_SYMBOL_GPL(kvmhv_load_host_pmu)
+kvmhv_load_host_pmu:
        mflr    r0
        lbz     r4, PACA_PMCINUSE(r13) /* is the host using the PMU? */
        cmpwi   r4, 0
@@ -2884,25 +2858,18 @@ BEGIN_FTR_SECTION
        mtspr   SPRN_MMCR2, r8
        mtspr   SPRN_SIER, r9
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
-BEGIN_FTR_SECTION
-       ld      r5, HSTATE_MMCR3(r13)
-       ld      r6, HSTATE_SIER2(r13)
-       ld      r7, HSTATE_SIER3(r13)
-       mtspr   SPRN_MMCR3, r5
-       mtspr   SPRN_SIER2, r6
-       mtspr   SPRN_SIER3, r7
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
        mtspr   SPRN_MMCR0, r3
        isync
        mtlr    r0
 23:    blr
 
 /*
+ * void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use)
+ *
  * Save guest PMU state into the vcpu struct.
  * r3 = vcpu, r4 = full save flag (PMU in use flag set in VPA)
  */
-_GLOBAL(kvmhv_save_guest_pmu)
-EXPORT_SYMBOL_GPL(kvmhv_save_guest_pmu)
+kvmhv_save_guest_pmu:
        mr      r9, r3
        mr      r8, r4
 BEGIN_FTR_SECTION
@@ -2951,14 +2918,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 BEGIN_FTR_SECTION
        std     r10, VCPU_MMCR + 16(r9)
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
-BEGIN_FTR_SECTION
-       mfspr   r5, SPRN_MMCR3
-       mfspr   r6, SPRN_SIER2
-       mfspr   r7, SPRN_SIER3
-       std     r5, VCPU_MMCR + 24(r9)
-       std     r6, VCPU_SIER + 8(r9)
-       std     r7, VCPU_SIER + 16(r9)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
        std     r7, VCPU_SIAR(r9)
        std     r8, VCPU_SDAR(r9)
        mfspr   r3, SPRN_PMC1
@@ -2976,7 +2935,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
 BEGIN_FTR_SECTION
        mfspr   r5, SPRN_SIER
        std     r5, VCPU_SIER(r9)
-BEGIN_FTR_SECTION_NESTED(96)
        mfspr   r6, SPRN_SPMC1
        mfspr   r7, SPRN_SPMC2
        mfspr   r8, SPRN_MMCRS
@@ -2985,7 +2943,6 @@ BEGIN_FTR_SECTION_NESTED(96)
        std     r8, VCPU_MMCRS(r9)
        lis     r4, 0x8000
        mtspr   SPRN_MMCRS, r4
-END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 22:    blr
 
index 28c436d..e414ca4 100644 (file)
@@ -459,7 +459,7 @@ unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
        struct kvm_memslots *slots;
        struct kvm_memory_slot *memslot, *m;
        int ret = H_SUCCESS;
-       int srcu_idx;
+       int srcu_idx, bkt;
 
        kvm->arch.secure_guest = KVMPPC_SECURE_INIT_START;
 
@@ -478,7 +478,7 @@ unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
 
        /* register the memslot */
        slots = kvm_memslots(kvm);
-       kvm_for_each_memslot(memslot, slots) {
+       kvm_for_each_memslot(memslot, bkt, slots) {
                ret = __kvmppc_uvmem_memslot_create(kvm, memslot);
                if (ret)
                        break;
@@ -486,7 +486,7 @@ unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
 
        if (ret) {
                slots = kvm_memslots(kvm);
-               kvm_for_each_memslot(m, slots) {
+               kvm_for_each_memslot(m, bkt, slots) {
                        if (m == memslot)
                                break;
                        __kvmppc_uvmem_memslot_delete(kvm, memslot);
@@ -647,7 +647,7 @@ void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *slot,
 
 unsigned long kvmppc_h_svm_init_abort(struct kvm *kvm)
 {
-       int srcu_idx;
+       int srcu_idx, bkt;
        struct kvm_memory_slot *memslot;
 
        /*
@@ -662,7 +662,7 @@ unsigned long kvmppc_h_svm_init_abort(struct kvm *kvm)
 
        srcu_idx = srcu_read_lock(&kvm->srcu);
 
-       kvm_for_each_memslot(memslot, kvm_memslots(kvm))
+       kvm_for_each_memslot(memslot, bkt, kvm_memslots(kvm))
                kvmppc_uvmem_drop_pages(memslot, kvm, false);
 
        srcu_read_unlock(&kvm->srcu, srcu_idx);
@@ -821,7 +821,7 @@ unsigned long kvmppc_h_svm_init_done(struct kvm *kvm)
 {
        struct kvm_memslots *slots;
        struct kvm_memory_slot *memslot;
-       int srcu_idx;
+       int srcu_idx, bkt;
        long ret = H_SUCCESS;
 
        if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
@@ -830,7 +830,7 @@ unsigned long kvmppc_h_svm_init_done(struct kvm *kvm)
        /* migrate any unmoved normal pfn to device pfns*/
        srcu_idx = srcu_read_lock(&kvm->srcu);
        slots = kvm_memslots(kvm);
-       kvm_for_each_memslot(memslot, slots) {
+       kvm_for_each_memslot(memslot, bkt, slots) {
                ret = kvmppc_uv_migrate_mem_slot(kvm, memslot);
                if (ret) {
                        /*
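
The kvm_for_each_memslot() conversions above (and in the s390 hunks later in this diff) add a bucket/iterator argument, reflecting the reworked memslot storage where slots are no longer walked as a plain array. Usage, following the converted call sites with no new API assumed:

	struct kvm_memory_slot *memslot;
	struct kvm_memslots *slots = kvm_memslots(kvm);
	int bkt;

	kvm_for_each_memslot(memslot, bkt, slots) {
		/* act on memslot; bkt is purely iterator state */
	}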
index 6bc9425..34a801c 100644 (file)
@@ -428,7 +428,7 @@ static int kvmppc_core_check_requests_pr(struct kvm_vcpu *vcpu)
 /************* MMU Notifiers *************/
 static bool do_kvm_unmap_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 {
-       long i;
+       unsigned long i;
        struct kvm_vcpu *vcpu;
 
        kvm_for_each_vcpu(i, vcpu, kvm)
@@ -492,7 +492,7 @@ static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr)
 
        if (msr & MSR_POW) {
                if (!vcpu->arch.pending_exceptions) {
-                       kvm_vcpu_block(vcpu);
+                       kvm_vcpu_halt(vcpu);
                        kvm_clear_request(KVM_REQ_UNHALT, vcpu);
                        vcpu->stat.generic.halt_wakeup++;
 
@@ -1899,16 +1899,15 @@ static void kvmppc_core_flush_memslot_pr(struct kvm *kvm,
 }
 
 static int kvmppc_core_prepare_memory_region_pr(struct kvm *kvm,
-                                       struct kvm_memory_slot *memslot,
-                                       const struct kvm_userspace_memory_region *mem,
-                                       enum kvm_mr_change change)
+                               const struct kvm_memory_slot *old,
+                               struct kvm_memory_slot *new,
+                               enum kvm_mr_change change)
 {
        return 0;
 }
 
 static void kvmppc_core_commit_memory_region_pr(struct kvm *kvm,
-                               const struct kvm_userspace_memory_region *mem,
-                               const struct kvm_memory_slot *old,
+                               struct kvm_memory_slot *old,
                                const struct kvm_memory_slot *new,
                                enum kvm_mr_change change)
 {
index ac14239..1f10e7d 100644 (file)
@@ -376,7 +376,7 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
                return kvmppc_h_pr_stuff_tce(vcpu);
        case H_CEDE:
                kvmppc_set_msr_fast(vcpu, kvmppc_get_msr(vcpu) | MSR_EE);
-               kvm_vcpu_block(vcpu);
+               kvm_vcpu_halt(vcpu);
                kvm_clear_request(KVM_REQ_UNHALT, vcpu);
                vcpu->stat.generic.halt_wakeup++;
                return EMULATE_DONE;
index ebd5d92..9cc4660 100644 (file)
@@ -942,8 +942,8 @@ static int xics_debug_show(struct seq_file *m, void *private)
        struct kvmppc_xics *xics = m->private;
        struct kvm *kvm = xics->kvm;
        struct kvm_vcpu *vcpu;
-       int icsid, i;
-       unsigned long flags;
+       int icsid;
+       unsigned long flags, i;
        unsigned long t_rm_kick_vcpu, t_rm_check_resend;
        unsigned long t_rm_notify_eoi;
        unsigned long t_reject, t_check_resend;
@@ -1340,7 +1340,7 @@ static int xics_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 static void kvmppc_xics_release(struct kvm_device *dev)
 {
        struct kvmppc_xics *xics = dev->private;
-       int i;
+       unsigned long i;
        struct kvm *kvm = xics->kvm;
        struct kvm_vcpu *vcpu;
 
index 6231f76..8e4c79e 100644 (file)
@@ -116,7 +116,7 @@ static inline struct kvmppc_icp *kvmppc_xics_find_server(struct kvm *kvm,
                                                         u32 nr)
 {
        struct kvm_vcpu *vcpu = NULL;
-       int i;
+       unsigned long i;
 
        kvm_for_each_vcpu(i, vcpu, kvm) {
                if (vcpu->arch.icp && nr == vcpu->arch.icp->server_num)
index 2250088..e216c06 100644 (file)
@@ -368,7 +368,8 @@ static int xive_check_provisioning(struct kvm *kvm, u8 prio)
 {
        struct kvmppc_xive *xive = kvm->arch.xive;
        struct kvm_vcpu *vcpu;
-       int i, rc;
+       unsigned long i;
+       int rc;
 
        lockdep_assert_held(&xive->lock);
 
@@ -439,7 +440,8 @@ static int xive_try_pick_queue(struct kvm_vcpu *vcpu, u8 prio)
 int kvmppc_xive_select_target(struct kvm *kvm, u32 *server, u8 prio)
 {
        struct kvm_vcpu *vcpu;
-       int i, rc;
+       unsigned long i;
+       int rc;
 
        /* Locate target server */
        vcpu = kvmppc_xive_find_server(kvm, *server);
@@ -1519,7 +1521,8 @@ static void xive_pre_save_queue(struct kvmppc_xive *xive, struct xive_q *q)
 static void xive_pre_save_scan(struct kvmppc_xive *xive)
 {
        struct kvm_vcpu *vcpu = NULL;
-       int i, j;
+       unsigned long i;
+       int j;
 
        /*
         * See comment in xive_get_source() about how this
@@ -1700,7 +1703,7 @@ static bool xive_check_delayed_irq(struct kvmppc_xive *xive, u32 irq)
 {
        struct kvm *kvm = xive->kvm;
        struct kvm_vcpu *vcpu = NULL;
-       int i;
+       unsigned long i;
 
        kvm_for_each_vcpu(i, vcpu, kvm) {
                struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
@@ -2037,7 +2040,7 @@ static void kvmppc_xive_release(struct kvm_device *dev)
        struct kvmppc_xive *xive = dev->private;
        struct kvm *kvm = xive->kvm;
        struct kvm_vcpu *vcpu;
-       int i;
+       unsigned long i;
 
        pr_devel("Releasing xive device\n");
 
@@ -2291,7 +2294,7 @@ static int xive_debug_show(struct seq_file *m, void *private)
        u64 t_vm_h_cppr = 0;
        u64 t_vm_h_eoi = 0;
        u64 t_vm_h_ipi = 0;
-       unsigned int i;
+       unsigned long i;
 
        if (!kvm)
                return 0;
index e6a9651..09d0657 100644 (file)
@@ -199,7 +199,7 @@ struct kvmppc_xive_vcpu {
 static inline struct kvm_vcpu *kvmppc_xive_find_server(struct kvm *kvm, u32 nr)
 {
        struct kvm_vcpu *vcpu = NULL;
-       int i;
+       unsigned long i;
 
        kvm_for_each_vcpu(i, vcpu, kvm) {
                if (vcpu->arch.xive_vcpu && nr == vcpu->arch.xive_vcpu->server_num)
@@ -240,7 +240,7 @@ static inline u32 kvmppc_xive_vp(struct kvmppc_xive *xive, u32 server)
 static inline bool kvmppc_xive_vp_in_use(struct kvm *kvm, u32 vp_id)
 {
        struct kvm_vcpu *vcpu = NULL;
-       int i;
+       unsigned long i;
 
        kvm_for_each_vcpu(i, vcpu, kvm) {
                if (vcpu->arch.xive_vcpu && vp_id == vcpu->arch.xive_vcpu->vp_id)
index 99db9ac..561a5bf 100644 (file)
@@ -807,7 +807,7 @@ static int kvmppc_xive_reset(struct kvmppc_xive *xive)
 {
        struct kvm *kvm = xive->kvm;
        struct kvm_vcpu *vcpu;
-       unsigned int i;
+       unsigned long i;
 
        pr_devel("%s\n", __func__);
 
@@ -916,7 +916,7 @@ static int kvmppc_xive_native_eq_sync(struct kvmppc_xive *xive)
 {
        struct kvm *kvm = xive->kvm;
        struct kvm_vcpu *vcpu;
-       unsigned int i;
+       unsigned long i;
 
        pr_devel("%s\n", __func__);
 
@@ -1017,7 +1017,7 @@ static void kvmppc_xive_native_release(struct kvm_device *dev)
        struct kvmppc_xive *xive = dev->private;
        struct kvm *kvm = xive->kvm;
        struct kvm_vcpu *vcpu;
-       int i;
+       unsigned long i;
 
        pr_devel("Releasing xive native device\n");
 
@@ -1214,7 +1214,7 @@ static int xive_native_debug_show(struct seq_file *m, void *private)
        struct kvmppc_xive *xive = m->private;
        struct kvm *kvm = xive->kvm;
        struct kvm_vcpu *vcpu;
-       unsigned int i;
+       unsigned long i;
 
        if (!kvm)
                return 0;
index 8c15c90..06c5830 100644 (file)
@@ -718,7 +718,7 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
 
        if (vcpu->arch.shared->msr & MSR_WE) {
                local_irq_enable();
-               kvm_vcpu_block(vcpu);
+               kvm_vcpu_halt(vcpu);
                kvm_clear_request(KVM_REQ_UNHALT, vcpu);
                hard_irq_disable();
 
@@ -1821,16 +1821,15 @@ void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
 }
 
 int kvmppc_core_prepare_memory_region(struct kvm *kvm,
-                                     struct kvm_memory_slot *memslot,
-                                     const struct kvm_userspace_memory_region *mem,
+                                     const struct kvm_memory_slot *old,
+                                     struct kvm_memory_slot *new,
                                      enum kvm_mr_change change)
 {
        return 0;
 }
 
 void kvmppc_core_commit_memory_region(struct kvm *kvm,
-                               const struct kvm_userspace_memory_region *mem,
-                               const struct kvm_memory_slot *old,
+                               struct kvm_memory_slot *old,
                                const struct kvm_memory_slot *new,
                                enum kvm_mr_change change)
 {
index 64eb833..051102d 100644 (file)
@@ -65,7 +65,7 @@ static int kvmppc_e500_emul_msgsnd(struct kvm_vcpu *vcpu, int rb)
        ulong param = vcpu->arch.regs.gpr[rb];
        int prio = dbell2prio(rb);
        int pir = param & PPC_DBELL_PIR_MASK;
-       int i;
+       unsigned long i;
        struct kvm_vcpu *cvcpu;
 
        if (prio < 0)
index a72920f..2ad0ccd 100644 (file)
@@ -236,7 +236,7 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
                break;
        case EV_HCALL_TOKEN(EV_IDLE):
                r = EV_SUCCESS;
-               kvm_vcpu_block(vcpu);
+               kvm_vcpu_halt(vcpu);
                kvm_clear_request(KVM_REQ_UNHALT, vcpu);
                break;
        default:
@@ -463,9 +463,6 @@ err_out:
 
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
-       unsigned int i;
-       struct kvm_vcpu *vcpu;
-
 #ifdef CONFIG_KVM_XICS
        /*
         * We call kick_all_cpus_sync() to ensure that all
@@ -476,14 +473,9 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
                kick_all_cpus_sync();
 #endif
 
-       kvm_for_each_vcpu(i, vcpu, kvm)
-               kvm_vcpu_destroy(vcpu);
+       kvm_destroy_vcpus(kvm);
 
        mutex_lock(&kvm->lock);
-       for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
-               kvm->vcpus[i] = NULL;
-
-       atomic_set(&kvm->online_vcpus, 0);
 
        kvmppc_core_destroy_vm(kvm);
 
@@ -706,20 +698,19 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
 }
 
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
-                                  struct kvm_memory_slot *memslot,
-                                  const struct kvm_userspace_memory_region *mem,
+                                  const struct kvm_memory_slot *old,
+                                  struct kvm_memory_slot *new,
                                   enum kvm_mr_change change)
 {
-       return kvmppc_core_prepare_memory_region(kvm, memslot, mem, change);
+       return kvmppc_core_prepare_memory_region(kvm, old, new, change);
 }
 
 void kvm_arch_commit_memory_region(struct kvm *kvm,
-                                  const struct kvm_userspace_memory_region *mem,
                                   struct kvm_memory_slot *old,
                                   const struct kvm_memory_slot *new,
                                   enum kvm_mr_change change)
 {
-       kvmppc_core_commit_memory_region(kvm, mem, old, new, change);
+       kvmppc_core_commit_memory_region(kvm, old, new, change);
 }
 
 void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
@@ -762,7 +753,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
        if (err)
                goto out_vcpu_uninit;
 
-       vcpu->arch.waitp = &vcpu->wait;
+       rcuwait_init(&vcpu->arch.wait);
+       vcpu->arch.waitp = &vcpu->arch.wait;
        kvmppc_create_vcpu_debugfs(vcpu, vcpu->vcpu_id);
        return 0;
 
index 3a600bd..7782003 100644 (file)
@@ -572,18 +572,6 @@ void __init radix__early_init_devtree(void)
        return;
 }
 
-static void radix_init_amor(void)
-{
-       /*
-       * In HV mode, we init AMOR (Authority Mask Override Register) so that
-       * the hypervisor and guest can setup IAMR (Instruction Authority Mask
-       * Register), enable key 0 and set it to 1.
-       *
-       * AMOR = 0b1100 .... 0000 (Mask for key 0 is 11)
-       */
-       mtspr(SPRN_AMOR, (3ul << 62));
-}
-
 void __init radix__early_init_mmu(void)
 {
        unsigned long lpcr;
@@ -644,7 +632,6 @@ void __init radix__early_init_mmu(void)
                lpcr = mfspr(SPRN_LPCR);
                mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
                radix_init_partition_table();
-               radix_init_amor();
        } else {
                radix_init_pseries();
        }
@@ -668,8 +655,6 @@ void radix__early_init_mmu_secondary(void)
 
                set_ptcr_when_no_uv(__pa(partition_tb) |
                                    (PATB_SIZE_SHIFT - 12));
-
-               radix_init_amor();
        }
 
        radix__switch_mmu_context(NULL, &init_mm);
index 73e62e9..8d4ff93 100644 (file)
@@ -2419,8 +2419,24 @@ int register_power_pmu(struct power_pmu *pmu)
 }
 
 #ifdef CONFIG_PPC64
+static bool pmu_override = false;
+static unsigned long pmu_override_val;
+static void do_pmu_override(void *data)
+{
+       ppc_set_pmu_inuse(1);
+       if (pmu_override_val)
+               mtspr(SPRN_MMCR1, pmu_override_val);
+       mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_FC);
+}
+
 static int __init init_ppc64_pmu(void)
 {
+       if (cpu_has_feature(CPU_FTR_HVMODE) && pmu_override) {
+               pr_warn("disabling perf due to pmu_override= command line option.\n");
+               on_each_cpu(do_pmu_override, NULL, 1);
+               return 0;
+       }
+
        /* run through all the pmu drivers one at a time */
        if (!init_power5_pmu())
                return 0;
@@ -2442,4 +2458,23 @@ static int __init init_ppc64_pmu(void)
                return init_generic_compat_pmu();
 }
 early_initcall(init_ppc64_pmu);
+
+static int __init pmu_setup(char *str)
+{
+       unsigned long val;
+
+       if (!early_cpu_has_feature(CPU_FTR_HVMODE))
+               return 0;
+
+       pmu_override = true;
+
+       if (kstrtoul(str, 0, &val))
+               val = 0;
+
+       pmu_override_val = val;
+
+       return 1;
+}
+__setup("pmu_override=", pmu_setup);
+
 #endif
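
As a usage note on the hunk above: pmu_override= is an early boot parameter that only takes effect on HV-mode (bare-metal) kernels. Booting with, say, pmu_override=0x1234 (an arbitrary illustrative value) makes init_ppc64_pmu() skip registering every perf PMU driver, mark the PMU in use on each CPU, load MMCR1 with 0x1234 and unfreeze the counters by clearing MMCR0[FC]; pmu_override=0 does the same but leaves MMCR1 untouched.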
index e3ffdc8..3bc84e2 100644 (file)
@@ -306,8 +306,8 @@ struct p7_sprs {
        /* per thread SPRs that get lost in shallow states */
        u64 amr;
        u64 iamr;
-       u64 amor;
        u64 uamor;
+       /* amor is restored to constant ~0 */
 };
 
 static unsigned long power7_idle_insn(unsigned long type)
@@ -378,7 +378,6 @@ static unsigned long power7_idle_insn(unsigned long type)
        if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
                sprs.amr        = mfspr(SPRN_AMR);
                sprs.iamr       = mfspr(SPRN_IAMR);
-               sprs.amor       = mfspr(SPRN_AMOR);
                sprs.uamor      = mfspr(SPRN_UAMOR);
        }
 
@@ -397,7 +396,7 @@ static unsigned long power7_idle_insn(unsigned long type)
                         */
                        mtspr(SPRN_AMR,         sprs.amr);
                        mtspr(SPRN_IAMR,        sprs.iamr);
-                       mtspr(SPRN_AMOR,        sprs.amor);
+                       mtspr(SPRN_AMOR,        ~0);
                        mtspr(SPRN_UAMOR,       sprs.uamor);
                }
        }
@@ -589,7 +588,6 @@ struct p9_sprs {
        u64 purr;
        u64 spurr;
        u64 dscr;
-       u64 wort;
        u64 ciabr;
 
        u64 mmcra;
@@ -687,7 +685,6 @@ static unsigned long power9_idle_stop(unsigned long psscr)
 
        sprs.amr        = mfspr(SPRN_AMR);
        sprs.iamr       = mfspr(SPRN_IAMR);
-       sprs.amor       = mfspr(SPRN_AMOR);
        sprs.uamor      = mfspr(SPRN_UAMOR);
 
        srr1 = isa300_idle_stop_mayloss(psscr);         /* go idle */
@@ -708,7 +705,7 @@ static unsigned long power9_idle_stop(unsigned long psscr)
                 */
                mtspr(SPRN_AMR,         sprs.amr);
                mtspr(SPRN_IAMR,        sprs.iamr);
-               mtspr(SPRN_AMOR,        sprs.amor);
+               mtspr(SPRN_AMOR,        ~0);
                mtspr(SPRN_UAMOR,       sprs.uamor);
 
                /*
index 8b28ff9..83100c6 100644 (file)
@@ -2107,8 +2107,14 @@ static void dump_300_sprs(void)
        if (!cpu_has_feature(CPU_FTR_ARCH_300))
                return;
 
-       printf("pidr   = %.16lx  tidr  = %.16lx\n",
-               mfspr(SPRN_PID), mfspr(SPRN_TIDR));
+       if (cpu_has_feature(CPU_FTR_P9_TIDR)) {
+               printf("pidr   = %.16lx  tidr  = %.16lx\n",
+                       mfspr(SPRN_PID), mfspr(SPRN_TIDR));
+       } else {
+               printf("pidr   = %.16lx\n",
+                       mfspr(SPRN_PID));
+       }
+
        printf("psscr  = %.16lx\n",
                hv ? mfspr(SPRN_PSSCR) : mfspr(SPRN_PSSCR_PR));
 
index 25ba21f..907fafe 100644 (file)
 #include <linux/types.h>
 #include <linux/kvm.h>
 #include <linux/kvm_types.h>
+#include <asm/csr.h>
 #include <asm/kvm_vcpu_fp.h>
 #include <asm/kvm_vcpu_timer.h>
 
-#ifdef CONFIG_64BIT
-#define KVM_MAX_VCPUS                  (1U << 16)
-#else
-#define KVM_MAX_VCPUS                  (1U << 9)
-#endif
+#define KVM_MAX_VCPUS                  \
+       ((HGATP_VMID_MASK >> HGATP_VMID_SHIFT) + 1)
 
 #define KVM_HALT_POLL_NS_DEFAULT       500000
 
@@ -210,7 +208,6 @@ struct kvm_vcpu_arch {
 static inline void kvm_arch_hardware_unsetup(void) {}
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
-static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
 
 #define KVM_ARCH_WANT_MMU_NOTIFIER
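
KVM_MAX_VCPUS is now derived from the VMID field of the hgatp CSR rather than hard-coded. Assuming the privileged-spec field widths of 7 VMID bits on RV32 and 14 on RV64 (an assumption about HGATP_VMID_MASK/HGATP_VMID_SHIFT in asm/csr.h, not something this hunk states), the limit evaluates as follows:

	#include <stdio.h>

	/* Hypothetical widths for illustration; the kernel takes them from asm/csr.h. */
	#define VMID_BITS_RV32  7
	#define VMID_BITS_RV64 14

	int main(void)
	{
		/* ((HGATP_VMID_MASK >> HGATP_VMID_SHIFT) + 1) == 1 << VMID_BITS */
		printf("rv32: %u vcpus, rv64: %u vcpus\n",
		       1U << VMID_BITS_RV32, 1U << VMID_BITS_RV64);   /* 128, 16384 */
		return 0;
	}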
 
index 30cdd1d..3005902 100644 (file)
@@ -5,14 +5,10 @@
 
 ccflags-y += -I $(srctree)/$(src)
 
-KVM := ../../../virt/kvm
+include $(srctree)/virt/kvm/Makefile.kvm
 
 obj-$(CONFIG_KVM) += kvm.o
 
-kvm-y += $(KVM)/kvm_main.o
-kvm-y += $(KVM)/coalesced_mmio.o
-kvm-y += $(KVM)/binary_stats.o
-kvm-y += $(KVM)/eventfd.o
 kvm-y += main.o
 kvm-y += vm.o
 kvm-y += vmid.o
index d81bae8..7d884b1 100644 (file)
@@ -453,10 +453,15 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm)
 void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
                                   struct kvm_memory_slot *slot)
 {
+       gpa_t gpa = slot->base_gfn << PAGE_SHIFT;
+       phys_addr_t size = slot->npages << PAGE_SHIFT;
+
+       spin_lock(&kvm->mmu_lock);
+       stage2_unmap_range(kvm, gpa, size, false);
+       spin_unlock(&kvm->mmu_lock);
 }
 
 void kvm_arch_commit_memory_region(struct kvm *kvm,
-                               const struct kvm_userspace_memory_region *mem,
                                struct kvm_memory_slot *old,
                                const struct kvm_memory_slot *new,
                                enum kvm_mr_change change)
@@ -466,18 +471,18 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
         * allocated dirty_bitmap[], dirty pages will be tracked while
         * the memory slot is write protected.
         */
-       if (change != KVM_MR_DELETE && mem->flags & KVM_MEM_LOG_DIRTY_PAGES)
-               stage2_wp_memory_region(kvm, mem->slot);
+       if (change != KVM_MR_DELETE && new->flags & KVM_MEM_LOG_DIRTY_PAGES)
+               stage2_wp_memory_region(kvm, new->id);
 }
 
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
-                               struct kvm_memory_slot *memslot,
-                               const struct kvm_userspace_memory_region *mem,
+                               const struct kvm_memory_slot *old,
+                               struct kvm_memory_slot *new,
                                enum kvm_mr_change change)
 {
-       hva_t hva = mem->userspace_addr;
-       hva_t reg_end = hva + mem->memory_size;
-       bool writable = !(mem->flags & KVM_MEM_READONLY);
+       hva_t hva, reg_end, size;
+       gpa_t base_gpa;
+       bool writable;
        int ret = 0;
 
        if (change != KVM_MR_CREATE && change != KVM_MR_MOVE &&
@@ -488,10 +493,16 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
         * Prevent userspace from creating a memory region outside of the GPA
         * space addressable by the KVM guest GPA space.
         */
-       if ((memslot->base_gfn + memslot->npages) >=
+       if ((new->base_gfn + new->npages) >=
            (stage2_gpa_size >> PAGE_SHIFT))
                return -EFAULT;
 
+       hva = new->userspace_addr;
+       size = new->npages << PAGE_SHIFT;
+       reg_end = hva + size;
+       base_gpa = new->base_gfn << PAGE_SHIFT;
+       writable = !(new->flags & KVM_MEM_READONLY);
+
        mmap_read_lock(current->mm);
 
        /*
@@ -527,15 +538,14 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
                vm_end = min(reg_end, vma->vm_end);
 
                if (vma->vm_flags & VM_PFNMAP) {
-                       gpa_t gpa = mem->guest_phys_addr +
-                                   (vm_start - mem->userspace_addr);
+                       gpa_t gpa = base_gpa + (vm_start - hva);
                        phys_addr_t pa;
 
                        pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT;
                        pa += vm_start - vma->vm_start;
 
                        /* IO region dirty page logging not allowed */
-                       if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) {
+                       if (new->flags & KVM_MEM_LOG_DIRTY_PAGES) {
                                ret = -EINVAL;
                                goto out;
                        }
@@ -553,8 +563,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 
        spin_lock(&kvm->mmu_lock);
        if (ret)
-               stage2_unmap_range(kvm, mem->guest_phys_addr,
-                                  mem->memory_size, false);
+               stage2_unmap_range(kvm, base_gpa, size, false);
        spin_unlock(&kvm->mmu_lock);
 
 out:
index 7f2d742..571f319 100644 (file)
@@ -146,7 +146,7 @@ static int system_opcode_insn(struct kvm_vcpu *vcpu,
                vcpu->stat.wfi_exit_stat++;
                if (!kvm_arch_vcpu_runnable(vcpu)) {
                        srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx);
-                       kvm_vcpu_block(vcpu);
+                       kvm_vcpu_halt(vcpu);
                        vcpu->arch.srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
                        kvm_clear_request(KVM_REQ_UNHALT, vcpu);
                }
index 3b0e703..d0d2bca 100644 (file)
@@ -60,7 +60,7 @@ int kvm_riscv_vcpu_sbi_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
 static void kvm_sbi_system_shutdown(struct kvm_vcpu *vcpu,
                                    struct kvm_run *run, u32 type)
 {
-       int i;
+       unsigned long i;
        struct kvm_vcpu *tmp;
 
        kvm_for_each_vcpu(i, tmp, vcpu->kvm)
index fb18af3..7619691 100644 (file)
@@ -46,15 +46,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
-       int i;
-
-       for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-               if (kvm->vcpus[i]) {
-                       kvm_vcpu_destroy(kvm->vcpus[i]);
-                       kvm->vcpus[i] = NULL;
-               }
-       }
-       atomic_set(&kvm->online_vcpus, 0);
+       kvm_destroy_vcpus(kvm);
 }
 
 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
index 2c6253b..807228f 100644 (file)
@@ -65,7 +65,7 @@ bool kvm_riscv_stage2_vmid_ver_changed(struct kvm_vmid *vmid)
 
 void kvm_riscv_stage2_vmid_update(struct kvm_vcpu *vcpu)
 {
-       int i;
+       unsigned long i;
        struct kvm_vcpu *v;
        struct cpumask hmask;
        struct kvm_vmid *vmid = &vcpu->kvm->arch.vmid;
index fd82509..b626bc6 100644 (file)
@@ -403,7 +403,6 @@ CONFIG_DEVTMPFS=y
 CONFIG_CONNECTOR=y
 CONFIG_ZRAM=y
 CONFIG_BLK_DEV_LOOP=m
-CONFIG_BLK_DEV_CRYPTOLOOP=m
 CONFIG_BLK_DEV_DRBD=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
@@ -476,6 +475,7 @@ CONFIG_MACVLAN=m
 CONFIG_MACVTAP=m
 CONFIG_VXLAN=m
 CONFIG_BAREUDP=m
+CONFIG_AMT=m
 CONFIG_TUN=m
 CONFIG_VETH=m
 CONFIG_VIRTIO_NET=m
@@ -489,6 +489,7 @@ CONFIG_NLMON=m
 # CONFIG_NET_VENDOR_AMD is not set
 # CONFIG_NET_VENDOR_AQUANTIA is not set
 # CONFIG_NET_VENDOR_ARC is not set
+# CONFIG_NET_VENDOR_ASIX is not set
 # CONFIG_NET_VENDOR_ATHEROS is not set
 # CONFIG_NET_VENDOR_BROADCOM is not set
 # CONFIG_NET_VENDOR_BROCADE is not set
@@ -571,6 +572,7 @@ CONFIG_WATCHDOG=y
 CONFIG_WATCHDOG_NOWAYOUT=y
 CONFIG_SOFT_WATCHDOG=m
 CONFIG_DIAG288_WATCHDOG=m
+# CONFIG_DRM_DEBUG_MODESET_LOCK is not set
 CONFIG_FB=y
 CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
@@ -775,12 +777,14 @@ CONFIG_CRC4=m
 CONFIG_CRC7=m
 CONFIG_CRC8=m
 CONFIG_RANDOM32_SELFTEST=y
+CONFIG_XZ_DEC_MICROLZMA=y
 CONFIG_DMA_CMA=y
 CONFIG_CMA_SIZE_MBYTES=0
 CONFIG_PRINTK_TIME=y
 CONFIG_DYNAMIC_DEBUG=y
 CONFIG_DEBUG_INFO=y
 CONFIG_DEBUG_INFO_DWARF4=y
+CONFIG_DEBUG_INFO_BTF=y
 CONFIG_GDB_SCRIPTS=y
 CONFIG_HEADERS_INSTALL=y
 CONFIG_DEBUG_SECTION_MISMATCH=y
@@ -807,6 +811,7 @@ CONFIG_DEBUG_MEMORY_INIT=y
 CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m
 CONFIG_DEBUG_PER_CPU_MAPS=y
 CONFIG_KFENCE=y
+CONFIG_KFENCE_STATIC_KEYS=y
 CONFIG_DEBUG_SHIRQ=y
 CONFIG_PANIC_ON_OOPS=y
 CONFIG_DETECT_HUNG_TASK=y
@@ -842,6 +847,7 @@ CONFIG_FTRACE_STARTUP_TEST=y
 CONFIG_SAMPLES=y
 CONFIG_SAMPLE_TRACE_PRINTK=m
 CONFIG_SAMPLE_FTRACE_DIRECT=m
+CONFIG_SAMPLE_FTRACE_DIRECT_MULTI=m
 CONFIG_DEBUG_ENTRY=y
 CONFIG_CIO_INJECT=y
 CONFIG_KUNIT=m
@@ -860,7 +866,7 @@ CONFIG_FAIL_FUNCTION=y
 CONFIG_FAULT_INJECTION_STACKTRACE_FILTER=y
 CONFIG_LKDTM=m
 CONFIG_TEST_MIN_HEAP=y
-CONFIG_KPROBES_SANITY_TEST=y
+CONFIG_KPROBES_SANITY_TEST=m
 CONFIG_RBTREE_TEST=y
 CONFIG_INTERVAL_TREE_TEST=m
 CONFIG_PERCPU_TEST=m
index c9c3ced..0056cab 100644 (file)
@@ -394,7 +394,6 @@ CONFIG_DEVTMPFS=y
 CONFIG_CONNECTOR=y
 CONFIG_ZRAM=y
 CONFIG_BLK_DEV_LOOP=m
-CONFIG_BLK_DEV_CRYPTOLOOP=m
 CONFIG_BLK_DEV_DRBD=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
@@ -467,6 +466,7 @@ CONFIG_MACVLAN=m
 CONFIG_MACVTAP=m
 CONFIG_VXLAN=m
 CONFIG_BAREUDP=m
+CONFIG_AMT=m
 CONFIG_TUN=m
 CONFIG_VETH=m
 CONFIG_VIRTIO_NET=m
@@ -480,6 +480,7 @@ CONFIG_NLMON=m
 # CONFIG_NET_VENDOR_AMD is not set
 # CONFIG_NET_VENDOR_AQUANTIA is not set
 # CONFIG_NET_VENDOR_ARC is not set
+# CONFIG_NET_VENDOR_ASIX is not set
 # CONFIG_NET_VENDOR_ATHEROS is not set
 # CONFIG_NET_VENDOR_BROADCOM is not set
 # CONFIG_NET_VENDOR_BROCADE is not set
@@ -762,12 +763,14 @@ CONFIG_PRIME_NUMBERS=m
 CONFIG_CRC4=m
 CONFIG_CRC7=m
 CONFIG_CRC8=m
+CONFIG_XZ_DEC_MICROLZMA=y
 CONFIG_DMA_CMA=y
 CONFIG_CMA_SIZE_MBYTES=0
 CONFIG_PRINTK_TIME=y
 CONFIG_DYNAMIC_DEBUG=y
 CONFIG_DEBUG_INFO=y
 CONFIG_DEBUG_INFO_DWARF4=y
+CONFIG_DEBUG_INFO_BTF=y
 CONFIG_GDB_SCRIPTS=y
 CONFIG_DEBUG_SECTION_MISMATCH=y
 CONFIG_MAGIC_SYSRQ=y
@@ -792,9 +795,11 @@ CONFIG_HIST_TRIGGERS=y
 CONFIG_SAMPLES=y
 CONFIG_SAMPLE_TRACE_PRINTK=m
 CONFIG_SAMPLE_FTRACE_DIRECT=m
+CONFIG_SAMPLE_FTRACE_DIRECT_MULTI=m
 CONFIG_KUNIT=m
 CONFIG_KUNIT_DEBUGFS=y
 CONFIG_LKDTM=m
+CONFIG_KPROBES_SANITY_TEST=m
 CONFIG_PERCPU_TEST=m
 CONFIG_ATOMIC64_SELFTEST=y
 CONFIG_TEST_BPF=m
index aceccf3..eed3b9a 100644 (file)
@@ -65,9 +65,11 @@ CONFIG_ZFCP=y
 # CONFIG_NETWORK_FILESYSTEMS is not set
 CONFIG_LSM="yama,loadpin,safesetid,integrity"
 # CONFIG_ZLIB_DFLTCC is not set
+CONFIG_XZ_DEC_MICROLZMA=y
 CONFIG_PRINTK_TIME=y
 # CONFIG_SYMBOLIC_ERRNAME is not set
 CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_BTF=y
 CONFIG_DEBUG_FS=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_PANIC_ON_OOPS=y
index a604d51..a22c926 100644 (file)
@@ -1010,6 +1010,4 @@ static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
 static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
 
-void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu);
-
 #endif
index e4dc64c..287bb88 100644 (file)
 
 /* I/O Map */
 #define ZPCI_IOMAP_SHIFT               48
-#define ZPCI_IOMAP_ADDR_BASE           0x8000000000000000UL
+#define ZPCI_IOMAP_ADDR_SHIFT          62
+#define ZPCI_IOMAP_ADDR_BASE           (1UL << ZPCI_IOMAP_ADDR_SHIFT)
 #define ZPCI_IOMAP_ADDR_OFF_MASK       ((1UL << ZPCI_IOMAP_SHIFT) - 1)
 #define ZPCI_IOMAP_MAX_ENTRIES                                                 \
-       ((ULONG_MAX - ZPCI_IOMAP_ADDR_BASE + 1) / (1UL << ZPCI_IOMAP_SHIFT))
+       (1UL << (ZPCI_IOMAP_ADDR_SHIFT - ZPCI_IOMAP_SHIFT))
 #define ZPCI_IOMAP_ADDR_IDX_MASK                                               \
-       (~ZPCI_IOMAP_ADDR_OFF_MASK - ZPCI_IOMAP_ADDR_BASE)
+       ((ZPCI_IOMAP_ADDR_BASE - 1) & ~ZPCI_IOMAP_ADDR_OFF_MASK)
 
 struct zpci_iomap_entry {
        u32 fh;
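
The rewritten zPCI macros above describe the same layout more directly: the I/O map region starts at bit 62, each entry spans a 48-bit offset window, and the entry index occupies bits 61..48. A worked example of the arithmetic (values follow from the macros above; ULL is used only so the standalone snippet stays portable):

	#include <stdio.h>

	#define ZPCI_IOMAP_SHIFT         48
	#define ZPCI_IOMAP_ADDR_SHIFT    62
	#define ZPCI_IOMAP_ADDR_BASE     (1ULL << ZPCI_IOMAP_ADDR_SHIFT)
	#define ZPCI_IOMAP_ADDR_OFF_MASK ((1ULL << ZPCI_IOMAP_SHIFT) - 1)
	#define ZPCI_IOMAP_MAX_ENTRIES   (1ULL << (ZPCI_IOMAP_ADDR_SHIFT - ZPCI_IOMAP_SHIFT))
	#define ZPCI_IOMAP_ADDR_IDX_MASK ((ZPCI_IOMAP_ADDR_BASE - 1) & ~ZPCI_IOMAP_ADDR_OFF_MASK)

	int main(void)
	{
		/* base 0x4000000000000000, 16384 entries, index mask 0x3fff000000000000 */
		printf("base=%#llx entries=%llu idx_mask=%#llx\n",
		       ZPCI_IOMAP_ADDR_BASE, ZPCI_IOMAP_MAX_ENTRIES, ZPCI_IOMAP_ADDR_IDX_MASK);
		return 0;
	}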
index 67a8e77..2e84d39 100644 (file)
@@ -33,6 +33,7 @@ config KVM
        select HAVE_KVM_NO_POLL
        select SRCU
        select KVM_VFIO
+       select INTERVAL_TREE
        help
          Support hosting paravirtualized guest machines using the SIE
          virtualization capability on the mainframe. This should work
index b3aaadc..26f4a74 100644 (file)
@@ -3,13 +3,11 @@
 #
 # Copyright IBM Corp. 2008
 
-KVM := ../../../virt/kvm
-common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o  $(KVM)/async_pf.o \
-             $(KVM)/irqchip.o $(KVM)/vfio.o $(KVM)/binary_stats.o
+include $(srctree)/virt/kvm/Makefile.kvm
 
 ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
 
-kvm-objs := $(common-objs) kvm-s390.o intercept.o interrupt.o priv.o sigp.o
-kvm-objs += diag.o gaccess.o guestdbg.o vsie.o pv.o
+kvm-y += kvm-s390.o intercept.o interrupt.o priv.o sigp.o
+kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o
 
 obj-$(CONFIG_KVM) += kvm.o
index 0576d5c..db933c2 100644 (file)
@@ -1335,7 +1335,8 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
        VCPU_EVENT(vcpu, 4, "enabled wait: %llu ns", sltime);
 no_timer:
        srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
-       kvm_vcpu_block(vcpu);
+       kvm_vcpu_halt(vcpu);
+       vcpu->valid_wakeup = false;
        __unset_cpu_idle(vcpu);
        vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
 
@@ -2666,7 +2667,7 @@ static int flic_ais_mode_set_all(struct kvm *kvm, struct kvm_device_attr *attr)
 static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 {
        int r = 0;
-       unsigned int i;
+       unsigned long i;
        struct kvm_vcpu *vcpu;
 
        switch (attr->group) {
index ef299aa..9c6d45d 100644 (file)
@@ -295,7 +295,7 @@ static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
 {
        struct kvm *kvm;
        struct kvm_vcpu *vcpu;
-       int i;
+       unsigned long i;
        unsigned long long *delta = v;
 
        list_for_each_entry(kvm, &vm_list, vm_list) {
@@ -682,7 +682,7 @@ out:
 
 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
 {
-       unsigned int i;
+       unsigned long i;
        struct kvm_vcpu *vcpu;
 
        kvm_for_each_vcpu(i, vcpu, kvm) {
@@ -936,7 +936,7 @@ static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
 {
        struct kvm_vcpu *vcpu;
-       int i;
+       unsigned long i;
 
        kvm_s390_vcpu_block_all(kvm);
 
@@ -1021,7 +1021,7 @@ static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
 
 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
 {
-       int cx;
+       unsigned long cx;
        struct kvm_vcpu *vcpu;
 
        kvm_for_each_vcpu(cx, vcpu, kvm)
@@ -1037,13 +1037,13 @@ static int kvm_s390_vm_start_migration(struct kvm *kvm)
        struct kvm_memory_slot *ms;
        struct kvm_memslots *slots;
        unsigned long ram_pages = 0;
-       int slotnr;
+       int bkt;
 
        /* migration mode already enabled */
        if (kvm->arch.migration_mode)
                return 0;
        slots = kvm_memslots(kvm);
-       if (!slots || !slots->used_slots)
+       if (!slots || kvm_memslots_empty(slots))
                return -EINVAL;
 
        if (!kvm->arch.use_cmma) {
@@ -1051,8 +1051,7 @@ static int kvm_s390_vm_start_migration(struct kvm *kvm)
                return 0;
        }
        /* mark all the pages in active slots as dirty */
-       for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
-               ms = slots->memslots + slotnr;
+       kvm_for_each_memslot(ms, bkt, slots) {
                if (!ms->dirty_bitmap)
                        return -EINVAL;
                /*
@@ -1943,41 +1942,6 @@ out:
 /* for consistency */
 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
 
-/*
- * Similar to gfn_to_memslot, but returns the index of a memslot also when the
- * address falls in a hole. In that case the index of one of the memslots
- * bordering the hole is returned.
- */
-static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
-{
-       int start = 0, end = slots->used_slots;
-       int slot = atomic_read(&slots->last_used_slot);
-       struct kvm_memory_slot *memslots = slots->memslots;
-
-       if (gfn >= memslots[slot].base_gfn &&
-           gfn < memslots[slot].base_gfn + memslots[slot].npages)
-               return slot;
-
-       while (start < end) {
-               slot = start + (end - start) / 2;
-
-               if (gfn >= memslots[slot].base_gfn)
-                       end = slot;
-               else
-                       start = slot + 1;
-       }
-
-       if (start >= slots->used_slots)
-               return slots->used_slots - 1;
-
-       if (gfn >= memslots[start].base_gfn &&
-           gfn < memslots[start].base_gfn + memslots[start].npages) {
-               atomic_set(&slots->last_used_slot, start);
-       }
-
-       return start;
-}
-
 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
                              u8 *res, unsigned long bufsize)
 {
@@ -2001,26 +1965,31 @@ static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
        return 0;
 }
 
+static struct kvm_memory_slot *gfn_to_memslot_approx(struct kvm_memslots *slots,
+                                                    gfn_t gfn)
+{
+       return ____gfn_to_memslot(slots, gfn, true);
+}
+
 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
                                              unsigned long cur_gfn)
 {
-       int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
-       struct kvm_memory_slot *ms = slots->memslots + slotidx;
+       struct kvm_memory_slot *ms = gfn_to_memslot_approx(slots, cur_gfn);
        unsigned long ofs = cur_gfn - ms->base_gfn;
+       struct rb_node *mnode = &ms->gfn_node[slots->node_idx];
 
        if (ms->base_gfn + ms->npages <= cur_gfn) {
-               slotidx--;
+               mnode = rb_next(mnode);
                /* If we are above the highest slot, wrap around */
-               if (slotidx < 0)
-                       slotidx = slots->used_slots - 1;
+               if (!mnode)
+                       mnode = rb_first(&slots->gfn_tree);
 
-               ms = slots->memslots + slotidx;
+               ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
                ofs = 0;
        }
        ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
-       while ((slotidx > 0) && (ofs >= ms->npages)) {
-               slotidx--;
-               ms = slots->memslots + slotidx;
+       while (ofs >= ms->npages && (mnode = rb_next(mnode))) {
+               ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
                ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
        }
        return ms->base_gfn + ofs;
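The two hunks above replace array-index stepping over memslots with an rb-tree walk: each slot is linked into slots->gfn_tree via gfn_node[slots->node_idx] and ordered by base_gfn. A rough in-order walk using only the fields visible in these hunks (illustrative helper, not part of this series):

    /* Sketch: visit every memslot in ascending gfn order. */
    static void example_walk_memslots(struct kvm_memslots *slots)
    {
            struct kvm_memory_slot *ms;
            struct rb_node *node;

            for (node = rb_first(&slots->gfn_tree); node; node = rb_next(node)) {
                    ms = container_of(node, struct kvm_memory_slot,
                                      gfn_node[slots->node_idx]);
                    /* slot covers gfns [ms->base_gfn, ms->base_gfn + ms->npages) */
            }
    }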
@@ -2033,7 +2002,7 @@ static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
        struct kvm_memslots *slots = kvm_memslots(kvm);
        struct kvm_memory_slot *ms;
 
-       if (unlikely(!slots->used_slots))
+       if (unlikely(kvm_memslots_empty(slots)))
                return 0;
 
        cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
@@ -2043,7 +2012,7 @@ static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
        if (!ms)
                return 0;
        next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
-       mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
+       mem_end = kvm_s390_get_gfn_end(slots);
 
        while (args->count < bufsize) {
                hva = gfn_to_hva(kvm, cur_gfn);
@@ -2206,7 +2175,7 @@ static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
        struct kvm_vcpu *vcpu;
        u16 rc, rrc;
        int ret = 0;
-       int i;
+       unsigned long i;
 
        /*
         * We ignore failures and try to destroy as many CPUs as possible.
@@ -2230,7 +2199,8 @@ static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
 
 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
 {
-       int i, r = 0;
+       unsigned long i;
+       int r = 0;
        u16 dummy;
 
        struct kvm_vcpu *vcpu;
@@ -2821,27 +2791,11 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
        free_page((unsigned long)(vcpu->arch.sie_block));
 }
 
-static void kvm_free_vcpus(struct kvm *kvm)
-{
-       unsigned int i;
-       struct kvm_vcpu *vcpu;
-
-       kvm_for_each_vcpu(i, vcpu, kvm)
-               kvm_vcpu_destroy(vcpu);
-
-       mutex_lock(&kvm->lock);
-       for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
-               kvm->vcpus[i] = NULL;
-
-       atomic_set(&kvm->online_vcpus, 0);
-       mutex_unlock(&kvm->lock);
-}
-
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
        u16 rc, rrc;
 
-       kvm_free_vcpus(kvm);
+       kvm_destroy_vcpus(kvm);
        sca_dispose(kvm);
        kvm_s390_gisa_destroy(kvm);
        /*
@@ -2945,7 +2899,7 @@ static int sca_switch_to_extended(struct kvm *kvm)
        struct bsca_block *old_sca = kvm->arch.sca;
        struct esca_block *new_sca;
        struct kvm_vcpu *vcpu;
-       unsigned int vcpu_idx;
+       unsigned long vcpu_idx;
        u32 scaol, scaoh;
 
        if (kvm->arch.use_esca)
@@ -3427,7 +3381,7 @@ static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
        struct kvm *kvm = gmap->private;
        struct kvm_vcpu *vcpu;
        unsigned long prefix;
-       int i;
+       unsigned long i;
 
        if (gmap_is_shadow(gmap))
                return;
@@ -3449,7 +3403,7 @@ bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
 {
        /* do not poll with more than halt_poll_max_steal percent of steal time */
        if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
-           halt_poll_max_steal) {
+           READ_ONCE(halt_poll_max_steal)) {
                vcpu->stat.halt_no_poll_steal++;
                return true;
        }
@@ -3920,7 +3874,7 @@ void kvm_s390_set_tod_clock(struct kvm *kvm,
 {
        struct kvm_vcpu *vcpu;
        union tod_clock clk;
-       int i;
+       unsigned long i;
 
        mutex_lock(&kvm->lock);
        preempt_disable();
@@ -4552,7 +4506,7 @@ static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
 
 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
 {
-       unsigned int i;
+       unsigned long i;
        struct kvm_vcpu *vcpu;
 
        kvm_for_each_vcpu(i, vcpu, kvm) {
@@ -4590,7 +4544,7 @@ int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
        }
 
        for (i = 0; i < online_vcpus; i++) {
-               if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
+               if (!is_vcpu_stopped(kvm_get_vcpu(vcpu->kvm, i)))
                        started_vcpus++;
        }
 
@@ -4657,9 +4611,11 @@ int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
        __disable_ibs_on_vcpu(vcpu);
 
        for (i = 0; i < online_vcpus; i++) {
-               if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
+               struct kvm_vcpu *tmp = kvm_get_vcpu(vcpu->kvm, i);
+
+               if (!is_vcpu_stopped(tmp)) {
                        started_vcpus++;
-                       started_vcpu = vcpu->kvm->vcpus[i];
+                       started_vcpu = tmp;
                }
        }
 
@@ -5025,32 +4981,38 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
 
 /* Section: memory related */
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
-                                  struct kvm_memory_slot *memslot,
-                                  const struct kvm_userspace_memory_region *mem,
+                                  const struct kvm_memory_slot *old,
+                                  struct kvm_memory_slot *new,
                                   enum kvm_mr_change change)
 {
+       gpa_t size;
+
+       /* When we are protected, we should not change the memory slots */
+       if (kvm_s390_pv_get_handle(kvm))
+               return -EINVAL;
+
+       if (change == KVM_MR_DELETE || change == KVM_MR_FLAGS_ONLY)
+               return 0;
+
        /* A few sanity checks. We can have memory slots which have to be
           located/ended at a segment boundary (1MB). The memory in userland is
           ok to be fragmented into various different vmas. It is okay to mmap()
           and munmap() stuff in this slot after doing this call at any time */
 
-       if (mem->userspace_addr & 0xffffful)
+       if (new->userspace_addr & 0xffffful)
                return -EINVAL;
 
-       if (mem->memory_size & 0xffffful)
+       size = new->npages * PAGE_SIZE;
+       if (size & 0xffffful)
                return -EINVAL;
 
-       if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
+       if ((new->base_gfn * PAGE_SIZE) + size > kvm->arch.mem_limit)
                return -EINVAL;
 
-       /* When we are protected, we should not change the memory slots */
-       if (kvm_s390_pv_get_handle(kvm))
-               return -EINVAL;
        return 0;
 }
 
 void kvm_arch_commit_memory_region(struct kvm *kvm,
-                               const struct kvm_userspace_memory_region *mem,
                                struct kvm_memory_slot *old,
                                const struct kvm_memory_slot *new,
                                enum kvm_mr_change change)
@@ -5069,8 +5031,9 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
                        break;
                fallthrough;
        case KVM_MR_CREATE:
-               rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
-                                     mem->guest_phys_addr, mem->memory_size);
+               rc = gmap_map_segment(kvm->arch.gmap, new->userspace_addr,
+                                     new->base_gfn * PAGE_SIZE,
+                                     new->npages * PAGE_SIZE);
                break;
        case KVM_MR_FLAGS_ONLY:
                break;
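With struct kvm_userspace_memory_region gone from these callbacks, sizes and guest addresses are derived from the memslot itself. A worked example of the checks and the gmap_map_segment() call above, with illustrative numbers:

    npages = 0x10000                      (256 MiB slot)
    size   = npages * PAGE_SIZE = 0x10000 * 0x1000 = 0x10000000
    size & 0xfffff = 0                    -> 1 MiB segment alignment check passes
    guest base = base_gfn * PAGE_SIZE     -> replaces the old guest_phys_addr argument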
@@ -5089,11 +5052,6 @@ static inline unsigned long nonhyp_mask(int i)
        return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
 }
 
-void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
-{
-       vcpu->valid_wakeup = false;
-}
-
 static int __init kvm_s390_init(void)
 {
        int i;
index 1876ab0..098831e 100644 (file)
@@ -217,6 +217,20 @@ static inline void kvm_s390_set_user_cpu_state_ctrl(struct kvm *kvm)
        kvm->arch.user_cpu_state_ctrl = 1;
 }
 
+/* get the end gfn of the last (highest gfn) memslot */
+static inline unsigned long kvm_s390_get_gfn_end(struct kvm_memslots *slots)
+{
+       struct rb_node *node;
+       struct kvm_memory_slot *ms;
+
+       if (WARN_ON(kvm_memslots_empty(slots)))
+               return 0;
+
+       node = rb_last(&slots->gfn_tree);
+       ms = container_of(node, struct kvm_memory_slot, gfn_node[slots->node_idx]);
+       return ms->base_gfn + ms->npages;
+}
+
 /* implemented in pv.c */
 int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc);
 int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc);
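kvm_s390_get_gfn_end() relies on the gfn tree being ordered, so rb_last() yields the highest slot. With illustrative numbers, a top slot of base_gfn = 0x80000 and npages = 0x40000 gives:

    end gfn     = base_gfn + npages = 0x80000 + 0x40000 = 0xc0000
    end address = 0xc0000 * 4096    = 0xc0000000 (3 GiB)

The pv.c hunk below uses exactly this value to size the variable storage instead of peeking at the first entry of the old memslot array.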
@@ -357,7 +371,7 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
 
 static inline void kvm_s390_vcpu_block_all(struct kvm *kvm)
 {
-       int i;
+       unsigned long i;
        struct kvm_vcpu *vcpu;
 
        WARN_ON(!mutex_is_locked(&kvm->lock));
@@ -367,7 +381,7 @@ static inline void kvm_s390_vcpu_block_all(struct kvm *kvm)
 
 static inline void kvm_s390_vcpu_unblock_all(struct kvm *kvm)
 {
-       int i;
+       unsigned long i;
        struct kvm_vcpu *vcpu;
 
        kvm_for_each_vcpu(i, vcpu, kvm)
index 00d272d..7f7c0d6 100644 (file)
@@ -116,7 +116,6 @@ static int kvm_s390_pv_alloc_vm(struct kvm *kvm)
        unsigned long base = uv_info.guest_base_stor_len;
        unsigned long virt = uv_info.guest_virt_var_stor_len;
        unsigned long npages = 0, vlen = 0;
-       struct kvm_memory_slot *memslot;
 
        kvm->arch.pv.stor_var = NULL;
        kvm->arch.pv.stor_base = __get_free_pages(GFP_KERNEL_ACCOUNT, get_order(base));
@@ -130,8 +129,7 @@ static int kvm_s390_pv_alloc_vm(struct kvm *kvm)
         * Slots are sorted by GFN
         */
        mutex_lock(&kvm->slots_lock);
-       memslot = kvm_memslots(kvm)->memslots;
-       npages = memslot->base_gfn + memslot->npages;
+       npages = kvm_s390_get_gfn_end(kvm_memslots(kvm));
        mutex_unlock(&kvm->slots_lock);
 
        kvm->arch.pv.guest_len = npages * PAGE_SIZE;
index cfc5f55..bc79733 100644 (file)
@@ -173,10 +173,11 @@ static noinline int unwindme_func4(struct unwindme *u)
                }
 
                /*
-                * trigger specification exception
+                * Trigger operation exception; use insn notation to bypass
+                * LLVM's integrated assembler sanity checks.
                 */
                asm volatile(
-                       "       mvcl    %%r1,%%r1\n"
+                       "       .insn   e,0x0000\n"     /* illegal opcode */
                        "0:     nopr    %%r7\n"
                        EX_TABLE(0b, 0b)
                        :);
index c7a97f3..481a664 100644 (file)
@@ -43,7 +43,6 @@ extern void flush_cache_range(struct vm_area_struct *vma,
                                 unsigned long start, unsigned long end);
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 void flush_dcache_page(struct page *page);
-void flush_dcache_folio(struct folio *folio);
 extern void flush_icache_range(unsigned long start, unsigned long end);
 #define flush_icache_user_range flush_icache_range
 extern void flush_icache_page(struct vm_area_struct *vma,
index 208f131..d9539d2 100644 (file)
 446    common  landlock_restrict_self          sys_landlock_restrict_self
 # 447 reserved for memfd_secret
 448    common  process_mrelease                sys_process_mrelease
+449    common  futex_waitv                     sys_futex_waitv
index c37764d..46adabc 100644 (file)
 446    common  landlock_restrict_self          sys_landlock_restrict_self
 # 447 reserved for memfd_secret
 448    common  process_mrelease                sys_process_mrelease
+449    common  futex_waitv                     sys_futex_waitv
index e38a4cf..97b1f84 100644 (file)
@@ -574,6 +574,10 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
        ud2
 1:
 #endif
+#ifdef CONFIG_XEN_PV
+       ALTERNATIVE "", "jmp xenpv_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV
+#endif
+
        POP_REGS pop_rdi=0
 
        /*
@@ -890,6 +894,7 @@ SYM_CODE_START_LOCAL(paranoid_entry)
 .Lparanoid_entry_checkgs:
        /* EBX = 1 -> kernel GSBASE active, no restore required */
        movl    $1, %ebx
+
        /*
         * The kernel-enforced convention is a negative GSBASE indicates
         * a kernel value. No SWAPGS needed on entry and exit.
@@ -897,21 +902,14 @@ SYM_CODE_START_LOCAL(paranoid_entry)
        movl    $MSR_GS_BASE, %ecx
        rdmsr
        testl   %edx, %edx
-       jns     .Lparanoid_entry_swapgs
-       ret
+       js      .Lparanoid_kernel_gsbase
 
-.Lparanoid_entry_swapgs:
+       /* EBX = 0 -> SWAPGS required on exit */
+       xorl    %ebx, %ebx
        swapgs
+.Lparanoid_kernel_gsbase:
 
-       /*
-        * The above SAVE_AND_SWITCH_TO_KERNEL_CR3 macro doesn't do an
-        * unconditional CR3 write, even in the PTI case.  So do an lfence
-        * to prevent GS speculation, regardless of whether PTI is enabled.
-        */
        FENCE_SWAPGS_KERNEL_ENTRY
-
-       /* EBX = 0 -> SWAPGS required on exit */
-       xorl    %ebx, %ebx
        ret
 SYM_CODE_END(paranoid_entry)
 
@@ -993,11 +991,6 @@ SYM_CODE_START_LOCAL(error_entry)
        pushq   %r12
        ret
 
-.Lerror_entry_done_lfence:
-       FENCE_SWAPGS_KERNEL_ENTRY
-.Lerror_entry_done:
-       ret
-
        /*
         * There are two places in the kernel that can potentially fault with
         * usergs. Handle them here.  B stepping K8s sometimes report a
@@ -1020,8 +1013,14 @@ SYM_CODE_START_LOCAL(error_entry)
         * .Lgs_change's error handler with kernel gsbase.
         */
        SWAPGS
-       FENCE_SWAPGS_USER_ENTRY
-       jmp .Lerror_entry_done
+
+       /*
+        * Issue an LFENCE to prevent GS speculation, regardless of whether it is a
+        * kernel or user gsbase.
+        */
+.Lerror_entry_done_lfence:
+       FENCE_SWAPGS_KERNEL_ENTRY
+       ret
 
 .Lbstep_iret:
        /* Fix truncated RIP */
index 6053674..c2767a6 100644 (file)
@@ -102,12 +102,6 @@ extern void switch_fpu_return(void);
  */
 extern int cpu_has_xfeatures(u64 xfeatures_mask, const char **feature_name);
 
-/*
- * Tasks that are not using SVA have mm->pasid set to zero to note that they
- * will not have the valid bit set in MSR_IA32_PASID while they are running.
- */
-#define PASID_DISABLED 0
-
 /* Trap handling */
 extern int  fpu__exception_code(struct fpu *fpu, int trap_nr);
 extern void fpu_sync_fpstate(struct fpu *fpu);
index 5a0bcf8..048b6d5 100644 (file)
 #define INTEL_FAM6_ALDERLAKE           0x97    /* Golden Cove / Gracemont */
 #define INTEL_FAM6_ALDERLAKE_L         0x9A    /* Golden Cove / Gracemont */
 
-#define INTEL_FAM6_RAPTOR_LAKE         0xB7
+#define INTEL_FAM6_RAPTORLAKE          0xB7
 
 /* "Small Core" Processors (Atom) */
 
index cefe1d8..9e50da3 100644 (file)
@@ -47,6 +47,7 @@ KVM_X86_OP(set_dr7)
 KVM_X86_OP(cache_reg)
 KVM_X86_OP(get_rflags)
 KVM_X86_OP(set_rflags)
+KVM_X86_OP(get_if_flag)
 KVM_X86_OP(tlb_flush_all)
 KVM_X86_OP(tlb_flush_current)
 KVM_X86_OP_NULL(tlb_remote_flush)
index 6ac61f8..344ced0 100644 (file)
@@ -97,7 +97,7 @@
        KVM_ARCH_REQ_FLAGS(25, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQ_TLB_FLUSH_CURRENT      KVM_ARCH_REQ(26)
 #define KVM_REQ_TLB_FLUSH_GUEST \
-       KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_NO_WAKEUP)
+       KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQ_APF_READY              KVM_ARCH_REQ(28)
 #define KVM_REQ_MSR_FILTER_CHANGED     KVM_ARCH_REQ(29)
 #define KVM_REQ_UPDATE_CPU_DIRTY_LOGGING \
 #define KVM_HPAGE_MASK(x)      (~(KVM_HPAGE_SIZE(x) - 1))
 #define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE)
 
-#define KVM_PERMILLE_MMU_PAGES 20
+#define KVM_MEMSLOT_PAGES_TO_MMU_PAGES_RATIO 50
 #define KVM_MIN_ALLOC_MMU_PAGES 64UL
 #define KVM_MMU_HASH_SHIFT 12
 #define KVM_NUM_MMU_PAGES (1 << KVM_MMU_HASH_SHIFT)
@@ -291,25 +291,31 @@ struct kvm_kernel_irq_routing_entry;
  * the number of unique SPs that can theoretically be created is 2^n, where n
  * is the number of bits that are used to compute the role.
  *
- * But, even though there are 18 bits in the mask below, not all combinations
- * of modes and flags are possible.  The maximum number of possible upper-level
- * shadow pages for a single gfn is in the neighborhood of 2^13.
+ * But, even though there are 19 bits in the mask below, not all combinations
+ * of modes and flags are possible:
  *
- *   - invalid shadow pages are not accounted.
- *   - level is effectively limited to four combinations, not 16 as the number
- *     bits would imply, as 4k SPs are not tracked (allowed to go unsync).
- *   - level is effectively unused for non-PAE paging because there is exactly
- *     one upper level (see 4k SP exception above).
- *   - quadrant is used only for non-PAE paging and is exclusive with
- *     gpte_is_8_bytes.
- *   - execonly and ad_disabled are used only for nested EPT, which makes it
- *     exclusive with quadrant.
+ *   - invalid shadow pages are not accounted for, so there are effectively 18 bits
+ *
+ *   - quadrant will only be used if has_4_byte_gpte=1 (non-PAE paging);
+ *     execonly and ad_disabled are only used for nested EPT which has
+ *     has_4_byte_gpte=0.  Therefore, 2 bits are always unused.
+ *
+ *   - the 4 bits of level are effectively limited to the values 2/3/4/5,
+ *     as 4k SPs are not tracked (allowed to go unsync).  In addition non-PAE
+ *     paging has exactly one upper level, making level completely redundant
+ *     when has_4_byte_gpte=1.
+ *
+ *   - on top of this, smep_andnot_wp and smap_andnot_wp are only set if
+ *     cr0_wp=0, therefore these three bits only give rise to 5 possibilities.
+ *
+ * Therefore, the maximum number of possible upper-level shadow pages for a
+ * single gfn is a bit less than 2^13.
  */
 union kvm_mmu_page_role {
        u32 word;
        struct {
                unsigned level:4;
-               unsigned gpte_is_8_bytes:1;
+               unsigned has_4_byte_gpte:1;
                unsigned quadrant:2;
                unsigned direct:1;
                unsigned access:3;
@@ -420,10 +426,9 @@ struct kvm_mmu {
        int (*page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault);
        void (*inject_page_fault)(struct kvm_vcpu *vcpu,
                                  struct x86_exception *fault);
-       gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gpa_t gva_or_gpa,
-                           u32 access, struct x86_exception *exception);
-       gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
-                              struct x86_exception *exception);
+       gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+                           gpa_t gva_or_gpa, u32 access,
+                           struct x86_exception *exception);
        int (*sync_page)(struct kvm_vcpu *vcpu,
                         struct kvm_mmu_page *sp);
        void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa);
@@ -1036,6 +1041,7 @@ struct kvm_x86_msr_filter {
 #define APICV_INHIBIT_REASON_PIT_REINJ  4
 #define APICV_INHIBIT_REASON_X2APIC    5
 #define APICV_INHIBIT_REASON_BLOCKIRQ  6
+#define APICV_INHIBIT_REASON_ABSENT    7
 
 struct kvm_arch {
        unsigned long n_used_mmu_pages;
@@ -1348,6 +1354,7 @@ struct kvm_x86_ops {
        void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
        unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
        void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
+       bool (*get_if_flag)(struct kvm_vcpu *vcpu);
 
        void (*tlb_flush_all)(struct kvm_vcpu *vcpu);
        void (*tlb_flush_current)(struct kvm_vcpu *vcpu);
@@ -1585,10 +1592,9 @@ void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
                                   const struct kvm_memory_slot *memslot);
 void kvm_mmu_zap_all(struct kvm *kvm);
 void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen);
-unsigned long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm);
 void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long kvm_nr_mmu_pages);
 
-int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3);
+int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);
 
 int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
                          const void *val, int bytes);
@@ -1638,7 +1644,8 @@ extern u64 kvm_mce_cap_supported;
  *
  * EMULTYPE_SKIP - Set when emulating solely to skip an instruction, i.e. to
  *                decode the instruction length.  For use *only* by
- *                kvm_x86_ops.skip_emulated_instruction() implementations.
+ *                kvm_x86_ops.skip_emulated_instruction() implementations if
+ *                EMULTYPE_COMPLETE_USER_EXIT is not set.
  *
  * EMULTYPE_ALLOW_RETRY_PF - Set when the emulator should resume the guest to
  *                          retry native execution under certain conditions,
@@ -1658,6 +1665,10 @@ extern u64 kvm_mce_cap_supported;
  *
  * EMULTYPE_PF - Set when emulating MMIO by way of an intercepted #PF, in which
  *              case the CR2/GPA value pass on the stack is valid.
+ *
+ * EMULTYPE_COMPLETE_USER_EXIT - Set when the emulator should update interruptibility
+ *                              state and inject single-step #DBs after skipping
+ *                              an instruction (after completing userspace I/O).
  */
 #define EMULTYPE_NO_DECODE         (1 << 0)
 #define EMULTYPE_TRAP_UD           (1 << 1)
@@ -1666,6 +1677,7 @@ extern u64 kvm_mce_cap_supported;
 #define EMULTYPE_TRAP_UD_FORCED            (1 << 4)
 #define EMULTYPE_VMWARE_GP         (1 << 5)
 #define EMULTYPE_PF                (1 << 6)
+#define EMULTYPE_COMPLETE_USER_EXIT (1 << 7)
 
 int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type);
 int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
@@ -1690,7 +1702,7 @@ int kvm_emulate_monitor(struct kvm_vcpu *vcpu);
 int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in);
 int kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
 int kvm_emulate_halt(struct kvm_vcpu *vcpu);
-int kvm_vcpu_halt(struct kvm_vcpu *vcpu);
+int kvm_emulate_halt_noskip(struct kvm_vcpu *vcpu);
 int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu);
 int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu);
 
@@ -1756,12 +1768,9 @@ void kvm_inject_nmi(struct kvm_vcpu *vcpu);
 void kvm_update_dr7(struct kvm_vcpu *vcpu);
 
 int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn);
-void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
 void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
                        ulong roots_to_free);
 void kvm_mmu_free_guest_mode_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu);
-gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
-                          struct x86_exception *exception);
 gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
                              struct x86_exception *exception);
 gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva,
@@ -1925,8 +1934,6 @@ static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
        static_call_cond(kvm_x86_vcpu_unblocking)(vcpu);
 }
 
-static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
-
 static inline int kvm_cpu_get_apicid(int mps_cpu)
 {
 #ifdef CONFIG_X86_LOCAL_APIC
index 9d4a3b1..eb186bc 100644 (file)
@@ -63,9 +63,9 @@ void kvm_slot_page_track_add_page(struct kvm *kvm,
 void kvm_slot_page_track_remove_page(struct kvm *kvm,
                                     struct kvm_memory_slot *slot, gfn_t gfn,
                                     enum kvm_page_track_mode mode);
-bool kvm_slot_page_track_is_active(struct kvm_vcpu *vcpu,
-                                  struct kvm_memory_slot *slot, gfn_t gfn,
-                                  enum kvm_page_track_mode mode);
+bool kvm_slot_page_track_is_active(struct kvm *kvm,
+                                  const struct kvm_memory_slot *slot,
+                                  gfn_t gfn, enum kvm_page_track_mode mode);
 
 void
 kvm_page_track_register_notifier(struct kvm *kvm,
index 2cef6c5..6acaf5a 100644 (file)
 
 #define GHCB_RESP_CODE(v)              ((v) & GHCB_MSR_INFO_MASK)
 
+/*
+ * Error codes related to GHCB input that can be communicated back to the guest
+ * by setting the lower 32-bits of the GHCB SW_EXITINFO1 field to 2.
+ */
+#define GHCB_ERR_NOT_REGISTERED                1
+#define GHCB_ERR_INVALID_USAGE         2
+#define GHCB_ERR_INVALID_SCRATCH_AREA  3
+#define GHCB_ERR_MISSING_INPUT         4
+#define GHCB_ERR_INVALID_INPUT         5
+#define GHCB_ERR_INVALID_EVENT         6
+
 #endif
index 0575f58..e5e0fe1 100644 (file)
@@ -281,13 +281,13 @@ HYPERVISOR_callback_op(int cmd, void *arg)
        return _hypercall2(int, callback_op, cmd, arg);
 }
 
-static inline int
+static __always_inline int
 HYPERVISOR_set_debugreg(int reg, unsigned long value)
 {
        return _hypercall2(int, set_debugreg, reg, value);
 }
 
-static inline unsigned long
+static __always_inline unsigned long
 HYPERVISOR_get_debugreg(int reg)
 {
        return _hypercall1(unsigned long, get_debugreg, reg);
index 4957f59..5adab89 100644 (file)
@@ -64,6 +64,7 @@ void xen_arch_unregister_cpu(int num);
 
 #ifdef CONFIG_PVH
 void __init xen_pvh_init(struct boot_params *boot_params);
+void __init mem_map_via_hcall(struct boot_params *boot_params_p);
 #endif
 
 #endif /* _ASM_X86_XEN_HYPERVISOR_H */
index d595827..91d4b6d 100644 (file)
@@ -118,7 +118,7 @@ static inline bool save_xstate_epilog(void __user *buf, int ia32_frame,
                                      struct fpstate *fpstate)
 {
        struct xregs_state __user *x = buf;
-       struct _fpx_sw_bytes sw_bytes;
+       struct _fpx_sw_bytes sw_bytes = {};
        u32 xfeatures;
        int err;
 
index c410be7..6a190c7 100644 (file)
@@ -742,7 +742,7 @@ dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p)
        return 0;
 }
 
-static char *prepare_command_line(void)
+static char * __init prepare_command_line(void)
 {
 #ifdef CONFIG_CMDLINE_BOOL
 #ifdef CONFIG_CMDLINE_OVERRIDE
index 74f0ec9..a9fc2ac 100644 (file)
@@ -294,11 +294,6 @@ static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
                                   char *dst, char *buf, size_t size)
 {
        unsigned long error_code = X86_PF_PROT | X86_PF_WRITE;
-       char __user *target = (char __user *)dst;
-       u64 d8;
-       u32 d4;
-       u16 d2;
-       u8  d1;
 
        /*
         * This function uses __put_user() independent of whether kernel or user
@@ -320,26 +315,42 @@ static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
         * instructions here would cause infinite nesting.
         */
        switch (size) {
-       case 1:
+       case 1: {
+               u8 d1;
+               u8 __user *target = (u8 __user *)dst;
+
                memcpy(&d1, buf, 1);
                if (__put_user(d1, target))
                        goto fault;
                break;
-       case 2:
+       }
+       case 2: {
+               u16 d2;
+               u16 __user *target = (u16 __user *)dst;
+
                memcpy(&d2, buf, 2);
                if (__put_user(d2, target))
                        goto fault;
                break;
-       case 4:
+       }
+       case 4: {
+               u32 d4;
+               u32 __user *target = (u32 __user *)dst;
+
                memcpy(&d4, buf, 4);
                if (__put_user(d4, target))
                        goto fault;
                break;
-       case 8:
+       }
+       case 8: {
+               u64 d8;
+               u64 __user *target = (u64 __user *)dst;
+
                memcpy(&d8, buf, 8);
                if (__put_user(d8, target))
                        goto fault;
                break;
+       }
        default:
                WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
                return ES_UNSUPPORTED;
@@ -362,11 +373,6 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
                                  char *src, char *buf, size_t size)
 {
        unsigned long error_code = X86_PF_PROT;
-       char __user *s = (char __user *)src;
-       u64 d8;
-       u32 d4;
-       u16 d2;
-       u8  d1;
 
        /*
         * This function uses __get_user() independent of whether kernel or user
@@ -388,26 +394,41 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
         * instructions here would cause infinite nesting.
         */
        switch (size) {
-       case 1:
+       case 1: {
+               u8 d1;
+               u8 __user *s = (u8 __user *)src;
+
                if (__get_user(d1, s))
                        goto fault;
                memcpy(buf, &d1, 1);
                break;
-       case 2:
+       }
+       case 2: {
+               u16 d2;
+               u16 __user *s = (u16 __user *)src;
+
                if (__get_user(d2, s))
                        goto fault;
                memcpy(buf, &d2, 2);
                break;
-       case 4:
+       }
+       case 4: {
+               u32 d4;
+               u32 __user *s = (u32 __user *)src;
+
                if (__get_user(d4, s))
                        goto fault;
                memcpy(buf, &d4, 4);
                break;
-       case 8:
+       }
+       case 8: {
+               u64 d8;
+               u64 __user *s = (u64 __user *)src;
                if (__get_user(d8, s))
                        goto fault;
                memcpy(buf, &d8, 8);
                break;
+       }
        default:
                WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
                return ES_UNSUPPORTED;
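The per-size cases above each cast to a correctly typed __user pointer because __get_user()/__put_user() derive the access width from the pointer type; a plain char __user * target would emit only single-byte accesses for 2/4/8-byte MMIO. A minimal sketch of the idiom (hypothetical helper, assuming <linux/uaccess.h>):

    /* Sketch: store a 16-bit value through a width-specific user pointer. */
    static int example_store_u16(char *dst, u16 val)
    {
            u16 __user *target = (u16 __user *)dst;

            return __put_user(val, target) ? -EFAULT : 0;   /* 2-byte store */
    }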
index 2e076a4..a698196 100644 (file)
@@ -1180,6 +1180,12 @@ void mark_tsc_unstable(char *reason)
 
 EXPORT_SYMBOL_GPL(mark_tsc_unstable);
 
+static void __init tsc_disable_clocksource_watchdog(void)
+{
+       clocksource_tsc_early.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
+       clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
+}
+
 static void __init check_system_tsc_reliable(void)
 {
 #if defined(CONFIG_MGEODEGX1) || defined(CONFIG_MGEODE_LX) || defined(CONFIG_X86_GENERIC)
@@ -1196,6 +1202,23 @@ static void __init check_system_tsc_reliable(void)
 #endif
        if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE))
                tsc_clocksource_reliable = 1;
+
+       /*
+        * Disable the clocksource watchdog when the system has:
+        *  - TSC running at constant frequency
+        *  - TSC which does not stop in C-States
+        *  - the TSC_ADJUST register, which allows detecting even minimal
+        *    modifications
+        *  - not more than two sockets. As the number of sockets cannot be
+        *    evaluated at the early boot stage where this has to be
+        *    invoked, check the number of online memory nodes as a
+        *    fallback solution, which is a reasonable estimate.
+        */
+       if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) &&
+           boot_cpu_has(X86_FEATURE_NONSTOP_TSC) &&
+           boot_cpu_has(X86_FEATURE_TSC_ADJUST) &&
+           nr_online_nodes <= 2)
+               tsc_disable_clocksource_watchdog();
 }
 
 /*
@@ -1387,9 +1410,6 @@ static int __init init_tsc_clocksource(void)
        if (tsc_unstable)
                goto unreg;
 
-       if (tsc_clocksource_reliable || no_tsc_watchdog)
-               clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
-
        if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC_S3))
                clocksource_tsc.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP;
 
@@ -1527,7 +1547,7 @@ void __init tsc_init(void)
        }
 
        if (tsc_clocksource_reliable || no_tsc_watchdog)
-               clocksource_tsc_early.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
+               tsc_disable_clocksource_watchdog();
 
        clocksource_register_khz(&clocksource_tsc_early, tsc_khz);
        detect_art();
index 50a4515..9452dc9 100644 (file)
@@ -30,6 +30,7 @@ struct tsc_adjust {
 };
 
 static DEFINE_PER_CPU(struct tsc_adjust, tsc_adjust);
+static struct timer_list tsc_sync_check_timer;
 
 /*
  * TSC's on different sockets may be reset asynchronously.
@@ -77,6 +78,46 @@ void tsc_verify_tsc_adjust(bool resume)
        }
 }
 
+/*
+ * Normally tsc_sync is checked every time the system enters idle state,
+ * but there is still the caveat that a system might never enter idle,
+ * either because it is too busy or because it is purposely configured
+ * not to enter idle.
+ *
+ * So set up a periodic timer (every 10 minutes) to make sure the check
+ * is always on.
+ */
+
+#define SYNC_CHECK_INTERVAL            (HZ * 600)
+
+static void tsc_sync_check_timer_fn(struct timer_list *unused)
+{
+       int next_cpu;
+
+       tsc_verify_tsc_adjust(false);
+
+       /* Run the check for all onlined CPUs in turn */
+       next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask);
+       if (next_cpu >= nr_cpu_ids)
+               next_cpu = cpumask_first(cpu_online_mask);
+
+       tsc_sync_check_timer.expires += SYNC_CHECK_INTERVAL;
+       add_timer_on(&tsc_sync_check_timer, next_cpu);
+}
+
+static int __init start_sync_check_timer(void)
+{
+       if (!cpu_feature_enabled(X86_FEATURE_TSC_ADJUST) || tsc_clocksource_reliable)
+               return 0;
+
+       timer_setup(&tsc_sync_check_timer, tsc_sync_check_timer_fn, 0);
+       tsc_sync_check_timer.expires = jiffies + SYNC_CHECK_INTERVAL;
+       add_timer(&tsc_sync_check_timer);
+
+       return 0;
+}
+late_initcall(start_sync_check_timer);
+
 static void tsc_sanitize_first_cpu(struct tsc_adjust *cur, s64 bootval,
                                   unsigned int cpu, bool bootcpu)
 {
index 6191861..03b2ce3 100644 (file)
@@ -27,6 +27,7 @@ config KVM
        select MMU_NOTIFIER
        select HAVE_KVM_IRQCHIP
        select HAVE_KVM_IRQFD
+       select HAVE_KVM_DIRTY_RING
        select IRQ_BYPASS_MANAGER
        select HAVE_KVM_IRQ_BYPASS
        select HAVE_KVM_IRQ_ROUTING
@@ -43,6 +44,7 @@ config KVM
        select KVM_GENERIC_DIRTYLOG_READ_PROTECT
        select KVM_VFIO
        select SRCU
+       select INTERVAL_TREE
        select HAVE_KVM_PM_NOTIFIER if PM
        help
          Support hosting fully virtualized guest machines using hardware
index 75dfd27..30f244b 100644 (file)
@@ -7,12 +7,7 @@ ifeq ($(CONFIG_FRAME_POINTER),y)
 OBJECT_FILES_NON_STANDARD_vmenter.o := y
 endif
 
-KVM := ../../../virt/kvm
-
-kvm-y                  += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
-                               $(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o \
-                               $(KVM)/dirty_ring.o $(KVM)/binary_stats.o
-kvm-$(CONFIG_KVM_ASYNC_PF)     += $(KVM)/async_pf.o
+include $(srctree)/virt/kvm/Makefile.kvm
 
 kvm-y                  += x86.o emulate.o i8259.o irq.o lapic.o \
                           i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
index 07e9215..0b920e1 100644 (file)
@@ -523,7 +523,7 @@ void kvm_set_cpu_caps(void)
                F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
                F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) |
                0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM) |
-               F(TOPOEXT) | F(PERFCTR_CORE)
+               F(TOPOEXT) | 0 /* PERFCTR_CORE */
        );
 
        kvm_cpu_cap_mask(CPUID_8000_0001_EDX,
index 54a83a7..543a8c0 100644 (file)
@@ -107,9 +107,10 @@ static int kvm_mmu_rmaps_stat_show(struct seq_file *m, void *v)
        write_lock(&kvm->mmu_lock);
 
        for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
+               int bkt;
+
                slots = __kvm_memslots(kvm, i);
-               for (j = 0; j < slots->used_slots; j++) {
-                       slot = &slots->memslots[j];
+               kvm_for_each_memslot(slot, bkt, slots)
                        for (k = 0; k < KVM_NR_PAGE_SIZES; k++) {
                                rmap = slot->arch.rmap[k];
                                lpage_size = kvm_mmu_slot_lpages(slot, k + 1);
@@ -121,7 +122,6 @@ static int kvm_mmu_rmaps_stat_show(struct seq_file *m, void *v)
                                        cur[index]++;
                                }
                        }
-               }
        }
 
        write_unlock(&kvm->mmu_lock);
index 5e19e6e..6e38a7d 100644 (file)
@@ -164,7 +164,7 @@ static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint,
 static struct kvm_vcpu *get_vcpu_by_vpidx(struct kvm *kvm, u32 vpidx)
 {
        struct kvm_vcpu *vcpu = NULL;
-       int i;
+       unsigned long i;
 
        if (vpidx >= KVM_MAX_VCPUS)
                return NULL;
@@ -1716,7 +1716,8 @@ static __always_inline unsigned long *sparse_set_to_vcpu_mask(
 {
        struct kvm_hv *hv = to_kvm_hv(kvm);
        struct kvm_vcpu *vcpu;
-       int i, bank, sbank = 0;
+       int bank, sbank = 0;
+       unsigned long i;
 
        memset(vp_bitmap, 0,
               KVM_HV_MAX_SPARSE_VCPU_SET_BITS * sizeof(*vp_bitmap));
@@ -1863,7 +1864,7 @@ static void kvm_send_ipi_to_many(struct kvm *kvm, u32 vector,
                .vector = vector
        };
        struct kvm_vcpu *vcpu;
-       int i;
+       unsigned long i;
 
        kvm_for_each_vcpu(i, vcpu, kvm) {
                if (vcpu_bitmap && !test_bit(i, vcpu_bitmap))
@@ -1922,11 +1923,13 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
 
                all_cpus = send_ipi_ex.vp_set.format == HV_GENERIC_SET_ALL;
 
+               if (all_cpus)
+                       goto check_and_send_ipi;
+
                if (!sparse_banks_len)
                        goto ret_success;
 
-               if (!all_cpus &&
-                   kvm_read_guest(kvm,
+               if (kvm_read_guest(kvm,
                                   hc->ingpa + offsetof(struct hv_send_ipi_ex,
                                                        vp_set.bank_contents),
                                   sparse_banks,
@@ -1934,6 +1937,7 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
                        return HV_STATUS_INVALID_HYPERCALL_INPUT;
        }
 
+check_and_send_ipi:
        if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
                return HV_STATUS_INVALID_HYPERCALL_INPUT;
 
@@ -2516,6 +2520,8 @@ int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
 
                case HYPERV_CPUID_NESTED_FEATURES:
                        ent->eax = evmcs_ver;
+                       if (evmcs_ver)
+                               ent->eax |= HV_X64_NESTED_MSR_BITMAP;
 
                        break;
 
index 5a69cce..0b65a76 100644 (file)
@@ -242,7 +242,7 @@ static void pit_do_work(struct kthread_work *work)
        struct kvm_pit *pit = container_of(work, struct kvm_pit, expired);
        struct kvm *kvm = pit->kvm;
        struct kvm_vcpu *vcpu;
-       int i;
+       unsigned long i;
        struct kvm_kpit_state *ps = &pit->pit_state;
 
        if (atomic_read(&ps->reinject) && !atomic_xchg(&ps->irq_ack, 0))
index 0b80263..814064d 100644 (file)
@@ -50,7 +50,7 @@ static void pic_unlock(struct kvm_pic *s)
 {
        bool wakeup = s->wakeup_needed;
        struct kvm_vcpu *vcpu;
-       int i;
+       unsigned long i;
 
        s->wakeup_needed = false;
 
@@ -270,7 +270,8 @@ int kvm_pic_read_irq(struct kvm *kvm)
 
 static void kvm_pic_reset(struct kvm_kpic_state *s)
 {
-       int irq, i;
+       int irq;
+       unsigned long i;
        struct kvm_vcpu *vcpu;
        u8 edge_irr = s->irr & ~s->elcr;
        bool found = false;
index 816a825..decfa36 100644 (file)
@@ -149,7 +149,7 @@ void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu)
 static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic)
 {
        struct kvm_vcpu *vcpu;
-       int i;
+       unsigned long i;
 
        if (RTC_GSI >= IOAPIC_NUM_PINS)
                return;
@@ -184,7 +184,7 @@ static bool rtc_irq_check_coalesced(struct kvm_ioapic *ioapic)
 
 static void ioapic_lazy_update_eoi(struct kvm_ioapic *ioapic, int irq)
 {
-       int i;
+       unsigned long i;
        struct kvm_vcpu *vcpu;
        union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq];
 
index e66e620..539333a 100644 (file)
@@ -81,7 +81,6 @@ struct kvm_ioapic {
        unsigned long irq_states[IOAPIC_NUM_PINS];
        struct kvm_io_device dev;
        struct kvm *kvm;
-       void (*ack_notifier)(void *opaque, int irq);
        spinlock_t lock;
        struct rtc_status rtc_status;
        struct delayed_work eoi_inject;
index 650642b..c2d7cfe 100644 (file)
@@ -56,7 +56,6 @@ struct kvm_pic {
        struct kvm_io_device dev_master;
        struct kvm_io_device dev_slave;
        struct kvm_io_device dev_elcr;
-       void (*ack_notifier)(void *opaque, int irq);
        unsigned long irq_states[PIC_NUM_PINS];
 };
 
index d5b72a0..39ad02d 100644 (file)
@@ -45,9 +45,9 @@ static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
 int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
                struct kvm_lapic_irq *irq, struct dest_map *dest_map)
 {
-       int i, r = -1;
+       int r = -1;
        struct kvm_vcpu *vcpu, *lowest = NULL;
-       unsigned long dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)];
+       unsigned long i, dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)];
        unsigned int dest_vcpus = 0;
 
        if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map))
@@ -320,7 +320,8 @@ int kvm_set_routing_entry(struct kvm *kvm,
 bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
                             struct kvm_vcpu **dest_vcpu)
 {
-       int i, r = 0;
+       int r = 0;
+       unsigned long i;
        struct kvm_vcpu *vcpu;
 
        if (kvm_intr_is_single_vcpu_fast(kvm, irq, dest_vcpu))
index 90e1ffd..3febc34 100644 (file)
@@ -9,6 +9,12 @@
        (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR  \
         | X86_CR4_OSXMMEXCPT | X86_CR4_PGE | X86_CR4_TSD | X86_CR4_FSGSBASE)
 
+#define X86_CR0_PDPTR_BITS    (X86_CR0_CD | X86_CR0_NW | X86_CR0_PG)
+#define X86_CR4_TLBFLUSH_BITS (X86_CR4_PGE | X86_CR4_PCIDE | X86_CR4_PAE | X86_CR4_SMEP)
+#define X86_CR4_PDPTR_BITS    (X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_SMEP)
+
+static_assert(!(KVM_POSSIBLE_CR0_GUEST_BITS & X86_CR0_PDPTR_BITS));
+
 #define BUILD_KVM_GPR_ACCESSORS(lname, uname)                                \
 static __always_inline unsigned long kvm_##lname##_read(struct kvm_vcpu *vcpu)\
 {                                                                            \
@@ -37,6 +43,13 @@ BUILD_KVM_GPR_ACCESSORS(r14, R14)
 BUILD_KVM_GPR_ACCESSORS(r15, R15)
 #endif
 
+/*
+ * avail    dirty
+ *   0        0      register in VMCS/VMCB
+ *   0        1      *INVALID*
+ *   1        0      register in vcpu->arch
+ *   1        1      register in vcpu->arch, needs to be stored back
+ */
 static inline bool kvm_register_is_available(struct kvm_vcpu *vcpu,
                                             enum kvm_reg reg)
 {
@@ -55,13 +68,6 @@ static inline void kvm_register_mark_available(struct kvm_vcpu *vcpu,
        __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
 }
 
-static inline void kvm_register_clear_available(struct kvm_vcpu *vcpu,
-                                              enum kvm_reg reg)
-{
-       __clear_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
-       __clear_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty);
-}
-
 static inline void kvm_register_mark_dirty(struct kvm_vcpu *vcpu,
                                           enum kvm_reg reg)
 {
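The avail/dirty table added above documents the protocol of the x86 register cache. A rough sketch of how a cached read honors it; kvm_register_is_available() is taken from this hunk, while static_call(kvm_x86_cache_reg) and vcpu->arch.regs[] are assumptions based on the KVM_X86_OP(cache_reg) entry earlier in the diff:

    /* Sketch: fetch a register, pulling it from the VMCS/VMCB on demand. */
    static unsigned long example_register_read(struct kvm_vcpu *vcpu, enum kvm_reg reg)
    {
            if (!kvm_register_is_available(vcpu, reg))
                    static_call(kvm_x86_cache_reg)(vcpu, reg);  /* avail=0: still in VMCS/VMCB */

            return vcpu->arch.regs[reg];    /* avail=1: cached in vcpu->arch */
    }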
index c7db2df..b469f45 100644 (file)
@@ -33,7 +33,8 @@ int hv_remote_flush_tlb_with_range(struct kvm *kvm,
 {
        struct kvm_arch *kvm_arch = &kvm->arch;
        struct kvm_vcpu *vcpu;
-       int ret = 0, i, nr_unique_valid_roots;
+       int ret = 0, nr_unique_valid_roots;
+       unsigned long i;
        hpa_t root;
 
        spin_lock(&kvm_arch->hv_root_tdp_lock);
index 759952d..c5028e6 100644 (file)
@@ -185,7 +185,7 @@ void kvm_recalculate_apic_map(struct kvm *kvm)
 {
        struct kvm_apic_map *new, *old = NULL;
        struct kvm_vcpu *vcpu;
-       int i;
+       unsigned long i;
        u32 max_id = 255; /* enough space for any xAPIC ID */
 
        /* Read kvm->arch.apic_map_dirty before kvm->arch.apic_map.  */
@@ -673,41 +673,40 @@ static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu)
        return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
 }
 
-static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
-{
-       u8 val;
-       if (pv_eoi_get_user(vcpu, &val) < 0) {
-               printk(KERN_WARNING "Can't read EOI MSR value: 0x%llx\n",
-                          (unsigned long long)vcpu->arch.pv_eoi.msr_val);
-               return false;
-       }
-       return val & KVM_PV_EOI_ENABLED;
-}
-
 static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
 {
-       if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) {
-               printk(KERN_WARNING "Can't set EOI MSR value: 0x%llx\n",
-                          (unsigned long long)vcpu->arch.pv_eoi.msr_val);
+       if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0)
                return;
-       }
+
        __set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
 }
 
-static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
+static bool pv_eoi_test_and_clr_pending(struct kvm_vcpu *vcpu)
 {
-       if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) {
-               printk(KERN_WARNING "Can't clear EOI MSR value: 0x%llx\n",
-                          (unsigned long long)vcpu->arch.pv_eoi.msr_val);
-               return;
-       }
+       u8 val;
+
+       if (pv_eoi_get_user(vcpu, &val) < 0)
+               return false;
+
+       val &= KVM_PV_EOI_ENABLED;
+
+       if (val && pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0)
+               return false;
+
+       /*
+        * Clear pending bit in any case: it will be set again on vmentry.
+        * While this might not be ideal from a performance point of view,
+        * this makes sure pv eoi is only enabled when we know it's safe.
+        */
        __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
+
+       return val;
 }
 
 static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr)
 {
        int highest_irr;
-       if (apic->vcpu->arch.apicv_active)
+       if (kvm_x86_ops.sync_pir_to_irr)
                highest_irr = static_call(kvm_x86_sync_pir_to_irr)(apic->vcpu);
        else
                highest_irr = apic_find_highest_irr(apic);
@@ -1101,6 +1100,9 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
                        kvm_lapic_set_irr(vector, apic);
                        kvm_make_request(KVM_REQ_EVENT, vcpu);
                        kvm_vcpu_kick(vcpu);
+               } else {
+                       trace_kvm_apicv_accept_irq(vcpu->vcpu_id, delivery_mode,
+                                                  trig_mode, vector);
                }
                break;
 
@@ -1172,8 +1174,8 @@ void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq,
        struct kvm_lapic *src = NULL;
        struct kvm_apic_map *map;
        struct kvm_vcpu *vcpu;
-       unsigned long bitmap;
-       int i, vcpu_idx;
+       unsigned long bitmap, i;
+       int vcpu_idx;
        bool ret;
 
        rcu_read_lock();
@@ -1931,7 +1933,7 @@ void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
        /* If the preempt notifier has already run, it also called apic_timer_expired */
        if (!apic->lapic_timer.hv_timer_in_use)
                goto out;
-       WARN_ON(rcuwait_active(&vcpu->wait));
+       WARN_ON(kvm_vcpu_is_blocking(vcpu));
        apic_timer_expired(apic, false);
        cancel_hv_timer(apic);
 
@@ -2677,7 +2679,6 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
 static void apic_sync_pv_eoi_from_guest(struct kvm_vcpu *vcpu,
                                        struct kvm_lapic *apic)
 {
-       bool pending;
        int vector;
        /*
         * PV EOI state is derived from KVM_APIC_PV_EOI_PENDING in host
@@ -2691,14 +2692,8 @@ static void apic_sync_pv_eoi_from_guest(struct kvm_vcpu *vcpu,
         *      -> host enabled PV EOI, guest executed EOI.
         */
        BUG_ON(!pv_eoi_enabled(vcpu));
-       pending = pv_eoi_get_pending(vcpu);
-       /*
-        * Clear pending bit in any case: it will be set again on vmentry.
-        * While this might not be ideal from performance point of view,
-        * this makes sure pv eoi is only enabled when we know it's safe.
-        */
-       pv_eoi_clr_pending(vcpu);
-       if (pending)
+
+       if (pv_eoi_test_and_clr_pending(vcpu))
                return;
        vector = apic_set_eoi(apic);
        trace_kvm_pv_eoi(apic, vector);
index 9ae6168..e9fbb2c 100644 (file)
@@ -71,7 +71,8 @@ void kvm_init_mmu(struct kvm_vcpu *vcpu);
 void kvm_init_shadow_npt_mmu(struct kvm_vcpu *vcpu, unsigned long cr0,
                             unsigned long cr4, u64 efer, gpa_t nested_cr3);
 void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
-                            bool accessed_dirty, gpa_t new_eptp);
+                            int huge_page_level, bool accessed_dirty,
+                            gpa_t new_eptp);
 bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu);
 int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
                                u64 fault_address, char *insn, int insn_len);
@@ -351,4 +352,17 @@ static inline void kvm_update_page_stats(struct kvm *kvm, int level, int count)
 {
        atomic64_add(count, &kvm->stat.pages[level - 1]);
 }
+
+gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
+                          struct x86_exception *exception);
+
+static inline gpa_t kvm_translate_gpa(struct kvm_vcpu *vcpu,
+                                     struct kvm_mmu *mmu,
+                                     gpa_t gpa, u32 access,
+                                     struct x86_exception *exception)
+{
+       if (mmu != &vcpu->arch.nested_mmu)
+               return gpa;
+       return translate_nested_gpa(vcpu, gpa, access, exception);
+}
 #endif
index 3be9bee..1d275e9 100644 (file)
@@ -335,12 +335,6 @@ static bool check_mmio_spte(struct kvm_vcpu *vcpu, u64 spte)
        return likely(kvm_gen == spte_gen);
 }
 
-static gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
-                                  struct x86_exception *exception)
-{
-        return gpa;
-}
-
 static int is_cpuid_PSE36(void)
 {
        return 1;
@@ -1454,7 +1448,7 @@ static bool kvm_set_pte_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
 {
        u64 *sptep;
        struct rmap_iterator iter;
-       int need_flush = 0;
+       bool need_flush = false;
        u64 new_spte;
        kvm_pfn_t new_pfn;
 
@@ -1466,7 +1460,7 @@ restart:
                rmap_printk("spte %p %llx gfn %llx (%d)\n",
                            sptep, *sptep, gfn, level);
 
-               need_flush = 1;
+               need_flush = true;
 
                if (pte_write(pte)) {
                        pte_list_remove(kvm, rmap_head, sptep);
@@ -1482,7 +1476,7 @@ restart:
 
        if (need_flush && kvm_available_flush_tlb_with_range()) {
                kvm_flush_remote_tlbs_with_address(kvm, gfn, 1);
-               return 0;
+               return false;
        }
 
        return need_flush;
@@ -1582,7 +1576,7 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
                flush = kvm_handle_gfn_range(kvm, range, kvm_unmap_rmapp);
 
        if (is_tdp_mmu_enabled(kvm))
-               flush |= kvm_tdp_mmu_unmap_gfn_range(kvm, range, flush);
+               flush = kvm_tdp_mmu_unmap_gfn_range(kvm, range, flush);
 
        return flush;
 }
@@ -1623,8 +1617,8 @@ static bool kvm_test_age_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
 
        for_each_rmap_spte(rmap_head, &iter, sptep)
                if (is_accessed_spte(*sptep))
-                       return 1;
-       return 0;
+                       return true;
+       return false;
 }
 
 #define RMAP_RECYCLE_THRESHOLD 1000
@@ -1936,7 +1930,11 @@ static void mmu_audit_disable(void) { }
 
 static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
-       return sp->role.invalid ||
+       if (sp->role.invalid)
+               return true;
+
+       /* TDP MMU pages do not use the MMU generation. */
+       return !sp->tdp_mmu_page &&
               unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen);
 }
 
@@ -2082,10 +2080,8 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
        role = vcpu->arch.mmu->mmu_role.base;
        role.level = level;
        role.direct = direct;
-       if (role.direct)
-               role.gpte_is_8_bytes = true;
        role.access = access;
-       if (!direct_mmu && vcpu->arch.mmu->root_level <= PT32_ROOT_LEVEL) {
+       if (role.has_4_byte_gpte) {
                quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level));
                quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;
                role.quadrant = quadrant;
@@ -2173,10 +2169,10 @@ static void shadow_walk_init_using_root(struct kvm_shadow_walk_iterator *iterato
        iterator->shadow_addr = root;
        iterator->level = vcpu->arch.mmu->shadow_root_level;
 
-       if (iterator->level == PT64_ROOT_4LEVEL &&
+       if (iterator->level >= PT64_ROOT_4LEVEL &&
            vcpu->arch.mmu->root_level < PT64_ROOT_4LEVEL &&
            !vcpu->arch.mmu->direct_map)
-               --iterator->level;
+               iterator->level = PT32E_ROOT_LEVEL;
 
        if (iterator->level == PT32E_ROOT_LEVEL) {
                /*
@@ -2561,10 +2557,10 @@ static int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
        return r;
 }
 
-static void kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
+static void kvm_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
        trace_kvm_mmu_unsync_page(sp);
-       ++vcpu->kvm->stat.mmu_unsync;
+       ++kvm->stat.mmu_unsync;
        sp->unsync = 1;
 
        kvm_mmu_mark_parents_unsync(sp);
@@ -2576,7 +2572,7 @@ static void kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
  * were marked unsync (or if there is no shadow page), -EPERM if the SPTE must
  * be write-protected.
  */
-int mmu_try_to_unsync_pages(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot,
+int mmu_try_to_unsync_pages(struct kvm *kvm, const struct kvm_memory_slot *slot,
                            gfn_t gfn, bool can_unsync, bool prefetch)
 {
        struct kvm_mmu_page *sp;
@@ -2587,7 +2583,7 @@ int mmu_try_to_unsync_pages(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot,
         * track machinery is used to write-protect upper-level shadow pages,
         * i.e. this guards the role.level == 4K assertion below!
         */
-       if (kvm_slot_page_track_is_active(vcpu, slot, gfn, KVM_PAGE_TRACK_WRITE))
+       if (kvm_slot_page_track_is_active(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE))
                return -EPERM;
 
        /*
@@ -2596,7 +2592,7 @@ int mmu_try_to_unsync_pages(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot,
         * that case, KVM must complete emulation of the guest TLB flush before
         * allowing shadow pages to become unsync (writable by the guest).
         */
-       for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) {
+       for_each_gfn_indirect_valid_sp(kvm, sp, gfn) {
                if (!can_unsync)
                        return -EPERM;
 
@@ -2615,7 +2611,7 @@ int mmu_try_to_unsync_pages(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot,
                 */
                if (!locked) {
                        locked = true;
-                       spin_lock(&vcpu->kvm->arch.mmu_unsync_pages_lock);
+                       spin_lock(&kvm->arch.mmu_unsync_pages_lock);
 
                        /*
                         * Recheck after taking the spinlock, a different vCPU
@@ -2630,10 +2626,10 @@ int mmu_try_to_unsync_pages(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot,
                }
 
                WARN_ON(sp->role.level != PG_LEVEL_4K);
-               kvm_unsync_page(vcpu, sp);
+               kvm_unsync_page(kvm, sp);
        }
        if (locked)
-               spin_unlock(&vcpu->kvm->arch.mmu_unsync_pages_lock);
+               spin_unlock(&kvm->arch.mmu_unsync_pages_lock);
 
        /*
         * We need to ensure that the marking of unsync pages is visible
@@ -3405,7 +3401,7 @@ static int mmu_first_shadow_root_alloc(struct kvm *kvm)
 {
        struct kvm_memslots *slots;
        struct kvm_memory_slot *slot;
-       int r = 0, i;
+       int r = 0, i, bkt;
 
        /*
         * Check if this is the first shadow root being allocated before
@@ -3430,7 +3426,7 @@ static int mmu_first_shadow_root_alloc(struct kvm *kvm)
 
        for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
                slots = __kvm_memslots(kvm, i);
-               kvm_for_each_memslot(slot, slots) {
+               kvm_for_each_memslot(slot, bkt, slots) {
                        /*
                         * Both of these functions are no-ops if the target is
                         * already allocated, so unconditionally calling both
@@ -3730,21 +3726,13 @@ void kvm_mmu_sync_prev_roots(struct kvm_vcpu *vcpu)
        kvm_mmu_free_roots(vcpu, vcpu->arch.mmu, roots_to_free);
 }
 
-static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gpa_t vaddr,
-                                 u32 access, struct x86_exception *exception)
-{
-       if (exception)
-               exception->error_code = 0;
-       return vaddr;
-}
-
-static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gpa_t vaddr,
-                                        u32 access,
-                                        struct x86_exception *exception)
+static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+                                 gpa_t vaddr, u32 access,
+                                 struct x86_exception *exception)
 {
        if (exception)
                exception->error_code = 0;
-       return vcpu->arch.nested_mmu.translate_gpa(vcpu, vaddr, access, exception);
+       return kvm_translate_gpa(vcpu, mmu, vaddr, access, exception);
 }
 
 static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct)
@@ -3884,7 +3872,7 @@ static bool page_fault_handle_page_track(struct kvm_vcpu *vcpu,
         * guest is writing the page which is write tracked which can
         * not be fixed by page fault handler.
         */
-       if (kvm_slot_page_track_is_active(vcpu, fault->slot, fault->gfn, KVM_PAGE_TRACK_WRITE))
+       if (kvm_slot_page_track_is_active(vcpu->kvm, fault->slot, fault->gfn, KVM_PAGE_TRACK_WRITE))
                return true;
 
        return false;
@@ -3976,6 +3964,34 @@ out_retry:
        return true;
 }
 
+/*
+ * Returns true if the page fault is stale and needs to be retried, i.e. if the
+ * root was invalidated by a memslot update or a relevant mmu_notifier fired.
+ */
+static bool is_page_fault_stale(struct kvm_vcpu *vcpu,
+                               struct kvm_page_fault *fault, int mmu_seq)
+{
+       struct kvm_mmu_page *sp = to_shadow_page(vcpu->arch.mmu->root_hpa);
+
+       /* Special roots, e.g. pae_root, are not backed by shadow pages. */
+       if (sp && is_obsolete_sp(vcpu->kvm, sp))
+               return true;
+
+       /*
+        * Roots without an associated shadow page are considered invalid if
+        * there is a pending request to free obsolete roots.  The request is
+        * only a hint that the current root _may_ be obsolete and needs to be
+        * reloaded, e.g. if the guest frees a PGD that KVM is tracking as a
+        * previous root, then __kvm_mmu_prepare_zap_page() signals all vCPUs
+        * to reload even if no vCPU is actively using the root.
+        */
+       if (!sp && kvm_test_request(KVM_REQ_MMU_RELOAD, vcpu))
+               return true;
+
+       return fault->slot &&
+              mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, fault->hva);
+}
+
 static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 {
        bool is_tdp_mmu_fault = is_tdp_mmu(vcpu->arch.mmu);
@@ -4013,8 +4029,9 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
        else
                write_lock(&vcpu->kvm->mmu_lock);
 
-       if (fault->slot && mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, fault->hva))
+       if (is_page_fault_stale(vcpu, fault, mmu_seq))
                goto out_unlock;
+
        r = make_mmu_pages_available(vcpu);
        if (r)
                goto out_unlock;
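The new is_page_fault_stale() helper above folds three staleness signals into a single check. Below is a minimal userspace sketch of that decision with the kernel structures replaced by plain booleans; the struct and field names are illustrative stand-ins, not kernel API.

#include <stdbool.h>
#include <stdio.h>

struct fault_model {
        bool root_is_shadow_page;       /* to_shadow_page(root_hpa) != NULL */
        bool root_obsolete;             /* is_obsolete_sp() on that page    */
        bool reload_requested;          /* KVM_REQ_MMU_RELOAD is pending    */
        bool has_slot;                  /* fault->slot != NULL              */
        bool notifier_retry;            /* mmu_notifier_retry_hva() said so */
};

static bool page_fault_is_stale(const struct fault_model *f)
{
        /* Roots backed by a shadow page are stale once that page is obsolete. */
        if (f->root_is_shadow_page && f->root_obsolete)
                return true;

        /* Special roots (e.g. pae_root) have no shadow page; for them a
         * pending reload request is the only obsolescence signal. */
        if (!f->root_is_shadow_page && f->reload_requested)
                return true;

        /* Otherwise fall back to the mmu_notifier retry check. */
        return f->has_slot && f->notifier_retry;
}

int main(void)
{
        struct fault_model f = { .root_is_shadow_page = false,
                                 .reload_requested = true };

        printf("stale = %d\n", page_fault_is_stale(&f));       /* 1 */
        return 0;
}

Both page-fault paths, direct_page_fault() above and the paging_tmpl.h walker later in this diff, now call the helper instead of checking only mmu_notifier_retry_hva().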
@@ -4355,22 +4372,28 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
 
 static void
 __reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check,
-                           u64 pa_bits_rsvd, bool execonly)
+                           u64 pa_bits_rsvd, bool execonly, int huge_page_level)
 {
        u64 high_bits_rsvd = pa_bits_rsvd & rsvd_bits(0, 51);
+       u64 large_1g_rsvd = 0, large_2m_rsvd = 0;
        u64 bad_mt_xwr;
 
+       if (huge_page_level < PG_LEVEL_1G)
+               large_1g_rsvd = rsvd_bits(7, 7);
+       if (huge_page_level < PG_LEVEL_2M)
+               large_2m_rsvd = rsvd_bits(7, 7);
+
        rsvd_check->rsvd_bits_mask[0][4] = high_bits_rsvd | rsvd_bits(3, 7);
        rsvd_check->rsvd_bits_mask[0][3] = high_bits_rsvd | rsvd_bits(3, 7);
-       rsvd_check->rsvd_bits_mask[0][2] = high_bits_rsvd | rsvd_bits(3, 6);
-       rsvd_check->rsvd_bits_mask[0][1] = high_bits_rsvd | rsvd_bits(3, 6);
+       rsvd_check->rsvd_bits_mask[0][2] = high_bits_rsvd | rsvd_bits(3, 6) | large_1g_rsvd;
+       rsvd_check->rsvd_bits_mask[0][1] = high_bits_rsvd | rsvd_bits(3, 6) | large_2m_rsvd;
        rsvd_check->rsvd_bits_mask[0][0] = high_bits_rsvd;
 
        /* large page */
        rsvd_check->rsvd_bits_mask[1][4] = rsvd_check->rsvd_bits_mask[0][4];
        rsvd_check->rsvd_bits_mask[1][3] = rsvd_check->rsvd_bits_mask[0][3];
-       rsvd_check->rsvd_bits_mask[1][2] = high_bits_rsvd | rsvd_bits(12, 29);
-       rsvd_check->rsvd_bits_mask[1][1] = high_bits_rsvd | rsvd_bits(12, 20);
+       rsvd_check->rsvd_bits_mask[1][2] = high_bits_rsvd | rsvd_bits(12, 29) | large_1g_rsvd;
+       rsvd_check->rsvd_bits_mask[1][1] = high_bits_rsvd | rsvd_bits(12, 20) | large_2m_rsvd;
        rsvd_check->rsvd_bits_mask[1][0] = rsvd_check->rsvd_bits_mask[0][0];
 
        bad_mt_xwr = 0xFFull << (2 * 8);        /* bits 3..5 must not be 2 */
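The new large_1g_rsvd/large_2m_rsvd terms make bit 7 (the page-size bit) reserved at any level where the configured maximum huge-page level forbids a large mapping. A standalone sketch of the arithmetic follows; the PG_LEVEL_* values and the rsvd_bits() mask computation are written out locally for illustration and are not the kernel definitions.

#include <stdint.h>
#include <stdio.h>

#define PG_LEVEL_4K 1
#define PG_LEVEL_2M 2
#define PG_LEVEL_1G 3

/* Mask covering bits s..e inclusive, the same value the kernel helper yields. */
static uint64_t rsvd_bits(int s, int e)
{
        return ((2ULL << (e - s)) - 1) << s;
}

int main(void)
{
        /* Assume the nested EPT configuration allows 2M but not 1G pages. */
        int huge_page_level = PG_LEVEL_2M;
        uint64_t large_1g_rsvd = 0, large_2m_rsvd = 0;

        if (huge_page_level < PG_LEVEL_1G)
                large_1g_rsvd = rsvd_bits(7, 7);
        if (huge_page_level < PG_LEVEL_2M)
                large_2m_rsvd = rsvd_bits(7, 7);

        /* 0x80 for level 3 (would-be 1G page), 0x0 for level 2 (2M stays legal). */
        printf("1g rsvd %#llx, 2m rsvd %#llx\n",
               (unsigned long long)large_1g_rsvd,
               (unsigned long long)large_2m_rsvd);
        return 0;
}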
@@ -4386,10 +4409,11 @@ __reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check,
 }
 
 static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
-               struct kvm_mmu *context, bool execonly)
+               struct kvm_mmu *context, bool execonly, int huge_page_level)
 {
        __reset_rsvds_bits_mask_ept(&context->guest_rsvd_check,
-                                   vcpu->arch.reserved_gpa_bits, execonly);
+                                   vcpu->arch.reserved_gpa_bits, execonly,
+                                   huge_page_level);
 }
 
 static inline u64 reserved_hpa_bits(void)
@@ -4465,7 +4489,8 @@ reset_tdp_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
                                        false, true);
        else
                __reset_rsvds_bits_mask_ept(shadow_zero_check,
-                                           reserved_hpa_bits(), false);
+                                           reserved_hpa_bits(), false,
+                                           max_huge_page_level);
 
        if (!shadow_me_mask)
                return;
@@ -4485,7 +4510,8 @@ reset_ept_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
                                struct kvm_mmu *context, bool execonly)
 {
        __reset_rsvds_bits_mask_ept(&context->shadow_zero_check,
-                                   reserved_hpa_bits(), execonly);
+                                   reserved_hpa_bits(), execonly,
+                                   max_huge_page_level);
 }
 
 #define BYTE_MASK(access) \
@@ -4734,7 +4760,7 @@ kvm_calc_tdp_mmu_root_page_role(struct kvm_vcpu *vcpu,
        role.base.ad_disabled = (shadow_accessed_mask == 0);
        role.base.level = kvm_mmu_get_tdp_level(vcpu);
        role.base.direct = true;
-       role.base.gpte_is_8_bytes = true;
+       role.base.has_4_byte_gpte = false;
 
        return role;
 }
@@ -4779,7 +4805,7 @@ kvm_calc_shadow_root_page_role_common(struct kvm_vcpu *vcpu,
 
        role.base.smep_andnot_wp = role.ext.cr4_smep && !____is_cr0_wp(regs);
        role.base.smap_andnot_wp = role.ext.cr4_smap && !____is_cr0_wp(regs);
-       role.base.gpte_is_8_bytes = ____is_cr0_pg(regs) && ____is_cr4_pae(regs);
+       role.base.has_4_byte_gpte = ____is_cr0_pg(regs) && !____is_cr4_pae(regs);
 
        return role;
 }
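The rename is more than a pure inversion of gpte_is_8_bytes: with CR0.PG clear both the old and the new field end up zero, so only legacy 32-bit paging (CR0.PG set, CR4.PAE clear) marks the role as using 4-byte guest PTEs. A tiny standalone sketch of the new predicate, purely illustrative:

#include <stdbool.h>
#include <stdio.h>

static bool has_4_byte_gpte(bool cr0_pg, bool cr4_pae)
{
        /* 4-byte guest PTEs exist only with non-PAE 32-bit paging. */
        return cr0_pg && !cr4_pae;
}

int main(void)
{
        printf("PG=0 PAE=0 -> %d\n", has_4_byte_gpte(false, false)); /* 0 */
        printf("PG=1 PAE=0 -> %d\n", has_4_byte_gpte(true,  false)); /* 1 */
        printf("PG=1 PAE=1 -> %d\n", has_4_byte_gpte(true,  true));  /* 0 */
        return 0;
}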
@@ -4855,7 +4881,7 @@ void kvm_init_shadow_npt_mmu(struct kvm_vcpu *vcpu, unsigned long cr0,
        struct kvm_mmu *context = &vcpu->arch.guest_mmu;
        struct kvm_mmu_role_regs regs = {
                .cr0 = cr0,
-               .cr4 = cr4,
+               .cr4 = cr4 & ~X86_CR4_PKE,
                .efer = efer,
        };
        union kvm_mmu_role new_role;
@@ -4878,7 +4904,7 @@ kvm_calc_shadow_ept_root_page_role(struct kvm_vcpu *vcpu, bool accessed_dirty,
        role.base.smm = vcpu->arch.root_mmu.mmu_role.base.smm;
 
        role.base.level = level;
-       role.base.gpte_is_8_bytes = true;
+       role.base.has_4_byte_gpte = false;
        role.base.direct = false;
        role.base.ad_disabled = !accessed_dirty;
        role.base.guest_mode = true;
@@ -4893,7 +4919,8 @@ kvm_calc_shadow_ept_root_page_role(struct kvm_vcpu *vcpu, bool accessed_dirty,
 }
 
 void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
-                            bool accessed_dirty, gpa_t new_eptp)
+                            int huge_page_level, bool accessed_dirty,
+                            gpa_t new_eptp)
 {
        struct kvm_mmu *context = &vcpu->arch.guest_mmu;
        u8 level = vmx_eptp_page_walk_level(new_eptp);
@@ -4919,8 +4946,8 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
        context->direct_map = false;
 
        update_permission_bitmask(context, true);
-       update_pkru_bitmask(context);
-       reset_rsvds_bits_mask_ept(vcpu, context, execonly);
+       context->pkru_mask = 0;
+       reset_rsvds_bits_mask_ept(vcpu, context, execonly, huge_page_level);
        reset_ept_shadow_zero_bits_mask(vcpu, context, execonly);
 }
 EXPORT_SYMBOL_GPL(kvm_init_shadow_ept_mmu);
@@ -4984,13 +5011,13 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
         * the gva_to_gpa functions between mmu and nested_mmu are swapped.
         */
        if (!is_paging(vcpu))
-               g_context->gva_to_gpa = nonpaging_gva_to_gpa_nested;
+               g_context->gva_to_gpa = nonpaging_gva_to_gpa;
        else if (is_long_mode(vcpu))
-               g_context->gva_to_gpa = paging64_gva_to_gpa_nested;
+               g_context->gva_to_gpa = paging64_gva_to_gpa;
        else if (is_pae(vcpu))
-               g_context->gva_to_gpa = paging64_gva_to_gpa_nested;
+               g_context->gva_to_gpa = paging64_gva_to_gpa;
        else
-               g_context->gva_to_gpa = paging32_gva_to_gpa_nested;
+               g_context->gva_to_gpa = paging32_gva_to_gpa;
 
        reset_guest_paging_metadata(vcpu, g_context);
 }
@@ -5025,6 +5052,14 @@ void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu)
        /*
         * Invalidate all MMU roles to force them to reinitialize as CPUID
         * information is factored into reserved bit calculations.
+        *
+        * Correctly handling multiple vCPU models (with respect to paging and
+        * physical address properties) in a single VM would require tracking
+        * all relevant CPUID information in kvm_mmu_page_role. That is very
+        * undesirable as it would increase the memory requirements for
+        * gfn_track (see struct kvm_mmu_page_role comments).  For now that
+        * problem is swept under the rug; KVM's CPUID API is horrific and
+        * it's all but impossible to solve without introducing a new API.
         */
        vcpu->arch.root_mmu.mmu_role.ext.valid = 0;
        vcpu->arch.guest_mmu.mmu_role.ext.valid = 0;
@@ -5032,24 +5067,10 @@ void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu)
        kvm_mmu_reset_context(vcpu);
 
        /*
-        * KVM does not correctly handle changing guest CPUID after KVM_RUN, as
-        * MAXPHYADDR, GBPAGES support, AMD reserved bit behavior, etc.. aren't
-        * tracked in kvm_mmu_page_role.  As a result, KVM may miss guest page
-        * faults due to reusing SPs/SPTEs.  Alert userspace, but otherwise
-        * sweep the problem under the rug.
-        *
-        * KVM's horrific CPUID ABI makes the problem all but impossible to
-        * solve, as correctly handling multiple vCPU models (with respect to
-        * paging and physical address properties) in a single VM would require
-        * tracking all relevant CPUID information in kvm_mmu_page_role.  That
-        * is very undesirable as it would double the memory requirements for
-        * gfn_track (see struct kvm_mmu_page_role comments), and in practice
-        * no sane VMM mucks with the core vCPU model on the fly.
+        * Changing guest CPUID after KVM_RUN is forbidden, see the comment in
+        * kvm_arch_vcpu_ioctl().
         */
-       if (vcpu->arch.last_vmentry_cpu != -1) {
-               pr_warn_ratelimited("KVM: KVM_SET_CPUID{,2} after KVM_RUN may cause guest instability\n");
-               pr_warn_ratelimited("KVM: KVM_SET_CPUID{,2} will fail after KVM_RUN starting with Linux 5.16\n");
-       }
+       KVM_BUG_ON(vcpu->arch.last_vmentry_cpu != -1, vcpu->kvm);
 }
 
 void kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
@@ -5161,7 +5182,7 @@ static bool detect_write_misaligned(struct kvm_mmu_page *sp, gpa_t gpa,
                 gpa, bytes, sp->role.word);
 
        offset = offset_in_page(gpa);
-       pte_size = sp->role.gpte_is_8_bytes ? 8 : 4;
+       pte_size = sp->role.has_4_byte_gpte ? 4 : 8;
 
        /*
         * Sometimes, the OS only writes the last byte to update status
@@ -5185,7 +5206,7 @@ static u64 *get_written_sptes(struct kvm_mmu_page *sp, gpa_t gpa, int *nspte)
        page_offset = offset_in_page(gpa);
        level = sp->role.level;
        *nspte = 1;
-       if (!sp->role.gpte_is_8_bytes) {
+       if (sp->role.has_4_byte_gpte) {
                page_offset <<= 1;      /* 32->64 */
                /*
                 * A 32-bit pde maps 4MB while the shadow pdes map
@@ -5369,7 +5390,7 @@ void kvm_mmu_invalidate_gva(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 
 void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
 {
-       kvm_mmu_invalidate_gva(vcpu, vcpu->arch.mmu, gva, INVALID_PAGE);
+       kvm_mmu_invalidate_gva(vcpu, vcpu->arch.walk_mmu, gva, INVALID_PAGE);
        ++vcpu->stat.invlpg;
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_invlpg);
@@ -5497,10 +5518,13 @@ static int __kvm_mmu_create(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
 
        mmu->root_hpa = INVALID_PAGE;
        mmu->root_pgd = 0;
-       mmu->translate_gpa = translate_gpa;
        for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
                mmu->prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;
 
+       /* vcpu->arch.guest_mmu isn't used when !tdp_enabled. */
+       if (!tdp_enabled && mmu == &vcpu->arch.guest_mmu)
+               return 0;
+
        /*
         * When using PAE paging, the four PDPTEs are treated as 'root' pages,
         * while the PDP table is a per-vCPU construct that's allocated at MMU
@@ -5510,7 +5534,7 @@ static int __kvm_mmu_create(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
         * generally doesn't use PAE paging and can skip allocating the PDP
         * table.  The main exception, handled here, is SVM's 32-bit NPT.  The
         * other exception is for shadowing L1's 32-bit or PAE NPT on 64-bit
-        * KVM; that horror is handled on-demand by mmu_alloc_shadow_roots().
+        * KVM; that horror is handled on-demand by mmu_alloc_special_roots().
         */
        if (tdp_enabled && kvm_mmu_get_tdp_level(vcpu) > PT32E_ROOT_LEVEL)
                return 0;
@@ -5555,8 +5579,6 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu)
        vcpu->arch.mmu = &vcpu->arch.root_mmu;
        vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
 
-       vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa;
-
        ret = __kvm_mmu_create(vcpu, &vcpu->arch.guest_mmu);
        if (ret)
                return ret;
@@ -5715,6 +5737,7 @@ static bool __kvm_zap_rmaps(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
 {
        const struct kvm_memory_slot *memslot;
        struct kvm_memslots *slots;
+       struct kvm_memslot_iter iter;
        bool flush = false;
        gfn_t start, end;
        int i;
@@ -5724,10 +5747,12 @@ static bool __kvm_zap_rmaps(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
 
        for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
                slots = __kvm_memslots(kvm, i);
-               kvm_for_each_memslot(memslot, slots) {
+
+               kvm_for_each_memslot_in_gfn_range(&iter, slots, gfn_start, gfn_end) {
+                       memslot = iter.slot;
                        start = max(gfn_start, memslot->base_gfn);
                        end = min(gfn_end, memslot->base_gfn + memslot->npages);
-                       if (start >= end)
+                       if (WARN_ON_ONCE(start >= end))
                                continue;
 
                        flush = slot_handle_level_range(kvm, memslot, kvm_zap_rmapp,
@@ -5748,6 +5773,9 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
        bool flush;
        int i;
 
+       if (WARN_ON_ONCE(gfn_end <= gfn_start))
+               return;
+
        write_lock(&kvm->mmu_lock);
 
        kvm_inc_notifier_count(kvm, gfn_start, gfn_end);
@@ -5854,8 +5882,6 @@ restart:
 void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
                                   const struct kvm_memory_slot *slot)
 {
-       bool flush = false;
-
        if (kvm_memslots_have_rmaps(kvm)) {
                write_lock(&kvm->mmu_lock);
                /*
@@ -5863,17 +5889,14 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
                 * logging at a 4k granularity and never creates collapsible
                 * 2m SPTEs during dirty logging.
                 */
-               flush = slot_handle_level_4k(kvm, slot, kvm_mmu_zap_collapsible_spte, true);
-               if (flush)
+               if (slot_handle_level_4k(kvm, slot, kvm_mmu_zap_collapsible_spte, true))
                        kvm_arch_flush_remote_tlbs_memslot(kvm, slot);
                write_unlock(&kvm->mmu_lock);
        }
 
        if (is_tdp_mmu_enabled(kvm)) {
                read_lock(&kvm->mmu_lock);
-               flush = kvm_tdp_mmu_zap_collapsible_sptes(kvm, slot, flush);
-               if (flush)
-                       kvm_arch_flush_remote_tlbs_memslot(kvm, slot);
+               kvm_tdp_mmu_zap_collapsible_sptes(kvm, slot);
                read_unlock(&kvm->mmu_lock);
        }
 }
@@ -6142,30 +6165,6 @@ out:
        return ret;
 }
 
-/*
- * Calculate mmu pages needed for kvm.
- */
-unsigned long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm)
-{
-       unsigned long nr_mmu_pages;
-       unsigned long nr_pages = 0;
-       struct kvm_memslots *slots;
-       struct kvm_memory_slot *memslot;
-       int i;
-
-       for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
-               slots = __kvm_memslots(kvm, i);
-
-               kvm_for_each_memslot(memslot, slots)
-                       nr_pages += memslot->npages;
-       }
-
-       nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000;
-       nr_mmu_pages = max(nr_mmu_pages, KVM_MIN_ALLOC_MMU_PAGES);
-
-       return nr_mmu_pages;
-}
-
 void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
 {
        kvm_mmu_unload(vcpu);
@@ -6182,23 +6181,46 @@ void kvm_mmu_module_exit(void)
        mmu_audit_disable();
 }
 
+/*
+ * Calculate the effective recovery period, accounting for '0' meaning "let KVM
+ * select a halving time of 1 hour".  Returns true if recovery is enabled.
+ */
+static bool calc_nx_huge_pages_recovery_period(uint *period)
+{
+       /*
+        * Use READ_ONCE to get the params, this may be called outside of the
+        * param setters, e.g. by the kthread to compute its next timeout.
+        */
+       bool enabled = READ_ONCE(nx_huge_pages);
+       uint ratio = READ_ONCE(nx_huge_pages_recovery_ratio);
+
+       if (!enabled || !ratio)
+               return false;
+
+       *period = READ_ONCE(nx_huge_pages_recovery_period_ms);
+       if (!*period) {
+               /* Make sure the period is not less than one second.  */
+               ratio = min(ratio, 3600u);
+               *period = 60 * 60 * 1000 / ratio;
+       }
+       return true;
+}
+
 static int set_nx_huge_pages_recovery_param(const char *val, const struct kernel_param *kp)
 {
        bool was_recovery_enabled, is_recovery_enabled;
        uint old_period, new_period;
        int err;
 
-       was_recovery_enabled = nx_huge_pages_recovery_ratio;
-       old_period = nx_huge_pages_recovery_period_ms;
+       was_recovery_enabled = calc_nx_huge_pages_recovery_period(&old_period);
 
        err = param_set_uint(val, kp);
        if (err)
                return err;
 
-       is_recovery_enabled = nx_huge_pages_recovery_ratio;
-       new_period = nx_huge_pages_recovery_period_ms;
+       is_recovery_enabled = calc_nx_huge_pages_recovery_period(&new_period);
 
-       if (READ_ONCE(nx_huge_pages) && is_recovery_enabled &&
+       if (is_recovery_enabled &&
            (!was_recovery_enabled || old_period > new_period)) {
                struct kvm *kvm;
 
@@ -6262,18 +6284,13 @@ static void kvm_recover_nx_lpages(struct kvm *kvm)
 
 static long get_nx_lpage_recovery_timeout(u64 start_time)
 {
-       uint ratio = READ_ONCE(nx_huge_pages_recovery_ratio);
-       uint period = READ_ONCE(nx_huge_pages_recovery_period_ms);
+       bool enabled;
+       uint period;
 
-       if (!period && ratio) {
-               /* Make sure the period is not less than one second.  */
-               ratio = min(ratio, 3600u);
-               period = 60 * 60 * 1000 / ratio;
-       }
+       enabled = calc_nx_huge_pages_recovery_period(&period);
 
-       return READ_ONCE(nx_huge_pages) && ratio
-               ? start_time + msecs_to_jiffies(period) - get_jiffies_64()
-               : MAX_SCHEDULE_TIMEOUT;
+       return enabled ? start_time + msecs_to_jiffies(period) - get_jiffies_64()
+                      : MAX_SCHEDULE_TIMEOUT;
 }
 
 static int kvm_nx_lpage_recovery_worker(struct kvm *kvm, uintptr_t data)
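calc_nx_huge_pages_recovery_period() centralizes arithmetic that was previously duplicated in the param setter and the recovery worker: recovery is enabled only if nx_huge_pages and the ratio are both non-zero, and a period of 0 means "derive it from the ratio, never below one second". A standalone model of that calculation (userspace sketch, not kernel code):

#include <stdbool.h>
#include <stdio.h>

static bool calc_recovery_period(bool nx_huge_pages, unsigned int ratio,
                                 unsigned int period_ms, unsigned int *period)
{
        if (!nx_huge_pages || !ratio)
                return false;

        if (!period_ms) {
                /* Cap the ratio so the derived period is at least 1000 ms. */
                if (ratio > 3600)
                        ratio = 3600;
                period_ms = 60 * 60 * 1000 / ratio;
        }

        *period = period_ms;
        return true;
}

int main(void)
{
        unsigned int period;

        /* e.g. ratio 60 with no explicit period: one recovery pass per minute */
        if (calc_recovery_period(true, 60, 0, &period))
                printf("period = %u ms\n", period);     /* 60000 */
        return 0;
}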
index 52c6527..da6166b 100644
@@ -104,7 +104,7 @@ static inline int kvm_mmu_page_as_id(struct kvm_mmu_page *sp)
        return kvm_mmu_role_as_id(sp->role);
 }
 
-static inline bool kvm_vcpu_ad_need_write_protect(struct kvm_vcpu *vcpu)
+static inline bool kvm_mmu_page_ad_need_write_protect(struct kvm_mmu_page *sp)
 {
        /*
         * When using the EPT page-modification log, the GPAs in the CPU dirty
@@ -112,13 +112,12 @@ static inline bool kvm_vcpu_ad_need_write_protect(struct kvm_vcpu *vcpu)
         * on write protection to record dirty pages, which bypasses PML, since
         * writes now result in a vmexit.  Note, the check on CPU dirty logging
         * being enabled is mandatory as the bits used to denote WP-only SPTEs
-        * are reserved for NPT w/ PAE (32-bit KVM).
+        * are reserved for PAE paging (32-bit KVM).
         */
-       return vcpu->arch.mmu == &vcpu->arch.guest_mmu &&
-              kvm_x86_ops.cpu_dirty_log_size;
+       return kvm_x86_ops.cpu_dirty_log_size && sp->role.guest_mode;
 }
 
-int mmu_try_to_unsync_pages(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot,
+int mmu_try_to_unsync_pages(struct kvm *kvm, const struct kvm_memory_slot *slot,
                            gfn_t gfn, bool can_unsync, bool prefetch);
 
 void kvm_mmu_gfn_disallow_lpage(const struct kvm_memory_slot *slot, gfn_t gfn);
index b8151bb..de5e8e4 100644
@@ -35,7 +35,7 @@
                         " %snxe %sad root %u %s%c",                    \
                         __entry->mmu_valid_gen,                        \
                         __entry->gfn, role.level,                      \
-                        role.gpte_is_8_bytes ? 8 : 4,                  \
+                        role.has_4_byte_gpte ? 4 : 8,                  \
                         role.quadrant,                                 \
                         role.direct ? " direct" : "",                  \
                         access_str[role.access],                       \
index cc4eb5b..68eb1fb 100644
@@ -173,9 +173,9 @@ EXPORT_SYMBOL_GPL(kvm_slot_page_track_remove_page);
 /*
  * check if the corresponding access on the specified guest page is tracked.
  */
-bool kvm_slot_page_track_is_active(struct kvm_vcpu *vcpu,
-                                  struct kvm_memory_slot *slot, gfn_t gfn,
-                                  enum kvm_page_track_mode mode)
+bool kvm_slot_page_track_is_active(struct kvm *kvm,
+                                  const struct kvm_memory_slot *slot,
+                                  gfn_t gfn, enum kvm_page_track_mode mode)
 {
        int index;
 
@@ -186,7 +186,7 @@ bool kvm_slot_page_track_is_active(struct kvm_vcpu *vcpu,
                return false;
 
        if (mode == KVM_PAGE_TRACK_WRITE &&
-           !kvm_page_track_write_tracking_enabled(vcpu->kvm))
+           !kvm_page_track_write_tracking_enabled(kvm))
                return false;
 
        index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K);
index f87d368..5b5bdac 100644
@@ -403,9 +403,8 @@ retry_walk:
                walker->table_gfn[walker->level - 1] = table_gfn;
                walker->pte_gpa[walker->level - 1] = pte_gpa;
 
-               real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn),
-                                             nested_access,
-                                             &walker->fault);
+               real_gpa = kvm_translate_gpa(vcpu, mmu, gfn_to_gpa(table_gfn),
+                                            nested_access, &walker->fault);
 
                /*
                 * FIXME: This can happen if emulation (of an INS/OUTS
@@ -467,7 +466,7 @@ retry_walk:
        if (PTTYPE == 32 && walker->level > PG_LEVEL_4K && is_cpuid_PSE36())
                gfn += pse36_gfn_delta(pte);
 
-       real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn), access, &walker->fault);
+       real_gpa = kvm_translate_gpa(vcpu, mmu, gfn_to_gpa(gfn), access, &walker->fault);
        if (real_gpa == UNMAPPED_GVA)
                return 0;
 
@@ -547,16 +546,6 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
                                        access);
 }
 
-#if PTTYPE != PTTYPE_EPT
-static int FNAME(walk_addr_nested)(struct guest_walker *walker,
-                                  struct kvm_vcpu *vcpu, gva_t addr,
-                                  u32 access)
-{
-       return FNAME(walk_addr_generic)(walker, vcpu, &vcpu->arch.nested_mmu,
-                                       addr, access);
-}
-#endif
-
 static bool
 FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
                     u64 *spte, pt_element_t gpte, bool no_dirty_log)
@@ -911,7 +900,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
 
        r = RET_PF_RETRY;
        write_lock(&vcpu->kvm->mmu_lock);
-       if (fault->slot && mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, fault->hva))
+
+       if (is_page_fault_stale(vcpu, fault, mmu_seq))
                goto out_unlock;
 
        kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT);
@@ -999,50 +989,29 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa)
 }
 
 /* Note, @addr is a GPA when gva_to_gpa() translates an L2 GPA to an L1 GPA. */
-static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gpa_t addr, u32 access,
+static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+                              gpa_t addr, u32 access,
                               struct x86_exception *exception)
 {
        struct guest_walker walker;
        gpa_t gpa = UNMAPPED_GVA;
        int r;
 
-       r = FNAME(walk_addr)(&walker, vcpu, addr, access);
-
-       if (r) {
-               gpa = gfn_to_gpa(walker.gfn);
-               gpa |= addr & ~PAGE_MASK;
-       } else if (exception)
-               *exception = walker.fault;
-
-       return gpa;
-}
-
-#if PTTYPE != PTTYPE_EPT
-/* Note, gva_to_gpa_nested() is only used to translate L2 GVAs. */
-static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gpa_t vaddr,
-                                     u32 access,
-                                     struct x86_exception *exception)
-{
-       struct guest_walker walker;
-       gpa_t gpa = UNMAPPED_GVA;
-       int r;
-
 #ifndef CONFIG_X86_64
        /* A 64-bit GVA should be impossible on 32-bit KVM. */
-       WARN_ON_ONCE(vaddr >> 32);
+       WARN_ON_ONCE((addr >> 32) && mmu == vcpu->arch.walk_mmu);
 #endif
 
-       r = FNAME(walk_addr_nested)(&walker, vcpu, vaddr, access);
+       r = FNAME(walk_addr_generic)(&walker, vcpu, mmu, addr, access);
 
        if (r) {
                gpa = gfn_to_gpa(walker.gfn);
-               gpa |= vaddr & ~PAGE_MASK;
+               gpa |= addr & ~PAGE_MASK;
        } else if (exception)
                *exception = walker.fault;
 
        return gpa;
 }
-#endif
 
 /*
  * Using the cached information from sp->gfns is safe because:
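With the MMU passed explicitly, the *_nested clones removed above become redundant: init_kvm_nested_mmu() can point gva_to_gpa at the same walker for both the L1 walk and the L2-to-L1 walk, and only the kvm_mmu argument differs. A toy model of that consolidation, using made-up types rather than the kernel API:

#include <stdio.h>

typedef unsigned long long gpa_t;

struct toy_mmu {
        gpa_t base;             /* stand-in for the per-MMU translation state */
};

static gpa_t gva_to_gpa(const struct toy_mmu *mmu, gpa_t addr)
{
        return mmu->base + addr;        /* same walk body, different MMU state */
}

int main(void)
{
        struct toy_mmu root_mmu   = { .base = 0x100000 };
        struct toy_mmu nested_mmu = { .base = 0x900000 };

        printf("%llx\n", gva_to_gpa(&root_mmu,   0x10));
        printf("%llx\n", gva_to_gpa(&nested_mmu, 0x10));
        return 0;
}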
index 0c76c45..8a7b032 100644
@@ -90,7 +90,7 @@ static bool kvm_is_mmio_pfn(kvm_pfn_t pfn)
 }
 
 bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
-              struct kvm_memory_slot *slot,
+              const struct kvm_memory_slot *slot,
               unsigned int pte_access, gfn_t gfn, kvm_pfn_t pfn,
               u64 old_spte, bool prefetch, bool can_unsync,
               bool host_writable, u64 *new_spte)
@@ -101,7 +101,7 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 
        if (sp->role.ad_disabled)
                spte |= SPTE_TDP_AD_DISABLED_MASK;
-       else if (kvm_vcpu_ad_need_write_protect(vcpu))
+       else if (kvm_mmu_page_ad_need_write_protect(sp))
                spte |= SPTE_TDP_AD_WRPROT_ONLY_MASK;
 
        /*
@@ -161,7 +161,7 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
                 * e.g. it's write-tracked (upper-level SPs) or has one or more
                 * shadow pages and unsync'ing pages is not allowed.
                 */
-               if (mmu_try_to_unsync_pages(vcpu, slot, gfn, can_unsync, prefetch)) {
+               if (mmu_try_to_unsync_pages(vcpu->kvm, slot, gfn, can_unsync, prefetch)) {
                        pgprintk("%s: found shadow page for %llx, marking ro\n",
                                 __func__, gfn);
                        wrprot = true;
index cc432f9..a4af2a4 100644
@@ -330,7 +330,7 @@ static inline u64 get_mmio_spte_generation(u64 spte)
 }
 
 bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
-              struct kvm_memory_slot *slot,
+              const struct kvm_memory_slot *slot,
               unsigned int pte_access, gfn_t gfn, kvm_pfn_t pfn,
               u64 old_spte, bool prefetch, bool can_unsync,
               bool host_writable, u64 *new_spte);
index b3ed302..caa96c2 100644
@@ -26,6 +26,7 @@ static gfn_t round_gfn_for_level(gfn_t gfn, int level)
  */
 void tdp_iter_restart(struct tdp_iter *iter)
 {
+       iter->yielded = false;
        iter->yielded_gfn = iter->next_last_level_gfn;
        iter->level = iter->root_level;
 
@@ -160,6 +161,11 @@ static bool try_step_up(struct tdp_iter *iter)
  */
 void tdp_iter_next(struct tdp_iter *iter)
 {
+       if (iter->yielded) {
+               tdp_iter_restart(iter);
+               return;
+       }
+
        if (try_step_down(iter))
                return;
 
index b1748b9..e19cabb 100644
@@ -45,6 +45,12 @@ struct tdp_iter {
         * iterator walks off the end of the paging structure.
         */
        bool valid;
+       /*
+        * True if KVM dropped mmu_lock and yielded in the middle of a walk, in
+        * which case tdp_iter_next() needs to restart the walk at the root
+        * level instead of advancing to the next entry.
+        */
+       bool yielded;
 };
 
 /*
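The new iter->yielded field changes the yield protocol: tdp_mmu_iter_cond_resched() only records that it dropped mmu_lock, the caller simply continues its loop, and tdp_iter_next() performs the actual restart on the next step. A toy userspace model of that contract follows; the names mirror the patch but nothing here is kernel code.

#include <stdbool.h>
#include <stdio.h>

struct toy_iter {
        int pos;
        int restart_pos;        /* where a restarted walk resumes (yielded_gfn) */
        bool yielded;
};

static bool toy_cond_resched(struct toy_iter *it)
{
        /* Pretend the lock must be dropped every 4 entries, but only after
         * forward progress has been made since the last yield. */
        if (it->pos && (it->pos % 4 == 0) && it->pos != it->restart_pos) {
                it->restart_pos = it->pos;
                it->yielded = true;     /* defer the restart to toy_next() */
        }
        return it->yielded;
}

static void toy_next(struct toy_iter *it)
{
        if (it->yielded) {
                it->yielded = false;
                it->pos = it->restart_pos;      /* restart the walk here */
                return;
        }
        it->pos++;
}

int main(void)
{
        struct toy_iter it = { 0 };

        for (; it.pos < 10; toy_next(&it)) {
                if (toy_cond_resched(&it))
                        continue;       /* caller just skips, as in the patch */
                printf("visit %d\n", it.pos);
        }
        return 0;
}

The WARN_ON_ONCE(iter->yielded) additions in the SPTE setters later in this diff then catch any caller that forgets to skip the iteration after a yield.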
index a54c349..7b1bc81 100644
@@ -165,7 +165,7 @@ static union kvm_mmu_page_role page_role_for_level(struct kvm_vcpu *vcpu,
        role = vcpu->arch.mmu->mmu_role.base;
        role.level = level;
        role.direct = true;
-       role.gpte_is_8_bytes = true;
+       role.has_4_byte_gpte = false;
        role.access = ACC_ALL;
        role.ad_disabled = !shadow_accessed_mask;
 
@@ -317,9 +317,6 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, tdp_ptep_t pt,
        struct kvm_mmu_page *sp = sptep_to_sp(rcu_dereference(pt));
        int level = sp->role.level;
        gfn_t base_gfn = sp->gfn;
-       u64 old_child_spte;
-       u64 *sptep;
-       gfn_t gfn;
        int i;
 
        trace_kvm_mmu_prepare_zap_page(sp);
@@ -327,8 +324,9 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, tdp_ptep_t pt,
        tdp_mmu_unlink_page(kvm, sp, shared);
 
        for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
-               sptep = rcu_dereference(pt) + i;
-               gfn = base_gfn + i * KVM_PAGES_PER_HPAGE(level);
+               u64 *sptep = rcu_dereference(pt) + i;
+               gfn_t gfn = base_gfn + i * KVM_PAGES_PER_HPAGE(level);
+               u64 old_child_spte;
 
                if (shared) {
                        /*
@@ -374,7 +372,7 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, tdp_ptep_t pt,
                                    shared);
        }
 
-       kvm_flush_remote_tlbs_with_address(kvm, gfn,
+       kvm_flush_remote_tlbs_with_address(kvm, base_gfn,
                                           KVM_PAGES_PER_HPAGE(level + 1));
 
        call_rcu(&sp->rcu_head, tdp_mmu_free_sp_rcu_callback);
@@ -504,6 +502,8 @@ static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm,
                                           struct tdp_iter *iter,
                                           u64 new_spte)
 {
+       WARN_ON_ONCE(iter->yielded);
+
        lockdep_assert_held_read(&kvm->mmu_lock);
 
        /*
@@ -577,6 +577,8 @@ static inline void __tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
                                      u64 new_spte, bool record_acc_track,
                                      bool record_dirty_log)
 {
+       WARN_ON_ONCE(iter->yielded);
+
        lockdep_assert_held_write(&kvm->mmu_lock);
 
        /*
@@ -642,18 +644,19 @@ static inline void tdp_mmu_set_spte_no_dirty_log(struct kvm *kvm,
  * If this function should yield and flush is set, it will perform a remote
  * TLB flush before yielding.
  *
- * If this function yields, it will also reset the tdp_iter's walk over the
- * paging structure and the calling function should skip to the next
- * iteration to allow the iterator to continue its traversal from the
- * paging structure root.
+ * If this function yields, iter->yielded is set and the caller must skip to
+ * the next iteration, where tdp_iter_next() will reset the tdp_iter's walk
+ * over the paging structures to allow the iterator to continue its traversal
+ * from the paging structure root.
  *
- * Return true if this function yielded and the iterator's traversal was reset.
- * Return false if a yield was not needed.
+ * Returns true if this function yielded.
  */
-static inline bool tdp_mmu_iter_cond_resched(struct kvm *kvm,
-                                            struct tdp_iter *iter, bool flush,
-                                            bool shared)
+static inline bool __must_check tdp_mmu_iter_cond_resched(struct kvm *kvm,
+                                                         struct tdp_iter *iter,
+                                                         bool flush, bool shared)
 {
+       WARN_ON(iter->yielded);
+
        /* Ensure forward progress has been made before yielding. */
        if (iter->next_last_level_gfn == iter->yielded_gfn)
                return false;
@@ -673,12 +676,10 @@ static inline bool tdp_mmu_iter_cond_resched(struct kvm *kvm,
 
                WARN_ON(iter->gfn > iter->next_last_level_gfn);
 
-               tdp_iter_restart(iter);
-
-               return true;
+               iter->yielded = true;
        }
 
-       return false;
+       return iter->yielded;
 }
 
 /*
@@ -1033,9 +1034,9 @@ bool kvm_tdp_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range,
 {
        struct kvm_mmu_page *root;
 
-       for_each_tdp_mmu_root(kvm, root, range->slot->as_id)
-               flush |= zap_gfn_range(kvm, root, range->start, range->end,
-                                      range->may_block, flush, false);
+       for_each_tdp_mmu_root_yield_safe(kvm, root, range->slot->as_id, false)
+               flush = zap_gfn_range(kvm, root, range->start, range->end,
+                                     range->may_block, flush, false);
 
        return flush;
 }
@@ -1364,10 +1365,9 @@ void kvm_tdp_mmu_clear_dirty_pt_masked(struct kvm *kvm,
  * Clear leaf entries which could be replaced by large mappings, for
  * GFNs within the slot.
  */
-static bool zap_collapsible_spte_range(struct kvm *kvm,
+static void zap_collapsible_spte_range(struct kvm *kvm,
                                       struct kvm_mmu_page *root,
-                                      const struct kvm_memory_slot *slot,
-                                      bool flush)
+                                      const struct kvm_memory_slot *slot)
 {
        gfn_t start = slot->base_gfn;
        gfn_t end = start + slot->npages;
@@ -1378,10 +1378,8 @@ static bool zap_collapsible_spte_range(struct kvm *kvm,
 
        tdp_root_for_each_pte(iter, root, start, end) {
 retry:
-               if (tdp_mmu_iter_cond_resched(kvm, &iter, flush, true)) {
-                       flush = false;
+               if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true))
                        continue;
-               }
 
                if (!is_shadow_present_pte(iter.old_spte) ||
                    !is_last_spte(iter.old_spte, iter.level))
@@ -1393,6 +1391,7 @@ retry:
                                                            pfn, PG_LEVEL_NUM))
                        continue;
 
+               /* Note, a successful atomic zap also does a remote TLB flush. */
                if (!tdp_mmu_zap_spte_atomic(kvm, &iter)) {
                        /*
                         * The iter must explicitly re-read the SPTE because
@@ -1401,30 +1400,24 @@ retry:
                        iter.old_spte = READ_ONCE(*rcu_dereference(iter.sptep));
                        goto retry;
                }
-               flush = true;
        }
 
        rcu_read_unlock();
-
-       return flush;
 }
 
 /*
  * Clear non-leaf entries (and free associated page tables) which could
  * be replaced by large mappings, for GFNs within the slot.
  */
-bool kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
-                                      const struct kvm_memory_slot *slot,
-                                      bool flush)
+void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
+                                      const struct kvm_memory_slot *slot)
 {
        struct kvm_mmu_page *root;
 
        lockdep_assert_held_read(&kvm->mmu_lock);
 
        for_each_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, true)
-               flush = zap_collapsible_spte_range(kvm, root, slot, flush);
-
-       return flush;
+               zap_collapsible_spte_range(kvm, root, slot);
 }
 
 /*
index 476b133..3899004 100644
@@ -64,9 +64,8 @@ void kvm_tdp_mmu_clear_dirty_pt_masked(struct kvm *kvm,
                                       struct kvm_memory_slot *slot,
                                       gfn_t gfn, unsigned long mask,
                                       bool wrprot);
-bool kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
-                                      const struct kvm_memory_slot *slot,
-                                      bool flush);
+void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
+                                      const struct kvm_memory_slot *slot);
 
 bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
                                   struct kvm_memory_slot *slot, gfn_t gfn,
index affc0ea..0e5b492 100644
@@ -293,7 +293,7 @@ static void avic_kick_target_vcpus(struct kvm *kvm, struct kvm_lapic *source,
                                   u32 icrl, u32 icrh)
 {
        struct kvm_vcpu *vcpu;
-       int i;
+       unsigned long i;
 
        kvm_for_each_vcpu(i, vcpu, kvm) {
                bool m = kvm_apic_match_dest(vcpu, source,
@@ -675,10 +675,18 @@ int svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
        smp_mb__after_atomic();
 
        if (avic_vcpu_is_running(vcpu)) {
-               int cpuid = vcpu->cpu;
+               int cpu = READ_ONCE(vcpu->cpu);
 
-               if (cpuid != get_cpu())
-                       wrmsrl(SVM_AVIC_DOORBELL, kvm_cpu_get_apicid(cpuid));
+               /*
+                * Note, the vCPU could get migrated to a different pCPU at any
+                * point, which could result in signalling the wrong/previous
+                * pCPU.  But if that happens the vCPU is guaranteed to do a
+                * VMRUN (after being migrated) and thus will process pending
+                * interrupts, i.e. a doorbell is not needed (and the spurious
+                * one is harmless).
+                */
+               if (cpu != get_cpu())
+                       wrmsrl(SVM_AVIC_DOORBELL, kvm_cpu_get_apicid(cpu));
                put_cpu();
        } else
                kvm_vcpu_wake_up(vcpu);
@@ -900,6 +908,7 @@ out:
 bool svm_check_apicv_inhibit_reasons(ulong bit)
 {
        ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) |
+                         BIT(APICV_INHIBIT_REASON_ABSENT) |
                          BIT(APICV_INHIBIT_REASON_HYPERV) |
                          BIT(APICV_INHIBIT_REASON_NESTED) |
                          BIT(APICV_INHIBIT_REASON_IRQWIN) |
@@ -989,16 +998,18 @@ void avic_vcpu_put(struct kvm_vcpu *vcpu)
 static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
+       int cpu = get_cpu();
 
+       WARN_ON(cpu != vcpu->cpu);
        svm->avic_is_running = is_run;
 
-       if (!kvm_vcpu_apicv_active(vcpu))
-               return;
-
-       if (is_run)
-               avic_vcpu_load(vcpu, vcpu->cpu);
-       else
-               avic_vcpu_put(vcpu);
+       if (kvm_vcpu_apicv_active(vcpu)) {
+               if (is_run)
+                       avic_vcpu_load(vcpu, cpu);
+               else
+                       avic_vcpu_put(vcpu);
+       }
+       put_cpu();
 }
 
 void svm_vcpu_blocking(struct kvm_vcpu *vcpu)
index f8b7bc0..cf20685 100644
@@ -58,8 +58,9 @@ static void svm_inject_page_fault_nested(struct kvm_vcpu *vcpu, struct x86_excep
        struct vcpu_svm *svm = to_svm(vcpu);
        WARN_ON(!is_guest_mode(vcpu));
 
-       if (vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_EXCEPTION_OFFSET + PF_VECTOR) &&
-          !svm->nested.nested_run_pending) {
+       if (vmcb12_is_intercept(&svm->nested.ctl,
+                               INTERCEPT_EXCEPTION_OFFSET + PF_VECTOR) &&
+           !svm->nested.nested_run_pending) {
                svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + PF_VECTOR;
                svm->vmcb->control.exit_code_hi = 0;
                svm->vmcb->control.exit_info_1 = fault->error_code;
@@ -121,7 +122,8 @@ static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
 
 void recalc_intercepts(struct vcpu_svm *svm)
 {
-       struct vmcb_control_area *c, *h, *g;
+       struct vmcb_control_area *c, *h;
+       struct vmcb_ctrl_area_cached *g;
        unsigned int i;
 
        vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
@@ -163,37 +165,6 @@ void recalc_intercepts(struct vcpu_svm *svm)
        vmcb_set_intercept(c, INTERCEPT_VMSAVE);
 }
 
-static void copy_vmcb_control_area(struct vmcb_control_area *dst,
-                                  struct vmcb_control_area *from)
-{
-       unsigned int i;
-
-       for (i = 0; i < MAX_INTERCEPT; i++)
-               dst->intercepts[i] = from->intercepts[i];
-
-       dst->iopm_base_pa         = from->iopm_base_pa;
-       dst->msrpm_base_pa        = from->msrpm_base_pa;
-       dst->tsc_offset           = from->tsc_offset;
-       /* asid not copied, it is handled manually for svm->vmcb.  */
-       dst->tlb_ctl              = from->tlb_ctl;
-       dst->int_ctl              = from->int_ctl;
-       dst->int_vector           = from->int_vector;
-       dst->int_state            = from->int_state;
-       dst->exit_code            = from->exit_code;
-       dst->exit_code_hi         = from->exit_code_hi;
-       dst->exit_info_1          = from->exit_info_1;
-       dst->exit_info_2          = from->exit_info_2;
-       dst->exit_int_info        = from->exit_int_info;
-       dst->exit_int_info_err    = from->exit_int_info_err;
-       dst->nested_ctl           = from->nested_ctl;
-       dst->event_inj            = from->event_inj;
-       dst->event_inj_err        = from->event_inj_err;
-       dst->nested_cr3           = from->nested_cr3;
-       dst->virt_ext              = from->virt_ext;
-       dst->pause_filter_count   = from->pause_filter_count;
-       dst->pause_filter_thresh  = from->pause_filter_thresh;
-}
-
 static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
 {
        /*
@@ -203,7 +174,7 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
         */
        int i;
 
-       if (!(vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_MSR_PROT)))
+       if (!(vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_MSR_PROT)))
                return true;
 
        for (i = 0; i < MSRPM_OFFSETS; i++) {
@@ -250,10 +221,10 @@ static bool nested_svm_check_tlb_ctl(struct kvm_vcpu *vcpu, u8 tlb_ctl)
        }
 }
 
-static bool nested_vmcb_check_controls(struct kvm_vcpu *vcpu,
-                                      struct vmcb_control_area *control)
+static bool __nested_vmcb_check_controls(struct kvm_vcpu *vcpu,
+                                        struct vmcb_ctrl_area_cached *control)
 {
-       if (CC(!vmcb_is_intercept(control, INTERCEPT_VMRUN)))
+       if (CC(!vmcb12_is_intercept(control, INTERCEPT_VMRUN)))
                return false;
 
        if (CC(control->asid == 0))
@@ -275,9 +246,20 @@ static bool nested_vmcb_check_controls(struct kvm_vcpu *vcpu,
        return true;
 }
 
-static bool nested_vmcb_check_cr3_cr4(struct kvm_vcpu *vcpu,
-                                     struct vmcb_save_area *save)
+/* Common checks that apply to both L1 and L2 state.  */
+static bool __nested_vmcb_check_save(struct kvm_vcpu *vcpu,
+                                    struct vmcb_save_area_cached *save)
 {
+       if (CC(!(save->efer & EFER_SVME)))
+               return false;
+
+       if (CC((save->cr0 & X86_CR0_CD) == 0 && (save->cr0 & X86_CR0_NW)) ||
+           CC(save->cr0 & ~0xffffffffULL))
+               return false;
+
+       if (CC(!kvm_dr6_valid(save->dr6)) || CC(!kvm_dr7_valid(save->dr7)))
+               return false;
+
        /*
         * These checks are also performed by KVM_SET_SREGS,
         * except that EFER.LMA is not checked by SVM against
@@ -293,48 +275,90 @@ static bool nested_vmcb_check_cr3_cr4(struct kvm_vcpu *vcpu,
        if (CC(!kvm_is_valid_cr4(vcpu, save->cr4)))
                return false;
 
+       if (CC(!kvm_valid_efer(vcpu, save->efer)))
+               return false;
+
        return true;
 }
 
-/* Common checks that apply to both L1 and L2 state.  */
-static bool nested_vmcb_valid_sregs(struct kvm_vcpu *vcpu,
-                                   struct vmcb_save_area *save)
+static bool nested_vmcb_check_save(struct kvm_vcpu *vcpu)
 {
-       /*
-        * FIXME: these should be done after copying the fields,
-        * to avoid TOC/TOU races.  For these save area checks
-        * the possible damage is limited since kvm_set_cr0 and
-        * kvm_set_cr4 handle failure; EFER_SVME is an exception
-        * so it is force-set later in nested_prepare_vmcb_save.
-        */
-       if (CC(!(save->efer & EFER_SVME)))
-               return false;
+       struct vcpu_svm *svm = to_svm(vcpu);
+       struct vmcb_save_area_cached *save = &svm->nested.save;
 
-       if (CC((save->cr0 & X86_CR0_CD) == 0 && (save->cr0 & X86_CR0_NW)) ||
-           CC(save->cr0 & ~0xffffffffULL))
-               return false;
+       return __nested_vmcb_check_save(vcpu, save);
+}
 
-       if (CC(!kvm_dr6_valid(save->dr6)) || CC(!kvm_dr7_valid(save->dr7)))
-               return false;
+static bool nested_vmcb_check_controls(struct kvm_vcpu *vcpu)
+{
+       struct vcpu_svm *svm = to_svm(vcpu);
+       struct vmcb_ctrl_area_cached *ctl = &svm->nested.ctl;
 
-       if (!nested_vmcb_check_cr3_cr4(vcpu, save))
-               return false;
+       return __nested_vmcb_check_controls(vcpu, ctl);
+}
 
-       if (CC(!kvm_valid_efer(vcpu, save->efer)))
-               return false;
+static
+void __nested_copy_vmcb_control_to_cache(struct vmcb_ctrl_area_cached *to,
+                                        struct vmcb_control_area *from)
+{
+       unsigned int i;
 
-       return true;
+       for (i = 0; i < MAX_INTERCEPT; i++)
+               to->intercepts[i] = from->intercepts[i];
+
+       to->iopm_base_pa        = from->iopm_base_pa;
+       to->msrpm_base_pa       = from->msrpm_base_pa;
+       to->tsc_offset          = from->tsc_offset;
+       to->tlb_ctl             = from->tlb_ctl;
+       to->int_ctl             = from->int_ctl;
+       to->int_vector          = from->int_vector;
+       to->int_state           = from->int_state;
+       to->exit_code           = from->exit_code;
+       to->exit_code_hi        = from->exit_code_hi;
+       to->exit_info_1         = from->exit_info_1;
+       to->exit_info_2         = from->exit_info_2;
+       to->exit_int_info       = from->exit_int_info;
+       to->exit_int_info_err   = from->exit_int_info_err;
+       to->nested_ctl          = from->nested_ctl;
+       to->event_inj           = from->event_inj;
+       to->event_inj_err       = from->event_inj_err;
+       to->nested_cr3          = from->nested_cr3;
+       to->virt_ext            = from->virt_ext;
+       to->pause_filter_count  = from->pause_filter_count;
+       to->pause_filter_thresh = from->pause_filter_thresh;
+
+       /* Copy asid here because nested_vmcb_check_controls will check it.  */
+       to->asid           = from->asid;
+       to->msrpm_base_pa &= ~0x0fffULL;
+       to->iopm_base_pa  &= ~0x0fffULL;
 }
 
-void nested_load_control_from_vmcb12(struct vcpu_svm *svm,
-                                    struct vmcb_control_area *control)
+void nested_copy_vmcb_control_to_cache(struct vcpu_svm *svm,
+                                      struct vmcb_control_area *control)
 {
-       copy_vmcb_control_area(&svm->nested.ctl, control);
+       __nested_copy_vmcb_control_to_cache(&svm->nested.ctl, control);
+}
+
+static void __nested_copy_vmcb_save_to_cache(struct vmcb_save_area_cached *to,
+                                            struct vmcb_save_area *from)
+{
+       /*
+        * Copy only fields that are validated, as we need them
+        * to avoid TOC/TOU races.
+        */
+       to->efer = from->efer;
+       to->cr0 = from->cr0;
+       to->cr3 = from->cr3;
+       to->cr4 = from->cr4;
 
-       /* Copy it here because nested_svm_check_controls will check it.  */
-       svm->nested.ctl.asid           = control->asid;
-       svm->nested.ctl.msrpm_base_pa &= ~0x0fffULL;
-       svm->nested.ctl.iopm_base_pa  &= ~0x0fffULL;
+       to->dr6 = from->dr6;
+       to->dr7 = from->dr7;
+}
+
+void nested_copy_vmcb_save_to_cache(struct vcpu_svm *svm,
+                                   struct vmcb_save_area *save)
+{
+       __nested_copy_vmcb_save_to_cache(&svm->nested.save, save);
 }
 
 /*
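Caching the VMCB12 control and save fields before validating them closes the TOC/TOU window the removed FIXME described: the guest can rewrite its VMCB between the check and the use, so KVM now checks and consumes a private snapshot. A userspace sketch of that pattern, using toy structures rather than the kernel API:

#include <stdbool.h>

struct vmcb12_save { unsigned long efer, cr0, cr3, cr4, dr6, dr7; };
struct save_cache  { unsigned long efer, cr0, cr3, cr4, dr6, dr7; };

static void snapshot_save(struct save_cache *to, const struct vmcb12_save *from)
{
        /* Copy only the fields that will be validated and later consumed. */
        to->efer = from->efer;
        to->cr0  = from->cr0;
        to->cr3  = from->cr3;
        to->cr4  = from->cr4;
        to->dr6  = from->dr6;
        to->dr7  = from->dr7;
}

static bool check_save(const struct save_cache *s)
{
        return (s->efer & (1UL << 12)) != 0;    /* EFER.SVME is bit 12 */
}

static void enter_guest_mode(const struct save_cache *s)
{
        /* Consume *s, never the original vmcb12_save, so a concurrent guest
         * write after check_save() cannot bypass the validation. */
        (void)s;
}

nested_vmcb02_prepare_save() and nested_svm_load_cr3() accordingly read svm->nested.save instead of vmcb12->save, as the hunks below show.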
@@ -437,14 +461,13 @@ static int nested_svm_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
                return -EINVAL;
 
        if (reload_pdptrs && !nested_npt && is_pae_paging(vcpu) &&
-           CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)))
+           CC(!load_pdptrs(vcpu, cr3)))
                return -EINVAL;
 
        if (!nested_npt)
                kvm_mmu_new_pgd(vcpu, cr3);
 
        vcpu->arch.cr3 = cr3;
-       kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
 
        /* Re-initialize the MMU, e.g. to pick up CR4 MMU role changes. */
        kvm_init_mmu(vcpu);
@@ -490,15 +513,10 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
 
        kvm_set_rflags(&svm->vcpu, vmcb12->save.rflags | X86_EFLAGS_FIXED);
 
-       /*
-        * Force-set EFER_SVME even though it is checked earlier on the
-        * VMCB12, because the guest can flip the bit between the check
-        * and now.  Clearing EFER_SVME would call svm_free_nested.
-        */
-       svm_set_efer(&svm->vcpu, vmcb12->save.efer | EFER_SVME);
+       svm_set_efer(&svm->vcpu, svm->nested.save.efer);
 
-       svm_set_cr0(&svm->vcpu, vmcb12->save.cr0);
-       svm_set_cr4(&svm->vcpu, vmcb12->save.cr4);
+       svm_set_cr0(&svm->vcpu, svm->nested.save.cr0);
+       svm_set_cr4(&svm->vcpu, svm->nested.save.cr4);
 
        svm->vcpu.arch.cr2 = vmcb12->save.cr2;
 
@@ -513,8 +531,8 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
 
        /* These bits will be set properly on the first execution when new_vmc12 is true */
        if (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_DR))) {
-               svm->vmcb->save.dr7 = vmcb12->save.dr7 | DR7_FIXED_1;
-               svm->vcpu.arch.dr6  = vmcb12->save.dr6 | DR6_ACTIVE_LOW;
+               svm->vmcb->save.dr7 = svm->nested.save.dr7 | DR7_FIXED_1;
+               svm->vcpu.arch.dr6  = svm->nested.save.dr6 | DR6_ACTIVE_LOW;
                vmcb_mark_dirty(svm->vmcb, VMCB_DR);
        }
 }
@@ -628,7 +646,7 @@ int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa,
        nested_vmcb02_prepare_control(svm);
        nested_vmcb02_prepare_save(svm, vmcb12);
 
-       ret = nested_svm_load_cr3(&svm->vcpu, vmcb12->save.cr3,
+       ret = nested_svm_load_cr3(&svm->vcpu, svm->nested.save.cr3,
                                  nested_npt_enabled(svm), from_vmrun);
        if (ret)
                return ret;
@@ -678,10 +696,11 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu)
        if (WARN_ON_ONCE(!svm->nested.initialized))
                return -EINVAL;
 
-       nested_load_control_from_vmcb12(svm, &vmcb12->control);
+       nested_copy_vmcb_control_to_cache(svm, &vmcb12->control);
+       nested_copy_vmcb_save_to_cache(svm, &vmcb12->save);
 
-       if (!nested_vmcb_valid_sregs(vcpu, &vmcb12->save) ||
-           !nested_vmcb_check_controls(vcpu, &svm->nested.ctl)) {
+       if (!nested_vmcb_check_save(vcpu) ||
+           !nested_vmcb_check_controls(vcpu)) {
                vmcb12->control.exit_code    = SVM_EXIT_ERR;
                vmcb12->control.exit_code_hi = 0;
                vmcb12->control.exit_info_1  = 0;
@@ -988,7 +1007,7 @@ static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
        u32 offset, msr, value;
        int write, mask;
 
-       if (!(vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_MSR_PROT)))
+       if (!(vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_MSR_PROT)))
                return NESTED_EXIT_HOST;
 
        msr    = svm->vcpu.arch.regs[VCPU_REGS_RCX];
@@ -1015,7 +1034,7 @@ static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
        u8 start_bit;
        u64 gpa;
 
-       if (!(vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_IOIO_PROT)))
+       if (!(vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_IOIO_PROT)))
                return NESTED_EXIT_HOST;
 
        port = svm->vmcb->control.exit_info_1 >> 16;
@@ -1046,12 +1065,12 @@ static int nested_svm_intercept(struct vcpu_svm *svm)
                vmexit = nested_svm_intercept_ioio(svm);
                break;
        case SVM_EXIT_READ_CR0 ... SVM_EXIT_WRITE_CR8: {
-               if (vmcb_is_intercept(&svm->nested.ctl, exit_code))
+               if (vmcb12_is_intercept(&svm->nested.ctl, exit_code))
                        vmexit = NESTED_EXIT_DONE;
                break;
        }
        case SVM_EXIT_READ_DR0 ... SVM_EXIT_WRITE_DR7: {
-               if (vmcb_is_intercept(&svm->nested.ctl, exit_code))
+               if (vmcb12_is_intercept(&svm->nested.ctl, exit_code))
                        vmexit = NESTED_EXIT_DONE;
                break;
        }
@@ -1069,7 +1088,7 @@ static int nested_svm_intercept(struct vcpu_svm *svm)
                break;
        }
        default: {
-               if (vmcb_is_intercept(&svm->nested.ctl, exit_code))
+               if (vmcb12_is_intercept(&svm->nested.ctl, exit_code))
                        vmexit = NESTED_EXIT_DONE;
        }
        }
@@ -1147,7 +1166,7 @@ static void nested_svm_inject_exception_vmexit(struct vcpu_svm *svm)
 
 static inline bool nested_exit_on_init(struct vcpu_svm *svm)
 {
-       return vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_INIT);
+       return vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_INIT);
 }
 
 static int svm_check_nested_events(struct kvm_vcpu *vcpu)
@@ -1251,11 +1270,47 @@ void nested_svm_update_tsc_ratio_msr(struct kvm_vcpu *vcpu)
        svm_write_tsc_multiplier(vcpu, vcpu->arch.tsc_scaling_ratio);
 }
 
+/* Inverse operation of nested_copy_vmcb_control_to_cache(). asid is copied too. */
+static void nested_copy_vmcb_cache_to_control(struct vmcb_control_area *dst,
+                                             struct vmcb_ctrl_area_cached *from)
+{
+       unsigned int i;
+
+       memset(dst, 0, sizeof(struct vmcb_control_area));
+
+       for (i = 0; i < MAX_INTERCEPT; i++)
+               dst->intercepts[i] = from->intercepts[i];
+
+       dst->iopm_base_pa         = from->iopm_base_pa;
+       dst->msrpm_base_pa        = from->msrpm_base_pa;
+       dst->tsc_offset           = from->tsc_offset;
+       dst->asid                 = from->asid;
+       dst->tlb_ctl              = from->tlb_ctl;
+       dst->int_ctl              = from->int_ctl;
+       dst->int_vector           = from->int_vector;
+       dst->int_state            = from->int_state;
+       dst->exit_code            = from->exit_code;
+       dst->exit_code_hi         = from->exit_code_hi;
+       dst->exit_info_1          = from->exit_info_1;
+       dst->exit_info_2          = from->exit_info_2;
+       dst->exit_int_info        = from->exit_int_info;
+       dst->exit_int_info_err    = from->exit_int_info_err;
+       dst->nested_ctl           = from->nested_ctl;
+       dst->event_inj            = from->event_inj;
+       dst->event_inj_err        = from->event_inj_err;
+       dst->nested_cr3           = from->nested_cr3;
+       dst->virt_ext              = from->virt_ext;
+       dst->pause_filter_count   = from->pause_filter_count;
+       dst->pause_filter_thresh  = from->pause_filter_thresh;
+}
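Because svm->nested.ctl now holds the slimmer cached layout, the helper above expands it back into a full vmcb_control_area before the copy_to_user(), so the KVM_GET_NESTED_STATE layout seen by userspace is unchanged. A rough userspace sketch of reading that state; the 16 KiB buffer size is an assumption, real callers should size it from KVM_CAP_NESTED_STATE:

#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Hypothetical helper: read the nested state blob from a vCPU fd.
 * The caller owns and eventually free()s the returned buffer. */
static struct kvm_nested_state *get_nested_state(int vcpu_fd)
{
	size_t sz = 16384;		/* assumed; query KVM_CAP_NESTED_STATE instead */
	struct kvm_nested_state *state = calloc(1, sz);

	if (!state)
		return NULL;
	state->size = sz;
	if (ioctl(vcpu_fd, KVM_GET_NESTED_STATE, state) < 0) {
		free(state);
		return NULL;
	}
	return state;
}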
+
 static int svm_get_nested_state(struct kvm_vcpu *vcpu,
                                struct kvm_nested_state __user *user_kvm_nested_state,
                                u32 user_data_size)
 {
        struct vcpu_svm *svm;
+       struct vmcb_control_area *ctl;
+       unsigned long r;
        struct kvm_nested_state kvm_state = {
                .flags = 0,
                .format = KVM_STATE_NESTED_FORMAT_SVM,
@@ -1297,9 +1352,18 @@ static int svm_get_nested_state(struct kvm_vcpu *vcpu,
         */
        if (clear_user(user_vmcb, KVM_STATE_NESTED_SVM_VMCB_SIZE))
                return -EFAULT;
-       if (copy_to_user(&user_vmcb->control, &svm->nested.ctl,
-                        sizeof(user_vmcb->control)))
+
+       ctl = kzalloc(sizeof(*ctl), GFP_KERNEL);
+       if (!ctl)
+               return -ENOMEM;
+
+       nested_copy_vmcb_cache_to_control(ctl, &svm->nested.ctl);
+       r = copy_to_user(&user_vmcb->control, ctl,
+                        sizeof(user_vmcb->control));
+       kfree(ctl);
+       if (r)
                return -EFAULT;
+
        if (copy_to_user(&user_vmcb->save, &svm->vmcb01.ptr->save,
                         sizeof(user_vmcb->save)))
                return -EFAULT;
@@ -1316,6 +1380,8 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
                &user_kvm_nested_state->data.svm[0];
        struct vmcb_control_area *ctl;
        struct vmcb_save_area *save;
+       struct vmcb_save_area_cached save_cached;
+       struct vmcb_ctrl_area_cached ctl_cached;
        unsigned long cr0;
        int ret;
 
@@ -1368,7 +1434,8 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
                goto out_free;
 
        ret = -EINVAL;
-       if (!nested_vmcb_check_controls(vcpu, ctl))
+       __nested_copy_vmcb_control_to_cache(&ctl_cached, ctl);
+       if (!__nested_vmcb_check_controls(vcpu, &ctl_cached))
                goto out_free;
 
        /*
@@ -1383,10 +1450,11 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
         * Validate host state saved from before VMRUN (see
         * nested_svm_check_permissions).
         */
+       __nested_copy_vmcb_save_to_cache(&save_cached, save);
        if (!(save->cr0 & X86_CR0_PG) ||
            !(save->cr0 & X86_CR0_PE) ||
            (save->rflags & X86_EFLAGS_VM) ||
-           !nested_vmcb_valid_sregs(vcpu, save))
+           !__nested_vmcb_check_save(vcpu, &save_cached))
                goto out_free;
 
        /*
@@ -1422,7 +1490,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
        svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa;
 
        svm_copy_vmrun_state(&svm->vmcb01.ptr->save, save);
-       nested_load_control_from_vmcb12(svm, ctl);
+       nested_copy_vmcb_control_to_cache(svm, ctl);
 
        svm_switch_vmcb(svm, &svm->nested.vmcb02);
        nested_vmcb02_prepare_control(svm);
@@ -1449,7 +1517,7 @@ static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu)
                 * the guest CR3 might be restored prior to setting the nested
                 * state which can lead to a load of wrong PDPTRs.
                 */
-               if (CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, vcpu->arch.cr3)))
+               if (CC(!load_pdptrs(vcpu, vcpu->arch.cr3)))
                        return false;
 
        if (!nested_svm_vmrun_msrpm(svm)) {
index 871c426..0cf05e4 100644 (file)
@@ -16,6 +16,7 @@
 #include "cpuid.h"
 #include "lapic.h"
 #include "pmu.h"
+#include "svm.h"
 
 enum pmu_type {
        PMU_TYPE_COUNTER = 0,
@@ -100,6 +101,9 @@ static inline struct kvm_pmc *get_gp_pmc_amd(struct kvm_pmu *pmu, u32 msr,
 {
        struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu);
 
+       if (!pmu)
+               return NULL;
+
        switch (msr) {
        case MSR_F15H_PERF_CTL0:
        case MSR_F15H_PERF_CTL1:
@@ -281,7 +285,7 @@ static void amd_pmu_refresh(struct kvm_vcpu *vcpu)
                pmu->nr_arch_gp_counters = AMD64_NUM_COUNTERS;
 
        pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << 48) - 1;
-       pmu->reserved_bits = 0xffffffff00200000ull;
+       pmu->reserved_bits = 0xfffffff000280000ull;
        pmu->version = 1;
        /* not applicable to AMD; but clean them to prevent any fall out */
        pmu->counter_bitmask[KVM_PMC_FIXED] = 0;
index 21ac0a5..3225533 100644 (file)
@@ -636,7 +636,8 @@ static int __sev_launch_update_vmsa(struct kvm *kvm, struct kvm_vcpu *vcpu,
 static int sev_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
 {
        struct kvm_vcpu *vcpu;
-       int i, ret;
+       unsigned long i;
+       int ret;
 
        if (!sev_es_guest(kvm))
                return -ENOTTY;
@@ -1543,35 +1544,57 @@ static bool is_cmd_allowed_from_mirror(u32 cmd_id)
        return false;
 }
 
-static int sev_lock_for_migration(struct kvm *kvm)
+static int sev_lock_two_vms(struct kvm *dst_kvm, struct kvm *src_kvm)
 {
-       struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+       struct kvm_sev_info *dst_sev = &to_kvm_svm(dst_kvm)->sev_info;
+       struct kvm_sev_info *src_sev = &to_kvm_svm(src_kvm)->sev_info;
+       int r = -EBUSY;
+
+       if (dst_kvm == src_kvm)
+               return -EINVAL;
 
        /*
-        * Bail if this VM is already involved in a migration to avoid deadlock
-        * between two VMs trying to migrate to/from each other.
+        * Bail if these VMs are already involved in a migration to avoid
+        * deadlock between two VMs trying to migrate to/from each other.
         */
-       if (atomic_cmpxchg_acquire(&sev->migration_in_progress, 0, 1))
+       if (atomic_cmpxchg_acquire(&dst_sev->migration_in_progress, 0, 1))
                return -EBUSY;
 
-       mutex_lock(&kvm->lock);
+       if (atomic_cmpxchg_acquire(&src_sev->migration_in_progress, 0, 1))
+               goto release_dst;
 
+       r = -EINTR;
+       if (mutex_lock_killable(&dst_kvm->lock))
+               goto release_src;
+       if (mutex_lock_killable(&src_kvm->lock))
+               goto unlock_dst;
        return 0;
+
+unlock_dst:
+       mutex_unlock(&dst_kvm->lock);
+release_src:
+       atomic_set_release(&src_sev->migration_in_progress, 0);
+release_dst:
+       atomic_set_release(&dst_sev->migration_in_progress, 0);
+       return r;
 }
 
-static void sev_unlock_after_migration(struct kvm *kvm)
+static void sev_unlock_two_vms(struct kvm *dst_kvm, struct kvm *src_kvm)
 {
-       struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+       struct kvm_sev_info *dst_sev = &to_kvm_svm(dst_kvm)->sev_info;
+       struct kvm_sev_info *src_sev = &to_kvm_svm(src_kvm)->sev_info;
 
-       mutex_unlock(&kvm->lock);
-       atomic_set_release(&sev->migration_in_progress, 0);
+       mutex_unlock(&dst_kvm->lock);
+       mutex_unlock(&src_kvm->lock);
+       atomic_set_release(&dst_sev->migration_in_progress, 0);
+       atomic_set_release(&src_sev->migration_in_progress, 0);
 }
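The pairing above avoids an ABBA deadlock: each VM's migration_in_progress flag is claimed with a cmpxchg before either kvm->lock is taken, so two tasks locking the same pair of VMs in opposite orders back off with -EBUSY instead of waiting on each other, and mutex_lock_killable() keeps the remaining waits interruptible. A small userspace analogue of the same idea, with invented names:

#include <errno.h>
#include <pthread.h>
#include <stdatomic.h>

struct vm { atomic_int busy; pthread_mutex_t lock; };

/* Claim both "busy" flags first; the mutexes are taken only once both flags
 * are owned, so a concurrent lock_pair(src, dst) fails fast instead of
 * deadlocking.  Every failure path releases what was already claimed. */
static int lock_pair(struct vm *dst, struct vm *src)
{
	int expected = 0;

	if (dst == src)
		return -EINVAL;
	if (!atomic_compare_exchange_strong(&dst->busy, &expected, 1))
		return -EBUSY;
	expected = 0;
	if (!atomic_compare_exchange_strong(&src->busy, &expected, 1)) {
		atomic_store(&dst->busy, 0);
		return -EBUSY;
	}
	pthread_mutex_lock(&dst->lock);
	pthread_mutex_lock(&src->lock);
	return 0;
}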
 
 
 static int sev_lock_vcpus_for_migration(struct kvm *kvm)
 {
        struct kvm_vcpu *vcpu;
-       int i, j;
+       unsigned long i, j;
 
        kvm_for_each_vcpu(i, vcpu, kvm) {
                if (mutex_lock_killable(&vcpu->mutex))
@@ -1593,7 +1616,7 @@ out_unlock:
 static void sev_unlock_vcpus_for_migration(struct kvm *kvm)
 {
        struct kvm_vcpu *vcpu;
-       int i;
+       unsigned long i;
 
        kvm_for_each_vcpu(i, vcpu, kvm) {
                mutex_unlock(&vcpu->mutex);
@@ -1607,19 +1630,20 @@ static void sev_migrate_from(struct kvm_sev_info *dst,
        dst->asid = src->asid;
        dst->handle = src->handle;
        dst->pages_locked = src->pages_locked;
+       dst->enc_context_owner = src->enc_context_owner;
 
        src->asid = 0;
        src->active = false;
        src->handle = 0;
        src->pages_locked = 0;
+       src->enc_context_owner = NULL;
 
-       INIT_LIST_HEAD(&dst->regions_list);
-       list_replace_init(&src->regions_list, &dst->regions_list);
+       list_cut_before(&dst->regions_list, &src->regions_list, &src->regions_list);
 }
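Per the list.h documentation, calling list_cut_before() with the source head itself as @entry moves every element of the source list onto @list and leaves the source reinitialized (empty), covering the no-regions case in a single call. A minimal kernel-style sketch of that usage, with invented names:

#include <linux/list.h>

struct region { struct list_head link; };

/* Move the whole contents of @src onto @dst; @dst's previous contents are
 * discarded and @src ends up empty, whether or not it held any entries. */
static void steal_all_regions(struct list_head *dst, struct list_head *src)
{
	list_cut_before(dst, src, src);
}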
 
 static int sev_es_migrate_from(struct kvm *dst, struct kvm *src)
 {
-       int i;
+       unsigned long i;
        struct kvm_vcpu *dst_vcpu, *src_vcpu;
        struct vcpu_svm *dst_svm, *src_svm;
 
@@ -1666,15 +1690,6 @@ int svm_vm_migrate_from(struct kvm *kvm, unsigned int source_fd)
        bool charged = false;
        int ret;
 
-       ret = sev_lock_for_migration(kvm);
-       if (ret)
-               return ret;
-
-       if (sev_guest(kvm)) {
-               ret = -EINVAL;
-               goto out_unlock;
-       }
-
        source_kvm_file = fget(source_fd);
        if (!file_is_kvm(source_kvm_file)) {
                ret = -EBADF;
@@ -1682,16 +1697,26 @@ int svm_vm_migrate_from(struct kvm *kvm, unsigned int source_fd)
        }
 
        source_kvm = source_kvm_file->private_data;
-       ret = sev_lock_for_migration(source_kvm);
+       ret = sev_lock_two_vms(kvm, source_kvm);
        if (ret)
                goto out_fput;
 
-       if (!sev_guest(source_kvm)) {
+       if (sev_guest(kvm) || !sev_guest(source_kvm)) {
                ret = -EINVAL;
-               goto out_source;
+               goto out_unlock;
        }
 
        src_sev = &to_kvm_svm(source_kvm)->sev_info;
+
+       /*
+        * VMs mirroring src's encryption context rely on it to keep the
+        * ASID allocated, but below we are clearing src_sev->asid.
+        */
+       if (src_sev->num_mirrored_vms) {
+               ret = -EBUSY;
+               goto out_unlock;
+       }
+
        dst_sev->misc_cg = get_current_misc_cg();
        cg_cleanup_sev = dst_sev;
        if (dst_sev->misc_cg != src_sev->misc_cg) {
@@ -1728,13 +1753,11 @@ out_dst_cgroup:
                sev_misc_cg_uncharge(cg_cleanup_sev);
        put_misc_cg(cg_cleanup_sev->misc_cg);
        cg_cleanup_sev->misc_cg = NULL;
-out_source:
-       sev_unlock_after_migration(source_kvm);
+out_unlock:
+       sev_unlock_two_vms(kvm, source_kvm);
 out_fput:
        if (source_kvm_file)
                fput(source_kvm_file);
-out_unlock:
-       sev_unlock_after_migration(kvm);
        return ret;
 }
 
@@ -1953,76 +1976,60 @@ int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd)
 {
        struct file *source_kvm_file;
        struct kvm *source_kvm;
-       struct kvm_sev_info source_sev, *mirror_sev;
+       struct kvm_sev_info *source_sev, *mirror_sev;
        int ret;
 
        source_kvm_file = fget(source_fd);
        if (!file_is_kvm(source_kvm_file)) {
                ret = -EBADF;
-               goto e_source_put;
+               goto e_source_fput;
        }
 
        source_kvm = source_kvm_file->private_data;
-       mutex_lock(&source_kvm->lock);
-
-       if (!sev_guest(source_kvm)) {
-               ret = -EINVAL;
-               goto e_source_unlock;
-       }
+       ret = sev_lock_two_vms(kvm, source_kvm);
+       if (ret)
+               goto e_source_fput;
 
-       /* Mirrors of mirrors should work, but let's not get silly */
-       if (is_mirroring_enc_context(source_kvm) || source_kvm == kvm) {
+       /*
+        * Mirrors of mirrors should work, but let's not get silly.  Also
+        * disallow out-of-band SEV/SEV-ES init if the target is already an
+        * SEV guest, or if vCPUs have been created.  KVM relies on vCPUs being
+        * created after SEV/SEV-ES initialization, e.g. to init intercepts.
+        */
+       if (sev_guest(kvm) || !sev_guest(source_kvm) ||
+           is_mirroring_enc_context(source_kvm) || kvm->created_vcpus) {
                ret = -EINVAL;
-               goto e_source_unlock;
+               goto e_unlock;
        }
 
-       memcpy(&source_sev, &to_kvm_svm(source_kvm)->sev_info,
-              sizeof(source_sev));
-
        /*
         * The mirror kvm holds an enc_context_owner ref so its asid can't
         * disappear until we're done with it
         */
+       source_sev = &to_kvm_svm(source_kvm)->sev_info;
        kvm_get_kvm(source_kvm);
-
-       fput(source_kvm_file);
-       mutex_unlock(&source_kvm->lock);
-       mutex_lock(&kvm->lock);
-
-       /*
-        * Disallow out-of-band SEV/SEV-ES init if the target is already an
-        * SEV guest, or if vCPUs have been created.  KVM relies on vCPUs being
-        * created after SEV/SEV-ES initialization, e.g. to init intercepts.
-        */
-       if (sev_guest(kvm) || kvm->created_vcpus) {
-               ret = -EINVAL;
-               goto e_mirror_unlock;
-       }
+       source_sev->num_mirrored_vms++;
 
        /* Set enc_context_owner and copy its encryption context over */
        mirror_sev = &to_kvm_svm(kvm)->sev_info;
        mirror_sev->enc_context_owner = source_kvm;
        mirror_sev->active = true;
-       mirror_sev->asid = source_sev.asid;
-       mirror_sev->fd = source_sev.fd;
-       mirror_sev->es_active = source_sev.es_active;
-       mirror_sev->handle = source_sev.handle;
+       mirror_sev->asid = source_sev->asid;
+       mirror_sev->fd = source_sev->fd;
+       mirror_sev->es_active = source_sev->es_active;
+       mirror_sev->handle = source_sev->handle;
+       INIT_LIST_HEAD(&mirror_sev->regions_list);
+       ret = 0;
+
        /*
         * Do not copy ap_jump_table. Since the mirror does not share the same
         * KVM contexts as the original, and they may have different
         * memory-views.
         */
 
-       mutex_unlock(&kvm->lock);
-       return 0;
-
-e_mirror_unlock:
-       mutex_unlock(&kvm->lock);
-       kvm_put_kvm(source_kvm);
-       return ret;
-e_source_unlock:
-       mutex_unlock(&source_kvm->lock);
-e_source_put:
+e_unlock:
+       sev_unlock_two_vms(kvm, source_kvm);
+e_source_fput:
        if (source_kvm_file)
                fput(source_kvm_file);
        return ret;
@@ -2034,17 +2041,24 @@ void sev_vm_destroy(struct kvm *kvm)
        struct list_head *head = &sev->regions_list;
        struct list_head *pos, *q;
 
+       WARN_ON(sev->num_mirrored_vms);
+
        if (!sev_guest(kvm))
                return;
 
        /* If this is a mirror_kvm release the enc_context_owner and skip sev cleanup */
        if (is_mirroring_enc_context(kvm)) {
-               kvm_put_kvm(sev->enc_context_owner);
+               struct kvm *owner_kvm = sev->enc_context_owner;
+               struct kvm_sev_info *owner_sev = &to_kvm_svm(owner_kvm)->sev_info;
+
+               mutex_lock(&owner_kvm->lock);
+               if (!WARN_ON(!owner_sev->num_mirrored_vms))
+                       owner_sev->num_mirrored_vms--;
+               mutex_unlock(&owner_kvm->lock);
+               kvm_put_kvm(owner_kvm);
                return;
        }
 
-       mutex_lock(&kvm->lock);
-
        /*
         * Ensure that all guest tagged cache entries are flushed before
         * releasing the pages back to the system for use. CLFLUSH will
@@ -2064,8 +2078,6 @@ void sev_vm_destroy(struct kvm *kvm)
                }
        }
 
-       mutex_unlock(&kvm->lock);
-
        sev_unbind_asid(kvm, sev->handle);
        sev_asid_free(sev);
 }
@@ -2249,7 +2261,7 @@ void sev_free_vcpu(struct kvm_vcpu *vcpu)
        __free_page(virt_to_page(svm->sev_es.vmsa));
 
        if (svm->sev_es.ghcb_sa_free)
-               kfree(svm->sev_es.ghcb_sa);
+               kvfree(svm->sev_es.ghcb_sa);
 }
 
 static void dump_ghcb(struct vcpu_svm *svm)
@@ -2341,24 +2353,29 @@ static void sev_es_sync_from_ghcb(struct vcpu_svm *svm)
        memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
 }
 
-static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
+static bool sev_es_validate_vmgexit(struct vcpu_svm *svm)
 {
        struct kvm_vcpu *vcpu;
        struct ghcb *ghcb;
-       u64 exit_code = 0;
+       u64 exit_code;
+       u64 reason;
 
        ghcb = svm->sev_es.ghcb;
 
-       /* Only GHCB Usage code 0 is supported */
-       if (ghcb->ghcb_usage)
-               goto vmgexit_err;
-
        /*
-        * Retrieve the exit code now even though is may not be marked valid
+        * Retrieve the exit code now even though it may not be marked valid
         * as it could help with debugging.
         */
        exit_code = ghcb_get_sw_exit_code(ghcb);
 
+       /* Only GHCB Usage code 0 is supported */
+       if (ghcb->ghcb_usage) {
+               reason = GHCB_ERR_INVALID_USAGE;
+               goto vmgexit_err;
+       }
+
+       reason = GHCB_ERR_MISSING_INPUT;
+
        if (!ghcb_sw_exit_code_is_valid(ghcb) ||
            !ghcb_sw_exit_info_1_is_valid(ghcb) ||
            !ghcb_sw_exit_info_2_is_valid(ghcb))
@@ -2437,30 +2454,34 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
        case SVM_VMGEXIT_UNSUPPORTED_EVENT:
                break;
        default:
+               reason = GHCB_ERR_INVALID_EVENT;
                goto vmgexit_err;
        }
 
-       return 0;
+       return true;
 
 vmgexit_err:
        vcpu = &svm->vcpu;
 
-       if (ghcb->ghcb_usage) {
+       if (reason == GHCB_ERR_INVALID_USAGE) {
                vcpu_unimpl(vcpu, "vmgexit: ghcb usage %#x is not valid\n",
                            ghcb->ghcb_usage);
+       } else if (reason == GHCB_ERR_INVALID_EVENT) {
+               vcpu_unimpl(vcpu, "vmgexit: exit code %#llx is not valid\n",
+                           exit_code);
        } else {
-               vcpu_unimpl(vcpu, "vmgexit: exit reason %#llx is not valid\n",
+               vcpu_unimpl(vcpu, "vmgexit: exit code %#llx input is not valid\n",
                            exit_code);
                dump_ghcb(svm);
        }
 
-       vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
-       vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
-       vcpu->run->internal.ndata = 2;
-       vcpu->run->internal.data[0] = exit_code;
-       vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu;
+       /* Clear the valid entries fields */
+       memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
+
+       ghcb_set_sw_exit_info_1(ghcb, 2);
+       ghcb_set_sw_exit_info_2(ghcb, reason);
 
-       return -EINVAL;
+       return false;
 }
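With the bool return, a malformed VMGEXIT no longer kills the VM with KVM_EXIT_INTERNAL_ERROR; the failure is reflected back to the guest through sw_exit_info_1 = 2 and a reason code in sw_exit_info_2, which is the reporting scheme the GHCB protocol expects. A hedged guest-side sketch of checking that outcome after a VMGEXIT; do_vmgexit() is a hypothetical stand-in for the real instruction sequence, and the "2" mirrors what the hunk above writes:

/* Guest-side sketch, reusing the ghcb_get_* accessors from <asm/svm.h>. */
static int ghcb_call_and_check(struct ghcb *ghcb)
{
	do_vmgexit();			/* hypothetical VMGEXIT wrapper */

	if (ghcb_get_sw_exit_info_1(ghcb) == 2) {
		pr_warn("vmgexit rejected, reason %llu\n",
			ghcb_get_sw_exit_info_2(ghcb));
		return -EIO;
	}
	return 0;
}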
 
 void sev_es_unmap_ghcb(struct vcpu_svm *svm)
@@ -2482,7 +2503,7 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm)
                        svm->sev_es.ghcb_sa_sync = false;
                }
 
-               kfree(svm->sev_es.ghcb_sa);
+               kvfree(svm->sev_es.ghcb_sa);
                svm->sev_es.ghcb_sa = NULL;
                svm->sev_es.ghcb_sa_free = false;
        }
@@ -2530,14 +2551,14 @@ static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
        scratch_gpa_beg = ghcb_get_sw_scratch(ghcb);
        if (!scratch_gpa_beg) {
                pr_err("vmgexit: scratch gpa not provided\n");
-               return false;
+               goto e_scratch;
        }
 
        scratch_gpa_end = scratch_gpa_beg + len;
        if (scratch_gpa_end < scratch_gpa_beg) {
                pr_err("vmgexit: scratch length (%#llx) not valid for scratch address (%#llx)\n",
                       len, scratch_gpa_beg);
-               return false;
+               goto e_scratch;
        }
 
        if ((scratch_gpa_beg & PAGE_MASK) == control->ghcb_gpa) {
@@ -2555,7 +2576,7 @@ static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
                    scratch_gpa_end > ghcb_scratch_end) {
                        pr_err("vmgexit: scratch area is outside of GHCB shared buffer area (%#llx - %#llx)\n",
                               scratch_gpa_beg, scratch_gpa_end);
-                       return false;
+                       goto e_scratch;
                }
 
                scratch_va = (void *)svm->sev_es.ghcb;
@@ -2568,18 +2589,18 @@ static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
                if (len > GHCB_SCRATCH_AREA_LIMIT) {
                        pr_err("vmgexit: scratch area exceeds KVM limits (%#llx requested, %#llx limit)\n",
                               len, GHCB_SCRATCH_AREA_LIMIT);
-                       return false;
+                       goto e_scratch;
                }
-               scratch_va = kzalloc(len, GFP_KERNEL_ACCOUNT);
+               scratch_va = kvzalloc(len, GFP_KERNEL_ACCOUNT);
                if (!scratch_va)
-                       return false;
+                       goto e_scratch;
 
                if (kvm_read_guest(svm->vcpu.kvm, scratch_gpa_beg, scratch_va, len)) {
                        /* Unable to copy scratch area from guest */
                        pr_err("vmgexit: kvm_read_guest for scratch area failed\n");
 
-                       kfree(scratch_va);
-                       return false;
+                       kvfree(scratch_va);
+                       goto e_scratch;
                }
 
                /*
@@ -2596,6 +2617,12 @@ static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
        svm->sev_es.ghcb_sa_len = len;
 
        return true;
+
+e_scratch:
+       ghcb_set_sw_exit_info_1(ghcb, 2);
+       ghcb_set_sw_exit_info_2(ghcb, GHCB_ERR_INVALID_SCRATCH_AREA);
+
+       return false;
 }
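The scratch length here is guest-controlled (bounded only by GHCB_SCRATCH_AREA_LIMIT), so the allocation moves from kzalloc() to kvzalloc(), which falls back to vmalloc when the request is too large or memory is too fragmented for the slab allocator; every matching free becomes kvfree(). A small kernel-style sketch of the pairing; the SZ_64K cap is an assumed stand-in for the real limit:

#include <linux/mm.h>
#include <linux/sizes.h>
#include <linux/slab.h>
#include <linux/string.h>

static void *copy_bounded_buf(const void *src, size_t len)
{
	void *buf;

	if (len > SZ_64K)		/* assumed upper bound */
		return NULL;
	buf = kvzalloc(len, GFP_KERNEL_ACCOUNT);	/* kmalloc- or vmalloc-backed */
	if (!buf)
		return NULL;
	memcpy(buf, src, len);
	return buf;			/* caller releases with kvfree() */
}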
 
 static void set_ghcb_msr_bits(struct vcpu_svm *svm, u64 value, u64 mask,
@@ -2646,7 +2673,7 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
 
                ret = svm_invoke_exit_handler(vcpu, SVM_EXIT_CPUID);
                if (!ret) {
-                       ret = -EINVAL;
+                       /* Error, keep GHCB MSR value as-is */
                        break;
                }
 
@@ -2682,10 +2709,13 @@ static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
                                                GHCB_MSR_TERM_REASON_POS);
                pr_info("SEV-ES guest requested termination: %#llx:%#llx\n",
                        reason_set, reason_code);
-               fallthrough;
+
+               ret = -EINVAL;
+               break;
        }
        default:
-               ret = -EINVAL;
+               /* Error, keep GHCB MSR value as-is */
+               break;
        }
 
        trace_kvm_vmgexit_msr_protocol_exit(svm->vcpu.vcpu_id,
@@ -2709,14 +2739,18 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
 
        if (!ghcb_gpa) {
                vcpu_unimpl(vcpu, "vmgexit: GHCB gpa is not set\n");
-               return -EINVAL;
+
+               /* Without a GHCB, just return right back to the guest */
+               return 1;
        }
 
        if (kvm_vcpu_map(vcpu, ghcb_gpa >> PAGE_SHIFT, &svm->sev_es.ghcb_map)) {
                /* Unable to map GHCB from guest */
                vcpu_unimpl(vcpu, "vmgexit: error mapping GHCB [%#llx] from guest\n",
                            ghcb_gpa);
-               return -EINVAL;
+
+               /* Without a GHCB, just return right back to the guest */
+               return 1;
        }
 
        svm->sev_es.ghcb = svm->sev_es.ghcb_map.hva;
@@ -2726,15 +2760,14 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
 
        exit_code = ghcb_get_sw_exit_code(ghcb);
 
-       ret = sev_es_validate_vmgexit(svm);
-       if (ret)
-               return ret;
+       if (!sev_es_validate_vmgexit(svm))
+               return 1;
 
        sev_es_sync_from_ghcb(svm);
        ghcb_set_sw_exit_info_1(ghcb, 0);
        ghcb_set_sw_exit_info_2(ghcb, 0);
 
-       ret = -EINVAL;
+       ret = 1;
        switch (exit_code) {
        case SVM_VMGEXIT_MMIO_READ:
                if (!setup_vmgexit_scratch(svm, true, control->exit_info_2))
@@ -2775,20 +2808,17 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
                default:
                        pr_err("svm: vmgexit: unsupported AP jump table request - exit_info_1=%#llx\n",
                               control->exit_info_1);
-                       ghcb_set_sw_exit_info_1(ghcb, 1);
-                       ghcb_set_sw_exit_info_2(ghcb,
-                                               X86_TRAP_UD |
-                                               SVM_EVTINJ_TYPE_EXEPT |
-                                               SVM_EVTINJ_VALID);
+                       ghcb_set_sw_exit_info_1(ghcb, 2);
+                       ghcb_set_sw_exit_info_2(ghcb, GHCB_ERR_INVALID_INPUT);
                }
 
-               ret = 1;
                break;
        }
        case SVM_VMGEXIT_UNSUPPORTED_EVENT:
                vcpu_unimpl(vcpu,
                            "vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n",
                            control->exit_info_1, control->exit_info_2);
+               ret = -EINVAL;
                break;
        default:
                ret = svm_invoke_exit_handler(vcpu, exit_code);
@@ -2810,7 +2840,7 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in)
                return -EINVAL;
 
        if (!setup_vmgexit_scratch(svm, in, bytes))
-               return -EINVAL;
+               return 1;
 
        return kvm_sev_es_string_io(&svm->vcpu, size, port, svm->sev_es.ghcb_sa,
                                    count, in);
index 5630c24..5557867 100644 (file)
@@ -192,6 +192,10 @@ module_param(vgif, int, 0444);
 static int lbrv = true;
 module_param(lbrv, int, 0444);
 
+/* enable/disable PMU virtualization */
+bool pmu = true;
+module_param(pmu, bool, 0444);
+
 static int tsc_scaling = true;
 module_param(tsc_scaling, int, 0444);
 
@@ -265,7 +269,7 @@ u32 svm_msrpm_offset(u32 msr)
 
 #define MAX_INST_SIZE 15
 
-static int get_max_npt_level(void)
+static int get_npt_level(void)
 {
 #ifdef CONFIG_X86_64
        return pgtable_l5_enabled() ? PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL;
@@ -585,12 +589,10 @@ static int svm_cpu_init(int cpu)
        if (!sd)
                return ret;
        sd->cpu = cpu;
-       sd->save_area = alloc_page(GFP_KERNEL);
+       sd->save_area = alloc_page(GFP_KERNEL | __GFP_ZERO);
        if (!sd->save_area)
                goto free_cpu_data;
 
-       clear_page(page_address(sd->save_area));
-
        ret = sev_cpu_init(sd);
        if (ret)
                goto free_save_area;
@@ -954,6 +956,10 @@ static __init void svm_set_cpu_caps(void)
            boot_cpu_has(X86_FEATURE_AMD_SSBD))
                kvm_cpu_cap_set(X86_FEATURE_VIRT_SSBD);
 
+       /* AMD PMU PERFCTR_CORE CPUID */
+       if (pmu && boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
+               kvm_cpu_cap_set(X86_FEATURE_PERFCTR_CORE);
+
        /* CPUID 0x8000001F (SME/SEV features) */
        sev_set_cpu_caps();
 }
@@ -1029,9 +1035,9 @@ static __init int svm_hardware_setup(void)
        if (!boot_cpu_has(X86_FEATURE_NPT))
                npt_enabled = false;
 
-       /* Force VM NPT level equal to the host's max NPT level */
-       kvm_configure_mmu(npt_enabled, get_max_npt_level(),
-                         get_max_npt_level(), PG_LEVEL_1G);
+       /* Force VM NPT level equal to the host's paging level */
+       kvm_configure_mmu(npt_enabled, get_npt_level(),
+                         get_npt_level(), PG_LEVEL_1G);
        pr_info("kvm: Nested Paging %sabled\n", npt_enabled ? "en" : "dis");
 
        /* Note, SEV setup consumes npt_enabled. */
@@ -1087,6 +1093,9 @@ static __init int svm_hardware_setup(void)
                        pr_info("LBR virtualization supported\n");
        }
 
+       if (!pmu)
+               pr_info("PMU virtualization is disabled\n");
+
        svm_set_cpu_caps();
 
        /*
@@ -1585,12 +1594,27 @@ static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
        to_svm(vcpu)->vmcb->save.rflags = rflags;
 }
 
+static bool svm_get_if_flag(struct kvm_vcpu *vcpu)
+{
+       struct vmcb *vmcb = to_svm(vcpu)->vmcb;
+
+       return sev_es_guest(vcpu->kvm)
+               ? vmcb->control.int_state & SVM_GUEST_INTERRUPT_MASK
+               : kvm_get_rflags(vcpu) & X86_EFLAGS_IF;
+}
+
 static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
 {
+       kvm_register_mark_available(vcpu, reg);
+
        switch (reg) {
        case VCPU_EXREG_PDPTR:
-               BUG_ON(!npt_enabled);
-               load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
+               /*
+                * When !npt_enabled, mmu->pdptrs[] is already available since
+                * it is always updated per SDM when moving to CRs.
+                */
+               if (npt_enabled)
+                       load_pdptrs(vcpu, kvm_read_cr3(vcpu));
                break;
        default:
                KVM_BUG_ON(1, vcpu->kvm);
@@ -2508,7 +2532,7 @@ static bool check_selective_cr0_intercepted(struct kvm_vcpu *vcpu,
        bool ret = false;
 
        if (!is_guest_mode(vcpu) ||
-           (!(vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_SELECTIVE_CR0))))
+           (!(vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_SELECTIVE_CR0))))
                return false;
 
        cr0 &= ~SVM_CR0_SELECTIVE_MASK;
@@ -3568,14 +3592,7 @@ bool svm_interrupt_blocked(struct kvm_vcpu *vcpu)
        if (!gif_set(svm))
                return true;
 
-       if (sev_es_guest(vcpu->kvm)) {
-               /*
-                * SEV-ES guests to not expose RFLAGS. Use the VMCB interrupt mask
-                * bit to determine the state of the IF flag.
-                */
-               if (!(vmcb->control.int_state & SVM_GUEST_INTERRUPT_MASK))
-                       return true;
-       } else if (is_guest_mode(vcpu)) {
+       if (is_guest_mode(vcpu)) {
                /* As long as interrupts are being delivered...  */
                if ((svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK)
                    ? !(svm->vmcb01.ptr->save.rflags & X86_EFLAGS_IF)
@@ -3586,7 +3603,7 @@ bool svm_interrupt_blocked(struct kvm_vcpu *vcpu)
                if (nested_exit_on_intr(svm))
                        return false;
        } else {
-               if (!(kvm_get_rflags(vcpu) & X86_EFLAGS_IF))
+               if (!svm_get_if_flag(vcpu))
                        return true;
        }
 
@@ -3929,6 +3946,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
                vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
                vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
        }
+       vcpu->arch.regs_dirty = 0;
 
        if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
                kvm_before_interrupt(vcpu);
@@ -3963,8 +3981,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
                vcpu->arch.apf.host_apf_flags =
                        kvm_read_and_reset_apf_flags();
 
-       if (npt_enabled)
-               kvm_register_clear_available(vcpu, VCPU_EXREG_PDPTR);
+       vcpu->arch.regs_avail &= ~SVM_REGS_LAZY_LOAD_SET;
 
        /*
         * We need to handle MC intercepts here before the vcpu has a chance to
@@ -3994,9 +4011,6 @@ static void svm_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa,
 
                hv_track_root_tdp(vcpu, root_hpa);
 
-               /* Loading L2's CR3 is handled by enter_svm_guest_mode.  */
-               if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail))
-                       return;
                cr3 = vcpu->arch.cr3;
        } else if (vcpu->arch.mmu->shadow_root_level >= PT64_ROOT_4LEVEL) {
                cr3 = __sme_set(root_hpa) | kvm_get_active_pcid(vcpu);
@@ -4215,7 +4229,7 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu,
                    info->intercept == x86_intercept_clts)
                        break;
 
-               if (!(vmcb_is_intercept(&svm->nested.ctl,
+               if (!(vmcb12_is_intercept(&svm->nested.ctl,
                                        INTERCEPT_SELECTIVE_CR0)))
                        break;
 
@@ -4434,7 +4448,8 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
         */
 
        vmcb12 = map.hva;
-       nested_load_control_from_vmcb12(svm, &vmcb12->control);
+       nested_copy_vmcb_control_to_cache(svm, &vmcb12->control);
+       nested_copy_vmcb_save_to_cache(svm, &vmcb12->save);
        ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12, false);
 
 unmap_save:
@@ -4621,6 +4636,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
        .cache_reg = svm_cache_reg,
        .get_rflags = svm_get_rflags,
        .set_rflags = svm_set_rflags,
+       .get_if_flag = svm_get_if_flag,
 
        .tlb_flush_all = svm_flush_tlb,
        .tlb_flush_current = svm_flush_tlb,
@@ -4651,7 +4667,6 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
        .load_eoi_exitmap = svm_load_eoi_exitmap,
        .hwapic_irr_update = svm_hwapic_irr_update,
        .hwapic_isr_update = svm_hwapic_isr_update,
-       .sync_pir_to_irr = kvm_lapic_find_highest_irr,
        .apicv_post_state_restore = avic_post_state_restore,
 
        .set_tss_addr = svm_set_tss_addr,
index 5faad3d..9f153c5 100644 (file)
@@ -32,6 +32,7 @@
 extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
 extern bool npt_enabled;
 extern bool intercept_smi;
+extern bool pmu;
 
 /*
  * Clean bits in VMCB.
@@ -79,6 +80,7 @@ struct kvm_sev_info {
        struct list_head regions_list;  /* List of registered regions */
        u64 ap_jump_table;      /* SEV-ES AP Jump Table address */
        struct kvm *enc_context_owner; /* Owner of copied encryption context */
+       unsigned long num_mirrored_vms; /* Number of VMs sharing this ASID */
        struct misc_cg *misc_cg; /* For misc cgroup accounting */
        atomic_t migration_in_progress;
 };
@@ -104,6 +106,40 @@ struct kvm_vmcb_info {
        uint64_t asid_generation;
 };
 
+struct vmcb_save_area_cached {
+       u64 efer;
+       u64 cr4;
+       u64 cr3;
+       u64 cr0;
+       u64 dr7;
+       u64 dr6;
+};
+
+struct vmcb_ctrl_area_cached {
+       u32 intercepts[MAX_INTERCEPT];
+       u16 pause_filter_thresh;
+       u16 pause_filter_count;
+       u64 iopm_base_pa;
+       u64 msrpm_base_pa;
+       u64 tsc_offset;
+       u32 asid;
+       u8 tlb_ctl;
+       u32 int_ctl;
+       u32 int_vector;
+       u32 int_state;
+       u32 exit_code;
+       u32 exit_code_hi;
+       u64 exit_info_1;
+       u64 exit_info_2;
+       u32 exit_int_info;
+       u32 exit_int_info_err;
+       u64 nested_ctl;
+       u32 event_inj;
+       u32 event_inj_err;
+       u64 nested_cr3;
+       u64 virt_ext;
+};
+
 struct svm_nested_state {
        struct kvm_vmcb_info vmcb02;
        u64 hsave_msr;
@@ -119,7 +155,13 @@ struct svm_nested_state {
        bool nested_run_pending;
 
        /* cache for control fields of the guest */
-       struct vmcb_control_area ctl;
+       struct vmcb_ctrl_area_cached ctl;
+
+       /*
+        * Note: this struct is not kept up-to-date while L2 runs; it is only
+        * valid within nested_svm_vmrun.
+        */
+       struct vmcb_save_area_cached save;
 
        bool initialized;
 };
@@ -284,6 +326,16 @@ static __always_inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
        return container_of(vcpu, struct vcpu_svm, vcpu);
 }
 
+/*
+ * Only the PDPTRs are loaded on demand into the shadow MMU.  All other
+ * fields are synchronized in handle_exit, because accessing the VMCB is cheap.
+ *
+ * CR3 might be out of date in the VMCB but it is not marked dirty; instead,
+ * KVM_REQ_LOAD_MMU_PGD is always requested when the cached vcpu->arch.cr3
+ * is changed.  svm_load_mmu_pgd() then syncs the new CR3 value into the VMCB.
+ */
+#define SVM_REGS_LAZY_LOAD_SET (1 << VCPU_EXREG_PDPTR)
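SVM_REGS_LAZY_LOAD_SET names the registers whose cached values are invalidated after every run (vcpu->arch.regs_avail &= ~SVM_REGS_LAZY_LOAD_SET in svm_vcpu_run()) and refilled by svm_cache_reg() only when first read. A generic sketch of that avail-bit pattern, with invented names:

#include <stdint.h>

#define REG_PDPTR_BIT	(1u << 0)

struct cached_regs {
	uint32_t avail;			/* bit set => cached value is current */
	uint64_t pdptr0;
};

/* Hypothetical slow path that would pull the value from the VMCB/MMU. */
static uint64_t reload_pdptr0(void) { return 0; }

static uint64_t read_pdptr0(struct cached_regs *r)
{
	if (!(r->avail & REG_PDPTR_BIT)) {	/* first access since VM-exit */
		r->pdptr0 = reload_pdptr0();
		r->avail |= REG_PDPTR_BIT;
	}
	return r->pdptr0;
}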
+
 static inline void vmcb_set_intercept(struct vmcb_control_area *control, u32 bit)
 {
        WARN_ON_ONCE(bit >= 32 * MAX_INTERCEPT);
@@ -302,6 +354,12 @@ static inline bool vmcb_is_intercept(struct vmcb_control_area *control, u32 bit)
        return test_bit(bit, (unsigned long *)&control->intercepts);
 }
 
+static inline bool vmcb12_is_intercept(struct vmcb_ctrl_area_cached *control, u32 bit)
+{
+       WARN_ON_ONCE(bit >= 32 * MAX_INTERCEPT);
+       return test_bit(bit, (unsigned long *)&control->intercepts);
+}
+
 static inline void set_dr_intercepts(struct vcpu_svm *svm)
 {
        struct vmcb *vmcb = svm->vmcb01.ptr;
@@ -454,17 +512,17 @@ static inline bool nested_svm_virtualize_tpr(struct kvm_vcpu *vcpu)
 
 static inline bool nested_exit_on_smi(struct vcpu_svm *svm)
 {
-       return vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_SMI);
+       return vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_SMI);
 }
 
 static inline bool nested_exit_on_intr(struct vcpu_svm *svm)
 {
-       return vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_INTR);
+       return vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_INTR);
 }
 
 static inline bool nested_exit_on_nmi(struct vcpu_svm *svm)
 {
-       return vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_NMI);
+       return vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_NMI);
 }
 
 int enter_svm_guest_mode(struct kvm_vcpu *vcpu,
@@ -493,8 +551,10 @@ int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
 int nested_svm_exit_special(struct vcpu_svm *svm);
 void nested_svm_update_tsc_ratio_msr(struct kvm_vcpu *vcpu);
 void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 multiplier);
-void nested_load_control_from_vmcb12(struct vcpu_svm *svm,
-                                    struct vmcb_control_area *control);
+void nested_copy_vmcb_control_to_cache(struct vcpu_svm *svm,
+                                      struct vmcb_control_area *control);
+void nested_copy_vmcb_save_to_cache(struct vcpu_svm *svm,
+                                   struct vmcb_save_area *save);
 void nested_sync_control_from_vmcb02(struct vcpu_svm *svm);
 void nested_vmcb02_compute_g_pat(struct vcpu_svm *svm);
 void svm_switch_vmcb(struct vcpu_svm *svm, struct kvm_vmcb_info *target_vmcb);
index 953b0fc..92e6f67 100644 (file)
@@ -1356,6 +1356,30 @@ TRACE_EVENT(kvm_apicv_update_request,
                  __entry->bit)
 );
 
+TRACE_EVENT(kvm_apicv_accept_irq,
+           TP_PROTO(__u32 apicid, __u16 dm, __u16 tm, __u8 vec),
+           TP_ARGS(apicid, dm, tm, vec),
+
+       TP_STRUCT__entry(
+               __field(        __u32,          apicid          )
+               __field(        __u16,          dm              )
+               __field(        __u16,          tm              )
+               __field(        __u8,           vec             )
+       ),
+
+       TP_fast_assign(
+               __entry->apicid         = apicid;
+               __entry->dm             = dm;
+               __entry->tm             = tm;
+               __entry->vec            = vec;
+       ),
+
+       TP_printk("apicid %x vec %u (%s|%s)",
+                 __entry->apicid, __entry->vec,
+                 __print_symbolic((__entry->dm >> 8 & 0x7), kvm_deliver_mode),
+                 __entry->tm ? "level" : "edge")
+);
+
 /*
  * Tracepoint for AMD AVIC
  */
index 4705ad5..c8029b7 100644 (file)
@@ -312,6 +312,15 @@ static inline bool cpu_has_vmx_ept_1g_page(void)
        return vmx_capability.ept & VMX_EPT_1GB_PAGE_BIT;
 }
 
+static inline int ept_caps_to_lpage_level(u32 ept_caps)
+{
+       if (ept_caps & VMX_EPT_1GB_PAGE_BIT)
+               return PG_LEVEL_1G;
+       if (ept_caps & VMX_EPT_2MB_PAGE_BIT)
+               return PG_LEVEL_2M;
+       return PG_LEVEL_4K;
+}
+
 static inline bool cpu_has_vmx_ept_ad_bits(void)
 {
        return vmx_capability.ept & VMX_EPT_AD_BIT;
index 1e2f669..2f6f465 100644 (file)
@@ -269,7 +269,13 @@ static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
        vmx_sync_vmcs_host_state(vmx, prev);
        put_cpu();
 
-       vmx_register_cache_reset(vcpu);
+       vcpu->arch.regs_avail = ~VMX_REGS_LAZY_LOAD_SET;
+
+       /*
+        * All lazily updated registers will be reloaded from VMCS12 on both
+        * vmentry and vmexit.
+        */
+       vcpu->arch.regs_dirty = 0;
 }
 
 /*
@@ -391,9 +397,11 @@ static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
 
 static void nested_ept_new_eptp(struct kvm_vcpu *vcpu)
 {
-       kvm_init_shadow_ept_mmu(vcpu,
-                               to_vmx(vcpu)->nested.msrs.ept_caps &
-                               VMX_EPT_EXECUTE_ONLY_BIT,
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
+       bool execonly = vmx->nested.msrs.ept_caps & VMX_EPT_EXECUTE_ONLY_BIT;
+       int ept_lpage_level = ept_caps_to_lpage_level(vmx->nested.msrs.ept_caps);
+
+       kvm_init_shadow_ept_mmu(vcpu, execonly, ept_lpage_level,
                                nested_ept_ad_enabled(vcpu),
                                nested_ept_get_eptp(vcpu));
 }
@@ -591,6 +599,7 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
        int msr;
        unsigned long *msr_bitmap_l1;
        unsigned long *msr_bitmap_l0 = vmx->nested.vmcs02.msr_bitmap;
+       struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs;
        struct kvm_host_map *map = &vmx->nested.msr_bitmap_map;
 
        /* Nothing to do if the MSR bitmap is not in use.  */
@@ -598,6 +607,19 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
            !nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
                return false;
 
+       /*
+        * MSR bitmap update can be skipped when:
+        * - MSR bitmap for L1 hasn't changed.
+        * - Nested hypervisor (L1) is attempting to launch the same L2 as
+        *   before.
+        * - Nested hypervisor (L1) has enabled 'Enlightened MSR Bitmap' feature
+        *   and tells KVM (L0) there were no changes in MSR bitmap for L2.
+        */
+       if (!vmx->nested.force_msr_bitmap_recalc && evmcs &&
+           evmcs->hv_enlightenments_control.msr_bitmap &&
+           evmcs->hv_clean_fields & HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP)
+               return true;
+
        if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->msr_bitmap), map))
                return false;
 
@@ -664,6 +686,8 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
 
        kvm_vcpu_unmap(vcpu, &vmx->nested.msr_bitmap_map, false);
 
+       vmx->nested.force_msr_bitmap_recalc = false;
+
        return true;
 }
 
@@ -1095,7 +1119,7 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
         * must not be dereferenced.
         */
        if (reload_pdptrs && !nested_ept && is_pae_paging(vcpu) &&
-           CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))) {
+           CC(!load_pdptrs(vcpu, cr3))) {
                *entry_failure_code = ENTRY_FAIL_PDPTE;
                return -EINVAL;
        }
@@ -1104,7 +1128,7 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
                kvm_mmu_new_pgd(vcpu, cr3);
 
        vcpu->arch.cr3 = cr3;
-       kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
+       kvm_register_mark_dirty(vcpu, VCPU_EXREG_CR3);
 
        /* Re-initialize the MMU, e.g. to pick up CR4 MMU role changes. */
        kvm_init_mmu(vcpu);
@@ -1162,29 +1186,26 @@ static void nested_vmx_transition_tlb_flush(struct kvm_vcpu *vcpu,
        WARN_ON(!enable_vpid);
 
        /*
-        * If VPID is enabled and used by vmc12, but L2 does not have a unique
-        * TLB tag (ASID), i.e. EPT is disabled and KVM was unable to allocate
-        * a VPID for L2, flush the current context as the effective ASID is
-        * common to both L1 and L2.
-        *
-        * Defer the flush so that it runs after vmcs02.EPTP has been set by
-        * KVM_REQ_LOAD_MMU_PGD (if nested EPT is enabled) and to avoid
-        * redundant flushes further down the nested pipeline.
-        *
-        * If a TLB flush isn't required due to any of the above, and vpid12 is
-        * changing then the new "virtual" VPID (vpid12) will reuse the same
-        * "real" VPID (vpid02), and so needs to be flushed.  There's no direct
-        * mapping between vpid02 and vpid12, vpid02 is per-vCPU and reused for
-        * all nested vCPUs.  Remember, a flush on VM-Enter does not invalidate
-        * guest-physical mappings, so there is no need to sync the nEPT MMU.
+        * VPID is enabled and in use by vmcs12.  If vpid12 is changing, then
+        * emulate a guest TLB flush as KVM does not track vpid12 history nor
+        * is the VPID incorporated into the MMU context.  I.e. KVM must assume
+        * that the new vpid12 has never been used and thus represents a new
+        * guest ASID that cannot have entries in the TLB.
         */
-       if (!nested_has_guest_tlb_tag(vcpu)) {
-               kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
-       } else if (is_vmenter &&
-                  vmcs12->virtual_processor_id != vmx->nested.last_vpid) {
+       if (is_vmenter && vmcs12->virtual_processor_id != vmx->nested.last_vpid) {
                vmx->nested.last_vpid = vmcs12->virtual_processor_id;
-               vpid_sync_context(nested_get_vpid02(vcpu));
+               kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
+               return;
        }
+
+       /*
+        * If VPID is enabled, used by vmcs12, and vpid12 is not changing but
+        * does not have a unique TLB tag (ASID), i.e. EPT is disabled and
+        * KVM was unable to allocate a VPID for L2, flush the current context
+        * as the effective ASID is common to both L1 and L2.
+        */
+       if (!nested_has_guest_tlb_tag(vcpu))
+               kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
 }
 
 static bool is_bitwise_subset(u64 superset, u64 subset, u64 mask)
@@ -2024,10 +2045,13 @@ static enum nested_evmptrld_status nested_vmx_handle_enlightened_vmptrld(
         * Clean fields data can't be used on VMLAUNCH and when we switch
         * between different L2 guests as KVM keeps a single VMCS12 per L1.
         */
-       if (from_launch || evmcs_gpa_changed)
+       if (from_launch || evmcs_gpa_changed) {
                vmx->nested.hv_evmcs->hv_clean_fields &=
                        ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
 
+               vmx->nested.force_msr_bitmap_recalc = true;
+       }
+
        return EVMPTRLD_SUCCEEDED;
 }
 
@@ -2594,8 +2618,10 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 
        if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) &&
            WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
-                                    vmcs12->guest_ia32_perf_global_ctrl)))
+                                    vmcs12->guest_ia32_perf_global_ctrl))) {
+               *entry_failure_code = ENTRY_FAIL_DEFAULT;
                return -EINVAL;
+       }
 
        kvm_rsp_write(vcpu, vmcs12->guest_rsp);
        kvm_rip_write(vcpu, vmcs12->guest_rip);
@@ -3028,7 +3054,7 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
 static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
-       unsigned long cr3, cr4;
+       unsigned long cr4;
        bool vm_fail;
 
        if (!nested_early_check)
@@ -3051,12 +3077,6 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
         */
        vmcs_writel(GUEST_RFLAGS, 0);
 
-       cr3 = __get_current_cr3_fast();
-       if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
-               vmcs_writel(HOST_CR3, cr3);
-               vmx->loaded_vmcs->host_state.cr3 = cr3;
-       }
-
        cr4 = cr4_read_shadow();
        if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
                vmcs_writel(HOST_CR4, cr4);
@@ -3146,7 +3166,7 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
                 * the guest CR3 might be restored prior to setting the nested
                 * state which can lead to a load of wrong PDPTRs.
                 */
-               if (CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, vcpu->arch.cr3)))
+               if (CC(!load_pdptrs(vcpu, vcpu->arch.cr3)))
                        return false;
        }
 
@@ -3344,8 +3364,7 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
        };
        u32 failed_index;
 
-       if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
-               kvm_vcpu_flush_tlb_current(vcpu);
+       kvm_service_local_tlb_flush_requests(vcpu);
 
        evaluate_pending_interrupts = exec_controls_get(vmx) &
                (CPU_BASED_INTR_WINDOW_EXITING | CPU_BASED_NMI_WINDOW_EXITING);
@@ -3605,7 +3624,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
                    !(nested_cpu_has(vmcs12, CPU_BASED_INTR_WINDOW_EXITING) &&
                      (vmcs12->guest_rflags & X86_EFLAGS_IF))) {
                        vmx->nested.nested_run_pending = 0;
-                       return kvm_vcpu_halt(vcpu);
+                       return kvm_emulate_halt_noskip(vcpu);
                }
                break;
        case GUEST_ACTIVITY_WAIT_SIPI:
@@ -4502,9 +4521,8 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
                (void)nested_get_evmcs_page(vcpu);
        }
 
-       /* Service the TLB flush request for L2 before switching to L1. */
-       if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
-               kvm_vcpu_flush_tlb_current(vcpu);
+       /* Service pending TLB flush requests for L2 before switching to L1. */
+       kvm_service_local_tlb_flush_requests(vcpu);
 
        /*
         * VCPU_EXREG_PDPTR will be clobbered in arch/x86/kvm/vmx/vmx.h between
@@ -4857,6 +4875,7 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu)
        if (!vmx->nested.cached_vmcs12)
                goto out_cached_vmcs12;
 
+       vmx->nested.shadow_vmcs12_cache.gpa = INVALID_GPA;
        vmx->nested.cached_shadow_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL_ACCOUNT);
        if (!vmx->nested.cached_shadow_vmcs12)
                goto out_cached_shadow_vmcs12;
@@ -5260,6 +5279,7 @@ static void set_current_vmptr(struct vcpu_vmx *vmx, gpa_t vmptr)
                vmx->nested.need_vmcs12_to_shadow_sync = true;
        }
        vmx->nested.dirty_vmcs12 = true;
+       vmx->nested.force_msr_bitmap_recalc = true;
 }
 
 /* Emulate the VMPTRLD instruction */
@@ -5289,8 +5309,7 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
                struct gfn_to_hva_cache *ghc = &vmx->nested.vmcs12_cache;
                struct vmcs_hdr hdr;
 
-               if (ghc->gpa != vmptr &&
-                   kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, vmptr, VMCS12_SIZE)) {
+               if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, vmptr, VMCS12_SIZE)) {
                        /*
                         * Reads from an unbacked page return all 1s,
                         * which means that the 32 bits located at the
@@ -6396,6 +6415,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
                goto error_guest_mode;
 
        vmx->nested.dirty_vmcs12 = true;
+       vmx->nested.force_msr_bitmap_recalc = true;
        ret = nested_vmx_enter_non_root_mode(vcpu, false);
        if (ret)
                goto error_guest_mode;
index 5f81ef0..88c53c5 100644 (file)
@@ -5,15 +5,28 @@
 #include <asm/cpu.h>
 
 #include "lapic.h"
+#include "irq.h"
 #include "posted_intr.h"
 #include "trace.h"
 #include "vmx.h"
 
 /*
- * We maintain a per-CPU linked-list of vCPU, so in wakeup_handler() we
- * can find which vCPU should be waken up.
+ * Maintain a per-CPU list of vCPUs that need to be awakened by wakeup_handler()
+ * when a WAKEUP_VECTOR interrupt is posted.  vCPUs are added to the list when
+ * the vCPU is scheduled out and is blocking (e.g. in HLT) with IRQs enabled.
+ * The vCPU's posted interrupt descriptor is updated at the same time to set its
+ * notification vector to WAKEUP_VECTOR, so that a posted interrupt from a device
+ * wakes the target vCPU.  vCPUs are removed from the list and the notification
+ * vector is reset when the vCPU is scheduled in.
  */
 static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu);
+/*
+ * Protect the per-CPU list with a per-CPU spinlock to handle task migration.
+ * When a blocking vCPU is awakened _and_ migrated to a different pCPU, the
+ * ->sched_in() path will need to take the vCPU off the list of the _previous_
+ * CPU.  IRQs must be disabled when taking this lock, otherwise deadlock will
+ * occur if a wakeup IRQ arrives and attempts to acquire the lock.
+ */
 static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
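The comment above boils down to a lock-ordering rule: any non-IRQ path that touches a per-CPU blocked list must disable IRQs around the per-CPU spinlock, because the wakeup handler runs in hard IRQ context on the same CPU and takes the same lock. A minimal kernel-style sketch of that rule, with invented names (per-CPU locks assumed to be spin_lock_init()'d at module init):

#include <linux/list.h>
#include <linux/percpu.h>
#include <linux/spinlock.h>

static DEFINE_PER_CPU(struct list_head, pending_list);
static DEFINE_PER_CPU(spinlock_t, pending_lock);

/* Task-context path: IRQs must be off while the lock is held, otherwise the
 * IRQ handler below can interrupt the holder on this CPU and spin forever. */
static void queue_pending(int cpu, struct list_head *entry)
{
	unsigned long flags;

	spin_lock_irqsave(&per_cpu(pending_lock, cpu), flags);
	list_add_tail(entry, &per_cpu(pending_list, cpu));
	spin_unlock_irqrestore(&per_cpu(pending_lock, cpu), flags);
}

/* Hard-IRQ path on the local CPU: a plain spin_lock() is fine here because
 * IRQs are already disabled in this context. */
static void drain_pending(void)
{
	spin_lock(this_cpu_ptr(&pending_lock));
	/* ... walk and wake entries on this_cpu_ptr(&pending_list) ... */
	spin_unlock(this_cpu_ptr(&pending_lock));
}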
 
 static inline struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
@@ -21,6 +34,20 @@ static inline struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
        return &(to_vmx(vcpu)->pi_desc);
 }
 
+static int pi_try_set_control(struct pi_desc *pi_desc, u64 old, u64 new)
+{
+       /*
+        * PID.ON can be set at any time by a different vCPU or by hardware,
+        * e.g. a device.  PID.control must be written atomically, and the
+        * update must be retried with a fresh snapshot if an ON change causes
+        * the cmpxchg to fail.
+        */
+       if (cmpxchg64(&pi_desc->control, old, new) != old)
+               return -EBUSY;
+
+       return 0;
+}
+
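/*
 * Editor's note: an illustrative sketch, not part of this patch.  Callers are
 * expected to wrap pi_try_set_control() in a retry loop that re-snapshots
 * pi_desc->control on every -EBUSY, exactly as the later hunks in this file
 * do.  The helper name below is hypothetical:
 */
static void pi_update_ndst_sketch(struct pi_desc *pi_desc, u32 dest)
{
	struct pi_desc old, new;

	do {
		old.control = new.control = READ_ONCE(pi_desc->control);
		new.ndst = dest;	/* or any other control-field update */
	} while (pi_try_set_control(pi_desc, old.control, new.control));
}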
 void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
 {
        struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
@@ -28,11 +55,14 @@ void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
        unsigned int dest;
 
        /*
-        * In case of hot-plug or hot-unplug, we may have to undo
-        * vmx_vcpu_pi_put even if there is no assigned device.  And we
-        * always keep PI.NDST up to date for simplicity: it makes the
-        * code easier, and CPU migration is not a fast path.
+        * To simplify hot-plug and dynamic toggling of APICv, keep PI.NDST and
+        * PI.SN up-to-date even if there is no assigned device or if APICv is
+        * deactivated due to a dynamic inhibit bit, e.g. for Hyper-V's SynIC.
         */
+       if (!enable_apicv || !lapic_in_kernel(vcpu))
+               return;
+
+       /* Nothing to do if PI.SN and PI.NDST both have the desired value. */
        if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu)
                return;
 
@@ -48,20 +78,17 @@ void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
                goto after_clear_sn;
        }
 
-       /* The full case.  */
-       do {
-               old.control = new.control = pi_desc->control;
-
-               dest = cpu_physical_id(cpu);
+       /* The full case.  Set the new destination and clear SN. */
+       dest = cpu_physical_id(cpu);
+       if (!x2apic_mode)
+               dest = (dest << 8) & 0xFF00;
 
-               if (x2apic_mode)
-                       new.ndst = dest;
-               else
-                       new.ndst = (dest << 8) & 0xFF00;
+       do {
+               old.control = new.control = READ_ONCE(pi_desc->control);
 
+               new.ndst = dest;
                new.sn = 0;
-       } while (cmpxchg64(&pi_desc->control, old.control,
-                          new.control) != old.control);
+       } while (pi_try_set_control(pi_desc, old.control, new.control));
 
 after_clear_sn:
 
@@ -77,13 +104,18 @@ after_clear_sn:
                pi_set_on(pi_desc);
 }
 
+static bool vmx_can_use_vtd_pi(struct kvm *kvm)
+{
+       return irqchip_in_kernel(kvm) && enable_apicv &&
+               kvm_arch_has_assigned_device(kvm) &&
+               irq_remapping_cap(IRQ_POSTING_CAP);
+}
+
 void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
 {
        struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
 
-       if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
-               !irq_remapping_cap(IRQ_POSTING_CAP)  ||
-               !kvm_vcpu_apicv_active(vcpu))
+       if (!vmx_can_use_vtd_pi(vcpu->kvm))
                return;
 
        /* Set SN when the vCPU is preempted */
@@ -97,29 +129,31 @@ static void __pi_post_block(struct kvm_vcpu *vcpu)
        struct pi_desc old, new;
        unsigned int dest;
 
-       do {
-               old.control = new.control = pi_desc->control;
-               WARN(old.nv != POSTED_INTR_WAKEUP_VECTOR,
-                    "Wakeup handler not enabled while the VCPU is blocked\n");
+       /*
+        * Remove the vCPU from the wakeup list of the _previous_ pCPU, which
+        * will not be the same as the current pCPU if the task was migrated.
+        */
+       spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+       list_del(&vcpu->blocked_vcpu_list);
+       spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
 
-               dest = cpu_physical_id(vcpu->cpu);
+       dest = cpu_physical_id(vcpu->cpu);
+       if (!x2apic_mode)
+               dest = (dest << 8) & 0xFF00;
 
-               if (x2apic_mode)
-                       new.ndst = dest;
-               else
-                       new.ndst = (dest << 8) & 0xFF00;
+       WARN(pi_desc->nv != POSTED_INTR_WAKEUP_VECTOR,
+            "Wakeup handler not enabled while the vCPU was blocking");
+
+       do {
+               old.control = new.control = READ_ONCE(pi_desc->control);
+
+               new.ndst = dest;
 
                /* set 'NV' to 'notification vector' */
                new.nv = POSTED_INTR_VECTOR;
-       } while (cmpxchg64(&pi_desc->control, old.control,
-                          new.control) != old.control);
-
-       if (!WARN_ON_ONCE(vcpu->pre_pcpu == -1)) {
-               spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
-               list_del(&vcpu->blocked_vcpu_list);
-               spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
-               vcpu->pre_pcpu = -1;
-       }
+       } while (pi_try_set_control(pi_desc, old.control, new.control));
+
+       vcpu->pre_pcpu = -1;
 }
 
 /*
@@ -128,7 +162,6 @@ static void __pi_post_block(struct kvm_vcpu *vcpu)
  * - Store the vCPU to the wakeup list, so when interrupts happen
  *   we can find the right vCPU to wake up.
  * - Change the Posted-interrupt descriptor as below:
- *      'NDST' <-- vcpu->pre_pcpu
  *      'NV' <-- POSTED_INTR_WAKEUP_VECTOR
  * - If 'ON' is set during this process, which means at least one
  *   interrupt is posted for this vCPU, we cannot block it, in
@@ -137,70 +170,50 @@ static void __pi_post_block(struct kvm_vcpu *vcpu)
  */
 int pi_pre_block(struct kvm_vcpu *vcpu)
 {
-       unsigned int dest;
        struct pi_desc old, new;
        struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+       unsigned long flags;
 
-       if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
-               !irq_remapping_cap(IRQ_POSTING_CAP)  ||
-               !kvm_vcpu_apicv_active(vcpu))
+       if (!vmx_can_use_vtd_pi(vcpu->kvm) ||
+           vmx_interrupt_blocked(vcpu))
                return 0;
 
-       WARN_ON(irqs_disabled());
-       local_irq_disable();
-       if (!WARN_ON_ONCE(vcpu->pre_pcpu != -1)) {
-               vcpu->pre_pcpu = vcpu->cpu;
-               spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
-               list_add_tail(&vcpu->blocked_vcpu_list,
-                             &per_cpu(blocked_vcpu_on_cpu,
-                                      vcpu->pre_pcpu));
-               spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
-       }
-
-       do {
-               old.control = new.control = pi_desc->control;
+       local_irq_save(flags);
 
-               WARN((pi_desc->sn == 1),
-                    "Warning: SN field of posted-interrupts "
-                    "is set before blocking\n");
+       vcpu->pre_pcpu = vcpu->cpu;
+       spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->cpu));
+       list_add_tail(&vcpu->blocked_vcpu_list,
+                     &per_cpu(blocked_vcpu_on_cpu, vcpu->cpu));
+       spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->cpu));
 
-               /*
-                * Since vCPU can be preempted during this process,
-                * vcpu->cpu could be different with pre_pcpu, we
-                * need to set pre_pcpu as the destination of wakeup
-                * notification event, then we can find the right vCPU
-                * to wakeup in wakeup handler if interrupts happen
-                * when the vCPU is in blocked state.
-                */
-               dest = cpu_physical_id(vcpu->pre_pcpu);
+       WARN(pi_desc->sn == 1,
+            "Posted Interrupt Suppress Notification set before blocking");
 
-               if (x2apic_mode)
-                       new.ndst = dest;
-               else
-                       new.ndst = (dest << 8) & 0xFF00;
+       do {
+               old.control = new.control = READ_ONCE(pi_desc->control);
 
                /* set 'NV' to 'wakeup vector' */
                new.nv = POSTED_INTR_WAKEUP_VECTOR;
-       } while (cmpxchg64(&pi_desc->control, old.control,
-                          new.control) != old.control);
+       } while (pi_try_set_control(pi_desc, old.control, new.control));
 
        /* We should not block the vCPU if an interrupt is posted for it.  */
-       if (pi_test_on(pi_desc) == 1)
+       if (pi_test_on(pi_desc))
                __pi_post_block(vcpu);
 
-       local_irq_enable();
+       local_irq_restore(flags);
        return (vcpu->pre_pcpu == -1);
 }
 
 void pi_post_block(struct kvm_vcpu *vcpu)
 {
+       unsigned long flags;
+
        if (vcpu->pre_pcpu == -1)
                return;
 
-       WARN_ON(irqs_disabled());
-       local_irq_disable();
+       local_irq_save(flags);
        __pi_post_block(vcpu);
-       local_irq_enable();
+       local_irq_restore(flags);
 }
 
 /*
@@ -216,7 +229,7 @@ void pi_wakeup_handler(void)
                        blocked_vcpu_list) {
                struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
 
-               if (pi_test_on(pi_desc) == 1)
+               if (pi_test_on(pi_desc))
                        kvm_vcpu_kick(vcpu);
        }
        spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
@@ -270,9 +283,7 @@ int pi_update_irte(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq,
        struct vcpu_data vcpu_info;
        int idx, ret = 0;
 
-       if (!kvm_arch_has_assigned_device(kvm) ||
-           !irq_remapping_cap(IRQ_POSTING_CAP) ||
-           !kvm_vcpu_apicv_active(kvm->vcpus[0]))
+       if (!vmx_can_use_vtd_pi(kvm))
                return 0;
 
        idx = srcu_read_lock(&kvm->irq_srcu);
index 7f7b232..36ae035 100644 (file)
@@ -40,7 +40,7 @@ static inline bool pi_test_and_clear_on(struct pi_desc *pi_desc)
                        (unsigned long *)&pi_desc->control);
 }
 
-static inline int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc)
+static inline bool pi_test_and_set_pir(int vector, struct pi_desc *pi_desc)
 {
        return test_and_set_bit(vector, (unsigned long *)pi_desc->pir);
 }
@@ -74,13 +74,13 @@ static inline void pi_clear_sn(struct pi_desc *pi_desc)
                (unsigned long *)&pi_desc->control);
 }
 
-static inline int pi_test_on(struct pi_desc *pi_desc)
+static inline bool pi_test_on(struct pi_desc *pi_desc)
 {
        return test_bit(POSTED_INTR_ON,
                        (unsigned long *)&pi_desc->control);
 }
 
-static inline int pi_test_sn(struct pi_desc *pi_desc)
+static inline bool pi_test_sn(struct pi_desc *pi_desc)
 {
        return test_bit(POSTED_INTR_SN,
                        (unsigned long *)&pi_desc->control);
index ba66c17..9bf65e5 100644 (file)
@@ -602,15 +602,13 @@ static int vmx_set_guest_uret_msr(struct vcpu_vmx *vmx,
        unsigned int slot = msr - vmx->guest_uret_msrs;
        int ret = 0;
 
-       u64 old_msr_data = msr->data;
-       msr->data = data;
        if (msr->load_into_hardware) {
                preempt_disable();
-               ret = kvm_set_user_return_msr(slot, msr->data, msr->mask);
+               ret = kvm_set_user_return_msr(slot, data, msr->mask);
                preempt_enable();
-               if (ret)
-                       msr->data = old_msr_data;
        }
+       if (!ret)
+               msr->data = data;
        return ret;
 }
 
@@ -1105,6 +1103,7 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
 #ifdef CONFIG_X86_64
        int cpu = raw_smp_processor_id();
 #endif
+       unsigned long cr3;
        unsigned long fs_base, gs_base;
        u16 fs_sel, gs_sel;
        int i;
@@ -1169,6 +1168,14 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
 #endif
 
        vmx_set_host_fs_gs(host_state, fs_sel, gs_sel, fs_base, gs_base);
+
+       /* Host CR3 including its PCID is stable when guest state is loaded. */
+       cr3 = __get_current_cr3_fast();
+       if (unlikely(cr3 != host_state->cr3)) {
+               vmcs_writel(HOST_CR3, cr3);
+               host_state->cr3 = cr3;
+       }
+
        vmx->guest_state_loaded = true;
 }
 
@@ -1271,7 +1278,6 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
 
        if (!already_loaded) {
                void *gdt = get_current_gdt_ro();
-               unsigned long sysenter_esp;
 
                /*
                 * Flush all EPTP/VPID contexts, the new pCPU may have stale
@@ -1287,8 +1293,11 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
                            (unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss);
                vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt);   /* 22.2.4 */
 
-               rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
-               vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
+               if (IS_ENABLED(CONFIG_IA32_EMULATION) || IS_ENABLED(CONFIG_X86_32)) {
+                       /* 22.2.3 */
+                       vmcs_writel(HOST_IA32_SYSENTER_ESP,
+                                   (unsigned long)(cpu_entry_stack(cpu) + 1));
+               }
 
                vmx->loaded_vmcs->cpu = cpu;
        }
@@ -1363,6 +1372,11 @@ void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
                vmx->emulation_required = vmx_emulation_required(vcpu);
 }
 
+static bool vmx_get_if_flag(struct kvm_vcpu *vcpu)
+{
+       return vmx_get_rflags(vcpu) & X86_EFLAGS_IF;
+}
+
 u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu)
 {
        u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
@@ -1748,7 +1762,7 @@ static int vmx_get_msr_feature(struct kvm_msr_entry *msr)
 }
 
 /*
- * Reads an msr value (of 'msr_index') into 'pdata'.
+ * Reads an msr value (of 'msr_info->index') into 'msr_info->data'.
  * Returns 0 on success, non-0 otherwise.
  * Assumes vcpu_load() was already called.
  */
@@ -2095,9 +2109,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                }
                ret = kvm_set_msr_common(vcpu, msr_info);
                break;
-       case MSR_IA32_TSC_ADJUST:
-               ret = kvm_set_msr_common(vcpu, msr_info);
-               break;
        case MSR_IA32_MCG_EXT_CTL:
                if ((!msr_info->host_initiated &&
                     !(to_vmx(vcpu)->msr_ia32_feature_control &
@@ -2646,15 +2657,6 @@ int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
                if (!loaded_vmcs->msr_bitmap)
                        goto out_vmcs;
                memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE);
-
-               if (IS_ENABLED(CONFIG_HYPERV) &&
-                   static_branch_unlikely(&enable_evmcs) &&
-                   (ms_hyperv.nested_features & HV_X64_NESTED_MSR_BITMAP)) {
-                       struct hv_enlightened_vmcs *evmcs =
-                               (struct hv_enlightened_vmcs *)loaded_vmcs->vmcs;
-
-                       evmcs->hv_enlightenments_control.msr_bitmap = 1;
-               }
        }
 
        memset(&loaded_vmcs->host_state, 0, sizeof(struct vmcs_host_state));
@@ -2918,6 +2920,13 @@ static void vmx_flush_tlb_all(struct kvm_vcpu *vcpu)
        }
 }
 
+static inline int vmx_get_current_vpid(struct kvm_vcpu *vcpu)
+{
+       if (is_guest_mode(vcpu))
+               return nested_get_vpid02(vcpu);
+       return to_vmx(vcpu)->vpid;
+}
+
 static void vmx_flush_tlb_current(struct kvm_vcpu *vcpu)
 {
        struct kvm_mmu *mmu = vcpu->arch.mmu;
@@ -2930,31 +2939,29 @@ static void vmx_flush_tlb_current(struct kvm_vcpu *vcpu)
        if (enable_ept)
                ept_sync_context(construct_eptp(vcpu, root_hpa,
                                                mmu->shadow_root_level));
-       else if (!is_guest_mode(vcpu))
-               vpid_sync_context(to_vmx(vcpu)->vpid);
        else
-               vpid_sync_context(nested_get_vpid02(vcpu));
+               vpid_sync_context(vmx_get_current_vpid(vcpu));
 }
 
 static void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr)
 {
        /*
-        * vpid_sync_vcpu_addr() is a nop if vmx->vpid==0, see the comment in
+        * vpid_sync_vcpu_addr() is a nop if vpid==0, see the comment in
         * vmx_flush_tlb_guest() for an explanation of why this is ok.
         */
-       vpid_sync_vcpu_addr(to_vmx(vcpu)->vpid, addr);
+       vpid_sync_vcpu_addr(vmx_get_current_vpid(vcpu), addr);
 }
 
 static void vmx_flush_tlb_guest(struct kvm_vcpu *vcpu)
 {
        /*
-        * vpid_sync_context() is a nop if vmx->vpid==0, e.g. if enable_vpid==0
-        * or a vpid couldn't be allocated for this vCPU.  VM-Enter and VM-Exit
-        * are required to flush GVA->{G,H}PA mappings from the TLB if vpid is
+        * vpid_sync_context() is a nop if vpid==0, e.g. if enable_vpid==0 or a
+        * vpid couldn't be allocated for this vCPU.  VM-Enter and VM-Exit are
+        * required to flush GVA->{G,H}PA mappings from the TLB if vpid is
         * disabled (VM-Enter with vpid enabled and vpid==0 is disallowed),
         * i.e. no explicit INVVPID is necessary.
         */
-       vpid_sync_context(to_vmx(vcpu)->vpid);
+       vpid_sync_context(vmx_get_current_vpid(vcpu));
 }
 
 void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu)
@@ -2984,7 +2991,7 @@ void ept_save_pdptrs(struct kvm_vcpu *vcpu)
        mmu->pdptrs[2] = vmcs_read64(GUEST_PDPTR2);
        mmu->pdptrs[3] = vmcs_read64(GUEST_PDPTR3);
 
-       kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR);
+       kvm_register_mark_available(vcpu, VCPU_EXREG_PDPTR);
 }
 
 #define CR3_EXITING_BITS (CPU_BASED_CR3_LOAD_EXITING | \
@@ -3109,9 +3116,9 @@ static void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa,
 
                if (!enable_unrestricted_guest && !is_paging(vcpu))
                        guest_cr3 = to_kvm_vmx(kvm)->ept_identity_map_addr;
-               else if (test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail))
+               else if (kvm_register_is_dirty(vcpu, VCPU_EXREG_CR3))
                        guest_cr3 = vcpu->arch.cr3;
-               else /* vmcs01.GUEST_CR3 is already up-to-date. */
+               else /* vmcs.GUEST_CR3 is already up-to-date. */
                        update_guest_cr3 = false;
                vmx_ept_load_pdptrs(vcpu);
        } else {
@@ -3686,6 +3693,19 @@ void free_vpid(int vpid)
        spin_unlock(&vmx_vpid_lock);
 }
 
+static void vmx_msr_bitmap_l01_changed(struct vcpu_vmx *vmx)
+{
+       /*
+        * When KVM is a nested hypervisor on top of Hyper-V and uses the
+        * 'Enlightened MSR Bitmap' feature, L0 needs to know that the MSR
+        * bitmap has changed.
+        */
+       if (static_branch_unlikely(&enable_evmcs))
+               evmcs_touch_msr_bitmap();
+
+       vmx->nested.force_msr_bitmap_recalc = true;
+}
+
 void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -3694,8 +3714,7 @@ void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
        if (!cpu_has_vmx_msr_bitmap())
                return;
 
-       if (static_branch_unlikely(&enable_evmcs))
-               evmcs_touch_msr_bitmap();
+       vmx_msr_bitmap_l01_changed(vmx);
 
        /*
         * Mark the desired intercept state in shadow bitmap, this is needed
@@ -3739,8 +3758,7 @@ void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
        if (!cpu_has_vmx_msr_bitmap())
                return;
 
-       if (static_branch_unlikely(&enable_evmcs))
-               evmcs_touch_msr_bitmap();
+       vmx_msr_bitmap_l01_changed(vmx);
 
        /*
         * Mark the desired intercept state in shadow bitmap, this is needed
@@ -3930,6 +3948,19 @@ static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
                 */
                vmx->nested.pi_pending = true;
                kvm_make_request(KVM_REQ_EVENT, vcpu);
+
+               /*
+                * This pairs with the smp_mb_*() after setting vcpu->mode in
+                * vcpu_enter_guest() to guarantee the vCPU sees the event
+                * request if triggering a posted interrupt "fails" because
+                * vcpu->mode != IN_GUEST_MODE.  The extra barrier is needed as
+                * the smp_wmb() in kvm_make_request() only ensures everything
+                * done before making the request is visible when the request
+                * is visible, it doesn't ensure ordering between the store to
+                * vcpu->requests and the load from vcpu->mode.
+                */
+               smp_mb__after_atomic();
+
                /* the PIR and ON have been set by L1. */
                if (!kvm_vcpu_trigger_posted_interrupt(vcpu, true))
                        kvm_vcpu_kick(vcpu);
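/*
 * Editor's note: an illustrative sketch, not part of this patch.  The barrier
 * pairing described in the new comment is the usual two-sided flag pattern
 * (the receiving side lives in vcpu_enter_guest()):
 *
 *	sender (here)				target vCPU (entry path)
 *	-------------				------------------------
 *	store vcpu->requests			store vcpu->mode = IN_GUEST_MODE
 *	smp_mb__after_atomic()			smp_mb__after_srcu_read_unlock()
 *	load vcpu->mode				load vcpu->requests
 *
 * With both barriers present, at least one side is guaranteed to observe the
 * other's store, so the posted event cannot be lost.
 */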
@@ -3963,8 +3994,13 @@ static int vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
        if (pi_test_and_set_on(&vmx->pi_desc))
                return 0;
 
-       if (vcpu != kvm_get_running_vcpu() &&
-           !kvm_vcpu_trigger_posted_interrupt(vcpu, false))
+       /*
+        * The implied barrier in pi_test_and_set_on() pairs with the smp_mb_*()
+        * after setting vcpu->mode in vcpu_enter_guest(), thus the vCPU is
+        * guaranteed to see PID.ON=1 and sync the PIR to IRR if triggering a
+        * posted interrupt "fails" because vcpu->mode != IN_GUEST_MODE.
+        */
+       if (!kvm_vcpu_trigger_posted_interrupt(vcpu, false))
                kvm_vcpu_kick(vcpu);
 
        return 0;
@@ -4021,6 +4057,12 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
 
        rdmsr(MSR_IA32_SYSENTER_CS, low32, high32);
        vmcs_write32(HOST_IA32_SYSENTER_CS, low32);
+
+       /*
+        * If 32-bit syscall is enabled, vmx_vcpu_load_vmcs rewrites
+        * HOST_IA32_SYSENTER_ESP.
+        */
+       vmcs_writel(HOST_IA32_SYSENTER_ESP, 0);
        rdmsrl(MSR_IA32_SYSENTER_EIP, tmpl);
        vmcs_writel(HOST_IA32_SYSENTER_EIP, tmpl);   /* 22.2.3 */
 
@@ -4039,8 +4081,10 @@ void set_cr4_guest_host_mask(struct vcpu_vmx *vmx)
 
        vcpu->arch.cr4_guest_owned_bits = KVM_POSSIBLE_CR4_GUEST_BITS &
                                          ~vcpu->arch.cr4_guest_rsvd_bits;
-       if (!enable_ept)
-               vcpu->arch.cr4_guest_owned_bits &= ~X86_CR4_PGE;
+       if (!enable_ept) {
+               vcpu->arch.cr4_guest_owned_bits &= ~X86_CR4_TLBFLUSH_BITS;
+               vcpu->arch.cr4_guest_owned_bits &= ~X86_CR4_PDPTR_BITS;
+       }
        if (is_guest_mode(&vmx->vcpu))
                vcpu->arch.cr4_guest_owned_bits &=
                        ~get_vmcs12(vcpu)->cr4_guest_host_mask;
@@ -4692,7 +4736,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
                if (kvm_emulate_instruction(vcpu, 0)) {
                        if (vcpu->arch.halt_request) {
                                vcpu->arch.halt_request = 0;
-                               return kvm_vcpu_halt(vcpu);
+                               return kvm_emulate_halt_noskip(vcpu);
                        }
                        return 1;
                }
@@ -5363,7 +5407,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
 
                if (vcpu->arch.halt_request) {
                        vcpu->arch.halt_request = 0;
-                       return kvm_vcpu_halt(vcpu);
+                       return kvm_emulate_halt_noskip(vcpu);
                }
 
                /*
@@ -5881,18 +5925,14 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
                vmx_flush_pml_buffer(vcpu);
 
        /*
-        * We should never reach this point with a pending nested VM-Enter, and
-        * more specifically emulation of L2 due to invalid guest state (see
-        * below) should never happen as that means we incorrectly allowed a
-        * nested VM-Enter with an invalid vmcs12.
+        * KVM should never reach this point with a pending nested VM-Enter.
+        * More specifically, short-circuiting VM-Entry to emulate L2 due to
+        * invalid guest state should never happen as that means KVM knowingly
+        * allowed a nested VM-Enter with an invalid vmcs12.  More below.
         */
        if (KVM_BUG_ON(vmx->nested.nested_run_pending, vcpu->kvm))
                return -EIO;
 
-       /* If guest state is invalid, start emulating */
-       if (vmx->emulation_required)
-               return handle_invalid_guest_state(vcpu);
-
        if (is_guest_mode(vcpu)) {
                /*
                 * PML is never enabled when running L2, bail immediately if a
@@ -5914,10 +5954,30 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
                 */
                nested_mark_vmcs12_pages_dirty(vcpu);
 
+               /*
+                * Synthesize a triple fault if L2 state is invalid.  In normal
+                * operation, nested VM-Enter rejects any attempt to enter L2
+                * with invalid state.  However, those checks are skipped if
+                * state is being stuffed via RSM or KVM_SET_NESTED_STATE.  If
+                * L2 state is invalid, it means either L1 modified SMRAM state
+                * or userspace provided bad state.  Synthesize TRIPLE_FAULT as
+                * doing so is architecturally allowed in the RSM case, and is
+                * the least awful solution for the userspace case without
+                * risking false positives.
+                */
+               if (vmx->emulation_required) {
+                       nested_vmx_vmexit(vcpu, EXIT_REASON_TRIPLE_FAULT, 0, 0);
+                       return 1;
+               }
+
                if (nested_vmx_reflect_vmexit(vcpu))
                        return 1;
        }
 
+       /* If guest state is invalid, start emulating.  L2 is handled above. */
+       if (vmx->emulation_required)
+               return handle_invalid_guest_state(vcpu);
+
        if (exit_reason.failed_vmentry) {
                dump_vmcs(vcpu);
                vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
@@ -6262,9 +6322,9 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        int max_irr;
-       bool max_irr_updated;
+       bool got_posted_interrupt;
 
-       if (KVM_BUG_ON(!vcpu->arch.apicv_active, vcpu->kvm))
+       if (KVM_BUG_ON(!enable_apicv, vcpu->kvm))
                return -EIO;
 
        if (pi_test_on(&vmx->pi_desc)) {
@@ -6274,22 +6334,33 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
                 * But on x86 this is just a compiler barrier anyway.
                 */
                smp_mb__after_atomic();
-               max_irr_updated =
+               got_posted_interrupt =
                        kvm_apic_update_irr(vcpu, vmx->pi_desc.pir, &max_irr);
-
-               /*
-                * If we are running L2 and L1 has a new pending interrupt
-                * which can be injected, this may cause a vmexit or it may
-                * be injected into L2.  Either way, this interrupt will be
-                * processed via KVM_REQ_EVENT, not RVI, because we do not use
-                * virtual interrupt delivery to inject L1 interrupts into L2.
-                */
-               if (is_guest_mode(vcpu) && max_irr_updated)
-                       kvm_make_request(KVM_REQ_EVENT, vcpu);
        } else {
                max_irr = kvm_lapic_find_highest_irr(vcpu);
+               got_posted_interrupt = false;
        }
-       vmx_hwapic_irr_update(vcpu, max_irr);
+
+       /*
+        * Newly recognized interrupts are injected via either virtual interrupt
+        * delivery (RVI) or KVM_REQ_EVENT.  Virtual interrupt delivery is
+        * disabled in two cases:
+        *
+        * 1) If L2 is running and the vCPU has a new pending interrupt.  If L1
+        * wants to exit on interrupts, KVM_REQ_EVENT is needed to synthesize a
+        * VM-Exit to L1.  If L1 doesn't want to exit, the interrupt is injected
+        * into L2, but KVM doesn't use virtual interrupt delivery to inject
+        * interrupts into L2, and so KVM_REQ_EVENT is again needed.
+        *
+        * 2) If APICv is disabled for this vCPU, assigned devices may still
+        * attempt to post interrupts.  The posted interrupt vector will cause
+        * a VM-Exit and the subsequent entry will call sync_pir_to_irr.
+        */
+       if (!is_guest_mode(vcpu) && kvm_vcpu_apicv_active(vcpu))
+               vmx_set_rvi(max_irr);
+       else if (got_posted_interrupt)
+               kvm_make_request(KVM_REQ_EVENT, vcpu);
+
        return max_irr;
 }
 
@@ -6588,7 +6659,7 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
 static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
-       unsigned long cr3, cr4;
+       unsigned long cr4;
 
        /* Record the guest's net vcpu time for enforced NMI injections. */
        if (unlikely(!enable_vnmi &&
@@ -6601,9 +6672,7 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
         * consistency check VM-Exit due to invalid guest state and bail.
         */
        if (unlikely(vmx->emulation_required)) {
-
-               /* We don't emulate invalid state of a nested guest */
-               vmx->fail = is_guest_mode(vcpu);
+               vmx->fail = 0;
 
                vmx->exit_reason.full = EXIT_REASON_INVALID_STATE;
                vmx->exit_reason.failed_vmentry = 1;
@@ -6631,12 +6700,7 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
                vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
        if (kvm_register_is_dirty(vcpu, VCPU_REGS_RIP))
                vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
-
-       cr3 = __get_current_cr3_fast();
-       if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
-               vmcs_writel(HOST_CR3, cr3);
-               vmx->loaded_vmcs->host_state.cr3 = cr3;
-       }
+       vcpu->arch.regs_dirty = 0;
 
        cr4 = cr4_read_shadow();
        if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
@@ -6725,7 +6789,7 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
        loadsegment(es, __USER_DS);
 #endif
 
-       vmx_register_cache_reset(vcpu);
+       vcpu->arch.regs_avail &= ~VMX_REGS_LAZY_LOAD_SET;
 
        pt_guest_exit(vmx);
 
@@ -6826,6 +6890,19 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
        if (err < 0)
                goto free_pml;
 
+       /*
+        * Use Hyper-V 'Enlightened MSR Bitmap' feature when KVM runs as a
+        * nested (L1) hypervisor and Hyper-V in L0 supports it. Enable the
+        * feature only for vmcs01; KVM currently isn't equipped to realize any
+        * performance benefits from enabling it for vmcs02.
+        */
+       if (IS_ENABLED(CONFIG_HYPERV) && static_branch_unlikely(&enable_evmcs) &&
+           (ms_hyperv.nested_features & HV_X64_NESTED_MSR_BITMAP)) {
+               struct hv_enlightened_vmcs *evmcs = (void *)vmx->vmcs01.vmcs;
+
+               evmcs->hv_enlightenments_control.msr_bitmap = 1;
+       }
+
        /* The MSR bitmap starts with all ones */
        bitmap_fill(vmx->shadow_msr_intercept.read, MAX_POSSIBLE_PASSTHROUGH_MSRS);
        bitmap_fill(vmx->shadow_msr_intercept.write, MAX_POSSIBLE_PASSTHROUGH_MSRS);
@@ -6931,7 +7008,6 @@ static int __init vmx_check_processor_compat(void)
 static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
 {
        u8 cache;
-       u64 ipat = 0;
 
        /* We wanted to honor guest CD/MTRR/PAT, but doing so could result in
         * memory aliases with conflicting memory types and sometimes MCEs.
@@ -6951,30 +7027,22 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
         * EPT memory type is used to emulate guest CD/MTRR.
         */
 
-       if (is_mmio) {
-               cache = MTRR_TYPE_UNCACHABLE;
-               goto exit;
-       }
+       if (is_mmio)
+               return MTRR_TYPE_UNCACHABLE << VMX_EPT_MT_EPTE_SHIFT;
 
-       if (!kvm_arch_has_noncoherent_dma(vcpu->kvm)) {
-               ipat = VMX_EPT_IPAT_BIT;
-               cache = MTRR_TYPE_WRBACK;
-               goto exit;
-       }
+       if (!kvm_arch_has_noncoherent_dma(vcpu->kvm))
+               return (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT) | VMX_EPT_IPAT_BIT;
 
        if (kvm_read_cr0(vcpu) & X86_CR0_CD) {
-               ipat = VMX_EPT_IPAT_BIT;
                if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
                        cache = MTRR_TYPE_WRBACK;
                else
                        cache = MTRR_TYPE_UNCACHABLE;
-               goto exit;
-       }
 
-       cache = kvm_mtrr_get_guest_memory_type(vcpu, gfn);
+               return (cache << VMX_EPT_MT_EPTE_SHIFT) | VMX_EPT_IPAT_BIT;
+       }
 
-exit:
-       return (cache << VMX_EPT_MT_EPTE_SHIFT) | ipat;
+       return kvm_mtrr_get_guest_memory_type(vcpu, gfn) << VMX_EPT_MT_EPTE_SHIFT;
 }
 
 static void vmcs_set_secondary_exec_control(struct vcpu_vmx *vmx, u32 new_ctl)
@@ -7509,6 +7577,7 @@ static void hardware_unsetup(void)
 static bool vmx_check_apicv_inhibit_reasons(ulong bit)
 {
        ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) |
+                         BIT(APICV_INHIBIT_REASON_ABSENT) |
                          BIT(APICV_INHIBIT_REASON_HYPERV) |
                          BIT(APICV_INHIBIT_REASON_BLOCKIRQ);
 
@@ -7558,6 +7627,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
        .cache_reg = vmx_cache_reg,
        .get_rflags = vmx_get_rflags,
        .set_rflags = vmx_set_rflags,
+       .get_if_flag = vmx_get_if_flag,
 
        .tlb_flush_all = vmx_flush_tlb_all,
        .tlb_flush_current = vmx_flush_tlb_current,
@@ -7683,7 +7753,7 @@ static __init int hardware_setup(void)
 {
        unsigned long host_bndcfgs;
        struct desc_ptr dt;
-       int r, ept_lpage_level;
+       int r;
 
        store_idt(&dt);
        host_idt_base = dt.address;
@@ -7761,10 +7831,10 @@ static __init int hardware_setup(void)
                ple_window_shrink = 0;
        }
 
-       if (!cpu_has_vmx_apicv()) {
+       if (!cpu_has_vmx_apicv())
                enable_apicv = 0;
+       if (!enable_apicv)
                vmx_x86_ops.sync_pir_to_irr = NULL;
-       }
 
        if (cpu_has_vmx_tsc_scaling()) {
                kvm_has_tsc_control = true;
@@ -7780,16 +7850,8 @@ static __init int hardware_setup(void)
                kvm_mmu_set_ept_masks(enable_ept_ad_bits,
                                      cpu_has_vmx_ept_execute_only());
 
-       if (!enable_ept)
-               ept_lpage_level = 0;
-       else if (cpu_has_vmx_ept_1g_page())
-               ept_lpage_level = PG_LEVEL_1G;
-       else if (cpu_has_vmx_ept_2m_page())
-               ept_lpage_level = PG_LEVEL_2M;
-       else
-               ept_lpage_level = PG_LEVEL_4K;
        kvm_configure_mmu(enable_ept, 0, vmx_get_max_tdp_level(),
-                         ept_lpage_level);
+                         ept_caps_to_lpage_level(vmx_capability.ept));
 
        /*
         * Only enable PML when hardware supports PML feature, and both EPT
index 4df2ac2..6c2c1af 100644 (file)
@@ -158,6 +158,15 @@ struct nested_vmx {
        bool need_vmcs12_to_shadow_sync;
        bool dirty_vmcs12;
 
+       /*
+        * Indicates whether the MSR bitmap for L2 needs to be rebuilt due to
+        * changes in the MSR bitmap for L1 or switching to a different L2. Note,
+        * this flag can only be used reliably in conjunction with a paravirt L1
+        * which informs L0 whether any changes to the MSR bitmap for L2 were made
+        * on its side.
+        */
+       bool force_msr_bitmap_recalc;
+
        /*
         * Indicates lazily loaded guest state has not yet been decached from
         * vmcs02.
@@ -473,19 +482,21 @@ BUILD_CONTROLS_SHADOW(pin, PIN_BASED_VM_EXEC_CONTROL)
 BUILD_CONTROLS_SHADOW(exec, CPU_BASED_VM_EXEC_CONTROL)
 BUILD_CONTROLS_SHADOW(secondary_exec, SECONDARY_VM_EXEC_CONTROL)
 
-static inline void vmx_register_cache_reset(struct kvm_vcpu *vcpu)
-{
-       vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)
-                                 | (1 << VCPU_EXREG_RFLAGS)
-                                 | (1 << VCPU_EXREG_PDPTR)
-                                 | (1 << VCPU_EXREG_SEGMENTS)
-                                 | (1 << VCPU_EXREG_CR0)
-                                 | (1 << VCPU_EXREG_CR3)
-                                 | (1 << VCPU_EXREG_CR4)
-                                 | (1 << VCPU_EXREG_EXIT_INFO_1)
-                                 | (1 << VCPU_EXREG_EXIT_INFO_2));
-       vcpu->arch.regs_dirty = 0;
-}
+/*
+ * VMX_REGS_LAZY_LOAD_SET - The set of registers that will be updated in the
+ * cache on demand.  Other registers not listed here are synced to
+ * the cache immediately after VM-Exit.
+ */
+#define VMX_REGS_LAZY_LOAD_SET ((1 << VCPU_REGS_RIP) |         \
+                               (1 << VCPU_REGS_RSP) |          \
+                               (1 << VCPU_EXREG_RFLAGS) |      \
+                               (1 << VCPU_EXREG_PDPTR) |       \
+                               (1 << VCPU_EXREG_SEGMENTS) |    \
+                               (1 << VCPU_EXREG_CR0) |         \
+                               (1 << VCPU_EXREG_CR3) |         \
+                               (1 << VCPU_EXREG_CR4) |         \
+                               (1 << VCPU_EXREG_EXIT_INFO_1) | \
+                               (1 << VCPU_EXREG_EXIT_INFO_2))
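/*
 * Editor's note: an illustrative sketch, not part of this patch.  With the
 * helper macro above, the open-coded register-cache reset in vmx_vcpu_run()
 * becomes two simple statements: dirty registers are flushed to the VMCS and
 * cleared before VM-Enter, and the lazily-loaded set is invalidated after
 * VM-Exit, i.e.
 *
 *	vcpu->arch.regs_dirty = 0;				before VM-Enter
 *	vcpu->arch.regs_avail &= ~VMX_REGS_LAZY_LOAD_SET;	after VM-Exit
 */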
 
 static inline struct kvm_vmx *to_kvm_vmx(struct kvm *kvm)
 {
index 5a403d9..8912f44 100644 (file)
@@ -118,6 +118,7 @@ static void enter_smm(struct kvm_vcpu *vcpu);
 static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
 static void store_regs(struct kvm_vcpu *vcpu);
 static int sync_regs(struct kvm_vcpu *vcpu);
+static int kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu);
 
 static int __set_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2);
 static void __get_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2);
@@ -710,6 +711,17 @@ int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err)
 }
 EXPORT_SYMBOL_GPL(kvm_complete_insn_gp);
 
+static int complete_emulated_insn_gp(struct kvm_vcpu *vcpu, int err)
+{
+       if (err) {
+               kvm_inject_gp(vcpu, 0);
+               return 1;
+       }
+
+       return kvm_emulate_instruction(vcpu, EMULTYPE_NO_DECODE | EMULTYPE_SKIP |
+                                      EMULTYPE_COMPLETE_USER_EXIT);
+}
+
 void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
 {
        ++vcpu->stat.pf_guest;
@@ -798,8 +810,9 @@ static inline u64 pdptr_rsvd_bits(struct kvm_vcpu *vcpu)
 /*
  * Load the pae pdptrs.  Return 1 if they are all valid, 0 otherwise.
  */
-int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3)
+int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
 {
+       struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
        gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT;
        gpa_t real_gpa;
        int i;
@@ -810,8 +823,8 @@ int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3)
         * If the MMU is nested, CR3 holds an L2 GPA and needs to be translated
         * to an L1 GPA.
         */
-       real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(pdpt_gfn),
-                                     PFERR_USER_MASK | PFERR_WRITE_MASK, NULL);
+       real_gpa = kvm_translate_gpa(vcpu, mmu, gfn_to_gpa(pdpt_gfn),
+                                    PFERR_USER_MASK | PFERR_WRITE_MASK, NULL);
        if (real_gpa == UNMAPPED_GVA)
                return 0;
 
@@ -828,8 +841,12 @@ int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3)
                }
        }
 
-       memcpy(mmu->pdptrs, pdpte, sizeof(mmu->pdptrs));
-       kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR);
+       kvm_register_mark_available(vcpu, VCPU_EXREG_PDPTR);
+       if (memcmp(mmu->pdptrs, pdpte, sizeof(mmu->pdptrs))) {
+               memcpy(mmu->pdptrs, pdpte, sizeof(mmu->pdptrs));
+               kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR);
+               kvm_make_request(KVM_REQ_LOAD_MMU_PGD, vcpu);
+       }
        vcpu->arch.pdptrs_from_userspace = false;
 
        return 1;
@@ -856,7 +873,6 @@ EXPORT_SYMBOL_GPL(kvm_post_set_cr0);
 int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 {
        unsigned long old_cr0 = kvm_read_cr0(vcpu);
-       unsigned long pdptr_bits = X86_CR0_CD | X86_CR0_NW | X86_CR0_PG;
 
        cr0 |= X86_CR0_ET;
 
@@ -886,11 +902,12 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
        }
 #endif
        if (!(vcpu->arch.efer & EFER_LME) && (cr0 & X86_CR0_PG) &&
-           is_pae(vcpu) && ((cr0 ^ old_cr0) & pdptr_bits) &&
-           !load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu)))
+           is_pae(vcpu) && ((cr0 ^ old_cr0) & X86_CR0_PDPTR_BITS) &&
+           !load_pdptrs(vcpu, kvm_read_cr3(vcpu)))
                return 1;
 
-       if (!(cr0 & X86_CR0_PG) && kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE))
+       if (!(cr0 & X86_CR0_PG) &&
+           (is_64_bit_mode(vcpu) || kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE)))
                return 1;
 
        static_call(kvm_x86_set_cr0)(vcpu, cr0);
@@ -1050,8 +1067,6 @@ EXPORT_SYMBOL_GPL(kvm_post_set_cr4);
 int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
        unsigned long old_cr4 = kvm_read_cr4(vcpu);
-       unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE |
-                                  X86_CR4_SMEP;
 
        if (!kvm_is_valid_cr4(vcpu, cr4))
                return 1;
@@ -1062,9 +1077,8 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
                if ((cr4 ^ old_cr4) & X86_CR4_LA57)
                        return 1;
        } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE)
-                  && ((cr4 ^ old_cr4) & pdptr_bits)
-                  && !load_pdptrs(vcpu, vcpu->arch.walk_mmu,
-                                  kvm_read_cr3(vcpu)))
+                  && ((cr4 ^ old_cr4) & X86_CR4_PDPTR_BITS)
+                  && !load_pdptrs(vcpu, kvm_read_cr3(vcpu)))
                return 1;
 
        if ((cr4 & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE)) {
@@ -1153,14 +1167,14 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
        if (kvm_vcpu_is_illegal_gpa(vcpu, cr3))
                return 1;
 
-       if (is_pae_paging(vcpu) && !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
+       if (is_pae_paging(vcpu) && !load_pdptrs(vcpu, cr3))
                return 1;
 
        if (cr3 != kvm_read_cr3(vcpu))
                kvm_mmu_new_pgd(vcpu, cr3);
 
        vcpu->arch.cr3 = cr3;
-       kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
+       kvm_register_mark_dirty(vcpu, VCPU_EXREG_CR3);
 
 handle_tlb_flush:
        /*
@@ -1330,7 +1344,7 @@ static const u32 msrs_to_save_all[] = {
        MSR_IA32_UMWAIT_CONTROL,
 
        MSR_ARCH_PERFMON_FIXED_CTR0, MSR_ARCH_PERFMON_FIXED_CTR1,
-       MSR_ARCH_PERFMON_FIXED_CTR0 + 2, MSR_ARCH_PERFMON_FIXED_CTR0 + 3,
+       MSR_ARCH_PERFMON_FIXED_CTR0 + 2,
        MSR_CORE_PERF_FIXED_CTR_CTRL, MSR_CORE_PERF_GLOBAL_STATUS,
        MSR_CORE_PERF_GLOBAL_CTRL, MSR_CORE_PERF_GLOBAL_OVF_CTRL,
        MSR_ARCH_PERFMON_PERFCTR0, MSR_ARCH_PERFMON_PERFCTR1,
@@ -1814,22 +1828,36 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data)
 }
 EXPORT_SYMBOL_GPL(kvm_set_msr);
 
-static int complete_emulated_rdmsr(struct kvm_vcpu *vcpu)
+static void complete_userspace_rdmsr(struct kvm_vcpu *vcpu)
 {
-       int err = vcpu->run->msr.error;
-       if (!err) {
+       if (!vcpu->run->msr.error) {
                kvm_rax_write(vcpu, (u32)vcpu->run->msr.data);
                kvm_rdx_write(vcpu, vcpu->run->msr.data >> 32);
        }
+}
+
+static int complete_emulated_msr_access(struct kvm_vcpu *vcpu)
+{
+       return complete_emulated_insn_gp(vcpu, vcpu->run->msr.error);
+}
 
-       return static_call(kvm_x86_complete_emulated_msr)(vcpu, err);
+static int complete_emulated_rdmsr(struct kvm_vcpu *vcpu)
+{
+       complete_userspace_rdmsr(vcpu);
+       return complete_emulated_msr_access(vcpu);
 }
 
-static int complete_emulated_wrmsr(struct kvm_vcpu *vcpu)
+static int complete_fast_msr_access(struct kvm_vcpu *vcpu)
 {
        return static_call(kvm_x86_complete_emulated_msr)(vcpu, vcpu->run->msr.error);
 }
 
+static int complete_fast_rdmsr(struct kvm_vcpu *vcpu)
+{
+       complete_userspace_rdmsr(vcpu);
+       return complete_fast_msr_access(vcpu);
+}
+
 static u64 kvm_msr_reason(int r)
 {
        switch (r) {
@@ -1864,18 +1892,6 @@ static int kvm_msr_user_space(struct kvm_vcpu *vcpu, u32 index,
        return 1;
 }
 
-static int kvm_get_msr_user_space(struct kvm_vcpu *vcpu, u32 index, int r)
-{
-       return kvm_msr_user_space(vcpu, index, KVM_EXIT_X86_RDMSR, 0,
-                                  complete_emulated_rdmsr, r);
-}
-
-static int kvm_set_msr_user_space(struct kvm_vcpu *vcpu, u32 index, u64 data, int r)
-{
-       return kvm_msr_user_space(vcpu, index, KVM_EXIT_X86_WRMSR, data,
-                                  complete_emulated_wrmsr, r);
-}
-
 int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu)
 {
        u32 ecx = kvm_rcx_read(vcpu);
@@ -1884,18 +1900,16 @@ int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu)
 
        r = kvm_get_msr(vcpu, ecx, &data);
 
-       /* MSR read failed? See if we should ask user space */
-       if (r && kvm_get_msr_user_space(vcpu, ecx, r)) {
-               /* Bounce to user space */
-               return 0;
-       }
-
        if (!r) {
                trace_kvm_msr_read(ecx, data);
 
                kvm_rax_write(vcpu, data & -1u);
                kvm_rdx_write(vcpu, (data >> 32) & -1u);
        } else {
+               /* MSR read failed? See if we should ask user space */
+               if (kvm_msr_user_space(vcpu, ecx, KVM_EXIT_X86_RDMSR, 0,
+                                      complete_fast_rdmsr, r))
+                       return 0;
                trace_kvm_msr_read_ex(ecx);
        }
 
@@ -1911,19 +1925,18 @@ int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu)
 
        r = kvm_set_msr(vcpu, ecx, data);
 
-       /* MSR write failed? See if we should ask user space */
-       if (r && kvm_set_msr_user_space(vcpu, ecx, data, r))
-               /* Bounce to user space */
-               return 0;
-
-       /* Signal all other negative errors to userspace */
-       if (r < 0)
-               return r;
-
-       if (!r)
+       if (!r) {
                trace_kvm_msr_write(ecx, data);
-       else
+       } else {
+               /* MSR write failed? See if we should ask user space */
+               if (kvm_msr_user_space(vcpu, ecx, KVM_EXIT_X86_WRMSR, data,
+                                      complete_fast_msr_access, r))
+                       return 0;
+               /* Signal all other negative errors to userspace */
+               if (r < 0)
+                       return r;
                trace_kvm_msr_write_ex(ecx, data);
+       }
 
        return static_call(kvm_x86_complete_emulated_msr)(vcpu, r);
 }
@@ -2816,7 +2829,7 @@ static void kvm_end_pvclock_update(struct kvm *kvm)
 {
        struct kvm_arch *ka = &kvm->arch;
        struct kvm_vcpu *vcpu;
-       int i;
+       unsigned long i;
 
        write_seqcount_end(&ka->pvclock_sc);
        raw_spin_unlock_irq(&ka->tsc_write_lock);
@@ -3065,7 +3078,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
 
 static void kvmclock_update_fn(struct work_struct *work)
 {
-       int i;
+       unsigned long i;
        struct delayed_work *dwork = to_delayed_work(work);
        struct kvm_arch *ka = container_of(dwork, struct kvm_arch,
                                           kvmclock_update_work);
@@ -3258,6 +3271,29 @@ static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu)
        static_call(kvm_x86_tlb_flush_guest)(vcpu);
 }
 
+
+static inline void kvm_vcpu_flush_tlb_current(struct kvm_vcpu *vcpu)
+{
+       ++vcpu->stat.tlb_flush;
+       static_call(kvm_x86_tlb_flush_current)(vcpu);
+}
+
+/*
+ * Service "local" TLB flush requests, which are specific to the current MMU
+ * context.  In addition to the generic event handling in vcpu_enter_guest(),
+ * TLB flushes that are targeted at an MMU context also need to be serviced
+ * prior to nested VM-Enter/VM-Exit.
+ */
+void kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu)
+{
+       if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
+               kvm_vcpu_flush_tlb_current(vcpu);
+
+       if (kvm_check_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu))
+               kvm_vcpu_flush_tlb_guest(vcpu);
+}
+EXPORT_SYMBOL_GPL(kvm_service_local_tlb_flush_requests);
+
 static void record_steal_time(struct kvm_vcpu *vcpu)
 {
        struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache;
@@ -3389,7 +3425,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 
                if (!msr_info->host_initiated)
                        return 1;
-               if (guest_cpuid_has(vcpu, X86_FEATURE_PDCM) && kvm_get_msr_feature(&msr_ent))
+               if (kvm_get_msr_feature(&msr_ent))
                        return 1;
                if (data & ~msr_ent.data)
                        return 1;
@@ -4133,6 +4169,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        case KVM_CAP_SGX_ATTRIBUTE:
 #endif
        case KVM_CAP_VM_COPY_ENC_CONTEXT_FROM:
+       case KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM:
        case KVM_CAP_SREGS2:
        case KVM_CAP_EXIT_ON_EMULATION_FAILURE:
        case KVM_CAP_VCPU_ATTRIBUTES:
@@ -4448,8 +4485,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
                                    struct kvm_lapic_state *s)
 {
-       if (vcpu->arch.apicv_active)
-               static_call(kvm_x86_sync_pir_to_irr)(vcpu);
+       static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu);
 
        return kvm_apic_get_state(vcpu, s);
 }
@@ -5124,6 +5160,17 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                struct kvm_cpuid __user *cpuid_arg = argp;
                struct kvm_cpuid cpuid;
 
+               /*
+                * KVM does not correctly handle changing guest CPUID after KVM_RUN, as
+                * MAXPHYADDR, GBPAGES support, AMD reserved bit behavior, etc. aren't
+                * tracked in kvm_mmu_page_role.  As a result, KVM may miss guest page
+                * faults due to reusing SPs/SPTEs.  In practice no sane VMM mucks with
+                * the core vCPU model on the fly, so fail.
+                */
+               r = -EINVAL;
+               if (vcpu->arch.last_vmentry_cpu != -1)
+                       goto out;
+
                r = -EFAULT;
                if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
                        goto out;
@@ -5134,6 +5181,14 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                struct kvm_cpuid2 __user *cpuid_arg = argp;
                struct kvm_cpuid2 cpuid;
 
+               /*
+                * KVM_SET_CPUID{,2} after KVM_RUN is forbidden, see the comment in
+                * KVM_SET_CPUID case above.
+                */
+               r = -EINVAL;
+               if (vcpu->arch.last_vmentry_cpu != -1)
+                       goto out;
+
                r = -EFAULT;
                if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
                        goto out;
@@ -5650,7 +5705,7 @@ void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
         * VM-Exit.
         */
        struct kvm_vcpu *vcpu;
-       int i;
+       unsigned long i;
 
        kvm_for_each_vcpu(i, vcpu, kvm)
                kvm_vcpu_kick(vcpu);
@@ -5698,6 +5753,7 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
                smp_wmb();
                kvm->arch.irqchip_mode = KVM_IRQCHIP_SPLIT;
                kvm->arch.nr_reserved_ioapic_pins = cap->args[0];
+               kvm_request_apicv_update(kvm, true, APICV_INHIBIT_REASON_ABSENT);
                r = 0;
 split_irqchip_unlock:
                mutex_unlock(&kvm->lock);
@@ -5918,7 +5974,8 @@ static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp)
 static int kvm_arch_suspend_notifier(struct kvm *kvm)
 {
        struct kvm_vcpu *vcpu;
-       int i, ret = 0;
+       unsigned long i;
+       int ret = 0;
 
        mutex_lock(&kvm->lock);
        kvm_for_each_vcpu(i, vcpu, kvm) {
@@ -6078,6 +6135,7 @@ set_identity_unlock:
                /* Write kvm->irq_routing before enabling irqchip_in_kernel. */
                smp_wmb();
                kvm->arch.irqchip_mode = KVM_IRQCHIP_KERNEL;
+               kvm_request_apicv_update(kvm, true, APICV_INHIBIT_REASON_ABSENT);
        create_irqchip_unlock:
                mutex_unlock(&kvm->lock);
                break;
@@ -6460,13 +6518,14 @@ void kvm_get_segment(struct kvm_vcpu *vcpu,
 gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
                           struct x86_exception *exception)
 {
+       struct kvm_mmu *mmu = vcpu->arch.mmu;
        gpa_t t_gpa;
 
        BUG_ON(!mmu_is_nested(vcpu));
 
        /* NPT walks are always user-walks */
        access |= PFERR_USER_MASK;
-       t_gpa  = vcpu->arch.mmu->gva_to_gpa(vcpu, gpa, access, exception);
+       t_gpa  = mmu->gva_to_gpa(vcpu, mmu, gpa, access, exception);
 
        return t_gpa;
 }
@@ -6474,25 +6533,31 @@ gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
 gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
                              struct x86_exception *exception)
 {
+       struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
+
        u32 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0;
-       return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
+       return mmu->gva_to_gpa(vcpu, mmu, gva, access, exception);
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_gva_to_gpa_read);
 
  gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva,
                                struct x86_exception *exception)
 {
+       struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
+
        u32 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0;
        access |= PFERR_FETCH_MASK;
-       return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
+       return mmu->gva_to_gpa(vcpu, mmu, gva, access, exception);
 }
 
 gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva,
                               struct x86_exception *exception)
 {
+       struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
+
        u32 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0;
        access |= PFERR_WRITE_MASK;
-       return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
+       return mmu->gva_to_gpa(vcpu, mmu, gva, access, exception);
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_gva_to_gpa_write);
 
@@ -6500,19 +6565,21 @@ EXPORT_SYMBOL_GPL(kvm_mmu_gva_to_gpa_write);
 gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
                                struct x86_exception *exception)
 {
-       return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, 0, exception);
+       struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
+
+       return mmu->gva_to_gpa(vcpu, mmu, gva, 0, exception);
 }
 
 static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
                                      struct kvm_vcpu *vcpu, u32 access,
                                      struct x86_exception *exception)
 {
+       struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
        void *data = val;
        int r = X86EMUL_CONTINUE;
 
        while (bytes) {
-               gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, access,
-                                                           exception);
+               gpa_t gpa = mmu->gva_to_gpa(vcpu, mmu, addr, access, exception);
                unsigned offset = addr & (PAGE_SIZE-1);
                unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset);
                int ret;
@@ -6540,13 +6607,14 @@ static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt,
                                struct x86_exception *exception)
 {
        struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+       struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
        u32 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0;
        unsigned offset;
        int ret;
 
        /* Inline kvm_read_guest_virt_helper for speed.  */
-       gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, access|PFERR_FETCH_MASK,
-                                                   exception);
+       gpa_t gpa = mmu->gva_to_gpa(vcpu, mmu, addr, access|PFERR_FETCH_MASK,
+                                   exception);
        if (unlikely(gpa == UNMAPPED_GVA))
                return X86EMUL_PROPAGATE_FAULT;
 
@@ -6605,13 +6673,12 @@ static int kvm_write_guest_virt_helper(gva_t addr, void *val, unsigned int bytes
                                      struct kvm_vcpu *vcpu, u32 access,
                                      struct x86_exception *exception)
 {
+       struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
        void *data = val;
        int r = X86EMUL_CONTINUE;
 
        while (bytes) {
-               gpa_t gpa =  vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr,
-                                                            access,
-                                                            exception);
+               gpa_t gpa = mmu->gva_to_gpa(vcpu, mmu, addr, access, exception);
                unsigned offset = addr & (PAGE_SIZE-1);
                unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
                int ret;
@@ -6698,6 +6765,7 @@ static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
                                gpa_t *gpa, struct x86_exception *exception,
                                bool write)
 {
+       struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
        u32 access = ((static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0)
                | (write ? PFERR_WRITE_MASK : 0);
 
@@ -6715,7 +6783,7 @@ static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
                return 1;
        }
 
-       *gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
+       *gpa = mmu->gva_to_gpa(vcpu, mmu, gva, access, exception);
 
        if (*gpa == UNMAPPED_GVA)
                return -1;
@@ -7077,7 +7145,13 @@ static int emulator_pio_in(struct kvm_vcpu *vcpu, int size,
                           unsigned short port, void *val, unsigned int count)
 {
        if (vcpu->arch.pio.count) {
-               /* Complete previous iteration.  */
+               /*
+                * Complete a previous iteration that required userspace I/O.
+                * Note, @count isn't guaranteed to match pio.count as userspace
+                * can modify ECX before rerunning the vCPU.  Ignore any such
+                * shenanigans as KVM doesn't support modifying the rep count,
+                * and the emulator ensures @count doesn't overflow the buffer.
+                */
        } else {
                int r = __emulator_pio_in(vcpu, size, port, count);
                if (!r)
@@ -7086,7 +7160,6 @@ static int emulator_pio_in(struct kvm_vcpu *vcpu, int size,
                /* Results already available, fall through.  */
        }
 
-       WARN_ON(count != vcpu->arch.pio.count);
        complete_emulator_pio_in(vcpu, val);
        return 1;
 }
@@ -7344,7 +7417,8 @@ static int emulator_get_msr(struct x86_emulate_ctxt *ctxt,
 
        r = kvm_get_msr(vcpu, msr_index, pdata);
 
-       if (r && kvm_get_msr_user_space(vcpu, msr_index, r)) {
+       if (r && kvm_msr_user_space(vcpu, msr_index, KVM_EXIT_X86_RDMSR, 0,
+                                   complete_emulated_rdmsr, r)) {
                /* Bounce to user space */
                return X86EMUL_IO_NEEDED;
        }
@@ -7360,7 +7434,8 @@ static int emulator_set_msr(struct x86_emulate_ctxt *ctxt,
 
        r = kvm_set_msr(vcpu, msr_index, data);
 
-       if (r && kvm_set_msr_user_space(vcpu, msr_index, data, r)) {
+       if (r && kvm_msr_user_space(vcpu, msr_index, KVM_EXIT_X86_WRMSR, data,
+                                   complete_emulated_msr_access, r)) {
                /* Bounce to user space */
                return X86EMUL_IO_NEEDED;
        }
@@ -8078,12 +8153,23 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
        }
 
        /*
-        * Note, EMULTYPE_SKIP is intended for use *only* by vendor callbacks
-        * for kvm_skip_emulated_instruction().  The caller is responsible for
-        * updating interruptibility state and injecting single-step #DBs.
+        * EMULTYPE_SKIP without EMULTYPE_COMPLETE_USER_EXIT is intended for
+        * use *only* by vendor callbacks for kvm_skip_emulated_instruction().
+        * The caller is responsible for updating interruptibility state and
+        * injecting single-step #DBs.
         */
        if (emulation_type & EMULTYPE_SKIP) {
-               kvm_rip_write(vcpu, ctxt->_eip);
+               if (ctxt->mode != X86EMUL_MODE_PROT64)
+                       ctxt->eip = (u32)ctxt->_eip;
+               else
+                       ctxt->eip = ctxt->_eip;
+
+               if (emulation_type & EMULTYPE_COMPLETE_USER_EXIT) {
+                       r = 1;
+                       goto writeback;
+               }
+
+               kvm_rip_write(vcpu, ctxt->eip);
                if (ctxt->eflags & X86_EFLAGS_RF)
                        kvm_set_rflags(vcpu, ctxt->eflags & ~X86_EFLAGS_RF);
                return 1;
@@ -8147,11 +8233,15 @@ restart:
                        writeback = false;
                r = 0;
                vcpu->arch.complete_userspace_io = complete_emulated_mmio;
+       } else if (vcpu->arch.complete_userspace_io) {
+               writeback = false;
+               r = 0;
        } else if (r == EMULATION_RESTART)
                goto restart;
        else
                r = 1;
 
+writeback:
        if (writeback) {
                unsigned long rflags = static_call(kvm_x86_get_rflags)(vcpu);
                toggle_interruptibility(vcpu, ctxt->interruptibility);
@@ -8344,7 +8434,8 @@ static void __kvmclock_cpufreq_notifier(struct cpufreq_freqs *freq, int cpu)
 {
        struct kvm *kvm;
        struct kvm_vcpu *vcpu;
-       int i, send_ipi = 0;
+       int send_ipi = 0;
+       unsigned long i;
 
        /*
         * We allow guests to temporarily run on slowing clocks,
@@ -8517,9 +8608,8 @@ static struct perf_guest_info_callbacks kvm_guest_cbs = {
 static void pvclock_gtod_update_fn(struct work_struct *work)
 {
        struct kvm *kvm;
-
        struct kvm_vcpu *vcpu;
-       int i;
+       unsigned long i;
 
        mutex_lock(&kvm_lock);
        list_for_each_entry(kvm, &vm_list, vm_list)
@@ -8680,8 +8770,15 @@ void kvm_arch_exit(void)
 #endif
 }
 
-static int __kvm_vcpu_halt(struct kvm_vcpu *vcpu, int state, int reason)
+static int __kvm_emulate_halt(struct kvm_vcpu *vcpu, int state, int reason)
 {
+       /*
+        * The vCPU has halted, e.g. executed HLT.  If the local APIC is
+        * in-kernel, update the run state; the run loop will detect the
+        * non-runnable state and halt the vCPU.  Exit to userspace if the
+        * local APIC is managed by userspace, in which case userspace is
+        * responsible for handling wake events.
+        */
        ++vcpu->stat.halt_exits;
        if (lapic_in_kernel(vcpu)) {
                vcpu->arch.mp_state = state;
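
Aside, illustration only: with a userspace-managed local APIC, the halt above reaches the VMM as an exit carrying the reason passed to __kvm_emulate_halt() (KVM_EXIT_HLT here). A hedged sketch of how a VMM's run loop might dispatch on that exit, written against the public linux/kvm.h exit reasons and not anything this patch adds:

#include <linux/kvm.h>
#include <stdbool.h>
#include <stdio.h>

/*
 * Toy exit dispatcher for a VMM run loop.  Returns true if the vCPU should be
 * parked until the VMM injects a wake event.  Assumes the caller owns the
 * mmap'ed kvm_run structure for the vCPU.
 */
static bool handle_exit(struct kvm_run *run)
{
	switch (run->exit_reason) {
	case KVM_EXIT_HLT:
		/* Guest executed HLT and no in-kernel APIC is handling it. */
		return true;
	case KVM_EXIT_IO:
	case KVM_EXIT_MMIO:
		/* Emulate the access in the VMM, then resume the vCPU. */
		return false;
	default:
		fprintf(stderr, "unhandled exit reason %u\n", run->exit_reason);
		return false;
	}
}

int main(void)
{
	struct kvm_run run = { .exit_reason = KVM_EXIT_HLT };

	printf("park vcpu: %d\n", handle_exit(&run));
	return 0;
}
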
@@ -8692,11 +8789,11 @@ static int __kvm_vcpu_halt(struct kvm_vcpu *vcpu, int state, int reason)
        }
 }
 
-int kvm_vcpu_halt(struct kvm_vcpu *vcpu)
+int kvm_emulate_halt_noskip(struct kvm_vcpu *vcpu)
 {
-       return __kvm_vcpu_halt(vcpu, KVM_MP_STATE_HALTED, KVM_EXIT_HLT);
+       return __kvm_emulate_halt(vcpu, KVM_MP_STATE_HALTED, KVM_EXIT_HLT);
 }
-EXPORT_SYMBOL_GPL(kvm_vcpu_halt);
+EXPORT_SYMBOL_GPL(kvm_emulate_halt_noskip);
 
 int kvm_emulate_halt(struct kvm_vcpu *vcpu)
 {
@@ -8705,7 +8802,7 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
         * TODO: we might be squashing a GUESTDBG_SINGLESTEP-triggered
         * KVM_EXIT_DEBUG here.
         */
-       return kvm_vcpu_halt(vcpu) && ret;
+       return kvm_emulate_halt_noskip(vcpu) && ret;
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_halt);
 
@@ -8713,7 +8810,8 @@ int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu)
 {
        int ret = kvm_skip_emulated_instruction(vcpu);
 
-       return __kvm_vcpu_halt(vcpu, KVM_MP_STATE_AP_RESET_HOLD, KVM_EXIT_AP_RESET_HOLD) && ret;
+       return __kvm_emulate_halt(vcpu, KVM_MP_STATE_AP_RESET_HOLD,
+                                       KVM_EXIT_AP_RESET_HOLD) && ret;
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_ap_reset_hold);
 
@@ -8776,10 +8874,9 @@ static void kvm_apicv_init(struct kvm *kvm)
 {
        init_rwsem(&kvm->arch.apicv_update_lock);
 
-       if (enable_apicv)
-               clear_bit(APICV_INHIBIT_REASON_DISABLE,
-                         &kvm->arch.apicv_inhibit_reasons);
-       else
+       set_bit(APICV_INHIBIT_REASON_ABSENT,
+               &kvm->arch.apicv_inhibit_reasons);
+       if (!enable_apicv)
                set_bit(APICV_INHIBIT_REASON_DISABLE,
                        &kvm->arch.apicv_inhibit_reasons);
 }
@@ -8952,14 +9049,7 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu)
 {
        struct kvm_run *kvm_run = vcpu->run;
 
-       /*
-        * if_flag is obsolete and useless, so do not bother
-        * setting it for SEV-ES guests.  Userspace can just
-        * use kvm_run->ready_for_interrupt_injection.
-        */
-       kvm_run->if_flag = !vcpu->arch.guest_state_protected
-               && (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
-
+       kvm_run->if_flag = static_call(kvm_x86_get_if_flag)(vcpu);
        kvm_run->cr8 = kvm_get_cr8(vcpu);
        kvm_run->apic_base = kvm_get_apic_base(vcpu);
 
@@ -9528,8 +9618,7 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
        if (irqchip_split(vcpu->kvm))
                kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors);
        else {
-               if (vcpu->arch.apicv_active)
-                       static_call(kvm_x86_sync_pir_to_irr)(vcpu);
+               static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu);
                if (ioapic_in_kernel(vcpu->kvm))
                        kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
        }
@@ -9648,10 +9737,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
                        /* Flushing all ASIDs flushes the current ASID... */
                        kvm_clear_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
                }
-               if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
-                       kvm_vcpu_flush_tlb_current(vcpu);
-               if (kvm_check_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu))
-                       kvm_vcpu_flush_tlb_guest(vcpu);
+               kvm_service_local_tlb_flush_requests(vcpu);
 
                if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
                        vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
@@ -9802,10 +9888,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 
        /*
         * This handles the case where a posted interrupt was
-        * notified with kvm_vcpu_kick.
+        * notified with kvm_vcpu_kick.  Assigned devices can
+        * use the POSTED_INTR_VECTOR even if APICv is disabled,
+        * so do it even if APICv is disabled on this vCPU.
         */
-       if (kvm_lapic_enabled(vcpu) && vcpu->arch.apicv_active)
-               static_call(kvm_x86_sync_pir_to_irr)(vcpu);
+       if (kvm_lapic_enabled(vcpu))
+               static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu);
 
        if (kvm_vcpu_exit_request(vcpu)) {
                vcpu->mode = OUTSIDE_GUEST_MODE;
@@ -9849,8 +9937,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
                if (likely(exit_fastpath != EXIT_FASTPATH_REENTER_GUEST))
                        break;
 
-               if (vcpu->arch.apicv_active)
-                       static_call(kvm_x86_sync_pir_to_irr)(vcpu);
+               if (kvm_lapic_enabled(vcpu))
+                       static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu);
 
                if (unlikely(kvm_vcpu_exit_request(vcpu))) {
                        exit_fastpath = EXIT_FASTPATH_EXIT_HANDLED;
@@ -9956,7 +10044,10 @@ static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
        if (!kvm_arch_vcpu_runnable(vcpu) &&
            (!kvm_x86_ops.pre_block || static_call(kvm_x86_pre_block)(vcpu) == 0)) {
                srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
-               kvm_vcpu_block(vcpu);
+               if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
+                       kvm_vcpu_halt(vcpu);
+               else
+                       kvm_vcpu_block(vcpu);
                vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
 
                if (kvm_x86_ops.post_block)
@@ -10516,7 +10607,7 @@ static int __set_sregs_common(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs,
        vcpu->arch.cr2 = sregs->cr2;
        *mmu_reset_needed |= kvm_read_cr3(vcpu) != sregs->cr3;
        vcpu->arch.cr3 = sregs->cr3;
-       kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
+       kvm_register_mark_dirty(vcpu, VCPU_EXREG_CR3);
 
        kvm_set_cr8(vcpu, sregs->cr8);
 
@@ -10533,7 +10624,7 @@ static int __set_sregs_common(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs,
        if (update_pdptrs) {
                idx = srcu_read_lock(&vcpu->kvm->srcu);
                if (is_pae_paging(vcpu)) {
-                       load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
+                       load_pdptrs(vcpu, kvm_read_cr3(vcpu));
                        *mmu_reset_needed = 1;
                }
                srcu_read_unlock(&vcpu->kvm->srcu, idx);
@@ -10631,7 +10722,7 @@ static void kvm_arch_vcpu_guestdbg_update_apicv_inhibit(struct kvm *kvm)
 {
        bool inhibit = false;
        struct kvm_vcpu *vcpu;
-       int i;
+       unsigned long i;
 
        down_write(&kvm->arch.apicv_update_lock);
 
@@ -11119,7 +11210,7 @@ int kvm_arch_hardware_enable(void)
 {
        struct kvm *kvm;
        struct kvm_vcpu *vcpu;
-       int i;
+       unsigned long i;
        int ret;
        u64 local_tsc;
        u64 max_tsc = 0;
@@ -11372,7 +11463,7 @@ static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
 
 static void kvm_free_vcpus(struct kvm *kvm)
 {
-       unsigned int i;
+       unsigned long i;
        struct kvm_vcpu *vcpu;
 
        /*
@@ -11382,15 +11473,8 @@ static void kvm_free_vcpus(struct kvm *kvm)
                kvm_clear_async_pf_completion_queue(vcpu);
                kvm_unload_vcpu_mmu(vcpu);
        }
-       kvm_for_each_vcpu(i, vcpu, kvm)
-               kvm_vcpu_destroy(vcpu);
 
-       mutex_lock(&kvm->lock);
-       for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
-               kvm->vcpus[i] = NULL;
-
-       atomic_set(&kvm->online_vcpus, 0);
-       mutex_unlock(&kvm->lock);
+       kvm_destroy_vcpus(kvm);
 }
 
 void kvm_arch_sync_events(struct kvm *kvm)
@@ -11558,9 +11642,9 @@ int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages)
 }
 
 static int kvm_alloc_memslot_metadata(struct kvm *kvm,
-                                     struct kvm_memory_slot *slot,
-                                     unsigned long npages)
+                                     struct kvm_memory_slot *slot)
 {
+       unsigned long npages = slot->npages;
        int i, r;
 
        /*
@@ -11625,7 +11709,7 @@ out_free:
 void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen)
 {
        struct kvm_vcpu *vcpu;
-       int i;
+       unsigned long i;
 
        /*
         * memslots->generation has been incremented.
@@ -11639,13 +11723,18 @@ void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen)
 }
 
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
-                               struct kvm_memory_slot *memslot,
-                               const struct kvm_userspace_memory_region *mem,
-                               enum kvm_mr_change change)
+                                  const struct kvm_memory_slot *old,
+                                  struct kvm_memory_slot *new,
+                                  enum kvm_mr_change change)
 {
        if (change == KVM_MR_CREATE || change == KVM_MR_MOVE)
-               return kvm_alloc_memslot_metadata(kvm, memslot,
-                                                 mem->memory_size >> PAGE_SHIFT);
+               return kvm_alloc_memslot_metadata(kvm, new);
+
+       if (change == KVM_MR_FLAGS_ONLY)
+               memcpy(&new->arch, &old->arch, sizeof(old->arch));
+       else if (WARN_ON_ONCE(change != KVM_MR_DELETE))
+               return -EIO;
+
        return 0;
 }
 
@@ -11669,13 +11758,15 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
                                     const struct kvm_memory_slot *new,
                                     enum kvm_mr_change change)
 {
-       bool log_dirty_pages = new->flags & KVM_MEM_LOG_DIRTY_PAGES;
+       u32 old_flags = old ? old->flags : 0;
+       u32 new_flags = new ? new->flags : 0;
+       bool log_dirty_pages = new_flags & KVM_MEM_LOG_DIRTY_PAGES;
 
        /*
         * Update CPU dirty logging if dirty logging is being toggled.  This
         * applies to all operations.
         */
-       if ((old->flags ^ new->flags) & KVM_MEM_LOG_DIRTY_PAGES)
+       if ((old_flags ^ new_flags) & KVM_MEM_LOG_DIRTY_PAGES)
                kvm_mmu_update_cpu_dirty_logging(kvm, log_dirty_pages);
 
        /*
@@ -11693,7 +11784,7 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
         * MOVE/DELETE: The old mappings will already have been cleaned up by
         *              kvm_arch_flush_shadow_memslot().
         */
-       if ((change != KVM_MR_FLAGS_ONLY) || (new->flags & KVM_MEM_READONLY))
+       if ((change != KVM_MR_FLAGS_ONLY) || (new_flags & KVM_MEM_READONLY))
                return;
 
        /*
@@ -11701,7 +11792,7 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
         * other flag is LOG_DIRTY_PAGES, i.e. something is wrong if dirty
         * logging isn't being toggled on or off.
         */
-       if (WARN_ON_ONCE(!((old->flags ^ new->flags) & KVM_MEM_LOG_DIRTY_PAGES)))
+       if (WARN_ON_ONCE(!((old_flags ^ new_flags) & KVM_MEM_LOG_DIRTY_PAGES)))
                return;
 
        if (!log_dirty_pages) {
@@ -11737,14 +11828,18 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
 }
 
 void kvm_arch_commit_memory_region(struct kvm *kvm,
-                               const struct kvm_userspace_memory_region *mem,
                                struct kvm_memory_slot *old,
                                const struct kvm_memory_slot *new,
                                enum kvm_mr_change change)
 {
-       if (!kvm->arch.n_requested_mmu_pages)
-               kvm_mmu_change_mmu_pages(kvm,
-                               kvm_mmu_calculate_default_mmu_pages(kvm));
+       if (!kvm->arch.n_requested_mmu_pages &&
+           (change == KVM_MR_CREATE || change == KVM_MR_DELETE)) {
+               unsigned long nr_mmu_pages;
+
+               nr_mmu_pages = kvm->nr_memslot_pages / KVM_MEMSLOT_PAGES_TO_MMU_PAGES_RATIO;
+               nr_mmu_pages = max(nr_mmu_pages, KVM_MIN_ALLOC_MMU_PAGES);
+               kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
+       }
 
        kvm_mmu_slot_apply_flags(kvm, old, new, change);
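
Aside, illustration only: the new sizing runs just on CREATE and DELETE and derives the limit from the cached memslot page total. A worked example of the arithmetic, with the two constants assumed to be 50 and 64 (the authoritative values live in KVM's headers, not in this hunk):

#include <stdio.h>

#define KVM_MEMSLOT_PAGES_TO_MMU_PAGES_RATIO	50	/* assumed value */
#define KVM_MIN_ALLOC_MMU_PAGES			64UL	/* assumed value */

static unsigned long default_mmu_pages(unsigned long nr_memslot_pages)
{
	unsigned long nr_mmu_pages = nr_memslot_pages / KVM_MEMSLOT_PAGES_TO_MMU_PAGES_RATIO;

	return nr_mmu_pages > KVM_MIN_ALLOC_MMU_PAGES ? nr_mmu_pages : KVM_MIN_ALLOC_MMU_PAGES;
}

int main(void)
{
	/* 4 GiB of guest memory in 4 KiB pages -> 1048576 memslot pages. */
	printf("%lu\n", default_mmu_pages((4UL << 30) >> 12));	/* 20971 */
	/* Tiny guests bottom out at the minimum. */
	printf("%lu\n", default_mmu_pages(1024));		/* 64 */
	return 0;
}
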
 
@@ -12254,12 +12349,13 @@ EXPORT_SYMBOL_GPL(kvm_spec_ctrl_test_value);
 
 void kvm_fixup_and_inject_pf_error(struct kvm_vcpu *vcpu, gva_t gva, u16 error_code)
 {
+       struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
        struct x86_exception fault;
        u32 access = error_code &
                (PFERR_WRITE_MASK | PFERR_FETCH_MASK | PFERR_USER_MASK);
 
        if (!(error_code & PFERR_PRESENT_MASK) ||
-           vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, &fault) != UNMAPPED_GVA) {
+           mmu->gva_to_gpa(vcpu, mmu, gva, access, &fault) != UNMAPPED_GVA) {
                /*
                 * If vcpu->arch.walk_mmu->gva_to_gpa succeeded, the page
                 * tables probably do not match the TLB.  Just proceed
@@ -12596,6 +12692,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_unaccelerated_access);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_incomplete_ipi);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_ga_log);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_apicv_update_request);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_apicv_accept_irq);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_enter);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_exit);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_enter);
index 997669a..4abcd8d 100644 (file)
@@ -103,6 +103,7 @@ static inline unsigned int __shrink_ple_window(unsigned int val,
 
 #define MSR_IA32_CR_PAT_DEFAULT  0x0007040600070406ULL
 
+void kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu);
 int kvm_check_nested_events(struct kvm_vcpu *vcpu);
 
 static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu)
@@ -185,12 +186,6 @@ static inline bool mmu_is_nested(struct kvm_vcpu *vcpu)
        return vcpu->arch.walk_mmu == &vcpu->arch.nested_mmu;
 }
 
-static inline void kvm_vcpu_flush_tlb_current(struct kvm_vcpu *vcpu)
-{
-       ++vcpu->stat.tlb_flush;
-       static_call(kvm_x86_tlb_flush_current)(vcpu);
-}
-
 static inline int is_pae(struct kvm_vcpu *vcpu)
 {
        return kvm_read_cr4_bits(vcpu, X86_CR4_PAE);
index 4a3da75..38d24d2 100644 (file)
@@ -72,6 +72,7 @@ static void __init setup_real_mode(void)
 #ifdef CONFIG_X86_64
        u64 *trampoline_pgd;
        u64 efer;
+       int i;
 #endif
 
        base = (unsigned char *)real_mode_header;
@@ -128,8 +129,17 @@ static void __init setup_real_mode(void)
        trampoline_header->flags = 0;
 
        trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd);
+
+       /* Map the real mode stub as virtual == physical */
        trampoline_pgd[0] = trampoline_pgd_entry.pgd;
-       trampoline_pgd[511] = init_top_pgt[511].pgd;
+
+       /*
+        * Include the entirety of the kernel mapping into the trampoline
+        * PGD.  This way, all mappings present in the normal kernel page
+        * tables are usable while running on trampoline_pgd.
+        */
+       for (i = pgd_index(__PAGE_OFFSET); i < PTRS_PER_PGD; i++)
+               trampoline_pgd[i] = init_top_pgt[i].pgd;
 #endif
 
        sme_sev_setup_real_mode(trampoline_header);
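
Aside, illustration only: the new loop copies every kernel-half PGD slot, starting at the slot that maps __PAGE_OFFSET, into the trampoline page table. A standalone sketch of the index arithmetic, assuming the common 4-level, 48-bit layout (512 slots, 9 bits per level) and the usual 4-level direct-map base; 5-level systems shift by 48 and start at a different slot:

#include <stdint.h>
#include <stdio.h>

#define PTRS_PER_PGD	512
#define PGDIR_SHIFT	39			/* 4-level paging: bits 47..39 select the PGD slot */
#define PAGE_OFFSET	0xffff888000000000ULL	/* assumed 4-level direct-map base */

static unsigned int pgd_index(uint64_t vaddr)
{
	return (vaddr >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1);
}

int main(void)
{
	unsigned int first = pgd_index(PAGE_OFFSET);

	/* The hunk copies init_top_pgt[first..511] into trampoline_pgd. */
	printf("first kernel slot: %u, slots copied: %u\n", first, PTRS_PER_PGD - first);
	return 0;
}
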
index 220dd96..444d824 100644 (file)
@@ -20,6 +20,7 @@
 
 #include <linux/init.h>
 #include <linux/linkage.h>
+#include <../entry/calling.h>
 
 .pushsection .noinstr.text, "ax"
 /*
@@ -192,6 +193,25 @@ SYM_CODE_START(xen_iret)
        jmp hypercall_iret
 SYM_CODE_END(xen_iret)
 
+/*
+ * XEN pv doesn't use a trampoline stack; PER_CPU_VAR(cpu_tss_rw + TSS_sp0) is
+ * also the kernel stack.  Reusing swapgs_restore_regs_and_return_to_usermode()
+ * in XEN pv would move %rsp up to the top of the kernel stack and leave the
+ * IRET frame below %rsp, where it could be corrupted if an #NMI arrives.  And
+ * having swapgs_restore_regs_and_return_to_usermode() push the IRET frame at
+ * the same address would be pointless anyway.
+ */
+SYM_CODE_START(xenpv_restore_regs_and_return_to_usermode)
+       UNWIND_HINT_REGS
+       POP_REGS
+
+       /* stackleak_erase() can work safely on the kernel stack. */
+       STACKLEAK_ERASE_NOCLOBBER
+
+       addq    $8, %rsp        /* skip regs->orig_ax */
+       jmp xen_iret
+SYM_CODE_END(xenpv_restore_regs_and_return_to_usermode)
+
 /*
  * Xen handles syscall callbacks much like ordinary exceptions, which
  * means we have:
index a8a0416..7b43593 100644 (file)
@@ -121,7 +121,6 @@ void flush_cache_page(struct vm_area_struct*,
 
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 void flush_dcache_page(struct page *);
-void flush_dcache_folio(struct folio *);
 
 void local_flush_cache_range(struct vm_area_struct *vma,
                unsigned long start, unsigned long end);
@@ -138,9 +137,7 @@ void local_flush_cache_page(struct vm_area_struct *vma,
 #define flush_cache_vunmap(start,end)                  do { } while (0)
 
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0
-#define ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO
 #define flush_dcache_page(page)                                do { } while (0)
-static inline void flush_dcache_folio(struct folio *folio) { }
 
 #define flush_icache_range local_flush_icache_range
 #define flush_cache_page(vma, addr, pfn)               do { } while (0)
index 104b327..3e3e1a5 100644 (file)
 446    common  landlock_restrict_self          sys_landlock_restrict_self
 # 447 reserved for memfd_secret
 448    common  process_mrelease                sys_process_mrelease
+449    common  futex_waitv                     sys_futex_waitv
index b4dab2f..b1d087e 100644 (file)
@@ -753,8 +753,7 @@ struct block_device *blkdev_get_no_open(dev_t dev)
 
        if (!bdev)
                return NULL;
-       if ((bdev->bd_disk->flags & GENHD_FL_HIDDEN) ||
-           !try_module_get(bdev->bd_disk->fops->owner)) {
+       if ((bdev->bd_disk->flags & GENHD_FL_HIDDEN)) {
                put_device(&bdev->bd_device);
                return NULL;
        }
@@ -764,7 +763,6 @@ struct block_device *blkdev_get_no_open(dev_t dev)
 
 void blkdev_put_no_open(struct block_device *bdev)
 {
-       module_put(bdev->bd_disk->fops->owner);
        put_device(&bdev->bd_device);
 }
 
@@ -820,12 +818,14 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
        ret = -ENXIO;
        if (!disk_live(disk))
                goto abort_claiming;
+       if (!try_module_get(disk->fops->owner))
+               goto abort_claiming;
        if (bdev_is_partition(bdev))
                ret = blkdev_get_part(bdev, mode);
        else
                ret = blkdev_get_whole(bdev, mode);
        if (ret)
-               goto abort_claiming;
+               goto put_module;
        if (mode & FMODE_EXCL) {
                bd_finish_claiming(bdev, holder);
 
@@ -847,7 +847,8 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
        if (unblock_events)
                disk_unblock_events(disk);
        return bdev;
-
+put_module:
+       module_put(disk->fops->owner);
 abort_claiming:
        if (mode & FMODE_EXCL)
                bd_abort_claiming(bdev, holder);
@@ -956,6 +957,7 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
                blkdev_put_whole(bdev, mode);
        mutex_unlock(&disk->open_mutex);
 
+       module_put(disk->fops->owner);
        blkdev_put_no_open(bdev);
 }
 EXPORT_SYMBOL(blkdev_put);
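
Aside, illustration only: these block hunks move the fops->owner module reference out of blkdev_get_no_open()/blkdev_put_no_open() and into blkdev_get_by_dev()/blkdev_put(), adding a put_module unwind label so a failed open drops the reference it just took. The balanced get/put shape, sketched with a plain counter standing in for the real module refcount:

#include <stdio.h>

static int module_refs;

static int try_module_get_stub(void) { module_refs++; return 1; }
static void module_put_stub(void)    { module_refs--; }

/* Mirrors the new error unwinding: every successful "get" has exactly one "put". */
static int open_device(int claim_ok, int open_ok)
{
	int ret = -1;

	if (!claim_ok)
		goto abort_claiming;
	if (!try_module_get_stub())
		goto abort_claiming;
	if (!open_ok)
		goto put_module;

	return 0;		/* held reference is dropped later, on the put path */

put_module:
	module_put_stub();
abort_claiming:
	return ret;
}

int main(void)
{
	open_device(1, 0);	/* open fails: the reference must not leak */
	printf("refs after failed open: %d\n", module_refs);		/* 0 */
	open_device(1, 1);	/* open succeeds: one reference stays held */
	printf("refs after successful open: %d\n", module_refs);	/* 1 */
	return 0;
}
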
index f0f38ca..1378d08 100644 (file)
@@ -1017,6 +1017,7 @@ EXPORT_SYMBOL(submit_bio);
 /**
  * bio_poll - poll for BIO completions
  * @bio: bio to poll for
+ * @iob: batches of IO
  * @flags: BLK_POLL_* flags that control the behavior
  *
  * Poll for completions on queue associated with the bio. Returns number of
index 8799fa7..8874a63 100644 (file)
@@ -860,13 +860,14 @@ void blk_mq_end_request_batch(struct io_comp_batch *iob)
                if (iob->need_ts)
                        __blk_mq_end_request_acct(rq, now);
 
+               rq_qos_done(rq->q, rq);
+
                WRITE_ONCE(rq->state, MQ_RQ_IDLE);
                if (!refcount_dec_and_test(&rq->ref))
                        continue;
 
                blk_crypto_free_request(rq);
                blk_pm_mark_last_busy(rq);
-               rq_qos_done(rq->q, rq);
 
                if (nr_tags == TAG_COMP_BATCH || cur_hctx != rq->mq_hctx) {
                        if (cur_hctx)
index a85c351..b62c87b 100644 (file)
@@ -998,7 +998,14 @@ static int cpc_write(int cpu, struct cpc_register_resource *reg_res, u64 val)
 static int cppc_get_perf(int cpunum, enum cppc_regs reg_idx, u64 *perf)
 {
        struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpunum);
-       struct cpc_register_resource *reg = &cpc_desc->cpc_regs[reg_idx];
+       struct cpc_register_resource *reg;
+
+       if (!cpc_desc) {
+               pr_debug("No CPC descriptor for CPU:%d\n", cpunum);
+               return -ENODEV;
+       }
+
+       reg = &cpc_desc->cpc_regs[reg_idx];
 
        if (CPC_IN_PCC(reg)) {
                int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpunum);
index e312eba..2366f54 100644 (file)
@@ -1084,21 +1084,17 @@ struct fwnode_handle *acpi_get_next_subnode(const struct fwnode_handle *fwnode,
  * Returns parent node of an ACPI device or data firmware node or %NULL if
  * not available.
  */
-struct fwnode_handle *acpi_node_get_parent(const struct fwnode_handle *fwnode)
+static struct fwnode_handle *
+acpi_node_get_parent(const struct fwnode_handle *fwnode)
 {
        if (is_acpi_data_node(fwnode)) {
                /* All data nodes have parent pointer so just return that */
                return to_acpi_data_node(fwnode)->parent;
        } else if (is_acpi_device_node(fwnode)) {
-               acpi_handle handle, parent_handle;
-
-               handle = to_acpi_device_node(fwnode)->handle;
-               if (ACPI_SUCCESS(acpi_get_parent(handle, &parent_handle))) {
-                       struct acpi_device *adev;
+               struct device *dev = to_acpi_device_node(fwnode)->dev.parent;
 
-                       if (!acpi_bus_get_device(parent_handle, &adev))
-                               return acpi_fwnode_handle(adev);
-               }
+               if (dev)
+                       return acpi_fwnode_handle(to_acpi_device(dev));
        }
 
        return NULL;
index 49fb741..cffbe57 100644 (file)
@@ -2710,7 +2710,7 @@ static void binder_transaction(struct binder_proc *proc,
                t->from = thread;
        else
                t->from = NULL;
-       t->sender_euid = proc->cred->euid;
+       t->sender_euid = task_euid(proc->tsk);
        t->to_proc = target_proc;
        t->to_thread = target_thread;
        t->code = tr->code;
index 5b78e86..b9c7788 100644 (file)
@@ -827,7 +827,7 @@ static ssize_t ata_scsi_lpm_show(struct device *dev,
        if (ap->target_lpm_policy >= ARRAY_SIZE(ata_lpm_policy_names))
                return -EINVAL;
 
-       return snprintf(buf, PAGE_SIZE, "%s\n",
+       return sysfs_emit(buf, "%s\n",
                        ata_lpm_policy_names[ap->target_lpm_policy]);
 }
 DEVICE_ATTR(link_power_management_policy, S_IRUGO | S_IWUSR,
index 121635a..823c886 100644 (file)
@@ -55,14 +55,14 @@ static unsigned int pata_falcon_data_xfer(struct ata_queued_cmd *qc,
        /* Transfer multiple of 2 bytes */
        if (rw == READ) {
                if (swap)
-                       raw_insw_swapw((u16 *)data_addr, (u16 *)buf, words);
+                       raw_insw_swapw(data_addr, (u16 *)buf, words);
                else
-                       raw_insw((u16 *)data_addr, (u16 *)buf, words);
+                       raw_insw(data_addr, (u16 *)buf, words);
        } else {
                if (swap)
-                       raw_outsw_swapw((u16 *)data_addr, (u16 *)buf, words);
+                       raw_outsw_swapw(data_addr, (u16 *)buf, words);
                else
-                       raw_outsw((u16 *)data_addr, (u16 *)buf, words);
+                       raw_outsw(data_addr, (u16 *)buf, words);
        }
 
        /* Transfer trailing byte, if any. */
@@ -74,16 +74,16 @@ static unsigned int pata_falcon_data_xfer(struct ata_queued_cmd *qc,
 
                if (rw == READ) {
                        if (swap)
-                               raw_insw_swapw((u16 *)data_addr, (u16 *)pad, 1);
+                               raw_insw_swapw(data_addr, (u16 *)pad, 1);
                        else
-                               raw_insw((u16 *)data_addr, (u16 *)pad, 1);
+                               raw_insw(data_addr, (u16 *)pad, 1);
                        *buf = pad[0];
                } else {
                        pad[0] = *buf;
                        if (swap)
-                               raw_outsw_swapw((u16 *)data_addr, (u16 *)pad, 1);
+                               raw_outsw_swapw(data_addr, (u16 *)pad, 1);
                        else
-                               raw_outsw((u16 *)data_addr, (u16 *)pad, 1);
+                               raw_outsw(data_addr, (u16 *)pad, 1);
                }
                words++;
        }
index e5838b2..3b31a4f 100644 (file)
@@ -1394,6 +1394,14 @@ static int sata_fsl_init_controller(struct ata_host *host)
        return 0;
 }
 
+static void sata_fsl_host_stop(struct ata_host *host)
+{
+        struct sata_fsl_host_priv *host_priv = host->private_data;
+
+        iounmap(host_priv->hcr_base);
+        kfree(host_priv);
+}
+
 /*
  * scsi mid-layer and libata interface structures
  */
@@ -1426,6 +1434,8 @@ static struct ata_port_operations sata_fsl_ops = {
        .port_start = sata_fsl_port_start,
        .port_stop = sata_fsl_port_stop,
 
+       .host_stop      = sata_fsl_host_stop,
+
        .pmp_attach = sata_fsl_pmp_attach,
        .pmp_detach = sata_fsl_pmp_detach,
 };
@@ -1480,9 +1490,9 @@ static int sata_fsl_probe(struct platform_device *ofdev)
        host_priv->ssr_base = ssr_base;
        host_priv->csr_base = csr_base;
 
-       irq = irq_of_parse_and_map(ofdev->dev.of_node, 0);
-       if (!irq) {
-               dev_err(&ofdev->dev, "invalid irq from platform\n");
+       irq = platform_get_irq(ofdev, 0);
+       if (irq < 0) {
+               retval = irq;
                goto error_exit_with_cleanup;
        }
        host_priv->irq = irq;
@@ -1557,10 +1567,6 @@ static int sata_fsl_remove(struct platform_device *ofdev)
 
        ata_host_detach(host);
 
-       irq_dispose_mapping(host_priv->irq);
-       iounmap(host_priv->hcr_base);
-       kfree(host_priv);
-
        return 0;
 }
 
index a154cab..c3a36cf 100644 (file)
@@ -2103,7 +2103,7 @@ static int loop_control_remove(int idx)
        int ret;
 
        if (idx < 0) {
-               pr_warn("deleting an unspecified loop device is not supported.\n");
+               pr_warn_once("deleting an unspecified loop device is not supported.\n");
                return -EINVAL;
        }
                
index 97bf051..6ae3877 100644 (file)
@@ -316,7 +316,7 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
        struct request *req = bd->rq;
        struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
        unsigned long flags;
-       unsigned int num;
+       int num;
        int qid = hctx->queue_num;
        bool notify = false;
        blk_status_t status;
@@ -1049,7 +1049,6 @@ static struct virtio_driver virtio_blk = {
        .feature_table_size             = ARRAY_SIZE(features),
        .feature_table_legacy           = features_legacy,
        .feature_table_size_legacy      = ARRAY_SIZE(features_legacy),
-       .suppress_used_validation       = true,
        .driver.name                    = KBUILD_MODNAME,
        .driver.owner                   = THIS_MODULE,
        .id_table                       = id_table,
index 08d7953..2507112 100644 (file)
@@ -1853,12 +1853,14 @@ static const struct block_device_operations zram_devops = {
        .owner = THIS_MODULE
 };
 
+#ifdef CONFIG_ZRAM_WRITEBACK
 static const struct block_device_operations zram_wb_devops = {
        .open = zram_open,
        .submit_bio = zram_submit_bio,
        .swap_slot_free_notify = zram_slot_free_notify,
        .owner = THIS_MODULE
 };
+#endif
 
 static DEVICE_ATTR_WO(compact);
 static DEVICE_ATTR_RW(disksize);
index ed3c4c4..d68d05d 100644 (file)
@@ -281,7 +281,7 @@ agp_ioc_init(void __iomem *ioc_regs)
         return 0;
 }
 
-static int
+static int __init
 lba_find_capability(int cap)
 {
        struct _parisc_agp_info *info = &parisc_agp_info;
@@ -366,7 +366,7 @@ fail:
        return error;
 }
 
-static int
+static int __init
 find_quicksilver(struct device *dev, void *data)
 {
        struct parisc_device **lba = data;
@@ -378,7 +378,7 @@ find_quicksilver(struct device *dev, void *data)
        return 0;
 }
 
-static int
+static int __init
 parisc_agp_init(void)
 {
        extern struct sba_device *sba_list;
index deed355..c837d54 100644 (file)
@@ -191,6 +191,8 @@ struct ipmi_user {
        struct work_struct remove_work;
 };
 
+static struct workqueue_struct *remove_work_wq;
+
 static struct ipmi_user *acquire_ipmi_user(struct ipmi_user *user, int *index)
        __acquires(user->release_barrier)
 {
@@ -1297,7 +1299,7 @@ static void free_user(struct kref *ref)
        struct ipmi_user *user = container_of(ref, struct ipmi_user, refcount);
 
        /* SRCU cleanup must happen in task context. */
-       schedule_work(&user->remove_work);
+       queue_work(remove_work_wq, &user->remove_work);
 }
 
 static void _ipmi_destroy_user(struct ipmi_user *user)
@@ -3918,9 +3920,11 @@ static int handle_ipmb_direct_rcv_cmd(struct ipmi_smi *intf,
                /* We didn't find a user, deliver an error response. */
                ipmi_inc_stat(intf, unhandled_commands);
 
-               msg->data[0] = ((netfn + 1) << 2) | (msg->rsp[4] & 0x3);
-               msg->data[1] = msg->rsp[2];
-               msg->data[2] = msg->rsp[4] & ~0x3;
+               msg->data[0] = (netfn + 1) << 2;
+               msg->data[0] |= msg->rsp[2] & 0x3; /* rqLUN */
+               msg->data[1] = msg->rsp[1]; /* Addr */
+               msg->data[2] = msg->rsp[2] & ~0x3; /* rqSeq */
+               msg->data[2] |= msg->rsp[0] & 0x3; /* rsLUN */
                msg->data[3] = cmd;
                msg->data[4] = IPMI_INVALID_CMD_COMPLETION_CODE;
                msg->data_size = 5;
@@ -4455,13 +4459,24 @@ return_unspecified:
                msg->rsp[2] = IPMI_ERR_UNSPECIFIED;
                msg->rsp_size = 3;
        } else if (msg->type == IPMI_SMI_MSG_TYPE_IPMB_DIRECT) {
-               /* commands must have at least 3 bytes, responses 4. */
-               if (is_cmd && (msg->rsp_size < 3)) {
+               /* commands must have at least 4 bytes, responses 5. */
+               if (is_cmd && (msg->rsp_size < 4)) {
                        ipmi_inc_stat(intf, invalid_commands);
                        goto out;
                }
-               if (!is_cmd && (msg->rsp_size < 4))
-                       goto return_unspecified;
+               if (!is_cmd && (msg->rsp_size < 5)) {
+                       ipmi_inc_stat(intf, invalid_ipmb_responses);
+                       /* Construct a valid error response. */
+                       msg->rsp[0] = msg->data[0] & 0xfc; /* NetFN */
+                       msg->rsp[0] |= (1 << 2); /* Make it a response */
+                       msg->rsp[0] |= msg->data[2] & 3; /* rqLUN */
+                       msg->rsp[1] = msg->data[1]; /* Addr */
+                       msg->rsp[2] = msg->data[2] & 0xfc; /* rqSeq */
+                       msg->rsp[2] |= msg->data[0] & 0x3; /* rsLUN */
+                       msg->rsp[3] = msg->data[3]; /* Cmd */
+                       msg->rsp[4] = IPMI_ERR_UNSPECIFIED;
+                       msg->rsp_size = 5;
+               }
        } else if ((msg->data_size >= 2)
            && (msg->data[0] == (IPMI_NETFN_APP_REQUEST << 2))
            && (msg->data[1] == IPMI_SEND_MSG_CMD)
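
Aside, illustration only: both IPMI hunks assemble a five-byte IPMB-direct error response, splicing the requester's LUN bits into the NetFN and sequence bytes exactly as the inline comments describe. A standalone sketch of that packing, with the field layout taken from the hunk comments (NetFN/rqLUN, Addr, rqSeq/rsLUN, Cmd, completion code):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define IPMI_ERR_UNSPECIFIED	0xff

/*
 * Build an IPMB-direct error response from a received command header:
 * rsp[0] = NetFN | rqLUN, rsp[1] = Addr, rsp[2] = rqSeq | rsLUN,
 * rsp[3] = Cmd, rsp[4] = completion code.
 */
static size_t build_error_rsp(const uint8_t *cmd, uint8_t *rsp)
{
	rsp[0]  = cmd[0] & 0xfc;	/* NetFN */
	rsp[0] |= 1 << 2;		/* turn the NetFN into a response */
	rsp[0] |= cmd[2] & 0x3;		/* rqLUN */
	rsp[1]  = cmd[1];		/* Addr */
	rsp[2]  = cmd[2] & 0xfc;	/* rqSeq */
	rsp[2] |= cmd[0] & 0x3;		/* rsLUN */
	rsp[3]  = cmd[3];		/* Cmd */
	rsp[4]  = IPMI_ERR_UNSPECIFIED;
	return 5;
}

int main(void)
{
	const uint8_t cmd[4] = { 0x18, 0x20, 0x41, 0x01 };	/* example request header */
	uint8_t rsp[5];
	size_t i, n = build_error_rsp(cmd, rsp);

	for (i = 0; i < n; i++)
		printf("%02x ", rsp[i]);
	printf("\n");
	return 0;
}
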
@@ -5031,6 +5046,7 @@ struct ipmi_smi_msg *ipmi_alloc_smi_msg(void)
        if (rv) {
                rv->done = free_smi_msg;
                rv->user_data = NULL;
+               rv->type = IPMI_SMI_MSG_TYPE_NORMAL;
                atomic_inc(&smi_msg_inuse_count);
        }
        return rv;
@@ -5383,6 +5399,13 @@ static int ipmi_init_msghandler(void)
 
        atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
 
+       remove_work_wq = create_singlethread_workqueue("ipmi-msghandler-remove-wq");
+       if (!remove_work_wq) {
+               pr_err("unable to create ipmi-msghandler-remove-wq workqueue");
+               rv = -ENOMEM;
+               goto out;
+       }
+
        initialized = true;
 
 out:
@@ -5408,6 +5431,8 @@ static void __exit cleanup_ipmi(void)
        int count;
 
        if (initialized) {
+               destroy_workqueue(remove_work_wq);
+
                atomic_notifier_chain_unregister(&panic_notifier_list,
                                                 &panic_block);
 
index e338d2f..096c384 100644 (file)
@@ -1004,10 +1004,9 @@ static struct kobj_type ktype_cpufreq = {
        .release        = cpufreq_sysfs_release,
 };
 
-static void add_cpu_dev_symlink(struct cpufreq_policy *policy, unsigned int cpu)
+static void add_cpu_dev_symlink(struct cpufreq_policy *policy, unsigned int cpu,
+                               struct device *dev)
 {
-       struct device *dev = get_cpu_device(cpu);
-
        if (unlikely(!dev))
                return;
 
@@ -1296,8 +1295,9 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy)
 
        if (policy->max_freq_req) {
                /*
-                * CPUFREQ_CREATE_POLICY notification is sent only after
-                * successfully adding max_freq_req request.
+                * Remove max_freq_req after sending CPUFREQ_REMOVE_POLICY
+                * notification, since CPUFREQ_CREATE_POLICY notification was
+                * sent after adding max_freq_req earlier.
                 */
                blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
                                             CPUFREQ_REMOVE_POLICY, policy);
@@ -1391,7 +1391,7 @@ static int cpufreq_online(unsigned int cpu)
        if (new_policy) {
                for_each_cpu(j, policy->related_cpus) {
                        per_cpu(cpufreq_cpu_data, j) = policy;
-                       add_cpu_dev_symlink(policy, j);
+                       add_cpu_dev_symlink(policy, j, get_cpu_device(j));
                }
 
                policy->min_freq_req = kzalloc(2 * sizeof(*policy->min_freq_req),
@@ -1565,7 +1565,7 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
        /* Create sysfs link on CPU registration */
        policy = per_cpu(cpufreq_cpu_data, cpu);
        if (policy)
-               add_cpu_dev_symlink(policy, cpu);
+               add_cpu_dev_symlink(policy, cpu, dev);
 
        return 0;
 }
index 815df3d..dec2a56 100644 (file)
@@ -338,6 +338,8 @@ static void intel_pstste_sched_itmt_work_fn(struct work_struct *work)
 
 static DECLARE_WORK(sched_itmt_work, intel_pstste_sched_itmt_work_fn);
 
+#define CPPC_MAX_PERF  U8_MAX
+
 static void intel_pstate_set_itmt_prio(int cpu)
 {
        struct cppc_perf_caps cppc_perf;
@@ -348,6 +350,14 @@ static void intel_pstate_set_itmt_prio(int cpu)
        if (ret)
                return;
 
+       /*
+        * On some systems with overclocking enabled, CPPC.highest_perf is
+        * hardcoded to 0xff.  In that case it can't be used to enable ITMT, so
+        * look at MSR_HWP_CAPABILITIES bits [8:0] to decide instead.
+        */
+       if (cppc_perf.highest_perf == CPPC_MAX_PERF)
+               cppc_perf.highest_perf = HWP_HIGHEST_PERF(READ_ONCE(all_cpu_data[cpu]->hwp_cap_cached));
+
        /*
         * The priorities can be set regardless of whether or not
         * sched_set_itmt_support(true) has been called and it is valid to
@@ -1006,6 +1016,12 @@ static void intel_pstate_hwp_offline(struct cpudata *cpu)
                 */
                value &= ~GENMASK_ULL(31, 24);
                value |= HWP_ENERGY_PERF_PREFERENCE(cpu->epp_cached);
+               /*
+                * However, make sure that EPP will be set to "performance" when
+                * the CPU is brought back online again and the "performance"
+                * scaling algorithm is still in effect.
+                */
+               cpu->epp_policy = CPUFREQ_POLICY_UNKNOWN;
        }
 
        /*
@@ -2353,6 +2369,7 @@ static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] __initconst = {
        X86_MATCH(BROADWELL_D,          core_funcs),
        X86_MATCH(BROADWELL_X,          core_funcs),
        X86_MATCH(SKYLAKE_X,            core_funcs),
+       X86_MATCH(ICELAKE_X,            core_funcs),
        {}
 };
 
index f57a39d..ab7fd89 100644 (file)
@@ -290,7 +290,7 @@ static void system_heap_dma_buf_release(struct dma_buf *dmabuf)
        int i;
 
        table = &buffer->sg_table;
-       for_each_sg(table->sgl, sg, table->nents, i) {
+       for_each_sgtable_sg(table, sg, i) {
                struct page *page = sg_page(sg);
 
                __free_pages(page, compound_order(page));
index de416f9..f521933 100644 (file)
@@ -34,6 +34,12 @@ struct scmi_msg_resp_base_attributes {
        __le16 reserved;
 };
 
+struct scmi_msg_resp_base_discover_agent {
+       __le32 agent_id;
+       u8 name[SCMI_MAX_STR_SIZE];
+};
+
+
 struct scmi_msg_base_error_notify {
        __le32 event_control;
 #define BASE_TP_NOTIFY_ALL     BIT(0)
@@ -225,18 +231,21 @@ static int scmi_base_discover_agent_get(const struct scmi_protocol_handle *ph,
                                        int id, char *name)
 {
        int ret;
+       struct scmi_msg_resp_base_discover_agent *agent_info;
        struct scmi_xfer *t;
 
        ret = ph->xops->xfer_get_init(ph, BASE_DISCOVER_AGENT,
-                                     sizeof(__le32), SCMI_MAX_STR_SIZE, &t);
+                                     sizeof(__le32), sizeof(*agent_info), &t);
        if (ret)
                return ret;
 
        put_unaligned_le32(id, t->tx.buf);
 
        ret = ph->xops->do_xfer(ph, t);
-       if (!ret)
-               strlcpy(name, t->rx.buf, SCMI_MAX_STR_SIZE);
+       if (!ret) {
+               agent_info = t->rx.buf;
+               strlcpy(name, agent_info->name, SCMI_MAX_STR_SIZE);
+       }
 
        ph->xops->xfer_put(ph, t);
 
index 4371fdc..581d34c 100644 (file)
@@ -138,9 +138,7 @@ static int scmi_pm_domain_probe(struct scmi_device *sdev)
        scmi_pd_data->domains = domains;
        scmi_pd_data->num_domains = num_domains;
 
-       of_genpd_add_provider_onecell(np, scmi_pd_data);
-
-       return 0;
+       return of_genpd_add_provider_onecell(np, scmi_pd_data);
 }
 
 static const struct scmi_device_id scmi_id_table[] = {
index 3084715..cdbb287 100644 (file)
@@ -637,7 +637,7 @@ static int scmi_sensor_config_get(const struct scmi_protocol_handle *ph,
        if (ret)
                return ret;
 
-       put_unaligned_le32(cpu_to_le32(sensor_id), t->tx.buf);
+       put_unaligned_le32(sensor_id, t->tx.buf);
        ret = ph->xops->do_xfer(ph, t);
        if (!ret) {
                struct sensors_info *si = ph->get_priv(ph);
index 11e8efb..87039c5 100644 (file)
@@ -82,7 +82,8 @@ static bool scmi_vio_have_vq_rx(struct virtio_device *vdev)
 }
 
 static int scmi_vio_feed_vq_rx(struct scmi_vio_channel *vioch,
-                              struct scmi_vio_msg *msg)
+                              struct scmi_vio_msg *msg,
+                              struct device *dev)
 {
        struct scatterlist sg_in;
        int rc;
@@ -94,8 +95,7 @@ static int scmi_vio_feed_vq_rx(struct scmi_vio_channel *vioch,
 
        rc = virtqueue_add_inbuf(vioch->vqueue, &sg_in, 1, msg, GFP_ATOMIC);
        if (rc)
-               dev_err_once(vioch->cinfo->dev,
-                            "failed to add to virtqueue (%d)\n", rc);
+               dev_err_once(dev, "failed to add to virtqueue (%d)\n", rc);
        else
                virtqueue_kick(vioch->vqueue);
 
@@ -108,7 +108,7 @@ static void scmi_finalize_message(struct scmi_vio_channel *vioch,
                                  struct scmi_vio_msg *msg)
 {
        if (vioch->is_rx) {
-               scmi_vio_feed_vq_rx(vioch, msg);
+               scmi_vio_feed_vq_rx(vioch, msg, vioch->cinfo->dev);
        } else {
                /* Here IRQs are assumed to be already disabled by the caller */
                spin_lock(&vioch->lock);
@@ -269,7 +269,7 @@ static int virtio_chan_setup(struct scmi_chan_info *cinfo, struct device *dev,
                        list_add_tail(&msg->list, &vioch->free_list);
                        spin_unlock_irqrestore(&vioch->lock, flags);
                } else {
-                       scmi_vio_feed_vq_rx(vioch, msg);
+                       scmi_vio_feed_vq_rx(vioch, msg, cinfo->dev);
                }
        }
 
index a504895..ac08e81 100644 (file)
@@ -156,7 +156,7 @@ static int scmi_voltage_descriptors_get(const struct scmi_protocol_handle *ph,
                        int cnt;
 
                        cmd->domain_id = cpu_to_le32(v->id);
-                       cmd->level_index = desc_index;
+                       cmd->level_index = cpu_to_le32(desc_index);
                        ret = ph->xops->do_xfer(ph, tl);
                        if (ret)
                                break;
index 581aa5e..dd7c3d5 100644 (file)
@@ -50,7 +50,7 @@ static int __init smccc_soc_init(void)
        arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
                             ARM_SMCCC_ARCH_SOC_ID, &res);
 
-       if (res.a0 == SMCCC_RET_NOT_SUPPORTED) {
+       if ((int)res.a0 == SMCCC_RET_NOT_SUPPORTED) {
                pr_info("ARCH_SOC_ID not implemented, skipping ....\n");
                return 0;
        }
index 71acd57..6348559 100644 (file)
@@ -646,12 +646,6 @@ kfd_mem_attach_dmabuf(struct amdgpu_device *adev, struct kgd_mem *mem,
        if (IS_ERR(gobj))
                return PTR_ERR(gobj);
 
-       /* Import takes an extra reference on the dmabuf. Drop it now to
-        * avoid leaking it. We only need the one reference in
-        * kgd_mem->dmabuf.
-        */
-       dma_buf_put(mem->dmabuf);
-
        *bo = gem_to_amdgpu_bo(gobj);
        (*bo)->flags |= AMDGPU_GEM_CREATE_PREEMPTIBLE;
        (*bo)->parent = amdgpu_bo_ref(mem->bo);
@@ -1402,7 +1396,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
        struct sg_table *sg = NULL;
        uint64_t user_addr = 0;
        struct amdgpu_bo *bo;
-       struct drm_gem_object *gobj;
+       struct drm_gem_object *gobj = NULL;
        u32 domain, alloc_domain;
        u64 alloc_flags;
        int ret;
@@ -1512,14 +1506,16 @@ allocate_init_user_pages_failed:
        remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
        drm_vma_node_revoke(&gobj->vma_node, drm_priv);
 err_node_allow:
-       drm_gem_object_put(gobj);
        /* Don't unreserve system mem limit twice */
        goto err_reserve_limit;
 err_bo_create:
        unreserve_mem_limit(adev, size, alloc_domain, !!sg);
 err_reserve_limit:
        mutex_destroy(&(*mem)->lock);
-       kfree(*mem);
+       if (gobj)
+               drm_gem_object_put(gobj);
+       else
+               kfree(*mem);
 err:
        if (sg) {
                sg_free_table(sg);
index 96b7bb1..12a6b1c 100644 (file)
@@ -1569,6 +1569,18 @@ void amdgpu_atombios_scratch_regs_engine_hung(struct amdgpu_device *adev,
        WREG32(adev->bios_scratch_reg_offset + 3, tmp);
 }
 
+void amdgpu_atombios_scratch_regs_set_backlight_level(struct amdgpu_device *adev,
+                                                     u32 backlight_level)
+{
+       u32 tmp = RREG32(adev->bios_scratch_reg_offset + 2);
+
+       tmp &= ~ATOM_S2_CURRENT_BL_LEVEL_MASK;
+       tmp |= (backlight_level << ATOM_S2_CURRENT_BL_LEVEL_SHIFT) &
+               ATOM_S2_CURRENT_BL_LEVEL_MASK;
+
+       WREG32(adev->bios_scratch_reg_offset + 2, tmp);
+}
+
 bool amdgpu_atombios_scratch_need_asic_init(struct amdgpu_device *adev)
 {
        u32 tmp = RREG32(adev->bios_scratch_reg_offset + 7);
index 8cc0222..27e74b1 100644 (file)
@@ -185,6 +185,8 @@ bool amdgpu_atombios_has_gpu_virtualization_table(struct amdgpu_device *adev);
 void amdgpu_atombios_scratch_regs_lock(struct amdgpu_device *adev, bool lock);
 void amdgpu_atombios_scratch_regs_engine_hung(struct amdgpu_device *adev,
                                              bool hung);
+void amdgpu_atombios_scratch_regs_set_backlight_level(struct amdgpu_device *adev,
+                                                     u32 backlight_level);
 bool amdgpu_atombios_scratch_need_asic_init(struct amdgpu_device *adev);
 
 void amdgpu_atombios_copy_swap(u8 *dst, u8 *src, u8 num_bytes, bool to_le);
index 188accb..1e651b9 100644 (file)
@@ -3833,7 +3833,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
        /* disable all interrupts */
        amdgpu_irq_disable_all(adev);
        if (adev->mode_info.mode_config_initialized){
-               if (!amdgpu_device_has_dc_support(adev))
+               if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
                        drm_helper_force_disable_all(adev_to_drm(adev));
                else
                        drm_atomic_helper_shutdown(adev_to_drm(adev));
@@ -4289,6 +4289,8 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
 {
        int r;
 
+       amdgpu_amdkfd_pre_reset(adev);
+
        if (from_hypervisor)
                r = amdgpu_virt_request_full_gpu(adev, true);
        else
@@ -5031,7 +5033,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 
                cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
 
-               amdgpu_amdkfd_pre_reset(tmp_adev);
+               if (!amdgpu_sriov_vf(tmp_adev))
+                       amdgpu_amdkfd_pre_reset(tmp_adev);
 
                /*
                 * Mark these ASICs to be reseted as untracked first
@@ -5089,7 +5092,7 @@ retry:    /* Rest of adevs pre asic reset from XGMI hive. */
 
        tmp_vram_lost_counter = atomic_read(&((adev)->vram_lost_counter));
        /* Actual ASIC resets if needed.*/
-       /* TODO Implement XGMI hive reset logic for SRIOV */
+       /* Host driver will handle XGMI hive reset for SRIOV */
        if (amdgpu_sriov_vf(adev)) {
                r = amdgpu_device_reset_sriov(adev, job ? false : true);
                if (r)
@@ -5130,7 +5133,7 @@ skip_hw_reset:
                        drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
                }
 
-               if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) {
+               if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled) {
                        drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
                }
 
@@ -5151,7 +5154,7 @@ skip_sched_resume:
        list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
                /* unlock kfd: SRIOV would do it separately */
                if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
-                       amdgpu_amdkfd_post_reset(tmp_adev);
+                       amdgpu_amdkfd_post_reset(tmp_adev);
 
                /* kfd_post_reset will do nothing if kfd device is not initialized,
                 * need to bring up kfd here if it's not be initialized before
index 4e36694..ea00090 100644 (file)
@@ -157,6 +157,8 @@ static int hw_id_map[MAX_HWIP] = {
        [HDP_HWIP]      = HDP_HWID,
        [SDMA0_HWIP]    = SDMA0_HWID,
        [SDMA1_HWIP]    = SDMA1_HWID,
+       [SDMA2_HWIP]    = SDMA2_HWID,
+       [SDMA3_HWIP]    = SDMA3_HWID,
        [MMHUB_HWIP]    = MMHUB_HWID,
        [ATHUB_HWIP]    = ATHUB_HWID,
        [NBIO_HWIP]     = NBIF_HWID,
@@ -248,8 +250,8 @@ get_from_vram:
 
        offset = offsetof(struct binary_header, binary_checksum) +
                sizeof(bhdr->binary_checksum);
-       size = bhdr->binary_size - offset;
-       checksum = bhdr->binary_checksum;
+       size = le16_to_cpu(bhdr->binary_size) - offset;
+       checksum = le16_to_cpu(bhdr->binary_checksum);
 
        if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset,
                                              size, checksum)) {
@@ -270,7 +272,7 @@ get_from_vram:
        }
 
        if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset,
-                                             ihdr->size, checksum)) {
+                                             le16_to_cpu(ihdr->size), checksum)) {
                DRM_ERROR("invalid ip discovery data table checksum\n");
                r = -EINVAL;
                goto out;
@@ -282,7 +284,7 @@ get_from_vram:
        ghdr = (struct gpu_info_header *)(adev->mman.discovery_bin + offset);
 
        if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset,
-                                             ghdr->size, checksum)) {
+                                             le32_to_cpu(ghdr->size), checksum)) {
                DRM_ERROR("invalid gc data table checksum\n");
                r = -EINVAL;
                goto out;
@@ -489,10 +491,10 @@ void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev)
                        le16_to_cpu(bhdr->table_list[HARVEST_INFO].offset));
 
        for (i = 0; i < 32; i++) {
-               if (le32_to_cpu(harvest_info->list[i].hw_id) == 0)
+               if (le16_to_cpu(harvest_info->list[i].hw_id) == 0)
                        break;
 
-               switch (le32_to_cpu(harvest_info->list[i].hw_id)) {
+               switch (le16_to_cpu(harvest_info->list[i].hw_id)) {
                case VCN_HWID:
                        vcn_harvest_count++;
                        if (harvest_info->list[i].number_instance == 0)
@@ -918,6 +920,7 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev)
                case IP_VERSION(3, 0, 64):
                case IP_VERSION(3, 1, 1):
                case IP_VERSION(3, 0, 2):
+               case IP_VERSION(3, 0, 192):
                        amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block);
                        if (!amdgpu_sriov_vf(adev))
                                amdgpu_device_ip_block_add(adev, &jpeg_v3_0_ip_block);
index f3d62e1..0c7963d 100644 (file)
@@ -223,7 +223,7 @@ int amdgpu_ih_wait_on_checkpoint_process(struct amdgpu_device *adev,
  */
 int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
 {
-       unsigned int count = AMDGPU_IH_MAX_NUM_IVS;
+       unsigned int count;
        u32 wptr;
 
        if (!ih->enabled || adev->shutdown)
@@ -232,6 +232,7 @@ int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
        wptr = amdgpu_ih_get_wptr(adev, ih);
 
 restart_ih:
+       count  = AMDGPU_IH_MAX_NUM_IVS;
        DRM_DEBUG("%s: rptr %d, wptr %d\n", __func__, ih->rptr, wptr);
 
        /* Order reading of wptr vs. reading of IH ring data */
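The point of moving the initialization is that count is a per-pass budget: when processing restarts at restart_ih because new entries arrived, the budget has to be refilled, otherwise the next pass starts with whatever was left over, possibly zero. A standalone sketch of that loop shape, with hypothetical ring helpers in place of the amdgpu IH callbacks:

        struct demo_ring {
                u32 rptr;
                /* hardware access handles elided */
        };

        u32 demo_get_wptr(struct demo_ring *ring);      /* reads the hardware write pointer */
        void demo_decode_one(struct demo_ring *ring);   /* handles one entry, advances rptr */

        #define DEMO_MAX_IVS 512                        /* per-pass budget */

        static void demo_process(struct demo_ring *ring)
        {
                unsigned int count;
                u32 wptr = demo_get_wptr(ring);

        restart:
                count = DEMO_MAX_IVS;                   /* refill the budget on every pass */

                while (ring->rptr != wptr && --count)
                        demo_decode_one(ring);

                /* more entries may have arrived while we were draining */
                wptr = demo_get_wptr(ring);
                if (wptr != ring->rptr)
                        goto restart;
        }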
index 4f7c708..585961c 100644 (file)
@@ -135,6 +135,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
                break;
        case IP_VERSION(3, 0, 0):
        case IP_VERSION(3, 0, 64):
+       case IP_VERSION(3, 0, 192):
                if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 0))
                        fw_name = FIRMWARE_SIENNA_CICHLID;
                else
index ce982af..ac9a8cd 100644 (file)
@@ -504,8 +504,8 @@ static int amdgpu_vkms_sw_fini(void *handle)
        int i = 0;
 
        for (i = 0; i < adev->mode_info.num_crtc; i++)
-               if (adev->mode_info.crtcs[i])
-                       hrtimer_cancel(&adev->mode_info.crtcs[i]->vblank_timer);
+               if (adev->amdgpu_vkms_output[i].vblank_hrtimer.function)
+                       hrtimer_cancel(&adev->amdgpu_vkms_output[i].vblank_hrtimer);
 
        kfree(adev->mode_info.bios_hardcoded_edid);
        kfree(adev->amdgpu_vkms_output);
index e7dfeb4..dbe7442 100644 (file)
@@ -7707,8 +7707,19 @@ static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev)
        switch (adev->ip_versions[GC_HWIP][0]) {
        case IP_VERSION(10, 3, 1):
        case IP_VERSION(10, 3, 3):
-               clock = (uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Vangogh) |
-                       ((uint64_t)RREG32_SOC15(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Vangogh) << 32ULL);
+               preempt_disable();
+               clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Vangogh);
+               clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Vangogh);
+               hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Vangogh);
+               /* The SMUIO TSC clock runs at 100 MHz, so the 32-bit counter carries over
+                * roughly every 42 seconds.
+                */
+               if (hi_check != clock_hi) {
+                       clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Vangogh);
+                       clock_hi = hi_check;
+               }
+               preempt_enable();
+               clock = clock_lo | (clock_hi << 32ULL);
                break;
        default:
                preempt_disable();
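Both this hunk and the matching gfx_v9_0 change below replace a single racy assembly of two 32-bit reads with the classic high/low/high sequence: if the high word changed while the low word was being read, the low word may have wrapped, so it is re-read and the newer high word is used; preempt_disable()/preempt_enable() keep the three reads close together. A standalone sketch, where the accessors are hypothetical stand-ins for the RREG32_SOC15_NO_KIQ reads:

        u32 demo_read_tsc_upper(void);  /* high 32 bits of the free-running counter */
        u32 demo_read_tsc_lower(void);  /* low 32 bits */

        static u64 demo_read_tsc64(void)
        {
                u32 hi, lo, hi_check;

                hi = demo_read_tsc_upper();
                lo = demo_read_tsc_lower();
                hi_check = demo_read_tsc_upper();
                if (hi_check != hi) {
                        /* the low word wrapped between the two reads; re-read it */
                        lo = demo_read_tsc_lower();
                        hi = hi_check;
                }
                return ((u64)hi << 32) | lo;
        }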
index b4b80f2..b305fd3 100644 (file)
@@ -140,6 +140,11 @@ MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin");
 #define mmTCP_CHAN_STEER_5_ARCT                                                                0x0b0c
 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX                                                       0
 
+#define mmGOLDEN_TSC_COUNT_UPPER_Renoir                0x0025
+#define mmGOLDEN_TSC_COUNT_UPPER_Renoir_BASE_IDX       1
+#define mmGOLDEN_TSC_COUNT_LOWER_Renoir                0x0026
+#define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX       1
+
 enum ta_ras_gfx_subblock {
        /*CPC*/
        TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
@@ -4055,9 +4060,10 @@ static int gfx_v9_0_hw_fini(void *handle)
 
        gfx_v9_0_cp_enable(adev, false);
 
-       /* Skip suspend with A+A reset */
-       if (adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) {
-               dev_dbg(adev->dev, "Device in reset. Skipping RLC halt\n");
+       /* Skip stopping RLC with A+A reset or when RLC controls GFX clock */
+       if ((adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) ||
+           (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 4, 2))) {
+               dev_dbg(adev->dev, "Skipping RLC halt\n");
                return 0;
        }
 
@@ -4238,19 +4244,38 @@ failed_kiq_read:
 
 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
 {
-       uint64_t clock;
+       uint64_t clock, clock_lo, clock_hi, hi_check;
 
-       amdgpu_gfx_off_ctrl(adev, false);
-       mutex_lock(&adev->gfx.gpu_clock_mutex);
-       if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 0, 1) && amdgpu_sriov_runtime(adev)) {
-               clock = gfx_v9_0_kiq_read_clock(adev);
-       } else {
-               WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
-               clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
-                       ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
+       switch (adev->ip_versions[GC_HWIP][0]) {
+       case IP_VERSION(9, 3, 0):
+               preempt_disable();
+               clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
+               clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
+               hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
+       /* The SMUIO TSC clock runs at 100 MHz, so the 32-bit counter carries over
+                * roughly every 42 seconds.
+                */
+               if (hi_check != clock_hi) {
+                       clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
+                       clock_hi = hi_check;
+               }
+               preempt_enable();
+               clock = clock_lo | (clock_hi << 32ULL);
+               break;
+       default:
+               amdgpu_gfx_off_ctrl(adev, false);
+               mutex_lock(&adev->gfx.gpu_clock_mutex);
+               if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 0, 1) && amdgpu_sriov_runtime(adev)) {
+                       clock = gfx_v9_0_kiq_read_clock(adev);
+               } else {
+                       WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
+                       clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
+                               ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
+               }
+               mutex_unlock(&adev->gfx.gpu_clock_mutex);
+               amdgpu_gfx_off_ctrl(adev, true);
+               break;
        }
-       mutex_unlock(&adev->gfx.gpu_clock_mutex);
-       amdgpu_gfx_off_ctrl(adev, true);
        return clock;
 }
 
index 1d8414c..38241cf 100644 (file)
@@ -160,6 +160,7 @@ static int navi10_ih_toggle_ring_interrupts(struct amdgpu_device *adev,
 
        tmp = RREG32(ih_regs->ih_rb_cntl);
        tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_ENABLE, (enable ? 1 : 0));
+       tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_GPU_TS_ENABLE, 1);
        /* enable_intr field is only valid in ring0 */
        if (ih == &adev->irq.ih)
                tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, ENABLE_INTR, (enable ? 1 : 0));
@@ -275,10 +276,8 @@ static int navi10_ih_enable_ring(struct amdgpu_device *adev,
        tmp = navi10_ih_rb_cntl(ih, tmp);
        if (ih == &adev->irq.ih)
                tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RPTR_REARM, !!adev->irq.msi_enabled);
-       if (ih == &adev->irq.ih1) {
-               tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_ENABLE, 0);
+       if (ih == &adev->irq.ih1)
                tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_FULL_DRAIN_ENABLE, 1);
-       }
 
        if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) {
                if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp)) {
@@ -319,7 +318,6 @@ static int navi10_ih_irq_init(struct amdgpu_device *adev)
 {
        struct amdgpu_ih_ring *ih[] = {&adev->irq.ih, &adev->irq.ih1, &adev->irq.ih2};
        u32 ih_chicken;
-       u32 tmp;
        int ret;
        int i;
 
@@ -363,15 +361,6 @@ static int navi10_ih_irq_init(struct amdgpu_device *adev)
        adev->nbio.funcs->ih_doorbell_range(adev, ih[0]->use_doorbell,
                                            ih[0]->doorbell_index);
 
-       tmp = RREG32_SOC15(OSSSYS, 0, mmIH_STORM_CLIENT_LIST_CNTL);
-       tmp = REG_SET_FIELD(tmp, IH_STORM_CLIENT_LIST_CNTL,
-                           CLIENT18_IS_STORM_CLIENT, 1);
-       WREG32_SOC15(OSSSYS, 0, mmIH_STORM_CLIENT_LIST_CNTL, tmp);
-
-       tmp = RREG32_SOC15(OSSSYS, 0, mmIH_INT_FLOOD_CNTL);
-       tmp = REG_SET_FIELD(tmp, IH_INT_FLOOD_CNTL, FLOOD_CNTL_ENABLE, 1);
-       WREG32_SOC15(OSSSYS, 0, mmIH_INT_FLOOD_CNTL, tmp);
-
        pci_set_master(adev->pdev);
 
        /* enable interrupts */
@@ -420,12 +409,19 @@ static u32 navi10_ih_get_wptr(struct amdgpu_device *adev,
        u32 wptr, tmp;
        struct amdgpu_ih_regs *ih_regs;
 
-       wptr = le32_to_cpu(*ih->wptr_cpu);
-       ih_regs = &ih->ih_regs;
+       if (ih == &adev->irq.ih) {
+               /* Only ring0 supports writeback. On other rings fall back
+                * to register-based code with overflow checking below.
+                */
+               wptr = le32_to_cpu(*ih->wptr_cpu);
 
-       if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
-               goto out;
+               if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
+                       goto out;
+       }
 
+       ih_regs = &ih->ih_regs;
+
+       /* Double check that the overflow wasn't already cleared. */
        wptr = RREG32_NO_KIQ(ih_regs->ih_rb_wptr);
        if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW))
                goto out;
@@ -513,15 +509,11 @@ static int navi10_ih_self_irq(struct amdgpu_device *adev,
                              struct amdgpu_irq_src *source,
                              struct amdgpu_iv_entry *entry)
 {
-       uint32_t wptr = cpu_to_le32(entry->src_data[0]);
-
        switch (entry->ring_id) {
        case 1:
-               *adev->irq.ih1.wptr_cpu = wptr;
                schedule_work(&adev->irq.ih1_work);
                break;
        case 2:
-               *adev->irq.ih2.wptr_cpu = wptr;
                schedule_work(&adev->irq.ih2_work);
                break;
        default: break;
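The write-back path is now restricted to ring 0: the other rings no longer get their WPTR forwarded through the self-interrupt handler, so their handlers read the WPTR register directly, and an overflow seen in the write-back copy is double-checked against the register before it is handled. A standalone sketch of that shape, with hypothetical names and a made-up overflow bit:

        #define DEMO_WPTR_OVERFLOW      0x80000000u     /* hypothetical overflow flag */

        struct demo_ih_ring {
                bool has_writeback;             /* only ring 0 in this sketch */
                volatile u32 *wptr_cpu;         /* CPU copy written back by the IH */
        };

        u32 demo_read_wptr_reg(struct demo_ih_ring *ih);        /* MMIO read */
        void demo_handle_overflow(struct demo_ih_ring *ih, u32 wptr);

        static u32 demo_get_wptr(struct demo_ih_ring *ih)
        {
                u32 wptr;

                if (ih->has_writeback) {
                        wptr = *ih->wptr_cpu;
                        if (!(wptr & DEMO_WPTR_OVERFLOW))
                                return wptr;            /* fast path, no MMIO access */
                }

                /* Slow path: read the register and double-check the overflow flag. */
                wptr = demo_read_wptr_reg(ih);
                if (wptr & DEMO_WPTR_OVERFLOW)
                        demo_handle_overflow(ih, wptr);

                return wptr;
        }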
index 4ecd2b5..ee7cab3 100644 (file)
@@ -359,6 +359,10 @@ static void nbio_v2_3_init_registers(struct amdgpu_device *adev)
 
        if (def != data)
                WREG32_PCIE(smnPCIE_CONFIG_CNTL, data);
+
+       if (amdgpu_sriov_vf(adev))
+               adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0,
+                       mmBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
 }
 
 #define NAVI10_PCIE__LC_L0S_INACTIVITY_DEFAULT         0x00000000 // off by default, no gains over L1
index 0d2d629..4bbacf1 100644 (file)
@@ -276,6 +276,10 @@ static void nbio_v6_1_init_registers(struct amdgpu_device *adev)
 
        if (def != data)
                WREG32_PCIE(smnPCIE_CI_CNTL, data);
+
+       if (amdgpu_sriov_vf(adev))
+               adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0,
+                       mmBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
 }
 
 static void nbio_v6_1_program_ltr(struct amdgpu_device *adev)
index 3c00666..37a4039 100644 (file)
@@ -273,7 +273,9 @@ const struct nbio_hdp_flush_reg nbio_v7_0_hdp_flush_reg = {
 
 static void nbio_v7_0_init_registers(struct amdgpu_device *adev)
 {
-
+       if (amdgpu_sriov_vf(adev))
+               adev->rmmio_remap.reg_offset =
+                       SOC15_REG_OFFSET(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
 }
 
 const struct amdgpu_nbio_funcs nbio_v7_0_funcs = {
index 8f2a315..3444332 100644 (file)
@@ -371,6 +371,10 @@ static void nbio_v7_2_init_registers(struct amdgpu_device *adev)
                if (def != data)
                        WREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regPCIE_CONFIG_CNTL), data);
        }
+
+       if (amdgpu_sriov_vf(adev))
+               adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0,
+                       regBIF_BX_PF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
 }
 
 const struct amdgpu_nbio_funcs nbio_v7_2_funcs = {
index b8bd03d..dc5e937 100644 (file)
@@ -362,7 +362,9 @@ const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg_ald = {
 
 static void nbio_v7_4_init_registers(struct amdgpu_device *adev)
 {
-
+       if (amdgpu_sriov_vf(adev))
+               adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0,
+                       mmBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
 }
 
 static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device *adev)
@@ -692,6 +694,9 @@ static void nbio_v7_4_program_aspm(struct amdgpu_device *adev)
 {
        uint32_t def, data;
 
+       if (adev->ip_versions[NBIO_HWIP][0] == IP_VERSION(7, 4, 4))
+               return;
+
        def = data = RREG32_PCIE(smnPCIE_LC_CNTL);
        data &= ~PCIE_LC_CNTL__LC_L1_INACTIVITY_MASK;
        data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK;
index 59eafa3..2ec1ffb 100644 (file)
@@ -183,6 +183,7 @@ static int nv_query_video_codecs(struct amdgpu_device *adev, bool encode,
        switch (adev->ip_versions[UVD_HWIP][0]) {
        case IP_VERSION(3, 0, 0):
        case IP_VERSION(3, 0, 64):
+       case IP_VERSION(3, 0, 192):
                if (amdgpu_sriov_vf(adev)) {
                        if (encode)
                                *codecs = &sriov_sc_video_codecs_encode;
@@ -731,8 +732,10 @@ static int nv_common_early_init(void *handle)
 #define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-       adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
-       adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
+       if (!amdgpu_sriov_vf(adev)) {
+               adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
+               adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
+       }
        adev->smc_rreg = NULL;
        adev->smc_wreg = NULL;
        adev->pcie_rreg = &nv_pcie_rreg;
@@ -1032,7 +1035,7 @@ static int nv_common_hw_init(void *handle)
         * for the purpose of expose those registers
         * to process space
         */
-       if (adev->nbio.funcs->remap_hdp_registers)
+       if (adev->nbio.funcs->remap_hdp_registers && !amdgpu_sriov_vf(adev))
                adev->nbio.funcs->remap_hdp_registers(adev);
        /* enable the doorbell aperture */
        nv_enable_doorbell_aperture(adev, true);
index 0c316a2..de9b553 100644 (file)
@@ -971,8 +971,10 @@ static int soc15_common_early_init(void *handle)
 #define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-       adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
-       adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
+       if (!amdgpu_sriov_vf(adev)) {
+               adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
+               adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
+       }
        adev->smc_rreg = NULL;
        adev->smc_wreg = NULL;
        adev->pcie_rreg = &soc15_pcie_rreg;
@@ -1285,7 +1287,7 @@ static int soc15_common_hw_init(void *handle)
         * for the purpose of expose those registers
         * to process space
         */
-       if (adev->nbio.funcs->remap_hdp_registers)
+       if (adev->nbio.funcs->remap_hdp_registers && !amdgpu_sriov_vf(adev))
                adev->nbio.funcs->remap_hdp_registers(adev);
 
        /* enable the doorbell aperture */
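Taken together, the nbio_v*_init_registers() hunks and the nv/soc15 early-init hunks implement one decision: on bare metal the HDP memory-coherency flush register is remapped into the MMIO hole at the top of the register BAR so it can be exposed to process space, while SR-IOV virtual functions skip that remapping and point rmmio_remap straight at the per-VF flush register. A standalone sketch of the split, with hypothetical names:

        #define DEMO_MMIO_HOLE_OFFSET   (0x80000 - 4096)        /* top of the register BAR */
        #define DEMO_VF_HDP_FLUSH_REG   0x1234u                 /* hypothetical dword register offset */

        struct demo_adev {
                bool is_sriov_vf;
                u64 rmmio_base;
                u32 remap_reg_offset;
                u64 remap_bus_addr;
        };

        static void demo_setup_hdp_remap(struct demo_adev *adev)
        {
                if (adev->is_sriov_vf) {
                        /* VF: use the real per-VF flush register, no remapping */
                        adev->remap_reg_offset = DEMO_VF_HDP_FLUSH_REG << 2;
                        return;
                }

                /* Bare metal: expose the flush register through the MMIO hole */
                adev->remap_reg_offset = DEMO_MMIO_HOLE_OFFSET;
                adev->remap_bus_addr = adev->rmmio_base + DEMO_MMIO_HOLE_OFFSET;
        }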
index 94e92c0..8fd48d0 100644 (file)
@@ -766,7 +766,7 @@ struct svm_range_list {
        struct list_head                deferred_range_list;
        spinlock_t                      deferred_list_lock;
        atomic_t                        evicted_ranges;
-       bool                            drain_pagefaults;
+       atomic_t                        drain_pagefaults;
        struct delayed_work             restore_work;
        DECLARE_BITMAP(bitmap_supported, MAX_GPU_INSTANCE);
        struct task_struct              *faulting_task;
index 16137c4..3cb4681 100644 (file)
@@ -1574,7 +1574,6 @@ retry_flush_work:
 static void svm_range_restore_work(struct work_struct *work)
 {
        struct delayed_work *dwork = to_delayed_work(work);
-       struct amdkfd_process_info *process_info;
        struct svm_range_list *svms;
        struct svm_range *prange;
        struct kfd_process *p;
@@ -1594,12 +1593,10 @@ static void svm_range_restore_work(struct work_struct *work)
         * the lifetime of this thread, kfd_process and mm will be valid.
         */
        p = container_of(svms, struct kfd_process, svms);
-       process_info = p->kgd_process_info;
        mm = p->mm;
        if (!mm)
                return;
 
-       mutex_lock(&process_info->lock);
        svm_range_list_lock_and_flush_work(svms, mm);
        mutex_lock(&svms->lock);
 
@@ -1652,7 +1649,6 @@ static void svm_range_restore_work(struct work_struct *work)
 out_reschedule:
        mutex_unlock(&svms->lock);
        mmap_write_unlock(mm);
-       mutex_unlock(&process_info->lock);
 
        /* If validation failed, reschedule another attempt */
        if (evicted_ranges) {
@@ -1968,10 +1964,16 @@ static void svm_range_drain_retry_fault(struct svm_range_list *svms)
        struct kfd_process_device *pdd;
        struct amdgpu_device *adev;
        struct kfd_process *p;
+       int drain;
        uint32_t i;
 
        p = container_of(svms, struct kfd_process, svms);
 
+restart:
+       drain = atomic_read(&svms->drain_pagefaults);
+       if (!drain)
+               return;
+
        for_each_set_bit(i, svms->bitmap_supported, p->n_pdds) {
                pdd = p->pdds[i];
                if (!pdd)
@@ -1983,6 +1985,8 @@ static void svm_range_drain_retry_fault(struct svm_range_list *svms)
                amdgpu_ih_wait_on_checkpoint_process(adev, &adev->irq.ih1);
                pr_debug("drain retry fault gpu %d svms 0x%p done\n", i, svms);
        }
+       if (atomic_cmpxchg(&svms->drain_pagefaults, drain, 0) != drain)
+               goto restart;
 }
 
 static void svm_range_deferred_list_work(struct work_struct *work)
@@ -1990,43 +1994,41 @@ static void svm_range_deferred_list_work(struct work_struct *work)
        struct svm_range_list *svms;
        struct svm_range *prange;
        struct mm_struct *mm;
+       struct kfd_process *p;
 
        svms = container_of(work, struct svm_range_list, deferred_list_work);
        pr_debug("enter svms 0x%p\n", svms);
 
+       p = container_of(svms, struct kfd_process, svms);
+       /* Avoid the mm going away while inserting the mmu notifier */
+       mm = get_task_mm(p->lead_thread);
+       if (!mm) {
+               pr_debug("svms 0x%p process mm gone\n", svms);
+               return;
+       }
+retry:
+       mmap_write_lock(mm);
+
+       /* Checking for the need to drain retry faults must be done inside
+        * the mmap write lock to serialize with munmap notifiers.
+        */
+       if (unlikely(atomic_read(&svms->drain_pagefaults))) {
+               mmap_write_unlock(mm);
+               svm_range_drain_retry_fault(svms);
+               goto retry;
+       }
+
        spin_lock(&svms->deferred_list_lock);
        while (!list_empty(&svms->deferred_range_list)) {
                prange = list_first_entry(&svms->deferred_range_list,
                                          struct svm_range, deferred_list);
+               list_del_init(&prange->deferred_list);
                spin_unlock(&svms->deferred_list_lock);
+
                pr_debug("prange 0x%p [0x%lx 0x%lx] op %d\n", prange,
                         prange->start, prange->last, prange->work_item.op);
 
-               mm = prange->work_item.mm;
-retry:
-               mmap_write_lock(mm);
                mutex_lock(&svms->lock);
-
-               /* Checking for the need to drain retry faults must be in
-                * mmap write lock to serialize with munmap notifiers.
-                *
-                * Remove from deferred_list must be inside mmap write lock,
-                * otherwise, svm_range_list_lock_and_flush_work may hold mmap
-                * write lock, and continue because deferred_list is empty, then
-                * deferred_list handle is blocked by mmap write lock.
-                */
-               spin_lock(&svms->deferred_list_lock);
-               if (unlikely(svms->drain_pagefaults)) {
-                       svms->drain_pagefaults = false;
-                       spin_unlock(&svms->deferred_list_lock);
-                       mutex_unlock(&svms->lock);
-                       mmap_write_unlock(mm);
-                       svm_range_drain_retry_fault(svms);
-                       goto retry;
-               }
-               list_del_init(&prange->deferred_list);
-               spin_unlock(&svms->deferred_list_lock);
-
                mutex_lock(&prange->migrate_mutex);
                while (!list_empty(&prange->child_list)) {
                        struct svm_range *pchild;
@@ -2042,12 +2044,13 @@ retry:
 
                svm_range_handle_list_op(svms, prange);
                mutex_unlock(&svms->lock);
-               mmap_write_unlock(mm);
 
                spin_lock(&svms->deferred_list_lock);
        }
        spin_unlock(&svms->deferred_list_lock);
 
+       mmap_write_unlock(mm);
+       mmput(mm);
        pr_debug("exit svms 0x%p\n", svms);
 }
 
@@ -2056,12 +2059,6 @@ svm_range_add_list_work(struct svm_range_list *svms, struct svm_range *prange,
                        struct mm_struct *mm, enum svm_work_list_ops op)
 {
        spin_lock(&svms->deferred_list_lock);
-       /* Make sure pending page faults are drained in the deferred worker
-        * before the range is freed to avoid straggler interrupts on
-        * unmapped memory causing "phantom faults".
-        */
-       if (op == SVM_OP_UNMAP_RANGE)
-               svms->drain_pagefaults = true;
        /* if prange is on the deferred list */
        if (!list_empty(&prange->deferred_list)) {
                pr_debug("update exist prange 0x%p work op %d\n", prange, op);
@@ -2140,6 +2137,12 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
        pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] [0x%lx 0x%lx]\n", svms,
                 prange, prange->start, prange->last, start, last);
 
+       /* Make sure pending page faults are drained in the deferred worker
+        * before the range is freed to avoid straggler interrupts on
+        * unmapped memory causing "phantom faults".
+        */
+       atomic_inc(&svms->drain_pagefaults);
+
        unmap_parent = start <= prange->start && last >= prange->last;
 
        list_for_each_entry(pchild, &prange->child_list, child_list) {
@@ -2559,20 +2562,13 @@ svm_range_count_fault(struct amdgpu_device *adev, struct kfd_process *p,
 }
 
 static bool
-svm_fault_allowed(struct mm_struct *mm, uint64_t addr, bool write_fault)
+svm_fault_allowed(struct vm_area_struct *vma, bool write_fault)
 {
        unsigned long requested = VM_READ;
-       struct vm_area_struct *vma;
 
        if (write_fault)
                requested |= VM_WRITE;
 
-       vma = find_vma(mm, addr << PAGE_SHIFT);
-       if (!vma || (addr << PAGE_SHIFT) < vma->vm_start) {
-               pr_debug("address 0x%llx VMA is removed\n", addr);
-               return true;
-       }
-
        pr_debug("requested 0x%lx, vma permission flags 0x%lx\n", requested,
                vma->vm_flags);
        return (vma->vm_flags & requested) == requested;
@@ -2590,6 +2586,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
        int32_t best_loc;
        int32_t gpuidx = MAX_GPU_INSTANCE;
        bool write_locked = false;
+       struct vm_area_struct *vma;
        int r = 0;
 
        if (!KFD_IS_SVM_API_SUPPORTED(adev->kfd.dev)) {
@@ -2600,7 +2597,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
        p = kfd_lookup_process_by_pasid(pasid);
        if (!p) {
                pr_debug("kfd process not founded pasid 0x%x\n", pasid);
-               return -ESRCH;
+               return 0;
        }
        if (!p->xnack_enabled) {
                pr_debug("XNACK not enabled for pasid 0x%x\n", pasid);
@@ -2611,10 +2608,19 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 
        pr_debug("restoring svms 0x%p fault address 0x%llx\n", svms, addr);
 
+       if (atomic_read(&svms->drain_pagefaults)) {
+               pr_debug("draining retry fault, drop fault 0x%llx\n", addr);
+               r = 0;
+               goto out;
+       }
+
+       /* p->lead_thread is available because kfd_process_wq_release flushes the work
+        * before releasing the task ref.
+        */
        mm = get_task_mm(p->lead_thread);
        if (!mm) {
                pr_debug("svms 0x%p failed to get mm\n", svms);
-               r = -ESRCH;
+               r = 0;
                goto out;
        }
 
@@ -2652,6 +2658,7 @@ retry_write_locked:
 
        if (svm_range_skip_recover(prange)) {
                amdgpu_gmc_filter_faults_remove(adev, addr, pasid);
+               r = 0;
                goto out_unlock_range;
        }
 
@@ -2660,10 +2667,21 @@ retry_write_locked:
        if (timestamp < AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING) {
                pr_debug("svms 0x%p [0x%lx %lx] already restored\n",
                         svms, prange->start, prange->last);
+               r = 0;
                goto out_unlock_range;
        }
 
-       if (!svm_fault_allowed(mm, addr, write_fault)) {
+       /* __do_munmap removed the VMA; return success as we are handling a stale
+        * retry fault.
+        */
+       vma = find_vma(mm, addr << PAGE_SHIFT);
+       if (!vma || (addr << PAGE_SHIFT) < vma->vm_start) {
+               pr_debug("address 0x%llx VMA is removed\n", addr);
+               r = 0;
+               goto out_unlock_range;
+       }
+
+       if (!svm_fault_allowed(vma, write_fault)) {
                pr_debug("fault addr 0x%llx no %s permission\n", addr,
                        write_fault ? "write" : "read");
                r = -EPERM;
@@ -2741,6 +2759,14 @@ void svm_range_list_fini(struct kfd_process *p)
        /* Ensure list work is finished before process is destroyed */
        flush_work(&p->svms.deferred_list_work);
 
+       /*
+        * Ensure no retry fault comes in afterwards, as the page fault handler
+        * would no longer find the kfd process nor take the mm lock to recover the fault.
+        */
+       atomic_inc(&p->svms.drain_pagefaults);
+       svm_range_drain_retry_fault(&p->svms);
+
+
        list_for_each_entry_safe(prange, next, &p->svms.list, list) {
                svm_range_unlink(prange);
                svm_range_remove_notifier(prange);
@@ -2761,6 +2787,7 @@ int svm_range_list_init(struct kfd_process *p)
        mutex_init(&svms->lock);
        INIT_LIST_HEAD(&svms->list);
        atomic_set(&svms->evicted_ranges, 0);
+       atomic_set(&svms->drain_pagefaults, 0);
        INIT_DELAYED_WORK(&svms->restore_work, svm_range_restore_work);
        INIT_WORK(&svms->deferred_list_work, svm_range_deferred_list_work);
        INIT_LIST_HEAD(&svms->deferred_range_list);
@@ -3150,7 +3177,6 @@ static int
 svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size,
                   uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs)
 {
-       struct amdkfd_process_info *process_info = p->kgd_process_info;
        struct mm_struct *mm = current->mm;
        struct list_head update_list;
        struct list_head insert_list;
@@ -3169,8 +3195,6 @@ svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size,
 
        svms = &p->svms;
 
-       mutex_lock(&process_info->lock);
-
        svm_range_list_lock_and_flush_work(svms, mm);
 
        r = svm_range_is_valid(p, start, size);
@@ -3246,8 +3270,6 @@ out_unlock_range:
        mutex_unlock(&svms->lock);
        mmap_read_unlock(mm);
 out:
-       mutex_unlock(&process_info->lock);
-
        pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] done, r=%d\n", p->pasid,
                 &p->svms, start, start + size - 1, r);
 
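The switch from a bool to an atomic counter turns drain_pagefaults into a request/acknowledge handshake: unmap paths bump the counter to request a drain, the drain worker snapshots it, performs the slow drain, and only clears the counter if no new request arrived in the meantime, otherwise it loops. A standalone sketch of that handshake, with hypothetical names in place of the svms structure:

        #include <linux/atomic.h>

        static atomic_t demo_drain_requests = ATOMIC_INIT(0);

        void demo_do_drain(void);       /* the expensive wait on the IH checkpoint */

        /* Called from unmap paths: request that pending faults be drained. */
        static void demo_request_drain(void)
        {
                atomic_inc(&demo_drain_requests);
        }

        /* Called from the worker context that is allowed to sleep. */
        static void demo_drain_worker(void)
        {
                int snapshot;

        restart:
                snapshot = atomic_read(&demo_drain_requests);
                if (!snapshot)
                        return;

                demo_do_drain();

                /* A new request during the drain means we are not done yet. */
                if (atomic_cmpxchg(&demo_drain_requests, snapshot, 0) != snapshot)
                        goto restart;
        }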
index c27cb47..1cd6b9f 100644 (file)
@@ -51,6 +51,7 @@
 #include <drm/drm_hdcp.h>
 #endif
 #include "amdgpu_pm.h"
+#include "amdgpu_atombios.h"
 
 #include "amd_shared.h"
 #include "amdgpu_dm_irq.h"
@@ -2561,6 +2562,22 @@ static int dm_resume(void *handle)
        if (amdgpu_in_reset(adev)) {
                dc_state = dm->cached_dc_state;
 
+               /*
+                * The dc->current_state is backed up into dm->cached_dc_state
+                * before we commit 0 streams.
+                *
+                * DC will clear link encoder assignments on the real state
+                * but the changes won't propagate over to the copy we made
+                * before the 0 streams commit.
+                *
+                * DC expects that link encoder assignments are *not* valid
+                * when committing a state, so as a workaround they need to be
+                * cleared here.
+                */
+               link_enc_cfg_init(dm->dc, dc_state);
+
+               amdgpu_dm_outbox_init(adev);
+
                r = dm_dmub_hw_init(adev);
                if (r)
                        DRM_ERROR("DMUB interface failed to initialize: status=%d\n", r);
@@ -2572,8 +2589,8 @@ static int dm_resume(void *handle)
 
                for (i = 0; i < dc_state->stream_count; i++) {
                        dc_state->streams[i]->mode_changed = true;
-                       for (j = 0; j < dc_state->stream_status->plane_count; j++) {
-                               dc_state->stream_status->plane_states[j]->update_flags.raw
+                       for (j = 0; j < dc_state->stream_status[i].plane_count; j++) {
+                               dc_state->stream_status[i].plane_states[j]->update_flags.raw
                                        = 0xffffffff;
                        }
                }
@@ -3909,6 +3926,9 @@ static int amdgpu_dm_backlight_set_level(struct amdgpu_display_manager *dm,
        caps = dm->backlight_caps[bl_idx];
 
        dm->brightness[bl_idx] = user_brightness;
+       /* update scratch register */
+       if (bl_idx == 0)
+               amdgpu_atombios_scratch_regs_set_backlight_level(dm->adev, dm->brightness[bl_idx]);
        brightness = convert_brightness_from_user(&caps, dm->brightness[bl_idx]);
        link = (struct dc_link *)dm->backlight_link[bl_idx];
 
index cce062a..8a441a2 100644 (file)
@@ -314,6 +314,14 @@ int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name)
                        ret = -EINVAL;
                        goto cleanup;
                }
+
+               if ((aconn->base.connector_type != DRM_MODE_CONNECTOR_DisplayPort) &&
+                               (aconn->base.connector_type != DRM_MODE_CONNECTOR_eDP)) {
+                       DRM_DEBUG_DRIVER("No DP connector available for CRC source\n");
+                       ret = -EINVAL;
+                       goto cleanup;
+               }
+
        }
 
 #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
index 32a5ce0..cc34a35 100644 (file)
@@ -36,6 +36,8 @@
 #include "dm_helpers.h"
 
 #include "dc_link_ddc.h"
+#include "ddc_service_types.h"
+#include "dpcd_defs.h"
 
 #include "i2caux_interface.h"
 #include "dmub_cmd.h"
@@ -157,6 +159,16 @@ static const struct drm_connector_funcs dm_dp_mst_connector_funcs = {
 };
 
 #if defined(CONFIG_DRM_AMD_DC_DCN)
+static bool needs_dsc_aux_workaround(struct dc_link *link)
+{
+       if (link->dpcd_caps.branch_dev_id == DP_BRANCH_DEVICE_ID_90CC24 &&
+           (link->dpcd_caps.dpcd_rev.raw == DPCD_REV_14 || link->dpcd_caps.dpcd_rev.raw == DPCD_REV_12) &&
+           link->dpcd_caps.sink_count.bits.SINK_COUNT >= 2)
+               return true;
+
+       return false;
+}
+
 static bool validate_dsc_caps_on_connector(struct amdgpu_dm_connector *aconnector)
 {
        struct dc_sink *dc_sink = aconnector->dc_sink;
@@ -166,7 +178,7 @@ static bool validate_dsc_caps_on_connector(struct amdgpu_dm_connector *aconnecto
        u8 *dsc_branch_dec_caps = NULL;
 
        aconnector->dsc_aux = drm_dp_mst_dsc_aux_for_port(port);
-#if defined(CONFIG_HP_HOOK_WORKAROUND)
+
        /*
         * drm_dp_mst_dsc_aux_for_port() will return NULL for certain configs
         * because it only check the dsc/fec caps of the "port variable" and not the dock
@@ -176,10 +188,10 @@ static bool validate_dsc_caps_on_connector(struct amdgpu_dm_connector *aconnecto
         * Workaround: explicitly check the use case above and use the mst dock's aux as dsc_aux
         *
         */
-
-       if (!aconnector->dsc_aux && !port->parent->port_parent)
+       if (!aconnector->dsc_aux && !port->parent->port_parent &&
+           needs_dsc_aux_workaround(aconnector->dc_link))
                aconnector->dsc_aux = &aconnector->mst_port->dm_dp_aux.aux;
-#endif
+
        if (!aconnector->dsc_aux)
                return false;
 
index 6054478..c8457ba 100644 (file)
@@ -758,6 +758,18 @@ static bool detect_dp(struct dc_link *link,
                        dal_ddc_service_set_transaction_type(link->ddc,
                                                             sink_caps->transaction_type);
 
+#if defined(CONFIG_DRM_AMD_DC_DCN)
+                       /* Apply a workaround for tunneled MST on certain USB4 docks: always use DSC
+                        * if the dock reports DSC support.
+                        */
+                       if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA &&
+                                       link->type == dc_connection_mst_branch &&
+                                       link->dpcd_caps.branch_dev_id == DP_BRANCH_DEVICE_ID_90CC24 &&
+                                       link->dpcd_caps.dsc_caps.dsc_basic_caps.fields.dsc_support.DSC_SUPPORT &&
+                                       !link->dc->debug.dpia_debug.bits.disable_mst_dsc_work_around)
+                               link->wa_flags.dpia_mst_dsc_always_on = true;
+#endif
+
 #if defined(CONFIG_DRM_AMD_DC_HDCP)
                        /* In case of fallback to SST when topology discovery below fails
                         * HDCP caps will be querried again later by the upper layer (caller
@@ -1203,6 +1215,10 @@ static bool dc_link_detect_helper(struct dc_link *link,
                        LINK_INFO("link=%d, mst branch is now Disconnected\n",
                                  link->link_index);
 
+                       /* Disable the workaround which keeps DSC on for tunneled MST on certain USB4 docks. */
+                       if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA)
+                               link->wa_flags.dpia_mst_dsc_always_on = false;
+
                        dm_helpers_dp_mst_stop_top_mgr(link->ctx, link);
 
                        link->mst_stream_alloc_table.stream_count = 0;
index cb7bf91..13bc69d 100644 (file)
@@ -2138,7 +2138,7 @@ static enum link_training_result dp_perform_8b_10b_link_training(
                }
 
                for (lane = 0; lane < (uint8_t)lt_settings->link_settings.lane_count; lane++)
-                       lt_settings->dpcd_lane_settings[lane].bits.VOLTAGE_SWING_SET = VOLTAGE_SWING_LEVEL0;
+                       lt_settings->dpcd_lane_settings[lane].raw = 0;
        }
 
        if (status == LINK_TRAINING_SUCCESS) {
index c32fdcc..e2d9a46 100644 (file)
@@ -1664,6 +1664,10 @@ bool dc_is_stream_unchanged(
        if (old_stream->ignore_msa_timing_param != stream->ignore_msa_timing_param)
                return false;
 
+       // Only audio is left to check whether it is the same or not. This is a corner case for tiled sinks.
+       if (old_stream->audio_info.mode_count != stream->audio_info.mode_count)
+               return false;
+
        return true;
 }
 
@@ -2252,16 +2256,6 @@ enum dc_status dc_validate_global_state(
 
        if (!new_ctx)
                return DC_ERROR_UNEXPECTED;
-#if defined(CONFIG_DRM_AMD_DC_DCN)
-
-       /*
-        * Update link encoder to stream assignment.
-        * TODO: Split out reason allocation from validation.
-        */
-       if (dc->res_pool->funcs->link_encs_assign && fast_validate == false)
-               dc->res_pool->funcs->link_encs_assign(
-                       dc, new_ctx, new_ctx->streams, new_ctx->stream_count);
-#endif
 
        if (dc->res_pool->funcs->validate_global) {
                result = dc->res_pool->funcs->validate_global(dc, new_ctx);
@@ -2313,6 +2307,16 @@ enum dc_status dc_validate_global_state(
                if (!dc->res_pool->funcs->validate_bandwidth(dc, new_ctx, fast_validate))
                        result = DC_FAIL_BANDWIDTH_VALIDATE;
 
+#if defined(CONFIG_DRM_AMD_DC_DCN)
+       /*
+        * Only update the link-encoder-to-stream assignment after bandwidth validation has passed.
+        * TODO: Split out assignment and validation.
+        */
+       if (result == DC_OK && dc->res_pool->funcs->link_encs_assign && fast_validate == false)
+               dc->res_pool->funcs->link_encs_assign(
+                       dc, new_ctx, new_ctx->streams, new_ctx->stream_count);
+#endif
+
        return result;
 }
 
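The motivation for moving the link_encs_assign() call is ordering: the new state should only be mutated (encoder assignments written into it) once bandwidth validation has succeeded, so a failed validation leaves no half-applied assignments behind. As a shape, and with hypothetical names rather than the dc API, the change amounts to:

        struct demo_dc;
        struct demo_state;

        enum { DEMO_OK = 0 };

        int demo_validate_bandwidth(struct demo_dc *dc, struct demo_state *ctx, bool fast);
        void demo_assign_link_encoders(struct demo_dc *dc, struct demo_state *ctx);

        static int demo_validate_global_state(struct demo_dc *dc, struct demo_state *new_ctx,
                                              bool fast_validate)
        {
                int result = demo_validate_bandwidth(dc, new_ctx, fast_validate);

                /* Mutate the new state only after validation has passed. */
                if (result == DEMO_OK && !fast_validate)
                        demo_assign_link_encoders(dc, new_ctx);

                return result;
        }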
index 3aac3f4..618e798 100644 (file)
@@ -508,7 +508,8 @@ union dpia_debug_options {
                uint32_t disable_dpia:1;
                uint32_t force_non_lttpr:1;
                uint32_t extend_aux_rd_interval:1;
-               uint32_t reserved:29;
+               uint32_t disable_mst_dsc_work_around:1;
+               uint32_t reserved:28;
        } bits;
        uint32_t raw;
 };
index 180ecd8..b01077a 100644 (file)
@@ -191,6 +191,8 @@ struct dc_link {
                bool dp_skip_DID2;
                bool dp_skip_reset_segment;
                bool dp_mot_reset_segment;
+               /* Some USB4 docks do not handle turning off MST DSC once it has been enabled. */
+               bool dpia_mst_dsc_always_on;
        } wa_flags;
        struct link_mst_stream_allocation_table mst_stream_alloc_table;
 
index 0b788d7..04d7bdd 100644 (file)
@@ -1637,7 +1637,7 @@ void dcn10_reset_hw_ctx_wrap(
 
                        dcn10_reset_back_end_for_pipe(dc, pipe_ctx_old, dc->current_state);
                        if (hws->funcs.enable_stream_gating)
-                               hws->funcs.enable_stream_gating(dc, pipe_ctx);
+                               hws->funcs.enable_stream_gating(dc, pipe_ctx_old);
                        if (old_clk)
                                old_clk->funcs->cs_power_down(old_clk);
                }
index 4f88376..e6af99a 100644 (file)
@@ -2270,7 +2270,7 @@ void dcn20_reset_hw_ctx_wrap(
 
                        dcn20_reset_back_end_for_pipe(dc, pipe_ctx_old, dc->current_state);
                        if (hws->funcs.enable_stream_gating)
-                               hws->funcs.enable_stream_gating(dc, pipe_ctx);
+                               hws->funcs.enable_stream_gating(dc, pipe_ctx_old);
                        if (old_clk)
                                old_clk->funcs->cs_power_down(old_clk);
                }
index 5dd1ce9..4d4ac4c 100644 (file)
@@ -602,7 +602,7 @@ void dcn31_reset_hw_ctx_wrap(
 
                        dcn31_reset_back_end_for_pipe(dc, pipe_ctx_old, dc->current_state);
                        if (hws->funcs.enable_stream_gating)
-                               hws->funcs.enable_stream_gating(dc, pipe_ctx);
+                               hws->funcs.enable_stream_gating(dc, pipe_ctx_old);
                        if (old_clk)
                                old_clk->funcs->cs_power_down(old_clk);
                }
index 258c573..1f406f2 100644 (file)
@@ -1024,8 +1024,6 @@ static int smu10_print_clock_levels(struct pp_hwmgr *hwmgr,
        uint32_t min_freq, max_freq = 0;
        uint32_t ret = 0;
 
-       phm_get_sysfs_buf(&buf, &size);
-
        switch (type) {
        case PP_SCLK:
                smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetGfxclkFrequency, &now);
@@ -1038,13 +1036,13 @@ static int smu10_print_clock_levels(struct pp_hwmgr *hwmgr,
                else
                        i = 1;
 
-               size += sysfs_emit_at(buf, size, "0: %uMhz %s\n",
+               size += sprintf(buf + size, "0: %uMhz %s\n",
                                        data->gfx_min_freq_limit/100,
                                        i == 0 ? "*" : "");
-               size += sysfs_emit_at(buf, size, "1: %uMhz %s\n",
+               size += sprintf(buf + size, "1: %uMhz %s\n",
                                        i == 1 ? now : SMU10_UMD_PSTATE_GFXCLK,
                                        i == 1 ? "*" : "");
-               size += sysfs_emit_at(buf, size, "2: %uMhz %s\n",
+               size += sprintf(buf + size, "2: %uMhz %s\n",
                                        data->gfx_max_freq_limit/100,
                                        i == 2 ? "*" : "");
                break;
@@ -1052,7 +1050,7 @@ static int smu10_print_clock_levels(struct pp_hwmgr *hwmgr,
                smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetFclkFrequency, &now);
 
                for (i = 0; i < mclk_table->count; i++)
-                       size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
+                       size += sprintf(buf + size, "%d: %uMhz %s\n",
                                        i,
                                        mclk_table->entries[i].clk / 100,
                                        ((mclk_table->entries[i].clk / 100)
@@ -1067,10 +1065,10 @@ static int smu10_print_clock_levels(struct pp_hwmgr *hwmgr,
                        if (ret)
                                return ret;
 
-                       size += sysfs_emit_at(buf, size, "%s:\n", "OD_SCLK");
-                       size += sysfs_emit_at(buf, size, "0: %10uMhz\n",
+                       size += sprintf(buf + size, "%s:\n", "OD_SCLK");
+                       size += sprintf(buf + size, "0: %10uMhz\n",
                        (data->gfx_actual_soft_min_freq > 0) ? data->gfx_actual_soft_min_freq : min_freq);
-                       size += sysfs_emit_at(buf, size, "1: %10uMhz\n",
+                       size += sprintf(buf + size, "1: %10uMhz\n",
                        (data->gfx_actual_soft_max_freq > 0) ? data->gfx_actual_soft_max_freq : max_freq);
                }
                break;
@@ -1083,8 +1081,8 @@ static int smu10_print_clock_levels(struct pp_hwmgr *hwmgr,
                        if (ret)
                                return ret;
 
-                       size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE");
-                       size += sysfs_emit_at(buf, size, "SCLK: %7uMHz %10uMHz\n",
+                       size += sprintf(buf + size, "%s:\n", "OD_RANGE");
+                       size += sprintf(buf + size, "SCLK: %7uMHz %10uMHz\n",
                                min_freq, max_freq);
                }
                break;
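The sysfs_emit_at() to sprintf() conversions in this and the following hwmgr files appear to share one reason: sysfs_emit_at() warns and returns 0 unless the buffer it is handed is the page-aligned buffer sysfs allocated, but these print callbacks can be invoked with a pointer at an offset into that page, so plain sprintf() appending is used instead. A minimal illustration of the appending pattern (hypothetical helper, not driver code):

        /* Append one line per clock level; returns the number of bytes written. */
        static int demo_print_levels(char *buf, const unsigned int *clk_mhz,
                                     int count, int current_level)
        {
                int size = 0;
                int i;

                for (i = 0; i < count; i++)
                        size += sprintf(buf + size, "%d: %uMhz %s\n",
                                        i, clk_mhz[i], i == current_level ? "*" : "");

                return size;
        }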
index aceebf5..611969b 100644 (file)
@@ -4914,8 +4914,6 @@ static int smu7_print_clock_levels(struct pp_hwmgr *hwmgr,
        int size = 0;
        uint32_t i, now, clock, pcie_speed;
 
-       phm_get_sysfs_buf(&buf, &size);
-
        switch (type) {
        case PP_SCLK:
                smum_send_msg_to_smc(hwmgr, PPSMC_MSG_API_GetSclkFrequency, &clock);
@@ -4928,7 +4926,7 @@ static int smu7_print_clock_levels(struct pp_hwmgr *hwmgr,
                now = i;
 
                for (i = 0; i < sclk_table->count; i++)
-                       size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
+                       size += sprintf(buf + size, "%d: %uMhz %s\n",
                                        i, sclk_table->dpm_levels[i].value / 100,
                                        (i == now) ? "*" : "");
                break;
@@ -4943,7 +4941,7 @@ static int smu7_print_clock_levels(struct pp_hwmgr *hwmgr,
                now = i;
 
                for (i = 0; i < mclk_table->count; i++)
-                       size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
+                       size += sprintf(buf + size, "%d: %uMhz %s\n",
                                        i, mclk_table->dpm_levels[i].value / 100,
                                        (i == now) ? "*" : "");
                break;
@@ -4957,7 +4955,7 @@ static int smu7_print_clock_levels(struct pp_hwmgr *hwmgr,
                now = i;
 
                for (i = 0; i < pcie_table->count; i++)
-                       size += sysfs_emit_at(buf, size, "%d: %s %s\n", i,
+                       size += sprintf(buf + size, "%d: %s %s\n", i,
                                        (pcie_table->dpm_levels[i].value == 0) ? "2.5GT/s, x8" :
                                        (pcie_table->dpm_levels[i].value == 1) ? "5.0GT/s, x16" :
                                        (pcie_table->dpm_levels[i].value == 2) ? "8.0GT/s, x16" : "",
@@ -4965,32 +4963,32 @@ static int smu7_print_clock_levels(struct pp_hwmgr *hwmgr,
                break;
        case OD_SCLK:
                if (hwmgr->od_enabled) {
-                       size += sysfs_emit_at(buf, size, "%s:\n", "OD_SCLK");
+                       size += sprintf(buf + size, "%s:\n", "OD_SCLK");
                        for (i = 0; i < odn_sclk_table->num_of_pl; i++)
-                               size += sysfs_emit_at(buf, size, "%d: %10uMHz %10umV\n",
+                               size += sprintf(buf + size, "%d: %10uMHz %10umV\n",
                                        i, odn_sclk_table->entries[i].clock/100,
                                        odn_sclk_table->entries[i].vddc);
                }
                break;
        case OD_MCLK:
                if (hwmgr->od_enabled) {
-                       size += sysfs_emit_at(buf, size, "%s:\n", "OD_MCLK");
+                       size += sprintf(buf + size, "%s:\n", "OD_MCLK");
                        for (i = 0; i < odn_mclk_table->num_of_pl; i++)
-                               size += sysfs_emit_at(buf, size, "%d: %10uMHz %10umV\n",
+                               size += sprintf(buf + size, "%d: %10uMHz %10umV\n",
                                        i, odn_mclk_table->entries[i].clock/100,
                                        odn_mclk_table->entries[i].vddc);
                }
                break;
        case OD_RANGE:
                if (hwmgr->od_enabled) {
-                       size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE");
-                       size += sysfs_emit_at(buf, size, "SCLK: %7uMHz %10uMHz\n",
+                       size += sprintf(buf + size, "%s:\n", "OD_RANGE");
+                       size += sprintf(buf + size, "SCLK: %7uMHz %10uMHz\n",
                                data->golden_dpm_table.sclk_table.dpm_levels[0].value/100,
                                hwmgr->platform_descriptor.overdriveLimit.engineClock/100);
-                       size += sysfs_emit_at(buf, size, "MCLK: %7uMHz %10uMHz\n",
+                       size += sprintf(buf + size, "MCLK: %7uMHz %10uMHz\n",
                                data->golden_dpm_table.mclk_table.dpm_levels[0].value/100,
                                hwmgr->platform_descriptor.overdriveLimit.memoryClock/100);
-                       size += sysfs_emit_at(buf, size, "VDDC: %7umV %11umV\n",
+                       size += sprintf(buf + size, "VDDC: %7umV %11umV\n",
                                data->odn_dpm_table.min_vddc,
                                data->odn_dpm_table.max_vddc);
                }
index 8e28a8e..03bf8f0 100644 (file)
@@ -1550,8 +1550,6 @@ static int smu8_print_clock_levels(struct pp_hwmgr *hwmgr,
        uint32_t i, now;
        int size = 0;
 
-       phm_get_sysfs_buf(&buf, &size);
-
        switch (type) {
        case PP_SCLK:
                now = PHM_GET_FIELD(cgs_read_ind_register(hwmgr->device,
@@ -1561,7 +1559,7 @@ static int smu8_print_clock_levels(struct pp_hwmgr *hwmgr,
                                CURR_SCLK_INDEX);
 
                for (i = 0; i < sclk_table->count; i++)
-                       size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
+                       size += sprintf(buf + size, "%d: %uMhz %s\n",
                                        i, sclk_table->entries[i].clk / 100,
                                        (i == now) ? "*" : "");
                break;
@@ -1573,7 +1571,7 @@ static int smu8_print_clock_levels(struct pp_hwmgr *hwmgr,
                                CURR_MCLK_INDEX);
 
                for (i = SMU8_NUM_NBPMEMORYCLOCK; i > 0; i--)
-                       size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
+                       size += sprintf(buf + size, "%d: %uMhz %s\n",
                                        SMU8_NUM_NBPMEMORYCLOCK-i, data->sys_info.nbp_memory_clock[i-1] / 100,
                                        (SMU8_NUM_NBPMEMORYCLOCK-i == now) ? "*" : "");
                break;
index c981fc2..e633665 100644 (file)
@@ -4639,8 +4639,6 @@ static int vega10_print_clock_levels(struct pp_hwmgr *hwmgr,
 
        int i, now, size = 0, count = 0;
 
-       phm_get_sysfs_buf(&buf, &size);
-
        switch (type) {
        case PP_SCLK:
                if (data->registry_data.sclk_dpm_key_disabled)
@@ -4654,7 +4652,7 @@ static int vega10_print_clock_levels(struct pp_hwmgr *hwmgr,
                else
                        count = sclk_table->count;
                for (i = 0; i < count; i++)
-                       size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
+                       size += sprintf(buf + size, "%d: %uMhz %s\n",
                                        i, sclk_table->dpm_levels[i].value / 100,
                                        (i == now) ? "*" : "");
                break;
@@ -4665,7 +4663,7 @@ static int vega10_print_clock_levels(struct pp_hwmgr *hwmgr,
                smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetCurrentUclkIndex, &now);
 
                for (i = 0; i < mclk_table->count; i++)
-                       size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
+                       size += sprintf(buf + size, "%d: %uMhz %s\n",
                                        i, mclk_table->dpm_levels[i].value / 100,
                                        (i == now) ? "*" : "");
                break;
@@ -4676,7 +4674,7 @@ static int vega10_print_clock_levels(struct pp_hwmgr *hwmgr,
                smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetCurrentSocclkIndex, &now);
 
                for (i = 0; i < soc_table->count; i++)
-                       size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
+                       size += sprintf(buf + size, "%d: %uMhz %s\n",
                                        i, soc_table->dpm_levels[i].value / 100,
                                        (i == now) ? "*" : "");
                break;
@@ -4688,7 +4686,7 @@ static int vega10_print_clock_levels(struct pp_hwmgr *hwmgr,
                                PPSMC_MSG_GetClockFreqMHz, CLK_DCEFCLK, &now);
 
                for (i = 0; i < dcef_table->count; i++)
-                       size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
+                       size += sprintf(buf + size, "%d: %uMhz %s\n",
                                        i, dcef_table->dpm_levels[i].value / 100,
                                        (dcef_table->dpm_levels[i].value / 100 == now) ?
                                        "*" : "");
@@ -4702,7 +4700,7 @@ static int vega10_print_clock_levels(struct pp_hwmgr *hwmgr,
                        gen_speed = pptable->PcieGenSpeed[i];
                        lane_width = pptable->PcieLaneCount[i];
 
-                       size += sysfs_emit_at(buf, size, "%d: %s %s %s\n", i,
+                       size += sprintf(buf + size, "%d: %s %s %s\n", i,
                                        (gen_speed == 0) ? "2.5GT/s," :
                                        (gen_speed == 1) ? "5.0GT/s," :
                                        (gen_speed == 2) ? "8.0GT/s," :
@@ -4721,34 +4719,34 @@ static int vega10_print_clock_levels(struct pp_hwmgr *hwmgr,
 
        case OD_SCLK:
                if (hwmgr->od_enabled) {
-                       size += sysfs_emit_at(buf, size, "%s:\n", "OD_SCLK");
+                       size += sprintf(buf + size, "%s:\n", "OD_SCLK");
                        podn_vdd_dep = &data->odn_dpm_table.vdd_dep_on_sclk;
                        for (i = 0; i < podn_vdd_dep->count; i++)
-                               size += sysfs_emit_at(buf, size, "%d: %10uMhz %10umV\n",
+                               size += sprintf(buf + size, "%d: %10uMhz %10umV\n",
                                        i, podn_vdd_dep->entries[i].clk / 100,
                                                podn_vdd_dep->entries[i].vddc);
                }
                break;
        case OD_MCLK:
                if (hwmgr->od_enabled) {
-                       size += sysfs_emit_at(buf, size, "%s:\n", "OD_MCLK");
+                       size += sprintf(buf + size, "%s:\n", "OD_MCLK");
                        podn_vdd_dep = &data->odn_dpm_table.vdd_dep_on_mclk;
                        for (i = 0; i < podn_vdd_dep->count; i++)
-                               size += sysfs_emit_at(buf, size, "%d: %10uMhz %10umV\n",
+                               size += sprintf(buf + size, "%d: %10uMhz %10umV\n",
                                        i, podn_vdd_dep->entries[i].clk/100,
                                                podn_vdd_dep->entries[i].vddc);
                }
                break;
        case OD_RANGE:
                if (hwmgr->od_enabled) {
-                       size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE");
-                       size += sysfs_emit_at(buf, size, "SCLK: %7uMHz %10uMHz\n",
+                       size += sprintf(buf + size, "%s:\n", "OD_RANGE");
+                       size += sprintf(buf + size, "SCLK: %7uMHz %10uMHz\n",
                                data->golden_dpm_table.gfx_table.dpm_levels[0].value/100,
                                hwmgr->platform_descriptor.overdriveLimit.engineClock/100);
-                       size += sysfs_emit_at(buf, size, "MCLK: %7uMHz %10uMHz\n",
+                       size += sprintf(buf + size, "MCLK: %7uMHz %10uMHz\n",
                                data->golden_dpm_table.mem_table.dpm_levels[0].value/100,
                                hwmgr->platform_descriptor.overdriveLimit.memoryClock/100);
-                       size += sysfs_emit_at(buf, size, "VDDC: %7umV %11umV\n",
+                       size += sprintf(buf + size, "VDDC: %7umV %11umV\n",
                                data->odn_dpm_table.min_vddc,
                                data->odn_dpm_table.max_vddc);
                }
index f7e783e..a2f4d67 100644 (file)
@@ -2246,8 +2246,6 @@ static int vega12_print_clock_levels(struct pp_hwmgr *hwmgr,
        int i, now, size = 0;
        struct pp_clock_levels_with_latency clocks;
 
-       phm_get_sysfs_buf(&buf, &size);
-
        switch (type) {
        case PP_SCLK:
                PP_ASSERT_WITH_CODE(
@@ -2260,7 +2258,7 @@ static int vega12_print_clock_levels(struct pp_hwmgr *hwmgr,
                                "Attempt to get gfx clk levels Failed!",
                                return -1);
                for (i = 0; i < clocks.num_levels; i++)
-                       size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
+                       size += sprintf(buf + size, "%d: %uMhz %s\n",
                                i, clocks.data[i].clocks_in_khz / 1000,
                                (clocks.data[i].clocks_in_khz / 1000 == now / 100) ? "*" : "");
                break;
@@ -2276,7 +2274,7 @@ static int vega12_print_clock_levels(struct pp_hwmgr *hwmgr,
                                "Attempt to get memory clk levels Failed!",
                                return -1);
                for (i = 0; i < clocks.num_levels; i++)
-                       size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
+                       size += sprintf(buf + size, "%d: %uMhz %s\n",
                                i, clocks.data[i].clocks_in_khz / 1000,
                                (clocks.data[i].clocks_in_khz / 1000 == now / 100) ? "*" : "");
                break;
@@ -2294,7 +2292,7 @@ static int vega12_print_clock_levels(struct pp_hwmgr *hwmgr,
                                "Attempt to get soc clk levels Failed!",
                                return -1);
                for (i = 0; i < clocks.num_levels; i++)
-                       size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
+                       size += sprintf(buf + size, "%d: %uMhz %s\n",
                                i, clocks.data[i].clocks_in_khz / 1000,
                                (clocks.data[i].clocks_in_khz / 1000 == now) ? "*" : "");
                break;
@@ -2312,7 +2310,7 @@ static int vega12_print_clock_levels(struct pp_hwmgr *hwmgr,
                                "Attempt to get dcef clk levels Failed!",
                                return -1);
                for (i = 0; i < clocks.num_levels; i++)
-                       size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
+                       size += sprintf(buf + size, "%d: %uMhz %s\n",
                                i, clocks.data[i].clocks_in_khz / 1000,
                                (clocks.data[i].clocks_in_khz / 1000 == now) ? "*" : "");
                break;
index 03e63be..85d55ab 100644 (file)
@@ -3366,8 +3366,6 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr,
        int ret = 0;
        uint32_t gen_speed, lane_width, current_gen_speed, current_lane_width;
 
-       phm_get_sysfs_buf(&buf, &size);
-
        switch (type) {
        case PP_SCLK:
                ret = vega20_get_current_clk_freq(hwmgr, PPCLK_GFXCLK, &now);
@@ -3376,13 +3374,13 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr,
                                return ret);
 
                if (vega20_get_sclks(hwmgr, &clocks)) {
-                       size += sysfs_emit_at(buf, size, "0: %uMhz * (DPM disabled)\n",
+                       size += sprintf(buf + size, "0: %uMhz * (DPM disabled)\n",
                                now / 100);
                        break;
                }
 
                for (i = 0; i < clocks.num_levels; i++)
-                       size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
+                       size += sprintf(buf + size, "%d: %uMhz %s\n",
                                i, clocks.data[i].clocks_in_khz / 1000,
                                (clocks.data[i].clocks_in_khz == now * 10) ? "*" : "");
                break;
@@ -3394,13 +3392,13 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr,
                                return ret);
 
                if (vega20_get_memclocks(hwmgr, &clocks)) {
-                       size += sysfs_emit_at(buf, size, "0: %uMhz * (DPM disabled)\n",
+                       size += sprintf(buf + size, "0: %uMhz * (DPM disabled)\n",
                                now / 100);
                        break;
                }
 
                for (i = 0; i < clocks.num_levels; i++)
-                       size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
+                       size += sprintf(buf + size, "%d: %uMhz %s\n",
                                i, clocks.data[i].clocks_in_khz / 1000,
                                (clocks.data[i].clocks_in_khz == now * 10) ? "*" : "");
                break;
@@ -3412,13 +3410,13 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr,
                                return ret);
 
                if (vega20_get_socclocks(hwmgr, &clocks)) {
-                       size += sysfs_emit_at(buf, size, "0: %uMhz * (DPM disabled)\n",
+                       size += sprintf(buf + size, "0: %uMhz * (DPM disabled)\n",
                                now / 100);
                        break;
                }
 
                for (i = 0; i < clocks.num_levels; i++)
-                       size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
+                       size += sprintf(buf + size, "%d: %uMhz %s\n",
                                i, clocks.data[i].clocks_in_khz / 1000,
                                (clocks.data[i].clocks_in_khz == now * 10) ? "*" : "");
                break;
@@ -3430,7 +3428,7 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr,
                                return ret);
 
                for (i = 0; i < fclk_dpm_table->count; i++)
-                       size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
+                       size += sprintf(buf + size, "%d: %uMhz %s\n",
                                i, fclk_dpm_table->dpm_levels[i].value,
                                fclk_dpm_table->dpm_levels[i].value == (now / 100) ? "*" : "");
                break;
@@ -3442,13 +3440,13 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr,
                                return ret);
 
                if (vega20_get_dcefclocks(hwmgr, &clocks)) {
-                       size += sysfs_emit_at(buf, size, "0: %uMhz * (DPM disabled)\n",
+                       size += sprintf(buf + size, "0: %uMhz * (DPM disabled)\n",
                                now / 100);
                        break;
                }
 
                for (i = 0; i < clocks.num_levels; i++)
-                       size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n",
+                       size += sprintf(buf + size, "%d: %uMhz %s\n",
                                i, clocks.data[i].clocks_in_khz / 1000,
                                (clocks.data[i].clocks_in_khz == now * 10) ? "*" : "");
                break;
@@ -3462,7 +3460,7 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr,
                        gen_speed = pptable->PcieGenSpeed[i];
                        lane_width = pptable->PcieLaneCount[i];
 
-                       size += sysfs_emit_at(buf, size, "%d: %s %s %dMhz %s\n", i,
+                       size += sprintf(buf + size, "%d: %s %s %dMhz %s\n", i,
                                        (gen_speed == 0) ? "2.5GT/s," :
                                        (gen_speed == 1) ? "5.0GT/s," :
                                        (gen_speed == 2) ? "8.0GT/s," :
@@ -3483,18 +3481,18 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr,
        case OD_SCLK:
                if (od8_settings[OD8_SETTING_GFXCLK_FMIN].feature_id &&
                    od8_settings[OD8_SETTING_GFXCLK_FMAX].feature_id) {
-                       size += sysfs_emit_at(buf, size, "%s:\n", "OD_SCLK");
-                       size += sysfs_emit_at(buf, size, "0: %10uMhz\n",
+                       size += sprintf(buf + size, "%s:\n", "OD_SCLK");
+                       size += sprintf(buf + size, "0: %10uMhz\n",
                                od_table->GfxclkFmin);
-                       size += sysfs_emit_at(buf, size, "1: %10uMhz\n",
+                       size += sprintf(buf + size, "1: %10uMhz\n",
                                od_table->GfxclkFmax);
                }
                break;
 
        case OD_MCLK:
                if (od8_settings[OD8_SETTING_UCLK_FMAX].feature_id) {
-                       size += sysfs_emit_at(buf, size, "%s:\n", "OD_MCLK");
-                       size += sysfs_emit_at(buf, size, "1: %10uMhz\n",
+                       size += sprintf(buf + size, "%s:\n", "OD_MCLK");
+                       size += sprintf(buf + size, "1: %10uMhz\n",
                                od_table->UclkFmax);
                }
 
@@ -3507,14 +3505,14 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr,
                    od8_settings[OD8_SETTING_GFXCLK_VOLTAGE1].feature_id &&
                    od8_settings[OD8_SETTING_GFXCLK_VOLTAGE2].feature_id &&
                    od8_settings[OD8_SETTING_GFXCLK_VOLTAGE3].feature_id) {
-                       size += sysfs_emit_at(buf, size, "%s:\n", "OD_VDDC_CURVE");
-                       size += sysfs_emit_at(buf, size, "0: %10uMhz %10dmV\n",
+                       size += sprintf(buf + size, "%s:\n", "OD_VDDC_CURVE");
+                       size += sprintf(buf + size, "0: %10uMhz %10dmV\n",
                                od_table->GfxclkFreq1,
                                od_table->GfxclkVolt1 / VOLTAGE_SCALE);
-                       size += sysfs_emit_at(buf, size, "1: %10uMhz %10dmV\n",
+                       size += sprintf(buf + size, "1: %10uMhz %10dmV\n",
                                od_table->GfxclkFreq2,
                                od_table->GfxclkVolt2 / VOLTAGE_SCALE);
-                       size += sysfs_emit_at(buf, size, "2: %10uMhz %10dmV\n",
+                       size += sprintf(buf + size, "2: %10uMhz %10dmV\n",
                                od_table->GfxclkFreq3,
                                od_table->GfxclkVolt3 / VOLTAGE_SCALE);
                }
@@ -3522,17 +3520,17 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr,
                break;
 
        case OD_RANGE:
-               size += sysfs_emit_at(buf, size, "%s:\n", "OD_RANGE");
+               size += sprintf(buf + size, "%s:\n", "OD_RANGE");
 
                if (od8_settings[OD8_SETTING_GFXCLK_FMIN].feature_id &&
                    od8_settings[OD8_SETTING_GFXCLK_FMAX].feature_id) {
-                       size += sysfs_emit_at(buf, size, "SCLK: %7uMhz %10uMhz\n",
+                       size += sprintf(buf + size, "SCLK: %7uMhz %10uMhz\n",
                                od8_settings[OD8_SETTING_GFXCLK_FMIN].min_value,
                                od8_settings[OD8_SETTING_GFXCLK_FMAX].max_value);
                }
 
                if (od8_settings[OD8_SETTING_UCLK_FMAX].feature_id) {
-                       size += sysfs_emit_at(buf, size, "MCLK: %7uMhz %10uMhz\n",
+                       size += sprintf(buf + size, "MCLK: %7uMhz %10uMhz\n",
                                od8_settings[OD8_SETTING_UCLK_FMAX].min_value,
                                od8_settings[OD8_SETTING_UCLK_FMAX].max_value);
                }
@@ -3543,22 +3541,22 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr,
                    od8_settings[OD8_SETTING_GFXCLK_VOLTAGE1].feature_id &&
                    od8_settings[OD8_SETTING_GFXCLK_VOLTAGE2].feature_id &&
                    od8_settings[OD8_SETTING_GFXCLK_VOLTAGE3].feature_id) {
-                       size += sysfs_emit_at(buf, size, "VDDC_CURVE_SCLK[0]: %7uMhz %10uMhz\n",
+                       size += sprintf(buf + size, "VDDC_CURVE_SCLK[0]: %7uMhz %10uMhz\n",
                                od8_settings[OD8_SETTING_GFXCLK_FREQ1].min_value,
                                od8_settings[OD8_SETTING_GFXCLK_FREQ1].max_value);
-                       size += sysfs_emit_at(buf, size, "VDDC_CURVE_VOLT[0]: %7dmV %11dmV\n",
+                       size += sprintf(buf + size, "VDDC_CURVE_VOLT[0]: %7dmV %11dmV\n",
                                od8_settings[OD8_SETTING_GFXCLK_VOLTAGE1].min_value,
                                od8_settings[OD8_SETTING_GFXCLK_VOLTAGE1].max_value);
-                       size += sysfs_emit_at(buf, size, "VDDC_CURVE_SCLK[1]: %7uMhz %10uMhz\n",
+                       size += sprintf(buf + size, "VDDC_CURVE_SCLK[1]: %7uMhz %10uMhz\n",
                                od8_settings[OD8_SETTING_GFXCLK_FREQ2].min_value,
                                od8_settings[OD8_SETTING_GFXCLK_FREQ2].max_value);
-                       size += sysfs_emit_at(buf, size, "VDDC_CURVE_VOLT[1]: %7dmV %11dmV\n",
+                       size += sprintf(buf + size, "VDDC_CURVE_VOLT[1]: %7dmV %11dmV\n",
                                od8_settings[OD8_SETTING_GFXCLK_VOLTAGE2].min_value,
                                od8_settings[OD8_SETTING_GFXCLK_VOLTAGE2].max_value);
-                       size += sysfs_emit_at(buf, size, "VDDC_CURVE_SCLK[2]: %7uMhz %10uMhz\n",
+                       size += sprintf(buf + size, "VDDC_CURVE_SCLK[2]: %7uMhz %10uMhz\n",
                                od8_settings[OD8_SETTING_GFXCLK_FREQ3].min_value,
                                od8_settings[OD8_SETTING_GFXCLK_FREQ3].max_value);
-                       size += sysfs_emit_at(buf, size, "VDDC_CURVE_VOLT[2]: %7dmV %11dmV\n",
+                       size += sprintf(buf + size, "VDDC_CURVE_VOLT[2]: %7dmV %11dmV\n",
                                od8_settings[OD8_SETTING_GFXCLK_VOLTAGE3].min_value,
                                od8_settings[OD8_SETTING_GFXCLK_VOLTAGE3].max_value);
                }
index 01168b8..8a32445 100644 (file)
@@ -1468,7 +1468,7 @@ static int smu_disable_dpms(struct smu_context *smu)
                        dev_err(adev->dev, "Failed to disable smu features.\n");
        }
 
-       if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 0, 0) &&
+       if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 4, 2) &&
            adev->gfx.rlc.funcs->stop)
                adev->gfx.rlc.funcs->stop(adev);
 
index b53fee6..65f1728 100644 (file)
@@ -291,7 +291,7 @@ vga_pw_show(struct device *dev, struct device_attribute *attr, char *buf)
        if (rc)
                return rc;
 
-       return sprintf(buf, "%u\n", reg & 1);
+       return sprintf(buf, "%u\n", reg);
 }
 static DEVICE_ATTR_RO(vga_pw);
 
index cd818a6..00e53de 100644 (file)
@@ -225,12 +225,29 @@ static int hyperv_vmbus_remove(struct hv_device *hdev)
 {
        struct drm_device *dev = hv_get_drvdata(hdev);
        struct hyperv_drm_device *hv = to_hv(dev);
+       struct pci_dev *pdev;
 
        drm_dev_unplug(dev);
        drm_atomic_helper_shutdown(dev);
        vmbus_close(hdev->channel);
        hv_set_drvdata(hdev, NULL);
-       vmbus_free_mmio(hv->mem->start, hv->fb_size);
+
+       /*
+        * Free allocated MMIO memory only on Gen2 VMs.
+        * On Gen1 VMs, release the PCI device.
+        */
+       if (efi_enabled(EFI_BOOT)) {
+               vmbus_free_mmio(hv->mem->start, hv->fb_size);
+       } else {
+               pdev = pci_get_device(PCI_VENDOR_ID_MICROSOFT,
+                                     PCI_DEVICE_ID_HYPERV_VIDEO, NULL);
+               if (!pdev) {
+                       drm_err(dev, "Unable to find PCI Hyper-V video\n");
+                       return -ENODEV;
+               }
+               pci_release_region(pdev, 0);
+               pci_dev_put(pdev);
+       }
 
        return 0;
 }
index 39e11ea..aa72382 100644 (file)
@@ -1640,6 +1640,9 @@ struct intel_dp {
        struct intel_dp_pcon_frl frl;
 
        struct intel_psr psr;
+
+       /* When we last wrote the OUI for eDP */
+       unsigned long last_oui_write;
 };
 
 enum lspcon_vendor {
index be88346..a552f05 100644 (file)
@@ -29,6 +29,7 @@
 #include <linux/i2c.h>
 #include <linux/notifier.h>
 #include <linux/slab.h>
+#include <linux/timekeeping.h>
 #include <linux/types.h>
 
 #include <asm/byteorder.h>
@@ -1955,6 +1956,16 @@ intel_edp_init_source_oui(struct intel_dp *intel_dp, bool careful)
 
        if (drm_dp_dpcd_write(&intel_dp->aux, DP_SOURCE_OUI, oui, sizeof(oui)) < 0)
                drm_err(&i915->drm, "Failed to write source OUI\n");
+
+       intel_dp->last_oui_write = jiffies;
+}
+
+void intel_dp_wait_source_oui(struct intel_dp *intel_dp)
+{
+       struct drm_i915_private *i915 = dp_to_i915(intel_dp);
+
+       drm_dbg_kms(&i915->drm, "Performing OUI wait\n");
+       wait_remaining_ms_from_jiffies(intel_dp->last_oui_write, 30);
 }
 
 /* If the device supports it, try to set the power state appropriately */
index ce22902..b64145a 100644 (file)
@@ -119,4 +119,6 @@ void intel_dp_pcon_dsc_configure(struct intel_dp *intel_dp,
                                 const struct intel_crtc_state *crtc_state);
 void intel_dp_phy_test(struct intel_encoder *encoder);
 
+void intel_dp_wait_source_oui(struct intel_dp *intel_dp);
+
 #endif /* __INTEL_DP_H__ */
index 569d17b..3897468 100644 (file)
@@ -36,6 +36,7 @@
 
 #include "intel_backlight.h"
 #include "intel_display_types.h"
+#include "intel_dp.h"
 #include "intel_dp_aux_backlight.h"
 
 /* TODO:
@@ -106,6 +107,8 @@ intel_dp_aux_supports_hdr_backlight(struct intel_connector *connector)
        int ret;
        u8 tcon_cap[4];
 
+       intel_dp_wait_source_oui(intel_dp);
+
        ret = drm_dp_dpcd_read(aux, INTEL_EDP_HDR_TCON_CAP0, tcon_cap, sizeof(tcon_cap));
        if (ret != sizeof(tcon_cap))
                return false;
@@ -204,6 +207,8 @@ intel_dp_aux_hdr_enable_backlight(const struct intel_crtc_state *crtc_state,
        int ret;
        u8 old_ctrl, ctrl;
 
+       intel_dp_wait_source_oui(intel_dp);
+
        ret = drm_dp_dpcd_readb(&intel_dp->aux, INTEL_EDP_HDR_GETSET_CTRL_PARAMS, &old_ctrl);
        if (ret != 1) {
                drm_err(&i915->drm, "Failed to read current backlight control mode: %d\n", ret);
@@ -293,6 +298,13 @@ intel_dp_aux_vesa_enable_backlight(const struct intel_crtc_state *crtc_state,
        struct intel_panel *panel = &connector->panel;
        struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder);
 
+       if (!panel->backlight.edp.vesa.info.aux_enable) {
+               u32 pwm_level = intel_backlight_invert_pwm_level(connector,
+                                                                panel->backlight.pwm_level_max);
+
+               panel->backlight.pwm_funcs->enable(crtc_state, conn_state, pwm_level);
+       }
+
        drm_edp_backlight_enable(&intel_dp->aux, &panel->backlight.edp.vesa.info, level);
 }
 
@@ -304,6 +316,10 @@ static void intel_dp_aux_vesa_disable_backlight(const struct drm_connector_state
        struct intel_dp *intel_dp = enc_to_intel_dp(connector->encoder);
 
        drm_edp_backlight_disable(&intel_dp->aux, &panel->backlight.edp.vesa.info);
+
+       if (!panel->backlight.edp.vesa.info.aux_enable)
+               panel->backlight.pwm_funcs->disable(old_conn_state,
+                                                   intel_backlight_invert_pwm_level(connector, 0));
 }
 
 static int intel_dp_aux_vesa_setup_backlight(struct intel_connector *connector, enum pipe pipe)
@@ -321,6 +337,15 @@ static int intel_dp_aux_vesa_setup_backlight(struct intel_connector *connector,
        if (ret < 0)
                return ret;
 
+       if (!panel->backlight.edp.vesa.info.aux_enable) {
+               ret = panel->backlight.pwm_funcs->setup(connector, pipe);
+               if (ret < 0) {
+                       drm_err(&i915->drm,
+                               "Failed to setup PWM backlight controls for eDP backlight: %d\n",
+                               ret);
+                       return ret;
+               }
+       }
        panel->backlight.max = panel->backlight.edp.vesa.info.max;
        panel->backlight.min = 0;
        if (current_mode == DP_EDP_BACKLIGHT_CONTROL_MODE_DPCD) {
@@ -340,12 +365,7 @@ intel_dp_aux_supports_vesa_backlight(struct intel_connector *connector)
        struct intel_dp *intel_dp = intel_attached_dp(connector);
        struct drm_i915_private *i915 = dp_to_i915(intel_dp);
 
-       /* TODO: We currently only support AUX only backlight configurations, not backlights which
-        * require a mix of PWM and AUX controls to work. In the mean time, these machines typically
-        * work just fine using normal PWM controls anyway.
-        */
-       if ((intel_dp->edp_dpcd[1] & DP_EDP_BACKLIGHT_AUX_ENABLE_CAP) &&
-           drm_edp_backlight_supported(intel_dp->edp_dpcd)) {
+       if (drm_edp_backlight_supported(intel_dp->edp_dpcd)) {
                drm_dbg_kms(&i915->drm, "AUX Backlight Control Supported!\n");
                return true;
        }
index 524eaf6..795689e 100644 (file)
@@ -301,7 +301,7 @@ void intel_gt_suspend_prepare(struct intel_gt *gt)
        user_forcewake(gt, true);
        wait_for_suspend(gt);
 
-       intel_pxp_suspend(&gt->pxp, false);
+       intel_pxp_suspend_prepare(&gt->pxp);
 }
 
 static suspend_state_t pm_suspend_target(void)
@@ -326,6 +326,7 @@ void intel_gt_suspend_late(struct intel_gt *gt)
        GEM_BUG_ON(gt->awake);
 
        intel_uc_suspend(&gt->uc);
+       intel_pxp_suspend(&gt->pxp);
 
        /*
         * On disabling the device, we want to turn off HW access to memory
@@ -353,7 +354,7 @@ void intel_gt_suspend_late(struct intel_gt *gt)
 
 void intel_gt_runtime_suspend(struct intel_gt *gt)
 {
-       intel_pxp_suspend(&gt->pxp, true);
+       intel_pxp_runtime_suspend(&gt->pxp);
        intel_uc_runtime_suspend(&gt->uc);
 
        GT_TRACE(gt, "\n");
@@ -371,7 +372,7 @@ int intel_gt_runtime_resume(struct intel_gt *gt)
        if (ret)
                return ret;
 
-       intel_pxp_resume(&gt->pxp);
+       intel_pxp_runtime_resume(&gt->pxp);
 
        return 0;
 }
index e1f3625..ed73d9b 100644 (file)
@@ -621,13 +621,6 @@ static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine,
               FF_MODE2_GS_TIMER_MASK,
               FF_MODE2_GS_TIMER_224,
               0, false);
-
-       /*
-        * Wa_14012131227:dg1
-        * Wa_1508744258:tgl,rkl,dg1,adl-s,adl-p
-        */
-       wa_masked_en(wal, GEN7_COMMON_SLICE_CHICKEN1,
-                    GEN9_RHWO_OPTIMIZATION_DISABLE);
 }
 
 static void dg1_ctx_workarounds_init(struct intel_engine_cs *engine,
index 23fd86d..6a7d4e2 100644 (file)
@@ -7,26 +7,29 @@
 #include "intel_pxp_irq.h"
 #include "intel_pxp_pm.h"
 #include "intel_pxp_session.h"
+#include "i915_drv.h"
 
-void intel_pxp_suspend(struct intel_pxp *pxp, bool runtime)
+void intel_pxp_suspend_prepare(struct intel_pxp *pxp)
 {
        if (!intel_pxp_is_enabled(pxp))
                return;
 
        pxp->arb_is_valid = false;
 
-       /*
-        * Contexts using protected objects keep a runtime PM reference, so we
-        * can only runtime suspend when all of them have been either closed
-        * or banned. Therefore, there is no need to invalidate in that
-        * scenario.
-        */
-       if (!runtime)
-               intel_pxp_invalidate(pxp);
+       intel_pxp_invalidate(pxp);
+}
 
-       intel_pxp_fini_hw(pxp);
+void intel_pxp_suspend(struct intel_pxp *pxp)
+{
+       intel_wakeref_t wakeref;
 
-       pxp->hw_state_invalidated = false;
+       if (!intel_pxp_is_enabled(pxp))
+               return;
+
+       with_intel_runtime_pm(&pxp_to_gt(pxp)->i915->runtime_pm, wakeref) {
+               intel_pxp_fini_hw(pxp);
+               pxp->hw_state_invalidated = false;
+       }
 }
 
 void intel_pxp_resume(struct intel_pxp *pxp)
@@ -44,3 +47,15 @@ void intel_pxp_resume(struct intel_pxp *pxp)
 
        intel_pxp_init_hw(pxp);
 }
+
+void intel_pxp_runtime_suspend(struct intel_pxp *pxp)
+{
+       if (!intel_pxp_is_enabled(pxp))
+               return;
+
+       pxp->arb_is_valid = false;
+
+       intel_pxp_fini_hw(pxp);
+
+       pxp->hw_state_invalidated = false;
+}
index c89e97a..16990a3 100644 (file)
@@ -9,16 +9,29 @@
 #include "intel_pxp_types.h"
 
 #ifdef CONFIG_DRM_I915_PXP
-void intel_pxp_suspend(struct intel_pxp *pxp, bool runtime);
+void intel_pxp_suspend_prepare(struct intel_pxp *pxp);
+void intel_pxp_suspend(struct intel_pxp *pxp);
 void intel_pxp_resume(struct intel_pxp *pxp);
+void intel_pxp_runtime_suspend(struct intel_pxp *pxp);
 #else
-static inline void intel_pxp_suspend(struct intel_pxp *pxp, bool runtime)
+static inline void intel_pxp_suspend_prepare(struct intel_pxp *pxp)
+{
+}
+
+static inline void intel_pxp_suspend(struct intel_pxp *pxp)
 {
 }
 
 static inline void intel_pxp_resume(struct intel_pxp *pxp)
 {
 }
-#endif
 
+static inline void intel_pxp_runtime_suspend(struct intel_pxp *pxp)
+{
+}
+#endif
+static inline void intel_pxp_runtime_resume(struct intel_pxp *pxp)
+{
+       intel_pxp_resume(pxp);
+}
 #endif /* __INTEL_PXP_PM_H__ */
index ae11061..39197b4 100644 (file)
@@ -4,8 +4,8 @@ config DRM_MSM
        tristate "MSM DRM"
        depends on DRM
        depends on ARCH_QCOM || SOC_IMX5 || COMPILE_TEST
+       depends on COMMON_CLK
        depends on IOMMU_SUPPORT
-       depends on (OF && COMMON_CLK) || COMPILE_TEST
        depends on QCOM_OCMEM || QCOM_OCMEM=n
        depends on QCOM_LLCC || QCOM_LLCC=n
        depends on QCOM_COMMAND_DB || QCOM_COMMAND_DB=n
index 40577f8..0934544 100644 (file)
@@ -23,8 +23,10 @@ msm-y := \
        hdmi/hdmi_i2c.o \
        hdmi/hdmi_phy.o \
        hdmi/hdmi_phy_8960.o \
+       hdmi/hdmi_phy_8996.o \
        hdmi/hdmi_phy_8x60.o \
        hdmi/hdmi_phy_8x74.o \
+       hdmi/hdmi_pll_8960.o \
        edp/edp.o \
        edp/edp_aux.o \
        edp/edp_bridge.o \
@@ -37,6 +39,7 @@ msm-y := \
        disp/mdp4/mdp4_dtv_encoder.o \
        disp/mdp4/mdp4_lcdc_encoder.o \
        disp/mdp4/mdp4_lvds_connector.o \
+       disp/mdp4/mdp4_lvds_pll.o \
        disp/mdp4/mdp4_irq.o \
        disp/mdp4/mdp4_kms.o \
        disp/mdp4/mdp4_plane.o \
@@ -116,9 +119,6 @@ msm-$(CONFIG_DRM_MSM_DP)+= dp/dp_aux.o \
        dp/dp_audio.o
 
 msm-$(CONFIG_DRM_FBDEV_EMULATION) += msm_fbdev.o
-msm-$(CONFIG_COMMON_CLK) += disp/mdp4/mdp4_lvds_pll.o
-msm-$(CONFIG_COMMON_CLK) += hdmi/hdmi_pll_8960.o
-msm-$(CONFIG_COMMON_CLK) += hdmi/hdmi_phy_8996.o
 
 msm-$(CONFIG_DRM_MSM_HDMI_HDCP) += hdmi/hdmi_hdcp.o
 
index 267a880..78aad52 100644 (file)
@@ -1424,17 +1424,24 @@ static void a6xx_llc_activate(struct a6xx_gpu *a6xx_gpu)
 {
        struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
        struct msm_gpu *gpu = &adreno_gpu->base;
-       u32 gpu_scid, cntl1_regval = 0;
+       u32 cntl1_regval = 0;
 
        if (IS_ERR(a6xx_gpu->llc_mmio))
                return;
 
        if (!llcc_slice_activate(a6xx_gpu->llc_slice)) {
-               gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice);
+               u32 gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice);
 
                gpu_scid &= 0x1f;
                cntl1_regval = (gpu_scid << 0) | (gpu_scid << 5) | (gpu_scid << 10) |
                               (gpu_scid << 15) | (gpu_scid << 20);
+
+               /* On A660, the SCID programming for UCHE traffic is done in
+                * A6XX_GBIF_SCACHE_CNTL0[14:10]
+                */
+               if (adreno_is_a660_family(adreno_gpu))
+                       gpu_rmw(gpu, REG_A6XX_GBIF_SCACHE_CNTL0, (0x1f << 10) |
+                               (1 << 8), (gpu_scid << 10) | (1 << 8));
        }
 
        /*
@@ -1471,13 +1478,6 @@ static void a6xx_llc_activate(struct a6xx_gpu *a6xx_gpu)
        }
 
        gpu_rmw(gpu, REG_A6XX_GBIF_SCACHE_CNTL1, GENMASK(24, 0), cntl1_regval);
-
-       /* On A660, the SCID programming for UCHE traffic is done in
-        * A6XX_GBIF_SCACHE_CNTL0[14:10]
-        */
-       if (adreno_is_a660_family(adreno_gpu))
-               gpu_rmw(gpu, REG_A6XX_GBIF_SCACHE_CNTL0, (0x1f << 10) |
-                       (1 << 8), (gpu_scid << 10) | (1 << 8));
 }
 
 static void a6xx_llc_slices_destroy(struct a6xx_gpu *a6xx_gpu)
@@ -1640,7 +1640,7 @@ static unsigned long a6xx_gpu_busy(struct msm_gpu *gpu)
        return (unsigned long)busy_time;
 }
 
-void a6xx_gpu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp)
+static void a6xx_gpu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp)
 {
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
index 7501849..6e90209 100644 (file)
@@ -777,12 +777,12 @@ static void a6xx_get_gmu_registers(struct msm_gpu *gpu,
        struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
 
        a6xx_state->gmu_registers = state_kcalloc(a6xx_state,
-               2, sizeof(*a6xx_state->gmu_registers));
+               3, sizeof(*a6xx_state->gmu_registers));
 
        if (!a6xx_state->gmu_registers)
                return;
 
-       a6xx_state->nr_gmu_registers = 2;
+       a6xx_state->nr_gmu_registers = 3;
 
        /* Get the CX GMU registers from AHB */
        _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[0],
index eb40d84..6d36f63 100644 (file)
@@ -33,6 +33,7 @@ struct dp_aux_private {
        bool read;
        bool no_send_addr;
        bool no_send_stop;
+       bool initted;
        u32 offset;
        u32 segment;
 
@@ -331,6 +332,10 @@ static ssize_t dp_aux_transfer(struct drm_dp_aux *dp_aux,
        }
 
        mutex_lock(&aux->mutex);
+       if (!aux->initted) {
+               ret = -EIO;
+               goto exit;
+       }
 
        dp_aux_update_offset_and_segment(aux, msg);
        dp_aux_transfer_helper(aux, msg, true);
@@ -380,6 +385,8 @@ static ssize_t dp_aux_transfer(struct drm_dp_aux *dp_aux,
        }
 
        aux->cmd_busy = false;
+
+exit:
        mutex_unlock(&aux->mutex);
 
        return ret;
@@ -431,8 +438,13 @@ void dp_aux_init(struct drm_dp_aux *dp_aux)
 
        aux = container_of(dp_aux, struct dp_aux_private, dp_aux);
 
+       mutex_lock(&aux->mutex);
+
        dp_catalog_aux_enable(aux->catalog, true);
        aux->retry_cnt = 0;
+       aux->initted = true;
+
+       mutex_unlock(&aux->mutex);
 }
 
 void dp_aux_deinit(struct drm_dp_aux *dp_aux)
@@ -441,7 +453,12 @@ void dp_aux_deinit(struct drm_dp_aux *dp_aux)
 
        aux = container_of(dp_aux, struct dp_aux_private, dp_aux);
 
+       mutex_lock(&aux->mutex);
+
+       aux->initted = false;
        dp_catalog_aux_enable(aux->catalog, false);
+
+       mutex_unlock(&aux->mutex);
 }
 
 int dp_aux_register(struct drm_dp_aux *dp_aux)
index f69a125..0afc3b7 100644 (file)
@@ -1658,6 +1658,8 @@ static int dsi_host_parse_lane_data(struct msm_dsi_host *msm_host,
        if (!prop) {
                DRM_DEV_DEBUG(dev,
                        "failed to find data lane mapping, using default\n");
+               /* Set the number of data lanes to 4 by default. */
+               msm_host->num_data_lanes = 4;
                return 0;
        }
 
index 09d2d27..dee13fe 100644 (file)
@@ -77,6 +77,7 @@ static int msm_gpu_open(struct inode *inode, struct file *file)
                goto free_priv;
 
        pm_runtime_get_sync(&gpu->pdev->dev);
+       msm_gpu_hw_init(gpu);
        show_priv->state = gpu->funcs->gpu_state_get(gpu);
        pm_runtime_put_sync(&gpu->pdev->dev);
 
index 7936e8d..892c043 100644 (file)
@@ -967,29 +967,18 @@ static int msm_ioctl_gem_info(struct drm_device *dev, void *data,
        return ret;
 }
 
-static int msm_ioctl_wait_fence(struct drm_device *dev, void *data,
-               struct drm_file *file)
+static int wait_fence(struct msm_gpu_submitqueue *queue, uint32_t fence_id,
+                     ktime_t timeout)
 {
-       struct msm_drm_private *priv = dev->dev_private;
-       struct drm_msm_wait_fence *args = data;
-       ktime_t timeout = to_ktime(args->timeout);
-       struct msm_gpu_submitqueue *queue;
-       struct msm_gpu *gpu = priv->gpu;
        struct dma_fence *fence;
        int ret;
 
-       if (args->pad) {
-               DRM_ERROR("invalid pad: %08x\n", args->pad);
+       if (fence_id > queue->last_fence) {
+               DRM_ERROR_RATELIMITED("waiting on invalid fence: %u (of %u)\n",
+                                     fence_id, queue->last_fence);
                return -EINVAL;
        }
 
-       if (!gpu)
-               return 0;
-
-       queue = msm_submitqueue_get(file->driver_priv, args->queueid);
-       if (!queue)
-               return -ENOENT;
-
        /*
         * Map submitqueue scoped "seqno" (which is actually an idr key)
         * back to underlying dma-fence
@@ -1001,7 +990,7 @@ static int msm_ioctl_wait_fence(struct drm_device *dev, void *data,
        ret = mutex_lock_interruptible(&queue->lock);
        if (ret)
                return ret;
-       fence = idr_find(&queue->fence_idr, args->fence);
+       fence = idr_find(&queue->fence_idr, fence_id);
        if (fence)
                fence = dma_fence_get_rcu(fence);
        mutex_unlock(&queue->lock);
@@ -1017,6 +1006,32 @@ static int msm_ioctl_wait_fence(struct drm_device *dev, void *data,
        }
 
        dma_fence_put(fence);
+
+       return ret;
+}
+
+static int msm_ioctl_wait_fence(struct drm_device *dev, void *data,
+               struct drm_file *file)
+{
+       struct msm_drm_private *priv = dev->dev_private;
+       struct drm_msm_wait_fence *args = data;
+       struct msm_gpu_submitqueue *queue;
+       int ret;
+
+       if (args->pad) {
+               DRM_ERROR("invalid pad: %08x\n", args->pad);
+               return -EINVAL;
+       }
+
+       if (!priv->gpu)
+               return 0;
+
+       queue = msm_submitqueue_get(file->driver_priv, args->queueid);
+       if (!queue)
+               return -ENOENT;
+
+       ret = wait_fence(queue, args->fence, to_ktime(args->timeout));
+
        msm_submitqueue_put(queue);
 
        return ret;
index 104fdfc..512d55e 100644 (file)
@@ -1056,8 +1056,7 @@ static int msm_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_struct
 {
        struct msm_gem_object *msm_obj = to_msm_bo(obj);
 
-       vma->vm_flags &= ~VM_PFNMAP;
-       vma->vm_flags |= VM_MIXEDMAP | VM_DONTEXPAND;
+       vma->vm_flags |= VM_IO | VM_MIXEDMAP | VM_DONTEXPAND | VM_DONTDUMP;
        vma->vm_page_prot = msm_gem_pgprot(msm_obj, vm_get_page_prot(vma->vm_flags));
 
        return 0;
@@ -1121,7 +1120,7 @@ static int msm_gem_new_impl(struct drm_device *dev,
                        break;
                fallthrough;
        default:
-               DRM_DEV_ERROR(dev->dev, "invalid cache flag: %x\n",
+               DRM_DEV_DEBUG(dev->dev, "invalid cache flag: %x\n",
                                (flags & MSM_BO_CACHE_MASK));
                return -EINVAL;
        }
index 3cb029f..282628d 100644 (file)
@@ -772,6 +772,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
                args->nr_cmds);
        if (IS_ERR(submit)) {
                ret = PTR_ERR(submit);
+               submit = NULL;
                goto out_unlock;
        }
 
@@ -904,6 +905,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
        drm_sched_entity_push_job(&submit->base);
 
        args->fence = submit->fence_id;
+       queue->last_fence = submit->fence_id;
 
        msm_reset_syncobjs(syncobjs_to_reset, args->nr_in_syncobjs);
        msm_process_post_deps(post_deps, args->nr_out_syncobjs,
index 59cdd00..48ea2de 100644 (file)
@@ -359,6 +359,8 @@ static inline int msm_gpu_convert_priority(struct msm_gpu *gpu, int prio,
  * @ring_nr:   the ringbuffer used by this submitqueue, which is determined
  *             by the submitqueue's priority
  * @faults:    the number of GPU hangs associated with this submitqueue
+ * @last_fence: the sequence number of the last allocated fence (for error
+ *             checking)
  * @ctx:       the per-drm_file context associated with the submitqueue (ie.
  *             which set of pgtables do submits jobs associated with the
  *             submitqueue use)
@@ -374,6 +376,7 @@ struct msm_gpu_submitqueue {
        u32 flags;
        u32 ring_nr;
        int faults;
+       uint32_t last_fence;
        struct msm_file_private *ctx;
        struct list_head node;
        struct idr fence_idr;
index 8b7473f..384e90c 100644 (file)
@@ -20,6 +20,10 @@ static int msm_devfreq_target(struct device *dev, unsigned long *freq,
        struct msm_gpu *gpu = dev_to_gpu(dev);
        struct dev_pm_opp *opp;
 
+       /*
+        * Note that devfreq_recommended_opp() can modify the freq
+        * to something that actually is in the opp table:
+        */
        opp = devfreq_recommended_opp(dev, freq, flags);
 
        /*
@@ -28,6 +32,7 @@ static int msm_devfreq_target(struct device *dev, unsigned long *freq,
         */
        if (gpu->devfreq.idle_freq) {
                gpu->devfreq.idle_freq = *freq;
+               dev_pm_opp_put(opp);
                return 0;
        }
 
@@ -203,9 +208,6 @@ static void msm_devfreq_idle_work(struct kthread_work *work)
        struct msm_gpu *gpu = container_of(df, struct msm_gpu, devfreq);
        unsigned long idle_freq, target_freq = 0;
 
-       if (!df->devfreq)
-               return;
-
        /*
         * Hold devfreq lock to synchronize with get_dev_status()/
         * target() callbacks
@@ -227,6 +229,9 @@ void msm_devfreq_idle(struct msm_gpu *gpu)
 {
        struct msm_gpu_devfreq *df = &gpu->devfreq;
 
+       if (!df->devfreq)
+               return;
+
        msm_hrtimer_queue_work(&df->idle_work, ms_to_ktime(1),
-                              HRTIMER_MODE_ABS);
+                              HRTIMER_MODE_REL);
 }
index b51d690..88d262b 100644 (file)
@@ -2626,6 +2626,27 @@ nv174_chipset = {
        .fifo     = { 0x00000001, ga102_fifo_new },
 };
 
+static const struct nvkm_device_chip
+nv176_chipset = {
+       .name = "GA106",
+       .bar      = { 0x00000001, tu102_bar_new },
+       .bios     = { 0x00000001, nvkm_bios_new },
+       .devinit  = { 0x00000001, ga100_devinit_new },
+       .fb       = { 0x00000001, ga102_fb_new },
+       .gpio     = { 0x00000001, ga102_gpio_new },
+       .i2c      = { 0x00000001, gm200_i2c_new },
+       .imem     = { 0x00000001, nv50_instmem_new },
+       .mc       = { 0x00000001, ga100_mc_new },
+       .mmu      = { 0x00000001, tu102_mmu_new },
+       .pci      = { 0x00000001, gp100_pci_new },
+       .privring = { 0x00000001, gm200_privring_new },
+       .timer    = { 0x00000001, gk20a_timer_new },
+       .top      = { 0x00000001, ga100_top_new },
+       .disp     = { 0x00000001, ga102_disp_new },
+       .dma      = { 0x00000001, gv100_dma_new },
+       .fifo     = { 0x00000001, ga102_fifo_new },
+};
+
 static const struct nvkm_device_chip
 nv177_chipset = {
        .name = "GA107",
@@ -3072,6 +3093,7 @@ nvkm_device_ctor(const struct nvkm_device_func *func,
                case 0x168: device->chip = &nv168_chipset; break;
                case 0x172: device->chip = &nv172_chipset; break;
                case 0x174: device->chip = &nv174_chipset; break;
+               case 0x176: device->chip = &nv176_chipset; break;
                case 0x177: device->chip = &nv177_chipset; break;
                default:
                        if (nvkm_boolopt(device->cfgopt, "NvEnableUnsupportedChipsets", false)) {
index cdb1ead..82b4c8e 100644 (file)
@@ -207,11 +207,13 @@ int
 gm200_acr_wpr_parse(struct nvkm_acr *acr)
 {
        const struct wpr_header *hdr = (void *)acr->wpr_fw->data;
+       struct nvkm_acr_lsfw *lsfw;
 
        while (hdr->falcon_id != WPR_HEADER_V0_FALCON_ID_INVALID) {
                wpr_header_dump(&acr->subdev, hdr);
-               if (!nvkm_acr_lsfw_add(NULL, acr, NULL, (hdr++)->falcon_id))
-                       return -ENOMEM;
+               lsfw = nvkm_acr_lsfw_add(NULL, acr, NULL, (hdr++)->falcon_id);
+               if (IS_ERR(lsfw))
+                       return PTR_ERR(lsfw);
        }
 
        return 0;
index fb9132a..fd97a93 100644 (file)
@@ -161,11 +161,13 @@ int
 gp102_acr_wpr_parse(struct nvkm_acr *acr)
 {
        const struct wpr_header_v1 *hdr = (void *)acr->wpr_fw->data;
+       struct nvkm_acr_lsfw *lsfw;
 
        while (hdr->falcon_id != WPR_HEADER_V1_FALCON_ID_INVALID) {
                wpr_header_v1_dump(&acr->subdev, hdr);
-               if (!nvkm_acr_lsfw_add(NULL, acr, NULL, (hdr++)->falcon_id))
-                       return -ENOMEM;
+               lsfw = nvkm_acr_lsfw_add(NULL, acr, NULL, (hdr++)->falcon_id);
+               if (IS_ERR(lsfw))
+                       return PTR_ERR(lsfw);
        }
 
        return 0;
index fddaeb0..f642bd6 100644 (file)
@@ -391,7 +391,7 @@ struct drm_gem_object *vc4_create_object(struct drm_device *dev, size_t size)
 
        bo = kzalloc(sizeof(*bo), GFP_KERNEL);
        if (!bo)
-               return ERR_PTR(-ENOMEM);
+               return NULL;
 
        bo->madv = VC4_MADV_WILLNEED;
        refcount_set(&bo->usecnt, 0);
index f0b3e4c..b61792d 100644 (file)
@@ -337,10 +337,10 @@ static void vc4_atomic_commit_tail(struct drm_atomic_state *state)
        struct drm_device *dev = state->dev;
        struct vc4_dev *vc4 = to_vc4_dev(dev);
        struct vc4_hvs *hvs = vc4->hvs;
-       struct drm_crtc_state *old_crtc_state;
        struct drm_crtc_state *new_crtc_state;
        struct drm_crtc *crtc;
        struct vc4_hvs_state *old_hvs_state;
+       unsigned int channel;
        int i;
 
        for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {
@@ -353,30 +353,32 @@ static void vc4_atomic_commit_tail(struct drm_atomic_state *state)
                vc4_hvs_mask_underrun(dev, vc4_crtc_state->assigned_channel);
        }
 
-       if (vc4->hvs->hvs5)
-               clk_set_min_rate(hvs->core_clk, 500000000);
-
        old_hvs_state = vc4_hvs_get_old_global_state(state);
-       if (!old_hvs_state)
+       if (IS_ERR(old_hvs_state))
                return;
 
-       for_each_old_crtc_in_state(state, crtc, old_crtc_state, i) {
-               struct vc4_crtc_state *vc4_crtc_state =
-                       to_vc4_crtc_state(old_crtc_state);
-               unsigned int channel = vc4_crtc_state->assigned_channel;
+       for (channel = 0; channel < HVS_NUM_CHANNELS; channel++) {
+               struct drm_crtc_commit *commit;
                int ret;
 
-               if (channel == VC4_HVS_CHANNEL_DISABLED)
+               if (!old_hvs_state->fifo_state[channel].in_use)
                        continue;
 
-               if (!old_hvs_state->fifo_state[channel].in_use)
+               commit = old_hvs_state->fifo_state[channel].pending_commit;
+               if (!commit)
                        continue;
 
-               ret = drm_crtc_commit_wait(old_hvs_state->fifo_state[channel].pending_commit);
+               ret = drm_crtc_commit_wait(commit);
                if (ret)
                        drm_err(dev, "Timed out waiting for commit\n");
+
+               drm_crtc_commit_put(commit);
+               old_hvs_state->fifo_state[channel].pending_commit = NULL;
        }
 
+       if (vc4->hvs->hvs5)
+               clk_set_min_rate(hvs->core_clk, 500000000);
+
        drm_atomic_helper_commit_modeset_disables(dev, state);
 
        vc4_ctm_commit(vc4, state);
@@ -410,8 +412,8 @@ static int vc4_atomic_commit_setup(struct drm_atomic_state *state)
        unsigned int i;
 
        hvs_state = vc4_hvs_get_new_global_state(state);
-       if (!hvs_state)
-               return -EINVAL;
+       if (WARN_ON(IS_ERR(hvs_state)))
+               return PTR_ERR(hvs_state);
 
        for_each_new_crtc_in_state(state, crtc, crtc_state, i) {
                struct vc4_crtc_state *vc4_crtc_state =
@@ -668,12 +670,6 @@ vc4_hvs_channels_duplicate_state(struct drm_private_obj *obj)
 
        for (i = 0; i < HVS_NUM_CHANNELS; i++) {
                state->fifo_state[i].in_use = old_state->fifo_state[i].in_use;
-
-               if (!old_state->fifo_state[i].pending_commit)
-                       continue;
-
-               state->fifo_state[i].pending_commit =
-                       drm_crtc_commit_get(old_state->fifo_state[i].pending_commit);
        }
 
        return &state->base;
@@ -762,8 +758,8 @@ static int vc4_pv_muxing_atomic_check(struct drm_device *dev,
        unsigned int i;
 
        hvs_new_state = vc4_hvs_get_global_state(state);
-       if (!hvs_new_state)
-               return -EINVAL;
+       if (IS_ERR(hvs_new_state))
+               return PTR_ERR(hvs_new_state);
 
        for (i = 0; i < ARRAY_SIZE(hvs_new_state->fifo_state); i++)
                if (!hvs_new_state->fifo_state[i].in_use)
index d86e1ad..5072dbb 100644 (file)
@@ -157,36 +157,6 @@ static void virtio_gpu_config_changed(struct virtio_device *vdev)
        schedule_work(&vgdev->config_changed_work);
 }
 
-static __poll_t virtio_gpu_poll(struct file *filp,
-                               struct poll_table_struct *wait)
-{
-       struct drm_file *drm_file = filp->private_data;
-       struct virtio_gpu_fpriv *vfpriv = drm_file->driver_priv;
-       struct drm_device *dev = drm_file->minor->dev;
-       struct virtio_gpu_device *vgdev = dev->dev_private;
-       struct drm_pending_event *e = NULL;
-       __poll_t mask = 0;
-
-       if (!vgdev->has_virgl_3d || !vfpriv || !vfpriv->ring_idx_mask)
-               return drm_poll(filp, wait);
-
-       poll_wait(filp, &drm_file->event_wait, wait);
-
-       if (!list_empty(&drm_file->event_list)) {
-               spin_lock_irq(&dev->event_lock);
-               e = list_first_entry(&drm_file->event_list,
-                                    struct drm_pending_event, link);
-               drm_file->event_space += e->event->length;
-               list_del(&e->link);
-               spin_unlock_irq(&dev->event_lock);
-
-               kfree(e);
-               mask |= EPOLLIN | EPOLLRDNORM;
-       }
-
-       return mask;
-}
-
 static struct virtio_device_id id_table[] = {
        { VIRTIO_ID_GPU, VIRTIO_DEV_ANY_ID },
        { 0 },
@@ -226,17 +196,7 @@ MODULE_AUTHOR("Dave Airlie <airlied@redhat.com>");
 MODULE_AUTHOR("Gerd Hoffmann <kraxel@redhat.com>");
 MODULE_AUTHOR("Alon Levy");
 
-static const struct file_operations virtio_gpu_driver_fops = {
-       .owner          = THIS_MODULE,
-       .open           = drm_open,
-       .release        = drm_release,
-       .unlocked_ioctl = drm_ioctl,
-       .compat_ioctl   = drm_compat_ioctl,
-       .poll           = virtio_gpu_poll,
-       .read           = drm_read,
-       .llseek         = noop_llseek,
-       .mmap           = drm_gem_mmap
-};
+DEFINE_DRM_GEM_FOPS(virtio_gpu_driver_fops);
 
 static const struct drm_driver driver = {
        .driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_RENDER | DRIVER_ATOMIC,
index e0265fe..0a194aa 100644 (file)
@@ -138,7 +138,6 @@ struct virtio_gpu_fence_driver {
        spinlock_t       lock;
 };
 
-#define VIRTGPU_EVENT_FENCE_SIGNALED_INTERNAL 0x10000000
 struct virtio_gpu_fence_event {
        struct drm_pending_event base;
        struct drm_event event;
index 5618a1d..3607646 100644 (file)
@@ -54,7 +54,7 @@ static int virtio_gpu_fence_event_create(struct drm_device *dev,
        if (!e)
                return -ENOMEM;
 
-       e->event.type = VIRTGPU_EVENT_FENCE_SIGNALED_INTERNAL;
+       e->event.type = VIRTGPU_EVENT_FENCE_SIGNALED;
        e->event.length = sizeof(e->event);
 
        ret = drm_event_reserve_init(dev, file, &e->base, &e->event);
index 9f14d99..bc76053 100644 (file)
@@ -773,6 +773,7 @@ static struct xenbus_driver xen_driver = {
        .probe = xen_drv_probe,
        .remove = xen_drv_remove,
        .otherend_changed = displback_changed,
+       .not_essential = true,
 };
 
 static int __init xen_drv_init(void)
index 5d57214..f3ecddc 100644 (file)
@@ -854,7 +854,7 @@ static int asus_input_mapping(struct hid_device *hdev,
                switch (usage->hid & HID_USAGE) {
                case 0x10: asus_map_key_clear(KEY_BRIGHTNESSDOWN);      break;
                case 0x20: asus_map_key_clear(KEY_BRIGHTNESSUP);                break;
-               case 0x35: asus_map_key_clear(KEY_SCREENLOCK);          break;
+               case 0x35: asus_map_key_clear(KEY_DISPLAY_OFF);         break;
                case 0x6c: asus_map_key_clear(KEY_SLEEP);               break;
                case 0x7c: asus_map_key_clear(KEY_MICMUTE);             break;
                case 0x82: asus_map_key_clear(KEY_CAMERA);              break;
index 4ef1c3b..8ee77f4 100644 (file)
@@ -966,24 +966,23 @@ static int ft260_probe(struct hid_device *hdev, const struct hid_device_id *id)
        mutex_init(&dev->lock);
        init_completion(&dev->wait);
 
+       ret = ft260_xfer_status(dev);
+       if (ret)
+               ft260_i2c_reset(hdev);
+
+       i2c_set_adapdata(&dev->adap, dev);
        ret = i2c_add_adapter(&dev->adap);
        if (ret) {
                hid_err(hdev, "failed to add i2c adapter\n");
                goto err_hid_close;
        }
 
-       i2c_set_adapdata(&dev->adap, dev);
-
        ret = sysfs_create_group(&hdev->dev.kobj, &ft260_attr_group);
        if (ret < 0) {
                hid_err(hdev, "failed to create sysfs attrs\n");
                goto err_i2c_free;
        }
 
-       ret = ft260_xfer_status(dev);
-       if (ret)
-               ft260_i2c_reset(hdev);
-
        return 0;
 
 err_i2c_free:
index 95037a3..96a4559 100644 (file)
 #define USB_DEVICE_ID_TOSHIBA_CLICK_L9W        0x0401
 #define USB_DEVICE_ID_HP_X2            0x074d
 #define USB_DEVICE_ID_HP_X2_10_COVER   0x0755
+#define I2C_DEVICE_ID_HP_ENVY_X360_15  0x2d05
 #define I2C_DEVICE_ID_HP_SPECTRE_X360_15       0x2817
 #define USB_DEVICE_ID_ASUS_UX550_TOUCHSCREEN   0x2706
 #define I2C_DEVICE_ID_SURFACE_GO_TOUCHSCREEN   0x261A
index 2c72ce4..217f2d1 100644 (file)
@@ -160,6 +160,7 @@ static int hidinput_setkeycode(struct input_dev *dev,
        if (usage) {
                *old_keycode = usage->type == EV_KEY ?
                                usage->code : KEY_RESERVED;
+               usage->type = EV_KEY;
                usage->code = ke->keycode;
 
                clear_bit(*old_keycode, dev->keybit);
@@ -324,6 +325,8 @@ static const struct hid_device_id hid_battery_quirks[] = {
          HID_BATTERY_QUIRK_IGNORE },
        { HID_USB_DEVICE(USB_VENDOR_ID_ELAN, USB_DEVICE_ID_ASUS_UX550_TOUCHSCREEN),
          HID_BATTERY_QUIRK_IGNORE },
+       { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_HP_ENVY_X360_15),
+         HID_BATTERY_QUIRK_IGNORE },
        { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_HP_SPECTRE_X360_15),
          HID_BATTERY_QUIRK_IGNORE },
        { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_SURFACE_GO_TOUCHSCREEN),
@@ -650,10 +653,9 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel
                                                code += KEY_MACRO1;
                                        else
                                                code += BTN_TRIGGER_HAPPY - 0x1e;
-                               } else {
-                                       goto ignore;
+                                       break;
                                }
-                               break;
+                               fallthrough;
                default:
                        switch (field->physical) {
                        case HID_GD_MOUSE:
index 686788e..d7687ce 100644 (file)
@@ -256,8 +256,11 @@ static void magicmouse_emit_touch(struct magicmouse_sc *msc, int raw_id, u8 *tda
                unsigned long now = jiffies;
                int step_x = msc->touches[id].scroll_x - x;
                int step_y = msc->touches[id].scroll_y - y;
-               int step_hr = ((64 - (int)scroll_speed) * msc->scroll_accel) /
-                             SCROLL_HR_STEPS;
+               int step_hr =
+                       max_t(int,
+                             ((64 - (int)scroll_speed) * msc->scroll_accel) /
+                                       SCROLL_HR_STEPS,
+                             1);
                int step_x_hr = msc->touches[id].scroll_x_hr - x;
                int step_y_hr = msc->touches[id].scroll_y_hr - y;
 
index e1afddb..082376a 100644 (file)
@@ -1888,6 +1888,11 @@ static const struct hid_device_id mt_devices[] = {
                MT_USB_DEVICE(USB_VENDOR_ID_CVTOUCH,
                        USB_DEVICE_ID_CVTOUCH_SCREEN) },
 
+       /* eGalax devices (SAW) */
+       { .driver_data = MT_CLS_EXPORT_ALL_INPUTS,
+               MT_USB_DEVICE(USB_VENDOR_ID_DWAV,
+                       USB_DEVICE_ID_EGALAX_TOUCHCONTROLLER) },
+
        /* eGalax devices (resistive) */
        { .driver_data = MT_CLS_EGALAX,
                MT_USB_DEVICE(USB_VENDOR_ID_DWAV,
index a1e0f68..b6a9a0f 100644 (file)
@@ -189,6 +189,7 @@ struct joycon_rumble_amp_data {
        u16 amp;
 };
 
+#if IS_ENABLED(CONFIG_NINTENDO_FF)
 /*
  * These tables are from
  * https://github.com/dekuNukem/Nintendo_Switch_Reverse_Engineering/blob/master/rumble_data_table.md
@@ -289,6 +290,10 @@ static const struct joycon_rumble_amp_data joycon_rumble_amplitudes[] = {
        { 0xc2, 0x8070,  940 }, { 0xc4, 0x0071,  960 }, { 0xc6, 0x8071,  981 },
        { 0xc8, 0x0072, joycon_max_rumble_amp }
 };
+static const u16 JC_RUMBLE_DFLT_LOW_FREQ = 160;
+static const u16 JC_RUMBLE_DFLT_HIGH_FREQ = 320;
+#endif /* IS_ENABLED(CONFIG_NINTENDO_FF) */
+static const u16 JC_RUMBLE_PERIOD_MS = 50;
 
 /* States for controller state machine */
 enum joycon_ctlr_state {
@@ -397,9 +402,6 @@ struct joycon_input_report {
 #define JC_RUMBLE_DATA_SIZE    8
 #define JC_RUMBLE_QUEUE_SIZE   8
 
-static const u16 JC_RUMBLE_DFLT_LOW_FREQ = 160;
-static const u16 JC_RUMBLE_DFLT_HIGH_FREQ = 320;
-static const u16 JC_RUMBLE_PERIOD_MS = 50;
 static const unsigned short JC_RUMBLE_ZERO_AMP_PKT_CNT = 5;
 
 static const char * const joycon_player_led_names[] = {
@@ -1850,8 +1852,10 @@ static int joycon_leds_create(struct joycon_ctlr *ctlr)
                                      d_name,
                                      "green",
                                      joycon_player_led_names[i]);
-               if (!name)
+               if (!name) {
+                       mutex_unlock(&joycon_input_num_mutex);
                        return -ENOMEM;
+               }
 
                led = &ctlr->leds[i];
                led->name = name;
@@ -1864,6 +1868,7 @@ static int joycon_leds_create(struct joycon_ctlr *ctlr)
                ret = devm_led_classdev_register(&hdev->dev, led);
                if (ret) {
                        hid_err(hdev, "Failed registering %s LED\n", led->name);
+                       mutex_unlock(&joycon_input_num_mutex);
                        return ret;
                }
        }
index d44550a..3a53334 100644 (file)
@@ -205,7 +205,7 @@ static void thrustmaster_model_handler(struct urb *urb)
        struct tm_wheel *tm_wheel = hid_get_drvdata(hdev);
        uint16_t model = 0;
        int i, ret;
-       const struct tm_wheel_info *twi = 0;
+       const struct tm_wheel_info *twi = NULL;
 
        if (urb->status) {
                hid_err(hdev, "URB to get model id failed with error %d\n", urb->status);
@@ -238,7 +238,7 @@ static void thrustmaster_model_handler(struct urb *urb)
                tm_wheel->usb_dev,
                usb_sndctrlpipe(tm_wheel->usb_dev, 0),
                (char *)tm_wheel->change_request,
-               0, 0, // We do not expect any response from the wheel
+               NULL, 0, // We do not expect any response from the wheel
                thrustmaster_change_handler,
                hdev
        );
@@ -272,7 +272,7 @@ static void thrustmaster_remove(struct hid_device *hdev)
 static int thrustmaster_probe(struct hid_device *hdev, const struct hid_device_id *id)
 {
        int ret = 0;
-       struct tm_wheel *tm_wheel = 0;
+       struct tm_wheel *tm_wheel = NULL;
 
        ret = hid_parse(hdev);
        if (ret) {
index 1b486f2..0e1183e 100644 (file)
@@ -76,9 +76,12 @@ enum ish_loader_commands {
 #define LOADER_XFER_MODE_ISHTP                 BIT(1)
 
 /* ISH Transport Loader client unique GUID */
-static const guid_t loader_ishtp_guid =
-       GUID_INIT(0xc804d06a, 0x55bd, 0x4ea7,
-                 0xad, 0xed, 0x1e, 0x31, 0x22, 0x8c, 0x76, 0xdc);
+static const struct ishtp_device_id loader_ishtp_id_table[] = {
+       { .guid = GUID_INIT(0xc804d06a, 0x55bd, 0x4ea7,
+                 0xad, 0xed, 0x1e, 0x31, 0x22, 0x8c, 0x76, 0xdc) },
+       { }
+};
+MODULE_DEVICE_TABLE(ishtp, loader_ishtp_id_table);
 
 #define FILENAME_SIZE                          256
 
@@ -880,7 +883,7 @@ static int loader_init(struct ishtp_cl *loader_ishtp_cl, int reset)
 
        fw_client =
                ishtp_fw_cl_get_client(ishtp_get_ishtp_device(loader_ishtp_cl),
-                                      &loader_ishtp_guid);
+                                      &loader_ishtp_id_table[0].guid);
        if (!fw_client) {
                dev_err(cl_data_to_dev(client_data),
                        "ISH client uuid not found\n");
@@ -1057,7 +1060,7 @@ static int loader_ishtp_cl_reset(struct ishtp_cl_device *cl_device)
 
 static struct ishtp_cl_driver  loader_ishtp_cl_driver = {
        .name = "ish-loader",
-       .guid = &loader_ishtp_guid,
+       .id = loader_ishtp_id_table,
        .probe = loader_ishtp_cl_probe,
        .remove = loader_ishtp_cl_remove,
        .reset = loader_ishtp_cl_reset,
@@ -1083,4 +1086,3 @@ MODULE_DESCRIPTION("ISH ISH-TP Host firmware Loader Client Driver");
 MODULE_AUTHOR("Rushikesh S Kadam <rushikesh.s.kadam@intel.com>");
 
 MODULE_LICENSE("GPL v2");
-MODULE_ALIAS("ishtp:*");
index 91bf4d0..4338c9b 100644 (file)
 #include "ishtp-hid.h"
 
 /* ISH Transport protocol (ISHTP in short) GUID */
-static const guid_t hid_ishtp_guid =
-       GUID_INIT(0x33AECD58, 0xB679, 0x4E54,
-                 0x9B, 0xD9, 0xA0, 0x4D, 0x34, 0xF0, 0xC2, 0x26);
+static const struct ishtp_device_id hid_ishtp_id_table[] = {
+       { .guid = GUID_INIT(0x33AECD58, 0xB679, 0x4E54,
+                 0x9B, 0xD9, 0xA0, 0x4D, 0x34, 0xF0, 0xC2, 0x26), },
+       { }
+};
+MODULE_DEVICE_TABLE(ishtp, hid_ishtp_id_table);
 
 /* Rx ring buffer pool size */
 #define HID_CL_RX_RING_SIZE    32
@@ -662,7 +665,7 @@ static int hid_ishtp_cl_init(struct ishtp_cl *hid_ishtp_cl, int reset)
        ishtp_set_tx_ring_size(hid_ishtp_cl, HID_CL_TX_RING_SIZE);
        ishtp_set_rx_ring_size(hid_ishtp_cl, HID_CL_RX_RING_SIZE);
 
-       fw_client = ishtp_fw_cl_get_client(dev, &hid_ishtp_guid);
+       fw_client = ishtp_fw_cl_get_client(dev, &hid_ishtp_id_table[0].guid);
        if (!fw_client) {
                dev_err(cl_data_to_dev(client_data),
                        "ish client uuid not found\n");
@@ -945,7 +948,7 @@ static const struct dev_pm_ops hid_ishtp_pm_ops = {
 
 static struct ishtp_cl_driver  hid_ishtp_cl_driver = {
        .name = "ish-hid",
-       .guid = &hid_ishtp_guid,
+       .id = hid_ishtp_id_table,
        .probe = hid_ishtp_cl_probe,
        .remove = hid_ishtp_cl_remove,
        .reset = hid_ishtp_cl_reset,
@@ -981,4 +984,3 @@ MODULE_AUTHOR("Daniel Drubin <daniel.drubin@intel.com>");
 MODULE_AUTHOR("Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>");
 
 MODULE_LICENSE("GPL");
-MODULE_ALIAS("ishtp:*");
index 334eac6..f68aba8 100644 (file)
@@ -241,7 +241,7 @@ static int ishtp_cl_bus_match(struct device *dev, struct device_driver *drv)
        struct ishtp_cl_device *device = to_ishtp_cl_device(dev);
        struct ishtp_cl_driver *driver = to_ishtp_cl_driver(drv);
 
-       return guid_equal(driver->guid,
+       return guid_equal(&driver->id[0].guid,
                          &device->fw_client->props.protocol_name);
 }
 
@@ -350,7 +350,7 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *a,
 {
        int len;
 
-       len = snprintf(buf, PAGE_SIZE, "ishtp:%s\n", dev_name(dev));
+       len = snprintf(buf, PAGE_SIZE, ISHTP_MODULE_PREFIX "%s\n", dev_name(dev));
        return (len >= PAGE_SIZE) ? (PAGE_SIZE - 1) : len;
 }
 static DEVICE_ATTR_RO(modalias);
@@ -363,7 +363,7 @@ ATTRIBUTE_GROUPS(ishtp_cl_dev);
 
 static int ishtp_cl_uevent(struct device *dev, struct kobj_uevent_env *env)
 {
-       if (add_uevent_var(env, "MODALIAS=ishtp:%s", dev_name(dev)))
+       if (add_uevent_var(env, "MODALIAS=" ISHTP_MODULE_PREFIX "%s", dev_name(dev)))
                return -ENOMEM;
        return 0;
 }
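
Taken together, the ISHTP hunks above replace each client driver's hand-rolled GUID and catch-all MODULE_ALIAS("ishtp:*") with a struct ishtp_device_id table, letting modpost generate one exact "ishtp:{GUID}" alias per supported device. A minimal sketch of the client-driver pattern this converges on (the driver name and GUID here are made up for illustration, not taken from the patch):

        #include <linux/module.h>
        #include <linux/uuid.h>
        #include <linux/mod_devicetable.h>
        #include <linux/intel-ish-client-if.h>

        static const struct ishtp_device_id example_ishtp_id_table[] = {
                { .guid = GUID_INIT(0x12345678, 0x1234, 0x1234,
                          0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08), },
                { }
        };
        MODULE_DEVICE_TABLE(ishtp, example_ishtp_id_table);

        static struct ishtp_cl_driver example_ishtp_cl_driver = {
                .name = "ish-example",
                .id = example_ishtp_id_table,
                /* .probe, .remove and .reset hooks as in the drivers above */
        };

The bus-side hunks then match on &driver->id[0].guid and build the uevent and modalias strings from ISHTP_MODULE_PREFIX (presumably "ishtp:", matching the literal they replace), so the aliases generated from the table line up with what udev sees at device discovery time.
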
index 33a6908..2a4cc39 100644 (file)
@@ -2603,6 +2603,9 @@ static void wacom_wac_finger_event(struct hid_device *hdev,
                return;
 
        switch (equivalent_usage) {
+       case HID_DG_CONFIDENCE:
+               wacom_wac->hid_data.confidence = value;
+               break;
        case HID_GD_X:
                wacom_wac->hid_data.x = value;
                break;
@@ -2635,7 +2638,8 @@ static void wacom_wac_finger_event(struct hid_device *hdev,
        }
 
        if (usage->usage_index + 1 == field->report_count) {
-               if (equivalent_usage == wacom_wac->hid_data.last_slot_field)
+               if (equivalent_usage == wacom_wac->hid_data.last_slot_field &&
+                   wacom_wac->hid_data.confidence)
                        wacom_wac_finger_slot(wacom_wac, wacom_wac->touch_input);
        }
 }
@@ -2653,6 +2657,8 @@ static void wacom_wac_finger_pre_report(struct hid_device *hdev,
 
        wacom_wac->is_invalid_bt_frame = false;
 
+       hid_data->confidence = true;
+
        for (i = 0; i < report->maxfield; i++) {
                struct hid_field *field = report->field[i];
                int j;
index 8b2d4e5..466b62c 100644 (file)
@@ -301,6 +301,7 @@ struct hid_data {
        bool barrelswitch;
        bool barrelswitch2;
        bool serialhi;
+       bool confidence;
        int x;
        int y;
        int pressure;
index 72df563..f8639a4 100644 (file)
@@ -195,8 +195,9 @@ static u32 cbus_i2c_func(struct i2c_adapter *adapter)
 }
 
 static const struct i2c_algorithm cbus_i2c_algo = {
-       .smbus_xfer     = cbus_i2c_smbus_xfer,
-       .functionality  = cbus_i2c_func,
+       .smbus_xfer             = cbus_i2c_smbus_xfer,
+       .smbus_xfer_atomic      = cbus_i2c_smbus_xfer,
+       .functionality          = cbus_i2c_func,
 };
 
 static int cbus_i2c_remove(struct platform_device *pdev)
index 0518745..41446f9 100644 (file)
 #define SMBSLVSTS_HST_NTFY_STS BIT(0)
 
 /* Host Notify Command register bits */
+#define SMBSLVCMD_SMBALERT_DISABLE     BIT(2)
 #define SMBSLVCMD_HST_NTFY_INTREN      BIT(0)
 
 #define STATUS_ERROR_FLAGS     (SMBHSTSTS_FAILED | SMBHSTSTS_BUS_ERR | \
@@ -259,6 +260,7 @@ struct i801_priv {
        struct i2c_adapter adapter;
        unsigned long smba;
        unsigned char original_hstcfg;
+       unsigned char original_hstcnt;
        unsigned char original_slvcmd;
        struct pci_dev *pci_dev;
        unsigned int features;
@@ -641,12 +643,20 @@ static irqreturn_t i801_isr(int irq, void *dev_id)
                i801_isr_byte_done(priv);
 
        /*
-        * Clear irq sources and report transaction result.
+        * Clear remaining IRQ sources: completion of the last command, errors
+        * and the SMB_ALERT signal. SMB_ALERT status is set after signal
+        * assertion regardless of whether interrupt generation is blocked,
+        * so always clear it when the status bit is set.
+        */
+       status &= SMBHSTSTS_INTR | STATUS_ERROR_FLAGS | SMBHSTSTS_SMBALERT_STS;
+       if (status)
+               outb_p(status, SMBHSTSTS(priv));
+       status &= ~SMBHSTSTS_SMBALERT_STS; /* SMB_ALERT not reported */
+       /*
+        * Report transaction result.
         * ->status must be cleared before the next transaction is started.
         */
-       status &= SMBHSTSTS_INTR | STATUS_ERROR_FLAGS;
        if (status) {
-               outb_p(status, SMBHSTSTS(priv));
                priv->status = status;
                complete(&priv->done);
        }
@@ -974,9 +984,13 @@ static void i801_enable_host_notify(struct i2c_adapter *adapter)
        if (!(priv->features & FEATURE_HOST_NOTIFY))
                return;
 
-       if (!(SMBSLVCMD_HST_NTFY_INTREN & priv->original_slvcmd))
-               outb_p(SMBSLVCMD_HST_NTFY_INTREN | priv->original_slvcmd,
-                      SMBSLVCMD(priv));
+       /*
+        * Enable the Host Notify interrupt and block interrupt generation
+        * from the SMB_ALERT signal because the driver does not support
+        * SMBus Alert.
+        */
+       outb_p(SMBSLVCMD_HST_NTFY_INTREN | SMBSLVCMD_SMBALERT_DISABLE |
+              priv->original_slvcmd, SMBSLVCMD(priv));
 
        /* clear Host Notify bit to allow a new notification */
        outb_p(SMBSLVSTS_HST_NTFY_STS, SMBSLVSTS(priv));
@@ -1805,7 +1819,8 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
                outb_p(inb_p(SMBAUXCTL(priv)) &
                       ~(SMBAUXCTL_CRC | SMBAUXCTL_E32B), SMBAUXCTL(priv));
 
-       /* Remember original Host Notify setting */
+       /* Remember original Interrupt and Host Notify settings */
+       priv->original_hstcnt = inb_p(SMBHSTCNT(priv)) & ~SMBHSTCNT_KILL;
        if (priv->features & FEATURE_HOST_NOTIFY)
                priv->original_slvcmd = inb_p(SMBSLVCMD(priv));
 
@@ -1869,6 +1884,7 @@ static void i801_remove(struct pci_dev *dev)
 {
        struct i801_priv *priv = pci_get_drvdata(dev);
 
+       outb_p(priv->original_hstcnt, SMBHSTCNT(priv));
        i801_disable_host_notify(priv);
        i801_del_mux(priv);
        i2c_del_adapter(&priv->adapter);
@@ -1892,6 +1908,7 @@ static void i801_shutdown(struct pci_dev *dev)
        struct i801_priv *priv = pci_get_drvdata(dev);
 
        /* Restore config registers to avoid hard hang on some systems */
+       outb_p(priv->original_hstcnt, SMBHSTCNT(priv));
        i801_disable_host_notify(priv);
        pci_write_config_byte(dev, SMBHSTCFG, priv->original_hstcfg);
 }
@@ -1901,6 +1918,7 @@ static int i801_suspend(struct device *dev)
 {
        struct i801_priv *priv = dev_get_drvdata(dev);
 
+       outb_p(priv->original_hstcnt, SMBHSTCNT(priv));
        pci_write_config_byte(priv->pci_dev, SMBHSTCFG, priv->original_hstcfg);
        return 0;
 }
index 819ab4e..02ddb23 100644 (file)
@@ -423,8 +423,8 @@ static void rk3x_i2c_handle_read(struct rk3x_i2c *i2c, unsigned int ipd)
        if (!(ipd & REG_INT_MBRF))
                return;
 
-       /* ack interrupt */
-       i2c_writel(i2c, REG_INT_MBRF, REG_IPD);
+       /* ack interrupt (read also produces a spurious START flag, clear it too) */
+       i2c_writel(i2c, REG_INT_MBRF | REG_INT_START, REG_IPD);
 
        /* Can only handle a maximum of 32 bytes at a time */
        if (len > 32)
index b9b19a2..66145d2 100644 (file)
@@ -1493,6 +1493,7 @@ static irqreturn_t stm32f7_i2c_isr_event(int irq, void *data)
 {
        struct stm32f7_i2c_dev *i2c_dev = data;
        struct stm32f7_i2c_msg *f7_msg = &i2c_dev->f7_msg;
+       struct stm32_i2c_dma *dma = i2c_dev->dma;
        void __iomem *base = i2c_dev->base;
        u32 status, mask;
        int ret = IRQ_HANDLED;
@@ -1518,6 +1519,10 @@ static irqreturn_t stm32f7_i2c_isr_event(int irq, void *data)
                dev_dbg(i2c_dev->dev, "<%s>: Receive NACK (addr %x)\n",
                        __func__, f7_msg->addr);
                writel_relaxed(STM32F7_I2C_ICR_NACKCF, base + STM32F7_I2C_ICR);
+               if (i2c_dev->use_dma) {
+                       stm32f7_i2c_disable_dma_req(i2c_dev);
+                       dmaengine_terminate_async(dma->chan_using);
+               }
                f7_msg->result = -ENXIO;
        }
 
@@ -1533,7 +1538,7 @@ static irqreturn_t stm32f7_i2c_isr_event(int irq, void *data)
                /* Clear STOP flag */
                writel_relaxed(STM32F7_I2C_ICR_STOPCF, base + STM32F7_I2C_ICR);
 
-               if (i2c_dev->use_dma) {
+               if (i2c_dev->use_dma && !f7_msg->result) {
                        ret = IRQ_WAKE_THREAD;
                } else {
                        i2c_dev->master_mode = false;
@@ -1546,7 +1551,7 @@ static irqreturn_t stm32f7_i2c_isr_event(int irq, void *data)
                if (f7_msg->stop) {
                        mask = STM32F7_I2C_CR2_STOP;
                        stm32f7_i2c_set_bits(base + STM32F7_I2C_CR2, mask);
-               } else if (i2c_dev->use_dma) {
+               } else if (i2c_dev->use_dma && !f7_msg->result) {
                        ret = IRQ_WAKE_THREAD;
                } else if (f7_msg->smbus) {
                        stm32f7_i2c_smbus_rep_start(i2c_dev);
@@ -1583,7 +1588,7 @@ static irqreturn_t stm32f7_i2c_isr_event_thread(int irq, void *data)
        if (!ret) {
                dev_dbg(i2c_dev->dev, "<%s>: Timed out\n", __func__);
                stm32f7_i2c_disable_dma_req(i2c_dev);
-               dmaengine_terminate_all(dma->chan_using);
+               dmaengine_terminate_async(dma->chan_using);
                f7_msg->result = -ETIMEDOUT;
        }
 
@@ -1660,7 +1665,7 @@ static irqreturn_t stm32f7_i2c_isr_error(int irq, void *data)
        /* Disable dma */
        if (i2c_dev->use_dma) {
                stm32f7_i2c_disable_dma_req(i2c_dev);
-               dmaengine_terminate_all(dma->chan_using);
+               dmaengine_terminate_async(dma->chan_using);
        }
 
        i2c_dev->master_mode = false;
@@ -1696,12 +1701,26 @@ static int stm32f7_i2c_xfer(struct i2c_adapter *i2c_adap,
        time_left = wait_for_completion_timeout(&i2c_dev->complete,
                                                i2c_dev->adap.timeout);
        ret = f7_msg->result;
+       if (ret) {
+               if (i2c_dev->use_dma)
+                       dmaengine_synchronize(dma->chan_using);
+
+               /*
+                * It is possible that some unsent data have already been
+                * written into TXDR. To avoid sending stale data in a
+                * subsequent transfer, flush TXDR on any error.
+                */
+               writel_relaxed(STM32F7_I2C_ISR_TXE,
+                              i2c_dev->base + STM32F7_I2C_ISR);
+               goto pm_free;
+       }
 
        if (!time_left) {
                dev_dbg(i2c_dev->dev, "Access to slave 0x%x timed out\n",
                        i2c_dev->msg->addr);
                if (i2c_dev->use_dma)
-                       dmaengine_terminate_all(dma->chan_using);
+                       dmaengine_terminate_sync(dma->chan_using);
+               stm32f7_i2c_wait_free_bus(i2c_dev);
                ret = -ETIMEDOUT;
        }
 
@@ -1744,13 +1763,25 @@ static int stm32f7_i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr,
        timeout = wait_for_completion_timeout(&i2c_dev->complete,
                                              i2c_dev->adap.timeout);
        ret = f7_msg->result;
-       if (ret)
+       if (ret) {
+               if (i2c_dev->use_dma)
+                       dmaengine_synchronize(dma->chan_using);
+
+               /*
+                * It is possible that some unsent data have already been
+                * written into TXDR. To avoid sending stale data in a
+                * subsequent transfer, flush TXDR on any error.
+                */
+               writel_relaxed(STM32F7_I2C_ISR_TXE,
+                              i2c_dev->base + STM32F7_I2C_ISR);
                goto pm_free;
+       }
 
        if (!timeout) {
                dev_dbg(dev, "Access to slave 0x%x timed out\n", f7_msg->addr);
                if (i2c_dev->use_dma)
-                       dmaengine_terminate_all(dma->chan_using);
+                       dmaengine_terminate_sync(dma->chan_using);
+               stm32f7_i2c_wait_free_bus(i2c_dev);
                ret = -ETIMEDOUT;
                goto pm_free;
        }
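
The stm32f7 hunks above follow the standard dmaengine teardown discipline: dmaengine_terminate_all() is replaced by dmaengine_terminate_async() in the interrupt handlers (which must not sleep), and the sleepable paths either call dmaengine_synchronize() before reusing buffers or use dmaengine_terminate_sync(), which combines the two. A generic sketch of that pattern, independent of this driver (the example_* names are illustrative):

        #include <linux/dmaengine.h>
        #include <linux/interrupt.h>

        /* Atomic (hard-IRQ) context: only request termination, never wait here. */
        static irqreturn_t example_isr(int irq, void *data)
        {
                struct dma_chan *chan = data;

                dmaengine_terminate_async(chan);
                return IRQ_WAKE_THREAD;
        }

        /* Process context: wait until the termination and any in-flight
         * descriptor callbacks have finished before touching or freeing
         * the DMA buffers.
         */
        static void example_teardown(struct dma_chan *chan)
        {
                dmaengine_synchronize(chan);
                /* dmaengine_terminate_sync() == terminate_async() + synchronize() */
        }
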
index 1ed4daa..9537878 100644 (file)
@@ -104,11 +104,10 @@ static int virtio_i2c_prepare_reqs(struct virtqueue *vq,
 
 static int virtio_i2c_complete_reqs(struct virtqueue *vq,
                                    struct virtio_i2c_req *reqs,
-                                   struct i2c_msg *msgs, int num,
-                                   bool timedout)
+                                   struct i2c_msg *msgs, int num)
 {
        struct virtio_i2c_req *req;
-       bool failed = timedout;
+       bool failed = false;
        unsigned int len;
        int i, j = 0;
 
@@ -130,7 +129,7 @@ static int virtio_i2c_complete_reqs(struct virtqueue *vq,
                        j++;
        }
 
-       return timedout ? -ETIMEDOUT : j;
+       return j;
 }
 
 static int virtio_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs,
@@ -139,7 +138,6 @@ static int virtio_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs,
        struct virtio_i2c *vi = i2c_get_adapdata(adap);
        struct virtqueue *vq = vi->vq;
        struct virtio_i2c_req *reqs;
-       unsigned long time_left;
        int count;
 
        reqs = kcalloc(num, sizeof(*reqs), GFP_KERNEL);
@@ -162,11 +160,9 @@ static int virtio_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs,
        reinit_completion(&vi->completion);
        virtqueue_kick(vq);
 
-       time_left = wait_for_completion_timeout(&vi->completion, adap->timeout);
-       if (!time_left)
-               dev_err(&adap->dev, "virtio i2c backend timeout.\n");
+       wait_for_completion(&vi->completion);
 
-       count = virtio_i2c_complete_reqs(vq, reqs, msgs, count, !time_left);
+       count = virtio_i2c_complete_reqs(vq, reqs, msgs, count);
 
 err_free:
        kfree(reqs);
index 4ff5cd2..3d17a0b 100644 (file)
@@ -542,6 +542,7 @@ static struct xenbus_driver xenkbd_driver = {
        .remove = xenkbd_remove,
        .resume = xenkbd_resume,
        .otherend_changed = xenkbd_backend_changed,
+       .not_essential = true,
 };
 
 static int __init xenkbd_init(void)
index 13cbeb9..58da08c 100644 (file)
@@ -929,10 +929,8 @@ static int __init amd_iommu_v2_init(void)
 {
        int ret;
 
-       pr_info("AMD IOMMUv2 driver by Joerg Roedel <jroedel@suse.de>\n");
-
        if (!amd_iommu_v2_supported()) {
-               pr_info("AMD IOMMUv2 functionality not available on this system\n");
+               pr_info("AMD IOMMUv2 functionality not available on this system - This is not a bug.\n");
                /*
                 * Load anyway to provide the symbols to other modules
                 * which may use AMD IOMMUv2 optionally.
@@ -947,6 +945,8 @@ static int __init amd_iommu_v2_init(void)
 
        amd_iommu_register_ppr_notifier(&ppr_nb);
 
+       pr_info("AMD IOMMUv2 loaded and initialized\n");
+
        return 0;
 
 out:
index b39d223..71596fc 100644 (file)
@@ -144,6 +144,7 @@ static int cap_audit_static(struct intel_iommu *iommu, enum cap_audit_type type)
 {
        struct dmar_drhd_unit *d;
        struct intel_iommu *i;
+       int rc = 0;
 
        rcu_read_lock();
        if (list_empty(&dmar_drhd_units))
@@ -169,11 +170,11 @@ static int cap_audit_static(struct intel_iommu *iommu, enum cap_audit_type type)
         */
        if (intel_cap_smts_sanity() &&
            !intel_cap_flts_sanity() && !intel_cap_slts_sanity())
-               return -EOPNOTSUPP;
+               rc = -EOPNOTSUPP;
 
 out:
        rcu_read_unlock();
-       return 0;
+       return rc;
 }
 
 int intel_cap_audit(enum cap_audit_type type, struct intel_iommu *iommu)
index 0bde0c8..b6a8f32 100644 (file)
@@ -1339,13 +1339,11 @@ static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
        pte = &pte[pfn_level_offset(pfn, level)];
 
        do {
-               unsigned long level_pfn;
+               unsigned long level_pfn = pfn & level_mask(level);
 
                if (!dma_pte_present(pte))
                        goto next;
 
-               level_pfn = pfn & level_mask(level);
-
                /* If range covers entire pagetable, free it */
                if (start_pfn <= level_pfn &&
                    last_pfn >= level_pfn + level_size(level) - 1) {
@@ -1366,7 +1364,7 @@ static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
                                                       freelist);
                }
 next:
-               pfn += level_size(level);
+               pfn = level_pfn + level_size(level);
        } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
 
        if (first_pte)
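
A concrete case of what the new pfn stepping fixes: with 2 MiB entries at this level (level_size(level) == 512 pages), start_pfn == level_pfn + 10 and last_pfn == level_pfn + 1027, the old "pfn += level_size(level)" reaches level_pfn + 1034 after the second entry, so the "pfn <= last_pfn" loop condition ends the walk before the third PTE - whose first four pages still lie inside the range - is ever visited. Stepping to the aligned "level_pfn + level_size(level)" lands on level_pfn + 1024 instead, so that entry is processed, and hoisting the level_pfn computation keeps it valid on the "goto next" path for non-present entries as well.
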
index 5cb2608..7f23ad6 100644 (file)
@@ -200,8 +200,8 @@ static inline phys_addr_t rk_dte_pt_address(u32 dte)
 #define DTE_HI_MASK2   GENMASK(7, 4)
 #define DTE_HI_SHIFT1  24 /* shift bit 8 to bit 32 */
 #define DTE_HI_SHIFT2  32 /* shift bit 4 to bit 36 */
-#define PAGE_DESC_HI_MASK1     GENMASK_ULL(39, 36)
-#define PAGE_DESC_HI_MASK2     GENMASK_ULL(35, 32)
+#define PAGE_DESC_HI_MASK1     GENMASK_ULL(35, 32)
+#define PAGE_DESC_HI_MASK2     GENMASK_ULL(39, 36)
 
 static inline phys_addr_t rk_dte_pt_address_v2(u32 dte)
 {
index 79fa36d..cd9cb35 100644 (file)
@@ -1199,6 +1199,7 @@ void cec_received_msg_ts(struct cec_adapter *adap,
                        if (abort)
                                dst->rx_status |= CEC_RX_STATUS_FEATURE_ABORT;
                        msg->flags = dst->flags;
+                       msg->sequence = dst->sequence;
                        /* Remove it from the wait_queue */
                        list_del_init(&data->list);
 
index 1094575..90acafd 100644 (file)
@@ -241,6 +241,7 @@ static void *vb2_dma_sg_get_userptr(struct vb2_buffer *vb, struct device *dev,
        buf->offset = vaddr & ~PAGE_MASK;
        buf->size = size;
        buf->dma_sgt = &buf->sg_table;
+       buf->vb = vb;
        vec = vb2_create_framevec(vaddr, size);
        if (IS_ERR(vec))
                goto userptr_fail_pfnvec;
@@ -642,6 +643,7 @@ static void *vb2_dma_sg_attach_dmabuf(struct vb2_buffer *vb, struct device *dev,
        buf->dma_dir = vb->vb2_queue->dma_dir;
        buf->size = size;
        buf->db_attach = dba;
+       buf->vb = vb;
 
        return buf;
 }
index 822ce30..48909fa 100644 (file)
@@ -7,9 +7,9 @@
 #include <linux/gpio/consumer.h>
 #include <linux/i2c.h>
 #include <linux/module.h>
-#include <linux/of_graph.h>
 #include <linux/pm_runtime.h>
 #include <linux/pm.h>
+#include <linux/property.h>
 #include <linux/regulator/consumer.h>
 #include <media/v4l2-ctrls.h>
 #include <media/v4l2-device.h>
@@ -2176,7 +2176,7 @@ static struct i2c_driver hi846_i2c_driver = {
        .driver = {
                .name = "hi846",
                .pm = &hi846_pm_ops,
-               .of_match_table = of_match_ptr(hi846_of_match),
+               .of_match_table = hi846_of_match,
        },
        .probe_new = hi846_probe,
        .remove = hi846_remove,
index 8176769..0f3d6b5 100644 (file)
@@ -751,10 +751,6 @@ static int put_v4l2_ext_controls32(struct v4l2_ext_controls *p64,
 /*
  * x86 is the only compat architecture with different struct alignment
  * between 32-bit and 64-bit tasks.
- *
- * On all other architectures, v4l2_event32 and v4l2_event32_time32 are
- * the same as v4l2_event and v4l2_event_time32, so we can use the native
- * handlers, converting v4l2_event to v4l2_event_time32 if necessary.
  */
 struct v4l2_event32 {
        __u32                           type;
@@ -772,21 +768,6 @@ struct v4l2_event32 {
        __u32                           reserved[8];
 };
 
-#ifdef CONFIG_COMPAT_32BIT_TIME
-struct v4l2_event32_time32 {
-       __u32                           type;
-       union {
-               compat_s64              value64;
-               __u8                    data[64];
-       } u;
-       __u32                           pending;
-       __u32                           sequence;
-       struct old_timespec32           timestamp;
-       __u32                           id;
-       __u32                           reserved[8];
-};
-#endif
-
 static int put_v4l2_event32(struct v4l2_event *p64,
                            struct v4l2_event32 __user *p32)
 {
@@ -802,7 +783,22 @@ static int put_v4l2_event32(struct v4l2_event *p64,
        return 0;
 }
 
+#endif
+
 #ifdef CONFIG_COMPAT_32BIT_TIME
+struct v4l2_event32_time32 {
+       __u32                           type;
+       union {
+               compat_s64              value64;
+               __u8                    data[64];
+       } u;
+       __u32                           pending;
+       __u32                           sequence;
+       struct old_timespec32           timestamp;
+       __u32                           id;
+       __u32                           reserved[8];
+};
+
 static int put_v4l2_event32_time32(struct v4l2_event *p64,
                                   struct v4l2_event32_time32 __user *p32)
 {
@@ -818,7 +814,6 @@ static int put_v4l2_event32_time32(struct v4l2_event *p64,
        return 0;
 }
 #endif
-#endif
 
 struct v4l2_edid32 {
        __u32 pad;
@@ -880,9 +875,7 @@ static int put_v4l2_edid32(struct v4l2_edid *p64,
 #define VIDIOC_QUERYBUF32_TIME32       _IOWR('V',  9, struct v4l2_buffer32_time32)
 #define VIDIOC_QBUF32_TIME32           _IOWR('V', 15, struct v4l2_buffer32_time32)
 #define VIDIOC_DQBUF32_TIME32          _IOWR('V', 17, struct v4l2_buffer32_time32)
-#ifdef CONFIG_X86_64
 #define        VIDIOC_DQEVENT32_TIME32         _IOR ('V', 89, struct v4l2_event32_time32)
-#endif
 #define VIDIOC_PREPARE_BUF32_TIME32    _IOWR('V', 93, struct v4l2_buffer32_time32)
 #endif
 
@@ -936,10 +929,10 @@ unsigned int v4l2_compat_translate_cmd(unsigned int cmd)
 #ifdef CONFIG_X86_64
        case VIDIOC_DQEVENT32:
                return VIDIOC_DQEVENT;
+#endif
 #ifdef CONFIG_COMPAT_32BIT_TIME
        case VIDIOC_DQEVENT32_TIME32:
                return VIDIOC_DQEVENT;
-#endif
 #endif
        }
        return cmd;
@@ -1032,10 +1025,10 @@ int v4l2_compat_put_user(void __user *arg, void *parg, unsigned int cmd)
 #ifdef CONFIG_X86_64
        case VIDIOC_DQEVENT32:
                return put_v4l2_event32(parg, arg);
+#endif
 #ifdef CONFIG_COMPAT_32BIT_TIME
        case VIDIOC_DQEVENT32_TIME32:
                return put_v4l2_event32_time32(parg, arg);
-#endif
 #endif
        }
        return 0;
index b883dcc..e201e59 100644 (file)
@@ -241,7 +241,7 @@ static void mtk_smi_larb_config_port_gen2_general(struct device *dev)
 {
        struct mtk_smi_larb *larb = dev_get_drvdata(dev);
        u32 reg, flags_general = larb->larb_gen->flags_general;
-       const u8 *larbostd = larb->larb_gen->ostd[larb->larbid];
+       const u8 *larbostd = larb->larb_gen->ostd ? larb->larb_gen->ostd[larb->larbid] : NULL;
        int i;
 
        if (BIT(larb->larbid) & larb->larb_gen->larb_direct_to_common_mask)
index f4c8e1a..b431cdd 100644 (file)
@@ -1514,6 +1514,12 @@ static int mmc_spi_remove(struct spi_device *spi)
        return 0;
 }
 
+static const struct spi_device_id mmc_spi_dev_ids[] = {
+       { "mmc-spi-slot"},
+       { },
+};
+MODULE_DEVICE_TABLE(spi, mmc_spi_dev_ids);
+
 static const struct of_device_id mmc_spi_of_match_table[] = {
        { .compatible = "mmc-spi-slot", },
        {},
@@ -1525,6 +1531,7 @@ static struct spi_driver mmc_spi_driver = {
                .name =         "mmc_spi",
                .of_match_table = mmc_spi_of_match_table,
        },
+       .id_table =     mmc_spi_dev_ids,
        .probe =        mmc_spi_probe,
        .remove =       mmc_spi_remove,
 };
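
This hunk (and the b53 SPI hunk further down) adds a spi_device_id table next to the existing of_device_id table. The point is module autoloading: the SPI core emits MODALIAS=spi:<name> uevents even for devices instantiated from the device tree, while of_match_table only yields "of:" aliases, so without the "spi:" aliases generated from MODULE_DEVICE_TABLE(spi, ...) udev has nothing to match and the module is never loaded automatically. Roughly, the strings involved look like this (illustrative, not copied from a log):

        /* uevent emitted by the SPI core for a node compatible with "mmc-spi-slot": */
        MODALIAS=spi:mmc-spi-slot

        /* alias generated by modpost from mmc_spi_dev_ids[] above: */
        MODULE_ALIAS("spi:mmc-spi-slot");
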
index afaf337..764ee1b 100644 (file)
@@ -310,7 +310,6 @@ static struct esdhc_soc_data usdhc_imx8qxp_data = {
        .flags = ESDHC_FLAG_USDHC | ESDHC_FLAG_STD_TUNING
                        | ESDHC_FLAG_HAVE_CAP1 | ESDHC_FLAG_HS200
                        | ESDHC_FLAG_HS400 | ESDHC_FLAG_HS400_ES
-                       | ESDHC_FLAG_CQHCI
                        | ESDHC_FLAG_STATE_LOST_IN_LPMODE
                        | ESDHC_FLAG_CLK_RATE_LOST_IN_PM_RUNTIME,
 };
@@ -319,7 +318,6 @@ static struct esdhc_soc_data usdhc_imx8mm_data = {
        .flags = ESDHC_FLAG_USDHC | ESDHC_FLAG_STD_TUNING
                        | ESDHC_FLAG_HAVE_CAP1 | ESDHC_FLAG_HS200
                        | ESDHC_FLAG_HS400 | ESDHC_FLAG_HS400_ES
-                       | ESDHC_FLAG_CQHCI
                        | ESDHC_FLAG_STATE_LOST_IN_LPMODE,
 };
 
index 269c865..07c6da1 100644 (file)
@@ -771,7 +771,19 @@ static void sdhci_adma_table_pre(struct sdhci_host *host,
                        len -= offset;
                }
 
-               BUG_ON(len > 65536);
+               /*
+                * The block layer forces a minimum segment size of PAGE_SIZE,
+                * so 'len' can be too big here if PAGE_SIZE >= 64KiB. Write
+                * multiple descriptors, noting that the ADMA table is sized
+                * for 4KiB chunks anyway, so it will be big enough.
+                */
+               while (len > host->max_adma) {
+                       int n = 32 * 1024; /* 32KiB */
+
+                       __sdhci_adma_write_desc(host, &desc, addr, n, ADMA2_TRAN_VALID);
+                       addr += n;
+                       len -= n;
+               }
 
                /* tran, valid */
                if (len)
@@ -3968,6 +3980,7 @@ struct sdhci_host *sdhci_alloc_host(struct device *dev,
         * descriptor for each segment, plus 1 for a nop end descriptor.
         */
        host->adma_table_cnt = SDHCI_MAX_SEGS * 2 + 1;
+       host->max_adma = 65536;
 
        host->max_timeout_count = 0xE;
 
@@ -4633,10 +4646,12 @@ int sdhci_setup_host(struct sdhci_host *host)
         * be larger than 64 KiB though.
         */
        if (host->flags & SDHCI_USE_ADMA) {
-               if (host->quirks & SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC)
+               if (host->quirks & SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC) {
+                       host->max_adma = 65532; /* 32-bit alignment */
                        mmc->max_seg_size = 65535;
-               else
+               } else {
                        mmc->max_seg_size = 65536;
+               }
        } else {
                mmc->max_seg_size = mmc->max_req_size;
        }
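
Putting numbers on the new splitting: with the zero-length-descriptor quirk, max_adma is 65532 and max_seg_size is 65535, yet the block layer can still hand over a full 64 KiB (65536-byte) segment on a 64 KiB-page system, since it does not allow a maximum segment size below PAGE_SIZE. Such a segment now becomes one 32 KiB descriptor from the while loop plus a final 32768-byte "tran, valid" descriptor, instead of a single descriptor exceeding the controller limit. Without the quirk, max_adma and max_seg_size are both 65536, the loop body never executes, and behaviour is unchanged apart from the removed BUG_ON.
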
index bb88355..d7929d7 100644 (file)
@@ -340,7 +340,8 @@ struct sdhci_adma2_64_desc {
 
 /*
  * Maximum segments assuming a 512KiB maximum requisition size and a minimum
- * 4KiB page size.
+ * 4KiB page size. Note this also allows enough for multiple descriptors in
+ * case of PAGE_SIZE >= 64KiB.
  */
 #define SDHCI_MAX_SEGS         128
 
@@ -543,6 +544,7 @@ struct sdhci_host {
        unsigned int blocks;    /* remaining PIO blocks */
 
        int sg_count;           /* Mapped sg entries */
+       int max_adma;           /* Max. length in ADMA descriptor */
 
        void *adma_table;       /* ADMA descriptor table */
        void *align_buffer;     /* Bounce buffer */
index 10506a4..6cccc3d 100644 (file)
@@ -567,9 +567,7 @@ config XEN_NETDEV_BACKEND
 config VMXNET3
        tristate "VMware VMXNET3 ethernet driver"
        depends on PCI && INET
-       depends on !(PAGE_SIZE_64KB || ARM64_64K_PAGES || \
-                    IA64_PAGE_SIZE_64KB || PARISC_PAGE_SIZE_64KB || \
-                    PPC_64K_PAGES)
+       depends on PAGE_SIZE_LESS_THAN_64KB
        help
          This driver supports VMware's vmxnet3 virtual ethernet NIC.
          To compile this driver as a module, choose M here: the
index 01e37b7..2b88f03 100644 (file)
@@ -349,6 +349,19 @@ static const struct of_device_id b53_spi_of_match[] = {
 };
 MODULE_DEVICE_TABLE(of, b53_spi_of_match);
 
+static const struct spi_device_id b53_spi_ids[] = {
+       { .name = "bcm5325" },
+       { .name = "bcm5365" },
+       { .name = "bcm5395" },
+       { .name = "bcm5397" },
+       { .name = "bcm5398" },
+       { .name = "bcm53115" },
+       { .name = "bcm53125" },
+       { .name = "bcm53128" },
+       { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(spi, b53_spi_ids);
+
 static struct spi_driver b53_spi_driver = {
        .driver = {
                .name   = "b53-switch",
@@ -357,6 +370,7 @@ static struct spi_driver b53_spi_driver = {
        .probe  = b53_spi_probe,
        .remove = b53_spi_remove,
        .shutdown = b53_spi_shutdown,
+       .id_table = b53_spi_ids,
 };
 
 module_spi_driver(b53_spi_driver);
index 43fc308..013e9c0 100644 (file)
@@ -1002,57 +1002,32 @@ static void ksz8_cfg_port_member(struct ksz_device *dev, int port, u8 member)
        data &= ~PORT_VLAN_MEMBERSHIP;
        data |= (member & dev->port_mask);
        ksz_pwrite8(dev, port, P_MIRROR_CTRL, data);
-       dev->ports[port].member = member;
 }
 
 static void ksz8_port_stp_state_set(struct dsa_switch *ds, int port, u8 state)
 {
        struct ksz_device *dev = ds->priv;
-       int forward = dev->member;
        struct ksz_port *p;
-       int member = -1;
        u8 data;
 
-       p = &dev->ports[port];
-
        ksz_pread8(dev, port, P_STP_CTRL, &data);
        data &= ~(PORT_TX_ENABLE | PORT_RX_ENABLE | PORT_LEARN_DISABLE);
 
        switch (state) {
        case BR_STATE_DISABLED:
                data |= PORT_LEARN_DISABLE;
-               if (port < dev->phy_port_cnt)
-                       member = 0;
                break;
        case BR_STATE_LISTENING:
                data |= (PORT_RX_ENABLE | PORT_LEARN_DISABLE);
-               if (port < dev->phy_port_cnt &&
-                   p->stp_state == BR_STATE_DISABLED)
-                       member = dev->host_mask | p->vid_member;
                break;
        case BR_STATE_LEARNING:
                data |= PORT_RX_ENABLE;
                break;
        case BR_STATE_FORWARDING:
                data |= (PORT_TX_ENABLE | PORT_RX_ENABLE);
-
-               /* This function is also used internally. */
-               if (port == dev->cpu_port)
-                       break;
-
-               /* Port is a member of a bridge. */
-               if (dev->br_member & BIT(port)) {
-                       dev->member |= BIT(port);
-                       member = dev->member;
-               } else {
-                       member = dev->host_mask | p->vid_member;
-               }
                break;
        case BR_STATE_BLOCKING:
                data |= PORT_LEARN_DISABLE;
-               if (port < dev->phy_port_cnt &&
-                   p->stp_state == BR_STATE_DISABLED)
-                       member = dev->host_mask | p->vid_member;
                break;
        default:
                dev_err(ds->dev, "invalid STP state: %d\n", state);
@@ -1060,22 +1035,11 @@ static void ksz8_port_stp_state_set(struct dsa_switch *ds, int port, u8 state)
        }
 
        ksz_pwrite8(dev, port, P_STP_CTRL, data);
+
+       p = &dev->ports[port];
        p->stp_state = state;
-       /* Port membership may share register with STP state. */
-       if (member >= 0 && member != p->member)
-               ksz8_cfg_port_member(dev, port, (u8)member);
-
-       /* Check if forwarding needs to be updated. */
-       if (state != BR_STATE_FORWARDING) {
-               if (dev->br_member & BIT(port))
-                       dev->member &= ~BIT(port);
-       }
 
-       /* When topology has changed the function ksz_update_port_member
-        * should be called to modify port forwarding behavior.
-        */
-       if (forward != dev->member)
-               ksz_update_port_member(dev, port);
+       ksz_update_port_member(dev, port);
 }
 
 static void ksz8_flush_dyn_mac_table(struct ksz_device *dev, int port)
@@ -1341,7 +1305,7 @@ static void ksz8795_cpu_interface_select(struct ksz_device *dev, int port)
 
 static void ksz8_port_setup(struct ksz_device *dev, int port, bool cpu_port)
 {
-       struct ksz_port *p = &dev->ports[port];
+       struct dsa_switch *ds = dev->ds;
        struct ksz8 *ksz8 = dev->priv;
        const u32 *masks;
        u8 member;
@@ -1368,10 +1332,11 @@ static void ksz8_port_setup(struct ksz_device *dev, int port, bool cpu_port)
                if (!ksz_is_ksz88x3(dev))
                        ksz8795_cpu_interface_select(dev, port);
 
-               member = dev->port_mask;
+               member = dsa_user_ports(ds);
        } else {
-               member = dev->host_mask | p->vid_member;
+               member = BIT(dsa_upstream_port(ds, port));
        }
+
        ksz8_cfg_port_member(dev, port, member);
 }
 
@@ -1392,20 +1357,13 @@ static void ksz8_config_cpu_port(struct dsa_switch *ds)
        ksz_cfg(dev, regs[S_TAIL_TAG_CTRL], masks[SW_TAIL_TAG_ENABLE], true);
 
        p = &dev->ports[dev->cpu_port];
-       p->vid_member = dev->port_mask;
        p->on = 1;
 
        ksz8_port_setup(dev, dev->cpu_port, true);
-       dev->member = dev->host_mask;
 
        for (i = 0; i < dev->phy_port_cnt; i++) {
                p = &dev->ports[i];
 
-               /* Initialize to non-zero so that ksz_cfg_port_member() will
-                * be called.
-                */
-               p->vid_member = BIT(i);
-               p->member = dev->port_mask;
                ksz8_port_stp_state_set(ds, i, BR_STATE_DISABLED);
 
                /* Last port may be disabled. */
index 854e25f..353b5f9 100644 (file)
@@ -391,7 +391,6 @@ static void ksz9477_cfg_port_member(struct ksz_device *dev, int port,
                                    u8 member)
 {
        ksz_pwrite32(dev, port, REG_PORT_VLAN_MEMBERSHIP__4, member);
-       dev->ports[port].member = member;
 }
 
 static void ksz9477_port_stp_state_set(struct dsa_switch *ds, int port,
@@ -400,8 +399,6 @@ static void ksz9477_port_stp_state_set(struct dsa_switch *ds, int port,
        struct ksz_device *dev = ds->priv;
        struct ksz_port *p = &dev->ports[port];
        u8 data;
-       int member = -1;
-       int forward = dev->member;
 
        ksz_pread8(dev, port, P_STP_CTRL, &data);
        data &= ~(PORT_TX_ENABLE | PORT_RX_ENABLE | PORT_LEARN_DISABLE);
@@ -409,40 +406,18 @@ static void ksz9477_port_stp_state_set(struct dsa_switch *ds, int port,
        switch (state) {
        case BR_STATE_DISABLED:
                data |= PORT_LEARN_DISABLE;
-               if (port != dev->cpu_port)
-                       member = 0;
                break;
        case BR_STATE_LISTENING:
                data |= (PORT_RX_ENABLE | PORT_LEARN_DISABLE);
-               if (port != dev->cpu_port &&
-                   p->stp_state == BR_STATE_DISABLED)
-                       member = dev->host_mask | p->vid_member;
                break;
        case BR_STATE_LEARNING:
                data |= PORT_RX_ENABLE;
                break;
        case BR_STATE_FORWARDING:
                data |= (PORT_TX_ENABLE | PORT_RX_ENABLE);
-
-               /* This function is also used internally. */
-               if (port == dev->cpu_port)
-                       break;
-
-               member = dev->host_mask | p->vid_member;
-               mutex_lock(&dev->dev_mutex);
-
-               /* Port is a member of a bridge. */
-               if (dev->br_member & (1 << port)) {
-                       dev->member |= (1 << port);
-                       member = dev->member;
-               }
-               mutex_unlock(&dev->dev_mutex);
                break;
        case BR_STATE_BLOCKING:
                data |= PORT_LEARN_DISABLE;
-               if (port != dev->cpu_port &&
-                   p->stp_state == BR_STATE_DISABLED)
-                       member = dev->host_mask | p->vid_member;
                break;
        default:
                dev_err(ds->dev, "invalid STP state: %d\n", state);
@@ -451,23 +426,8 @@ static void ksz9477_port_stp_state_set(struct dsa_switch *ds, int port,
 
        ksz_pwrite8(dev, port, P_STP_CTRL, data);
        p->stp_state = state;
-       mutex_lock(&dev->dev_mutex);
-       /* Port membership may share register with STP state. */
-       if (member >= 0 && member != p->member)
-               ksz9477_cfg_port_member(dev, port, (u8)member);
-
-       /* Check if forwarding needs to be updated. */
-       if (state != BR_STATE_FORWARDING) {
-               if (dev->br_member & (1 << port))
-                       dev->member &= ~(1 << port);
-       }
 
-       /* When topology has changed the function ksz_update_port_member
-        * should be called to modify port forwarding behavior.
-        */
-       if (forward != dev->member)
-               ksz_update_port_member(dev, port);
-       mutex_unlock(&dev->dev_mutex);
+       ksz_update_port_member(dev, port);
 }
 
 static void ksz9477_flush_dyn_mac_table(struct ksz_device *dev, int port)
@@ -1168,10 +1128,10 @@ static void ksz9477_phy_errata_setup(struct ksz_device *dev, int port)
 
 static void ksz9477_port_setup(struct ksz_device *dev, int port, bool cpu_port)
 {
-       u8 data8;
-       u8 member;
-       u16 data16;
        struct ksz_port *p = &dev->ports[port];
+       struct dsa_switch *ds = dev->ds;
+       u8 data8, member;
+       u16 data16;
 
        /* enable tag tail for host port */
        if (cpu_port)
@@ -1250,12 +1210,12 @@ static void ksz9477_port_setup(struct ksz_device *dev, int port, bool cpu_port)
                ksz_pwrite8(dev, port, REG_PORT_XMII_CTRL_1, data8);
                p->phydev.duplex = 1;
        }
-       mutex_lock(&dev->dev_mutex);
+
        if (cpu_port)
-               member = dev->port_mask;
+               member = dsa_user_ports(ds);
        else
-               member = dev->host_mask | p->vid_member;
-       mutex_unlock(&dev->dev_mutex);
+               member = BIT(dsa_upstream_port(ds, port));
+
        ksz9477_cfg_port_member(dev, port, member);
 
        /* clear pending interrupts */
@@ -1276,8 +1236,6 @@ static void ksz9477_config_cpu_port(struct dsa_switch *ds)
                        const char *prev_mode;
 
                        dev->cpu_port = i;
-                       dev->host_mask = (1 << dev->cpu_port);
-                       dev->port_mask |= dev->host_mask;
                        p = &dev->ports[i];
 
                        /* Read from XMII register to determine host port
@@ -1312,23 +1270,15 @@ static void ksz9477_config_cpu_port(struct dsa_switch *ds)
 
                        /* enable cpu port */
                        ksz9477_port_setup(dev, i, true);
-                       p->vid_member = dev->port_mask;
                        p->on = 1;
                }
        }
 
-       dev->member = dev->host_mask;
-
        for (i = 0; i < dev->port_cnt; i++) {
                if (i == dev->cpu_port)
                        continue;
                p = &dev->ports[i];
 
-               /* Initialize to non-zero so that ksz_cfg_port_member() will
-                * be called.
-                */
-               p->vid_member = (1 << i);
-               p->member = dev->port_mask;
                ksz9477_port_stp_state_set(ds, i, BR_STATE_DISABLED);
                p->on = 1;
                if (i < dev->phy_port_cnt)
index 7c2968a..8a04302 100644 (file)
 
 void ksz_update_port_member(struct ksz_device *dev, int port)
 {
-       struct ksz_port *p;
+       struct ksz_port *p = &dev->ports[port];
+       struct dsa_switch *ds = dev->ds;
+       u8 port_member = 0, cpu_port;
+       const struct dsa_port *dp;
        int i;
 
-       for (i = 0; i < dev->port_cnt; i++) {
-               if (i == port || i == dev->cpu_port)
+       if (!dsa_is_user_port(ds, port))
+               return;
+
+       dp = dsa_to_port(ds, port);
+       cpu_port = BIT(dsa_upstream_port(ds, port));
+
+       for (i = 0; i < ds->num_ports; i++) {
+               const struct dsa_port *other_dp = dsa_to_port(ds, i);
+               struct ksz_port *other_p = &dev->ports[i];
+               u8 val = 0;
+
+               if (!dsa_is_user_port(ds, i))
                        continue;
-               p = &dev->ports[i];
-               if (!(dev->member & (1 << i)))
+               if (port == i)
+                       continue;
+               if (!dp->bridge_dev || dp->bridge_dev != other_dp->bridge_dev)
                        continue;
 
-               /* Port is a member of the bridge and is forwarding. */
-               if (p->stp_state == BR_STATE_FORWARDING &&
-                   p->member != dev->member)
-                       dev->dev_ops->cfg_port_member(dev, i, dev->member);
+               if (other_p->stp_state == BR_STATE_FORWARDING &&
+                   p->stp_state == BR_STATE_FORWARDING) {
+                       val |= BIT(port);
+                       port_member |= BIT(i);
+               }
+
+               dev->dev_ops->cfg_port_member(dev, i, val | cpu_port);
        }
+
+       dev->dev_ops->cfg_port_member(dev, port, port_member | cpu_port);
 }
 EXPORT_SYMBOL_GPL(ksz_update_port_member);
 
@@ -175,12 +194,6 @@ EXPORT_SYMBOL_GPL(ksz_get_ethtool_stats);
 int ksz_port_bridge_join(struct dsa_switch *ds, int port,
                         struct net_device *br)
 {
-       struct ksz_device *dev = ds->priv;
-
-       mutex_lock(&dev->dev_mutex);
-       dev->br_member |= (1 << port);
-       mutex_unlock(&dev->dev_mutex);
-
        /* port_stp_state_set() will be called after to put the port in
         * appropriate state so there is no need to do anything.
         */
@@ -192,13 +205,6 @@ EXPORT_SYMBOL_GPL(ksz_port_bridge_join);
 void ksz_port_bridge_leave(struct dsa_switch *ds, int port,
                           struct net_device *br)
 {
-       struct ksz_device *dev = ds->priv;
-
-       mutex_lock(&dev->dev_mutex);
-       dev->br_member &= ~(1 << port);
-       dev->member &= ~(1 << port);
-       mutex_unlock(&dev->dev_mutex);
-
        /* port_stp_state_set() will be called after to put the port in
         * forwarding state so there is no need to do anything.
         */
index 1597c63..54b456b 100644 (file)
@@ -25,8 +25,6 @@ struct ksz_port_mib {
 };
 
 struct ksz_port {
-       u16 member;
-       u16 vid_member;
        bool remove_tag;                /* Remove Tag flag set, for ksz8795 only */
        int stp_state;
        struct phy_device phydev;
@@ -83,8 +81,6 @@ struct ksz_device {
        struct ksz_port *ports;
        struct delayed_work mib_read;
        unsigned long mib_read_interval;
-       u16 br_member;
-       u16 member;
        u16 mirror_rx;
        u16 mirror_tx;
        u32 features;                   /* chip specific features */
index 6ea0036..5527301 100644 (file)
@@ -50,11 +50,22 @@ static int mv88e6390_serdes_write(struct mv88e6xxx_chip *chip,
 }
 
 static int mv88e6xxx_serdes_pcs_get_state(struct mv88e6xxx_chip *chip,
-                                         u16 status, u16 lpa,
+                                         u16 ctrl, u16 status, u16 lpa,
                                          struct phylink_link_state *state)
 {
+       state->link = !!(status & MV88E6390_SGMII_PHY_STATUS_LINK);
+
        if (status & MV88E6390_SGMII_PHY_STATUS_SPD_DPL_VALID) {
-               state->link = !!(status & MV88E6390_SGMII_PHY_STATUS_LINK);
+               /* The Speed and Duplex Resolved register is 1 if AN is enabled
+                * and complete, or if AN is disabled. So with disabled AN we
+                * still get here on link up. But we want to set an_complete
+                * only if AN was enabled, thus we look at BMCR_ANENABLE.
+                * (According to 802.3-2008 section 22.2.4.2.10, we should be
+                *  able to get this same value from BMSR_ANEGCAPABLE, but tests
+                *  show that these Marvell PHYs don't conform to this part of
+                *  the specification - BMSR_ANEGCAPABLE is simply always 1.)
+                */
+               state->an_complete = !!(ctrl & BMCR_ANENABLE);
                state->duplex = status &
                                MV88E6390_SGMII_PHY_STATUS_DUPLEX_FULL ?
                                                 DUPLEX_FULL : DUPLEX_HALF;
@@ -81,6 +92,18 @@ static int mv88e6xxx_serdes_pcs_get_state(struct mv88e6xxx_chip *chip,
                        dev_err(chip->dev, "invalid PHY speed\n");
                        return -EINVAL;
                }
+       } else if (state->link &&
+                  state->interface != PHY_INTERFACE_MODE_SGMII) {
+               /* If Speed and Duplex Resolved register is 0 and link is up, it
+                * means that AN was enabled, but link partner had it disabled
+                * and the PHY invoked the Auto-Negotiation Bypass feature and
+                * linked anyway.
+                */
+               state->duplex = DUPLEX_FULL;
+               if (state->interface == PHY_INTERFACE_MODE_2500BASEX)
+                       state->speed = SPEED_2500;
+               else
+                       state->speed = SPEED_1000;
        } else {
                state->link = false;
        }
@@ -168,9 +191,15 @@ int mv88e6352_serdes_pcs_config(struct mv88e6xxx_chip *chip, int port,
 int mv88e6352_serdes_pcs_get_state(struct mv88e6xxx_chip *chip, int port,
                                   int lane, struct phylink_link_state *state)
 {
-       u16 lpa, status;
+       u16 lpa, status, ctrl;
        int err;
 
+       err = mv88e6352_serdes_read(chip, MII_BMCR, &ctrl);
+       if (err) {
+               dev_err(chip->dev, "can't read Serdes PHY control: %d\n", err);
+               return err;
+       }
+
        err = mv88e6352_serdes_read(chip, 0x11, &status);
        if (err) {
                dev_err(chip->dev, "can't read Serdes PHY status: %d\n", err);
@@ -183,7 +212,7 @@ int mv88e6352_serdes_pcs_get_state(struct mv88e6xxx_chip *chip, int port,
                return err;
        }
 
-       return mv88e6xxx_serdes_pcs_get_state(chip, status, lpa, state);
+       return mv88e6xxx_serdes_pcs_get_state(chip, ctrl, status, lpa, state);
 }
 
 int mv88e6352_serdes_pcs_an_restart(struct mv88e6xxx_chip *chip, int port,
@@ -883,9 +912,16 @@ int mv88e6390_serdes_pcs_config(struct mv88e6xxx_chip *chip, int port,
 static int mv88e6390_serdes_pcs_get_state_sgmii(struct mv88e6xxx_chip *chip,
        int port, int lane, struct phylink_link_state *state)
 {
-       u16 lpa, status;
+       u16 lpa, status, ctrl;
        int err;
 
+       err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS,
+                                   MV88E6390_SGMII_BMCR, &ctrl);
+       if (err) {
+               dev_err(chip->dev, "can't read Serdes PHY control: %d\n", err);
+               return err;
+       }
+
        err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS,
                                    MV88E6390_SGMII_PHY_STATUS, &status);
        if (err) {
@@ -900,7 +936,7 @@ static int mv88e6390_serdes_pcs_get_state_sgmii(struct mv88e6xxx_chip *chip,
                return err;
        }
 
-       return mv88e6xxx_serdes_pcs_get_state(chip, status, lpa, state);
+       return mv88e6xxx_serdes_pcs_get_state(chip, ctrl, status, lpa, state);
 }
 
 static int mv88e6390_serdes_pcs_get_state_10g(struct mv88e6xxx_chip *chip,
@@ -1271,9 +1307,31 @@ void mv88e6390_serdes_get_regs(struct mv88e6xxx_chip *chip, int port, void *_p)
        }
 }
 
-static int mv88e6393x_serdes_port_errata(struct mv88e6xxx_chip *chip, int lane)
+static int mv88e6393x_serdes_power_lane(struct mv88e6xxx_chip *chip, int lane,
+                                       bool on)
 {
-       u16 reg, pcs;
+       u16 reg;
+       int err;
+
+       err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS,
+                                   MV88E6393X_SERDES_CTRL1, &reg);
+       if (err)
+               return err;
+
+       if (on)
+               reg &= ~(MV88E6393X_SERDES_CTRL1_TX_PDOWN |
+                        MV88E6393X_SERDES_CTRL1_RX_PDOWN);
+       else
+               reg |= MV88E6393X_SERDES_CTRL1_TX_PDOWN |
+                      MV88E6393X_SERDES_CTRL1_RX_PDOWN;
+
+       return mv88e6390_serdes_write(chip, lane, MDIO_MMD_PHYXS,
+                                     MV88E6393X_SERDES_CTRL1, reg);
+}
+
+static int mv88e6393x_serdes_erratum_4_6(struct mv88e6xxx_chip *chip, int lane)
+{
+       u16 reg;
        int err;
 
        /* mv88e6393x family errata 4.6:
@@ -1284,26 +1342,45 @@ static int mv88e6393x_serdes_port_errata(struct mv88e6xxx_chip *chip, int lane)
         * It seems that after this workaround the SERDES is automatically
         * powered up (the bit is cleared), so power it down.
         */
-       if (lane == MV88E6393X_PORT0_LANE || lane == MV88E6393X_PORT9_LANE ||
-           lane == MV88E6393X_PORT10_LANE) {
-               err = mv88e6390_serdes_read(chip, lane,
-                                           MDIO_MMD_PHYXS,
-                                           MV88E6393X_SERDES_POC, &reg);
-               if (err)
-                       return err;
+       err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS,
+                                   MV88E6393X_SERDES_POC, &reg);
+       if (err)
+               return err;
 
-               reg &= ~MV88E6393X_SERDES_POC_PDOWN;
-               reg |= MV88E6393X_SERDES_POC_RESET;
+       reg &= ~MV88E6393X_SERDES_POC_PDOWN;
+       reg |= MV88E6393X_SERDES_POC_RESET;
 
-               err = mv88e6390_serdes_write(chip, lane, MDIO_MMD_PHYXS,
-                                            MV88E6393X_SERDES_POC, reg);
-               if (err)
-                       return err;
+       err = mv88e6390_serdes_write(chip, lane, MDIO_MMD_PHYXS,
+                                    MV88E6393X_SERDES_POC, reg);
+       if (err)
+               return err;
 
-               err = mv88e6390_serdes_power_sgmii(chip, lane, false);
-               if (err)
-                       return err;
-       }
+       err = mv88e6390_serdes_power_sgmii(chip, lane, false);
+       if (err)
+               return err;
+
+       return mv88e6393x_serdes_power_lane(chip, lane, false);
+}
+
+int mv88e6393x_serdes_setup_errata(struct mv88e6xxx_chip *chip)
+{
+       int err;
+
+       err = mv88e6393x_serdes_erratum_4_6(chip, MV88E6393X_PORT0_LANE);
+       if (err)
+               return err;
+
+       err = mv88e6393x_serdes_erratum_4_6(chip, MV88E6393X_PORT9_LANE);
+       if (err)
+               return err;
+
+       return mv88e6393x_serdes_erratum_4_6(chip, MV88E6393X_PORT10_LANE);
+}
+
+static int mv88e6393x_serdes_erratum_4_8(struct mv88e6xxx_chip *chip, int lane)
+{
+       u16 reg, pcs;
+       int err;
 
        /* mv88e6393x family errata 4.8:
         * When a SERDES port is operating in 1000BASE-X or SGMII mode link may
@@ -1334,38 +1411,149 @@ static int mv88e6393x_serdes_port_errata(struct mv88e6xxx_chip *chip, int lane)
                                      MV88E6393X_ERRATA_4_8_REG, reg);
 }
 
-int mv88e6393x_serdes_setup_errata(struct mv88e6xxx_chip *chip)
+static int mv88e6393x_serdes_erratum_5_2(struct mv88e6xxx_chip *chip, int lane,
+                                        u8 cmode)
+{
+       static const struct {
+               u16 dev, reg, val, mask;
+       } fixes[] = {
+               { MDIO_MMD_VEND1, 0x8093, 0xcb5a, 0xffff },
+               { MDIO_MMD_VEND1, 0x8171, 0x7088, 0xffff },
+               { MDIO_MMD_VEND1, 0x80c9, 0x311a, 0xffff },
+               { MDIO_MMD_VEND1, 0x80a2, 0x8000, 0xff7f },
+               { MDIO_MMD_VEND1, 0x80a9, 0x0000, 0xfff0 },
+               { MDIO_MMD_VEND1, 0x80a3, 0x0000, 0xf8ff },
+               { MDIO_MMD_PHYXS, MV88E6393X_SERDES_POC,
+                 MV88E6393X_SERDES_POC_RESET, MV88E6393X_SERDES_POC_RESET },
+       };
+       int err, i;
+       u16 reg;
+
+       /* mv88e6393x family errata 5.2:
+        * For optimal signal integrity the following sequence should be applied
+        * to SERDES operating in 10G mode. These registers only apply to 10G
+        * operation and have no effect on other speeds.
+        */
+       if (cmode != MV88E6393X_PORT_STS_CMODE_10GBASER)
+               return 0;
+
+       for (i = 0; i < ARRAY_SIZE(fixes); ++i) {
+               err = mv88e6390_serdes_read(chip, lane, fixes[i].dev,
+                                           fixes[i].reg, &reg);
+               if (err)
+                       return err;
+
+               reg &= ~fixes[i].mask;
+               reg |= fixes[i].val;
+
+               err = mv88e6390_serdes_write(chip, lane, fixes[i].dev,
+                                            fixes[i].reg, reg);
+               if (err)
+                       return err;
+       }
+
+       return 0;
+}
+
+static int mv88e6393x_serdes_fix_2500basex_an(struct mv88e6xxx_chip *chip,
+                                             int lane, u8 cmode, bool on)
 {
+       u16 reg;
        int err;
 
-       err = mv88e6393x_serdes_port_errata(chip, MV88E6393X_PORT0_LANE);
+       if (cmode != MV88E6XXX_PORT_STS_CMODE_2500BASEX)
+               return 0;
+
+       /* Inband AN is broken on Amethyst in 2500base-x mode when set by the
+        * standard mechanism (via cmode).
+        * We can get around this by configuring the PCS mode to 1000base-x
+        * and then writing value 0x58 to register 1e.8000. (This must be done
+        * while SerDes receiver and transmitter are disabled, which is the
+        * case when this function is called.)
+        * It seems that when we do this configuration to 2500base-x mode (by
+        * changing PCS mode to 1000base-x and frequency to 3.125 GHz from
+        * 1.25 GHz) and then configure to sgmii or 1000base-x, the device
+        * thinks that it already has SerDes at 1.25 GHz and does not change
+        * the 1e.8000 register, leaving SerDes at 3.125 GHz.
+        * To avoid this, change PCS mode back to 2500base-x when disabling
+        * SerDes from 2500base-x mode.
+        */
+       err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS,
+                                   MV88E6393X_SERDES_POC, &reg);
+       if (err)
+               return err;
+
+       reg &= ~(MV88E6393X_SERDES_POC_PCS_MASK | MV88E6393X_SERDES_POC_AN);
+       if (on)
+               reg |= MV88E6393X_SERDES_POC_PCS_1000BASEX |
+                      MV88E6393X_SERDES_POC_AN;
+       else
+               reg |= MV88E6393X_SERDES_POC_PCS_2500BASEX;
+       reg |= MV88E6393X_SERDES_POC_RESET;
+
+       err = mv88e6390_serdes_write(chip, lane, MDIO_MMD_PHYXS,
+                                    MV88E6393X_SERDES_POC, reg);
        if (err)
                return err;
 
-       err = mv88e6393x_serdes_port_errata(chip, MV88E6393X_PORT9_LANE);
+       err = mv88e6390_serdes_write(chip, lane, MDIO_MMD_VEND1, 0x8000, 0x58);
        if (err)
                return err;
 
-       return mv88e6393x_serdes_port_errata(chip, MV88E6393X_PORT10_LANE);
+       return 0;
 }
 
 int mv88e6393x_serdes_power(struct mv88e6xxx_chip *chip, int port, int lane,
                            bool on)
 {
        u8 cmode = chip->ports[port].cmode;
+       int err;
 
        if (port != 0 && port != 9 && port != 10)
                return -EOPNOTSUPP;
 
+       if (on) {
+               err = mv88e6393x_serdes_erratum_4_8(chip, lane);
+               if (err)
+                       return err;
+
+               err = mv88e6393x_serdes_erratum_5_2(chip, lane, cmode);
+               if (err)
+                       return err;
+
+               err = mv88e6393x_serdes_fix_2500basex_an(chip, lane, cmode,
+                                                        true);
+               if (err)
+                       return err;
+
+               err = mv88e6393x_serdes_power_lane(chip, lane, true);
+               if (err)
+                       return err;
+       }
+
        switch (cmode) {
        case MV88E6XXX_PORT_STS_CMODE_SGMII:
        case MV88E6XXX_PORT_STS_CMODE_1000BASEX:
        case MV88E6XXX_PORT_STS_CMODE_2500BASEX:
-               return mv88e6390_serdes_power_sgmii(chip, lane, on);
+               err = mv88e6390_serdes_power_sgmii(chip, lane, on);
+               break;
        case MV88E6393X_PORT_STS_CMODE_5GBASER:
        case MV88E6393X_PORT_STS_CMODE_10GBASER:
-               return mv88e6390_serdes_power_10g(chip, lane, on);
+               err = mv88e6390_serdes_power_10g(chip, lane, on);
+               break;
        }
 
-       return 0;
+       if (err)
+               return err;
+
+       if (!on) {
+               err = mv88e6393x_serdes_power_lane(chip, lane, false);
+               if (err)
+                       return err;
+
+               err = mv88e6393x_serdes_fix_2500basex_an(chip, lane, cmode,
+                                                        false);
+       }
+
+       return err;
 }
index cbb3ba3..8dd8ed2 100644 (file)
 #define MV88E6393X_SERDES_POC_PCS_MASK         0x0007
 #define MV88E6393X_SERDES_POC_RESET            BIT(15)
 #define MV88E6393X_SERDES_POC_PDOWN            BIT(5)
+#define MV88E6393X_SERDES_POC_AN               BIT(3)
+#define MV88E6393X_SERDES_CTRL1                        0xf003
+#define MV88E6393X_SERDES_CTRL1_TX_PDOWN       BIT(9)
+#define MV88E6393X_SERDES_CTRL1_RX_PDOWN       BIT(8)
 
 #define MV88E6393X_ERRATA_4_8_REG              0xF074
 #define MV88E6393X_ERRATA_4_8_BIT              BIT(14)
index a429c97..147ca39 100644 (file)
@@ -1256,8 +1256,12 @@ qca8k_setup(struct dsa_switch *ds)
                /* Set initial MTU for every port.
                 * We only have a general MTU setting, so track
                 * every port and set the max across all ports.
+                * Set the per-port MTU to 1500 as the MTU change function
+                * will add the overhead, and if it's set to 1518 then it
+                * will apply the overhead again and we will end up with an
+                * MTU of 1536 instead of 1518.
                 */
-               priv->port_mtu[i] = ETH_FRAME_LEN + ETH_FCS_LEN;
+               priv->port_mtu[i] = ETH_DATA_LEN;
        }
 
        /* Special GLOBAL_FC_THRESH value are needed for ar8327 switch */
@@ -1433,6 +1437,12 @@ qca8k_phylink_mac_config(struct dsa_switch *ds, int port, unsigned int mode,
 
                qca8k_write(priv, QCA8K_REG_SGMII_CTRL, val);
 
+               /* The original code reports port instability when the SGMII
+                * delay is not set. Apply advised values here or take them from DT.
+                */
+               if (state->interface == PHY_INTERFACE_MODE_SGMII)
+                       qca8k_mac_config_setup_internal_delay(priv, cpu_port_index, reg);
+
                /* For qca8327/qca8328/qca8334/qca8338 sgmii is unique and
                 * falling edge is set writing in the PORT0 PAD reg
                 */
@@ -1455,12 +1465,6 @@ qca8k_phylink_mac_config(struct dsa_switch *ds, int port, unsigned int mode,
                                        QCA8K_PORT0_PAD_SGMII_TXCLK_FALLING_EDGE,
                                        val);
 
-               /* The original code reports port instability when the SGMII
-                * delay is not set. Apply advised values here or take them from DT.
-                */
-               if (state->interface == PHY_INTERFACE_MODE_SGMII)
-                       qca8k_mac_config_setup_internal_delay(priv, cpu_port_index, reg);
-
                break;
        default:
                dev_err(ds->dev, "xMII mode %s not supported for port %d\n",
index baaae97..078ca4c 100644 (file)
 #define RTL8365MB_LEARN_LIMIT_MAX_8365MB_VC    2112
 
 /* Family-specific data and limits */
+#define RTL8365MB_PHYADDRMAX   7
 #define RTL8365MB_NUM_PHYREGS  32
 #define RTL8365MB_PHYREGMAX    (RTL8365MB_NUM_PHYREGS - 1)
 #define RTL8365MB_MAX_NUM_PORTS        (RTL8365MB_CPU_PORT_NUM_8365MB_VC + 1)
 #define RTL8365MB_INDIRECT_ACCESS_STATUS_REG                   0x1F01
 #define RTL8365MB_INDIRECT_ACCESS_ADDRESS_REG                  0x1F02
 #define   RTL8365MB_INDIRECT_ACCESS_ADDRESS_OCPADR_5_1_MASK    GENMASK(4, 0)
-#define   RTL8365MB_INDIRECT_ACCESS_ADDRESS_PHYNUM_MASK                GENMASK(6, 5)
+#define   RTL8365MB_INDIRECT_ACCESS_ADDRESS_PHYNUM_MASK                GENMASK(7, 5)
 #define   RTL8365MB_INDIRECT_ACCESS_ADDRESS_OCPADR_9_6_MASK    GENMASK(11, 8)
 #define   RTL8365MB_PHY_BASE                                   0x2000
 #define RTL8365MB_INDIRECT_ACCESS_WRITE_DATA_REG               0x1F03
@@ -679,6 +680,9 @@ static int rtl8365mb_phy_read(struct realtek_smi *smi, int phy, int regnum)
        u16 val;
        int ret;
 
+       if (phy > RTL8365MB_PHYADDRMAX)
+               return -EINVAL;
+
        if (regnum > RTL8365MB_PHYREGMAX)
                return -EINVAL;
 
@@ -704,6 +708,9 @@ static int rtl8365mb_phy_write(struct realtek_smi *smi, int phy, int regnum,
        u32 ocp_addr;
        int ret;
 
+       if (phy > RTL8365MB_PHYADDRMAX)
+               return -EINVAL;
+
        if (regnum > RTL8365MB_PHYREGMAX)
                return -EINVAL;
 
index 23b2d39..ace691d 100644 (file)
 
 #define AQ_DEVICE_ID_AQC113DEV 0x00C0
 #define AQ_DEVICE_ID_AQC113CS  0x94C0
+#define AQ_DEVICE_ID_AQC113CA  0x34C0
 #define AQ_DEVICE_ID_AQC114CS  0x93C0
 #define AQ_DEVICE_ID_AQC113    0x04C0
 #define AQ_DEVICE_ID_AQC113C   0x14C0
 #define AQ_DEVICE_ID_AQC115C   0x12C0
+#define AQ_DEVICE_ID_AQC116C   0x11C0
 
 #define HW_ATL_NIC_NAME "Marvell (aQuantia) AQtion 10Gbit Network Adapter"
 
 
 #define AQ_NIC_RATE_10G                BIT(0)
 #define AQ_NIC_RATE_5G         BIT(1)
-#define AQ_NIC_RATE_5GSR       BIT(2)
-#define AQ_NIC_RATE_2G5                BIT(3)
-#define AQ_NIC_RATE_1G         BIT(4)
-#define AQ_NIC_RATE_100M       BIT(5)
-#define AQ_NIC_RATE_10M                BIT(6)
-#define AQ_NIC_RATE_1G_HALF    BIT(7)
-#define AQ_NIC_RATE_100M_HALF  BIT(8)
-#define AQ_NIC_RATE_10M_HALF   BIT(9)
+#define AQ_NIC_RATE_2G5                BIT(2)
+#define AQ_NIC_RATE_1G         BIT(3)
+#define AQ_NIC_RATE_100M       BIT(4)
+#define AQ_NIC_RATE_10M                BIT(5)
+#define AQ_NIC_RATE_1G_HALF    BIT(6)
+#define AQ_NIC_RATE_100M_HALF  BIT(7)
+#define AQ_NIC_RATE_10M_HALF   BIT(8)
 
-#define AQ_NIC_RATE_EEE_10G    BIT(10)
-#define AQ_NIC_RATE_EEE_5G     BIT(11)
-#define AQ_NIC_RATE_EEE_2G5    BIT(12)
-#define AQ_NIC_RATE_EEE_1G     BIT(13)
-#define AQ_NIC_RATE_EEE_100M   BIT(14)
+#define AQ_NIC_RATE_EEE_10G    BIT(9)
+#define AQ_NIC_RATE_EEE_5G     BIT(10)
+#define AQ_NIC_RATE_EEE_2G5    BIT(11)
+#define AQ_NIC_RATE_EEE_1G     BIT(12)
+#define AQ_NIC_RATE_EEE_100M   BIT(13)
 #define AQ_NIC_RATE_EEE_MSK     (AQ_NIC_RATE_EEE_10G |\
                                 AQ_NIC_RATE_EEE_5G |\
                                 AQ_NIC_RATE_EEE_2G5 |\
index 062a300..dbd2846 100644 (file)
@@ -80,6 +80,8 @@ struct aq_hw_link_status_s {
 };
 
 struct aq_stats_s {
+       u64 brc;
+       u64 btc;
        u64 uprc;
        u64 mprc;
        u64 bprc;
index 1acf544..33f1a13 100644 (file)
@@ -316,18 +316,22 @@ int aq_nic_ndev_register(struct aq_nic_s *self)
        aq_macsec_init(self);
 #endif
 
-       mutex_lock(&self->fwreq_mutex);
-       err = self->aq_fw_ops->get_mac_permanent(self->aq_hw, addr);
-       mutex_unlock(&self->fwreq_mutex);
-       if (err)
-               goto err_exit;
+       if (platform_get_ethdev_address(&self->pdev->dev, self->ndev) != 0) {
+               // If DT has none or an invalid one, ask device for MAC address
+               mutex_lock(&self->fwreq_mutex);
+               err = self->aq_fw_ops->get_mac_permanent(self->aq_hw, addr);
+               mutex_unlock(&self->fwreq_mutex);
 
-       eth_hw_addr_set(self->ndev, addr);
+               if (err)
+                       goto err_exit;
 
-       if (!is_valid_ether_addr(self->ndev->dev_addr) ||
-           !aq_nic_is_valid_ether_addr(self->ndev->dev_addr)) {
-               netdev_warn(self->ndev, "MAC is invalid, will use random.");
-               eth_hw_addr_random(self->ndev);
+               if (is_valid_ether_addr(addr) &&
+                   aq_nic_is_valid_ether_addr(addr)) {
+                       eth_hw_addr_set(self->ndev, addr);
+               } else {
+                       netdev_warn(self->ndev, "MAC is invalid, will use random.");
+                       eth_hw_addr_random(self->ndev);
+               }
        }
 
 #if defined(AQ_CFG_MAC_ADDR_PERMANENT)
@@ -905,8 +909,14 @@ u64 *aq_nic_get_stats(struct aq_nic_s *self, u64 *data)
        data[++i] = stats->mbtc;
        data[++i] = stats->bbrc;
        data[++i] = stats->bbtc;
-       data[++i] = stats->ubrc + stats->mbrc + stats->bbrc;
-       data[++i] = stats->ubtc + stats->mbtc + stats->bbtc;
+       if (stats->brc)
+               data[++i] = stats->brc;
+       else
+               data[++i] = stats->ubrc + stats->mbrc + stats->bbrc;
+       if (stats->btc)
+               data[++i] = stats->btc;
+       else
+               data[++i] = stats->ubtc + stats->mbtc + stats->bbtc;
        data[++i] = stats->dma_pkt_rc;
        data[++i] = stats->dma_pkt_tc;
        data[++i] = stats->dma_oct_rc;
index d4b1976..797a951 100644 (file)
@@ -49,6 +49,8 @@ static const struct pci_device_id aq_pci_tbl[] = {
        { PCI_VDEVICE(AQUANTIA, AQ_DEVICE_ID_AQC113), },
        { PCI_VDEVICE(AQUANTIA, AQ_DEVICE_ID_AQC113C), },
        { PCI_VDEVICE(AQUANTIA, AQ_DEVICE_ID_AQC115C), },
+       { PCI_VDEVICE(AQUANTIA, AQ_DEVICE_ID_AQC113CA), },
+       { PCI_VDEVICE(AQUANTIA, AQ_DEVICE_ID_AQC116C), },
 
        {}
 };
@@ -85,7 +87,10 @@ static const struct aq_board_revision_s hw_atl_boards[] = {
        { AQ_DEVICE_ID_AQC113CS,        AQ_HWREV_ANY,   &hw_atl2_ops, &hw_atl2_caps_aqc113, },
        { AQ_DEVICE_ID_AQC114CS,        AQ_HWREV_ANY,   &hw_atl2_ops, &hw_atl2_caps_aqc113, },
        { AQ_DEVICE_ID_AQC113C,         AQ_HWREV_ANY,   &hw_atl2_ops, &hw_atl2_caps_aqc113, },
-       { AQ_DEVICE_ID_AQC115C,         AQ_HWREV_ANY,   &hw_atl2_ops, &hw_atl2_caps_aqc113, },
+       { AQ_DEVICE_ID_AQC115C,         AQ_HWREV_ANY,   &hw_atl2_ops, &hw_atl2_caps_aqc115c, },
+       { AQ_DEVICE_ID_AQC113CA,        AQ_HWREV_ANY,   &hw_atl2_ops, &hw_atl2_caps_aqc113, },
+       { AQ_DEVICE_ID_AQC116C,         AQ_HWREV_ANY,   &hw_atl2_ops, &hw_atl2_caps_aqc116c, },
+
 };
 
 MODULE_DEVICE_TABLE(pci, aq_pci_tbl);
index 24122cc..81b3756 100644 (file)
@@ -298,13 +298,14 @@ bool aq_ring_tx_clean(struct aq_ring_s *self)
                        }
                }
 
-               if (unlikely(buff->is_eop)) {
+               if (unlikely(buff->is_eop && buff->skb)) {
                        u64_stats_update_begin(&self->stats.tx.syncp);
                        ++self->stats.tx.packets;
                        self->stats.tx.bytes += buff->skb->len;
                        u64_stats_update_end(&self->stats.tx.syncp);
 
                        dev_kfree_skb_any(buff->skb);
+                       buff->skb = NULL;
                }
                buff->pa = 0U;
                buff->eop_index = 0xffffU;
index d281322..f4774cf 100644 (file)
@@ -362,9 +362,6 @@ unsigned int aq_vec_get_sw_stats(struct aq_vec_s *self, const unsigned int tc, u
 {
        unsigned int count;
 
-       WARN_ONCE(!aq_vec_is_valid_tc(self, tc),
-                 "Invalid tc %u (#rx=%u, #tx=%u)\n",
-                 tc, self->rx_rings, self->tx_rings);
        if (!aq_vec_is_valid_tc(self, tc))
                return 0;
 
index 3f1704c..7e88d72 100644 (file)
@@ -867,12 +867,20 @@ static int hw_atl_fw1x_deinit(struct aq_hw_s *self)
 int hw_atl_utils_update_stats(struct aq_hw_s *self)
 {
        struct aq_stats_s *cs = &self->curr_stats;
+       struct aq_stats_s curr_stats = *cs;
        struct hw_atl_utils_mbox mbox;
+       bool corrupted_stats = false;
 
        hw_atl_utils_mpi_read_stats(self, &mbox);
 
-#define AQ_SDELTA(_N_) (self->curr_stats._N_ += \
-                       mbox.stats._N_ - self->last_stats._N_)
+#define AQ_SDELTA(_N_)  \
+do { \
+       if (!corrupted_stats && \
+           ((s64)(mbox.stats._N_ - self->last_stats._N_)) >= 0) \
+               curr_stats._N_ += mbox.stats._N_ - self->last_stats._N_; \
+       else \
+               corrupted_stats = true; \
+} while (0)
 
        if (self->aq_link_status.mbps) {
                AQ_SDELTA(uprc);
@@ -892,6 +900,9 @@ int hw_atl_utils_update_stats(struct aq_hw_s *self)
                AQ_SDELTA(bbrc);
                AQ_SDELTA(bbtc);
                AQ_SDELTA(dpc);
+
+               if (!corrupted_stats)
+                       *cs = curr_stats;
        }
 #undef AQ_SDELTA
 
index eac631c..4d4cfbc 100644 (file)
@@ -132,9 +132,6 @@ static enum hw_atl_fw2x_rate link_speed_mask_2fw2x_ratemask(u32 speed)
        if (speed & AQ_NIC_RATE_5G)
                rate |= FW2X_RATE_5G;
 
-       if (speed & AQ_NIC_RATE_5GSR)
-               rate |= FW2X_RATE_5G;
-
        if (speed & AQ_NIC_RATE_2G5)
                rate |= FW2X_RATE_2G5;
 
index c98708b..5dfc751 100644 (file)
@@ -65,11 +65,25 @@ const struct aq_hw_caps_s hw_atl2_caps_aqc113 = {
                          AQ_NIC_RATE_5G  |
                          AQ_NIC_RATE_2G5 |
                          AQ_NIC_RATE_1G  |
-                         AQ_NIC_RATE_1G_HALF   |
                          AQ_NIC_RATE_100M      |
-                         AQ_NIC_RATE_100M_HALF |
-                         AQ_NIC_RATE_10M       |
-                         AQ_NIC_RATE_10M_HALF,
+                         AQ_NIC_RATE_10M,
+};
+
+const struct aq_hw_caps_s hw_atl2_caps_aqc115c = {
+       DEFAULT_BOARD_BASIC_CAPABILITIES,
+       .media_type = AQ_HW_MEDIA_TYPE_TP,
+       .link_speed_msk = AQ_NIC_RATE_2G5 |
+                         AQ_NIC_RATE_1G  |
+                         AQ_NIC_RATE_100M      |
+                         AQ_NIC_RATE_10M,
+};
+
+const struct aq_hw_caps_s hw_atl2_caps_aqc116c = {
+       DEFAULT_BOARD_BASIC_CAPABILITIES,
+       .media_type = AQ_HW_MEDIA_TYPE_TP,
+       .link_speed_msk = AQ_NIC_RATE_1G  |
+                         AQ_NIC_RATE_100M      |
+                         AQ_NIC_RATE_10M,
 };
 
 static u32 hw_atl2_sem_act_rslvr_get(struct aq_hw_s *self)
index de8723f..346f0dc 100644 (file)
@@ -9,6 +9,8 @@
 #include "aq_common.h"
 
 extern const struct aq_hw_caps_s hw_atl2_caps_aqc113;
+extern const struct aq_hw_caps_s hw_atl2_caps_aqc115c;
+extern const struct aq_hw_caps_s hw_atl2_caps_aqc116c;
 extern const struct aq_hw_ops hw_atl2_ops;
 
 #endif /* HW_ATL2_H */
index b66fa34..6bad64c 100644 (file)
@@ -239,7 +239,8 @@ struct version_s {
                u8 minor;
                u16 build;
        } phy;
-       u32 rsvd;
+       u32 drv_iface_ver:4;
+       u32 rsvd:28;
 };
 
 struct link_status_s {
@@ -424,7 +425,7 @@ struct cable_diag_status_s {
        u16 rsvd2;
 };
 
-struct statistics_s {
+struct statistics_a0_s {
        struct {
                u32 link_up;
                u32 link_down;
@@ -457,6 +458,33 @@ struct statistics_s {
        u32 reserve_fw_gap;
 };
 
+struct __packed statistics_b0_s {
+       u64 rx_good_octets;
+       u64 rx_pause_frames;
+       u64 rx_good_frames;
+       u64 rx_errors;
+       u64 rx_unicast_frames;
+       u64 rx_multicast_frames;
+       u64 rx_broadcast_frames;
+
+       u64 tx_good_octets;
+       u64 tx_pause_frames;
+       u64 tx_good_frames;
+       u64 tx_errors;
+       u64 tx_unicast_frames;
+       u64 tx_multicast_frames;
+       u64 tx_broadcast_frames;
+
+       u32 main_loop_cycles;
+};
+
+struct __packed statistics_s {
+       union __packed {
+               struct statistics_a0_s a0;
+               struct statistics_b0_s b0;
+       };
+};
+
 struct filter_caps_s {
        u8 l2_filters_base_index:6;
        u8 flexible_filter_mask:2;
@@ -545,7 +573,7 @@ struct management_status_s {
        u32 rsvd5;
 };
 
-struct fw_interface_out {
+struct __packed fw_interface_out {
        struct transaction_counter_s transaction_id;
        struct version_s version;
        struct link_status_s link_status;
@@ -569,7 +597,6 @@ struct fw_interface_out {
        struct core_dump_s core_dump;
        u32 rsvd11;
        struct statistics_s stats;
-       u32 rsvd12;
        struct filter_caps_s filter_caps;
        struct device_caps_s device_caps;
        u32 rsvd13;
@@ -592,6 +619,9 @@ struct fw_interface_out {
 #define  AQ_HOST_MODE_LOW_POWER    3U
 #define  AQ_HOST_MODE_SHUTDOWN     4U
 
+#define  AQ_A2_FW_INTERFACE_A0     0
+#define  AQ_A2_FW_INTERFACE_B0     1
+
 int hw_atl2_utils_initfw(struct aq_hw_s *self, const struct aq_fw_ops **fw_ops);
 
 int hw_atl2_utils_soft_reset(struct aq_hw_s *self);
index dd259c8..58d426d 100644 (file)
@@ -84,7 +84,7 @@ static int hw_atl2_shared_buffer_read_block(struct aq_hw_s *self,
                        if (cnt > AQ_A2_FW_READ_TRY_MAX)
                                return -ETIME;
                        if (tid1.transaction_cnt_a != tid1.transaction_cnt_b)
-                               udelay(1);
+                               mdelay(1);
                } while (tid1.transaction_cnt_a != tid1.transaction_cnt_b);
 
                hw_atl2_mif_shared_buf_read(self, offset, (u32 *)data, dwords);
@@ -154,7 +154,7 @@ static void a2_link_speed_mask2fw(u32 speed,
 {
        link_options->rate_10G = !!(speed & AQ_NIC_RATE_10G);
        link_options->rate_5G = !!(speed & AQ_NIC_RATE_5G);
-       link_options->rate_N5G = !!(speed & AQ_NIC_RATE_5GSR);
+       link_options->rate_N5G = link_options->rate_5G;
        link_options->rate_2P5G = !!(speed & AQ_NIC_RATE_2G5);
        link_options->rate_N2P5G = link_options->rate_2P5G;
        link_options->rate_1G = !!(speed & AQ_NIC_RATE_1G);
@@ -192,8 +192,6 @@ static u32 a2_fw_lkp_to_mask(struct lkp_link_caps_s *lkp_link_caps)
                rate |= AQ_NIC_RATE_10G;
        if (lkp_link_caps->rate_5G)
                rate |= AQ_NIC_RATE_5G;
-       if (lkp_link_caps->rate_N5G)
-               rate |= AQ_NIC_RATE_5GSR;
        if (lkp_link_caps->rate_2P5G)
                rate |= AQ_NIC_RATE_2G5;
        if (lkp_link_caps->rate_1G)
@@ -335,15 +333,22 @@ static int aq_a2_fw_get_mac_permanent(struct aq_hw_s *self, u8 *mac)
        return 0;
 }
 
-static int aq_a2_fw_update_stats(struct aq_hw_s *self)
+static void aq_a2_fill_a0_stats(struct aq_hw_s *self,
+                               struct statistics_s *stats)
 {
        struct hw_atl2_priv *priv = (struct hw_atl2_priv *)self->priv;
-       struct statistics_s stats;
-
-       hw_atl2_shared_buffer_read_safe(self, stats, &stats);
-
-#define AQ_SDELTA(_N_, _F_) (self->curr_stats._N_ += \
-                       stats.msm._F_ - priv->last_stats.msm._F_)
+       struct aq_stats_s *cs = &self->curr_stats;
+       struct aq_stats_s curr_stats = *cs;
+       bool corrupted_stats = false;
+
+#define AQ_SDELTA(_N, _F)  \
+do { \
+       if (!corrupted_stats && \
+           ((s64)(stats->a0.msm._F - priv->last_stats.a0.msm._F)) >= 0) \
+               curr_stats._N += stats->a0.msm._F - priv->last_stats.a0.msm._F;\
+       else \
+               corrupted_stats = true; \
+} while (0)
 
        if (self->aq_link_status.mbps) {
                AQ_SDELTA(uprc, rx_unicast_frames);
@@ -362,17 +367,76 @@ static int aq_a2_fw_update_stats(struct aq_hw_s *self)
                AQ_SDELTA(mbtc, tx_multicast_octets);
                AQ_SDELTA(bbrc, rx_broadcast_octets);
                AQ_SDELTA(bbtc, tx_broadcast_octets);
+
+               if (!corrupted_stats)
+                       *cs = curr_stats;
        }
 #undef AQ_SDELTA
-       self->curr_stats.dma_pkt_rc =
-               hw_atl_stats_rx_dma_good_pkt_counter_get(self);
-       self->curr_stats.dma_pkt_tc =
-               hw_atl_stats_tx_dma_good_pkt_counter_get(self);
-       self->curr_stats.dma_oct_rc =
-               hw_atl_stats_rx_dma_good_octet_counter_get(self);
-       self->curr_stats.dma_oct_tc =
-               hw_atl_stats_tx_dma_good_octet_counter_get(self);
-       self->curr_stats.dpc = hw_atl_rpb_rx_dma_drop_pkt_cnt_get(self);
+
+}
+
+static void aq_a2_fill_b0_stats(struct aq_hw_s *self,
+                               struct statistics_s *stats)
+{
+       struct hw_atl2_priv *priv = (struct hw_atl2_priv *)self->priv;
+       struct aq_stats_s *cs = &self->curr_stats;
+       struct aq_stats_s curr_stats = *cs;
+       bool corrupted_stats = false;
+
+#define AQ_SDELTA(_N, _F)  \
+do { \
+       if (!corrupted_stats && \
+           ((s64)(stats->b0._F - priv->last_stats.b0._F)) >= 0) \
+               curr_stats._N += stats->b0._F - priv->last_stats.b0._F; \
+       else \
+               corrupted_stats = true; \
+} while (0)
+
+       if (self->aq_link_status.mbps) {
+               AQ_SDELTA(uprc, rx_unicast_frames);
+               AQ_SDELTA(mprc, rx_multicast_frames);
+               AQ_SDELTA(bprc, rx_broadcast_frames);
+               AQ_SDELTA(erpr, rx_errors);
+               AQ_SDELTA(brc, rx_good_octets);
+
+               AQ_SDELTA(uptc, tx_unicast_frames);
+               AQ_SDELTA(mptc, tx_multicast_frames);
+               AQ_SDELTA(bptc, tx_broadcast_frames);
+               AQ_SDELTA(erpt, tx_errors);
+               AQ_SDELTA(btc, tx_good_octets);
+
+               if (!corrupted_stats)
+                       *cs = curr_stats;
+       }
+#undef AQ_SDELTA
+}
+
+static int aq_a2_fw_update_stats(struct aq_hw_s *self)
+{
+       struct hw_atl2_priv *priv = (struct hw_atl2_priv *)self->priv;
+       struct aq_stats_s *cs = &self->curr_stats;
+       struct statistics_s stats;
+       struct version_s version;
+       int err;
+
+       err = hw_atl2_shared_buffer_read_safe(self, version, &version);
+       if (err)
+               return err;
+
+       err = hw_atl2_shared_buffer_read_safe(self, stats, &stats);
+       if (err)
+               return err;
+
+       if (version.drv_iface_ver == AQ_A2_FW_INTERFACE_A0)
+               aq_a2_fill_a0_stats(self, &stats);
+       else
+               aq_a2_fill_b0_stats(self, &stats);
+
+       cs->dma_pkt_rc = hw_atl_stats_rx_dma_good_pkt_counter_get(self);
+       cs->dma_pkt_tc = hw_atl_stats_tx_dma_good_pkt_counter_get(self);
+       cs->dma_oct_rc = hw_atl_stats_rx_dma_good_octet_counter_get(self);
+       cs->dma_oct_tc = hw_atl_stats_tx_dma_good_octet_counter_get(self);
+       cs->dpc = hw_atl_rpb_rx_dma_drop_pkt_cnt_get(self);
 
        memcpy(&priv->last_stats, &stats, sizeof(stats));
 
@@ -499,9 +563,9 @@ u32 hw_atl2_utils_get_fw_version(struct aq_hw_s *self)
        hw_atl2_shared_buffer_read_safe(self, version, &version);
 
        /* A2 FW version is stored in reverse order */
-       return version.mac.major << 24 |
-              version.mac.minor << 16 |
-              version.mac.build;
+       return version.bundle.major << 24 |
+              version.bundle.minor << 16 |
+              version.bundle.build;
 }
 
 int hw_atl2_utils_get_action_resolve_table_caps(struct aq_hw_s *self,
index 94df4f9..0710e71 100644 (file)
@@ -34,7 +34,7 @@ int axspi_read_status(struct axspi_data *ax_spi, struct spi_status *status)
 
        /* OP */
        ax_spi->cmd_buf[0] = AX_SPICMD_READ_STATUS;
-       ret = spi_write_then_read(ax_spi->spi, ax_spi->cmd_buf, 1, (u8 *)&status, 3);
+       ret = spi_write_then_read(ax_spi->spi, ax_spi->cmd_buf, 1, (u8 *)status, 3);
        if (ret)
                dev_err(&ax_spi->spi->dev, "%s() failed: ret = %d\n", __func__, ret);
        else
index 64479c4..ae9cca7 100644 (file)
@@ -3196,6 +3196,7 @@ static int cxgb4vf_pci_probe(struct pci_dev *pdev,
        }
        if (adapter->registered_device_map == 0) {
                dev_err(&pdev->dev, "could not register any net devices\n");
+               err = -EINVAL;
                goto err_disable_interrupts;
        }
 
index 6451c83..8e64356 100644 (file)
@@ -4550,6 +4550,8 @@ static int dpaa2_eth_remove(struct fsl_mc_device *ls_dev)
 
        fsl_mc_portal_free(priv->mc_io);
 
+       destroy_workqueue(priv->dpaa2_ptp_wq);
+
        dev_dbg(net_dev->dev.parent, "Removed interface %s\n", net_dev->name);
 
        free_netdev(net_dev);
index 67364ab..081295b 100644 (file)
@@ -1081,7 +1081,8 @@ static void hns3_dump_page_pool_info(struct hns3_enet_ring *ring,
        u32 j = 0;
 
        sprintf(result[j++], "%u", index);
-       sprintf(result[j++], "%u", ring->page_pool->pages_state_hold_cnt);
+       sprintf(result[j++], "%u",
+               READ_ONCE(ring->page_pool->pages_state_hold_cnt));
        sprintf(result[j++], "%u",
                atomic_read(&ring->page_pool->pages_state_release_cnt));
        sprintf(result[j++], "%u", ring->page_pool->p.pool_size);
@@ -1106,6 +1107,11 @@ hns3_dbg_page_pool_info(struct hnae3_handle *h, char *buf, int len)
                return -EFAULT;
        }
 
+       if (!priv->ring[h->kinfo.num_tqps].page_pool) {
+               dev_err(&h->pdev->dev, "page pool is not initialized\n");
+               return -EFAULT;
+       }
+
        for (i = 0; i < ARRAY_SIZE(page_pool_info_items); i++)
                result[i] = &data_str[i][0];
 
index c8442b8..c9b4568 100644 (file)
@@ -987,6 +987,7 @@ static int hns3_set_reset(struct net_device *netdev, u32 *flags)
        struct hnae3_ae_dev *ae_dev = pci_get_drvdata(h->pdev);
        const struct hnae3_ae_ops *ops = h->ae_algo->ops;
        const struct hns3_reset_type_map *rst_type_map;
+       enum ethtool_reset_flags rst_flags;
        u32 i, size;
 
        if (ops->ae_dev_resetting && ops->ae_dev_resetting(h))
@@ -1006,6 +1007,7 @@ static int hns3_set_reset(struct net_device *netdev, u32 *flags)
        for (i = 0; i < size; i++) {
                if (rst_type_map[i].rst_flags == *flags) {
                        rst_type = rst_type_map[i].rst_type;
+                       rst_flags = rst_type_map[i].rst_flags;
                        break;
                }
        }
@@ -1021,6 +1023,8 @@ static int hns3_set_reset(struct net_device *netdev, u32 *flags)
 
        ops->reset_event(h->pdev, h);
 
+       *flags &= ~rst_flags;
+
        return 0;
 }
 
index 25c419d..41afaee 100644 (file)
@@ -703,9 +703,9 @@ static int hclgevf_set_rss_tc_mode(struct hclgevf_dev *hdev,  u16 rss_size)
        roundup_size = ilog2(roundup_size);
 
        for (i = 0; i < HCLGEVF_MAX_TC_NUM; i++) {
-               tc_valid[i] = !!(hdev->hw_tc_map & BIT(i));
+               tc_valid[i] = 1;
                tc_size[i] = roundup_size;
-               tc_offset[i] = rss_size * i;
+               tc_offset[i] = (hdev->hw_tc_map & BIT(i)) ? rss_size * i : 0;
        }
 
        hclgevf_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_RSS_TC_MODE, false);
index 3cca517..0bb3911 100644 (file)
@@ -628,17 +628,9 @@ static bool reuse_rx_pools(struct ibmvnic_adapter *adapter)
        old_buff_size = adapter->prev_rx_buf_sz;
        new_buff_size = adapter->cur_rx_buf_sz;
 
-       /* Require buff size to be exactly same for now */
-       if (old_buff_size != new_buff_size)
-               return false;
-
-       if (old_num_pools == new_num_pools && old_pool_size == new_pool_size)
-               return true;
-
-       if (old_num_pools < adapter->min_rx_queues ||
-           old_num_pools > adapter->max_rx_queues ||
-           old_pool_size < adapter->min_rx_add_entries_per_subcrq ||
-           old_pool_size > adapter->max_rx_add_entries_per_subcrq)
+       if (old_buff_size != new_buff_size ||
+           old_num_pools != new_num_pools ||
+           old_pool_size != new_pool_size)
                return false;
 
        return true;
@@ -874,17 +866,9 @@ static bool reuse_tx_pools(struct ibmvnic_adapter *adapter)
        old_mtu = adapter->prev_mtu;
        new_mtu = adapter->req_mtu;
 
-       /* Require MTU to be exactly same to reuse pools for now */
-       if (old_mtu != new_mtu)
-               return false;
-
-       if (old_num_pools == new_num_pools && old_pool_size == new_pool_size)
-               return true;
-
-       if (old_num_pools < adapter->min_tx_queues ||
-           old_num_pools > adapter->max_tx_queues ||
-           old_pool_size < adapter->min_tx_entries_per_subcrq ||
-           old_pool_size > adapter->max_tx_entries_per_subcrq)
+       if (old_mtu != new_mtu ||
+           old_num_pools != new_num_pools ||
+           old_pool_size != new_pool_size)
                return false;
 
        return true;
index 75635bd..3789269 100644 (file)
@@ -305,6 +305,7 @@ struct iavf_adapter {
 #define IAVF_FLAG_AQ_DEL_FDIR_FILTER           BIT(26)
 #define IAVF_FLAG_AQ_ADD_ADV_RSS_CFG           BIT(27)
 #define IAVF_FLAG_AQ_DEL_ADV_RSS_CFG           BIT(28)
+#define IAVF_FLAG_AQ_REQUEST_STATS             BIT(29)
 
        /* OS defined structs */
        struct net_device *netdev;
@@ -444,6 +445,7 @@ int iavf_up(struct iavf_adapter *adapter);
 void iavf_down(struct iavf_adapter *adapter);
 int iavf_process_config(struct iavf_adapter *adapter);
 void iavf_schedule_reset(struct iavf_adapter *adapter);
+void iavf_schedule_request_stats(struct iavf_adapter *adapter);
 void iavf_reset(struct iavf_adapter *adapter);
 void iavf_set_ethtool_ops(struct net_device *netdev);
 void iavf_update_stats(struct iavf_adapter *adapter);
@@ -501,4 +503,5 @@ void iavf_add_adv_rss_cfg(struct iavf_adapter *adapter);
 void iavf_del_adv_rss_cfg(struct iavf_adapter *adapter);
 struct iavf_mac_filter *iavf_add_filter(struct iavf_adapter *adapter,
                                        const u8 *macaddr);
+int iavf_lock_timeout(struct mutex *lock, unsigned int msecs);
 #endif /* _IAVF_H_ */
index 144a776..0cecaff 100644 (file)
@@ -354,6 +354,9 @@ static void iavf_get_ethtool_stats(struct net_device *netdev,
        struct iavf_adapter *adapter = netdev_priv(netdev);
        unsigned int i;
 
+       /* Explicitly request stats refresh */
+       iavf_schedule_request_stats(adapter);
+
        iavf_add_ethtool_stats(&data, adapter, iavf_gstrings_stats);
 
        rcu_read_lock();
@@ -723,12 +726,31 @@ static int iavf_get_per_queue_coalesce(struct net_device *netdev, u32 queue,
  *
  * Change the ITR settings for a specific queue.
  **/
-static void iavf_set_itr_per_queue(struct iavf_adapter *adapter,
-                                  struct ethtool_coalesce *ec, int queue)
+static int iavf_set_itr_per_queue(struct iavf_adapter *adapter,
+                                 struct ethtool_coalesce *ec, int queue)
 {
        struct iavf_ring *rx_ring = &adapter->rx_rings[queue];
        struct iavf_ring *tx_ring = &adapter->tx_rings[queue];
        struct iavf_q_vector *q_vector;
+       u16 itr_setting;
+
+       itr_setting = rx_ring->itr_setting & ~IAVF_ITR_DYNAMIC;
+
+       if (ec->rx_coalesce_usecs != itr_setting &&
+           ec->use_adaptive_rx_coalesce) {
+               netif_info(adapter, drv, adapter->netdev,
+                          "Rx interrupt throttling cannot be changed if adaptive-rx is enabled\n");
+               return -EINVAL;
+       }
+
+       itr_setting = tx_ring->itr_setting & ~IAVF_ITR_DYNAMIC;
+
+       if (ec->tx_coalesce_usecs != itr_setting &&
+           ec->use_adaptive_tx_coalesce) {
+               netif_info(adapter, drv, adapter->netdev,
+                          "Tx interrupt throttling cannot be changed if adaptive-tx is enabled\n");
+               return -EINVAL;
+       }
 
        rx_ring->itr_setting = ITR_REG_ALIGN(ec->rx_coalesce_usecs);
        tx_ring->itr_setting = ITR_REG_ALIGN(ec->tx_coalesce_usecs);
@@ -751,6 +773,7 @@ static void iavf_set_itr_per_queue(struct iavf_adapter *adapter,
         * the Tx and Rx ITR values based on the values we have entered
         * into the q_vector, no need to write the values now.
         */
+       return 0;
 }
 
 /**
@@ -792,9 +815,11 @@ static int __iavf_set_coalesce(struct net_device *netdev,
         */
        if (queue < 0) {
                for (i = 0; i < adapter->num_active_queues; i++)
-                       iavf_set_itr_per_queue(adapter, ec, i);
+                       if (iavf_set_itr_per_queue(adapter, ec, i))
+                               return -EINVAL;
        } else if (queue < adapter->num_active_queues) {
-               iavf_set_itr_per_queue(adapter, ec, queue);
+               if (iavf_set_itr_per_queue(adapter, ec, queue))
+                       return -EINVAL;
        } else {
                netif_info(adapter, drv, netdev, "Invalid queue value, queue range is 0 - %d\n",
                           adapter->num_active_queues - 1);
index 336e6bf..14934a7 100644 (file)
@@ -147,7 +147,7 @@ enum iavf_status iavf_free_virt_mem_d(struct iavf_hw *hw,
  *
  * Returns 0 on success, negative on failure
  **/
-static int iavf_lock_timeout(struct mutex *lock, unsigned int msecs)
+int iavf_lock_timeout(struct mutex *lock, unsigned int msecs)
 {
        unsigned int wait, delay = 10;
 
@@ -174,6 +174,19 @@ void iavf_schedule_reset(struct iavf_adapter *adapter)
        }
 }
 
+/**
+ * iavf_schedule_request_stats - Set the flags and schedule statistics request
+ * @adapter: board private structure
+ *
+ * Sets IAVF_FLAG_AQ_REQUEST_STATS flag so iavf_watchdog_task() will explicitly
+ * request and refresh ethtool stats
+ **/
+void iavf_schedule_request_stats(struct iavf_adapter *adapter)
+{
+       adapter->aq_required |= IAVF_FLAG_AQ_REQUEST_STATS;
+       mod_delayed_work(iavf_wq, &adapter->watchdog_task, 0);
+}
+
 /**
  * iavf_tx_timeout - Respond to a Tx Hang
  * @netdev: network interface device structure
@@ -704,13 +717,11 @@ static void iavf_del_vlan(struct iavf_adapter *adapter, u16 vlan)
  **/
 static void iavf_restore_filters(struct iavf_adapter *adapter)
 {
-       /* re-add all VLAN filters */
-       if (VLAN_ALLOWED(adapter)) {
-               u16 vid;
+       u16 vid;
 
-               for_each_set_bit(vid, adapter->vsi.active_vlans, VLAN_N_VID)
-                       iavf_add_vlan(adapter, vid);
-       }
+       /* re-add all VLAN filters */
+       for_each_set_bit(vid, adapter->vsi.active_vlans, VLAN_N_VID)
+               iavf_add_vlan(adapter, vid);
 }
 
 /**
@@ -745,9 +756,6 @@ static int iavf_vlan_rx_kill_vid(struct net_device *netdev,
 {
        struct iavf_adapter *adapter = netdev_priv(netdev);
 
-       if (!VLAN_ALLOWED(adapter))
-               return -EIO;
-
        iavf_del_vlan(adapter, vid);
        clear_bit(vid, adapter->vsi.active_vlans);
 
@@ -1709,6 +1717,11 @@ static int iavf_process_aq_command(struct iavf_adapter *adapter)
                iavf_del_adv_rss_cfg(adapter);
                return 0;
        }
+       if (adapter->aq_required & IAVF_FLAG_AQ_REQUEST_STATS) {
+               iavf_request_stats(adapter);
+               return 0;
+       }
+
        return -EAGAIN;
 }
 
@@ -2173,7 +2186,6 @@ static void iavf_reset_task(struct work_struct *work)
        struct net_device *netdev = adapter->netdev;
        struct iavf_hw *hw = &adapter->hw;
        struct iavf_mac_filter *f, *ftmp;
-       struct iavf_vlan_filter *vlf;
        struct iavf_cloud_filter *cf;
        u32 reg_val;
        int i = 0, err;
@@ -2254,6 +2266,7 @@ continue_reset:
                   (adapter->state == __IAVF_RESETTING));
 
        if (running) {
+               netdev->flags &= ~IFF_UP;
                netif_carrier_off(netdev);
                netif_tx_stop_all_queues(netdev);
                adapter->link_up = false;
@@ -2313,11 +2326,6 @@ continue_reset:
        list_for_each_entry(f, &adapter->mac_filter_list, list) {
                f->add = true;
        }
-       /* re-add all VLAN filters */
-       list_for_each_entry(vlf, &adapter->vlan_filter_list, list) {
-               vlf->add = true;
-       }
-
        spin_unlock_bh(&adapter->mac_vlan_list_lock);
 
        /* check if TCs are running and re-add all cloud filters */
@@ -2331,7 +2339,6 @@ continue_reset:
        spin_unlock_bh(&adapter->cloud_filter_list_lock);
 
        adapter->aq_required |= IAVF_FLAG_AQ_ADD_MAC_FILTER;
-       adapter->aq_required |= IAVF_FLAG_AQ_ADD_VLAN_FILTER;
        adapter->aq_required |= IAVF_FLAG_AQ_ADD_CLOUD_FILTER;
        iavf_misc_irq_enable(adapter);
 
@@ -2365,7 +2372,7 @@ continue_reset:
                 * to __IAVF_RUNNING
                 */
                iavf_up_complete(adapter);
-
+               netdev->flags |= IFF_UP;
                iavf_irq_enable(adapter, true);
        } else {
                iavf_change_state(adapter, __IAVF_DOWN);
@@ -2378,8 +2385,10 @@ continue_reset:
 reset_err:
        mutex_unlock(&adapter->client_lock);
        mutex_unlock(&adapter->crit_lock);
-       if (running)
+       if (running) {
                iavf_change_state(adapter, __IAVF_RUNNING);
+               netdev->flags |= IFF_UP;
+       }
        dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n");
        iavf_close(netdev);
 }
@@ -3441,11 +3450,16 @@ static int iavf_set_features(struct net_device *netdev,
 {
        struct iavf_adapter *adapter = netdev_priv(netdev);
 
-       /* Don't allow changing VLAN_RX flag when adapter is not capable
-        * of VLAN offload
+       /* Don't allow enabling VLAN features when adapter is not capable
+        * of VLAN offload/filtering
         */
        if (!VLAN_ALLOWED(adapter)) {
-               if ((netdev->features ^ features) & NETIF_F_HW_VLAN_CTAG_RX)
+               netdev->hw_features &= ~(NETIF_F_HW_VLAN_CTAG_RX |
+                                        NETIF_F_HW_VLAN_CTAG_TX |
+                                        NETIF_F_HW_VLAN_CTAG_FILTER);
+               if (features & (NETIF_F_HW_VLAN_CTAG_RX |
+                               NETIF_F_HW_VLAN_CTAG_TX |
+                               NETIF_F_HW_VLAN_CTAG_FILTER))
                        return -EINVAL;
        } else if ((netdev->features ^ features) & NETIF_F_HW_VLAN_CTAG_RX) {
                if (features & NETIF_F_HW_VLAN_CTAG_RX)
index 8c3f0f7..d60bf7c 100644 (file)
@@ -607,7 +607,7 @@ void iavf_add_vlans(struct iavf_adapter *adapter)
                if (f->add)
                        count++;
        }
-       if (!count) {
+       if (!count || !VLAN_ALLOWED(adapter)) {
                adapter->aq_required &= ~IAVF_FLAG_AQ_ADD_VLAN_FILTER;
                spin_unlock_bh(&adapter->mac_vlan_list_lock);
                return;
@@ -673,9 +673,19 @@ void iavf_del_vlans(struct iavf_adapter *adapter)
 
        spin_lock_bh(&adapter->mac_vlan_list_lock);
 
-       list_for_each_entry(f, &adapter->vlan_filter_list, list) {
-               if (f->remove)
+       list_for_each_entry_safe(f, ftmp, &adapter->vlan_filter_list, list) {
+               /* Since VLAN capabilities are not allowed, we don't want to send
+                * a VLAN delete request because it will most likely fail and
+                * create unnecessary errors/noise, so just free the VLAN
+                * filters marked for removal to enable bailing out before
+                * sending a virtchnl message
+                */
+               if (f->remove && !VLAN_ALLOWED(adapter)) {
+                       list_del(&f->list);
+                       kfree(f);
+               } else if (f->remove) {
                        count++;
+               }
        }
        if (!count) {
                adapter->aq_required &= ~IAVF_FLAG_AQ_DEL_VLAN_FILTER;
@@ -784,6 +794,8 @@ void iavf_request_stats(struct iavf_adapter *adapter)
                /* no error message, this isn't crucial */
                return;
        }
+
+       adapter->aq_required &= ~IAVF_FLAG_AQ_REQUEST_STATS;
        adapter->current_op = VIRTCHNL_OP_GET_STATS;
        vqs.vsi_id = adapter->vsi_res->vsi_id;
        /* queue maps are ignored for this message - only the vsi is used */
@@ -1722,8 +1734,37 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
                }
                spin_lock_bh(&adapter->mac_vlan_list_lock);
                iavf_add_filter(adapter, adapter->hw.mac.addr);
+
+               if (VLAN_ALLOWED(adapter)) {
+                       if (!list_empty(&adapter->vlan_filter_list)) {
+                               struct iavf_vlan_filter *vlf;
+
+                               /* re-add all VLAN filters over virtchnl */
+                               list_for_each_entry(vlf,
+                                                   &adapter->vlan_filter_list,
+                                                   list)
+                                       vlf->add = true;
+
+                               adapter->aq_required |=
+                                       IAVF_FLAG_AQ_ADD_VLAN_FILTER;
+                       }
+               }
+
                spin_unlock_bh(&adapter->mac_vlan_list_lock);
                iavf_process_config(adapter);
+
+               /* unlock crit_lock before acquiring rtnl_lock as other
+                * processes holding rtnl_lock could be waiting for the same
+                * crit_lock
+                */
+               mutex_unlock(&adapter->crit_lock);
+               rtnl_lock();
+               netdev_update_features(adapter->netdev);
+               rtnl_unlock();
+               if (iavf_lock_timeout(&adapter->crit_lock, 10000))
+                       dev_warn(&adapter->pdev->dev, "failed to acquire crit_lock in %s\n",
+                                __FUNCTION__);
+
                }
                break;
        case VIRTCHNL_OP_ENABLE_QUEUES:
index 4056260..09a3297 100644 (file)
@@ -89,8 +89,13 @@ static int ice_vsi_alloc_arrays(struct ice_vsi *vsi)
        if (!vsi->rx_rings)
                goto err_rings;
 
-       /* XDP will have vsi->alloc_txq Tx queues as well, so double the size */
-       vsi->txq_map = devm_kcalloc(dev, (2 * vsi->alloc_txq),
+       /* txq_map needs to have enough space to track both Tx (stack) rings
+        * and XDP rings; at this point vsi->num_xdp_txq might not be set,
+        * so use num_possible_cpus() as we always want to provide an XDP
+        * ring per CPU, regardless of the queue count the user may have
+        * set via ethtool's set_channels() callback.
+        */
+       vsi->txq_map = devm_kcalloc(dev, (vsi->alloc_txq + num_possible_cpus()),
                                    sizeof(*vsi->txq_map), GFP_KERNEL);
 
        if (!vsi->txq_map)
index f099797..4d1fc48 100644 (file)
@@ -2609,7 +2609,18 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog)
                        ice_stat_str(status));
                goto clear_xdp_rings;
        }
-       ice_vsi_assign_bpf_prog(vsi, prog);
+
+       /* assign the prog only when it's not already present on VSI;
+        * this path is hit by both the ethtool -L and ndo_bpf flows;
+        * a VSI rebuild that happens under ethtool -L can expose us to
+        * bpf_prog refcount issues, as we would be swapping the same
+        * bpf_prog pointer from vsi->xdp_prog and calling bpf_prog_put
+        * on it as if it were an 'old_prog'; for ndo_bpf
+        * this is not harmful, as dev_xdp_install bumps the refcount
+        * before calling the op exposed by the driver.
+        */
+       if (!ice_is_xdp_ena_vsi(vsi))
+               ice_vsi_assign_bpf_prog(vsi, prog);
 
        return 0;
 clear_xdp_rings:
@@ -2785,6 +2796,11 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
                if (xdp_ring_err)
                        NL_SET_ERR_MSG_MOD(extack, "Freeing XDP Tx resources failed");
        } else {
+               /* safe to call even when prog == vsi->xdp_prog as
+                * dev_xdp_install in net/core/dev.c incremented prog's
+                * refcount so corresponding bpf_prog_put won't cause
+                * underflow
+                */
                ice_vsi_assign_bpf_prog(vsi, prog);
        }
 
index ff55cb4..bb9a808 100644 (file)
@@ -383,6 +383,7 @@ bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count)
        while (i--) {
                dma = xsk_buff_xdp_get_dma(*xdp);
                rx_desc->read.pkt_addr = cpu_to_le64(dma);
+               rx_desc->wb.status_error0 = 0;
 
                rx_desc++;
                xdp++;
index 836be0d..fd54d3e 100644 (file)
@@ -8026,7 +8026,7 @@ static int igb_poll(struct napi_struct *napi, int budget)
        if (likely(napi_complete_done(napi, work_done)))
                igb_ring_irq_enable(q_vector);
 
-       return min(work_done, budget - 1);
+       return work_done;
 }
 
 /**
index 2b18d89..6480696 100644 (file)
@@ -5017,11 +5017,13 @@ static int mvpp2_change_mtu(struct net_device *dev, int mtu)
                mtu = ALIGN(MVPP2_RX_PKT_SIZE(mtu), 8);
        }
 
+       if (port->xdp_prog && mtu > MVPP2_MAX_RX_BUF_SIZE) {
+               netdev_err(dev, "Illegal MTU value %d (> %d) for XDP mode\n",
+                          mtu, (int)MVPP2_MAX_RX_BUF_SIZE);
+               return -EINVAL;
+       }
+
        if (MVPP2_RX_PKT_SIZE(mtu) > MVPP2_BM_LONG_PKT_SIZE) {
-               if (port->xdp_prog) {
-                       netdev_err(dev, "Jumbo frames are not supported with XDP\n");
-                       return -EINVAL;
-               }
                if (priv->percpu_pools) {
                        netdev_warn(dev, "mtu %d too high, switching to shared buffers", mtu);
                        mvpp2_bm_switch_buffers(priv, false);
@@ -5307,8 +5309,8 @@ static int mvpp2_xdp_setup(struct mvpp2_port *port, struct netdev_bpf *bpf)
        bool running = netif_running(port->dev);
        bool reset = !prog != !port->xdp_prog;
 
-       if (port->dev->mtu > ETH_DATA_LEN) {
-               NL_SET_ERR_MSG_MOD(bpf->extack, "XDP is not supported with jumbo frames enabled");
+       if (port->dev->mtu > MVPP2_MAX_RX_BUF_SIZE) {
+               NL_SET_ERR_MSG_MOD(bpf->extack, "MTU too large for XDP");
                return -EOPNOTSUPP;
        }
 
@@ -7456,7 +7458,7 @@ static int mvpp2_probe(struct platform_device *pdev)
 
        shared = num_present_cpus() - priv->nthreads;
        if (shared > 0)
-               bitmap_fill(&priv->lock_map,
+               bitmap_set(&priv->lock_map, 0,
                            min_t(int, shared, MVPP2_MAX_THREADS));
 
        for (i = 0; i < MVPP2_MAX_THREADS; i++) {
index cb56e17..3ca6b94 100644 (file)
@@ -2341,7 +2341,7 @@ static int rvu_mbox_init(struct rvu *rvu, struct mbox_wq_info *mw,
                        goto free_regions;
                break;
        default:
-               return err;
+               goto free_regions;
        }
 
        mw->mbox_wq = alloc_workqueue(name,
index 3ce6ccd..b4599fe 100644 (file)
@@ -497,8 +497,8 @@ int prestera_bridge_port_join(struct net_device *br_dev,
 
        br_port = prestera_bridge_port_add(bridge, port->dev);
        if (IS_ERR(br_port)) {
-               err = PTR_ERR(br_port);
-               goto err_brport_create;
+               prestera_bridge_put(bridge);
+               return PTR_ERR(br_port);
        }
 
        err = switchdev_bridge_port_offload(br_port->dev, port->dev, NULL,
@@ -519,8 +519,6 @@ err_port_join:
        switchdev_bridge_port_unoffload(br_port->dev, NULL, NULL, NULL);
 err_switchdev_offload:
        prestera_bridge_port_put(br_port);
-err_brport_create:
-       prestera_bridge_put(bridge);
        return err;
 }
 
@@ -1124,7 +1122,7 @@ static int prestera_switchdev_blk_event(struct notifier_block *unused,
                                                     prestera_port_obj_attr_set);
                break;
        default:
-               err = -EOPNOTSUPP;
+               return NOTIFY_DONE;
        }
 
        return notifier_from_errno(err);
index 066d79e..10238be 100644 (file)
@@ -670,7 +670,7 @@ void __init mlx4_en_init_ptys2ethtool_map(void)
        MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_1000BASE_T, SPEED_1000,
                                       ETHTOOL_LINK_MODE_1000baseT_Full_BIT);
        MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_1000BASE_CX_SGMII, SPEED_1000,
-                                      ETHTOOL_LINK_MODE_1000baseKX_Full_BIT);
+                                      ETHTOOL_LINK_MODE_1000baseX_Full_BIT);
        MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_1000BASE_KX, SPEED_1000,
                                       ETHTOOL_LINK_MODE_1000baseKX_Full_BIT);
        MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_10GBASE_T, SPEED_10000,
@@ -682,9 +682,9 @@ void __init mlx4_en_init_ptys2ethtool_map(void)
        MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_10GBASE_KR, SPEED_10000,
                                       ETHTOOL_LINK_MODE_10000baseKR_Full_BIT);
        MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_10GBASE_CR, SPEED_10000,
-                                      ETHTOOL_LINK_MODE_10000baseKR_Full_BIT);
+                                      ETHTOOL_LINK_MODE_10000baseCR_Full_BIT);
        MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_10GBASE_SR, SPEED_10000,
-                                      ETHTOOL_LINK_MODE_10000baseKR_Full_BIT);
+                                      ETHTOOL_LINK_MODE_10000baseSR_Full_BIT);
        MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_20GBASE_KR2, SPEED_20000,
                                       ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT,
                                       ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT);
index 3f6d5c3..f1c10f2 100644 (file)
@@ -2286,9 +2286,14 @@ int mlx4_en_try_alloc_resources(struct mlx4_en_priv *priv,
                                bool carry_xdp_prog)
 {
        struct bpf_prog *xdp_prog;
-       int i, t;
+       int i, t, ret;
 
-       mlx4_en_copy_priv(tmp, priv, prof);
+       ret = mlx4_en_copy_priv(tmp, priv, prof);
+       if (ret) {
+               en_warn(priv, "%s: mlx4_en_copy_priv() failed, return\n",
+                       __func__);
+               return ret;
+       }
 
        if (mlx4_en_alloc_resources(tmp)) {
                en_warn(priv,
index 8eaa24d..a46284c 100644 (file)
@@ -341,6 +341,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
        case MLX5_CMD_OP_DEALLOC_SF:
        case MLX5_CMD_OP_DESTROY_UCTX:
        case MLX5_CMD_OP_DESTROY_UMEM:
+       case MLX5_CMD_OP_MODIFY_RQT:
                return MLX5_CMD_STAT_OK;
 
        case MLX5_CMD_OP_QUERY_HCA_CAP:
@@ -446,7 +447,6 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
        case MLX5_CMD_OP_MODIFY_TIS:
        case MLX5_CMD_OP_QUERY_TIS:
        case MLX5_CMD_OP_CREATE_RQT:
-       case MLX5_CMD_OP_MODIFY_RQT:
        case MLX5_CMD_OP_QUERY_RQT:
 
        case MLX5_CMD_OP_CREATE_FLOW_TABLE:
index 1429538..0015a81 100644 (file)
@@ -13,6 +13,9 @@ struct mlx5e_rx_res {
        unsigned int max_nch;
        u32 drop_rqn;
 
+       struct mlx5e_packet_merge_param pkt_merge_param;
+       struct rw_semaphore pkt_merge_param_sem;
+
        struct mlx5e_rss *rss[MLX5E_MAX_NUM_RSS];
        bool rss_active;
        u32 rss_rqns[MLX5E_INDIR_RQT_SIZE];
@@ -392,6 +395,7 @@ static int mlx5e_rx_res_ptp_init(struct mlx5e_rx_res *res)
        if (err)
                goto out;
 
+       /* Separated from the channels RQs, does not share pkt_merge state with them */
        mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn,
                                    mlx5e_rqt_get_rqtn(&res->ptp.rqt),
                                    inner_ft_support);
@@ -447,6 +451,9 @@ int mlx5e_rx_res_init(struct mlx5e_rx_res *res, struct mlx5_core_dev *mdev,
        res->max_nch = max_nch;
        res->drop_rqn = drop_rqn;
 
+       res->pkt_merge_param = *init_pkt_merge_param;
+       init_rwsem(&res->pkt_merge_param_sem);
+
        err = mlx5e_rx_res_rss_init_def(res, init_pkt_merge_param, init_nch);
        if (err)
                goto err_out;
@@ -513,7 +520,7 @@ u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res)
        return mlx5e_tir_get_tirn(&res->ptp.tir);
 }
 
-u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int ix)
+static u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int ix)
 {
        return mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt);
 }
@@ -656,6 +663,9 @@ int mlx5e_rx_res_packet_merge_set_param(struct mlx5e_rx_res *res,
        if (!builder)
                return -ENOMEM;
 
+       down_write(&res->pkt_merge_param_sem);
+       res->pkt_merge_param = *pkt_merge_param;
+
        mlx5e_tir_builder_build_packet_merge(builder, pkt_merge_param);
 
        final_err = 0;
@@ -681,6 +691,7 @@ int mlx5e_rx_res_packet_merge_set_param(struct mlx5e_rx_res *res,
                }
        }
 
+       up_write(&res->pkt_merge_param_sem);
        mlx5e_tir_builder_free(builder);
        return final_err;
 }
@@ -689,3 +700,31 @@ struct mlx5e_rss_params_hash mlx5e_rx_res_get_current_hash(struct mlx5e_rx_res *
 {
        return mlx5e_rss_get_hash(res->rss[0]);
 }
+
+int mlx5e_rx_res_tls_tir_create(struct mlx5e_rx_res *res, unsigned int rxq,
+                               struct mlx5e_tir *tir)
+{
+       bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT;
+       struct mlx5e_tir_builder *builder;
+       u32 rqtn;
+       int err;
+
+       builder = mlx5e_tir_builder_alloc(false);
+       if (!builder)
+               return -ENOMEM;
+
+       rqtn = mlx5e_rx_res_get_rqtn_direct(res, rxq);
+
+       mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn, rqtn,
+                                   inner_ft_support);
+       mlx5e_tir_builder_build_direct(builder);
+       mlx5e_tir_builder_build_tls(builder);
+       down_read(&res->pkt_merge_param_sem);
+       mlx5e_tir_builder_build_packet_merge(builder, &res->pkt_merge_param);
+       err = mlx5e_tir_init(tir, builder, res->mdev, false);
+       up_read(&res->pkt_merge_param_sem);
+
+       mlx5e_tir_builder_free(builder);
+
+       return err;
+}
index d09f7d1..b39b20a 100644 (file)
@@ -37,9 +37,6 @@ u32 mlx5e_rx_res_get_tirn_rss(struct mlx5e_rx_res *res, enum mlx5_traffic_types
 u32 mlx5e_rx_res_get_tirn_rss_inner(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt);
 u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res);
 
-/* RQTN getters for modules that create their own TIRs */
-u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int ix);
-
 /* Activate/deactivate API */
 void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs);
 void mlx5e_rx_res_channels_deactivate(struct mlx5e_rx_res *res);
@@ -69,4 +66,7 @@ struct mlx5e_rss *mlx5e_rx_res_rss_get(struct mlx5e_rx_res *res, u32 rss_idx);
 /* Workaround for hairpin */
 struct mlx5e_rss_params_hash mlx5e_rx_res_get_current_hash(struct mlx5e_rx_res *res);
 
+/* Accel TIRs */
+int mlx5e_rx_res_tls_tir_create(struct mlx5e_rx_res *res, unsigned int rxq,
+                               struct mlx5e_tir *tir);
 #endif /* __MLX5_EN_RX_RES_H__ */
index fb53973..2db9573 100644 (file)
@@ -191,7 +191,7 @@ static void mlx5e_ipsec_set_swp(struct sk_buff *skb,
                        eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2;
                        eseg->swp_inner_l4_offset =
                                (skb->csum_start + skb->head - skb->data) / 2;
-                       if (skb->protocol == htons(ETH_P_IPV6))
+                       if (inner_ip_hdr(skb)->version == 6)
                                eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6;
                        break;
                default:
index a2a9f68..1571181 100644 (file)
@@ -100,25 +100,6 @@ mlx5e_ktls_rx_resync_create_resp_list(void)
        return resp_list;
 }
 
-static int mlx5e_ktls_create_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir, u32 rqtn)
-{
-       struct mlx5e_tir_builder *builder;
-       int err;
-
-       builder = mlx5e_tir_builder_alloc(false);
-       if (!builder)
-               return -ENOMEM;
-
-       mlx5e_tir_builder_build_rqt(builder, mdev->mlx5e_res.hw_objs.td.tdn, rqtn, false);
-       mlx5e_tir_builder_build_direct(builder);
-       mlx5e_tir_builder_build_tls(builder);
-       err = mlx5e_tir_init(tir, builder, mdev, false);
-
-       mlx5e_tir_builder_free(builder);
-
-       return err;
-}
-
 static void accel_rule_handle_work(struct work_struct *work)
 {
        struct mlx5e_ktls_offload_context_rx *priv_rx;
@@ -609,7 +590,6 @@ int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk,
        struct mlx5_core_dev *mdev;
        struct mlx5e_priv *priv;
        int rxq, err;
-       u32 rqtn;
 
        tls_ctx = tls_get_ctx(sk);
        priv = netdev_priv(netdev);
@@ -635,9 +615,7 @@ int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk,
        priv_rx->sw_stats = &priv->tls->sw_stats;
        mlx5e_set_ktls_rx_priv_ctx(tls_ctx, priv_rx);
 
-       rqtn = mlx5e_rx_res_get_rqtn_direct(priv->rx_res, rxq);
-
-       err = mlx5e_ktls_create_tir(mdev, &priv_rx->tir, rqtn);
+       err = mlx5e_rx_res_tls_tir_create(priv->rx_res, rxq, &priv_rx->tir);
        if (err)
                goto err_create_tir;
 
index e58a9ec..48895d7 100644 (file)
@@ -1080,6 +1080,10 @@ static mlx5e_stats_grp_t mlx5e_ul_rep_stats_grps[] = {
        &MLX5E_STATS_GRP(pme),
        &MLX5E_STATS_GRP(channels),
        &MLX5E_STATS_GRP(per_port_buff_congest),
+#ifdef CONFIG_MLX5_EN_IPSEC
+       &MLX5E_STATS_GRP(ipsec_sw),
+       &MLX5E_STATS_GRP(ipsec_hw),
+#endif
 };
 
 static unsigned int mlx5e_ul_rep_stats_grps_num(struct mlx5e_priv *priv)
index 96967b0..793511d 100644 (file)
@@ -543,13 +543,13 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq,
                                     u16 klm_entries, u16 index)
 {
        struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo;
-       u16 entries, pi, i, header_offset, err, wqe_bbs, new_entries;
+       u16 entries, pi, header_offset, err, wqe_bbs, new_entries;
        u32 lkey = rq->mdev->mlx5e_res.hw_objs.mkey;
        struct page *page = shampo->last_page;
        u64 addr = shampo->last_addr;
        struct mlx5e_dma_info *dma_info;
        struct mlx5e_umr_wqe *umr_wqe;
-       int headroom;
+       int headroom, i;
 
        headroom = rq->buff.headroom;
        new_entries = klm_entries - (shampo->pi & (MLX5_UMR_KLM_ALIGNMENT - 1));
@@ -601,9 +601,7 @@ update_klm:
 
 err_unmap:
        while (--i >= 0) {
-               if (--index < 0)
-                       index = shampo->hd_per_wq - 1;
-               dma_info = &shampo->info[index];
+               dma_info = &shampo->info[--index];
                if (!(i & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1))) {
                        dma_info->addr = ALIGN_DOWN(dma_info->addr, PAGE_SIZE);
                        mlx5e_page_release(rq, dma_info, true);
index c6cc67c..d377ddc 100644 (file)
@@ -130,7 +130,7 @@ static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw,
        /* If vports min rate divider is 0 but their group has bw_share configured, then
         * need to set bw_share for vports to minimal value.
         */
-       if (!group_level && !max_guarantee && group->bw_share)
+       if (!group_level && !max_guarantee && group && group->bw_share)
                return 1;
        return 0;
 }
@@ -423,7 +423,7 @@ static int esw_qos_vport_update_group(struct mlx5_eswitch *esw,
                return err;
 
        /* Recalculate bw share weights of old and new groups */
-       if (vport->qos.bw_share) {
+       if (vport->qos.bw_share || new_group->bw_share) {
                esw_qos_normalize_vports_min_rate(esw, curr_group, extack);
                esw_qos_normalize_vports_min_rate(esw, new_group, extack);
        }
index a464556..32bc08a 100644 (file)
@@ -329,14 +329,25 @@ static bool
 esw_is_indir_table(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr)
 {
        struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+       bool result = false;
        int i;
 
-       for (i = esw_attr->split_count; i < esw_attr->out_count; i++)
+       /* The indirect table is supported only for flows whose in_port is the
+        * uplink and whose destinations are vports on the same eswitch as the
+        * uplink; return false if at least one destination doesn't meet this
+        * criterion.
+        */
+       for (i = esw_attr->split_count; i < esw_attr->out_count; i++) {
                if (esw_attr->dests[i].rep &&
                    mlx5_esw_indir_table_needed(esw, attr, esw_attr->dests[i].rep->vport,
-                                               esw_attr->dests[i].mdev))
-                       return true;
-       return false;
+                                               esw_attr->dests[i].mdev)) {
+                       result = true;
+               } else {
+                       result = false;
+                       break;
+               }
+       }
+       return result;
 }
 
 static int
@@ -2512,6 +2523,7 @@ static int esw_set_master_egress_rule(struct mlx5_core_dev *master,
        struct mlx5_eswitch *esw = master->priv.eswitch;
        struct mlx5_flow_table_attr ft_attr = {
                .max_fte = 1, .prio = 0, .level = 0,
+               .flags = MLX5_FLOW_TABLE_OTHER_VPORT,
        };
        struct mlx5_flow_namespace *egress_ns;
        struct mlx5_flow_table *acl;
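
The esw_is_indir_table() rewrite above inverts the loop from "true if any destination needs the indirect table" to "true only if every destination does". A generic sketch of the new loop shape, with a hypothetical predicate standing in for mlx5_esw_indir_table_needed():

#include <stdbool.h>
#include <stdio.h>

static bool dest_needs_indir(int dest)
{
        return dest > 0;
}

/* Every destination must match; bail out on the first one that doesn't. */
static bool all_need_indir(const int *dests, int n)
{
        bool result = false;

        for (int i = 0; i < n; i++) {
                if (dest_needs_indir(dests[i])) {
                        result = true;
                } else {
                        result = false;
                        break;
                }
        }
        return result;
}

int main(void)
{
        int mixed[] = { 1, 0, 1 };      /* one destination fails the check */
        int all[]   = { 1, 1, 1 };

        printf("mixed: %d, all: %d\n", all_need_indir(mixed, 3),
               all_need_indir(all, 3));
        return 0;
}
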
index 64f1abc..3ca9988 100644 (file)
@@ -835,6 +835,9 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev)
 
        health->timer.expires = jiffies + msecs_to_jiffies(poll_interval_ms);
        add_timer(&health->timer);
+
+       if (mlx5_core_is_pf(dev) && MLX5_CAP_MCAM_REG(dev, mrtc))
+               queue_delayed_work(health->wq, &health->update_fw_log_ts_work, 0);
 }
 
 void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health)
@@ -902,8 +905,6 @@ int mlx5_health_init(struct mlx5_core_dev *dev)
        INIT_WORK(&health->fatal_report_work, mlx5_fw_fatal_reporter_err_work);
        INIT_WORK(&health->report_work, mlx5_fw_reporter_err_work);
        INIT_DELAYED_WORK(&health->update_fw_log_ts_work, mlx5_health_log_ts_update);
-       if (mlx5_core_is_pf(dev))
-               queue_delayed_work(health->wq, &health->update_fw_log_ts_work, 0);
 
        return 0;
 
index ad63dd4..a6592f9 100644 (file)
@@ -608,4 +608,5 @@ void mlx5_lag_port_sel_destroy(struct mlx5_lag *ldev)
        if (port_sel->tunnel)
                mlx5_destroy_ttc_table(port_sel->inner.ttc);
        mlx5_lag_destroy_definers(ldev);
+       memset(port_sel, 0, sizeof(*port_sel));
 }
index 0dd96a6..c1df0d3 100644 (file)
@@ -31,11 +31,11 @@ static void tout_set(struct mlx5_core_dev *dev, u64 val, enum mlx5_timeouts_type
        dev->timeouts->to[type] = val;
 }
 
-static void tout_set_def_val(struct mlx5_core_dev *dev)
+void mlx5_tout_set_def_val(struct mlx5_core_dev *dev)
 {
        int i;
 
-       for (i = MLX5_TO_FW_PRE_INIT_TIMEOUT_MS; i < MAX_TIMEOUT_TYPES; i++)
+       for (i = 0; i < MAX_TIMEOUT_TYPES; i++)
                tout_set(dev, tout_def_sw_val[i], i);
 }
 
@@ -45,7 +45,6 @@ int mlx5_tout_init(struct mlx5_core_dev *dev)
        if (!dev->timeouts)
                return -ENOMEM;
 
-       tout_set_def_val(dev);
        return 0;
 }
 
index 31faa5c..1c42ead 100644 (file)
@@ -34,6 +34,7 @@ int mlx5_tout_init(struct mlx5_core_dev *dev);
 void mlx5_tout_cleanup(struct mlx5_core_dev *dev);
 void mlx5_tout_query_iseg(struct mlx5_core_dev *dev);
 int mlx5_tout_query_dtor(struct mlx5_core_dev *dev);
+void mlx5_tout_set_def_val(struct mlx5_core_dev *dev);
 u64 _mlx5_tout_ms(struct mlx5_core_dev *dev, enum mlx5_timeouts_types type);
 
 #define mlx5_tout_ms(dev, type) _mlx5_tout_ms(dev, MLX5_TO_##type##_MS)
index a92a92a..7df9c7f 100644 (file)
@@ -992,11 +992,7 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot)
        if (mlx5_core_is_pf(dev))
                pcie_print_link_status(dev->pdev);
 
-       err = mlx5_tout_init(dev);
-       if (err) {
-               mlx5_core_err(dev, "Failed initializing timeouts, aborting\n");
-               return err;
-       }
+       mlx5_tout_set_def_val(dev);
 
        /* wait for firmware to accept initialization segments configurations
         */
@@ -1005,13 +1001,13 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot)
        if (err) {
                mlx5_core_err(dev, "Firmware over %llu MS in pre-initializing state, aborting\n",
                              mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT));
-               goto err_tout_cleanup;
+               return err;
        }
 
        err = mlx5_cmd_init(dev);
        if (err) {
                mlx5_core_err(dev, "Failed initializing command interface, aborting\n");
-               goto err_tout_cleanup;
+               return err;
        }
 
        mlx5_tout_query_iseg(dev);
@@ -1075,18 +1071,16 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot)
 
        mlx5_set_driver_version(dev);
 
-       mlx5_start_health_poll(dev);
-
        err = mlx5_query_hca_caps(dev);
        if (err) {
                mlx5_core_err(dev, "query hca failed\n");
-               goto stop_health;
+               goto reclaim_boot_pages;
        }
 
+       mlx5_start_health_poll(dev);
+
        return 0;
 
-stop_health:
-       mlx5_stop_health_poll(dev, boot);
 reclaim_boot_pages:
        mlx5_reclaim_startup_pages(dev);
 err_disable_hca:
@@ -1094,8 +1088,6 @@ err_disable_hca:
 err_cmd_cleanup:
        mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN);
        mlx5_cmd_cleanup(dev);
-err_tout_cleanup:
-       mlx5_tout_cleanup(dev);
 
        return err;
 }
@@ -1114,7 +1106,6 @@ static int mlx5_function_teardown(struct mlx5_core_dev *dev, bool boot)
        mlx5_core_disable_hca(dev, 0);
        mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN);
        mlx5_cmd_cleanup(dev);
-       mlx5_tout_cleanup(dev);
 
        return 0;
 }
@@ -1476,6 +1467,12 @@ int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx)
                                            mlx5_debugfs_root);
        INIT_LIST_HEAD(&priv->traps);
 
+       err = mlx5_tout_init(dev);
+       if (err) {
+               mlx5_core_err(dev, "Failed initializing timeouts, aborting\n");
+               goto err_timeout_init;
+       }
+
        err = mlx5_health_init(dev);
        if (err)
                goto err_health_init;
@@ -1501,6 +1498,8 @@ err_adev_init:
 err_pagealloc_init:
        mlx5_health_cleanup(dev);
 err_health_init:
+       mlx5_tout_cleanup(dev);
+err_timeout_init:
        debugfs_remove(dev->priv.dbg_root);
        mutex_destroy(&priv->pgdir_mutex);
        mutex_destroy(&priv->alloc_mutex);
@@ -1518,6 +1517,7 @@ void mlx5_mdev_uninit(struct mlx5_core_dev *dev)
        mlx5_adev_cleanup(dev);
        mlx5_pagealloc_cleanup(dev);
        mlx5_health_cleanup(dev);
+       mlx5_tout_cleanup(dev);
        debugfs_remove_recursive(dev->priv.dbg_root);
        mutex_destroy(&priv->pgdir_mutex);
        mutex_destroy(&priv->alloc_mutex);
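
These hunks move mlx5_tout_init()/mlx5_tout_cleanup() into mlx5_mdev_init()/mlx5_mdev_uninit() and rewire the goto-based error unwinding so every init step has a matching label that releases the steps before it, with the uninit path releasing in reverse order. A generic sketch of that init/unwind symmetry, using hypothetical step names rather than mlx5 code:

#include <stdio.h>

static int  timeouts_init(void)    { return 0; }
static void timeouts_cleanup(void) { puts("timeouts cleanup"); }
static int  health_init(void)      { return 0; }
static void health_cleanup(void)   { puts("health cleanup"); }
static int  pages_init(void)       { return -1; } /* pretend this step fails */
static void pages_cleanup(void)    { puts("pages cleanup"); }

static int mdev_init(void)
{
        int err;

        err = timeouts_init();
        if (err)
                goto err_timeouts;
        err = health_init();
        if (err)
                goto err_health;
        err = pages_init();
        if (err)
                goto err_pages;
        return 0;

err_pages:              /* unwind in reverse order of initialization */
        health_cleanup();
err_health:
        timeouts_cleanup();
err_timeouts:
        return err;
}

static void mdev_uninit(void)
{
        pages_cleanup();
        health_cleanup();
        timeouts_cleanup();
}

int main(void)
{
        if (mdev_init())
                return 1;
        mdev_uninit();
        return 0;
}
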
index 5925db3..03e5bad 100644 (file)
@@ -2153,7 +2153,7 @@ static void mlxsw_sp_pude_event_func(const struct mlxsw_reg_info *reg,
        max_ports = mlxsw_core_max_ports(mlxsw_sp->core);
        local_port = mlxsw_reg_pude_local_port_get(pude_pl);
 
-       if (WARN_ON_ONCE(local_port >= max_ports))
+       if (WARN_ON_ONCE(!local_port || local_port >= max_ports))
                return;
        mlxsw_sp_port = mlxsw_sp->ports[local_port];
        if (!mlxsw_sp_port)
@@ -3290,10 +3290,10 @@ mlxsw_sp_resources_rif_mac_profile_register(struct mlxsw_core *mlxsw_core)
        u8 max_rif_mac_profiles;
 
        if (!MLXSW_CORE_RES_VALID(mlxsw_core, MAX_RIF_MAC_PROFILES))
-               return -EIO;
-
-       max_rif_mac_profiles = MLXSW_CORE_RES_GET(mlxsw_core,
-                                                 MAX_RIF_MAC_PROFILES);
+               max_rif_mac_profiles = 1;
+       else
+               max_rif_mac_profiles = MLXSW_CORE_RES_GET(mlxsw_core,
+                                                         MAX_RIF_MAC_PROFILES);
        devlink_resource_size_params_init(&size_params, max_rif_mac_profiles,
                                          max_rif_mac_profiles, 1,
                                          DEVLINK_RESOURCE_UNIT_ENTRY);
index 4fc9782..7d76474 100644 (file)
@@ -914,8 +914,7 @@ static int lan743x_phy_reset(struct lan743x_adapter *adapter)
 }
 
 static void lan743x_phy_update_flowcontrol(struct lan743x_adapter *adapter,
-                                          u8 duplex, u16 local_adv,
-                                          u16 remote_adv)
+                                          u16 local_adv, u16 remote_adv)
 {
        struct lan743x_phy *phy = &adapter->phy;
        u8 cap;
@@ -943,7 +942,6 @@ static void lan743x_phy_link_status_change(struct net_device *netdev)
 
        phy_print_status(phydev);
        if (phydev->state == PHY_RUNNING) {
-               struct ethtool_link_ksettings ksettings;
                int remote_advertisement = 0;
                int local_advertisement = 0;
 
@@ -980,18 +978,14 @@ static void lan743x_phy_link_status_change(struct net_device *netdev)
                }
                lan743x_csr_write(adapter, MAC_CR, data);
 
-               memset(&ksettings, 0, sizeof(ksettings));
-               phy_ethtool_get_link_ksettings(netdev, &ksettings);
                local_advertisement =
                        linkmode_adv_to_mii_adv_t(phydev->advertising);
                remote_advertisement =
                        linkmode_adv_to_mii_adv_t(phydev->lp_advertising);
 
-               lan743x_phy_update_flowcontrol(adapter,
-                                              ksettings.base.duplex,
-                                              local_advertisement,
+               lan743x_phy_update_flowcontrol(adapter, local_advertisement,
                                               remote_advertisement);
-               lan743x_ptp_update_latency(adapter, ksettings.base.speed);
+               lan743x_ptp_update_latency(adapter, phydev->speed);
        }
 }
 
index e6c18b5..1e4ad95 100644 (file)
@@ -1278,6 +1278,225 @@ int ocelot_fdb_dump(struct ocelot *ocelot, int port,
 }
 EXPORT_SYMBOL(ocelot_fdb_dump);
 
+static void ocelot_populate_l2_ptp_trap_key(struct ocelot_vcap_filter *trap)
+{
+       trap->key_type = OCELOT_VCAP_KEY_ETYPE;
+       *(__be16 *)trap->key.etype.etype.value = htons(ETH_P_1588);
+       *(__be16 *)trap->key.etype.etype.mask = htons(0xffff);
+}
+
+static void
+ocelot_populate_ipv4_ptp_event_trap_key(struct ocelot_vcap_filter *trap)
+{
+       trap->key_type = OCELOT_VCAP_KEY_IPV4;
+       trap->key.ipv4.dport.value = PTP_EV_PORT;
+       trap->key.ipv4.dport.mask = 0xffff;
+}
+
+static void
+ocelot_populate_ipv6_ptp_event_trap_key(struct ocelot_vcap_filter *trap)
+{
+       trap->key_type = OCELOT_VCAP_KEY_IPV6;
+       trap->key.ipv6.dport.value = PTP_EV_PORT;
+       trap->key.ipv6.dport.mask = 0xffff;
+}
+
+static void
+ocelot_populate_ipv4_ptp_general_trap_key(struct ocelot_vcap_filter *trap)
+{
+       trap->key_type = OCELOT_VCAP_KEY_IPV4;
+       trap->key.ipv4.dport.value = PTP_GEN_PORT;
+       trap->key.ipv4.dport.mask = 0xffff;
+}
+
+static void
+ocelot_populate_ipv6_ptp_general_trap_key(struct ocelot_vcap_filter *trap)
+{
+       trap->key_type = OCELOT_VCAP_KEY_IPV6;
+       trap->key.ipv6.dport.value = PTP_GEN_PORT;
+       trap->key.ipv6.dport.mask = 0xffff;
+}
+
+static int ocelot_trap_add(struct ocelot *ocelot, int port,
+                          unsigned long cookie,
+                          void (*populate)(struct ocelot_vcap_filter *f))
+{
+       struct ocelot_vcap_block *block_vcap_is2;
+       struct ocelot_vcap_filter *trap;
+       bool new = false;
+       int err;
+
+       block_vcap_is2 = &ocelot->block[VCAP_IS2];
+
+       trap = ocelot_vcap_block_find_filter_by_id(block_vcap_is2, cookie,
+                                                  false);
+       if (!trap) {
+               trap = kzalloc(sizeof(*trap), GFP_KERNEL);
+               if (!trap)
+                       return -ENOMEM;
+
+               populate(trap);
+               trap->prio = 1;
+               trap->id.cookie = cookie;
+               trap->id.tc_offload = false;
+               trap->block_id = VCAP_IS2;
+               trap->type = OCELOT_VCAP_FILTER_OFFLOAD;
+               trap->lookup = 0;
+               trap->action.cpu_copy_ena = true;
+               trap->action.mask_mode = OCELOT_MASK_MODE_PERMIT_DENY;
+               trap->action.port_mask = 0;
+               new = true;
+       }
+
+       trap->ingress_port_mask |= BIT(port);
+
+       if (new)
+               err = ocelot_vcap_filter_add(ocelot, trap, NULL);
+       else
+               err = ocelot_vcap_filter_replace(ocelot, trap);
+       if (err) {
+               trap->ingress_port_mask &= ~BIT(port);
+               if (!trap->ingress_port_mask)
+                       kfree(trap);
+               return err;
+       }
+
+       return 0;
+}
+
+static int ocelot_trap_del(struct ocelot *ocelot, int port,
+                          unsigned long cookie)
+{
+       struct ocelot_vcap_block *block_vcap_is2;
+       struct ocelot_vcap_filter *trap;
+
+       block_vcap_is2 = &ocelot->block[VCAP_IS2];
+
+       trap = ocelot_vcap_block_find_filter_by_id(block_vcap_is2, cookie,
+                                                  false);
+       if (!trap)
+               return 0;
+
+       trap->ingress_port_mask &= ~BIT(port);
+       if (!trap->ingress_port_mask)
+               return ocelot_vcap_filter_del(ocelot, trap);
+
+       return ocelot_vcap_filter_replace(ocelot, trap);
+}
+
+static int ocelot_l2_ptp_trap_add(struct ocelot *ocelot, int port)
+{
+       unsigned long l2_cookie = ocelot->num_phys_ports + 1;
+
+       return ocelot_trap_add(ocelot, port, l2_cookie,
+                              ocelot_populate_l2_ptp_trap_key);
+}
+
+static int ocelot_l2_ptp_trap_del(struct ocelot *ocelot, int port)
+{
+       unsigned long l2_cookie = ocelot->num_phys_ports + 1;
+
+       return ocelot_trap_del(ocelot, port, l2_cookie);
+}
+
+static int ocelot_ipv4_ptp_trap_add(struct ocelot *ocelot, int port)
+{
+       unsigned long ipv4_gen_cookie = ocelot->num_phys_ports + 2;
+       unsigned long ipv4_ev_cookie = ocelot->num_phys_ports + 3;
+       int err;
+
+       err = ocelot_trap_add(ocelot, port, ipv4_ev_cookie,
+                             ocelot_populate_ipv4_ptp_event_trap_key);
+       if (err)
+               return err;
+
+       err = ocelot_trap_add(ocelot, port, ipv4_gen_cookie,
+                             ocelot_populate_ipv4_ptp_general_trap_key);
+       if (err)
+               ocelot_trap_del(ocelot, port, ipv4_ev_cookie);
+
+       return err;
+}
+
+static int ocelot_ipv4_ptp_trap_del(struct ocelot *ocelot, int port)
+{
+       unsigned long ipv4_gen_cookie = ocelot->num_phys_ports + 2;
+       unsigned long ipv4_ev_cookie = ocelot->num_phys_ports + 3;
+       int err;
+
+       err = ocelot_trap_del(ocelot, port, ipv4_ev_cookie);
+       err |= ocelot_trap_del(ocelot, port, ipv4_gen_cookie);
+       return err;
+}
+
+static int ocelot_ipv6_ptp_trap_add(struct ocelot *ocelot, int port)
+{
+       unsigned long ipv6_gen_cookie = ocelot->num_phys_ports + 4;
+       unsigned long ipv6_ev_cookie = ocelot->num_phys_ports + 5;
+       int err;
+
+       err = ocelot_trap_add(ocelot, port, ipv6_ev_cookie,
+                             ocelot_populate_ipv6_ptp_event_trap_key);
+       if (err)
+               return err;
+
+       err = ocelot_trap_add(ocelot, port, ipv6_gen_cookie,
+                             ocelot_populate_ipv6_ptp_general_trap_key);
+       if (err)
+               ocelot_trap_del(ocelot, port, ipv6_ev_cookie);
+
+       return err;
+}
+
+static int ocelot_ipv6_ptp_trap_del(struct ocelot *ocelot, int port)
+{
+       unsigned long ipv6_gen_cookie = ocelot->num_phys_ports + 4;
+       unsigned long ipv6_ev_cookie = ocelot->num_phys_ports + 5;
+       int err;
+
+       err = ocelot_trap_del(ocelot, port, ipv6_ev_cookie);
+       err |= ocelot_trap_del(ocelot, port, ipv6_gen_cookie);
+       return err;
+}
+
+static int ocelot_setup_ptp_traps(struct ocelot *ocelot, int port,
+                                 bool l2, bool l4)
+{
+       int err;
+
+       if (l2)
+               err = ocelot_l2_ptp_trap_add(ocelot, port);
+       else
+               err = ocelot_l2_ptp_trap_del(ocelot, port);
+       if (err)
+               return err;
+
+       if (l4) {
+               err = ocelot_ipv4_ptp_trap_add(ocelot, port);
+               if (err)
+                       goto err_ipv4;
+
+               err = ocelot_ipv6_ptp_trap_add(ocelot, port);
+               if (err)
+                       goto err_ipv6;
+       } else {
+               err = ocelot_ipv4_ptp_trap_del(ocelot, port);
+
+               err |= ocelot_ipv6_ptp_trap_del(ocelot, port);
+       }
+       if (err)
+               return err;
+
+       return 0;
+
+err_ipv6:
+       ocelot_ipv4_ptp_trap_del(ocelot, port);
+err_ipv4:
+       if (l2)
+               ocelot_l2_ptp_trap_del(ocelot, port);
+       return err;
+}
+
 int ocelot_hwstamp_get(struct ocelot *ocelot, int port, struct ifreq *ifr)
 {
        return copy_to_user(ifr->ifr_data, &ocelot->hwtstamp_config,
@@ -1288,7 +1507,9 @@ EXPORT_SYMBOL(ocelot_hwstamp_get);
 int ocelot_hwstamp_set(struct ocelot *ocelot, int port, struct ifreq *ifr)
 {
        struct ocelot_port *ocelot_port = ocelot->ports[port];
+       bool l2 = false, l4 = false;
        struct hwtstamp_config cfg;
+       int err;
 
        if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
                return -EFAULT;
@@ -1320,28 +1541,42 @@ int ocelot_hwstamp_set(struct ocelot *ocelot, int port, struct ifreq *ifr)
        switch (cfg.rx_filter) {
        case HWTSTAMP_FILTER_NONE:
                break;
-       case HWTSTAMP_FILTER_ALL:
-       case HWTSTAMP_FILTER_SOME:
-       case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
-       case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
-       case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
-       case HWTSTAMP_FILTER_NTP_ALL:
        case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
        case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
        case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+               l4 = true;
+               break;
        case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
        case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
        case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+               l2 = true;
+               break;
        case HWTSTAMP_FILTER_PTP_V2_EVENT:
        case HWTSTAMP_FILTER_PTP_V2_SYNC:
        case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
-               cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
+               l2 = true;
+               l4 = true;
                break;
        default:
                mutex_unlock(&ocelot->ptp_lock);
                return -ERANGE;
        }
 
+       err = ocelot_setup_ptp_traps(ocelot, port, l2, l4);
+       if (err) {
+               mutex_unlock(&ocelot->ptp_lock);
+               return err;
+       }
+
+       if (l2 && l4)
+               cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
+       else if (l2)
+               cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT;
+       else if (l4)
+               cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_L4_EVENT;
+       else
+               cfg.rx_filter = HWTSTAMP_FILTER_NONE;
+
        /* Commit back the result & save it */
        memcpy(&ocelot->hwtstamp_config, &cfg, sizeof(cfg));
        mutex_unlock(&ocelot->ptp_lock);
@@ -1444,7 +1679,10 @@ int ocelot_get_ts_info(struct ocelot *ocelot, int port,
                                 SOF_TIMESTAMPING_RAW_HARDWARE;
        info->tx_types = BIT(HWTSTAMP_TX_OFF) | BIT(HWTSTAMP_TX_ON) |
                         BIT(HWTSTAMP_TX_ONESTEP_SYNC);
-       info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) | BIT(HWTSTAMP_FILTER_ALL);
+       info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) |
+                          BIT(HWTSTAMP_FILTER_PTP_V2_EVENT) |
+                          BIT(HWTSTAMP_FILTER_PTP_V2_L2_EVENT) |
+                          BIT(HWTSTAMP_FILTER_PTP_V2_L4_EVENT);
 
        return 0;
 }
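
With the trap plumbing above, ocelot_hwstamp_set() now translates the requested rx_filter into L2 and/or L4 PTP traps and reports back the narrowest filter it actually enabled. From user space this is the ordinary SIOCSHWTSTAMP round-trip; a minimal sketch (the interface name "swp0" and the error handling are illustrative):

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/net_tstamp.h>
#include <linux/sockios.h>

int main(void)
{
        struct hwtstamp_config cfg = {
                .tx_type   = HWTSTAMP_TX_ON,
                .rx_filter = HWTSTAMP_FILTER_PTP_V2_L4_EVENT,
        };
        struct ifreq ifr;
        int fd = socket(AF_INET, SOCK_DGRAM, 0);

        memset(&ifr, 0, sizeof(ifr));
        strncpy(ifr.ifr_name, "swp0", IFNAMSIZ - 1);    /* example port */
        ifr.ifr_data = (void *)&cfg;

        if (fd < 0 || ioctl(fd, SIOCSHWTSTAMP, &ifr) < 0) {
                perror("SIOCSHWTSTAMP");
                return 1;
        }
        /* The driver may adjust the filter; read back what it granted. */
        printf("granted rx_filter = %d\n", cfg.rx_filter);
        close(fd);
        return 0;
}
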
index 99d7376..337cd08 100644 (file)
@@ -1217,6 +1217,22 @@ int ocelot_vcap_filter_del(struct ocelot *ocelot,
 }
 EXPORT_SYMBOL(ocelot_vcap_filter_del);
 
+int ocelot_vcap_filter_replace(struct ocelot *ocelot,
+                              struct ocelot_vcap_filter *filter)
+{
+       struct ocelot_vcap_block *block = &ocelot->block[filter->block_id];
+       int index;
+
+       index = ocelot_vcap_block_get_filter_index(block, filter);
+       if (index < 0)
+               return index;
+
+       vcap_entry_set(ocelot, index, filter);
+
+       return 0;
+}
+EXPORT_SYMBOL(ocelot_vcap_filter_replace);
+
 int ocelot_vcap_filter_stats_update(struct ocelot *ocelot,
                                    struct ocelot_vcap_filter *filter)
 {
index ca46860..0a02d8b 100644 (file)
@@ -120,7 +120,7 @@ static const struct net_device_ops xtsonic_netdev_ops = {
        .ndo_set_mac_address    = eth_mac_addr,
 };
 
-static int __init sonic_probe1(struct net_device *dev)
+static int sonic_probe1(struct net_device *dev)
 {
        unsigned int silicon_revision;
        struct sonic_local *lp = netdev_priv(dev);
index df20373..0b1865e 100644 (file)
@@ -565,7 +565,6 @@ struct nfp_net_dp {
  * @exn_name:           Name for Exception interrupt
  * @shared_handler:     Handler for shared interrupts
  * @shared_name:        Name for shared interrupt
- * @me_freq_mhz:        ME clock_freq (MHz)
  * @reconfig_lock:     Protects @reconfig_posted, @reconfig_timer_active,
  *                     @reconfig_sync_present and HW reconfiguration request
  *                     regs/machinery from async requests (sync must take
@@ -650,8 +649,6 @@ struct nfp_net {
        irq_handler_t shared_handler;
        char shared_name[IFNAMSIZ + 8];
 
-       u32 me_freq_mhz;
-
        bool link_up;
        spinlock_t link_status_lock;
 
index 1de076f..cf78829 100644 (file)
@@ -1344,7 +1344,7 @@ static int nfp_net_set_coalesce(struct net_device *netdev,
         * ME timestamp ticks.  There are 16 ME clock cycles for each timestamp
         * count.
         */
-       factor = nn->me_freq_mhz / 16;
+       factor = nn->tlv_caps.me_freq_mhz / 16;
 
        /* Each pair of (usecs, max_frames) fields specifies that interrupts
         * should be coalesced until
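
The hunks above drop the private me_freq_mhz copy from struct nfp_net and compute the coalescing factor from the value parsed into tlv_caps instead. Per the quoted comment, coalescing is programmed in ME timestamp ticks and there are 16 ME clock cycles per tick, so factor is ticks per microsecond. A worked example with an illustrative clock rate:

#include <stdio.h>

int main(void)
{
        unsigned int me_freq_mhz = 1200;        /* example ME clock, MHz     */
        unsigned int factor = me_freq_mhz / 16; /* timestamp ticks per usec  */
        unsigned int usecs = 50;                /* requested coalescing delay */

        /* 1200 MHz / 16 = 75 ticks per microsecond -> 50 us = 3750 ticks */
        printf("%u us -> %u ticks\n", usecs, usecs * factor);
        return 0;
}
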
index cfeb762..07a00dd 100644 (file)
@@ -1209,7 +1209,7 @@ static void *nixge_get_nvmem_address(struct device *dev)
 
        cell = nvmem_cell_get(dev, "address");
        if (IS_ERR(cell))
-               return NULL;
+               return cell;
 
        mac = nvmem_cell_read(cell, &cell_size);
        nvmem_cell_put(cell);
@@ -1282,7 +1282,7 @@ static int nixge_probe(struct platform_device *pdev)
        ndev->max_mtu = NIXGE_JUMBO_MTU;
 
        mac_addr = nixge_get_nvmem_address(&pdev->dev);
-       if (mac_addr && is_valid_ether_addr(mac_addr)) {
+       if (!IS_ERR(mac_addr) && is_valid_ether_addr(mac_addr)) {
                eth_hw_addr_set(ndev, mac_addr);
                kfree(mac_addr);
        } else {
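
The nixge change returns the ERR_PTR-encoded value from nvmem_cell_get() instead of flattening it to NULL, so the caller now tests it with IS_ERR(). The idiom encodes small negative errno values at the top of the pointer range; a user-space re-implementation sketch for reference (the kernel's real helpers live in include/linux/err.h):

#include <stdio.h>
#include <errno.h>

#define MAX_ERRNO 4095

static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
        return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

/* Hypothetical lookup: return the MAC bytes or an errno-carrying pointer. */
static void *get_mac_address(int have_cell)
{
        static unsigned char mac[6] = { 0x02, 0x00, 0x00, 0x12, 0x34, 0x56 };

        if (!have_cell)
                return ERR_PTR(-ENOENT);        /* an error, not NULL */
        return mac;
}

int main(void)
{
        void *mac = get_mac_address(0);

        if (IS_ERR(mac))
                printf("no MAC in nvmem: %ld\n", PTR_ERR(mac));
        return 0;
}
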
index a97f691..6958ade 100644 (file)
@@ -1045,7 +1045,7 @@ static int qed_int_deassertion(struct qed_hwfn  *p_hwfn,
                if (!parities)
                        continue;
 
-               for (j = 0, bit_idx = 0; bit_idx < 32; j++) {
+               for (j = 0, bit_idx = 0; bit_idx < 32 && j < 32; j++) {
                        struct aeu_invert_reg_bit *p_bit = &p_aeu->bits[j];
 
                        if (qed_int_is_parity_flag(p_hwfn, p_bit) &&
@@ -1083,7 +1083,7 @@ static int qed_int_deassertion(struct qed_hwfn  *p_hwfn,
                         * to current group, making them responsible for the
                         * previous assertion.
                         */
-                       for (j = 0, bit_idx = 0; bit_idx < 32; j++) {
+                       for (j = 0, bit_idx = 0; bit_idx < 32 && j < 32; j++) {
                                long unsigned int bitmask;
                                u8 bit, bit_len;
 
@@ -1382,7 +1382,7 @@ static void qed_int_sb_attn_init(struct qed_hwfn *p_hwfn,
        memset(sb_info->parity_mask, 0, sizeof(u32) * NUM_ATTN_REGS);
        for (i = 0; i < NUM_ATTN_REGS; i++) {
                /* j is array index, k is bit index */
-               for (j = 0, k = 0; k < 32; j++) {
+               for (j = 0, k = 0; k < 32 && j < 32; j++) {
                        struct aeu_invert_reg_bit *p_aeu;
 
                        p_aeu = &aeu_descs[i].bits[j];
index d51bac7..bd06076 100644 (file)
@@ -1077,8 +1077,14 @@ static int qlcnic_83xx_add_rings(struct qlcnic_adapter *adapter)
        sds_mbx_size = sizeof(struct qlcnic_sds_mbx);
        context_id = recv_ctx->context_id;
        num_sds = adapter->drv_sds_rings - QLCNIC_MAX_SDS_RINGS;
-       ahw->hw_ops->alloc_mbx_args(&cmd, adapter,
-                                   QLCNIC_CMD_ADD_RCV_RINGS);
+       err = ahw->hw_ops->alloc_mbx_args(&cmd, adapter,
+                                       QLCNIC_CMD_ADD_RCV_RINGS);
+       if (err) {
+               dev_err(&adapter->pdev->dev,
+                       "Failed to alloc mbx args %d\n", err);
+               return err;
+       }
+
        cmd.req.arg[1] = 0 | (num_sds << 8) | (context_id << 16);
 
        /* set up status rings, mbx 2-81 */
index bbe21db..86c44bc 100644 (file)
@@ -5217,8 +5217,8 @@ static int rtl_get_ether_clk(struct rtl8169_private *tp)
 
 static void rtl_init_mac_address(struct rtl8169_private *tp)
 {
+       u8 mac_addr[ETH_ALEN] __aligned(2) = {};
        struct net_device *dev = tp->dev;
-       u8 mac_addr[ETH_ALEN];
        int rc;
 
        rc = eth_platform_get_mac_address(tp_to_dev(tp), mac_addr);
@@ -5233,7 +5233,8 @@ static void rtl_init_mac_address(struct rtl8169_private *tp)
        if (is_valid_ether_addr(mac_addr))
                goto done;
 
-       eth_hw_addr_random(dev);
+       eth_random_addr(mac_addr);
+       dev->addr_assign_type = NET_ADDR_RANDOM;
        dev_warn(tp_to_dev(tp), "can't read MAC address, setting random one\n");
 done:
        eth_hw_addr_set(dev, mac_addr);
index 43eead7..5f12973 100644 (file)
@@ -314,6 +314,7 @@ int stmmac_mdio_reset(struct mii_bus *mii);
 int stmmac_xpcs_setup(struct mii_bus *mii);
 void stmmac_set_ethtool_ops(struct net_device *netdev);
 
+int stmmac_init_tstamp_counter(struct stmmac_priv *priv, u32 systime_flags);
 void stmmac_ptp_register(struct stmmac_priv *priv);
 void stmmac_ptp_unregister(struct stmmac_priv *priv);
 int stmmac_open(struct net_device *dev);
index 2eb2845..da8306f 100644 (file)
 #include "dwxgmac2.h"
 #include "hwif.h"
 
+/* As long as the interface is active, we keep the timestamping counter enabled
+ * with fine resolution and binary rollover. This avoids non-monotonic behavior
+ * (clock jumps) when changing timestamping settings at runtime.
+ */
+#define STMMAC_HWTS_ACTIVE     (PTP_TCR_TSENA | PTP_TCR_TSCFUPDT | \
+                                PTP_TCR_TSCTRLSSR)
+
 #define        STMMAC_ALIGN(x)         ALIGN(ALIGN(x, SMP_CACHE_BYTES), 16)
 #define        TSO_MAX_BUFF_SIZE       (SZ_16K - 1)
 
@@ -613,8 +620,6 @@ static int stmmac_hwtstamp_set(struct net_device *dev, struct ifreq *ifr)
 {
        struct stmmac_priv *priv = netdev_priv(dev);
        struct hwtstamp_config config;
-       struct timespec64 now;
-       u64 temp = 0;
        u32 ptp_v2 = 0;
        u32 tstamp_all = 0;
        u32 ptp_over_ipv4_udp = 0;
@@ -623,11 +628,6 @@ static int stmmac_hwtstamp_set(struct net_device *dev, struct ifreq *ifr)
        u32 snap_type_sel = 0;
        u32 ts_master_en = 0;
        u32 ts_event_en = 0;
-       u32 sec_inc = 0;
-       u32 value = 0;
-       bool xmac;
-
-       xmac = priv->plat->has_gmac4 || priv->plat->has_xgmac;
 
        if (!(priv->dma_cap.time_stamp || priv->adv_ts)) {
                netdev_alert(priv->dev, "No support for HW time stamping\n");
@@ -789,42 +789,17 @@ static int stmmac_hwtstamp_set(struct net_device *dev, struct ifreq *ifr)
        priv->hwts_rx_en = ((config.rx_filter == HWTSTAMP_FILTER_NONE) ? 0 : 1);
        priv->hwts_tx_en = config.tx_type == HWTSTAMP_TX_ON;
 
-       if (!priv->hwts_tx_en && !priv->hwts_rx_en)
-               stmmac_config_hw_tstamping(priv, priv->ptpaddr, 0);
-       else {
-               value = (PTP_TCR_TSENA | PTP_TCR_TSCFUPDT | PTP_TCR_TSCTRLSSR |
-                        tstamp_all | ptp_v2 | ptp_over_ethernet |
-                        ptp_over_ipv6_udp | ptp_over_ipv4_udp | ts_event_en |
-                        ts_master_en | snap_type_sel);
-               stmmac_config_hw_tstamping(priv, priv->ptpaddr, value);
-
-               /* program Sub Second Increment reg */
-               stmmac_config_sub_second_increment(priv,
-                               priv->ptpaddr, priv->plat->clk_ptp_rate,
-                               xmac, &sec_inc);
-               temp = div_u64(1000000000ULL, sec_inc);
-
-               /* Store sub second increment and flags for later use */
-               priv->sub_second_inc = sec_inc;
-               priv->systime_flags = value;
-
-               /* calculate default added value:
-                * formula is :
-                * addend = (2^32)/freq_div_ratio;
-                * where, freq_div_ratio = 1e9ns/sec_inc
-                */
-               temp = (u64)(temp << 32);
-               priv->default_addend = div_u64(temp, priv->plat->clk_ptp_rate);
-               stmmac_config_addend(priv, priv->ptpaddr, priv->default_addend);
-
-               /* initialize system time */
-               ktime_get_real_ts64(&now);
+       priv->systime_flags = STMMAC_HWTS_ACTIVE;
 
-               /* lower 32 bits of tv_sec are safe until y2106 */
-               stmmac_init_systime(priv, priv->ptpaddr,
-                               (u32)now.tv_sec, now.tv_nsec);
+       if (priv->hwts_tx_en || priv->hwts_rx_en) {
+               priv->systime_flags |= tstamp_all | ptp_v2 |
+                                      ptp_over_ethernet | ptp_over_ipv6_udp |
+                                      ptp_over_ipv4_udp | ts_event_en |
+                                      ts_master_en | snap_type_sel;
        }
 
+       stmmac_config_hw_tstamping(priv, priv->ptpaddr, priv->systime_flags);
+
        memcpy(&priv->tstamp_config, &config, sizeof(config));
 
        return copy_to_user(ifr->ifr_data, &config,
@@ -852,6 +827,66 @@ static int stmmac_hwtstamp_get(struct net_device *dev, struct ifreq *ifr)
                            sizeof(*config)) ? -EFAULT : 0;
 }
 
+/**
+ * stmmac_init_tstamp_counter - init hardware timestamping counter
+ * @priv: driver private structure
+ * @systime_flags: timestamping flags
+ * Description:
+ * Initialize hardware counter for packet timestamping.
+ * This is valid as long as the interface is open and not suspended.
+ * It is rerun after resuming from suspend, in which case the timestamping
+ * flags updated by stmmac_hwtstamp_set() also need to be restored.
+ */
+int stmmac_init_tstamp_counter(struct stmmac_priv *priv, u32 systime_flags)
+{
+       bool xmac = priv->plat->has_gmac4 || priv->plat->has_xgmac;
+       struct timespec64 now;
+       u32 sec_inc = 0;
+       u64 temp = 0;
+       int ret;
+
+       if (!(priv->dma_cap.time_stamp || priv->dma_cap.atime_stamp))
+               return -EOPNOTSUPP;
+
+       ret = clk_prepare_enable(priv->plat->clk_ptp_ref);
+       if (ret < 0) {
+               netdev_warn(priv->dev,
+                           "failed to enable PTP reference clock: %pe\n",
+                           ERR_PTR(ret));
+               return ret;
+       }
+
+       stmmac_config_hw_tstamping(priv, priv->ptpaddr, systime_flags);
+       priv->systime_flags = systime_flags;
+
+       /* program Sub Second Increment reg */
+       stmmac_config_sub_second_increment(priv, priv->ptpaddr,
+                                          priv->plat->clk_ptp_rate,
+                                          xmac, &sec_inc);
+       temp = div_u64(1000000000ULL, sec_inc);
+
+       /* Store sub second increment for later use */
+       priv->sub_second_inc = sec_inc;
+
+       /* calculate default added value:
+        * formula is :
+        * addend = (2^32)/freq_div_ratio;
+        * where, freq_div_ratio = 1e9ns/sec_inc
+        */
+       temp = (u64)(temp << 32);
+       priv->default_addend = div_u64(temp, priv->plat->clk_ptp_rate);
+       stmmac_config_addend(priv, priv->ptpaddr, priv->default_addend);
+
+       /* initialize system time */
+       ktime_get_real_ts64(&now);
+
+       /* lower 32 bits of tv_sec are safe until y2106 */
+       stmmac_init_systime(priv, priv->ptpaddr, (u32)now.tv_sec, now.tv_nsec);
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(stmmac_init_tstamp_counter);
+
 /**
  * stmmac_init_ptp - init PTP
  * @priv: driver private structure
@@ -862,9 +897,11 @@ static int stmmac_hwtstamp_get(struct net_device *dev, struct ifreq *ifr)
 static int stmmac_init_ptp(struct stmmac_priv *priv)
 {
        bool xmac = priv->plat->has_gmac4 || priv->plat->has_xgmac;
+       int ret;
 
-       if (!(priv->dma_cap.time_stamp || priv->dma_cap.atime_stamp))
-               return -EOPNOTSUPP;
+       ret = stmmac_init_tstamp_counter(priv, STMMAC_HWTS_ACTIVE);
+       if (ret)
+               return ret;
 
        priv->adv_ts = 0;
        /* Check if adv_ts can be enabled for dwmac 4.x / xgmac core */
@@ -3272,10 +3309,6 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
        stmmac_mmc_setup(priv);
 
        if (init_ptp) {
-               ret = clk_prepare_enable(priv->plat->clk_ptp_ref);
-               if (ret < 0)
-                       netdev_warn(priv->dev, "failed to enable PTP reference clock: %d\n", ret);
-
                ret = stmmac_init_ptp(priv);
                if (ret == -EOPNOTSUPP)
                        netdev_warn(priv->dev, "PTP not supported by HW\n");
@@ -3769,6 +3802,8 @@ int stmmac_release(struct net_device *dev)
        struct stmmac_priv *priv = netdev_priv(dev);
        u32 chan;
 
+       netif_tx_disable(dev);
+
        if (device_may_wakeup(priv->device))
                phylink_speed_down(priv->phylink, false);
        /* Stop and disconnect the PHY */
@@ -5161,12 +5196,13 @@ read_again:
                if (likely(!(status & rx_not_ls)) &&
                    (likely(priv->synopsys_id >= DWMAC_CORE_4_00) ||
                     unlikely(status != llc_snap))) {
-                       if (buf2_len)
+                       if (buf2_len) {
                                buf2_len -= ETH_FCS_LEN;
-                       else
+                               len -= ETH_FCS_LEN;
+                       } else if (buf1_len) {
                                buf1_len -= ETH_FCS_LEN;
-
-                       len -= ETH_FCS_LEN;
+                               len -= ETH_FCS_LEN;
+                       }
                }
 
                if (!skb) {
@@ -5504,8 +5540,6 @@ static int stmmac_set_features(struct net_device *netdev,
                               netdev_features_t features)
 {
        struct stmmac_priv *priv = netdev_priv(netdev);
-       bool sph_en;
-       u32 chan;
 
        /* Keep the COE Type in case of csum is supporting */
        if (features & NETIF_F_RXCSUM)
@@ -5517,10 +5551,13 @@ static int stmmac_set_features(struct net_device *netdev,
         */
        stmmac_rx_ipc(priv, priv->hw);
 
-       sph_en = (priv->hw->rx_csum > 0) && priv->sph;
+       if (priv->sph_cap) {
+               bool sph_en = (priv->hw->rx_csum > 0) && priv->sph;
+               u32 chan;
 
-       for (chan = 0; chan < priv->plat->rx_queues_to_use; chan++)
-               stmmac_enable_sph(priv, priv->ioaddr, sph_en, chan);
+               for (chan = 0; chan < priv->plat->rx_queues_to_use; chan++)
+                       stmmac_enable_sph(priv, priv->ioaddr, sph_en, chan);
+       }
 
        return 0;
 }
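
stmmac_init_tstamp_counter() keeps the existing addend computation, which the in-code comment states as addend = 2^32 / freq_div_ratio with freq_div_ratio = 1e9 ns / sec_inc; as written, the code evaluates default_addend = 2^32 * (10^9 / sec_inc) / clk_ptp_rate. A stand-alone arithmetic sketch with illustrative numbers (a 50 MHz PTP clock and a 40 ns sub-second increment, neither taken from the patch):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint64_t clk_ptp_rate = 50000000;       /* PTP reference clock, Hz  */
        uint32_t sec_inc = 40;                  /* sub-second increment, ns */

        /* Nominal counter frequency implied by sec_inc: 1e9 / 40 = 25 MHz. */
        uint64_t temp = 1000000000ULL / sec_inc;

        /* addend = 2^32 * temp / clk_ptp_rate = 2^31 here (0x80000000). */
        uint64_t addend = (temp << 32) / clk_ptp_rate;

        printf("default_addend = 0x%llx\n", (unsigned long long)addend);
        return 0;
}
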
index 232ac98..5d29f33 100644 (file)
@@ -816,7 +816,7 @@ static int __maybe_unused stmmac_pltfr_noirq_resume(struct device *dev)
                if (ret)
                        return ret;
 
-               clk_prepare_enable(priv->plat->clk_ptp_ref);
+               stmmac_init_tstamp_counter(priv, priv->systime_flags);
        }
 
        return 0;
index e2b332b..7da2bb8 100644 (file)
@@ -31,6 +31,8 @@
 
 #define AX_MTU         236
 
+/* Some architectures define END as an assembler function-end macro; undef it. */
+#undef END
 /* SLIP/KISS protocol characters. */
 #define END             0300           /* indicates end of frame       */
 #define ESC             0333           /* indicates byte stuffing      */
index cff5173..d57472e 100644 (file)
@@ -661,22 +661,6 @@ void ipa_cmd_pipeline_clear_wait(struct ipa *ipa)
        wait_for_completion(&ipa->completion);
 }
 
-void ipa_cmd_pipeline_clear(struct ipa *ipa)
-{
-       u32 count = ipa_cmd_pipeline_clear_count();
-       struct gsi_trans *trans;
-
-       trans = ipa_cmd_trans_alloc(ipa, count);
-       if (trans) {
-               ipa_cmd_pipeline_clear_add(trans);
-               gsi_trans_commit_wait(trans);
-               ipa_cmd_pipeline_clear_wait(ipa);
-       } else {
-               dev_err(&ipa->pdev->dev,
-                       "error allocating %u entry tag transaction\n", count);
-       }
-}
-
 static struct ipa_cmd_info *
 ipa_cmd_info_alloc(struct ipa_endpoint *endpoint, u32 tre_count)
 {
index 69cd085..05ed7e4 100644 (file)
@@ -163,12 +163,6 @@ u32 ipa_cmd_pipeline_clear_count(void);
  */
 void ipa_cmd_pipeline_clear_wait(struct ipa *ipa);
 
-/**
- * ipa_cmd_pipeline_clear() - Clear the hardware pipeline
- * @ipa:       - IPA pointer
- */
-void ipa_cmd_pipeline_clear(struct ipa *ipa);
-
 /**
  * ipa_cmd_trans_alloc() - Allocate a transaction for the command TX endpoint
  * @ipa:       IPA pointer
index ef790fd..03a1709 100644 (file)
@@ -1636,8 +1636,6 @@ void ipa_endpoint_suspend(struct ipa *ipa)
        if (ipa->modem_netdev)
                ipa_modem_suspend(ipa->modem_netdev);
 
-       ipa_cmd_pipeline_clear(ipa);
-
        ipa_endpoint_suspend_one(ipa->name_map[IPA_ENDPOINT_AP_LAN_RX]);
        ipa_endpoint_suspend_one(ipa->name_map[IPA_ENDPOINT_AP_COMMAND_TX]);
 }
index cdfa98a..a448ec1 100644 (file)
@@ -28,6 +28,7 @@
 #include "ipa_reg.h"
 #include "ipa_mem.h"
 #include "ipa_table.h"
+#include "ipa_smp2p.h"
 #include "ipa_modem.h"
 #include "ipa_uc.h"
 #include "ipa_interrupt.h"
@@ -801,6 +802,11 @@ static int ipa_remove(struct platform_device *pdev)
        struct device *dev = &pdev->dev;
        int ret;
 
+       /* Prevent the modem from triggering a call to ipa_setup().  This
+        * also ensures a modem-initiated setup that's underway completes.
+        */
+       ipa_smp2p_irq_disable_setup(ipa);
+
        ret = pm_runtime_get_sync(dev);
        if (WARN_ON(ret < 0))
                goto out_power_put;
index ad116bc..d0ab4d7 100644 (file)
@@ -339,9 +339,6 @@ int ipa_modem_stop(struct ipa *ipa)
        if (state != IPA_MODEM_STATE_RUNNING)
                return -EBUSY;
 
-       /* Prevent the modem from triggering a call to ipa_setup() */
-       ipa_smp2p_disable(ipa);
-
        /* Clean up the netdev and endpoints if it was started */
        if (netdev) {
                struct ipa_priv *priv = netdev_priv(netdev);
@@ -369,6 +366,9 @@ static void ipa_modem_crashed(struct ipa *ipa)
        struct device *dev = &ipa->pdev->dev;
        int ret;
 
+       /* Prevent the modem from triggering a call to ipa_setup() */
+       ipa_smp2p_irq_disable_setup(ipa);
+
        ret = pm_runtime_get_sync(dev);
        if (ret < 0) {
                dev_err(dev, "error %d getting power to handle crash\n", ret);
index df7639c..2112336 100644 (file)
@@ -53,7 +53,7 @@
  * @setup_ready_irq:   IPA interrupt triggered by modem to signal GSI ready
  * @power_on:          Whether IPA power is on
  * @notified:          Whether modem has been notified of power state
- * @disabled:          Whether setup ready interrupt handling is disabled
+ * @setup_disabled:    Whether setup ready interrupt handler is disabled
  * @mutex:             Mutex protecting ready-interrupt/shutdown interlock
  * @panic_notifier:    Panic notifier structure
 */
@@ -67,7 +67,7 @@ struct ipa_smp2p {
        u32 setup_ready_irq;
        bool power_on;
        bool notified;
-       bool disabled;
+       bool setup_disabled;
        struct mutex mutex;
        struct notifier_block panic_notifier;
 };
@@ -155,11 +155,9 @@ static irqreturn_t ipa_smp2p_modem_setup_ready_isr(int irq, void *dev_id)
        struct device *dev;
        int ret;
 
-       mutex_lock(&smp2p->mutex);
-
-       if (smp2p->disabled)
-               goto out_mutex_unlock;
-       smp2p->disabled = true;         /* If any others arrive, ignore them */
+       /* Ignore any (spurious) interrupts received after the first */
+       if (smp2p->ipa->setup_complete)
+               return IRQ_HANDLED;
 
        /* Power needs to be active for setup */
        dev = &smp2p->ipa->pdev->dev;
@@ -176,8 +174,6 @@ static irqreturn_t ipa_smp2p_modem_setup_ready_isr(int irq, void *dev_id)
 out_power_put:
        pm_runtime_mark_last_busy(dev);
        (void)pm_runtime_put_autosuspend(dev);
-out_mutex_unlock:
-       mutex_unlock(&smp2p->mutex);
 
        return IRQ_HANDLED;
 }
@@ -313,7 +309,7 @@ void ipa_smp2p_exit(struct ipa *ipa)
        kfree(smp2p);
 }
 
-void ipa_smp2p_disable(struct ipa *ipa)
+void ipa_smp2p_irq_disable_setup(struct ipa *ipa)
 {
        struct ipa_smp2p *smp2p = ipa->smp2p;
 
@@ -322,7 +318,10 @@ void ipa_smp2p_disable(struct ipa *ipa)
 
        mutex_lock(&smp2p->mutex);
 
-       smp2p->disabled = true;
+       if (!smp2p->setup_disabled) {
+               disable_irq(smp2p->setup_ready_irq);
+               smp2p->setup_disabled = true;
+       }
 
        mutex_unlock(&smp2p->mutex);
 }
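
The smp2p rework above replaces the flag that made the ISR return early with a real disable_irq() call, keeping a setup_disabled boolean only so the disable happens at most once (disable_irq()/enable_irq() are depth-counted, so an extra unbalanced disable would never be undone). A user-space sketch of the once-only pattern, with a pthread mutex standing in for the driver's mutex:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static bool setup_disabled;

/* Stand-in for disable_irq(); must only ever run once from this path. */
static void disable_setup_irq(void) { puts("setup irq disabled"); }

static void irq_disable_setup(void)
{
        pthread_mutex_lock(&lock);
        if (!setup_disabled) {
                disable_setup_irq();
                setup_disabled = true;
        }
        pthread_mutex_unlock(&lock);
}

int main(void)
{
        irq_disable_setup();
        irq_disable_setup();    /* second call is a no-op */
        return 0;
}
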
index 99a9567..59cee31 100644 (file)
@@ -27,13 +27,12 @@ int ipa_smp2p_init(struct ipa *ipa, bool modem_init);
 void ipa_smp2p_exit(struct ipa *ipa);
 
 /**
- * ipa_smp2p_disable() - Prevent "ipa-setup-ready" interrupt handling
+ * ipa_smp2p_irq_disable_setup() - Disable the "setup ready" interrupt
  * @ipa:       IPA pointer
  *
- * Prevent handling of the "setup ready" interrupt from the modem.
- * This is used before initiating shutdown of the driver.
+ * Disable the "ipa-setup-ready" interrupt from the modem.
  */
-void ipa_smp2p_disable(struct ipa *ipa);
+void ipa_smp2p_irq_disable_setup(struct ipa *ipa);
 
 /**
  * ipa_smp2p_notify_reset() - Reset modem notification state
index cad8205..966c3b4 100644 (file)
@@ -61,6 +61,13 @@ static int aspeed_mdio_read(struct mii_bus *bus, int addr, int regnum)
 
        iowrite32(ctrl, ctx->base + ASPEED_MDIO_CTRL);
 
+       rc = readl_poll_timeout(ctx->base + ASPEED_MDIO_CTRL, ctrl,
+                               !(ctrl & ASPEED_MDIO_CTRL_FIRE),
+                               ASPEED_MDIO_INTERVAL_US,
+                               ASPEED_MDIO_TIMEOUT_US);
+       if (rc < 0)
+               return rc;
+
        rc = readl_poll_timeout(ctx->base + ASPEED_MDIO_DATA, data,
                                data & ASPEED_MDIO_DATA_IDLE,
                                ASPEED_MDIO_INTERVAL_US,
index 3ad7397..5904546 100644 (file)
@@ -710,6 +710,7 @@ static void phylink_resolve(struct work_struct *w)
        struct phylink_link_state link_state;
        struct net_device *ndev = pl->netdev;
        bool mac_config = false;
+       bool retrigger = false;
        bool cur_link_state;
 
        mutex_lock(&pl->state_mutex);
@@ -723,6 +724,7 @@ static void phylink_resolve(struct work_struct *w)
                link_state.link = false;
        } else if (pl->mac_link_dropped) {
                link_state.link = false;
+               retrigger = true;
        } else {
                switch (pl->cur_link_an_mode) {
                case MLO_AN_PHY:
@@ -739,6 +741,19 @@ static void phylink_resolve(struct work_struct *w)
                case MLO_AN_INBAND:
                        phylink_mac_pcs_get_state(pl, &link_state);
 
+                       /* The PCS may have a latching link-fail indicator.
+                        * If the link was up, bring the link down and
+                        * re-trigger the resolve. Otherwise, re-read the
+                        * PCS state to get the current status of the link.
+                        */
+                       if (!link_state.link) {
+                               if (cur_link_state)
+                                       retrigger = true;
+                               else
+                                       phylink_mac_pcs_get_state(pl,
+                                                                 &link_state);
+                       }
+
                        /* If we have a phy, the "up" state is the union of
                         * both the PHY and the MAC
                         */
@@ -747,6 +762,15 @@ static void phylink_resolve(struct work_struct *w)
 
                        /* Only update if the PHY link is up */
                        if (pl->phydev && pl->phy_state.link) {
+                               /* If the interface has changed, force a
+                                * link down event if the link isn't already
+                                * down, and re-resolve.
+                                */
+                               if (link_state.interface !=
+                                   pl->phy_state.interface) {
+                                       retrigger = true;
+                                       link_state.link = false;
+                               }
                                link_state.interface = pl->phy_state.interface;
 
                                /* If we have a PHY, we need to update with
@@ -789,7 +813,7 @@ static void phylink_resolve(struct work_struct *w)
                else
                        phylink_link_up(pl, link_state);
        }
-       if (!link_state.link && pl->mac_link_dropped) {
+       if (!link_state.link && retrigger) {
                pl->mac_link_dropped = false;
                queue_work(system_power_efficient_wq, &pl->resolve);
        }
index c420e59..3d7f88b 100644 (file)
@@ -40,6 +40,8 @@
                                           insmod -oslip_maxdev=nnn     */
 #define SL_MTU         296             /* 296; I am used to 600- FvK   */
 
+/* Some architectures define END as an assembler function-end macro; undef it. */
+#undef END
 /* SLIP protocol characters. */
 #define END             0300           /* indicates end of frame       */
 #define ESC             0333           /* indicates byte stuffing      */
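
END (0300) and ESC (0333) are the RFC 1055 SLIP framing bytes; the new #undef only avoids a clash with architectures whose assembler headers also define END. For context, a minimal byte-stuffing encoder using these values plus the RFC's ESC_END/ESC_ESC companions (which are not part of this hunk):

#include <stdio.h>
#include <stddef.h>

#define END     0300    /* frame delimiter                      */
#define ESC     0333    /* escape byte                          */
#define ESC_END 0334    /* ESC ESC_END stands for a literal END */
#define ESC_ESC 0335    /* ESC ESC_ESC stands for a literal ESC */

/* Byte-stuff one frame into out[]; out must hold 2 * len + 1 bytes. */
static size_t slip_encode(const unsigned char *in, size_t len,
                          unsigned char *out)
{
        size_t n = 0;

        for (size_t i = 0; i < len; i++) {
                if (in[i] == END) {
                        out[n++] = ESC;
                        out[n++] = ESC_END;
                } else if (in[i] == ESC) {
                        out[n++] = ESC;
                        out[n++] = ESC_ESC;
                } else {
                        out[n++] = in[i];
                }
        }
        out[n++] = END;                 /* terminate the frame */
        return n;
}

int main(void)
{
        unsigned char frame[] = { 0x45, END, 0x01 };
        unsigned char wire[2 * sizeof(frame) + 1];
        size_t n = slip_encode(frame, sizeof(frame), wire);

        for (size_t i = 0; i < n; i++)
                printf("%02x ", wire[i]);
        printf("\n");
        return 0;
}
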
index f20376c..8cd265f 100644 (file)
@@ -2228,7 +2228,7 @@ static int lan78xx_phy_init(struct lan78xx_net *dev)
        if (dev->domain_data.phyirq > 0)
                phydev->irq = dev->domain_data.phyirq;
        else
-               phydev->irq = 0;
+               phydev->irq = PHY_POLL;
        netdev_dbg(dev->net, "phydev->irq = %d\n", phydev->irq);
 
        /* set to AUTOMDIX */
index 20fe4cd..abe0149 100644 (file)
@@ -1050,6 +1050,14 @@ static const struct net_device_ops smsc95xx_netdev_ops = {
        .ndo_set_features       = smsc95xx_set_features,
 };
 
+static void smsc95xx_handle_link_change(struct net_device *net)
+{
+       struct usbnet *dev = netdev_priv(net);
+
+       phy_print_status(net->phydev);
+       usbnet_defer_kevent(dev, EVENT_LINK_CHANGE);
+}
+
 static int smsc95xx_bind(struct usbnet *dev, struct usb_interface *intf)
 {
        struct smsc95xx_priv *pdata;
@@ -1154,6 +1162,17 @@ static int smsc95xx_bind(struct usbnet *dev, struct usb_interface *intf)
        dev->net->min_mtu = ETH_MIN_MTU;
        dev->net->max_mtu = ETH_DATA_LEN;
        dev->hard_mtu = dev->net->mtu + dev->net->hard_header_len;
+
+       ret = phy_connect_direct(dev->net, pdata->phydev,
+                                &smsc95xx_handle_link_change,
+                                PHY_INTERFACE_MODE_MII);
+       if (ret) {
+               netdev_err(dev->net, "can't attach PHY to %s\n", pdata->mdiobus->id);
+               goto unregister_mdio;
+       }
+
+       phy_attached_info(dev->net->phydev);
+
        return 0;
 
 unregister_mdio:
@@ -1171,47 +1190,25 @@ static void smsc95xx_unbind(struct usbnet *dev, struct usb_interface *intf)
 {
        struct smsc95xx_priv *pdata = dev->driver_priv;
 
+       phy_disconnect(dev->net->phydev);
        mdiobus_unregister(pdata->mdiobus);
        mdiobus_free(pdata->mdiobus);
        netif_dbg(dev, ifdown, dev->net, "free pdata\n");
        kfree(pdata);
 }
 
-static void smsc95xx_handle_link_change(struct net_device *net)
-{
-       struct usbnet *dev = netdev_priv(net);
-
-       phy_print_status(net->phydev);
-       usbnet_defer_kevent(dev, EVENT_LINK_CHANGE);
-}
-
 static int smsc95xx_start_phy(struct usbnet *dev)
 {
-       struct smsc95xx_priv *pdata = dev->driver_priv;
-       struct net_device *net = dev->net;
-       int ret;
+       phy_start(dev->net->phydev);
 
-       ret = smsc95xx_reset(dev);
-       if (ret < 0)
-               return ret;
-
-       ret = phy_connect_direct(net, pdata->phydev,
-                                &smsc95xx_handle_link_change,
-                                PHY_INTERFACE_MODE_MII);
-       if (ret) {
-               netdev_err(net, "can't attach PHY to %s\n", pdata->mdiobus->id);
-               return ret;
-       }
-
-       phy_attached_info(net->phydev);
-       phy_start(net->phydev);
        return 0;
 }
 
-static int smsc95xx_disconnect_phy(struct usbnet *dev)
+static int smsc95xx_stop(struct usbnet *dev)
 {
-       phy_stop(dev->net->phydev);
-       phy_disconnect(dev->net->phydev);
+       if (dev->net->phydev)
+               phy_stop(dev->net->phydev);
+
        return 0;
 }
 
@@ -1966,7 +1963,7 @@ static const struct driver_info smsc95xx_info = {
        .unbind         = smsc95xx_unbind,
        .link_reset     = smsc95xx_link_reset,
        .reset          = smsc95xx_start_phy,
-       .stop           = smsc95xx_disconnect_phy,
+       .stop           = smsc95xx_stop,
        .rx_fixup       = smsc95xx_rx_fixup,
        .tx_fixup       = smsc95xx_tx_fixup,
        .status         = smsc95xx_status,
index 1771d6e..55db6a3 100644 (file)
@@ -3423,7 +3423,6 @@ static struct virtio_driver virtio_net_driver = {
        .feature_table_size = ARRAY_SIZE(features),
        .feature_table_legacy = features_legacy,
        .feature_table_size_legacy = ARRAY_SIZE(features_legacy),
-       .suppress_used_validation = true,
        .driver.name =  KBUILD_MODNAME,
        .driver.owner = THIS_MODULE,
        .id_table =     id_table,
index ccf6770..131c745 100644 (file)
@@ -497,6 +497,7 @@ static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb,
        /* strip the ethernet header added for pass through VRF device */
        __skb_pull(skb, skb_network_offset(skb));
 
+       memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
        ret = vrf_ip6_local_out(net, skb->sk, skb);
        if (unlikely(net_xmit_eval(ret)))
                dev->stats.tx_errors++;
@@ -579,6 +580,7 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb,
                                               RT_SCOPE_LINK);
        }
 
+       memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
        ret = vrf_ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb);
        if (unlikely(net_xmit_eval(ret)))
                vrf_dev->stats.tx_errors++;
index b7197e8..9a4c8ff 100644 (file)
@@ -163,7 +163,7 @@ static bool node_placement(struct allowedips_node __rcu *trie, const u8 *key,
        return exact;
 }
 
-static inline void connect_node(struct allowedips_node **parent, u8 bit, struct allowedips_node *node)
+static inline void connect_node(struct allowedips_node __rcu **parent, u8 bit, struct allowedips_node *node)
 {
        node->parent_bit_packed = (unsigned long)parent | bit;
        rcu_assign_pointer(*parent, node);
index 551ddaa..a46067c 100644 (file)
@@ -98,6 +98,7 @@ static int wg_stop(struct net_device *dev)
 {
        struct wg_device *wg = netdev_priv(dev);
        struct wg_peer *peer;
+       struct sk_buff *skb;
 
        mutex_lock(&wg->device_update_lock);
        list_for_each_entry(peer, &wg->peer_list, peer_list) {
@@ -108,7 +109,9 @@ static int wg_stop(struct net_device *dev)
                wg_noise_reset_last_sent_handshake(&peer->last_sent_handshake);
        }
        mutex_unlock(&wg->device_update_lock);
-       skb_queue_purge(&wg->incoming_handshakes);
+       while ((skb = ptr_ring_consume(&wg->handshake_queue.ring)) != NULL)
+               kfree_skb(skb);
+       atomic_set(&wg->handshake_queue_len, 0);
        wg_socket_reinit(wg, NULL, NULL);
        return 0;
 }
@@ -235,14 +238,13 @@ static void wg_destruct(struct net_device *dev)
        destroy_workqueue(wg->handshake_receive_wq);
        destroy_workqueue(wg->handshake_send_wq);
        destroy_workqueue(wg->packet_crypt_wq);
-       wg_packet_queue_free(&wg->decrypt_queue);
-       wg_packet_queue_free(&wg->encrypt_queue);
+       wg_packet_queue_free(&wg->handshake_queue, true);
+       wg_packet_queue_free(&wg->decrypt_queue, false);
+       wg_packet_queue_free(&wg->encrypt_queue, false);
        rcu_barrier(); /* Wait for all the peers to be actually freed. */
        wg_ratelimiter_uninit();
        memzero_explicit(&wg->static_identity, sizeof(wg->static_identity));
-       skb_queue_purge(&wg->incoming_handshakes);
        free_percpu(dev->tstats);
-       free_percpu(wg->incoming_handshakes_worker);
        kvfree(wg->index_hashtable);
        kvfree(wg->peer_hashtable);
        mutex_unlock(&wg->device_update_lock);
@@ -298,7 +300,6 @@ static int wg_newlink(struct net *src_net, struct net_device *dev,
        init_rwsem(&wg->static_identity.lock);
        mutex_init(&wg->socket_update_lock);
        mutex_init(&wg->device_update_lock);
-       skb_queue_head_init(&wg->incoming_handshakes);
        wg_allowedips_init(&wg->peer_allowedips);
        wg_cookie_checker_init(&wg->cookie_checker, wg);
        INIT_LIST_HEAD(&wg->peer_list);
@@ -316,16 +317,10 @@ static int wg_newlink(struct net *src_net, struct net_device *dev,
        if (!dev->tstats)
                goto err_free_index_hashtable;
 
-       wg->incoming_handshakes_worker =
-               wg_packet_percpu_multicore_worker_alloc(
-                               wg_packet_handshake_receive_worker, wg);
-       if (!wg->incoming_handshakes_worker)
-               goto err_free_tstats;
-
        wg->handshake_receive_wq = alloc_workqueue("wg-kex-%s",
                        WQ_CPU_INTENSIVE | WQ_FREEZABLE, 0, dev->name);
        if (!wg->handshake_receive_wq)
-               goto err_free_incoming_handshakes;
+               goto err_free_tstats;
 
        wg->handshake_send_wq = alloc_workqueue("wg-kex-%s",
                        WQ_UNBOUND | WQ_FREEZABLE, 0, dev->name);
@@ -347,10 +342,15 @@ static int wg_newlink(struct net *src_net, struct net_device *dev,
        if (ret < 0)
                goto err_free_encrypt_queue;
 
-       ret = wg_ratelimiter_init();
+       ret = wg_packet_queue_init(&wg->handshake_queue, wg_packet_handshake_receive_worker,
+                                  MAX_QUEUED_INCOMING_HANDSHAKES);
        if (ret < 0)
                goto err_free_decrypt_queue;
 
+       ret = wg_ratelimiter_init();
+       if (ret < 0)
+               goto err_free_handshake_queue;
+
        ret = register_netdevice(dev);
        if (ret < 0)
                goto err_uninit_ratelimiter;
@@ -367,18 +367,18 @@ static int wg_newlink(struct net *src_net, struct net_device *dev,
 
 err_uninit_ratelimiter:
        wg_ratelimiter_uninit();
+err_free_handshake_queue:
+       wg_packet_queue_free(&wg->handshake_queue, false);
 err_free_decrypt_queue:
-       wg_packet_queue_free(&wg->decrypt_queue);
+       wg_packet_queue_free(&wg->decrypt_queue, false);
 err_free_encrypt_queue:
-       wg_packet_queue_free(&wg->encrypt_queue);
+       wg_packet_queue_free(&wg->encrypt_queue, false);
 err_destroy_packet_crypt:
        destroy_workqueue(wg->packet_crypt_wq);
 err_destroy_handshake_send:
        destroy_workqueue(wg->handshake_send_wq);
 err_destroy_handshake_receive:
        destroy_workqueue(wg->handshake_receive_wq);
-err_free_incoming_handshakes:
-       free_percpu(wg->incoming_handshakes_worker);
 err_free_tstats:
        free_percpu(dev->tstats);
 err_free_index_hashtable:
@@ -398,6 +398,7 @@ static struct rtnl_link_ops link_ops __read_mostly = {
 static void wg_netns_pre_exit(struct net *net)
 {
        struct wg_device *wg;
+       struct wg_peer *peer;
 
        rtnl_lock();
        list_for_each_entry(wg, &device_list, device_list) {
@@ -407,6 +408,8 @@ static void wg_netns_pre_exit(struct net *net)
                        mutex_lock(&wg->device_update_lock);
                        rcu_assign_pointer(wg->creating_net, NULL);
                        wg_socket_reinit(wg, NULL, NULL);
+                       list_for_each_entry(peer, &wg->peer_list, peer_list)
+                               wg_socket_clear_peer_endpoint_src(peer);
                        mutex_unlock(&wg->device_update_lock);
                }
        }
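
The device.c hunks above replace WireGuard's per-device sk_buff_head of queued handshake packets with a ptr_ring-backed queue plus an atomic length counter, and make netns pre-exit also clear each peer's cached endpoint source. A minimal sketch of the drain-on-down pattern, using a hypothetical struct demo_dev (not part of the patch) that carries the same two fields:

    #include <linux/ptr_ring.h>
    #include <linux/skbuff.h>
    #include <linux/atomic.h>

    struct demo_dev {
            struct ptr_ring ring;   /* queued handshake skbs */
            atomic_t queue_len;     /* mirrors ring occupancy */
    };

    /* Drop everything still queued when the interface goes down. */
    static void demo_drain(struct demo_dev *d)
    {
            struct sk_buff *skb;

            while ((skb = ptr_ring_consume(&d->ring)) != NULL)
                    kfree_skb(skb);
            atomic_set(&d->queue_len, 0);
    }
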
index 854bc3d..43c7ceb 100644 (file)
@@ -39,21 +39,18 @@ struct prev_queue {
 
 struct wg_device {
        struct net_device *dev;
-       struct crypt_queue encrypt_queue, decrypt_queue;
+       struct crypt_queue encrypt_queue, decrypt_queue, handshake_queue;
        struct sock __rcu *sock4, *sock6;
        struct net __rcu *creating_net;
        struct noise_static_identity static_identity;
-       struct workqueue_struct *handshake_receive_wq, *handshake_send_wq;
-       struct workqueue_struct *packet_crypt_wq;
-       struct sk_buff_head incoming_handshakes;
-       int incoming_handshake_cpu;
-       struct multicore_worker __percpu *incoming_handshakes_worker;
+       struct workqueue_struct *packet_crypt_wq, *handshake_receive_wq, *handshake_send_wq;
        struct cookie_checker cookie_checker;
        struct pubkey_hashtable *peer_hashtable;
        struct index_hashtable *index_hashtable;
        struct allowedips peer_allowedips;
        struct mutex device_update_lock, socket_update_lock;
        struct list_head device_list, peer_list;
+       atomic_t handshake_queue_len;
        unsigned int num_peers, device_update_gen;
        u32 fwmark;
        u16 incoming_port;
index 75dbe77..ee4da9a 100644 (file)
@@ -17,7 +17,7 @@
 #include <linux/genetlink.h>
 #include <net/rtnetlink.h>
 
-static int __init mod_init(void)
+static int __init wg_mod_init(void)
 {
        int ret;
 
@@ -60,7 +60,7 @@ err_allowedips:
        return ret;
 }
 
-static void __exit mod_exit(void)
+static void __exit wg_mod_exit(void)
 {
        wg_genetlink_uninit();
        wg_device_uninit();
@@ -68,8 +68,8 @@ static void __exit mod_exit(void)
        wg_allowedips_slab_uninit();
 }
 
-module_init(mod_init);
-module_exit(mod_exit);
+module_init(wg_mod_init);
+module_exit(wg_mod_exit);
 MODULE_LICENSE("GPL v2");
 MODULE_DESCRIPTION("WireGuard secure network tunnel");
 MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
index 48e7b98..1de413b 100644 (file)
@@ -38,11 +38,11 @@ int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function,
        return 0;
 }
 
-void wg_packet_queue_free(struct crypt_queue *queue)
+void wg_packet_queue_free(struct crypt_queue *queue, bool purge)
 {
        free_percpu(queue->worker);
-       WARN_ON(!__ptr_ring_empty(&queue->ring));
-       ptr_ring_cleanup(&queue->ring, NULL);
+       WARN_ON(!purge && !__ptr_ring_empty(&queue->ring));
+       ptr_ring_cleanup(&queue->ring, purge ? (void(*)(void*))kfree_skb : NULL);
 }
 
 #define NEXT(skb) ((skb)->prev)
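
wg_packet_queue_free() now takes a purge flag: when set, ptr_ring_cleanup() is handed a destructor so entries still queued get freed, and the "ring must already be empty" warning only applies to the non-purging callers. A small sketch of the same idea; the demo_* names are stand-ins, and a named wrapper is used here instead of casting kfree_skb to void (*)(void *), which is an equivalent but more type-safe way to supply the destructor:

    #include <linux/ptr_ring.h>
    #include <linux/skbuff.h>

    static void demo_destroy_skb(void *ptr)
    {
            kfree_skb(ptr);
    }

    /* Free a queue's ring; with 'purge', also free whatever is still queued. */
    static void demo_queue_free(struct ptr_ring *ring, bool purge)
    {
            WARN_ON(!purge && !__ptr_ring_empty(ring));
            ptr_ring_cleanup(ring, purge ? demo_destroy_skb : NULL);
    }
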
index 4ef2944..e238810 100644 (file)
@@ -23,7 +23,7 @@ struct sk_buff;
 /* queueing.c APIs: */
 int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function,
                         unsigned int len);
-void wg_packet_queue_free(struct crypt_queue *queue);
+void wg_packet_queue_free(struct crypt_queue *queue, bool purge);
 struct multicore_worker __percpu *
 wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr);
 
index 3fedd1d..dd55e5c 100644 (file)
@@ -176,12 +176,12 @@ int wg_ratelimiter_init(void)
                        (1U << 14) / sizeof(struct hlist_head)));
        max_entries = table_size * 8;
 
-       table_v4 = kvzalloc(table_size * sizeof(*table_v4), GFP_KERNEL);
+       table_v4 = kvcalloc(table_size, sizeof(*table_v4), GFP_KERNEL);
        if (unlikely(!table_v4))
                goto err_kmemcache;
 
 #if IS_ENABLED(CONFIG_IPV6)
-       table_v6 = kvzalloc(table_size * sizeof(*table_v6), GFP_KERNEL);
+       table_v6 = kvcalloc(table_size, sizeof(*table_v6), GFP_KERNEL);
        if (unlikely(!table_v6)) {
                kvfree(table_v4);
                goto err_kmemcache;
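
The ratelimiter hunk switches kvzalloc(table_size * sizeof(...)) to kvcalloc(table_size, sizeof(...)). kvcalloc() checks the element-count multiplication for overflow before allocating, which an open-coded multiply inside kvzalloc() does not. A minimal illustration, with demo_alloc_table() as a hypothetical helper:

    #include <linux/types.h>
    #include <linux/mm.h>       /* kvcalloc(), kvfree() */

    /* Allocate a zeroed hash table of 'nr' buckets; kvcalloc() fails cleanly
     * if nr * sizeof(struct hlist_head) would overflow.
     */
    static struct hlist_head *demo_alloc_table(size_t nr)
    {
            return kvcalloc(nr, sizeof(struct hlist_head), GFP_KERNEL);
    }
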
index 7dc84bc..7b8df40 100644 (file)
@@ -116,8 +116,8 @@ static void wg_receive_handshake_packet(struct wg_device *wg,
                return;
        }
 
-       under_load = skb_queue_len(&wg->incoming_handshakes) >=
-                    MAX_QUEUED_INCOMING_HANDSHAKES / 8;
+       under_load = atomic_read(&wg->handshake_queue_len) >=
+                       MAX_QUEUED_INCOMING_HANDSHAKES / 8;
        if (under_load) {
                last_under_load = ktime_get_coarse_boottime_ns();
        } else if (last_under_load) {
@@ -212,13 +212,14 @@ static void wg_receive_handshake_packet(struct wg_device *wg,
 
 void wg_packet_handshake_receive_worker(struct work_struct *work)
 {
-       struct wg_device *wg = container_of(work, struct multicore_worker,
-                                           work)->ptr;
+       struct crypt_queue *queue = container_of(work, struct multicore_worker, work)->ptr;
+       struct wg_device *wg = container_of(queue, struct wg_device, handshake_queue);
        struct sk_buff *skb;
 
-       while ((skb = skb_dequeue(&wg->incoming_handshakes)) != NULL) {
+       while ((skb = ptr_ring_consume_bh(&queue->ring)) != NULL) {
                wg_receive_handshake_packet(wg, skb);
                dev_kfree_skb(skb);
+               atomic_dec(&wg->handshake_queue_len);
                cond_resched();
        }
 }
@@ -553,22 +554,28 @@ void wg_packet_receive(struct wg_device *wg, struct sk_buff *skb)
        case cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION):
        case cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE):
        case cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE): {
-               int cpu;
-
-               if (skb_queue_len(&wg->incoming_handshakes) >
-                           MAX_QUEUED_INCOMING_HANDSHAKES ||
-                   unlikely(!rng_is_initialized())) {
+               int cpu, ret = -EBUSY;
+
+               if (unlikely(!rng_is_initialized()))
+                       goto drop;
+               if (atomic_read(&wg->handshake_queue_len) > MAX_QUEUED_INCOMING_HANDSHAKES / 2) {
+                       if (spin_trylock_bh(&wg->handshake_queue.ring.producer_lock)) {
+                               ret = __ptr_ring_produce(&wg->handshake_queue.ring, skb);
+                               spin_unlock_bh(&wg->handshake_queue.ring.producer_lock);
+                       }
+               } else
+                       ret = ptr_ring_produce_bh(&wg->handshake_queue.ring, skb);
+               if (ret) {
+       drop:
                        net_dbg_skb_ratelimited("%s: Dropping handshake packet from %pISpfsc\n",
                                                wg->dev->name, skb);
                        goto err;
                }
-               skb_queue_tail(&wg->incoming_handshakes, skb);
-               /* Queues up a call to packet_process_queued_handshake_
-                * packets(skb):
-                */
-               cpu = wg_cpumask_next_online(&wg->incoming_handshake_cpu);
+               atomic_inc(&wg->handshake_queue_len);
+               cpu = wg_cpumask_next_online(&wg->handshake_queue.last_cpu);
+               /* Queues up a call to packet_process_queued_handshake_packets(skb): */
                queue_work_on(cpu, wg->handshake_receive_wq,
-                       &per_cpu_ptr(wg->incoming_handshakes_worker, cpu)->work);
+                             &per_cpu_ptr(wg->handshake_queue.worker, cpu)->work);
                break;
        }
        case cpu_to_le32(MESSAGE_DATA):
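
In the receive-path hunk above, once the handshake queue is more than half full the producer only attempts spin_trylock_bh() on the ring's producer lock and drops the packet if the lock is contended, so a flood of handshake initiations degrades to drops rather than lock contention. A condensed sketch of that enqueue policy, with DEMO_MAX_QUEUED and demo_enqueue() as illustrative stand-ins:

    #include <linux/ptr_ring.h>
    #include <linux/skbuff.h>
    #include <linux/atomic.h>

    #define DEMO_MAX_QUEUED 4096    /* illustrative limit */

    /* Returns 0 if queued, nonzero if dropped; 'len' mirrors occupancy. */
    static int demo_enqueue(struct ptr_ring *ring, atomic_t *len,
                            struct sk_buff *skb)
    {
            int ret = -EBUSY;

            if (atomic_read(len) > DEMO_MAX_QUEUED / 2) {
                    /* Under pressure: never spin on the producer lock. */
                    if (spin_trylock_bh(&ring->producer_lock)) {
                            ret = __ptr_ring_produce(ring, skb);
                            spin_unlock_bh(&ring->producer_lock);
                    }
            } else {
                    ret = ptr_ring_produce_bh(ring, skb);
            }
            if (!ret)
                    atomic_inc(len);
            return ret;
    }
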
index 8c496b7..6f07b94 100644 (file)
@@ -308,7 +308,7 @@ void wg_socket_clear_peer_endpoint_src(struct wg_peer *peer)
 {
        write_lock_bh(&peer->endpoint_lock);
        memset(&peer->endpoint.src6, 0, sizeof(peer->endpoint.src6));
-       dst_cache_reset(&peer->endpoint_cache);
+       dst_cache_reset_now(&peer->endpoint_cache);
        write_unlock_bh(&peer->endpoint_lock);
 }
 
index c875bf3..009dd4b 100644 (file)
@@ -86,6 +86,7 @@ static void *iwl_uefi_reduce_power_section(struct iwl_trans *trans,
                if (len < tlv_len) {
                        IWL_ERR(trans, "invalid TLV len: %zd/%u\n",
                                len, tlv_len);
+                       kfree(reduce_power_data);
                        reduce_power_data = ERR_PTR(-EINVAL);
                        goto out;
                }
@@ -105,6 +106,7 @@ static void *iwl_uefi_reduce_power_section(struct iwl_trans *trans,
                                IWL_DEBUG_FW(trans,
                                             "Couldn't allocate (more) reduce_power_data\n");
 
+                               kfree(reduce_power_data);
                                reduce_power_data = ERR_PTR(-ENOMEM);
                                goto out;
                        }
@@ -134,6 +136,10 @@ static void *iwl_uefi_reduce_power_section(struct iwl_trans *trans,
 done:
        if (!size) {
                IWL_DEBUG_FW(trans, "Empty REDUCE_POWER, skipping.\n");
+               /* Better safe than sorry, but 'reduce_power_data' should
+                * always be NULL if !size.
+                */
+               kfree(reduce_power_data);
                reduce_power_data = ERR_PTR(-ENOENT);
                goto out;
        }
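
The iwlwifi UEFI hunks free reduce_power_data before overwriting the pointer with ERR_PTR(), closing a leak on the TLV error paths. The general rule: never replace the only pointer to an allocation with an error cookie without freeing it first. A generic sketch, where demo_build() and demo_validate() are hypothetical:

    #include <linux/types.h>
    #include <linux/slab.h>
    #include <linux/err.h>

    static int demo_validate(const u8 *buf, size_t len)
    {
            return len ? 0 : -EINVAL;       /* stand-in check */
    }

    static void *demo_build(size_t len)
    {
            u8 *buf = kzalloc(len, GFP_KERNEL);

            if (!buf)
                    return ERR_PTR(-ENOMEM);

            if (demo_validate(buf, len) < 0) {
                    kfree(buf);             /* would otherwise leak */
                    return ERR_PTR(-EINVAL);
            }
            return buf;
    }
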
index 36196e0..5cec467 100644 (file)
@@ -1313,23 +1313,31 @@ _iwl_op_mode_start(struct iwl_drv *drv, struct iwlwifi_opmode_table *op)
        const struct iwl_op_mode_ops *ops = op->ops;
        struct dentry *dbgfs_dir = NULL;
        struct iwl_op_mode *op_mode = NULL;
+       int retry, max_retry = !!iwlwifi_mod_params.fw_restart * IWL_MAX_INIT_RETRY;
+
+       for (retry = 0; retry <= max_retry; retry++) {
 
 #ifdef CONFIG_IWLWIFI_DEBUGFS
-       drv->dbgfs_op_mode = debugfs_create_dir(op->name,
-                                               drv->dbgfs_drv);
-       dbgfs_dir = drv->dbgfs_op_mode;
+               drv->dbgfs_op_mode = debugfs_create_dir(op->name,
+                                                       drv->dbgfs_drv);
+               dbgfs_dir = drv->dbgfs_op_mode;
 #endif
 
-       op_mode = ops->start(drv->trans, drv->trans->cfg, &drv->fw, dbgfs_dir);
+               op_mode = ops->start(drv->trans, drv->trans->cfg,
+                                    &drv->fw, dbgfs_dir);
+
+               if (op_mode)
+                       return op_mode;
+
+               IWL_ERR(drv, "retry init count %d\n", retry);
 
 #ifdef CONFIG_IWLWIFI_DEBUGFS
-       if (!op_mode) {
                debugfs_remove_recursive(drv->dbgfs_op_mode);
                drv->dbgfs_op_mode = NULL;
-       }
 #endif
+       }
 
-       return op_mode;
+       return NULL;
 }
 
 static void _iwl_op_mode_stop(struct iwl_drv *drv)
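
The drv.c hunk above turns the single op-mode start into a bounded retry loop; note that `!!iwlwifi_mod_params.fw_restart * IWL_MAX_INIT_RETRY` is just a branch-free spelling of `fw_restart ? IWL_MAX_INIT_RETRY : 0`. A stripped-down sketch of the same retry shape, with all demo_* names and DEMO_MAX_RETRY as stand-ins:

    #include <linux/printk.h>

    #define DEMO_MAX_RETRY 2

    struct demo_ctx { int attempts; };

    static void *demo_start(struct demo_ctx *ctx)
    {
            /* Stand-in: pretend the hardware comes up on the second try. */
            return ++ctx->attempts >= 2 ? ctx : NULL;
    }

    static void *demo_start_with_retry(struct demo_ctx *ctx, bool allow_retry)
    {
            int retry, max_retry = allow_retry ? DEMO_MAX_RETRY : 0;
            void *handle;

            for (retry = 0; retry <= max_retry; retry++) {
                    handle = demo_start(ctx);
                    if (handle)
                            return handle;
                    pr_err("start attempt %d failed, retrying\n", retry);
            }
            return NULL;
    }
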
index 2e2d60a..0fd009e 100644 (file)
@@ -89,4 +89,7 @@ void iwl_drv_stop(struct iwl_drv *drv);
 #define IWL_EXPORT_SYMBOL(sym)
 #endif
 
+/* max retry for init flow */
+#define IWL_MAX_INIT_RETRY 2
+
 #endif /* __iwl_drv_h__ */
index 9fb9c7d..897e3b9 100644 (file)
@@ -16,6 +16,7 @@
 #include <net/ieee80211_radiotap.h>
 #include <net/tcp.h>
 
+#include "iwl-drv.h"
 #include "iwl-op-mode.h"
 #include "iwl-io.h"
 #include "mvm.h"
@@ -1117,9 +1118,30 @@ static int iwl_mvm_mac_start(struct ieee80211_hw *hw)
 {
        struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw);
        int ret;
+       int retry, max_retry = 0;
 
        mutex_lock(&mvm->mutex);
-       ret = __iwl_mvm_mac_start(mvm);
+
+       /* we are starting the mac not in error flow, and restart is enabled */
+       if (!test_bit(IWL_MVM_STATUS_HW_RESTART_REQUESTED, &mvm->status) &&
+           iwlwifi_mod_params.fw_restart) {
+               max_retry = IWL_MAX_INIT_RETRY;
+               /*
+                * This will prevent mac80211 recovery flows to trigger during
+                * init failures
+                */
+               set_bit(IWL_MVM_STATUS_STARTING, &mvm->status);
+       }
+
+       for (retry = 0; retry <= max_retry; retry++) {
+               ret = __iwl_mvm_mac_start(mvm);
+               if (!ret)
+                       break;
+
+               IWL_ERR(mvm, "mac start retry %d\n", retry);
+       }
+       clear_bit(IWL_MVM_STATUS_STARTING, &mvm->status);
+
        mutex_unlock(&mvm->mutex);
 
        return ret;
index 2b1dcd6..a72d850 100644 (file)
@@ -1123,6 +1123,8 @@ struct iwl_mvm {
  * @IWL_MVM_STATUS_FIRMWARE_RUNNING: firmware is running
  * @IWL_MVM_STATUS_NEED_FLUSH_P2P: need to flush P2P bcast STA
  * @IWL_MVM_STATUS_IN_D3: in D3 (or at least about to go into it)
+ * @IWL_MVM_STATUS_STARTING: starting mac,
+ *     used to disable restart flow while in STARTING state
  */
 enum iwl_mvm_status {
        IWL_MVM_STATUS_HW_RFKILL,
@@ -1134,6 +1136,7 @@ enum iwl_mvm_status {
        IWL_MVM_STATUS_FIRMWARE_RUNNING,
        IWL_MVM_STATUS_NEED_FLUSH_P2P,
        IWL_MVM_STATUS_IN_D3,
+       IWL_MVM_STATUS_STARTING,
 };
 
 /* Keep track of completed init configuration */
index 232ad53..cd08e28 100644 (file)
@@ -686,6 +686,7 @@ static int iwl_mvm_start_get_nvm(struct iwl_mvm *mvm)
        int ret;
 
        rtnl_lock();
+       wiphy_lock(mvm->hw->wiphy);
        mutex_lock(&mvm->mutex);
 
        ret = iwl_run_init_mvm_ucode(mvm);
@@ -701,6 +702,7 @@ static int iwl_mvm_start_get_nvm(struct iwl_mvm *mvm)
                iwl_mvm_stop_device(mvm);
 
        mutex_unlock(&mvm->mutex);
+       wiphy_unlock(mvm->hw->wiphy);
        rtnl_unlock();
 
        if (ret < 0)
@@ -1600,6 +1602,9 @@ void iwl_mvm_nic_restart(struct iwl_mvm *mvm, bool fw_error)
         */
        if (!mvm->fw_restart && fw_error) {
                iwl_fw_error_collect(&mvm->fwrt, false);
+       } else if (test_bit(IWL_MVM_STATUS_STARTING,
+                           &mvm->status)) {
+               IWL_ERR(mvm, "Starting mac, retry will be triggered anyway\n");
        } else if (test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status)) {
                struct iwl_mvm_reprobe *reprobe;
 
index c574f04..5ce07f2 100644 (file)
@@ -1339,9 +1339,13 @@ iwl_pci_find_dev_info(u16 device, u16 subsystem_device,
                      u16 mac_type, u8 mac_step,
                      u16 rf_type, u8 cdb, u8 rf_id, u8 no_160, u8 cores)
 {
+       int num_devices = ARRAY_SIZE(iwl_dev_info_table);
        int i;
 
-       for (i = ARRAY_SIZE(iwl_dev_info_table) - 1; i >= 0; i--) {
+       if (!num_devices)
+               return NULL;
+
+       for (i = num_devices - 1; i >= 0; i--) {
                const struct iwl_dev_info *dev_info = &iwl_dev_info_table[i];
 
                if (dev_info->device != (u16)IWL_CFG_ANY &&
@@ -1442,8 +1446,10 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
         */
        if (iwl_trans->trans_cfg->rf_id &&
            iwl_trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_9000 &&
-           !CSR_HW_RFID_TYPE(iwl_trans->hw_rf_id) && get_crf_id(iwl_trans))
+           !CSR_HW_RFID_TYPE(iwl_trans->hw_rf_id) && get_crf_id(iwl_trans)) {
+               ret = -EINVAL;
                goto out_free_trans;
+       }
 
        dev_info = iwl_pci_find_dev_info(pdev->device, pdev->subsystem_device,
                                         CSR_HW_REV_TYPE(iwl_trans->hw_rev),
index 5ee52cd..d1806f1 100644 (file)
@@ -143,8 +143,6 @@ int mt7615_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr,
        if (!wcid)
                wcid = &dev->mt76.global_wcid;
 
-       pid = mt76_tx_status_skb_add(mdev, wcid, tx_info->skb);
-
        if ((info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) && msta) {
                struct mt7615_phy *phy = &dev->phy;
 
@@ -164,6 +162,7 @@ int mt7615_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr,
        if (id < 0)
                return id;
 
+       pid = mt76_tx_status_skb_add(mdev, wcid, tx_info->skb);
        mt7615_mac_write_txwi(dev, txwi_ptr, tx_info->skb, wcid, sta,
                              pid, key, false);
 
index bd2939e..5a6d782 100644 (file)
@@ -43,19 +43,11 @@ EXPORT_SYMBOL_GPL(mt7663_usb_sdio_reg_map);
 static void
 mt7663_usb_sdio_write_txwi(struct mt7615_dev *dev, struct mt76_wcid *wcid,
                           enum mt76_txq_id qid, struct ieee80211_sta *sta,
+                          struct ieee80211_key_conf *key, int pid,
                           struct sk_buff *skb)
 {
-       struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
-       struct ieee80211_key_conf *key = info->control.hw_key;
-       __le32 *txwi;
-       int pid;
-
-       if (!wcid)
-               wcid = &dev->mt76.global_wcid;
-
-       pid = mt76_tx_status_skb_add(&dev->mt76, wcid, skb);
+       __le32 *txwi = (__le32 *)(skb->data - MT_USB_TXD_SIZE);
 
-       txwi = (__le32 *)(skb->data - MT_USB_TXD_SIZE);
        memset(txwi, 0, MT_USB_TXD_SIZE);
        mt7615_mac_write_txwi(dev, txwi, skb, wcid, sta, pid, key, false);
        skb_push(skb, MT_USB_TXD_SIZE);
@@ -194,10 +186,14 @@ int mt7663_usb_sdio_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr,
        struct mt7615_dev *dev = container_of(mdev, struct mt7615_dev, mt76);
        struct sk_buff *skb = tx_info->skb;
        struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+       struct ieee80211_key_conf *key = info->control.hw_key;
        struct mt7615_sta *msta;
-       int pad;
+       int pad, err, pktid;
 
        msta = wcid ? container_of(wcid, struct mt7615_sta, wcid) : NULL;
+       if (!wcid)
+               wcid = &dev->mt76.global_wcid;
+
        if ((info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) &&
            msta && !msta->rate_probe) {
                /* request to configure sampling rate */
@@ -207,7 +203,8 @@ int mt7663_usb_sdio_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr,
                spin_unlock_bh(&dev->mt76.lock);
        }
 
-       mt7663_usb_sdio_write_txwi(dev, wcid, qid, sta, skb);
+       pktid = mt76_tx_status_skb_add(&dev->mt76, wcid, skb);
+       mt7663_usb_sdio_write_txwi(dev, wcid, qid, sta, key, pktid, skb);
        if (mt76_is_usb(mdev)) {
                u32 len = skb->len;
 
@@ -217,7 +214,12 @@ int mt7663_usb_sdio_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr,
                pad = round_up(skb->len, 4) - skb->len;
        }
 
-       return mt76_skb_adjust_pad(skb, pad);
+       err = mt76_skb_adjust_pad(skb, pad);
+       if (err)
+               /* Release pktid in case of error. */
+               idr_remove(&wcid->pktid, pktid);
+
+       return err;
 }
 EXPORT_SYMBOL_GPL(mt7663_usb_sdio_tx_prepare_skb);
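
Across the mt76 hunks, mt76_tx_status_skb_add() is called only after the steps that can fail (for example token allocation), and any later failure now releases the reserved packet ID with idr_remove() so the status table does not slowly fill with orphaned slots. A generic reserve-then-release-on-error sketch with a plain IDR; demo_prepare() and demo_push_to_hw() are hypothetical:

    #include <linux/idr.h>

    static int demo_push_to_hw(void *pkt)
    {
            return pkt ? 0 : -EINVAL;       /* stand-in for the DMA/USB push */
    }

    static int demo_prepare(struct idr *pktid_idr, void *pkt)
    {
            int id, err;

            id = idr_alloc(pktid_idr, pkt, 1, 0x7fff, GFP_ATOMIC);
            if (id < 0)
                    return id;

            err = demo_push_to_hw(pkt);
            if (err)
                    idr_remove(pktid_idr, id);      /* don't leak the slot */
            return err;
    }
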
 
index efd70dd..2c6c038 100644 (file)
@@ -72,6 +72,7 @@ int mt76x02u_tx_prepare_skb(struct mt76_dev *mdev, void *data,
        bool ampdu = IEEE80211_SKB_CB(tx_info->skb)->flags & IEEE80211_TX_CTL_AMPDU;
        enum mt76_qsel qsel;
        u32 flags;
+       int err;
 
        mt76_insert_hdr_pad(tx_info->skb);
 
@@ -106,7 +107,12 @@ int mt76x02u_tx_prepare_skb(struct mt76_dev *mdev, void *data,
                ewma_pktlen_add(&msta->pktlen, tx_info->skb->len);
        }
 
-       return mt76x02u_skb_dma_info(tx_info->skb, WLAN_PORT, flags);
+       err = mt76x02u_skb_dma_info(tx_info->skb, WLAN_PORT, flags);
+       if (err && wcid)
+               /* Release pktid in case of error. */
+               idr_remove(&wcid->pktid, pid);
+
+       return err;
 }
 EXPORT_SYMBOL_GPL(mt76x02u_tx_prepare_skb);
 
index 5fcf35f..809dc18 100644 (file)
@@ -1151,8 +1151,14 @@ int mt7915_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr,
                }
        }
 
-       pid = mt76_tx_status_skb_add(mdev, wcid, tx_info->skb);
+       t = (struct mt76_txwi_cache *)(txwi + mdev->drv->txwi_size);
+       t->skb = tx_info->skb;
+
+       id = mt76_token_consume(mdev, &t);
+       if (id < 0)
+               return id;
 
+       pid = mt76_tx_status_skb_add(mdev, wcid, tx_info->skb);
        mt7915_mac_write_txwi(dev, txwi_ptr, tx_info->skb, wcid, pid, key,
                              false);
 
@@ -1178,13 +1184,6 @@ int mt7915_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr,
                txp->bss_idx = mvif->idx;
        }
 
-       t = (struct mt76_txwi_cache *)(txwi + mdev->drv->txwi_size);
-       t->skb = tx_info->skb;
-
-       id = mt76_token_consume(mdev, &t);
-       if (id < 0)
-               return id;
-
        txp->token = cpu_to_le16(id);
        if (test_bit(MT_WCID_FLAG_4ADDR, &wcid->flags))
                txp->rept_wds_wcid = cpu_to_le16(wcid->idx);
index 899957b..852d5d9 100644 (file)
@@ -176,7 +176,7 @@ mt7915_get_phy_mode(struct ieee80211_vif *vif, struct ieee80211_sta *sta)
                if (ht_cap->ht_supported)
                        mode |= PHY_MODE_GN;
 
-               if (he_cap->has_he)
+               if (he_cap && he_cap->has_he)
                        mode |= PHY_MODE_AX_24G;
        } else if (band == NL80211_BAND_5GHZ) {
                mode |= PHY_MODE_A;
@@ -187,7 +187,7 @@ mt7915_get_phy_mode(struct ieee80211_vif *vif, struct ieee80211_sta *sta)
                if (vht_cap->vht_supported)
                        mode |= PHY_MODE_AC;
 
-               if (he_cap->has_he)
+               if (he_cap && he_cap->has_he)
                        mode |= PHY_MODE_AX_5G;
        }
 
index 137f86a..bdec508 100644 (file)
@@ -142,15 +142,11 @@ out:
 static void
 mt7921s_write_txwi(struct mt7921_dev *dev, struct mt76_wcid *wcid,
                   enum mt76_txq_id qid, struct ieee80211_sta *sta,
+                  struct ieee80211_key_conf *key, int pid,
                   struct sk_buff *skb)
 {
-       struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
-       struct ieee80211_key_conf *key = info->control.hw_key;
-       __le32 *txwi;
-       int pid;
+       __le32 *txwi = (__le32 *)(skb->data - MT_SDIO_TXD_SIZE);
 
-       pid = mt76_tx_status_skb_add(&dev->mt76, wcid, skb);
-       txwi = (__le32 *)(skb->data - MT_SDIO_TXD_SIZE);
        memset(txwi, 0, MT_SDIO_TXD_SIZE);
        mt7921_mac_write_txwi(dev, txwi, skb, wcid, key, pid, false);
        skb_push(skb, MT_SDIO_TXD_SIZE);
@@ -163,8 +159,9 @@ int mt7921s_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr,
 {
        struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76);
        struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx_info->skb);
+       struct ieee80211_key_conf *key = info->control.hw_key;
        struct sk_buff *skb = tx_info->skb;
-       int pad;
+       int err, pad, pktid;
 
        if (unlikely(tx_info->skb->len <= ETH_HLEN))
                return -EINVAL;
@@ -181,12 +178,18 @@ int mt7921s_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr,
                }
        }
 
-       mt7921s_write_txwi(dev, wcid, qid, sta, skb);
+       pktid = mt76_tx_status_skb_add(&dev->mt76, wcid, skb);
+       mt7921s_write_txwi(dev, wcid, qid, sta, key, pktid, skb);
 
        mt7921_skb_add_sdio_hdr(skb, MT7921_SDIO_DATA);
        pad = round_up(skb->len, 4) - skb->len;
 
-       return mt76_skb_adjust_pad(skb, pad);
+       err = mt76_skb_adjust_pad(skb, pad);
+       if (err)
+               /* Release pktid in case of error. */
+               idr_remove(&wcid->pktid, pktid);
+
+       return err;
 }
 
 void mt7921s_tx_complete_skb(struct mt76_dev *mdev, struct mt76_queue_entry *e)
index 11719ef..6b8c9dc 100644 (file)
@@ -173,7 +173,7 @@ mt76_tx_status_skb_get(struct mt76_dev *dev, struct mt76_wcid *wcid, int pktid,
                        if (!(cb->flags & MT_TX_CB_DMA_DONE))
                                continue;
 
-                       if (!time_is_after_jiffies(cb->jiffies +
+                       if (time_is_after_jiffies(cb->jiffies +
                                                   MT_TX_STATUS_SKB_TIMEOUT))
                                continue;
                }
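
In the mt76 tx-status hunk, time_is_after_jiffies(t) expands to time_before(jiffies, t), i.e. it is true while the deadline t still lies in the future. The corrected check therefore skips entries whose status timeout has not yet expired and only reclaims stale ones; the old negated form had the condition inverted. A tiny illustration, with demo_expired() as a stand-in:

    #include <linux/jiffies.h>

    /* True once 'stamp + timeout' is no longer in the future. */
    static bool demo_expired(unsigned long stamp, unsigned long timeout)
    {
            return !time_is_after_jiffies(stamp + timeout);
    }
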
index e4473a5..74c3d8c 100644 (file)
@@ -25,6 +25,9 @@ static bool rt2x00usb_check_usb_error(struct rt2x00_dev *rt2x00dev, int status)
        if (status == -ENODEV || status == -ENOENT)
                return true;
 
+       if (!test_bit(DEVICE_STATE_STARTED, &rt2x00dev->flags))
+               return false;
+
        if (status == -EPROTO || status == -ETIMEDOUT)
                rt2x00dev->num_proto_errs++;
        else
index 212aaf5..65ef3dc 100644 (file)
@@ -91,7 +91,6 @@ static int rtw89_fw_hdr_parser(struct rtw89_dev *rtwdev, const u8 *fw, u32 len,
        info->section_num = GET_FW_HDR_SEC_NUM(fw);
        info->hdr_len = RTW89_FW_HDR_SIZE +
                        info->section_num * RTW89_FW_SECTION_HDR_SIZE;
-       SET_FW_HDR_PART_SIZE(fw, FWDL_SECTION_PER_PKT_LEN);
 
        bin = fw + info->hdr_len;
 
@@ -275,6 +274,7 @@ static int __rtw89_fw_download_hdr(struct rtw89_dev *rtwdev, const u8 *fw, u32 l
        }
 
        skb_put_data(skb, fw, len);
+       SET_FW_HDR_PART_SIZE(skb->data, FWDL_SECTION_PER_PKT_LEN);
        rtw89_h2c_pkt_set_hdr_fwdl(rtwdev, skb, FWCMD_TYPE_H2C,
                                   H2C_CAT_MAC, H2C_CL_MAC_FWDL,
                                   H2C_FUNC_MAC_FWHDR_DL, len);
index 7ee0d93..36e8d0d 100644 (file)
@@ -282,8 +282,10 @@ struct rtw89_h2creg_sch_tx_en {
        le32_get_bits(*((__le32 *)(fwhdr) + 6), GENMASK(15, 8))
 #define GET_FW_HDR_CMD_VERSERION(fwhdr)        \
        le32_get_bits(*((__le32 *)(fwhdr) + 7), GENMASK(31, 24))
-#define SET_FW_HDR_PART_SIZE(fwhdr, val)       \
-       le32p_replace_bits((__le32 *)(fwhdr) + 7, val, GENMASK(15, 0))
+static inline void SET_FW_HDR_PART_SIZE(void *fwhdr, u32 val)
+{
+       le32p_replace_bits((__le32 *)fwhdr + 7, val, GENMASK(15, 0));
+}
 
 #define SET_CTRL_INFO_MACID(table, val) \
        le32p_replace_bits((__le32 *)(table) + 0, val, GENMASK(6, 0))
index 221fa3b..f577449 100644 (file)
@@ -202,7 +202,7 @@ static int __init virtual_ncidev_init(void)
        miscdev.minor = MISC_DYNAMIC_MINOR;
        miscdev.name = "virtual_nci";
        miscdev.fops = &virtual_ncidev_fops;
-       miscdev.mode = S_IALLUGO;
+       miscdev.mode = 0600;
 
        return misc_register(&miscdev);
 }
index 4b5de8f..4c63564 100644 (file)
@@ -895,10 +895,19 @@ static inline blk_status_t nvme_setup_write_zeroes(struct nvme_ns *ns,
                cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req)));
        cmnd->write_zeroes.length =
                cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
-       if (nvme_ns_has_pi(ns))
+
+       if (nvme_ns_has_pi(ns)) {
                cmnd->write_zeroes.control = cpu_to_le16(NVME_RW_PRINFO_PRACT);
-       else
-               cmnd->write_zeroes.control = 0;
+
+               switch (ns->pi_type) {
+               case NVME_NS_DPS_PI_TYPE1:
+               case NVME_NS_DPS_PI_TYPE2:
+                       cmnd->write_zeroes.reftag =
+                               cpu_to_le32(t10_pi_ref_tag(req));
+                       break;
+               }
+       }
+
        return BLK_STS_OK;
 }
 
@@ -2469,6 +2478,20 @@ static const struct nvme_core_quirk_entry core_quirks[] = {
                .vid = 0x14a4,
                .fr = "22301111",
                .quirks = NVME_QUIRK_SIMPLE_SUSPEND,
+       },
+       {
+               /*
+                * This Kioxia CD6-V Series / HPE PE8030 device times out and
+                * aborts I/O during any load, but more easily reproducible
+                * with discards (fstrim).
+                *
+                * The device is left in a state where it is also not possible
+                * to use "nvme set-feature" to disable APST, but booting with
+                * nvme_core.default_ps_max_latency=0 works.
+                */
+               .vid = 0x1e0f,
+               .mn = "KCD6XVUL6T40",
+               .quirks = NVME_QUIRK_NO_APST,
        }
 };
 
index c5a2b71..282d541 100644 (file)
@@ -698,6 +698,9 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
                        if (token >= 0)
                                pr_warn("I/O fail on reconnect controller after %d sec\n",
                                        token);
+                       else
+                               token = -1;
+
                        opts->fast_io_fail_tmo = token;
                        break;
                case NVMF_OPT_HOSTNQN:
index 33bc83d..4ceb286 100644 (file)
@@ -572,7 +572,7 @@ static int nvme_tcp_handle_comp(struct nvme_tcp_queue *queue,
        return ret;
 }
 
-static int nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req,
+static void nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req,
                struct nvme_tcp_r2t_pdu *pdu)
 {
        struct nvme_tcp_data_pdu *data = req->pdu;
@@ -581,32 +581,11 @@ static int nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req,
        u8 hdgst = nvme_tcp_hdgst_len(queue);
        u8 ddgst = nvme_tcp_ddgst_len(queue);
 
+       req->state = NVME_TCP_SEND_H2C_PDU;
+       req->offset = 0;
        req->pdu_len = le32_to_cpu(pdu->r2t_length);
        req->pdu_sent = 0;
 
-       if (unlikely(!req->pdu_len)) {
-               dev_err(queue->ctrl->ctrl.device,
-                       "req %d r2t len is %u, probably a bug...\n",
-                       rq->tag, req->pdu_len);
-               return -EPROTO;
-       }
-
-       if (unlikely(req->data_sent + req->pdu_len > req->data_len)) {
-               dev_err(queue->ctrl->ctrl.device,
-                       "req %d r2t len %u exceeded data len %u (%zu sent)\n",
-                       rq->tag, req->pdu_len, req->data_len,
-                       req->data_sent);
-               return -EPROTO;
-       }
-
-       if (unlikely(le32_to_cpu(pdu->r2t_offset) < req->data_sent)) {
-               dev_err(queue->ctrl->ctrl.device,
-                       "req %d unexpected r2t offset %u (expected %zu)\n",
-                       rq->tag, le32_to_cpu(pdu->r2t_offset),
-                       req->data_sent);
-               return -EPROTO;
-       }
-
        memset(data, 0, sizeof(*data));
        data->hdr.type = nvme_tcp_h2c_data;
        data->hdr.flags = NVME_TCP_F_DATA_LAST;
@@ -622,7 +601,6 @@ static int nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req,
        data->command_id = nvme_cid(rq);
        data->data_offset = pdu->r2t_offset;
        data->data_length = cpu_to_le32(req->pdu_len);
-       return 0;
 }
 
 static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue,
@@ -630,7 +608,7 @@ static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue,
 {
        struct nvme_tcp_request *req;
        struct request *rq;
-       int ret;
+       u32 r2t_length = le32_to_cpu(pdu->r2t_length);
 
        rq = nvme_find_rq(nvme_tcp_tagset(queue), pdu->command_id);
        if (!rq) {
@@ -641,13 +619,28 @@ static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue,
        }
        req = blk_mq_rq_to_pdu(rq);
 
-       ret = nvme_tcp_setup_h2c_data_pdu(req, pdu);
-       if (unlikely(ret))
-               return ret;
+       if (unlikely(!r2t_length)) {
+               dev_err(queue->ctrl->ctrl.device,
+                       "req %d r2t len is %u, probably a bug...\n",
+                       rq->tag, r2t_length);
+               return -EPROTO;
+       }
 
-       req->state = NVME_TCP_SEND_H2C_PDU;
-       req->offset = 0;
+       if (unlikely(req->data_sent + r2t_length > req->data_len)) {
+               dev_err(queue->ctrl->ctrl.device,
+                       "req %d r2t len %u exceeded data len %u (%zu sent)\n",
+                       rq->tag, r2t_length, req->data_len, req->data_sent);
+               return -EPROTO;
+       }
 
+       if (unlikely(le32_to_cpu(pdu->r2t_offset) < req->data_sent)) {
+               dev_err(queue->ctrl->ctrl.device,
+                       "req %d unexpected r2t offset %u (expected %zu)\n",
+                       rq->tag, le32_to_cpu(pdu->r2t_offset), req->data_sent);
+               return -EPROTO;
+       }
+
+       nvme_tcp_setup_h2c_data_pdu(req, pdu);
        nvme_tcp_queue_request(req, false, true);
 
        return 0;
@@ -1232,6 +1225,7 @@ static int nvme_tcp_alloc_async_req(struct nvme_tcp_ctrl *ctrl)
 
 static void nvme_tcp_free_queue(struct nvme_ctrl *nctrl, int qid)
 {
+       struct page *page;
        struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
        struct nvme_tcp_queue *queue = &ctrl->queues[qid];
 
@@ -1241,6 +1235,11 @@ static void nvme_tcp_free_queue(struct nvme_ctrl *nctrl, int qid)
        if (queue->hdr_digest || queue->data_digest)
                nvme_tcp_free_crypto(queue);
 
+       if (queue->pf_cache.va) {
+               page = virt_to_head_page(queue->pf_cache.va);
+               __page_frag_cache_drain(page, queue->pf_cache.pagecnt_bias);
+               queue->pf_cache.va = NULL;
+       }
        sock_release(queue->sock);
        kfree(queue->pdu);
        mutex_destroy(&queue->send_mutex);
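
The nvme-tcp hunk above drains queue->pf_cache when a queue is freed: a page_frag_cache pins its backing page through pagecnt_bias, so teardown has to hand those references back with __page_frag_cache_drain() or the page leaks. A minimal sketch of that cleanup, with demo_drain_frag_cache() as an illustrative helper:

    #include <linux/mm.h>
    #include <linux/gfp.h>

    static void demo_drain_frag_cache(struct page_frag_cache *nc)
    {
            struct page *page;

            if (!nc->va)
                    return;
            page = virt_to_head_page(nc->va);
            __page_frag_cache_drain(page, nc->pagecnt_bias);
            nc->va = NULL;
    }
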
index 6aa30f3..6be6e59 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/uio.h>
 #include <linux/falloc.h>
 #include <linux/file.h>
+#include <linux/fs.h>
 #include "nvmet.h"
 
 #define NVMET_MAX_MPOOL_BVEC           16
@@ -266,7 +267,8 @@ static void nvmet_file_execute_rw(struct nvmet_req *req)
 
        if (req->ns->buffered_io) {
                if (likely(!req->f.mpool_alloc) &&
-                               nvmet_file_execute_io(req, IOCB_NOWAIT))
+                   (req->ns->file->f_mode & FMODE_NOWAIT) &&
+                   nvmet_file_execute_io(req, IOCB_NOWAIT))
                        return;
                nvmet_file_submit_buffered_io(req);
        } else
index 84c387e..cb6a473 100644 (file)
@@ -166,6 +166,8 @@ static struct workqueue_struct *nvmet_tcp_wq;
 static const struct nvmet_fabrics_ops nvmet_tcp_ops;
 static void nvmet_tcp_free_cmd(struct nvmet_tcp_cmd *c);
 static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd);
+static void nvmet_tcp_free_cmd_buffers(struct nvmet_tcp_cmd *cmd);
+static void nvmet_tcp_unmap_pdu_iovec(struct nvmet_tcp_cmd *cmd);
 
 static inline u16 nvmet_tcp_cmd_tag(struct nvmet_tcp_queue *queue,
                struct nvmet_tcp_cmd *cmd)
@@ -297,6 +299,16 @@ static int nvmet_tcp_check_ddgst(struct nvmet_tcp_queue *queue, void *pdu)
        return 0;
 }
 
+static void nvmet_tcp_free_cmd_buffers(struct nvmet_tcp_cmd *cmd)
+{
+       WARN_ON(unlikely(cmd->nr_mapped > 0));
+
+       kfree(cmd->iov);
+       sgl_free(cmd->req.sg);
+       cmd->iov = NULL;
+       cmd->req.sg = NULL;
+}
+
 static void nvmet_tcp_unmap_pdu_iovec(struct nvmet_tcp_cmd *cmd)
 {
        struct scatterlist *sg;
@@ -306,6 +318,8 @@ static void nvmet_tcp_unmap_pdu_iovec(struct nvmet_tcp_cmd *cmd)
 
        for (i = 0; i < cmd->nr_mapped; i++)
                kunmap(sg_page(&sg[i]));
+
+       cmd->nr_mapped = 0;
 }
 
 static void nvmet_tcp_map_pdu_iovec(struct nvmet_tcp_cmd *cmd)
@@ -387,7 +401,7 @@ static int nvmet_tcp_map_data(struct nvmet_tcp_cmd *cmd)
 
        return 0;
 err:
-       sgl_free(cmd->req.sg);
+       nvmet_tcp_free_cmd_buffers(cmd);
        return NVME_SC_INTERNAL;
 }
 
@@ -632,10 +646,8 @@ static int nvmet_try_send_data(struct nvmet_tcp_cmd *cmd, bool last_in_batch)
                }
        }
 
-       if (queue->nvme_sq.sqhd_disabled) {
-               kfree(cmd->iov);
-               sgl_free(cmd->req.sg);
-       }
+       if (queue->nvme_sq.sqhd_disabled)
+               nvmet_tcp_free_cmd_buffers(cmd);
 
        return 1;
 
@@ -664,8 +676,7 @@ static int nvmet_try_send_response(struct nvmet_tcp_cmd *cmd,
        if (left)
                return -EAGAIN;
 
-       kfree(cmd->iov);
-       sgl_free(cmd->req.sg);
+       nvmet_tcp_free_cmd_buffers(cmd);
        cmd->queue->snd_cmd = NULL;
        nvmet_tcp_put_cmd(cmd);
        return 1;
@@ -700,10 +711,11 @@ static int nvmet_try_send_r2t(struct nvmet_tcp_cmd *cmd, bool last_in_batch)
 static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd, bool last_in_batch)
 {
        struct nvmet_tcp_queue *queue = cmd->queue;
+       int left = NVME_TCP_DIGEST_LENGTH - cmd->offset;
        struct msghdr msg = { .msg_flags = MSG_DONTWAIT };
        struct kvec iov = {
                .iov_base = (u8 *)&cmd->exp_ddgst + cmd->offset,
-               .iov_len = NVME_TCP_DIGEST_LENGTH - cmd->offset
+               .iov_len = left
        };
        int ret;
 
@@ -717,6 +729,10 @@ static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd, bool last_in_batch)
                return ret;
 
        cmd->offset += ret;
+       left -= ret;
+
+       if (left)
+               return -EAGAIN;
 
        if (queue->nvme_sq.sqhd_disabled) {
                cmd->queue->snd_cmd = NULL;
@@ -1406,8 +1422,7 @@ static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd)
 {
        nvmet_req_uninit(&cmd->req);
        nvmet_tcp_unmap_pdu_iovec(cmd);
-       kfree(cmd->iov);
-       sgl_free(cmd->req.sg);
+       nvmet_tcp_free_cmd_buffers(cmd);
 }
 
 static void nvmet_tcp_uninit_data_in_cmds(struct nvmet_tcp_queue *queue)
@@ -1417,7 +1432,10 @@ static void nvmet_tcp_uninit_data_in_cmds(struct nvmet_tcp_queue *queue)
 
        for (i = 0; i < queue->nr_cmds; i++, cmd++) {
                if (nvmet_tcp_need_data_in(cmd))
-                       nvmet_tcp_finish_cmd(cmd);
+                       nvmet_req_uninit(&cmd->req);
+
+               nvmet_tcp_unmap_pdu_iovec(cmd);
+               nvmet_tcp_free_cmd_buffers(cmd);
        }
 
        if (!queue->nr_cmds && nvmet_tcp_need_data_in(&queue->connect)) {
@@ -1437,7 +1455,9 @@ static void nvmet_tcp_release_queue_work(struct work_struct *w)
        mutex_unlock(&nvmet_tcp_queue_mutex);
 
        nvmet_tcp_restore_socket_callbacks(queue);
-       flush_work(&queue->io_work);
+       cancel_work_sync(&queue->io_work);
+       /* stop accepting incoming data */
+       queue->rcv_state = NVMET_TCP_RECV_ERR;
 
        nvmet_tcp_uninit_data_in_cmds(queue);
        nvmet_sq_destroy(&queue->nvme_sq);
index 9d1e7e0..4020b83 100644 (file)
@@ -41,9 +41,12 @@ enum cros_ec_ish_channel {
 #define ISHTP_SEND_TIMEOUT                     (3 * HZ)
 
 /* ISH Transport CrOS EC ISH client unique GUID */
-static const guid_t cros_ish_guid =
-       GUID_INIT(0x7b7154d0, 0x56f4, 0x4bdc,
-                 0xb0, 0xd8, 0x9e, 0x7c, 0xda, 0xe0, 0xd6, 0xa0);
+static const struct ishtp_device_id cros_ec_ishtp_id_table[] = {
+       { .guid = GUID_INIT(0x7b7154d0, 0x56f4, 0x4bdc,
+                 0xb0, 0xd8, 0x9e, 0x7c, 0xda, 0xe0, 0xd6, 0xa0), },
+       { }
+};
+MODULE_DEVICE_TABLE(ishtp, cros_ec_ishtp_id_table);
 
 struct header {
        u8 channel;
@@ -389,7 +392,7 @@ static int cros_ish_init(struct ishtp_cl *cros_ish_cl)
        ishtp_set_tx_ring_size(cros_ish_cl, CROS_ISH_CL_TX_RING_SIZE);
        ishtp_set_rx_ring_size(cros_ish_cl, CROS_ISH_CL_RX_RING_SIZE);
 
-       fw_client = ishtp_fw_cl_get_client(dev, &cros_ish_guid);
+       fw_client = ishtp_fw_cl_get_client(dev, &cros_ec_ishtp_id_table[0].guid);
        if (!fw_client) {
                dev_err(cl_data_to_dev(client_data),
                        "ish client uuid not found\n");
@@ -765,7 +768,7 @@ static SIMPLE_DEV_PM_OPS(cros_ec_ishtp_pm_ops, cros_ec_ishtp_suspend,
 
 static struct ishtp_cl_driver  cros_ec_ishtp_driver = {
        .name = "cros_ec_ishtp",
-       .guid = &cros_ish_guid,
+       .id = cros_ec_ishtp_id_table,
        .probe = cros_ec_ishtp_probe,
        .remove = cros_ec_ishtp_remove,
        .reset = cros_ec_ishtp_reset,
@@ -791,4 +794,3 @@ MODULE_DESCRIPTION("ChromeOS EC ISHTP Client Driver");
 MODULE_AUTHOR("Rushikesh S Kadam <rushikesh.s.kadam@intel.com>");
 
 MODULE_LICENSE("GPL v2");
-MODULE_ALIAS("ishtp:*");
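
The cros-ec-ishtp hunks above (and the ishtp-eclite driver below) replace a bare guid_t plus MODULE_ALIAS("ishtp:*") with a struct ishtp_device_id table and MODULE_DEVICE_TABLE(ishtp, ...), so userspace loads the module only for the GUIDs the driver actually claims instead of for every ISHTP device. A minimal sketch with a made-up GUID; demo_ishtp_id_table and the GUID value are purely illustrative:

    #include <linux/module.h>
    #include <linux/mod_devicetable.h>
    #include <linux/uuid.h>

    static const struct ishtp_device_id demo_ishtp_id_table[] = {
            { .guid = GUID_INIT(0x12345678, 0x1234, 0x1234,
                                0x12, 0x34, 0x12, 0x34, 0x12, 0x34, 0x12, 0x34), },
            { }     /* terminator */
    };
    MODULE_DEVICE_TABLE(ishtp, demo_ishtp_id_table);
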
index 12fc98a..93ac8b2 100644 (file)
@@ -93,9 +93,12 @@ struct ishtp_opregion_dev {
 };
 
 /* eclite ishtp client UUID: 6a19cc4b-d760-4de3-b14d-f25ebd0fbcd9 */
-static const guid_t ecl_ishtp_guid =
-       GUID_INIT(0x6a19cc4b, 0xd760, 0x4de3,
-                 0xb1, 0x4d, 0xf2, 0x5e, 0xbd, 0xf, 0xbc, 0xd9);
+static const struct ishtp_device_id ecl_ishtp_id_table[] = {
+       { .guid = GUID_INIT(0x6a19cc4b, 0xd760, 0x4de3,
+                 0xb1, 0x4d, 0xf2, 0x5e, 0xbd, 0xf, 0xbc, 0xd9), },
+       { }
+};
+MODULE_DEVICE_TABLE(ishtp, ecl_ishtp_id_table);
 
 /* ACPI DSM UUID: 91d936a7-1f01-49c6-a6b4-72f00ad8d8a5 */
 static const guid_t ecl_acpi_guid =
@@ -462,7 +465,7 @@ static int ecl_ishtp_cl_init(struct ishtp_cl *ecl_ishtp_cl)
        ishtp_set_tx_ring_size(ecl_ishtp_cl, ECL_CL_TX_RING_SIZE);
        ishtp_set_rx_ring_size(ecl_ishtp_cl, ECL_CL_RX_RING_SIZE);
 
-       fw_client = ishtp_fw_cl_get_client(dev, &ecl_ishtp_guid);
+       fw_client = ishtp_fw_cl_get_client(dev, &ecl_ishtp_id_table[0].guid);
        if (!fw_client) {
                dev_err(cl_data_to_dev(opr_dev), "fw client not found\n");
                return -ENOENT;
@@ -674,7 +677,7 @@ static const struct dev_pm_ops ecl_ishtp_pm_ops = {
 
 static struct ishtp_cl_driver ecl_ishtp_cl_driver = {
        .name = "ishtp-eclite",
-       .guid = &ecl_ishtp_guid,
+       .id = ecl_ishtp_id_table,
        .probe = ecl_ishtp_cl_probe,
        .remove = ecl_ishtp_cl_remove,
        .reset = ecl_ishtp_cl_reset,
@@ -698,4 +701,3 @@ MODULE_DESCRIPTION("ISH ISHTP eclite client opregion driver");
 MODULE_AUTHOR("K Naduvalath, Sumesh <sumesh.k.naduvalath@intel.com>");
 
 MODULE_LICENSE("GPL v2");
-MODULE_ALIAS("ishtp:*");
index b9fac78..2a5c182 100644 (file)
@@ -463,17 +463,12 @@ int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent)
 
 static int __init init_dtpm(void)
 {
-       struct dtpm_descr *dtpm_descr;
-
        pct = powercap_register_control_type(NULL, "dtpm", NULL);
        if (IS_ERR(pct)) {
                pr_err("Failed to register control type\n");
                return PTR_ERR(pct);
        }
 
-       for_each_dtpm_table(dtpm_descr)
-               dtpm_descr->init();
-
        return 0;
 }
 late_initcall(init_dtpm);
index b940e02..e83453b 100644 (file)
@@ -5095,14 +5095,9 @@ lpfc_cmpl_els_logo_acc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
                /* NPort Recovery mode or node is just allocated */
                if (!lpfc_nlp_not_used(ndlp)) {
                        /* A LOGO is completing and the node is in NPR state.
-                        * If this a fabric node that cleared its transport
-                        * registration, release the rpi.
+                        * Just unregister the RPI because the node is still
+                        * required.
                         */
-                       spin_lock_irq(&ndlp->lock);
-                       ndlp->nlp_flag &= ~NLP_NPR_2B_DISC;
-                       if (phba->sli_rev == LPFC_SLI_REV4)
-                               ndlp->nlp_flag |= NLP_RELEASE_RPI;
-                       spin_unlock_irq(&ndlp->lock);
                        lpfc_unreg_rpi(vport, ndlp);
                } else {
                        /* Indicate the node has already released, should
index 27eb652..81dab9b 100644 (file)
@@ -639,8 +639,8 @@ static void _base_sync_drv_fw_timestamp(struct MPT3SAS_ADAPTER *ioc)
        mpi_request->IOCParameter = MPI26_SET_IOC_PARAMETER_SYNC_TIMESTAMP;
        current_time = ktime_get_real();
        TimeStamp = ktime_to_ms(current_time);
-       mpi_request->Reserved7 = cpu_to_le32(TimeStamp & 0xFFFFFFFF);
-       mpi_request->IOCParameterValue = cpu_to_le32(TimeStamp >> 32);
+       mpi_request->Reserved7 = cpu_to_le32(TimeStamp >> 32);
+       mpi_request->IOCParameterValue = cpu_to_le32(TimeStamp & 0xFFFFFFFF);
        init_completion(&ioc->scsih_cmds.done);
        ioc->put_smid_default(ioc, smid);
        dinitprintk(ioc, ioc_info(ioc,
index db6a759..a0af986 100644 (file)
 
 #define MPT_MAX_CALLBACKS              32
 
+#define MPT_MAX_HBA_NUM_PHYS           32
+
 #define INTERNAL_CMDS_COUNT            10      /* reserved cmds */
 /* reserved for issuing internally framed scsi io cmds */
 #define INTERNAL_SCSIIO_CMDS_COUNT     3
@@ -798,6 +800,7 @@ struct _sas_phy {
  * @enclosure_handle: handle for this a member of an enclosure
  * @device_info: bitwise defining capabilities of this sas_host/expander
  * @responding: used in _scsih_expander_device_mark_responding
+ * @nr_phys_allocated: Allocated memory for this many count phys
  * @phy: a list of phys that make up this sas_host/expander
  * @sas_port_list: list of ports attached to this sas_host/expander
  * @port: hba port entry containing node's port number info
@@ -813,6 +816,7 @@ struct _sas_node {
        u16     enclosure_handle;
        u64     enclosure_logical_id;
        u8      responding;
+       u8      nr_phys_allocated;
        struct hba_port *port;
        struct  _sas_phy *phy;
        struct list_head sas_port_list;
index cee7170..0079276 100644 (file)
@@ -3869,7 +3869,7 @@ _scsih_ublock_io_device(struct MPT3SAS_ADAPTER *ioc,
 
        shost_for_each_device(sdev, ioc->shost) {
                sas_device_priv_data = sdev->hostdata;
-               if (!sas_device_priv_data)
+               if (!sas_device_priv_data || !sas_device_priv_data->sas_target)
                        continue;
                if (sas_device_priv_data->sas_target->sas_address
                    != sas_address)
@@ -6406,11 +6406,26 @@ _scsih_sas_port_refresh(struct MPT3SAS_ADAPTER *ioc)
        int i, j, count = 0, lcount = 0;
        int ret;
        u64 sas_addr;
+       u8 num_phys;
 
        drsprintk(ioc, ioc_info(ioc,
            "updating ports for sas_host(0x%016llx)\n",
            (unsigned long long)ioc->sas_hba.sas_address));
 
+       mpt3sas_config_get_number_hba_phys(ioc, &num_phys);
+       if (!num_phys) {
+               ioc_err(ioc, "failure at %s:%d/%s()!\n",
+                   __FILE__, __LINE__, __func__);
+               return;
+       }
+
+       if (num_phys > ioc->sas_hba.nr_phys_allocated) {
+               ioc_err(ioc, "failure at %s:%d/%s()!\n",
+                  __FILE__, __LINE__, __func__);
+               return;
+       }
+       ioc->sas_hba.num_phys = num_phys;
+
        port_table = kcalloc(ioc->sas_hba.num_phys,
            sizeof(struct hba_port), GFP_KERNEL);
        if (!port_table)
@@ -6611,6 +6626,30 @@ _scsih_sas_host_refresh(struct MPT3SAS_ADAPTER *ioc)
                        ioc->sas_hba.phy[i].hba_vphy = 1;
                }
 
+               /*
+                * Add new HBA phys to STL if these new phys got added as part
+                * of HBA Firmware upgrade/downgrade operation.
+                */
+               if (!ioc->sas_hba.phy[i].phy) {
+                       if ((mpt3sas_config_get_phy_pg0(ioc, &mpi_reply,
+                                                       &phy_pg0, i))) {
+                               ioc_err(ioc, "failure at %s:%d/%s()!\n",
+                                       __FILE__, __LINE__, __func__);
+                               continue;
+                       }
+                       ioc_status = le16_to_cpu(mpi_reply.IOCStatus) &
+                               MPI2_IOCSTATUS_MASK;
+                       if (ioc_status != MPI2_IOCSTATUS_SUCCESS) {
+                               ioc_err(ioc, "failure at %s:%d/%s()!\n",
+                                       __FILE__, __LINE__, __func__);
+                               continue;
+                       }
+                       ioc->sas_hba.phy[i].phy_id = i;
+                       mpt3sas_transport_add_host_phy(ioc,
+                               &ioc->sas_hba.phy[i], phy_pg0,
+                               ioc->sas_hba.parent_dev);
+                       continue;
+               }
                ioc->sas_hba.phy[i].handle = ioc->sas_hba.handle;
                attached_handle = le16_to_cpu(sas_iounit_pg0->PhyData[i].
                    AttachedDevHandle);
@@ -6622,6 +6661,19 @@ _scsih_sas_host_refresh(struct MPT3SAS_ADAPTER *ioc)
                    attached_handle, i, link_rate,
                    ioc->sas_hba.phy[i].port);
        }
+       /*
+        * Clear the phy details if this phy got disabled as part of
+        * HBA Firmware upgrade/downgrade operation.
+        */
+       for (i = ioc->sas_hba.num_phys;
+            i < ioc->sas_hba.nr_phys_allocated; i++) {
+               if (ioc->sas_hba.phy[i].phy &&
+                   ioc->sas_hba.phy[i].phy->negotiated_linkrate >=
+                   SAS_LINK_RATE_1_5_GBPS)
+                       mpt3sas_transport_update_links(ioc,
+                               ioc->sas_hba.sas_address, 0, i,
+                               MPI2_SAS_NEG_LINK_RATE_PHY_DISABLED, NULL);
+       }
  out:
        kfree(sas_iounit_pg0);
 }
@@ -6654,7 +6706,10 @@ _scsih_sas_host_add(struct MPT3SAS_ADAPTER *ioc)
                        __FILE__, __LINE__, __func__);
                return;
        }
-       ioc->sas_hba.phy = kcalloc(num_phys,
+
+       ioc->sas_hba.nr_phys_allocated = max_t(u8,
+           MPT_MAX_HBA_NUM_PHYS, num_phys);
+       ioc->sas_hba.phy = kcalloc(ioc->sas_hba.nr_phys_allocated,
            sizeof(struct _sas_phy), GFP_KERNEL);
        if (!ioc->sas_hba.phy) {
                ioc_err(ioc, "failure at %s:%d/%s()!\n",
index 2e37b18..53d2b85 100644 (file)
@@ -865,7 +865,7 @@ qla_edif_app_getfcinfo(scsi_qla_host_t *vha, struct bsg_job *bsg_job)
                            "APP request entry - portid=%06x.\n", tdid.b24);
 
                        /* Ran out of space */
-                       if (pcnt > app_req.num_ports)
+                       if (pcnt >= app_req.num_ports)
                                break;
 
                        if (tdid.b24 != 0 && tdid.b24 != fcport->d_id.b24)
index 1d0278d..3c0da37 100644 (file)
@@ -1189,7 +1189,7 @@ static int p_fill_from_dev_buffer(struct scsi_cmnd *scp, const void *arr,
                 __func__, off_dst, scsi_bufflen(scp), act_len,
                 scsi_get_resid(scp));
        n = scsi_bufflen(scp) - (off_dst + act_len);
-       scsi_set_resid(scp, min_t(int, scsi_get_resid(scp), n));
+       scsi_set_resid(scp, min_t(u32, scsi_get_resid(scp), n));
        return 0;
 }
 
@@ -1562,7 +1562,8 @@ static int resp_inquiry(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
        unsigned char pq_pdt;
        unsigned char *arr;
        unsigned char *cmd = scp->cmnd;
-       int alloc_len, n, ret;
+       u32 alloc_len, n;
+       int ret;
        bool have_wlun, is_disk, is_zbc, is_disk_zbc;
 
        alloc_len = get_unaligned_be16(cmd + 3);
@@ -1585,7 +1586,8 @@ static int resp_inquiry(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
                kfree(arr);
                return check_condition_result;
        } else if (0x1 & cmd[1]) {  /* EVPD bit set */
-               int lu_id_num, port_group_id, target_dev_id, len;
+               int lu_id_num, port_group_id, target_dev_id;
+               u32 len;
                char lu_id_str[6];
                int host_no = devip->sdbg_host->shost->host_no;
                
@@ -1676,9 +1678,9 @@ static int resp_inquiry(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
                        kfree(arr);
                        return check_condition_result;
                }
-               len = min(get_unaligned_be16(arr + 2) + 4, alloc_len);
+               len = min_t(u32, get_unaligned_be16(arr + 2) + 4, alloc_len);
                ret = fill_from_dev_buffer(scp, arr,
-                           min(len, SDEBUG_MAX_INQ_ARR_SZ));
+                           min_t(u32, len, SDEBUG_MAX_INQ_ARR_SZ));
                kfree(arr);
                return ret;
        }
@@ -1714,7 +1716,7 @@ static int resp_inquiry(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
        }
        put_unaligned_be16(0x2100, arr + n);    /* SPL-4 no version claimed */
        ret = fill_from_dev_buffer(scp, arr,
-                           min_t(int, alloc_len, SDEBUG_LONG_INQ_SZ));
+                           min_t(u32, alloc_len, SDEBUG_LONG_INQ_SZ));
        kfree(arr);
        return ret;
 }
@@ -1729,8 +1731,8 @@ static int resp_requests(struct scsi_cmnd *scp,
        unsigned char *cmd = scp->cmnd;
        unsigned char arr[SCSI_SENSE_BUFFERSIZE];       /* assume >= 18 bytes */
        bool dsense = !!(cmd[1] & 1);
-       int alloc_len = cmd[4];
-       int len = 18;
+       u32 alloc_len = cmd[4];
+       u32 len = 18;
        int stopped_state = atomic_read(&devip->stopped);
 
        memset(arr, 0, sizeof(arr));
@@ -1774,7 +1776,7 @@ static int resp_requests(struct scsi_cmnd *scp,
                        arr[7] = 0xa;
                }
        }
-       return fill_from_dev_buffer(scp, arr, min_t(int, len, alloc_len));
+       return fill_from_dev_buffer(scp, arr, min_t(u32, len, alloc_len));
 }
 
 static int resp_start_stop(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
@@ -2312,7 +2314,8 @@ static int resp_mode_sense(struct scsi_cmnd *scp,
 {
        int pcontrol, pcode, subpcode, bd_len;
        unsigned char dev_spec;
-       int alloc_len, offset, len, target_dev_id;
+       u32 alloc_len, offset, len;
+       int target_dev_id;
        int target = scp->device->id;
        unsigned char *ap;
        unsigned char arr[SDEBUG_MAX_MSENSE_SZ];
@@ -2468,7 +2471,7 @@ static int resp_mode_sense(struct scsi_cmnd *scp,
                arr[0] = offset - 1;
        else
                put_unaligned_be16((offset - 2), arr + 0);
-       return fill_from_dev_buffer(scp, arr, min_t(int, alloc_len, offset));
+       return fill_from_dev_buffer(scp, arr, min_t(u32, alloc_len, offset));
 }
 
 #define SDEBUG_MAX_MSELECT_SZ 512
@@ -2499,11 +2502,11 @@ static int resp_mode_select(struct scsi_cmnd *scp,
                            __func__, param_len, res);
        md_len = mselect6 ? (arr[0] + 1) : (get_unaligned_be16(arr + 0) + 2);
        bd_len = mselect6 ? arr[3] : get_unaligned_be16(arr + 6);
-       if (md_len > 2) {
+       off = bd_len + (mselect6 ? 4 : 8);
+       if (md_len > 2 || off >= res) {
                mk_sense_invalid_fld(scp, SDEB_IN_DATA, 0, -1);
                return check_condition_result;
        }
-       off = bd_len + (mselect6 ? 4 : 8);
        mpage = arr[off] & 0x3f;
        ps = !!(arr[off] & 0x80);
        if (ps) {
@@ -2583,7 +2586,8 @@ static int resp_ie_l_pg(unsigned char *arr)
 static int resp_log_sense(struct scsi_cmnd *scp,
                          struct sdebug_dev_info *devip)
 {
-       int ppc, sp, pcode, subpcode, alloc_len, len, n;
+       int ppc, sp, pcode, subpcode;
+       u32 alloc_len, len, n;
        unsigned char arr[SDEBUG_MAX_LSENSE_SZ];
        unsigned char *cmd = scp->cmnd;
 
@@ -2653,9 +2657,9 @@ static int resp_log_sense(struct scsi_cmnd *scp,
                mk_sense_invalid_fld(scp, SDEB_IN_CDB, 3, -1);
                return check_condition_result;
        }
-       len = min_t(int, get_unaligned_be16(arr + 2) + 4, alloc_len);
+       len = min_t(u32, get_unaligned_be16(arr + 2) + 4, alloc_len);
        return fill_from_dev_buffer(scp, arr,
-                   min_t(int, len, SDEBUG_MAX_INQ_ARR_SZ));
+                   min_t(u32, len, SDEBUG_MAX_INQ_ARR_SZ));
 }
 
 static inline bool sdebug_dev_is_zoned(struct sdebug_dev_info *devip)
@@ -4430,7 +4434,7 @@ static int resp_report_zones(struct scsi_cmnd *scp,
        put_unaligned_be64(sdebug_capacity - 1, arr + 8);
 
        rep_len = (unsigned long)desc - (unsigned long)arr;
-       ret = fill_from_dev_buffer(scp, arr, min_t(int, alloc_len, rep_len));
+       ret = fill_from_dev_buffer(scp, arr, min_t(u32, alloc_len, rep_len));
 
 fini:
        read_unlock(macc_lckp);
@@ -4653,6 +4657,7 @@ static void zbc_rwp_zone(struct sdebug_dev_info *devip,
                         struct sdeb_zone_state *zsp)
 {
        enum sdebug_z_cond zc;
+       struct sdeb_store_info *sip = devip2sip(devip, false);
 
        if (zbc_zone_is_conv(zsp))
                return;
@@ -4664,6 +4669,10 @@ static void zbc_rwp_zone(struct sdebug_dev_info *devip,
        if (zsp->z_cond == ZC4_CLOSED)
                devip->nr_closed--;
 
+       if (zsp->z_wp > zsp->z_start)
+               memset(sip->storep + zsp->z_start * sdebug_sector_size, 0,
+                      (zsp->z_wp - zsp->z_start) * sdebug_sector_size);
+
        zsp->z_non_seq_resource = false;
        zsp->z_wp = zsp->z_start;
        zsp->z_cond = ZC1_EMPTY;
index 7afcec2..d4edce9 100644 (file)
@@ -812,7 +812,7 @@ store_state_field(struct device *dev, struct device_attribute *attr,
 
        mutex_lock(&sdev->state_mutex);
        if (sdev->sdev_state == SDEV_RUNNING && state == SDEV_RUNNING) {
-               ret = count;
+               ret = 0;
        } else {
                ret = scsi_device_set_state(sdev, state);
                if (ret == 0 && state == SDEV_RUNNING)
index fc5b214..5393b5c 100644
@@ -1189,6 +1189,7 @@ static int ufs_mtk_probe(struct platform_device *pdev)
        }
        link = device_link_add(dev, &reset_pdev->dev,
                DL_FLAG_AUTOPROBE_CONSUMER);
+       put_device(&reset_pdev->dev);
        if (!link) {
                dev_notice(dev, "add reset device_link fail\n");
                goto skip_reset;
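
The ufs-mediatek hunk above adds a put_device() once the device link has been created, presumably balancing an earlier lookup that returned the reset controller's platform device with an elevated reference count. A standalone sketch of that get/put discipline with a toy reference counter (an analogy only, not the driver-core API):

#include <stdio.h>
#include <stdlib.h>

struct toy_obj {
	int refs;
};

static struct toy_obj *toy_get(struct toy_obj *o)
{
	o->refs++;
	return o;
}

static void toy_put(struct toy_obj *o)
{
	if (--o->refs == 0) {
		printf("freeing object\n");
		free(o);
	}
}

/* A lookup helper returns its result with an extra reference held,
 * so the object cannot vanish before the caller has used it. */
static struct toy_obj *toy_find(struct toy_obj *registry)
{
	return toy_get(registry);
}

int main(void)
{
	struct toy_obj *registry = malloc(sizeof(*registry));

	registry->refs = 1;		/* reference owned by the "registry" itself */

	struct toy_obj *found = toy_find(registry);
	/* ... consume 'found', e.g. record a pointer or create a link ... */
	toy_put(found);			/* drop the lookup reference: no leak */

	toy_put(registry);		/* final reference: object is freed */
	return 0;
}
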
index 5142455..f725248 100644
@@ -421,6 +421,13 @@ static int ufs_intel_lkf_init(struct ufs_hba *hba)
        return err;
 }
 
+static int ufs_intel_adl_init(struct ufs_hba *hba)
+{
+       hba->nop_out_timeout = 200;
+       hba->quirks |= UFSHCD_QUIRK_BROKEN_AUTO_HIBERN8;
+       return ufs_intel_common_init(hba);
+}
+
 static struct ufs_hba_variant_ops ufs_intel_cnl_hba_vops = {
        .name                   = "intel-pci",
        .init                   = ufs_intel_common_init,
@@ -449,6 +456,15 @@ static struct ufs_hba_variant_ops ufs_intel_lkf_hba_vops = {
        .device_reset           = ufs_intel_device_reset,
 };
 
+static struct ufs_hba_variant_ops ufs_intel_adl_hba_vops = {
+       .name                   = "intel-pci",
+       .init                   = ufs_intel_adl_init,
+       .exit                   = ufs_intel_common_exit,
+       .link_startup_notify    = ufs_intel_link_startup_notify,
+       .resume                 = ufs_intel_resume,
+       .device_reset           = ufs_intel_device_reset,
+};
+
 #ifdef CONFIG_PM_SLEEP
 static int ufshcd_pci_restore(struct device *dev)
 {
@@ -563,6 +579,8 @@ static const struct pci_device_id ufshcd_pci_tbl[] = {
        { PCI_VDEVICE(INTEL, 0x4B41), (kernel_ulong_t)&ufs_intel_ehl_hba_vops },
        { PCI_VDEVICE(INTEL, 0x4B43), (kernel_ulong_t)&ufs_intel_ehl_hba_vops },
        { PCI_VDEVICE(INTEL, 0x98FA), (kernel_ulong_t)&ufs_intel_lkf_hba_vops },
+       { PCI_VDEVICE(INTEL, 0x51FF), (kernel_ulong_t)&ufs_intel_adl_hba_vops },
+       { PCI_VDEVICE(INTEL, 0x54FF), (kernel_ulong_t)&ufs_intel_adl_hba_vops },
        { }     /* terminate list */
 };
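
The ufshcd-pci hunks above follow the usual pattern of one variant-ops structure per platform plus a device-ID table whose driver_data points at it. A hedged standalone sketch of that dispatch pattern (the callbacks and the 0x1111 ID are invented; only 0x51FF comes from the table above):

#include <stdio.h>

struct toy_hba;			/* opaque controller handle */

struct toy_variant_ops {
	const char *name;
	int (*init)(struct toy_hba *hba);
};

static int common_init(struct toy_hba *hba)
{
	(void)hba;
	printf("common init\n");
	return 0;
}

static int adl_like_init(struct toy_hba *hba)
{
	printf("variant quirks applied\n");
	return common_init(hba);	/* then fall through to the common path */
}

static const struct toy_variant_ops common_ops = { "common", common_init };
static const struct toy_variant_ops adl_ops    = { "adl",    adl_like_init };

struct toy_id {
	unsigned int device;
	const struct toy_variant_ops *ops;	/* plays the role of driver_data */
};

static const struct toy_id id_table[] = {
	{ 0x1111, &common_ops },
	{ 0x51FF, &adl_ops },
	{ 0 }	/* terminator */
};

int main(void)
{
	unsigned int probed = 0x51FF;

	for (const struct toy_id *id = id_table; id->device; id++) {
		if (id->device == probed) {
			printf("matched %s\n", id->ops->name);
			id->ops->init(NULL);
			break;
		}
	}
	return 0;
}
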
 
index 2e31e14..ded5ba9 100644
@@ -331,7 +331,7 @@ ufshpb_set_hpb_read_to_upiu(struct ufs_hba *hba, struct ufshcd_lrb *lrbp,
        cdb[0] = UFSHPB_READ;
 
        if (hba->dev_quirks & UFS_DEVICE_QUIRK_SWAP_L2P_ENTRY_FOR_HPB_READ)
-               ppn_tmp = swab64(ppn);
+               ppn_tmp = (__force __be64)swab64((__force u64)ppn);
 
        /* ppn value is stored as big-endian in the host memory */
        memcpy(&cdb[6], &ppn_tmp, sizeof(__be64));
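
The ufshpb hunk above only adjusts sparse annotations: the byte-swapped value needed by the quirky device is force-cast back to __be64 before being copied into the CDB, which, as the comment notes, is kept big-endian in host memory. A hedged standalone sketch of why the CDB field must end up big-endian regardless of host byte order (plain C, no kernel helpers; the PPN value is made up):

#include <stdio.h>
#include <stdint.h>

/* Store a 64-bit value into buf in big-endian order, independent of the
 * host's native byte order. */
static void put_be64(unsigned char *buf, uint64_t v)
{
	for (int i = 0; i < 8; i++)
		buf[i] = (unsigned char)(v >> (56 - 8 * i));
}

int main(void)
{
	unsigned char cdb[16] = { 0 };
	uint64_t ppn = 0x0123456789abcdefULL;	/* hypothetical physical page number */

	put_be64(&cdb[6], ppn);		/* bytes 6..13 carry the PPN, MSB first */

	for (int i = 6; i < 14; i++)
		printf("%02x ", cdb[i]);
	printf("\n");			/* 01 23 45 67 89 ab cd ef */
	return 0;
}
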
index 19f7d7b..28e1d98 100644
@@ -977,7 +977,6 @@ static unsigned int features[] = {
 static struct virtio_driver virtio_scsi_driver = {
        .feature_table = features,
        .feature_table_size = ARRAY_SIZE(features),
-       .suppress_used_validation = true,
        .driver.name = KBUILD_MODNAME,
        .driver.owner = THIS_MODULE,
        .id_table = id_table,
index 59af251..7fec869 100644
@@ -66,8 +66,6 @@ source "drivers/staging/gdm724x/Kconfig"
 
 source "drivers/staging/fwserial/Kconfig"
 
-source "drivers/staging/netlogic/Kconfig"
-
 source "drivers/staging/gs_fpgaboot/Kconfig"
 
 source "drivers/staging/unisys/Kconfig"
index 76f4134..e66e19c 100644
@@ -10,7 +10,6 @@ obj-$(CONFIG_RTL8723BS)               += rtl8723bs/
 obj-$(CONFIG_R8712U)           += rtl8712/
 obj-$(CONFIG_R8188EU)          += r8188eu/
 obj-$(CONFIG_RTS5208)          += rts5208/
-obj-$(CONFIG_NETLOGIC_XLR_NET) += netlogic/
 obj-$(CONFIG_OCTEON_ETHERNET)  += octeon/
 obj-$(CONFIG_OCTEON_USB)       += octeon-usb/
 obj-$(CONFIG_VT6655)           += vt6655/
index cf263a5..6fd549a 100644
@@ -187,7 +187,6 @@ static struct fbtft_display display = {
        },
 };
 
-#ifdef CONFIG_FB_BACKLIGHT
 static int update_onboard_backlight(struct backlight_device *bd)
 {
        struct fbtft_par *par = bl_get_data(bd);
@@ -231,9 +230,6 @@ static void register_onboard_backlight(struct fbtft_par *par)
        if (!par->fbtftops.unregister_backlight)
                par->fbtftops.unregister_backlight = fbtft_unregister_backlight;
 }
-#else
-static void register_onboard_backlight(struct fbtft_par *par) { };
-#endif
 
 FBTFT_REGISTER_DRIVER(DRVNAME, "solomon,ssd1351", &display);
 
index ecb5f75..f2684d2 100644
@@ -128,7 +128,6 @@ static int fbtft_request_gpios(struct fbtft_par *par)
        return 0;
 }
 
-#ifdef CONFIG_FB_BACKLIGHT
 static int fbtft_backlight_update_status(struct backlight_device *bd)
 {
        struct fbtft_par *par = bl_get_data(bd);
@@ -161,6 +160,7 @@ void fbtft_unregister_backlight(struct fbtft_par *par)
                par->info->bl_dev = NULL;
        }
 }
+EXPORT_SYMBOL(fbtft_unregister_backlight);
 
 static const struct backlight_ops fbtft_bl_ops = {
        .get_brightness = fbtft_backlight_get_brightness,
@@ -198,12 +198,7 @@ void fbtft_register_backlight(struct fbtft_par *par)
        if (!par->fbtftops.unregister_backlight)
                par->fbtftops.unregister_backlight = fbtft_unregister_backlight;
 }
-#else
-void fbtft_register_backlight(struct fbtft_par *par) { };
-void fbtft_unregister_backlight(struct fbtft_par *par) { };
-#endif
 EXPORT_SYMBOL(fbtft_register_backlight);
-EXPORT_SYMBOL(fbtft_unregister_backlight);
 
 static void fbtft_set_addr_win(struct fbtft_par *par, int xs, int ys, int xe,
                               int ye)
@@ -853,13 +848,11 @@ int fbtft_register_framebuffer(struct fb_info *fb_info)
                 fb_info->fix.smem_len >> 10, text1,
                 HZ / fb_info->fbdefio->delay, text2);
 
-#ifdef CONFIG_FB_BACKLIGHT
        /* Turn on backlight if available */
        if (fb_info->bl_dev) {
                fb_info->bl_dev->props.power = FB_BLANK_UNBLANK;
                fb_info->bl_dev->ops->update_status(fb_info->bl_dev);
        }
-#endif
 
        return 0;
 
index 1ed4772..8437606 100644
@@ -192,7 +192,11 @@ int gbaudio_remove_component_controls(struct snd_soc_component *component,
                                      unsigned int num_controls)
 {
        struct snd_card *card = component->card->snd_card;
+       int err;
 
-       return gbaudio_remove_controls(card, component->dev, controls,
-                                      num_controls, component->name_prefix);
+       down_write(&card->controls_rwsem);
+       err = gbaudio_remove_controls(card, component->dev, controls,
+                                     num_controls, component->name_prefix);
+       up_write(&card->controls_rwsem);
+       return err;
 }
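
The greybus audio hunk above takes the card's controls_rwsem for writing around the control removal, presumably because the underlying removal helpers expect the caller to hold that lock. A minimal standalone sketch of the same idea, a write lock taken only while shared state is being modified (POSIX rwlock with toy names, not the ALSA API):

#include <stdio.h>
#include <pthread.h>

static pthread_rwlock_t table_lock = PTHREAD_RWLOCK_INITIALIZER;
static int table[8];
static int table_len = 8;

/* Readers may run concurrently under the read lock... */
static int table_sum(void)
{
	int sum = 0;

	pthread_rwlock_rdlock(&table_lock);
	for (int i = 0; i < table_len; i++)
		sum += table[i];
	pthread_rwlock_unlock(&table_lock);
	return sum;
}

/* ...but removal rewrites the table, so it takes the lock for writing. */
static void table_remove_last(void)
{
	pthread_rwlock_wrlock(&table_lock);
	if (table_len > 0)
		table_len--;
	pthread_rwlock_unlock(&table_lock);
}

int main(void)
{
	for (int i = 0; i < 8; i++)
		table[i] = i;

	printf("sum=%d\n", table_sum());	/* 28 */
	table_remove_last();
	printf("sum=%d\n", table_sum());	/* 21 */
	return 0;
}
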
diff --git a/drivers/staging/netlogic/Kconfig b/drivers/staging/netlogic/Kconfig
deleted file mode 100644
index e171260..0000000
+++ /dev/null
@@ -1,9 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-config NETLOGIC_XLR_NET
-       tristate "Netlogic XLR/XLS network device"
-       depends on CPU_XLR
-       depends on NETDEVICES
-       select PHYLIB
-       help
-       This driver support Netlogic XLR/XLS on chip gigabit
-       Ethernet.
diff --git a/drivers/staging/netlogic/Makefile b/drivers/staging/netlogic/Makefile
deleted file mode 100644
index 7e2902a..0000000
+++ /dev/null
@@ -1,2 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-obj-$(CONFIG_NETLOGIC_XLR_NET) += xlr_net.o platform_net.o
diff --git a/drivers/staging/netlogic/TODO b/drivers/staging/netlogic/TODO
deleted file mode 100644
index 20e22ec..0000000
+++ /dev/null
@@ -1,11 +0,0 @@
-* Implementing 64bit stat counter in software
-* All memory allocation should be changed to DMA allocations
-* Changing comments into linux standard format
-
-Please send patches
-To:
-Ganesan Ramalingam <ganesanr@broadcom.com>
-Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-Cc:
-Jayachandran Chandrashekaran Nair <jchandra@broadcom.com>
-
diff --git a/drivers/staging/netlogic/platform_net.c b/drivers/staging/netlogic/platform_net.c
deleted file mode 100644
index 8be9d0b..0000000
+++ /dev/null
@@ -1,219 +0,0 @@
-// SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
-/*
- * Copyright (c) 2003-2012 Broadcom Corporation
- * All Rights Reserved
- */
-
-#include <linux/device.h>
-#include <linux/platform_device.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/io.h>
-#include <linux/delay.h>
-#include <linux/ioport.h>
-#include <linux/resource.h>
-#include <linux/phy.h>
-
-#include <asm/netlogic/haldefs.h>
-#include <asm/netlogic/common.h>
-#include <asm/netlogic/xlr/fmn.h>
-#include <asm/netlogic/xlr/xlr.h>
-#include <asm/netlogic/psb-bootinfo.h>
-#include <asm/netlogic/xlr/pic.h>
-#include <asm/netlogic/xlr/iomap.h>
-
-#include "platform_net.h"
-
-/* Linux Net */
-#define MAX_NUM_GMAC           8
-#define MAX_NUM_XLS_GMAC       8
-#define MAX_NUM_XLR_GMAC       4
-
-static u32 xlr_gmac_offsets[] = {
-       NETLOGIC_IO_GMAC_0_OFFSET, NETLOGIC_IO_GMAC_1_OFFSET,
-       NETLOGIC_IO_GMAC_2_OFFSET, NETLOGIC_IO_GMAC_3_OFFSET,
-       NETLOGIC_IO_GMAC_4_OFFSET, NETLOGIC_IO_GMAC_5_OFFSET,
-       NETLOGIC_IO_GMAC_6_OFFSET, NETLOGIC_IO_GMAC_7_OFFSET
-};
-
-static u32 xlr_gmac_irqs[] = { PIC_GMAC_0_IRQ, PIC_GMAC_1_IRQ,
-       PIC_GMAC_2_IRQ, PIC_GMAC_3_IRQ,
-       PIC_GMAC_4_IRQ, PIC_GMAC_5_IRQ,
-       PIC_GMAC_6_IRQ, PIC_GMAC_7_IRQ
-};
-
-static struct resource xlr_net0_res[8];
-static struct resource xlr_net1_res[8];
-static u32 __iomem *gmac4_addr;
-static u32 __iomem *gpio_addr;
-
-static void xlr_resource_init(struct resource *res, int offset, int irq)
-{
-       res->name = "gmac";
-
-       res->start = CPHYSADDR(nlm_mmio_base(offset));
-       res->end = res->start + 0xfff;
-       res->flags = IORESOURCE_MEM;
-
-       res++;
-       res->name = "gmac";
-       res->start = irq;
-       res->end = irq;
-       res->flags = IORESOURCE_IRQ;
-}
-
-static struct platform_device *gmac_controller2_init(void *gmac0_addr)
-{
-       int mac;
-       static struct xlr_net_data ndata1 = {
-               .phy_interface  = PHY_INTERFACE_MODE_SGMII,
-               .rfr_station    = FMN_STNID_GMAC1_FR_0,
-               .bucket_size    = xlr_board_fmn_config.bucket_size,
-               .gmac_fmn_info  = &xlr_board_fmn_config.gmac[1],
-       };
-
-       static struct platform_device xlr_net_dev1 = {
-               .name           = "xlr-net",
-               .id             = 1,
-               .dev.platform_data = &ndata1,
-       };
-
-       gmac4_addr =
-               ioremap(CPHYSADDR(nlm_mmio_base(NETLOGIC_IO_GMAC_4_OFFSET)),
-                       0xfff);
-       ndata1.serdes_addr = gmac4_addr;
-       ndata1.pcs_addr = gmac4_addr;
-       ndata1.mii_addr = gmac0_addr;
-       ndata1.gpio_addr = gpio_addr;
-       ndata1.cpu_mask = nlm_current_node()->coremask;
-
-       xlr_net_dev1.resource = xlr_net1_res;
-
-       for (mac = 0; mac < 4; mac++) {
-               ndata1.tx_stnid[mac] = FMN_STNID_GMAC1_TX0 + mac;
-               ndata1.phy_addr[mac] = mac + 4 + 0x10;
-
-               xlr_resource_init(&xlr_net1_res[mac * 2],
-                                 xlr_gmac_offsets[mac + 4],
-                                 xlr_gmac_irqs[mac + 4]);
-       }
-       xlr_net_dev1.num_resources = 8;
-
-       return &xlr_net_dev1;
-}
-
-static void xls_gmac_init(void)
-{
-       int mac;
-       struct platform_device *xlr_net_dev1;
-       void __iomem *gmac0_addr =
-               ioremap(CPHYSADDR(nlm_mmio_base(NETLOGIC_IO_GMAC_0_OFFSET)),
-                       0xfff);
-
-       static struct xlr_net_data ndata0 = {
-               .rfr_station    = FMN_STNID_GMACRFR_0,
-               .bucket_size    = xlr_board_fmn_config.bucket_size,
-               .gmac_fmn_info  = &xlr_board_fmn_config.gmac[0],
-       };
-
-       static struct platform_device xlr_net_dev0 = {
-               .name           = "xlr-net",
-               .id             = 0,
-       };
-       xlr_net_dev0.dev.platform_data = &ndata0;
-       ndata0.serdes_addr = gmac0_addr;
-       ndata0.pcs_addr = gmac0_addr;
-       ndata0.mii_addr = gmac0_addr;
-
-       /* Passing GPIO base for serdes init. Only needed on sgmii ports */
-       gpio_addr =
-               ioremap(CPHYSADDR(nlm_mmio_base(NETLOGIC_IO_GPIO_OFFSET)),
-                       0xfff);
-       ndata0.gpio_addr = gpio_addr;
-       ndata0.cpu_mask = nlm_current_node()->coremask;
-
-       xlr_net_dev0.resource = xlr_net0_res;
-
-       switch (nlm_prom_info.board_major_version) {
-       case 12:
-               /* first block RGMII or XAUI, use RGMII */
-               ndata0.phy_interface = PHY_INTERFACE_MODE_RGMII;
-               ndata0.tx_stnid[0] = FMN_STNID_GMAC0_TX0;
-               ndata0.phy_addr[0] = 0;
-
-               xlr_net_dev0.num_resources = 2;
-
-               xlr_resource_init(&xlr_net0_res[0], xlr_gmac_offsets[0],
-                                 xlr_gmac_irqs[0]);
-               platform_device_register(&xlr_net_dev0);
-
-               /* second block is XAUI, not supported yet */
-               break;
-       default:
-               /* default XLS config, all ports SGMII */
-               ndata0.phy_interface = PHY_INTERFACE_MODE_SGMII;
-               for (mac = 0; mac < 4; mac++) {
-                       ndata0.tx_stnid[mac] = FMN_STNID_GMAC0_TX0 + mac;
-                       ndata0.phy_addr[mac] = mac + 0x10;
-
-                       xlr_resource_init(&xlr_net0_res[mac * 2],
-                                         xlr_gmac_offsets[mac],
-                                       xlr_gmac_irqs[mac]);
-               }
-               xlr_net_dev0.num_resources = 8;
-               platform_device_register(&xlr_net_dev0);
-
-               xlr_net_dev1 = gmac_controller2_init(gmac0_addr);
-               platform_device_register(xlr_net_dev1);
-       }
-}
-
-static void xlr_gmac_init(void)
-{
-       int mac;
-
-       /* assume all GMACs for now */
-       static struct xlr_net_data ndata0 = {
-               .phy_interface  = PHY_INTERFACE_MODE_RGMII,
-               .serdes_addr    = NULL,
-               .pcs_addr       = NULL,
-               .rfr_station    = FMN_STNID_GMACRFR_0,
-               .bucket_size    = xlr_board_fmn_config.bucket_size,
-               .gmac_fmn_info  = &xlr_board_fmn_config.gmac[0],
-               .gpio_addr      = NULL,
-       };
-
-       static struct platform_device xlr_net_dev0 = {
-               .name           = "xlr-net",
-               .id             = 0,
-               .dev.platform_data = &ndata0,
-       };
-       ndata0.mii_addr =
-               ioremap(CPHYSADDR(nlm_mmio_base(NETLOGIC_IO_GMAC_0_OFFSET)),
-                       0xfff);
-
-       ndata0.cpu_mask = nlm_current_node()->coremask;
-
-       for (mac = 0; mac < MAX_NUM_XLR_GMAC; mac++) {
-               ndata0.tx_stnid[mac] = FMN_STNID_GMAC0_TX0 + mac;
-               ndata0.phy_addr[mac] = mac;
-               xlr_resource_init(&xlr_net0_res[mac * 2], xlr_gmac_offsets[mac],
-                                 xlr_gmac_irqs[mac]);
-       }
-       xlr_net_dev0.num_resources = 8;
-       xlr_net_dev0.resource = xlr_net0_res;
-
-       platform_device_register(&xlr_net_dev0);
-}
-
-static int __init xlr_net_init(void)
-{
-       if (nlm_chip_is_xls())
-               xls_gmac_init();
-       else
-               xlr_gmac_init();
-
-       return 0;
-}
-
-arch_initcall(xlr_net_init);
diff --git a/drivers/staging/netlogic/platform_net.h b/drivers/staging/netlogic/platform_net.h
deleted file mode 100644
index c8d4c13..0000000
+++ /dev/null
@@ -1,21 +0,0 @@
-/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */
-/*
- * Copyright (c) 2003-2012 Broadcom Corporation
- * All Rights Reserved
- */
-
-#define PORTS_PER_CONTROLLER           4
-
-struct xlr_net_data {
-       int cpu_mask;
-       u32 __iomem *mii_addr;
-       u32 __iomem *serdes_addr;
-       u32 __iomem *pcs_addr;
-       u32 __iomem *gpio_addr;
-       int phy_interface;
-       int rfr_station;
-       int tx_stnid[PORTS_PER_CONTROLLER];
-       int *bucket_size;
-       int phy_addr[PORTS_PER_CONTROLLER];
-       struct xlr_fmn_info *gmac_fmn_info;
-};
diff --git a/drivers/staging/netlogic/xlr_net.c b/drivers/staging/netlogic/xlr_net.c
deleted file mode 100644
index 69ea61f..0000000
+++ /dev/null
@@ -1,1080 +0,0 @@
-// SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
-/*
- * Copyright (c) 2003-2012 Broadcom Corporation
- * All Rights Reserved
- */
-
-#include <linux/phy.h>
-#include <linux/delay.h>
-#include <linux/netdevice.h>
-#include <linux/smp.h>
-#include <linux/ethtool.h>
-#include <linux/module.h>
-#include <linux/etherdevice.h>
-#include <linux/skbuff.h>
-#include <linux/jiffies.h>
-#include <linux/interrupt.h>
-#include <linux/platform_device.h>
-
-#include <asm/mipsregs.h>
-/*
- * fmn.h - For FMN credit configuration and registering fmn_handler.
- * FMN is communication mechanism that allows processing agents within
- * XLR/XLS to communicate each other.
- */
-#include <asm/netlogic/xlr/fmn.h>
-
-#include "platform_net.h"
-#include "xlr_net.h"
-
-/*
- * The readl/writel implementation byteswaps on XLR/XLS, so
- * we need to use __raw_ IO to read the NAE registers
- * because they are in the big-endian MMIO area on the SoC.
- */
-static inline void xlr_nae_wreg(u32 __iomem *base, unsigned int reg, u32 val)
-{
-       __raw_writel(val, base + reg);
-}
-
-static inline u32 xlr_nae_rdreg(u32 __iomem *base, unsigned int reg)
-{
-       return __raw_readl(base + reg);
-}
-
-static inline void xlr_reg_update(u32 *base_addr, u32 off, u32 val, u32 mask)
-{
-       u32 tmp;
-
-       tmp = xlr_nae_rdreg(base_addr, off);
-       xlr_nae_wreg(base_addr, off, (tmp & ~mask) | (val & mask));
-}
-
-#define MAC_SKB_BACK_PTR_SIZE SMP_CACHE_BYTES
-
-static int send_to_rfr_fifo(struct xlr_net_priv *priv, void *addr)
-{
-       struct nlm_fmn_msg msg;
-       int ret = 0, num_try = 0, stnid;
-       unsigned long paddr, mflags;
-
-       paddr = virt_to_bus(addr);
-       msg.msg0 = (u64)paddr & 0xffffffffe0ULL;
-       msg.msg1 = 0;
-       msg.msg2 = 0;
-       msg.msg3 = 0;
-       stnid = priv->nd->rfr_station;
-       do {
-               mflags = nlm_cop2_enable_irqsave();
-               ret = nlm_fmn_send(1, 0, stnid, &msg);
-               nlm_cop2_disable_irqrestore(mflags);
-               if (ret == 0)
-                       return 0;
-       } while (++num_try < 10000);
-
-       netdev_err(priv->ndev, "Send to RFR failed in RX path\n");
-       return ret;
-}
-
-static inline unsigned char *xlr_alloc_skb(void)
-{
-       struct sk_buff *skb;
-       int buf_len = sizeof(struct sk_buff *);
-       unsigned char *skb_data;
-
-       /* skb->data is cache aligned */
-       skb = alloc_skb(XLR_RX_BUF_SIZE, GFP_ATOMIC);
-       if (!skb)
-               return NULL;
-       skb_data = skb->data;
-       skb_reserve(skb, MAC_SKB_BACK_PTR_SIZE);
-       memcpy(skb_data, &skb, buf_len);
-
-       return skb->data;
-}
-
-static void xlr_net_fmn_handler(int bkt, int src_stnid, int size, int code,
-                               struct nlm_fmn_msg *msg, void *arg)
-{
-       struct sk_buff *skb;
-       void *skb_data = NULL;
-       struct net_device *ndev;
-       struct xlr_net_priv *priv;
-       u32 port, length;
-       unsigned char *addr;
-       struct xlr_adapter *adapter = arg;
-
-       length = (msg->msg0 >> 40) & 0x3fff;
-       if (length == 0) {
-               addr = bus_to_virt(msg->msg0 & 0xffffffffffULL);
-               addr = addr - MAC_SKB_BACK_PTR_SIZE;
-               skb = (struct sk_buff *)(*(unsigned long *)addr);
-               dev_kfree_skb_any((struct sk_buff *)addr);
-       } else {
-               addr = (unsigned char *)
-                       bus_to_virt(msg->msg0 & 0xffffffffe0ULL);
-               length = length - BYTE_OFFSET - MAC_CRC_LEN;
-               port = ((int)msg->msg0) & 0x0f;
-               addr = addr - MAC_SKB_BACK_PTR_SIZE;
-               skb = (struct sk_buff *)(*(unsigned long *)addr);
-               skb->dev = adapter->netdev[port];
-               if (!skb->dev)
-                       return;
-               ndev = skb->dev;
-               priv = netdev_priv(ndev);
-
-               /* 16 byte IP header align */
-               skb_reserve(skb, BYTE_OFFSET);
-               skb_put(skb, length);
-               skb->protocol = eth_type_trans(skb, skb->dev);
-               netif_rx(skb);
-               /* Fill rx ring */
-               skb_data = xlr_alloc_skb();
-               if (skb_data)
-                       send_to_rfr_fifo(priv, skb_data);
-       }
-}
-
-static struct phy_device *xlr_get_phydev(struct xlr_net_priv *priv)
-{
-       return mdiobus_get_phy(priv->mii_bus, priv->phy_addr);
-}
-
-/*
- * Ethtool operation
- */
-static int xlr_get_link_ksettings(struct net_device *ndev,
-                                 struct ethtool_link_ksettings *ecmd)
-{
-       struct xlr_net_priv *priv = netdev_priv(ndev);
-       struct phy_device *phydev = xlr_get_phydev(priv);
-
-       if (!phydev)
-               return -ENODEV;
-
-       phy_ethtool_ksettings_get(phydev, ecmd);
-
-       return 0;
-}
-
-static int xlr_set_link_ksettings(struct net_device *ndev,
-                                 const struct ethtool_link_ksettings *ecmd)
-{
-       struct xlr_net_priv *priv = netdev_priv(ndev);
-       struct phy_device *phydev = xlr_get_phydev(priv);
-
-       if (!phydev)
-               return -ENODEV;
-       return phy_ethtool_ksettings_set(phydev, ecmd);
-}
-
-static const struct ethtool_ops xlr_ethtool_ops = {
-       .get_link_ksettings = xlr_get_link_ksettings,
-       .set_link_ksettings = xlr_set_link_ksettings,
-};
-
-/*
- * Net operations
- */
-static int xlr_net_fill_rx_ring(struct net_device *ndev)
-{
-       void *skb_data;
-       struct xlr_net_priv *priv = netdev_priv(ndev);
-       int i;
-
-       for (i = 0; i < MAX_FRIN_SPILL / 4; i++) {
-               skb_data = xlr_alloc_skb();
-               if (!skb_data)
-                       return -ENOMEM;
-               send_to_rfr_fifo(priv, skb_data);
-       }
-       netdev_info(ndev, "Rx ring setup done\n");
-       return 0;
-}
-
-static int xlr_net_open(struct net_device *ndev)
-{
-       u32 err;
-       struct xlr_net_priv *priv = netdev_priv(ndev);
-       struct phy_device *phydev = xlr_get_phydev(priv);
-
-       /* schedule a link state check */
-       phy_start(phydev);
-
-       err = phy_start_aneg(phydev);
-       if (err) {
-               pr_err("Autoneg failed\n");
-               return err;
-       }
-       /* Setup the speed from PHY to internal reg*/
-       xlr_set_gmac_speed(priv);
-
-       netif_tx_start_all_queues(ndev);
-
-       return 0;
-}
-
-static int xlr_net_stop(struct net_device *ndev)
-{
-       struct xlr_net_priv *priv = netdev_priv(ndev);
-       struct phy_device *phydev = xlr_get_phydev(priv);
-
-       phy_stop(phydev);
-       netif_tx_stop_all_queues(ndev);
-       return 0;
-}
-
-static void xlr_make_tx_desc(struct nlm_fmn_msg *msg, unsigned long addr,
-                            struct sk_buff *skb)
-{
-       unsigned long physkb = virt_to_phys(skb);
-       int cpu_core = nlm_core_id();
-       int fr_stn_id = cpu_core * 8 + XLR_FB_STN;      /* FB to 6th bucket */
-
-       msg->msg0 = (((u64)1 << 63)     |       /* End of packet descriptor */
-               ((u64)127 << 54)        |       /* No Free back */
-               (u64)skb->len << 40     |       /* Length of data */
-               ((u64)addr));
-       msg->msg1 = (((u64)1 << 63)     |
-               ((u64)fr_stn_id << 54)  |       /* Free back id */
-               (u64)0 << 40            |       /* Set len to 0 */
-               ((u64)physkb  & 0xffffffff));   /* 32bit address */
-       msg->msg2 = 0;
-       msg->msg3 = 0;
-}
-
-static netdev_tx_t xlr_net_start_xmit(struct sk_buff *skb,
-                                     struct net_device *ndev)
-{
-       struct nlm_fmn_msg msg;
-       struct xlr_net_priv *priv = netdev_priv(ndev);
-       int ret;
-       u32 flags;
-
-       xlr_make_tx_desc(&msg, virt_to_phys(skb->data), skb);
-       flags = nlm_cop2_enable_irqsave();
-       ret = nlm_fmn_send(2, 0, priv->tx_stnid, &msg);
-       nlm_cop2_disable_irqrestore(flags);
-       if (ret)
-               dev_kfree_skb_any(skb);
-       return NETDEV_TX_OK;
-}
-
-static void xlr_hw_set_mac_addr(struct net_device *ndev)
-{
-       struct xlr_net_priv *priv = netdev_priv(ndev);
-
-       /* set mac station address */
-       xlr_nae_wreg(priv->base_addr, R_MAC_ADDR0,
-                    ((ndev->dev_addr[5] << 24) | (ndev->dev_addr[4] << 16) |
-                    (ndev->dev_addr[3] << 8) | (ndev->dev_addr[2])));
-       xlr_nae_wreg(priv->base_addr, R_MAC_ADDR0 + 1,
-                    ((ndev->dev_addr[1] << 24) | (ndev->dev_addr[0] << 16)));
-
-       xlr_nae_wreg(priv->base_addr, R_MAC_ADDR_MASK2, 0xffffffff);
-       xlr_nae_wreg(priv->base_addr, R_MAC_ADDR_MASK2 + 1, 0xffffffff);
-       xlr_nae_wreg(priv->base_addr, R_MAC_ADDR_MASK3, 0xffffffff);
-       xlr_nae_wreg(priv->base_addr, R_MAC_ADDR_MASK3 + 1, 0xffffffff);
-
-       xlr_nae_wreg(priv->base_addr, R_MAC_FILTER_CONFIG,
-                    (1 << O_MAC_FILTER_CONFIG__BROADCAST_EN) |
-                    (1 << O_MAC_FILTER_CONFIG__ALL_MCAST_EN) |
-                    (1 << O_MAC_FILTER_CONFIG__MAC_ADDR0_VALID));
-
-       if (priv->nd->phy_interface == PHY_INTERFACE_MODE_RGMII ||
-           priv->nd->phy_interface == PHY_INTERFACE_MODE_SGMII)
-               xlr_reg_update(priv->base_addr, R_IPG_IFG, MAC_B2B_IPG, 0x7f);
-}
-
-static int xlr_net_set_mac_addr(struct net_device *ndev, void *data)
-{
-       int err;
-
-       err = eth_mac_addr(ndev, data);
-       if (err)
-               return err;
-       xlr_hw_set_mac_addr(ndev);
-       return 0;
-}
-
-static void xlr_set_rx_mode(struct net_device *ndev)
-{
-       struct xlr_net_priv *priv = netdev_priv(ndev);
-       u32 regval;
-
-       regval = xlr_nae_rdreg(priv->base_addr, R_MAC_FILTER_CONFIG);
-
-       if (ndev->flags & IFF_PROMISC) {
-               regval |= (1 << O_MAC_FILTER_CONFIG__BROADCAST_EN) |
-               (1 << O_MAC_FILTER_CONFIG__PAUSE_FRAME_EN) |
-               (1 << O_MAC_FILTER_CONFIG__ALL_MCAST_EN) |
-               (1 << O_MAC_FILTER_CONFIG__ALL_UCAST_EN);
-       } else {
-               regval &= ~((1 << O_MAC_FILTER_CONFIG__PAUSE_FRAME_EN) |
-               (1 << O_MAC_FILTER_CONFIG__ALL_UCAST_EN));
-       }
-
-       xlr_nae_wreg(priv->base_addr, R_MAC_FILTER_CONFIG, regval);
-}
-
-static void xlr_stats(struct net_device *ndev, struct rtnl_link_stats64 *stats)
-{
-       struct xlr_net_priv *priv = netdev_priv(ndev);
-
-       stats->rx_packets = xlr_nae_rdreg(priv->base_addr, RX_PACKET_COUNTER);
-       stats->tx_packets = xlr_nae_rdreg(priv->base_addr, TX_PACKET_COUNTER);
-       stats->rx_bytes = xlr_nae_rdreg(priv->base_addr, RX_BYTE_COUNTER);
-       stats->tx_bytes = xlr_nae_rdreg(priv->base_addr, TX_BYTE_COUNTER);
-       stats->tx_errors = xlr_nae_rdreg(priv->base_addr, TX_FCS_ERROR_COUNTER);
-       stats->rx_dropped = xlr_nae_rdreg(priv->base_addr,
-                                         RX_DROP_PACKET_COUNTER);
-       stats->tx_dropped = xlr_nae_rdreg(priv->base_addr,
-                                         TX_DROP_FRAME_COUNTER);
-
-       stats->multicast = xlr_nae_rdreg(priv->base_addr,
-                                        RX_MULTICAST_PACKET_COUNTER);
-       stats->collisions = xlr_nae_rdreg(priv->base_addr,
-                                         TX_TOTAL_COLLISION_COUNTER);
-
-       stats->rx_length_errors = xlr_nae_rdreg(priv->base_addr,
-                                               RX_FRAME_LENGTH_ERROR_COUNTER);
-       stats->rx_over_errors = xlr_nae_rdreg(priv->base_addr,
-                                             RX_DROP_PACKET_COUNTER);
-       stats->rx_crc_errors = xlr_nae_rdreg(priv->base_addr,
-                                            RX_FCS_ERROR_COUNTER);
-       stats->rx_frame_errors = xlr_nae_rdreg(priv->base_addr,
-                                              RX_ALIGNMENT_ERROR_COUNTER);
-
-       stats->rx_fifo_errors = xlr_nae_rdreg(priv->base_addr,
-                                             RX_DROP_PACKET_COUNTER);
-       stats->rx_missed_errors = xlr_nae_rdreg(priv->base_addr,
-                                               RX_CARRIER_SENSE_ERROR_COUNTER);
-
-       stats->rx_errors = (stats->rx_over_errors + stats->rx_crc_errors +
-                           stats->rx_frame_errors + stats->rx_fifo_errors +
-                           stats->rx_missed_errors);
-
-       stats->tx_aborted_errors = xlr_nae_rdreg(priv->base_addr,
-                                                TX_EXCESSIVE_COLLISION_PACKET_COUNTER);
-       stats->tx_carrier_errors = xlr_nae_rdreg(priv->base_addr,
-                                                TX_DROP_FRAME_COUNTER);
-       stats->tx_fifo_errors = xlr_nae_rdreg(priv->base_addr,
-                                             TX_DROP_FRAME_COUNTER);
-}
-
-static const struct net_device_ops xlr_netdev_ops = {
-       .ndo_open = xlr_net_open,
-       .ndo_stop = xlr_net_stop,
-       .ndo_start_xmit = xlr_net_start_xmit,
-       .ndo_select_queue = dev_pick_tx_cpu_id,
-       .ndo_set_mac_address = xlr_net_set_mac_addr,
-       .ndo_set_rx_mode = xlr_set_rx_mode,
-       .ndo_get_stats64 = xlr_stats,
-};
-
-/*
- * Gmac init
- */
-static void *xlr_config_spill(struct xlr_net_priv *priv, int reg_start_0,
-                             int reg_start_1, int reg_size, int size)
-{
-       void *spill;
-       u32 *base;
-       unsigned long phys_addr;
-       u32 spill_size;
-
-       base = priv->base_addr;
-       spill_size = size;
-       spill = kmalloc(spill_size + SMP_CACHE_BYTES, GFP_KERNEL);
-       if (!spill)
-               return ZERO_SIZE_PTR;
-
-       spill = PTR_ALIGN(spill, SMP_CACHE_BYTES);
-       phys_addr = virt_to_phys(spill);
-       dev_dbg(&priv->ndev->dev, "Allocated spill %d bytes at %lx\n",
-               size, phys_addr);
-       xlr_nae_wreg(base, reg_start_0, (phys_addr >> 5) & 0xffffffff);
-       xlr_nae_wreg(base, reg_start_1, ((u64)phys_addr >> 37) & 0x07);
-       xlr_nae_wreg(base, reg_size, spill_size);
-
-       return spill;
-}
-
-/*
- * Configure the 6 FIFO's that are used by the network accelarator to
- * communicate with the rest of the XLx device. 4 of the FIFO's are for
- * packets from NA --> cpu (called Class FIFO's) and 2 are for feeding
- * the NA with free descriptors.
- */
-static void xlr_config_fifo_spill_area(struct xlr_net_priv *priv)
-{
-       priv->frin_spill = xlr_config_spill(priv,
-                                           R_REG_FRIN_SPILL_MEM_START_0,
-                                           R_REG_FRIN_SPILL_MEM_START_1,
-                                           R_REG_FRIN_SPILL_MEM_SIZE,
-                                           MAX_FRIN_SPILL * sizeof(u64));
-       priv->frout_spill = xlr_config_spill(priv,
-                                            R_FROUT_SPILL_MEM_START_0,
-                                            R_FROUT_SPILL_MEM_START_1,
-                                            R_FROUT_SPILL_MEM_SIZE,
-                                            MAX_FROUT_SPILL * sizeof(u64));
-       priv->class_0_spill = xlr_config_spill(priv,
-                                              R_CLASS0_SPILL_MEM_START_0,
-                                              R_CLASS0_SPILL_MEM_START_1,
-                                              R_CLASS0_SPILL_MEM_SIZE,
-                                              MAX_CLASS_0_SPILL * sizeof(u64));
-       priv->class_1_spill = xlr_config_spill(priv,
-                                              R_CLASS1_SPILL_MEM_START_0,
-                                              R_CLASS1_SPILL_MEM_START_1,
-                                              R_CLASS1_SPILL_MEM_SIZE,
-                                              MAX_CLASS_1_SPILL * sizeof(u64));
-       priv->class_2_spill = xlr_config_spill(priv,
-                                              R_CLASS2_SPILL_MEM_START_0,
-                                              R_CLASS2_SPILL_MEM_START_1,
-                                              R_CLASS2_SPILL_MEM_SIZE,
-                                              MAX_CLASS_2_SPILL * sizeof(u64));
-       priv->class_3_spill = xlr_config_spill(priv,
-                                              R_CLASS3_SPILL_MEM_START_0,
-                                              R_CLASS3_SPILL_MEM_START_1,
-                                              R_CLASS3_SPILL_MEM_SIZE,
-                                              MAX_CLASS_3_SPILL * sizeof(u64));
-}
-
-/*
- * Configure PDE to Round-Robin distribution of packets to the
- * available cpu
- */
-static void xlr_config_pde(struct xlr_net_priv *priv)
-{
-       int i = 0;
-       u64 bkt_map = 0;
-
-       /* Each core has 8 buckets(station) */
-       for (i = 0; i < hweight32(priv->nd->cpu_mask); i++)
-               bkt_map |= (0xff << (i * 8));
-
-       xlr_nae_wreg(priv->base_addr, R_PDE_CLASS_0, (bkt_map & 0xffffffff));
-       xlr_nae_wreg(priv->base_addr, R_PDE_CLASS_0 + 1,
-                    ((bkt_map >> 32) & 0xffffffff));
-
-       xlr_nae_wreg(priv->base_addr, R_PDE_CLASS_1, (bkt_map & 0xffffffff));
-       xlr_nae_wreg(priv->base_addr, R_PDE_CLASS_1 + 1,
-                    ((bkt_map >> 32) & 0xffffffff));
-
-       xlr_nae_wreg(priv->base_addr, R_PDE_CLASS_2, (bkt_map & 0xffffffff));
-       xlr_nae_wreg(priv->base_addr, R_PDE_CLASS_2 + 1,
-                    ((bkt_map >> 32) & 0xffffffff));
-
-       xlr_nae_wreg(priv->base_addr, R_PDE_CLASS_3, (bkt_map & 0xffffffff));
-       xlr_nae_wreg(priv->base_addr, R_PDE_CLASS_3 + 1,
-                    ((bkt_map >> 32) & 0xffffffff));
-}
-
-/*
- * Setup the Message ring credits, bucket size and other
- * common configuration
- */
-static int xlr_config_common(struct xlr_net_priv *priv)
-{
-       struct xlr_fmn_info *gmac = priv->nd->gmac_fmn_info;
-       int start_stn_id = gmac->start_stn_id;
-       int end_stn_id = gmac->end_stn_id;
-       int *bucket_size = priv->nd->bucket_size;
-       int i, j, err;
-
-       /* Setting non-core MsgBktSize(0x321 - 0x325) */
-       for (i = start_stn_id; i <= end_stn_id; i++) {
-               xlr_nae_wreg(priv->base_addr,
-                            R_GMAC_RFR0_BUCKET_SIZE + i - start_stn_id,
-                            bucket_size[i]);
-       }
-
-       /*
-        * Setting non-core Credit counter register
-        * Distributing Gmac's credit to CPU's
-        */
-       for (i = 0; i < 8; i++) {
-               for (j = 0; j < 8; j++)
-                       xlr_nae_wreg(priv->base_addr,
-                                    (R_CC_CPU0_0 + (i * 8)) + j,
-                                    gmac->credit_config[(i * 8) + j]);
-       }
-
-       xlr_nae_wreg(priv->base_addr, R_MSG_TX_THRESHOLD, 3);
-       xlr_nae_wreg(priv->base_addr, R_DMACR0, 0xffffffff);
-       xlr_nae_wreg(priv->base_addr, R_DMACR1, 0xffffffff);
-       xlr_nae_wreg(priv->base_addr, R_DMACR2, 0xffffffff);
-       xlr_nae_wreg(priv->base_addr, R_DMACR3, 0xffffffff);
-       xlr_nae_wreg(priv->base_addr, R_FREEQCARVE, 0);
-
-       err = xlr_net_fill_rx_ring(priv->ndev);
-       if (err)
-               return err;
-       nlm_register_fmn_handler(start_stn_id, end_stn_id, xlr_net_fmn_handler,
-                                priv->adapter);
-       return 0;
-}
-
-static void xlr_config_translate_table(struct xlr_net_priv *priv)
-{
-       u32 cpu_mask;
-       u32 val;
-       int bkts[32]; /* one bucket is assumed for each cpu */
-       int b1, b2, c1, c2, i, j, k;
-       int use_bkt;
-
-       use_bkt = 0;
-       cpu_mask = priv->nd->cpu_mask;
-
-       pr_info("Using %s-based distribution\n",
-               (use_bkt) ? "bucket" : "class");
-       j = 0;
-       for (i = 0; i < 32; i++) {
-               if ((1 << i) & cpu_mask) {
-                       /* for each cpu, mark the 4+threadid bucket */
-                       bkts[j] = ((i / 4) * 8) + (i % 4);
-                       j++;
-               }
-       }
-
-       /*configure the 128 * 9 Translation table to send to available buckets*/
-       k = 0;
-       c1 = 3;
-       c2 = 0;
-       for (i = 0; i < 64; i++) {
-               /*
-                * On use_bkt set the b0, b1 are used, else
-                * the 4 classes are used, here implemented
-                * a logic to distribute the packets to the
-                * buckets equally or based on the class
-                */
-               c1 = (c1 + 1) & 3;
-               c2 = (c1 + 1) & 3;
-               b1 = bkts[k];
-               k = (k + 1) % j;
-               b2 = bkts[k];
-               k = (k + 1) % j;
-
-               val = ((c1 << 23) | (b1 << 17) | (use_bkt << 16) |
-                               (c2 << 7) | (b2 << 1) | (use_bkt << 0));
-               dev_dbg(&priv->ndev->dev, "Table[%d] b1=%d b2=%d c1=%d c2=%d\n",
-                       i, b1, b2, c1, c2);
-               xlr_nae_wreg(priv->base_addr, R_TRANSLATETABLE + i, val);
-               c1 = c2;
-       }
-}
-
-static void xlr_config_parser(struct xlr_net_priv *priv)
-{
-       u32 val;
-
-       /* Mark it as ETHERNET type */
-       xlr_nae_wreg(priv->base_addr, R_L2TYPE_0, 0x01);
-
-       /* Use 7bit CRChash for flow classification with 127 as CRC polynomial*/
-       xlr_nae_wreg(priv->base_addr, R_PARSERCONFIGREG,
-                    ((0x7f << 8) | (1 << 1)));
-
-       /* configure the parser : L2 Type is configured in the bootloader */
-       /* extract IP: src, dest protocol */
-       xlr_nae_wreg(priv->base_addr, R_L3CTABLE,
-                    (9 << 20) | (1 << 19) | (1 << 18) | (0x01 << 16) |
-                    (0x0800 << 0));
-       xlr_nae_wreg(priv->base_addr, R_L3CTABLE + 1,
-                    (9 << 25) | (1 << 21) | (12 << 14) | (4 << 10) |
-                    (16 << 4) | 4);
-
-       /* Configure to extract SRC port and Dest port for TCP and UDP pkts */
-       xlr_nae_wreg(priv->base_addr, R_L4CTABLE, 6);
-       xlr_nae_wreg(priv->base_addr, R_L4CTABLE + 2, 17);
-       val = ((0 << 21) | (2 << 17) | (2 << 11) | (2 << 7));
-       xlr_nae_wreg(priv->base_addr, R_L4CTABLE + 1, val);
-       xlr_nae_wreg(priv->base_addr, R_L4CTABLE + 3, val);
-
-       xlr_config_translate_table(priv);
-}
-
-static int xlr_phy_write(u32 *base_addr, int phy_addr, int regnum, u16 val)
-{
-       unsigned long timeout, stoptime, checktime;
-       int timedout;
-
-       /* 100ms timeout*/
-       timeout = msecs_to_jiffies(100);
-       stoptime = jiffies + timeout;
-       timedout = 0;
-
-       xlr_nae_wreg(base_addr, R_MII_MGMT_ADDRESS, (phy_addr << 8) | regnum);
-
-       /* Write the data which starts the write cycle */
-       xlr_nae_wreg(base_addr, R_MII_MGMT_WRITE_DATA, (u32)val);
-
-       /* poll for the read cycle to complete */
-       while (!timedout) {
-               checktime = jiffies;
-               if (xlr_nae_rdreg(base_addr, R_MII_MGMT_INDICATORS) == 0)
-                       break;
-               timedout = time_after(checktime, stoptime);
-       }
-       if (timedout) {
-               pr_info("Phy device write err: device busy");
-               return -EBUSY;
-       }
-
-       return 0;
-}
-
-static int xlr_phy_read(u32 *base_addr, int phy_addr, int regnum)
-{
-       unsigned long timeout, stoptime, checktime;
-       int timedout;
-
-       /* 100ms timeout*/
-       timeout = msecs_to_jiffies(100);
-       stoptime = jiffies + timeout;
-       timedout = 0;
-
-       /* setup the phy reg to be used */
-       xlr_nae_wreg(base_addr, R_MII_MGMT_ADDRESS,
-                    (phy_addr << 8) | (regnum << 0));
-
-       /* Issue the read command */
-       xlr_nae_wreg(base_addr, R_MII_MGMT_COMMAND,
-                    (1 << O_MII_MGMT_COMMAND__rstat));
-
-       /* poll for the read cycle to complete */
-       while (!timedout) {
-               checktime = jiffies;
-               if (xlr_nae_rdreg(base_addr, R_MII_MGMT_INDICATORS) == 0)
-                       break;
-               timedout = time_after(checktime, stoptime);
-       }
-       if (timedout) {
-               pr_info("Phy device read err: device busy");
-               return -EBUSY;
-       }
-
-       /* clear the read cycle */
-       xlr_nae_wreg(base_addr, R_MII_MGMT_COMMAND, 0);
-
-       /* Read the data */
-       return xlr_nae_rdreg(base_addr, R_MII_MGMT_STATUS);
-}
-
-static int xlr_mii_write(struct mii_bus *bus, int phy_addr, int regnum, u16 val)
-{
-       struct xlr_net_priv *priv = bus->priv;
-       int ret;
-
-       ret = xlr_phy_write(priv->mii_addr, phy_addr, regnum, val);
-       dev_dbg(&priv->ndev->dev, "mii_write phy %d : %d <- %x [%x]\n",
-               phy_addr, regnum, val, ret);
-       return ret;
-}
-
-static int xlr_mii_read(struct mii_bus *bus, int phy_addr, int regnum)
-{
-       struct xlr_net_priv *priv = bus->priv;
-       int ret;
-
-       ret =  xlr_phy_read(priv->mii_addr, phy_addr, regnum);
-       dev_dbg(&priv->ndev->dev, "mii_read phy %d : %d [%x]\n",
-               phy_addr, regnum, ret);
-       return ret;
-}
-
-/*
- * XLR ports are RGMII. XLS ports are SGMII mostly except the port0,
- * which can be configured either SGMII or RGMII, considered SGMII
- * by default, if board setup to RGMII the port_type need to set
- * accordingly.Serdes and PCS layer need to configured for SGMII
- */
-static void xlr_sgmii_init(struct xlr_net_priv *priv)
-{
-       int phy;
-
-       xlr_phy_write(priv->serdes_addr, 26, 0, 0x6DB0);
-       xlr_phy_write(priv->serdes_addr, 26, 1, 0xFFFF);
-       xlr_phy_write(priv->serdes_addr, 26, 2, 0xB6D0);
-       xlr_phy_write(priv->serdes_addr, 26, 3, 0x00FF);
-       xlr_phy_write(priv->serdes_addr, 26, 4, 0x0000);
-       xlr_phy_write(priv->serdes_addr, 26, 5, 0x0000);
-       xlr_phy_write(priv->serdes_addr, 26, 6, 0x0005);
-       xlr_phy_write(priv->serdes_addr, 26, 7, 0x0001);
-       xlr_phy_write(priv->serdes_addr, 26, 8, 0x0000);
-       xlr_phy_write(priv->serdes_addr, 26, 9, 0x0000);
-       xlr_phy_write(priv->serdes_addr, 26, 10, 0x0000);
-
-       /* program  GPIO values for serdes init parameters */
-       xlr_nae_wreg(priv->gpio_addr, 0x20, 0x7e6802);
-       xlr_nae_wreg(priv->gpio_addr, 0x10, 0x7104);
-
-       xlr_nae_wreg(priv->gpio_addr, 0x22, 0x7e6802);
-       xlr_nae_wreg(priv->gpio_addr, 0x21, 0x7104);
-
-       /* enable autoneg - more magic */
-       phy = priv->phy_addr % 4 + 27;
-       xlr_phy_write(priv->pcs_addr, phy, 0, 0x1000);
-       xlr_phy_write(priv->pcs_addr, phy, 0, 0x0200);
-}
-
-void xlr_set_gmac_speed(struct xlr_net_priv *priv)
-{
-       struct phy_device *phydev = xlr_get_phydev(priv);
-       int speed;
-
-       if (phydev->interface == PHY_INTERFACE_MODE_SGMII)
-               xlr_sgmii_init(priv);
-
-       if (phydev->speed != priv->phy_speed) {
-               speed = phydev->speed;
-               if (speed == SPEED_1000) {
-                       /* Set interface to Byte mode */
-                       xlr_nae_wreg(priv->base_addr, R_MAC_CONFIG_2, 0x7217);
-                       priv->phy_speed = speed;
-               } else if (speed == SPEED_100 || speed == SPEED_10) {
-                       /* Set interface to Nibble mode */
-                       xlr_nae_wreg(priv->base_addr, R_MAC_CONFIG_2, 0x7117);
-                       priv->phy_speed = speed;
-               }
-               /* Set SGMII speed in Interface control reg */
-               if (phydev->interface == PHY_INTERFACE_MODE_SGMII) {
-                       if (speed == SPEED_10)
-                               xlr_nae_wreg(priv->base_addr,
-                                            R_INTERFACE_CONTROL,
-                                            SGMII_SPEED_10);
-                       if (speed == SPEED_100)
-                               xlr_nae_wreg(priv->base_addr,
-                                            R_INTERFACE_CONTROL,
-                                            SGMII_SPEED_100);
-                       if (speed == SPEED_1000)
-                               xlr_nae_wreg(priv->base_addr,
-                                            R_INTERFACE_CONTROL,
-                                            SGMII_SPEED_1000);
-               }
-               if (speed == SPEED_10)
-                       xlr_nae_wreg(priv->base_addr, R_CORECONTROL, 0x2);
-               if (speed == SPEED_100)
-                       xlr_nae_wreg(priv->base_addr, R_CORECONTROL, 0x1);
-               if (speed == SPEED_1000)
-                       xlr_nae_wreg(priv->base_addr, R_CORECONTROL, 0x0);
-       }
-       pr_info("gmac%d : %dMbps\n", priv->port_id, priv->phy_speed);
-}
-
-static void xlr_gmac_link_adjust(struct net_device *ndev)
-{
-       struct xlr_net_priv *priv = netdev_priv(ndev);
-       struct phy_device *phydev = xlr_get_phydev(priv);
-       u32 intreg;
-
-       intreg = xlr_nae_rdreg(priv->base_addr, R_INTREG);
-       if (phydev->link) {
-               if (phydev->speed != priv->phy_speed) {
-                       xlr_set_gmac_speed(priv);
-                       pr_info("gmac%d : Link up\n", priv->port_id);
-               }
-       } else {
-               xlr_set_gmac_speed(priv);
-               pr_info("gmac%d : Link down\n", priv->port_id);
-       }
-}
-
-static int xlr_mii_probe(struct xlr_net_priv *priv)
-{
-       struct phy_device *phydev = xlr_get_phydev(priv);
-
-       if (!phydev) {
-               pr_err("no PHY found on phy_addr %d\n", priv->phy_addr);
-               return -ENODEV;
-       }
-
-       /* Attach MAC to PHY */
-       phydev = phy_connect(priv->ndev, phydev_name(phydev),
-                            xlr_gmac_link_adjust, priv->nd->phy_interface);
-
-       if (IS_ERR(phydev)) {
-               pr_err("could not attach PHY\n");
-               return PTR_ERR(phydev);
-       }
-       phydev->supported &= (ADVERTISED_10baseT_Full
-                               | ADVERTISED_10baseT_Half
-                               | ADVERTISED_100baseT_Full
-                               | ADVERTISED_100baseT_Half
-                               | ADVERTISED_1000baseT_Full
-                               | ADVERTISED_Autoneg
-                               | ADVERTISED_MII);
-
-       phydev->advertising = phydev->supported;
-       phy_attached_info(phydev);
-       return 0;
-}
-
-static int xlr_setup_mdio(struct xlr_net_priv *priv,
-                         struct platform_device *pdev)
-{
-       int err;
-
-       priv->mii_bus = mdiobus_alloc();
-       if (!priv->mii_bus) {
-               pr_err("mdiobus alloc failed\n");
-               return -ENOMEM;
-       }
-
-       priv->mii_bus->priv = priv;
-       priv->mii_bus->name = "xlr-mdio";
-       snprintf(priv->mii_bus->id, MII_BUS_ID_SIZE, "%s-%d",
-                priv->mii_bus->name, priv->port_id);
-       priv->mii_bus->read = xlr_mii_read;
-       priv->mii_bus->write = xlr_mii_write;
-       priv->mii_bus->parent = &pdev->dev;
-
-       /* Scan only the enabled address */
-       priv->mii_bus->phy_mask = ~(1 << priv->phy_addr);
-
-       /* setting clock divisor to 54 */
-       xlr_nae_wreg(priv->base_addr, R_MII_MGMT_CONFIG, 0x7);
-
-       err = mdiobus_register(priv->mii_bus);
-       if (err) {
-               mdiobus_free(priv->mii_bus);
-               pr_err("mdio bus registration failed\n");
-               return err;
-       }
-
-       pr_info("Registered mdio bus id : %s\n", priv->mii_bus->id);
-       err = xlr_mii_probe(priv);
-       if (err) {
-               mdiobus_free(priv->mii_bus);
-               return err;
-       }
-       return 0;
-}
-
-static void xlr_port_enable(struct xlr_net_priv *priv)
-{
-       u32 prid = (read_c0_prid() & 0xf000);
-
-       /* Setup MAC_CONFIG reg if (xls & rgmii) */
-       if ((prid == 0x8000 || prid == 0x4000 || prid == 0xc000) &&
-           priv->nd->phy_interface == PHY_INTERFACE_MODE_RGMII)
-               xlr_reg_update(priv->base_addr, R_RX_CONTROL,
-                              (1 << O_RX_CONTROL__RGMII),
-                              (1 << O_RX_CONTROL__RGMII));
-
-       /* Rx Tx enable */
-       xlr_reg_update(priv->base_addr, R_MAC_CONFIG_1,
-                      ((1 << O_MAC_CONFIG_1__rxen) |
-                       (1 << O_MAC_CONFIG_1__txen) |
-                       (1 << O_MAC_CONFIG_1__rxfc) |
-                       (1 << O_MAC_CONFIG_1__txfc)),
-                      ((1 << O_MAC_CONFIG_1__rxen) |
-                       (1 << O_MAC_CONFIG_1__txen) |
-                       (1 << O_MAC_CONFIG_1__rxfc) |
-                       (1 << O_MAC_CONFIG_1__txfc)));
-
-       /* Setup tx control reg */
-       xlr_reg_update(priv->base_addr, R_TX_CONTROL,
-                      ((1 << O_TX_CONTROL__TXENABLE) |
-                      (512 << O_TX_CONTROL__TXTHRESHOLD)), 0x3fff);
-
-       /* Setup rx control reg */
-       xlr_reg_update(priv->base_addr, R_RX_CONTROL,
-                      1 << O_RX_CONTROL__RXENABLE,
-                      1 << O_RX_CONTROL__RXENABLE);
-}
-
-static void xlr_port_disable(struct xlr_net_priv *priv)
-{
-       /* Setup MAC_CONFIG reg */
-       /* Rx Tx disable*/
-       xlr_reg_update(priv->base_addr, R_MAC_CONFIG_1,
-                      ((1 << O_MAC_CONFIG_1__rxen) |
-                       (1 << O_MAC_CONFIG_1__txen) |
-                       (1 << O_MAC_CONFIG_1__rxfc) |
-                       (1 << O_MAC_CONFIG_1__txfc)), 0x0);
-
-       /* Setup tx control reg */
-       xlr_reg_update(priv->base_addr, R_TX_CONTROL,
-                      ((1 << O_TX_CONTROL__TXENABLE) |
-                      (512 << O_TX_CONTROL__TXTHRESHOLD)), 0);
-
-       /* Setup rx control reg */
-       xlr_reg_update(priv->base_addr, R_RX_CONTROL,
-                      1 << O_RX_CONTROL__RXENABLE, 0);
-}
-
-/*
- * Initialization of gmac
- */
-static int xlr_gmac_init(struct xlr_net_priv *priv,
-                        struct platform_device *pdev)
-{
-       int ret;
-
-       pr_info("Initializing the gmac%d\n", priv->port_id);
-
-       xlr_port_disable(priv);
-
-       xlr_nae_wreg(priv->base_addr, R_DESC_PACK_CTRL,
-                    (1 << O_DESC_PACK_CTRL__MAXENTRY) |
-                    (BYTE_OFFSET << O_DESC_PACK_CTRL__BYTEOFFSET) |
-                    (1600 << O_DESC_PACK_CTRL__REGULARSIZE));
-
-       ret = xlr_setup_mdio(priv, pdev);
-       if (ret)
-               return ret;
-       xlr_port_enable(priv);
-
-       /* Enable Full-duplex/1000Mbps/CRC */
-       xlr_nae_wreg(priv->base_addr, R_MAC_CONFIG_2, 0x7217);
-       /* speed 2.5Mhz */
-       xlr_nae_wreg(priv->base_addr, R_CORECONTROL, 0x02);
-       /* Setup Interrupt mask reg */
-       xlr_nae_wreg(priv->base_addr, R_INTMASK, (1 << O_INTMASK__TXILLEGAL) |
-                    (1 << O_INTMASK__MDINT) | (1 << O_INTMASK__TXFETCHERROR) |
-                    (1 << O_INTMASK__P2PSPILLECC) | (1 << O_INTMASK__TAGFULL) |
-                    (1 << O_INTMASK__UNDERRUN) | (1 << O_INTMASK__ABORT));
-
-       /* Clear all stats */
-       xlr_reg_update(priv->base_addr, R_STATCTRL, 0, 1 << O_STATCTRL__CLRCNT);
-       xlr_reg_update(priv->base_addr, R_STATCTRL, 1 << 2, 1 << 2);
-       return 0;
-}
-
-static int xlr_net_probe(struct platform_device *pdev)
-{
-       struct xlr_net_priv *priv = NULL;
-       struct net_device *ndev;
-       struct resource *res;
-       struct xlr_adapter *adapter;
-       int err, port;
-
-       pr_info("XLR/XLS Ethernet Driver controller %d\n", pdev->id);
-       /*
-        * Allocate our adapter data structure and attach it to the device.
-        */
-       adapter = devm_kzalloc(&pdev->dev, sizeof(*adapter), GFP_KERNEL);
-       if (!adapter)
-               return -ENOMEM;
-
-       /*
-        * XLR and XLS have 1 and 2 NAE controller respectively
-        * Each controller has 4 gmac ports, mapping each controller
-        * under one parent device, 4 gmac ports under one device.
-        */
-       for (port = 0; port < pdev->num_resources / 2; port++) {
-               ndev = alloc_etherdev_mq(sizeof(struct xlr_net_priv), 32);
-               if (!ndev) {
-                       dev_err(&pdev->dev,
-                               "Allocation of Ethernet device failed\n");
-                       return -ENOMEM;
-               }
-
-               priv = netdev_priv(ndev);
-               priv->pdev = pdev;
-               priv->ndev = ndev;
-               priv->port_id = (pdev->id * 4) + port;
-               priv->nd = (struct xlr_net_data *)pdev->dev.platform_data;
-               priv->base_addr = devm_platform_ioremap_resource(pdev, port);
-               if (IS_ERR(priv->base_addr)) {
-                       err = PTR_ERR(priv->base_addr);
-                       goto err_gmac;
-               }
-               priv->adapter = adapter;
-               adapter->netdev[port] = ndev;
-
-               res = platform_get_resource(pdev, IORESOURCE_IRQ, port);
-               if (!res) {
-                       dev_err(&pdev->dev, "No irq resource for MAC %d\n",
-                               priv->port_id);
-                       err = -ENODEV;
-                       goto err_gmac;
-               }
-
-               ndev->irq = res->start;
-
-               priv->phy_addr = priv->nd->phy_addr[port];
-               priv->tx_stnid = priv->nd->tx_stnid[port];
-               priv->mii_addr = priv->nd->mii_addr;
-               priv->serdes_addr = priv->nd->serdes_addr;
-               priv->pcs_addr = priv->nd->pcs_addr;
-               priv->gpio_addr = priv->nd->gpio_addr;
-
-               ndev->netdev_ops = &xlr_netdev_ops;
-               ndev->watchdog_timeo = HZ;
-
-               /* Setup Mac address and Rx mode */
-               eth_hw_addr_random(ndev);
-               xlr_hw_set_mac_addr(ndev);
-               xlr_set_rx_mode(ndev);
-
-               priv->num_rx_desc += MAX_NUM_DESC_SPILL;
-               ndev->ethtool_ops = &xlr_ethtool_ops;
-               SET_NETDEV_DEV(ndev, &pdev->dev);
-
-               xlr_config_fifo_spill_area(priv);
-               /* Configure PDE to Round-Robin pkt distribution */
-               xlr_config_pde(priv);
-               xlr_config_parser(priv);
-
-               /* Call init with respect to port */
-               if (strcmp(res->name, "gmac") == 0) {
-                       err = xlr_gmac_init(priv, pdev);
-                       if (err) {
-                               dev_err(&pdev->dev, "gmac%d init failed\n",
-                                       priv->port_id);
-                               goto err_gmac;
-                       }
-               }
-
-               if (priv->port_id == 0 || priv->port_id == 4) {
-                       err = xlr_config_common(priv);
-                       if (err)
-                               goto err_netdev;
-               }
-
-               err = register_netdev(ndev);
-               if (err) {
-                       dev_err(&pdev->dev,
-                               "Registering netdev failed for gmac%d\n",
-                               priv->port_id);
-                       goto err_netdev;
-               }
-               platform_set_drvdata(pdev, priv);
-       }
-
-       return 0;
-
-err_netdev:
-       mdiobus_free(priv->mii_bus);
-err_gmac:
-       free_netdev(ndev);
-       return err;
-}
-
-static int xlr_net_remove(struct platform_device *pdev)
-{
-       struct xlr_net_priv *priv = platform_get_drvdata(pdev);
-
-       unregister_netdev(priv->ndev);
-       mdiobus_unregister(priv->mii_bus);
-       mdiobus_free(priv->mii_bus);
-       free_netdev(priv->ndev);
-       return 0;
-}
-
-static struct platform_driver xlr_net_driver = {
-       .probe          = xlr_net_probe,
-       .remove         = xlr_net_remove,
-       .driver         = {
-               .name   = "xlr-net",
-       },
-};
-
-module_platform_driver(xlr_net_driver);
-
-MODULE_AUTHOR("Ganesan Ramalingam <ganesanr@broadcom.com>");
-MODULE_DESCRIPTION("Ethernet driver for Netlogic XLR/XLS");
-MODULE_LICENSE("Dual BSD/GPL");
-MODULE_ALIAS("platform:xlr-net");
diff --git a/drivers/staging/netlogic/xlr_net.h b/drivers/staging/netlogic/xlr_net.h
deleted file mode 100644 (file)
index 8365b74..0000000
+++ /dev/null
@@ -1,1079 +0,0 @@
-/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */
-/*
- * Copyright (c) 2003-2012 Broadcom Corporation
- * All Rights Reserved
- */
-
-/* #define MAC_SPLIT_MODE */
-
-#define MAC_SPACING                 0x400
-#define XGMAC_SPACING               0x400
-
-/* PE-MCXMAC register and bit field definitions */
-#define R_MAC_CONFIG_1                                              0x00
-#define   O_MAC_CONFIG_1__srst                                      31
-#define   O_MAC_CONFIG_1__simr                                      30
-#define   O_MAC_CONFIG_1__hrrmc                                     18
-#define   W_MAC_CONFIG_1__hrtmc                                      2
-#define   O_MAC_CONFIG_1__hrrfn                                     16
-#define   W_MAC_CONFIG_1__hrtfn                                      2
-#define   O_MAC_CONFIG_1__intlb                                      8
-#define   O_MAC_CONFIG_1__rxfc                                       5
-#define   O_MAC_CONFIG_1__txfc                                       4
-#define   O_MAC_CONFIG_1__srxen                                      3
-#define   O_MAC_CONFIG_1__rxen                                       2
-#define   O_MAC_CONFIG_1__stxen                                      1
-#define   O_MAC_CONFIG_1__txen                                       0
-#define R_MAC_CONFIG_2                                              0x01
-#define   O_MAC_CONFIG_2__prlen                                     12
-#define   W_MAC_CONFIG_2__prlen                                      4
-#define   O_MAC_CONFIG_2__speed                                      8
-#define   W_MAC_CONFIG_2__speed                                      2
-#define   O_MAC_CONFIG_2__hugen                                      5
-#define   O_MAC_CONFIG_2__flchk                                      4
-#define   O_MAC_CONFIG_2__crce                                       1
-#define   O_MAC_CONFIG_2__fulld                                      0
-#define R_IPG_IFG                                                   0x02
-#define   O_IPG_IFG__ipgr1                                          24
-#define   W_IPG_IFG__ipgr1                                           7
-#define   O_IPG_IFG__ipgr2                                          16
-#define   W_IPG_IFG__ipgr2                                           7
-#define   O_IPG_IFG__mifg                                            8
-#define   W_IPG_IFG__mifg                                            8
-#define   O_IPG_IFG__ipgt                                            0
-#define   W_IPG_IFG__ipgt                                            7
-#define R_HALF_DUPLEX                                               0x03
-#define   O_HALF_DUPLEX__abebt                                      24
-#define   W_HALF_DUPLEX__abebt                                       4
-#define   O_HALF_DUPLEX__abebe                                      19
-#define   O_HALF_DUPLEX__bpnb                                       18
-#define   O_HALF_DUPLEX__nobo                                       17
-#define   O_HALF_DUPLEX__edxsdfr                                    16
-#define   O_HALF_DUPLEX__retry                                      12
-#define   W_HALF_DUPLEX__retry                                       4
-#define   O_HALF_DUPLEX__lcol                                        0
-#define   W_HALF_DUPLEX__lcol                                       10
-#define R_MAXIMUM_FRAME_LENGTH                                      0x04
-#define   O_MAXIMUM_FRAME_LENGTH__maxf                               0
-#define   W_MAXIMUM_FRAME_LENGTH__maxf                              16
-#define R_TEST                                                      0x07
-#define   O_TEST__mbof                                               3
-#define   O_TEST__rthdf                                              2
-#define   O_TEST__tpause                                             1
-#define   O_TEST__sstct                                              0
-#define R_MII_MGMT_CONFIG                                           0x08
-#define   O_MII_MGMT_CONFIG__scinc                                   5
-#define   O_MII_MGMT_CONFIG__spre                                    4
-#define   O_MII_MGMT_CONFIG__clks                                    3
-#define   W_MII_MGMT_CONFIG__clks                                    3
-#define R_MII_MGMT_COMMAND                                          0x09
-#define   O_MII_MGMT_COMMAND__scan                                   1
-#define   O_MII_MGMT_COMMAND__rstat                                  0
-#define R_MII_MGMT_ADDRESS                                          0x0A
-#define   O_MII_MGMT_ADDRESS__fiad                                   8
-#define   W_MII_MGMT_ADDRESS__fiad                                   5
-#define   O_MII_MGMT_ADDRESS__fgad                                   5
-#define   W_MII_MGMT_ADDRESS__fgad                                   0
-#define R_MII_MGMT_WRITE_DATA                                       0x0B
-#define   O_MII_MGMT_WRITE_DATA__ctld                                0
-#define   W_MII_MGMT_WRITE_DATA__ctld                               16
-#define R_MII_MGMT_STATUS                                           0x0C
-#define R_MII_MGMT_INDICATORS                                       0x0D
-#define   O_MII_MGMT_INDICATORS__nvalid                              2
-#define   O_MII_MGMT_INDICATORS__scan                                1
-#define   O_MII_MGMT_INDICATORS__busy                                0
-#define R_INTERFACE_CONTROL                                         0x0E
-#define   O_INTERFACE_CONTROL__hrstint                              31
-#define   O_INTERFACE_CONTROL__tbimode                              27
-#define   O_INTERFACE_CONTROL__ghdmode                              26
-#define   O_INTERFACE_CONTROL__lhdmode                              25
-#define   O_INTERFACE_CONTROL__phymod                               24
-#define   O_INTERFACE_CONTROL__hrrmi                                23
-#define   O_INTERFACE_CONTROL__rspd                                 16
-#define   O_INTERFACE_CONTROL__hr100                                15
-#define   O_INTERFACE_CONTROL__frcq                                 10
-#define   O_INTERFACE_CONTROL__nocfr                                 9
-#define   O_INTERFACE_CONTROL__dlfct                                 8
-#define   O_INTERFACE_CONTROL__enjab                                 0
-#define R_INTERFACE_STATUS                                         0x0F
-#define   O_INTERFACE_STATUS__xsdfr                                  9
-#define   O_INTERFACE_STATUS__ssrr                                   8
-#define   W_INTERFACE_STATUS__ssrr                                   5
-#define   O_INTERFACE_STATUS__miilf                                  3
-#define   O_INTERFACE_STATUS__locar                                  2
-#define   O_INTERFACE_STATUS__sqerr                                  1
-#define   O_INTERFACE_STATUS__jabber                                 0
-#define R_STATION_ADDRESS_LS                                       0x10
-#define R_STATION_ADDRESS_MS                                       0x11
-
-/* A-XGMAC register and bit field definitions */
-#define R_XGMAC_CONFIG_0    0x00
-#define   O_XGMAC_CONFIG_0__hstmacrst               31
-#define   O_XGMAC_CONFIG_0__hstrstrctl              23
-#define   O_XGMAC_CONFIG_0__hstrstrfn               22
-#define   O_XGMAC_CONFIG_0__hstrsttctl              18
-#define   O_XGMAC_CONFIG_0__hstrsttfn               17
-#define   O_XGMAC_CONFIG_0__hstrstmiim              16
-#define   O_XGMAC_CONFIG_0__hstloopback             8
-#define R_XGMAC_CONFIG_1    0x01
-#define   O_XGMAC_CONFIG_1__hsttctlen               31
-#define   O_XGMAC_CONFIG_1__hsttfen                 30
-#define   O_XGMAC_CONFIG_1__hstrctlen               29
-#define   O_XGMAC_CONFIG_1__hstrfen                 28
-#define   O_XGMAC_CONFIG_1__tfen                    26
-#define   O_XGMAC_CONFIG_1__rfen                    24
-#define   O_XGMAC_CONFIG_1__hstrctlshrtp            12
-#define   O_XGMAC_CONFIG_1__hstdlyfcstx             10
-#define   W_XGMAC_CONFIG_1__hstdlyfcstx              2
-#define   O_XGMAC_CONFIG_1__hstdlyfcsrx              8
-#define   W_XGMAC_CONFIG_1__hstdlyfcsrx              2
-#define   O_XGMAC_CONFIG_1__hstppen                  7
-#define   O_XGMAC_CONFIG_1__hstbytswp                6
-#define   O_XGMAC_CONFIG_1__hstdrplt64               5
-#define   O_XGMAC_CONFIG_1__hstprmscrx               4
-#define   O_XGMAC_CONFIG_1__hstlenchk                3
-#define   O_XGMAC_CONFIG_1__hstgenfcs                2
-#define   O_XGMAC_CONFIG_1__hstpadmode               0
-#define   W_XGMAC_CONFIG_1__hstpadmode               2
-#define R_XGMAC_CONFIG_2    0x02
-#define   O_XGMAC_CONFIG_2__hsttctlfrcp             31
-#define   O_XGMAC_CONFIG_2__hstmlnkflth             27
-#define   O_XGMAC_CONFIG_2__hstalnkflth             26
-#define   O_XGMAC_CONFIG_2__rflnkflt                24
-#define   W_XGMAC_CONFIG_2__rflnkflt                 2
-#define   O_XGMAC_CONFIG_2__hstipgextmod            16
-#define   W_XGMAC_CONFIG_2__hstipgextmod             5
-#define   O_XGMAC_CONFIG_2__hstrctlfrcp             15
-#define   O_XGMAC_CONFIG_2__hstipgexten              5
-#define   O_XGMAC_CONFIG_2__hstmipgext               0
-#define   W_XGMAC_CONFIG_2__hstmipgext               5
-#define R_XGMAC_CONFIG_3    0x03
-#define   O_XGMAC_CONFIG_3__hstfltrfrm              31
-#define   W_XGMAC_CONFIG_3__hstfltrfrm              16
-#define   O_XGMAC_CONFIG_3__hstfltrfrmdc            15
-#define   W_XGMAC_CONFIG_3__hstfltrfrmdc            16
-#define R_XGMAC_STATION_ADDRESS_LS      0x04
-#define   O_XGMAC_STATION_ADDRESS_LS__hstmacadr0    0
-#define   W_XGMAC_STATION_ADDRESS_LS__hstmacadr0    32
-#define R_XGMAC_STATION_ADDRESS_MS      0x05
-#define R_XGMAC_MAX_FRAME_LEN           0x08
-#define   O_XGMAC_MAX_FRAME_LEN__hstmxfrmwctx       16
-#define   W_XGMAC_MAX_FRAME_LEN__hstmxfrmwctx       14
-#define   O_XGMAC_MAX_FRAME_LEN__hstmxfrmbcrx        0
-#define   W_XGMAC_MAX_FRAME_LEN__hstmxfrmbcrx       16
-#define R_XGMAC_REV_LEVEL               0x0B
-#define   O_XGMAC_REV_LEVEL__revlvl                  0
-#define   W_XGMAC_REV_LEVEL__revlvl                 15
-#define R_XGMAC_MIIM_COMMAND            0x10
-#define   O_XGMAC_MIIM_COMMAND__hstldcmd             3
-#define   O_XGMAC_MIIM_COMMAND__hstmiimcmd           0
-#define   W_XGMAC_MIIM_COMMAND__hstmiimcmd           3
-#define R_XGMAC_MIIM_FILED              0x11
-#define   O_XGMAC_MIIM_FILED__hststfield            30
-#define   W_XGMAC_MIIM_FILED__hststfield             2
-#define   O_XGMAC_MIIM_FILED__hstopfield            28
-#define   W_XGMAC_MIIM_FILED__hstopfield             2
-#define   O_XGMAC_MIIM_FILED__hstphyadx             23
-#define   W_XGMAC_MIIM_FILED__hstphyadx              5
-#define   O_XGMAC_MIIM_FILED__hstregadx             18
-#define   W_XGMAC_MIIM_FILED__hstregadx              5
-#define   O_XGMAC_MIIM_FILED__hsttafield            16
-#define   W_XGMAC_MIIM_FILED__hsttafield             2
-#define   O_XGMAC_MIIM_FILED__miimrddat              0
-#define   W_XGMAC_MIIM_FILED__miimrddat             16
-#define R_XGMAC_MIIM_CONFIG             0x12
-#define   O_XGMAC_MIIM_CONFIG__hstnopram             7
-#define   O_XGMAC_MIIM_CONFIG__hstclkdiv             0
-#define   W_XGMAC_MIIM_CONFIG__hstclkdiv             7
-#define R_XGMAC_MIIM_LINK_FAIL_VECTOR   0x13
-#define   O_XGMAC_MIIM_LINK_FAIL_VECTOR__miimlfvec   0
-#define   W_XGMAC_MIIM_LINK_FAIL_VECTOR__miimlfvec  32
-#define R_XGMAC_MIIM_INDICATOR          0x14
-#define   O_XGMAC_MIIM_INDICATOR__miimphylf          4
-#define   O_XGMAC_MIIM_INDICATOR__miimmoncplt        3
-#define   O_XGMAC_MIIM_INDICATOR__miimmonvld         2
-#define   O_XGMAC_MIIM_INDICATOR__miimmon            1
-#define   O_XGMAC_MIIM_INDICATOR__miimbusy           0
-
-/* GMAC stats registers */
-#define R_RBYT                                                     0x27
-#define R_RPKT                                                     0x28
-#define R_RFCS                                                     0x29
-#define R_RMCA                                                     0x2A
-#define R_RBCA                                                     0x2B
-#define R_RXCF                                                     0x2C
-#define R_RXPF                                                     0x2D
-#define R_RXUO                                                     0x2E
-#define R_RALN                                                     0x2F
-#define R_RFLR                                                     0x30
-#define R_RCDE                                                     0x31
-#define R_RCSE                                                     0x32
-#define R_RUND                                                     0x33
-#define R_ROVR                                                     0x34
-#define R_TBYT                                                     0x38
-#define R_TPKT                                                     0x39
-#define R_TMCA                                                     0x3A
-#define R_TBCA                                                     0x3B
-#define R_TXPF                                                     0x3C
-#define R_TDFR                                                     0x3D
-#define R_TEDF                                                     0x3E
-#define R_TSCL                                                     0x3F
-#define R_TMCL                                                     0x40
-#define R_TLCL                                                     0x41
-#define R_TXCL                                                     0x42
-#define R_TNCL                                                     0x43
-#define R_TJBR                                                     0x46
-#define R_TFCS                                                     0x47
-#define R_TXCF                                                     0x48
-#define R_TOVR                                                     0x49
-#define R_TUND                                                     0x4A
-#define R_TFRG                                                     0x4B
-
-/* Glue logic register and bit field definitions */
-#define R_MAC_ADDR0                                                 0x50
-#define R_MAC_ADDR1                                                 0x52
-#define R_MAC_ADDR2                                                 0x54
-#define R_MAC_ADDR3                                                 0x56
-#define R_MAC_ADDR_MASK2                                            0x58
-#define R_MAC_ADDR_MASK3                                            0x5A
-#define R_MAC_FILTER_CONFIG                                         0x5C
-#define   O_MAC_FILTER_CONFIG__BROADCAST_EN                         10
-#define   O_MAC_FILTER_CONFIG__PAUSE_FRAME_EN                       9
-#define   O_MAC_FILTER_CONFIG__ALL_MCAST_EN                         8
-#define   O_MAC_FILTER_CONFIG__ALL_UCAST_EN                         7
-#define   O_MAC_FILTER_CONFIG__HASH_MCAST_EN                        6
-#define   O_MAC_FILTER_CONFIG__HASH_UCAST_EN                        5
-#define   O_MAC_FILTER_CONFIG__ADDR_MATCH_DISC                      4
-#define   O_MAC_FILTER_CONFIG__MAC_ADDR3_VALID                      3
-#define   O_MAC_FILTER_CONFIG__MAC_ADDR2_VALID                      2
-#define   O_MAC_FILTER_CONFIG__MAC_ADDR1_VALID                      1
-#define   O_MAC_FILTER_CONFIG__MAC_ADDR0_VALID                      0
-#define R_HASH_TABLE_VECTOR                                         0x30
-#define R_TX_CONTROL                                                 0x0A0
-#define   O_TX_CONTROL__TX15HALT                                     31
-#define   O_TX_CONTROL__TX14HALT                                     30
-#define   O_TX_CONTROL__TX13HALT                                     29
-#define   O_TX_CONTROL__TX12HALT                                     28
-#define   O_TX_CONTROL__TX11HALT                                     27
-#define   O_TX_CONTROL__TX10HALT                                     26
-#define   O_TX_CONTROL__TX9HALT                                      25
-#define   O_TX_CONTROL__TX8HALT                                      24
-#define   O_TX_CONTROL__TX7HALT                                      23
-#define   O_TX_CONTROL__TX6HALT                                      22
-#define   O_TX_CONTROL__TX5HALT                                      21
-#define   O_TX_CONTROL__TX4HALT                                      20
-#define   O_TX_CONTROL__TX3HALT                                      19
-#define   O_TX_CONTROL__TX2HALT                                      18
-#define   O_TX_CONTROL__TX1HALT                                      17
-#define   O_TX_CONTROL__TX0HALT                                      16
-#define   O_TX_CONTROL__TXIDLE                                       15
-#define   O_TX_CONTROL__TXENABLE                                     14
-#define   O_TX_CONTROL__TXTHRESHOLD                                  0
-#define   W_TX_CONTROL__TXTHRESHOLD                                  14
-#define R_RX_CONTROL                                                 0x0A1
-#define   O_RX_CONTROL__RGMII                                        10
-#define   O_RX_CONTROL__SOFTRESET                                   2
-#define   O_RX_CONTROL__RXHALT                                       1
-#define   O_RX_CONTROL__RXENABLE                                     0
-#define R_DESC_PACK_CTRL                                            0x0A2
-#define   O_DESC_PACK_CTRL__BYTEOFFSET                              17
-#define   W_DESC_PACK_CTRL__BYTEOFFSET                              3
-#define   O_DESC_PACK_CTRL__PREPADENABLE                            16
-#define   O_DESC_PACK_CTRL__MAXENTRY                                14
-#define   W_DESC_PACK_CTRL__MAXENTRY                                2
-#define   O_DESC_PACK_CTRL__REGULARSIZE                             0
-#define   W_DESC_PACK_CTRL__REGULARSIZE                             14
-#define R_STATCTRL                                                  0x0A3
-#define   O_STATCTRL__OVERFLOWEN                                    4
-#define   O_STATCTRL__GIG                                           3
-#define   O_STATCTRL__STEN                                          2
-#define   O_STATCTRL__CLRCNT                                        1
-#define   O_STATCTRL__AUTOZ                                         0
-#define R_L2ALLOCCTRL                                               0x0A4
-#define   O_L2ALLOCCTRL__TXL2ALLOCATE                               9
-#define   W_L2ALLOCCTRL__TXL2ALLOCATE                               9
-#define   O_L2ALLOCCTRL__RXL2ALLOCATE                               0
-#define   W_L2ALLOCCTRL__RXL2ALLOCATE                               9
-#define R_INTMASK                                                   0x0A5
-#define   O_INTMASK__SPI4TXERROR                                     28
-#define   O_INTMASK__SPI4RXERROR                                     27
-#define   O_INTMASK__RGMIIHALFDUPCOLLISION                           27
-#define   O_INTMASK__ABORT                                           26
-#define   O_INTMASK__UNDERRUN                                        25
-#define   O_INTMASK__DISCARDPACKET                                   24
-#define   O_INTMASK__ASYNCFIFOFULL                                   23
-#define   O_INTMASK__TAGFULL                                         22
-#define   O_INTMASK__CLASS3FULL                                      21
-#define   O_INTMASK__C3EARLYFULL                                     20
-#define   O_INTMASK__CLASS2FULL                                      19
-#define   O_INTMASK__C2EARLYFULL                                     18
-#define   O_INTMASK__CLASS1FULL                                      17
-#define   O_INTMASK__C1EARLYFULL                                     16
-#define   O_INTMASK__CLASS0FULL                                      15
-#define   O_INTMASK__C0EARLYFULL                                     14
-#define   O_INTMASK__RXDATAFULL                                      13
-#define   O_INTMASK__RXEARLYFULL                                     12
-#define   O_INTMASK__RFREEEMPTY                                      9
-#define   O_INTMASK__RFEARLYEMPTY                                    8
-#define   O_INTMASK__P2PSPILLECC                                     7
-#define   O_INTMASK__FREEDESCFULL                                    5
-#define   O_INTMASK__FREEEARLYFULL                                   4
-#define   O_INTMASK__TXFETCHERROR                                    3
-#define   O_INTMASK__STATCARRY                                       2
-#define   O_INTMASK__MDINT                                           1
-#define   O_INTMASK__TXILLEGAL                                       0
-#define R_INTREG                                                    0x0A6
-#define   O_INTREG__SPI4TXERROR                                     28
-#define   O_INTREG__SPI4RXERROR                                     27
-#define   O_INTREG__RGMIIHALFDUPCOLLISION                           27
-#define   O_INTREG__ABORT                                           26
-#define   O_INTREG__UNDERRUN                                        25
-#define   O_INTREG__DISCARDPACKET                                   24
-#define   O_INTREG__ASYNCFIFOFULL                                   23
-#define   O_INTREG__TAGFULL                                         22
-#define   O_INTREG__CLASS3FULL                                      21
-#define   O_INTREG__C3EARLYFULL                                     20
-#define   O_INTREG__CLASS2FULL                                      19
-#define   O_INTREG__C2EARLYFULL                                     18
-#define   O_INTREG__CLASS1FULL                                      17
-#define   O_INTREG__C1EARLYFULL                                     16
-#define   O_INTREG__CLASS0FULL                                      15
-#define   O_INTREG__C0EARLYFULL                                     14
-#define   O_INTREG__RXDATAFULL                                      13
-#define   O_INTREG__RXEARLYFULL                                     12
-#define   O_INTREG__RFREEEMPTY                                      9
-#define   O_INTREG__RFEARLYEMPTY                                    8
-#define   O_INTREG__P2PSPILLECC                                     7
-#define   O_INTREG__FREEDESCFULL                                    5
-#define   O_INTREG__FREEEARLYFULL                                   4
-#define   O_INTREG__TXFETCHERROR                                    3
-#define   O_INTREG__STATCARRY                                       2
-#define   O_INTREG__MDINT                                           1
-#define   O_INTREG__TXILLEGAL                                       0
-#define R_TXRETRY                                                   0x0A7
-#define   O_TXRETRY__COLLISIONRETRY                                 6
-#define   O_TXRETRY__BUSERRORRETRY                                  5
-#define   O_TXRETRY__UNDERRUNRETRY                                  4
-#define   O_TXRETRY__RETRIES                                        0
-#define   W_TXRETRY__RETRIES                                        4
-#define R_CORECONTROL                                               0x0A8
-#define   O_CORECONTROL__ERRORTHREAD                                4
-#define   W_CORECONTROL__ERRORTHREAD                                7
-#define   O_CORECONTROL__SHUTDOWN                                   2
-#define   O_CORECONTROL__SPEED                                      0
-#define   W_CORECONTROL__SPEED                                      2
-#define R_BYTEOFFSET0                                               0x0A9
-#define R_BYTEOFFSET1                                               0x0AA
-#define R_L2TYPE_0                                                  0x0F0
-#define   O_L2TYPE__EXTRAHDRPROTOSIZE                               26
-#define   W_L2TYPE__EXTRAHDRPROTOSIZE                               5
-#define   O_L2TYPE__EXTRAHDRPROTOOFFSET                             20
-#define   W_L2TYPE__EXTRAHDRPROTOOFFSET                             6
-#define   O_L2TYPE__EXTRAHEADERSIZE                                 14
-#define   W_L2TYPE__EXTRAHEADERSIZE                                 6
-#define   O_L2TYPE__PROTOOFFSET                                     8
-#define   W_L2TYPE__PROTOOFFSET                                     6
-#define   O_L2TYPE__L2HDROFFSET                                     2
-#define   W_L2TYPE__L2HDROFFSET                                     6
-#define   O_L2TYPE__L2PROTO                                         0
-#define   W_L2TYPE__L2PROTO                                         2
-#define R_L2TYPE_1                                                  0xF0
-#define R_L2TYPE_2                                                  0xF0
-#define R_L2TYPE_3                                                  0xF0
-#define R_PARSERCONFIGREG                                           0x100
-#define   O_PARSERCONFIGREG__CRCHASHPOLY                            8
-#define   W_PARSERCONFIGREG__CRCHASHPOLY                            7
-#define   O_PARSERCONFIGREG__PREPADOFFSET                           4
-#define   W_PARSERCONFIGREG__PREPADOFFSET                           4
-#define   O_PARSERCONFIGREG__USECAM                                 2
-#define   O_PARSERCONFIGREG__USEHASH                                1
-#define   O_PARSERCONFIGREG__USEPROTO                               0
-#define R_L3CTABLE                                                  0x140
-#define   O_L3CTABLE__OFFSET0                                       25
-#define   W_L3CTABLE__OFFSET0                                       7
-#define   O_L3CTABLE__LEN0                                          21
-#define   W_L3CTABLE__LEN0                                          4
-#define   O_L3CTABLE__OFFSET1                                       14
-#define   W_L3CTABLE__OFFSET1                                       7
-#define   O_L3CTABLE__LEN1                                          10
-#define   W_L3CTABLE__LEN1                                          4
-#define   O_L3CTABLE__OFFSET2                                       4
-#define   W_L3CTABLE__OFFSET2                                       6
-#define   O_L3CTABLE__LEN2                                          0
-#define   W_L3CTABLE__LEN2                                          4
-#define   O_L3CTABLE__L3HDROFFSET                                   26
-#define   W_L3CTABLE__L3HDROFFSET                                   6
-#define   O_L3CTABLE__L4PROTOOFFSET                                 20
-#define   W_L3CTABLE__L4PROTOOFFSET                                 6
-#define   O_L3CTABLE__IPCHKSUMCOMPUTE                               19
-#define   O_L3CTABLE__L4CLASSIFY                                    18
-#define   O_L3CTABLE__L2PROTO                                       16
-#define   W_L3CTABLE__L2PROTO                                       2
-#define   O_L3CTABLE__L3PROTOKEY                                    0
-#define   W_L3CTABLE__L3PROTOKEY                                    16
-#define R_L4CTABLE                                                  0x160
-#define   O_L4CTABLE__OFFSET0                                       21
-#define   W_L4CTABLE__OFFSET0                                       6
-#define   O_L4CTABLE__LEN0                                          17
-#define   W_L4CTABLE__LEN0                                          4
-#define   O_L4CTABLE__OFFSET1                                       11
-#define   W_L4CTABLE__OFFSET1                                       6
-#define   O_L4CTABLE__LEN1                                          7
-#define   W_L4CTABLE__LEN1                                          4
-#define   O_L4CTABLE__TCPCHKSUMENABLE                               0
-#define R_CAM4X128TABLE                                             0x172
-#define   O_CAM4X128TABLE__CLASSID                                  7
-#define   W_CAM4X128TABLE__CLASSID                                  2
-#define   O_CAM4X128TABLE__BUCKETID                                 1
-#define   W_CAM4X128TABLE__BUCKETID                                 6
-#define   O_CAM4X128TABLE__USEBUCKET                                0
-#define R_CAM4X128KEY                                               0x180
-#define R_TRANSLATETABLE                                            0x1A0
-#define R_DMACR0                                                    0x200
-#define   O_DMACR0__DATA0WRMAXCR                                    27
-#define   W_DMACR0__DATA0WRMAXCR                                    3
-#define   O_DMACR0__DATA0RDMAXCR                                    24
-#define   W_DMACR0__DATA0RDMAXCR                                    3
-#define   O_DMACR0__DATA1WRMAXCR                                    21
-#define   W_DMACR0__DATA1WRMAXCR                                    3
-#define   O_DMACR0__DATA1RDMAXCR                                    18
-#define   W_DMACR0__DATA1RDMAXCR                                    3
-#define   O_DMACR0__DATA2WRMAXCR                                    15
-#define   W_DMACR0__DATA2WRMAXCR                                    3
-#define   O_DMACR0__DATA2RDMAXCR                                    12
-#define   W_DMACR0__DATA2RDMAXCR                                    3
-#define   O_DMACR0__DATA3WRMAXCR                                    9
-#define   W_DMACR0__DATA3WRMAXCR                                    3
-#define   O_DMACR0__DATA3RDMAXCR                                    6
-#define   W_DMACR0__DATA3RDMAXCR                                    3
-#define   O_DMACR0__DATA4WRMAXCR                                    3
-#define   W_DMACR0__DATA4WRMAXCR                                    3
-#define   O_DMACR0__DATA4RDMAXCR                                    0
-#define   W_DMACR0__DATA4RDMAXCR                                    3
-#define R_DMACR1                                                    0x201
-#define   O_DMACR1__DATA5WRMAXCR                                    27
-#define   W_DMACR1__DATA5WRMAXCR                                    3
-#define   O_DMACR1__DATA5RDMAXCR                                    24
-#define   W_DMACR1__DATA5RDMAXCR                                    3
-#define   O_DMACR1__DATA6WRMAXCR                                    21
-#define   W_DMACR1__DATA6WRMAXCR                                    3
-#define   O_DMACR1__DATA6RDMAXCR                                    18
-#define   W_DMACR1__DATA6RDMAXCR                                    3
-#define   O_DMACR1__DATA7WRMAXCR                                    15
-#define   W_DMACR1__DATA7WRMAXCR                                    3
-#define   O_DMACR1__DATA7RDMAXCR                                    12
-#define   W_DMACR1__DATA7RDMAXCR                                    3
-#define   O_DMACR1__DATA8WRMAXCR                                    9
-#define   W_DMACR1__DATA8WRMAXCR                                    3
-#define   O_DMACR1__DATA8RDMAXCR                                    6
-#define   W_DMACR1__DATA8RDMAXCR                                    3
-#define   O_DMACR1__DATA9WRMAXCR                                    3
-#define   W_DMACR1__DATA9WRMAXCR                                    3
-#define   O_DMACR1__DATA9RDMAXCR                                    0
-#define   W_DMACR1__DATA9RDMAXCR                                    3
-#define R_DMACR2                                                    0x202
-#define   O_DMACR2__DATA10WRMAXCR                                   27
-#define   W_DMACR2__DATA10WRMAXCR                                   3
-#define   O_DMACR2__DATA10RDMAXCR                                   24
-#define   W_DMACR2__DATA10RDMAXCR                                   3
-#define   O_DMACR2__DATA11WRMAXCR                                   21
-#define   W_DMACR2__DATA11WRMAXCR                                   3
-#define   O_DMACR2__DATA11RDMAXCR                                   18
-#define   W_DMACR2__DATA11RDMAXCR                                   3
-#define   O_DMACR2__DATA12WRMAXCR                                   15
-#define   W_DMACR2__DATA12WRMAXCR                                   3
-#define   O_DMACR2__DATA12RDMAXCR                                   12
-#define   W_DMACR2__DATA12RDMAXCR                                   3
-#define   O_DMACR2__DATA13WRMAXCR                                   9
-#define   W_DMACR2__DATA13WRMAXCR                                   3
-#define   O_DMACR2__DATA13RDMAXCR                                   6
-#define   W_DMACR2__DATA13RDMAXCR                                   3
-#define   O_DMACR2__DATA14WRMAXCR                                   3
-#define   W_DMACR2__DATA14WRMAXCR                                   3
-#define   O_DMACR2__DATA14RDMAXCR                                   0
-#define   W_DMACR2__DATA14RDMAXCR                                   3
-#define R_DMACR3                                                    0x203
-#define   O_DMACR3__DATA15WRMAXCR                                   27
-#define   W_DMACR3__DATA15WRMAXCR                                   3
-#define   O_DMACR3__DATA15RDMAXCR                                   24
-#define   W_DMACR3__DATA15RDMAXCR                                   3
-#define   O_DMACR3__SPCLASSWRMAXCR                                  21
-#define   W_DMACR3__SPCLASSWRMAXCR                                  3
-#define   O_DMACR3__SPCLASSRDMAXCR                                  18
-#define   W_DMACR3__SPCLASSRDMAXCR                                  3
-#define   O_DMACR3__JUMFRINWRMAXCR                                  15
-#define   W_DMACR3__JUMFRINWRMAXCR                                  3
-#define   O_DMACR3__JUMFRINRDMAXCR                                  12
-#define   W_DMACR3__JUMFRINRDMAXCR                                  3
-#define   O_DMACR3__REGFRINWRMAXCR                                  9
-#define   W_DMACR3__REGFRINWRMAXCR                                  3
-#define   O_DMACR3__REGFRINRDMAXCR                                  6
-#define   W_DMACR3__REGFRINRDMAXCR                                  3
-#define   O_DMACR3__FROUTWRMAXCR                                    3
-#define   W_DMACR3__FROUTWRMAXCR                                    3
-#define   O_DMACR3__FROUTRDMAXCR                                    0
-#define   W_DMACR3__FROUTRDMAXCR                                    3
-#define R_REG_FRIN_SPILL_MEM_START_0                                0x204
-#define   O_REG_FRIN_SPILL_MEM_START_0__REGFRINSPILLMEMSTART0        0
-#define   W_REG_FRIN_SPILL_MEM_START_0__REGFRINSPILLMEMSTART0       32
-#define R_REG_FRIN_SPILL_MEM_START_1                                0x205
-#define   O_REG_FRIN_SPILL_MEM_START_1__REGFRINSPILLMEMSTART1        0
-#define   W_REG_FRIN_SPILL_MEM_START_1__REGFRINSPILLMEMSTART1        3
-#define R_REG_FRIN_SPILL_MEM_SIZE                                   0x206
-#define   O_REG_FRIN_SPILL_MEM_SIZE__REGFRINSPILLMEMSIZE             0
-#define   W_REG_FRIN_SPILL_MEM_SIZE__REGFRINSPILLMEMSIZE            32
-#define R_FROUT_SPILL_MEM_START_0                                   0x207
-#define   O_FROUT_SPILL_MEM_START_0__FROUTSPILLMEMSTART0             0
-#define   W_FROUT_SPILL_MEM_START_0__FROUTSPILLMEMSTART0            32
-#define R_FROUT_SPILL_MEM_START_1                                   0x208
-#define   O_FROUT_SPILL_MEM_START_1__FROUTSPILLMEMSTART1             0
-#define   W_FROUT_SPILL_MEM_START_1__FROUTSPILLMEMSTART1             3
-#define R_FROUT_SPILL_MEM_SIZE                                      0x209
-#define   O_FROUT_SPILL_MEM_SIZE__FROUTSPILLMEMSIZE                  0
-#define   W_FROUT_SPILL_MEM_SIZE__FROUTSPILLMEMSIZE                 32
-#define R_CLASS0_SPILL_MEM_START_0                                  0x20A
-#define   O_CLASS0_SPILL_MEM_START_0__CLASS0SPILLMEMSTART0           0
-#define   W_CLASS0_SPILL_MEM_START_0__CLASS0SPILLMEMSTART0          32
-#define R_CLASS0_SPILL_MEM_START_1                                  0x20B
-#define   O_CLASS0_SPILL_MEM_START_1__CLASS0SPILLMEMSTART1           0
-#define   W_CLASS0_SPILL_MEM_START_1__CLASS0SPILLMEMSTART1           3
-#define R_CLASS0_SPILL_MEM_SIZE                                     0x20C
-#define   O_CLASS0_SPILL_MEM_SIZE__CLASS0SPILLMEMSIZE                0
-#define   W_CLASS0_SPILL_MEM_SIZE__CLASS0SPILLMEMSIZE               32
-#define R_JUMFRIN_SPILL_MEM_START_0                                 0x20D
-#define   O_JUMFRIN_SPILL_MEM_START_0__JUMFRINSPILLMEMSTART0          0
-#define   W_JUMFRIN_SPILL_MEM_START_0__JUMFRINSPILLMEMSTART0         32
-#define R_JUMFRIN_SPILL_MEM_START_1                                 0x20E
-#define   O_JUMFRIN_SPILL_MEM_START_1__JUMFRINSPILLMEMSTART1         0
-#define   W_JUMFRIN_SPILL_MEM_START_1__JUMFRINSPILLMEMSTART1         3
-#define R_JUMFRIN_SPILL_MEM_SIZE                                    0x20F
-#define   O_JUMFRIN_SPILL_MEM_SIZE__JUMFRINSPILLMEMSIZE              0
-#define   W_JUMFRIN_SPILL_MEM_SIZE__JUMFRINSPILLMEMSIZE             32
-#define R_CLASS1_SPILL_MEM_START_0                                  0x210
-#define   O_CLASS1_SPILL_MEM_START_0__CLASS1SPILLMEMSTART0           0
-#define   W_CLASS1_SPILL_MEM_START_0__CLASS1SPILLMEMSTART0          32
-#define R_CLASS1_SPILL_MEM_START_1                                  0x211
-#define   O_CLASS1_SPILL_MEM_START_1__CLASS1SPILLMEMSTART1           0
-#define   W_CLASS1_SPILL_MEM_START_1__CLASS1SPILLMEMSTART1           3
-#define R_CLASS1_SPILL_MEM_SIZE                                     0x212
-#define   O_CLASS1_SPILL_MEM_SIZE__CLASS1SPILLMEMSIZE                0
-#define   W_CLASS1_SPILL_MEM_SIZE__CLASS1SPILLMEMSIZE               32
-#define R_CLASS2_SPILL_MEM_START_0                                  0x213
-#define   O_CLASS2_SPILL_MEM_START_0__CLASS2SPILLMEMSTART0           0
-#define   W_CLASS2_SPILL_MEM_START_0__CLASS2SPILLMEMSTART0          32
-#define R_CLASS2_SPILL_MEM_START_1                                  0x214
-#define   O_CLASS2_SPILL_MEM_START_1__CLASS2SPILLMEMSTART1           0
-#define   W_CLASS2_SPILL_MEM_START_1__CLASS2SPILLMEMSTART1           3
-#define R_CLASS2_SPILL_MEM_SIZE                                     0x215
-#define   O_CLASS2_SPILL_MEM_SIZE__CLASS2SPILLMEMSIZE                0
-#define   W_CLASS2_SPILL_MEM_SIZE__CLASS2SPILLMEMSIZE               32
-#define R_CLASS3_SPILL_MEM_START_0                                  0x216
-#define   O_CLASS3_SPILL_MEM_START_0__CLASS3SPILLMEMSTART0           0
-#define   W_CLASS3_SPILL_MEM_START_0__CLASS3SPILLMEMSTART0          32
-#define R_CLASS3_SPILL_MEM_START_1                                  0x217
-#define   O_CLASS3_SPILL_MEM_START_1__CLASS3SPILLMEMSTART1           0
-#define   W_CLASS3_SPILL_MEM_START_1__CLASS3SPILLMEMSTART1           3
-#define R_CLASS3_SPILL_MEM_SIZE                                     0x218
-#define   O_CLASS3_SPILL_MEM_SIZE__CLASS3SPILLMEMSIZE                0
-#define   W_CLASS3_SPILL_MEM_SIZE__CLASS3SPILLMEMSIZE               32
-#define R_REG_FRIN1_SPILL_MEM_START_0                               0x219
-#define R_REG_FRIN1_SPILL_MEM_START_1                               0x21a
-#define R_REG_FRIN1_SPILL_MEM_SIZE                                  0x21b
-#define R_SPIHNGY0                                                  0x219
-#define   O_SPIHNGY0__EG_HNGY_THRESH_0                              24
-#define   W_SPIHNGY0__EG_HNGY_THRESH_0                              7
-#define   O_SPIHNGY0__EG_HNGY_THRESH_1                              16
-#define   W_SPIHNGY0__EG_HNGY_THRESH_1                              7
-#define   O_SPIHNGY0__EG_HNGY_THRESH_2                              8
-#define   W_SPIHNGY0__EG_HNGY_THRESH_2                              7
-#define   O_SPIHNGY0__EG_HNGY_THRESH_3                              0
-#define   W_SPIHNGY0__EG_HNGY_THRESH_3                              7
-#define R_SPIHNGY1                                                  0x21A
-#define   O_SPIHNGY1__EG_HNGY_THRESH_4                              24
-#define   W_SPIHNGY1__EG_HNGY_THRESH_4                              7
-#define   O_SPIHNGY1__EG_HNGY_THRESH_5                              16
-#define   W_SPIHNGY1__EG_HNGY_THRESH_5                              7
-#define   O_SPIHNGY1__EG_HNGY_THRESH_6                              8
-#define   W_SPIHNGY1__EG_HNGY_THRESH_6                              7
-#define   O_SPIHNGY1__EG_HNGY_THRESH_7                              0
-#define   W_SPIHNGY1__EG_HNGY_THRESH_7                              7
-#define R_SPIHNGY2                                                  0x21B
-#define   O_SPIHNGY2__EG_HNGY_THRESH_8                              24
-#define   W_SPIHNGY2__EG_HNGY_THRESH_8                              7
-#define   O_SPIHNGY2__EG_HNGY_THRESH_9                              16
-#define   W_SPIHNGY2__EG_HNGY_THRESH_9                              7
-#define   O_SPIHNGY2__EG_HNGY_THRESH_10                             8
-#define   W_SPIHNGY2__EG_HNGY_THRESH_10                             7
-#define   O_SPIHNGY2__EG_HNGY_THRESH_11                             0
-#define   W_SPIHNGY2__EG_HNGY_THRESH_11                             7
-#define R_SPIHNGY3                                                  0x21C
-#define   O_SPIHNGY3__EG_HNGY_THRESH_12                             24
-#define   W_SPIHNGY3__EG_HNGY_THRESH_12                             7
-#define   O_SPIHNGY3__EG_HNGY_THRESH_13                             16
-#define   W_SPIHNGY3__EG_HNGY_THRESH_13                             7
-#define   O_SPIHNGY3__EG_HNGY_THRESH_14                             8
-#define   W_SPIHNGY3__EG_HNGY_THRESH_14                             7
-#define   O_SPIHNGY3__EG_HNGY_THRESH_15                             0
-#define   W_SPIHNGY3__EG_HNGY_THRESH_15                             7
-#define R_SPISTRV0                                                  0x21D
-#define   O_SPISTRV0__EG_STRV_THRESH_0                              24
-#define   W_SPISTRV0__EG_STRV_THRESH_0                              7
-#define   O_SPISTRV0__EG_STRV_THRESH_1                              16
-#define   W_SPISTRV0__EG_STRV_THRESH_1                              7
-#define   O_SPISTRV0__EG_STRV_THRESH_2                              8
-#define   W_SPISTRV0__EG_STRV_THRESH_2                              7
-#define   O_SPISTRV0__EG_STRV_THRESH_3                              0
-#define   W_SPISTRV0__EG_STRV_THRESH_3                              7
-#define R_SPISTRV1                                                  0x21E
-#define   O_SPISTRV1__EG_STRV_THRESH_4                              24
-#define   W_SPISTRV1__EG_STRV_THRESH_4                              7
-#define   O_SPISTRV1__EG_STRV_THRESH_5                              16
-#define   W_SPISTRV1__EG_STRV_THRESH_5                              7
-#define   O_SPISTRV1__EG_STRV_THRESH_6                              8
-#define   W_SPISTRV1__EG_STRV_THRESH_6                              7
-#define   O_SPISTRV1__EG_STRV_THRESH_7                              0
-#define   W_SPISTRV1__EG_STRV_THRESH_7                              7
-#define R_SPISTRV2                                                  0x21F
-#define   O_SPISTRV2__EG_STRV_THRESH_8                              24
-#define   W_SPISTRV2__EG_STRV_THRESH_8                              7
-#define   O_SPISTRV2__EG_STRV_THRESH_9                              16
-#define   W_SPISTRV2__EG_STRV_THRESH_9                              7
-#define   O_SPISTRV2__EG_STRV_THRESH_10                             8
-#define   W_SPISTRV2__EG_STRV_THRESH_10                             7
-#define   O_SPISTRV2__EG_STRV_THRESH_11                             0
-#define   W_SPISTRV2__EG_STRV_THRESH_11                             7
-#define R_SPISTRV3                                                  0x220
-#define   O_SPISTRV3__EG_STRV_THRESH_12                             24
-#define   W_SPISTRV3__EG_STRV_THRESH_12                             7
-#define   O_SPISTRV3__EG_STRV_THRESH_13                             16
-#define   W_SPISTRV3__EG_STRV_THRESH_13                             7
-#define   O_SPISTRV3__EG_STRV_THRESH_14                             8
-#define   W_SPISTRV3__EG_STRV_THRESH_14                             7
-#define   O_SPISTRV3__EG_STRV_THRESH_15                             0
-#define   W_SPISTRV3__EG_STRV_THRESH_15                             7
-#define R_TXDATAFIFO0                                               0x221
-#define   O_TXDATAFIFO0__TX0DATAFIFOSTART                           24
-#define   W_TXDATAFIFO0__TX0DATAFIFOSTART                           7
-#define   O_TXDATAFIFO0__TX0DATAFIFOSIZE                            16
-#define   W_TXDATAFIFO0__TX0DATAFIFOSIZE                            7
-#define   O_TXDATAFIFO0__TX1DATAFIFOSTART                           8
-#define   W_TXDATAFIFO0__TX1DATAFIFOSTART                           7
-#define   O_TXDATAFIFO0__TX1DATAFIFOSIZE                            0
-#define   W_TXDATAFIFO0__TX1DATAFIFOSIZE                            7
-#define R_TXDATAFIFO1                                               0x222
-#define   O_TXDATAFIFO1__TX2DATAFIFOSTART                           24
-#define   W_TXDATAFIFO1__TX2DATAFIFOSTART                           7
-#define   O_TXDATAFIFO1__TX2DATAFIFOSIZE                            16
-#define   W_TXDATAFIFO1__TX2DATAFIFOSIZE                            7
-#define   O_TXDATAFIFO1__TX3DATAFIFOSTART                           8
-#define   W_TXDATAFIFO1__TX3DATAFIFOSTART                           7
-#define   O_TXDATAFIFO1__TX3DATAFIFOSIZE                            0
-#define   W_TXDATAFIFO1__TX3DATAFIFOSIZE                            7
-#define R_TXDATAFIFO2                                               0x223
-#define   O_TXDATAFIFO2__TX4DATAFIFOSTART                           24
-#define   W_TXDATAFIFO2__TX4DATAFIFOSTART                           7
-#define   O_TXDATAFIFO2__TX4DATAFIFOSIZE                            16
-#define   W_TXDATAFIFO2__TX4DATAFIFOSIZE                            7
-#define   O_TXDATAFIFO2__TX5DATAFIFOSTART                           8
-#define   W_TXDATAFIFO2__TX5DATAFIFOSTART                           7
-#define   O_TXDATAFIFO2__TX5DATAFIFOSIZE                            0
-#define   W_TXDATAFIFO2__TX5DATAFIFOSIZE                            7
-#define R_TXDATAFIFO3                                               0x224
-#define   O_TXDATAFIFO3__TX6DATAFIFOSTART                           24
-#define   W_TXDATAFIFO3__TX6DATAFIFOSTART                           7
-#define   O_TXDATAFIFO3__TX6DATAFIFOSIZE                            16
-#define   W_TXDATAFIFO3__TX6DATAFIFOSIZE                            7
-#define   O_TXDATAFIFO3__TX7DATAFIFOSTART                           8
-#define   W_TXDATAFIFO3__TX7DATAFIFOSTART                           7
-#define   O_TXDATAFIFO3__TX7DATAFIFOSIZE                            0
-#define   W_TXDATAFIFO3__TX7DATAFIFOSIZE                            7
-#define R_TXDATAFIFO4                                               0x225
-#define   O_TXDATAFIFO4__TX8DATAFIFOSTART                           24
-#define   W_TXDATAFIFO4__TX8DATAFIFOSTART                           7
-#define   O_TXDATAFIFO4__TX8DATAFIFOSIZE                            16
-#define   W_TXDATAFIFO4__TX8DATAFIFOSIZE                            7
-#define   O_TXDATAFIFO4__TX9DATAFIFOSTART                           8
-#define   W_TXDATAFIFO4__TX9DATAFIFOSTART                           7
-#define   O_TXDATAFIFO4__TX9DATAFIFOSIZE                            0
-#define   W_TXDATAFIFO4__TX9DATAFIFOSIZE                            7
-#define R_TXDATAFIFO5                                               0x226
-#define   O_TXDATAFIFO5__TX10DATAFIFOSTART                          24
-#define   W_TXDATAFIFO5__TX10DATAFIFOSTART                          7
-#define   O_TXDATAFIFO5__TX10DATAFIFOSIZE                           16
-#define   W_TXDATAFIFO5__TX10DATAFIFOSIZE                           7
-#define   O_TXDATAFIFO5__TX11DATAFIFOSTART                          8
-#define   W_TXDATAFIFO5__TX11DATAFIFOSTART                          7
-#define   O_TXDATAFIFO5__TX11DATAFIFOSIZE                           0
-#define   W_TXDATAFIFO5__TX11DATAFIFOSIZE                           7
-#define R_TXDATAFIFO6                                               0x227
-#define   O_TXDATAFIFO6__TX12DATAFIFOSTART                          24
-#define   W_TXDATAFIFO6__TX12DATAFIFOSTART                          7
-#define   O_TXDATAFIFO6__TX12DATAFIFOSIZE                           16
-#define   W_TXDATAFIFO6__TX12DATAFIFOSIZE                           7
-#define   O_TXDATAFIFO6__TX13DATAFIFOSTART                          8
-#define   W_TXDATAFIFO6__TX13DATAFIFOSTART                          7
-#define   O_TXDATAFIFO6__TX13DATAFIFOSIZE                           0
-#define   W_TXDATAFIFO6__TX13DATAFIFOSIZE                           7
-#define R_TXDATAFIFO7                                               0x228
-#define   O_TXDATAFIFO7__TX14DATAFIFOSTART                          24
-#define   W_TXDATAFIFO7__TX14DATAFIFOSTART                          7
-#define   O_TXDATAFIFO7__TX14DATAFIFOSIZE                           16
-#define   W_TXDATAFIFO7__TX14DATAFIFOSIZE                           7
-#define   O_TXDATAFIFO7__TX15DATAFIFOSTART                          8
-#define   W_TXDATAFIFO7__TX15DATAFIFOSTART                          7
-#define   O_TXDATAFIFO7__TX15DATAFIFOSIZE                           0
-#define   W_TXDATAFIFO7__TX15DATAFIFOSIZE                           7
-#define R_RXDATAFIFO0                                               0x229
-#define   O_RXDATAFIFO0__RX0DATAFIFOSTART                           24
-#define   W_RXDATAFIFO0__RX0DATAFIFOSTART                           7
-#define   O_RXDATAFIFO0__RX0DATAFIFOSIZE                            16
-#define   W_RXDATAFIFO0__RX0DATAFIFOSIZE                            7
-#define   O_RXDATAFIFO0__RX1DATAFIFOSTART                           8
-#define   W_RXDATAFIFO0__RX1DATAFIFOSTART                           7
-#define   O_RXDATAFIFO0__RX1DATAFIFOSIZE                            0
-#define   W_RXDATAFIFO0__RX1DATAFIFOSIZE                            7
-#define R_RXDATAFIFO1                                               0x22A
-#define   O_RXDATAFIFO1__RX2DATAFIFOSTART                           24
-#define   W_RXDATAFIFO1__RX2DATAFIFOSTART                           7
-#define   O_RXDATAFIFO1__RX2DATAFIFOSIZE                            16
-#define   W_RXDATAFIFO1__RX2DATAFIFOSIZE                            7
-#define   O_RXDATAFIFO1__RX3DATAFIFOSTART                           8
-#define   W_RXDATAFIFO1__RX3DATAFIFOSTART                           7
-#define   O_RXDATAFIFO1__RX3DATAFIFOSIZE                            0
-#define   W_RXDATAFIFO1__RX3DATAFIFOSIZE                            7
-#define R_RXDATAFIFO2                                               0x22B
-#define   O_RXDATAFIFO2__RX4DATAFIFOSTART                           24
-#define   W_RXDATAFIFO2__RX4DATAFIFOSTART                           7
-#define   O_RXDATAFIFO2__RX4DATAFIFOSIZE                            16
-#define   W_RXDATAFIFO2__RX4DATAFIFOSIZE                            7
-#define   O_RXDATAFIFO2__RX5DATAFIFOSTART                           8
-#define   W_RXDATAFIFO2__RX5DATAFIFOSTART                           7
-#define   O_RXDATAFIFO2__RX5DATAFIFOSIZE                            0
-#define   W_RXDATAFIFO2__RX5DATAFIFOSIZE                            7
-#define R_RXDATAFIFO3                                               0x22C
-#define   O_RXDATAFIFO3__RX6DATAFIFOSTART                           24
-#define   W_RXDATAFIFO3__RX6DATAFIFOSTART                           7
-#define   O_RXDATAFIFO3__RX6DATAFIFOSIZE                            16
-#define   W_RXDATAFIFO3__RX6DATAFIFOSIZE                            7
-#define   O_RXDATAFIFO3__RX7DATAFIFOSTART                           8
-#define   W_RXDATAFIFO3__RX7DATAFIFOSTART                           7
-#define   O_RXDATAFIFO3__RX7DATAFIFOSIZE                            0
-#define   W_RXDATAFIFO3__RX7DATAFIFOSIZE                            7
-#define R_RXDATAFIFO4                                               0x22D
-#define   O_RXDATAFIFO4__RX8DATAFIFOSTART                           24
-#define   W_RXDATAFIFO4__RX8DATAFIFOSTART                           7
-#define   O_RXDATAFIFO4__RX8DATAFIFOSIZE                            16
-#define   W_RXDATAFIFO4__RX8DATAFIFOSIZE                            7
-#define   O_RXDATAFIFO4__RX9DATAFIFOSTART                           8
-#define   W_RXDATAFIFO4__RX9DATAFIFOSTART                           7
-#define   O_RXDATAFIFO4__RX9DATAFIFOSIZE                            0
-#define   W_RXDATAFIFO4__RX9DATAFIFOSIZE                            7
-#define R_RXDATAFIFO5                                               0x22E
-#define   O_RXDATAFIFO5__RX10DATAFIFOSTART                          24
-#define   W_RXDATAFIFO5__RX10DATAFIFOSTART                          7
-#define   O_RXDATAFIFO5__RX10DATAFIFOSIZE                           16
-#define   W_RXDATAFIFO5__RX10DATAFIFOSIZE                           7
-#define   O_RXDATAFIFO5__RX11DATAFIFOSTART                          8
-#define   W_RXDATAFIFO5__RX11DATAFIFOSTART                          7
-#define   O_RXDATAFIFO5__RX11DATAFIFOSIZE                           0
-#define   W_RXDATAFIFO5__RX11DATAFIFOSIZE                           7
-#define R_RXDATAFIFO6                                               0x22F
-#define   O_RXDATAFIFO6__RX12DATAFIFOSTART                          24
-#define   W_RXDATAFIFO6__RX12DATAFIFOSTART                          7
-#define   O_RXDATAFIFO6__RX12DATAFIFOSIZE                           16
-#define   W_RXDATAFIFO6__RX12DATAFIFOSIZE                           7
-#define   O_RXDATAFIFO6__RX13DATAFIFOSTART                          8
-#define   W_RXDATAFIFO6__RX13DATAFIFOSTART                          7
-#define   O_RXDATAFIFO6__RX13DATAFIFOSIZE                           0
-#define   W_RXDATAFIFO6__RX13DATAFIFOSIZE                           7
-#define R_RXDATAFIFO7                                               0x230
-#define   O_RXDATAFIFO7__RX14DATAFIFOSTART                          24
-#define   W_RXDATAFIFO7__RX14DATAFIFOSTART                          7
-#define   O_RXDATAFIFO7__RX14DATAFIFOSIZE                           16
-#define   W_RXDATAFIFO7__RX14DATAFIFOSIZE                           7
-#define   O_RXDATAFIFO7__RX15DATAFIFOSTART                          8
-#define   W_RXDATAFIFO7__RX15DATAFIFOSTART                          7
-#define   O_RXDATAFIFO7__RX15DATAFIFOSIZE                           0
-#define   W_RXDATAFIFO7__RX15DATAFIFOSIZE                           7
-#define R_XGMACPADCALIBRATION                                       0x231
-#define R_FREEQCARVE                                                0x233
-#define R_SPI4STATICDELAY0                                          0x240
-#define   O_SPI4STATICDELAY0__DATALINE7                             28
-#define   W_SPI4STATICDELAY0__DATALINE7                             4
-#define   O_SPI4STATICDELAY0__DATALINE6                             24
-#define   W_SPI4STATICDELAY0__DATALINE6                             4
-#define   O_SPI4STATICDELAY0__DATALINE5                             20
-#define   W_SPI4STATICDELAY0__DATALINE5                             4
-#define   O_SPI4STATICDELAY0__DATALINE4                             16
-#define   W_SPI4STATICDELAY0__DATALINE4                             4
-#define   O_SPI4STATICDELAY0__DATALINE3                             12
-#define   W_SPI4STATICDELAY0__DATALINE3                             4
-#define   O_SPI4STATICDELAY0__DATALINE2                             8
-#define   W_SPI4STATICDELAY0__DATALINE2                             4
-#define   O_SPI4STATICDELAY0__DATALINE1                             4
-#define   W_SPI4STATICDELAY0__DATALINE1                             4
-#define   O_SPI4STATICDELAY0__DATALINE0                             0
-#define   W_SPI4STATICDELAY0__DATALINE0                             4
-#define R_SPI4STATICDELAY1                                          0x241
-#define   O_SPI4STATICDELAY1__DATALINE15                            28
-#define   W_SPI4STATICDELAY1__DATALINE15                            4
-#define   O_SPI4STATICDELAY1__DATALINE14                            24
-#define   W_SPI4STATICDELAY1__DATALINE14                            4
-#define   O_SPI4STATICDELAY1__DATALINE13                            20
-#define   W_SPI4STATICDELAY1__DATALINE13                            4
-#define   O_SPI4STATICDELAY1__DATALINE12                            16
-#define   W_SPI4STATICDELAY1__DATALINE12                            4
-#define   O_SPI4STATICDELAY1__DATALINE11                            12
-#define   W_SPI4STATICDELAY1__DATALINE11                            4
-#define   O_SPI4STATICDELAY1__DATALINE10                            8
-#define   W_SPI4STATICDELAY1__DATALINE10                            4
-#define   O_SPI4STATICDELAY1__DATALINE9                             4
-#define   W_SPI4STATICDELAY1__DATALINE9                             4
-#define   O_SPI4STATICDELAY1__DATALINE8                             0
-#define   W_SPI4STATICDELAY1__DATALINE8                             4
-#define R_SPI4STATICDELAY2                                          0x242
-#define   O_SPI4STATICDELAY0__TXSTAT1                               8
-#define   W_SPI4STATICDELAY0__TXSTAT1                               4
-#define   O_SPI4STATICDELAY0__TXSTAT0                               4
-#define   W_SPI4STATICDELAY0__TXSTAT0                               4
-#define   O_SPI4STATICDELAY0__RXCONTROL                             0
-#define   W_SPI4STATICDELAY0__RXCONTROL                             4
-#define R_SPI4CONTROL                                               0x243
-#define   O_SPI4CONTROL__STATICDELAY                                2
-#define   O_SPI4CONTROL__LVDS_LVTTL                                 1
-#define   O_SPI4CONTROL__SPI4ENABLE                                 0
-#define R_CLASSWATERMARKS                                           0x244
-#define   O_CLASSWATERMARKS__CLASS0WATERMARK                        24
-#define   W_CLASSWATERMARKS__CLASS0WATERMARK                        5
-#define   O_CLASSWATERMARKS__CLASS1WATERMARK                        16
-#define   W_CLASSWATERMARKS__CLASS1WATERMARK                        5
-#define   O_CLASSWATERMARKS__CLASS3WATERMARK                        0
-#define   W_CLASSWATERMARKS__CLASS3WATERMARK                        5
-#define R_RXWATERMARKS1                                              0x245
-#define   O_RXWATERMARKS__RX0DATAWATERMARK                          24
-#define   W_RXWATERMARKS__RX0DATAWATERMARK                          7
-#define   O_RXWATERMARKS__RX1DATAWATERMARK                          16
-#define   W_RXWATERMARKS__RX1DATAWATERMARK                          7
-#define   O_RXWATERMARKS__RX3DATAWATERMARK                          0
-#define   W_RXWATERMARKS__RX3DATAWATERMARK                          7
-#define R_RXWATERMARKS2                                              0x246
-#define   O_RXWATERMARKS__RX4DATAWATERMARK                          24
-#define   W_RXWATERMARKS__RX4DATAWATERMARK                          7
-#define   O_RXWATERMARKS__RX5DATAWATERMARK                          16
-#define   W_RXWATERMARKS__RX5DATAWATERMARK                          7
-#define   O_RXWATERMARKS__RX6DATAWATERMARK                          8
-#define   W_RXWATERMARKS__RX6DATAWATERMARK                          7
-#define   O_RXWATERMARKS__RX7DATAWATERMARK                          0
-#define   W_RXWATERMARKS__RX7DATAWATERMARK                          7
-#define R_RXWATERMARKS3                                              0x247
-#define   O_RXWATERMARKS__RX8DATAWATERMARK                          24
-#define   W_RXWATERMARKS__RX8DATAWATERMARK                          7
-#define   O_RXWATERMARKS__RX9DATAWATERMARK                          16
-#define   W_RXWATERMARKS__RX9DATAWATERMARK                          7
-#define   O_RXWATERMARKS__RX10DATAWATERMARK                         8
-#define   W_RXWATERMARKS__RX10DATAWATERMARK                         7
-#define   O_RXWATERMARKS__RX11DATAWATERMARK                         0
-#define   W_RXWATERMARKS__RX11DATAWATERMARK                         7
-#define R_RXWATERMARKS4                                              0x248
-#define   O_RXWATERMARKS__RX12DATAWATERMARK                         24
-#define   W_RXWATERMARKS__RX12DATAWATERMARK                         7
-#define   O_RXWATERMARKS__RX13DATAWATERMARK                         16
-#define   W_RXWATERMARKS__RX13DATAWATERMARK                         7
-#define   O_RXWATERMARKS__RX14DATAWATERMARK                         8
-#define   W_RXWATERMARKS__RX14DATAWATERMARK                         7
-#define   O_RXWATERMARKS__RX15DATAWATERMARK                         0
-#define   W_RXWATERMARKS__RX15DATAWATERMARK                         7
-#define R_FREEWATERMARKS                                            0x249
-#define   O_FREEWATERMARKS__FREEOUTWATERMARK                        16
-#define   W_FREEWATERMARKS__FREEOUTWATERMARK                        16
-#define   O_FREEWATERMARKS__JUMFRWATERMARK                          8
-#define   W_FREEWATERMARKS__JUMFRWATERMARK                          7
-#define   O_FREEWATERMARKS__REGFRWATERMARK                          0
-#define   W_FREEWATERMARKS__REGFRWATERMARK                          7
-#define R_EGRESSFIFOCARVINGSLOTS                                    0x24a
-
-#define CTRL_RES0           0
-#define CTRL_RES1           1
-#define CTRL_REG_FREE       2
-#define CTRL_JUMBO_FREE     3
-#define CTRL_CONT           4
-#define CTRL_EOP            5
-#define CTRL_START          6
-#define CTRL_SNGL           7
-
-#define CTRL_B0_NOT_EOP     0
-#define CTRL_B0_EOP         1
-
-#define R_ROUND_ROBIN_TABLE                 0
-#define R_PDE_CLASS_0                       0x300
-#define R_PDE_CLASS_1                       0x302
-#define R_PDE_CLASS_2                       0x304
-#define R_PDE_CLASS_3                       0x306
-
-#define R_MSG_TX_THRESHOLD                  0x308
-
-#define R_GMAC_JFR0_BUCKET_SIZE              0x320
-#define R_GMAC_RFR0_BUCKET_SIZE              0x321
-#define R_GMAC_TX0_BUCKET_SIZE              0x322
-#define R_GMAC_TX1_BUCKET_SIZE              0x323
-#define R_GMAC_TX2_BUCKET_SIZE              0x324
-#define R_GMAC_TX3_BUCKET_SIZE              0x325
-#define R_GMAC_JFR1_BUCKET_SIZE              0x326
-#define R_GMAC_RFR1_BUCKET_SIZE              0x327
-
-#define R_XGS_TX0_BUCKET_SIZE               0x320
-#define R_XGS_TX1_BUCKET_SIZE               0x321
-#define R_XGS_TX2_BUCKET_SIZE               0x322
-#define R_XGS_TX3_BUCKET_SIZE               0x323
-#define R_XGS_TX4_BUCKET_SIZE               0x324
-#define R_XGS_TX5_BUCKET_SIZE               0x325
-#define R_XGS_TX6_BUCKET_SIZE               0x326
-#define R_XGS_TX7_BUCKET_SIZE               0x327
-#define R_XGS_TX8_BUCKET_SIZE               0x328
-#define R_XGS_TX9_BUCKET_SIZE               0x329
-#define R_XGS_TX10_BUCKET_SIZE              0x32A
-#define R_XGS_TX11_BUCKET_SIZE              0x32B
-#define R_XGS_TX12_BUCKET_SIZE              0x32C
-#define R_XGS_TX13_BUCKET_SIZE              0x32D
-#define R_XGS_TX14_BUCKET_SIZE              0x32E
-#define R_XGS_TX15_BUCKET_SIZE              0x32F
-#define R_XGS_JFR_BUCKET_SIZE               0x330
-#define R_XGS_RFR_BUCKET_SIZE               0x331
-
-#define R_CC_CPU0_0                         0x380
-#define R_CC_CPU1_0                         0x388
-#define R_CC_CPU2_0                         0x390
-#define R_CC_CPU3_0                         0x398
-#define R_CC_CPU4_0                         0x3a0
-#define R_CC_CPU5_0                         0x3a8
-#define R_CC_CPU6_0                         0x3b0
-#define R_CC_CPU7_0                         0x3b8
-
-#define XLR_GMAC_BLK_SZ                            (XLR_IO_GMAC_1_OFFSET - \
-               XLR_IO_GMAC_0_OFFSET)
-
-/* Constants used for configuring the devices */
-
-#define XLR_FB_STN                     6 /* Bucket used for Tx freeback */
-
-#define MAC_B2B_IPG                     88
-
-#define        XLR_NET_PREPAD_LEN              32
-
-/* frame sizes need to be cacheline aligned */
-#define MAX_FRAME_SIZE                  (1536 + XLR_NET_PREPAD_LEN)
-#define MAX_FRAME_SIZE_JUMBO            9216
-
-#define MAC_SKB_BACK_PTR_SIZE           SMP_CACHE_BYTES
-#define MAC_PREPAD                      0
-#define BYTE_OFFSET                     2
-#define XLR_RX_BUF_SIZE                 (MAX_FRAME_SIZE + BYTE_OFFSET + \
-               MAC_PREPAD + MAC_SKB_BACK_PTR_SIZE + SMP_CACHE_BYTES)
-#define MAC_CRC_LEN                     4
-#define MAX_NUM_MSGRNG_STN_CC           128
-#define MAX_MSG_SND_ATTEMPTS           100     /* 13 stns x 4 entry msg/stn +
-                                                * headroom
-                                                */
-
-#define MAC_FRIN_TO_BE_SENT_THRESHOLD   16
-
-#define MAX_NUM_DESC_SPILL             1024
-#define MAX_FRIN_SPILL                  (MAX_NUM_DESC_SPILL << 2)
-#define MAX_FROUT_SPILL                 (MAX_NUM_DESC_SPILL << 2)
-#define MAX_CLASS_0_SPILL               (MAX_NUM_DESC_SPILL << 2)
-#define MAX_CLASS_1_SPILL               (MAX_NUM_DESC_SPILL << 2)
-#define MAX_CLASS_2_SPILL               (MAX_NUM_DESC_SPILL << 2)
-#define MAX_CLASS_3_SPILL               (MAX_NUM_DESC_SPILL << 2)
-
-enum {
-       SGMII_SPEED_10 = 0x00000000,
-       SGMII_SPEED_100 = 0x02000000,
-       SGMII_SPEED_1000 = 0x04000000,
-};
-
-enum tsv_rsv_reg {
-       TX_RX_64_BYTE_FRAME = 0x20,
-       TX_RX_64_127_BYTE_FRAME,
-       TX_RX_128_255_BYTE_FRAME,
-       TX_RX_256_511_BYTE_FRAME,
-       TX_RX_512_1023_BYTE_FRAME,
-       TX_RX_1024_1518_BYTE_FRAME,
-       TX_RX_1519_1522_VLAN_BYTE_FRAME,
-
-       RX_BYTE_COUNTER = 0x27,
-       RX_PACKET_COUNTER,
-       RX_FCS_ERROR_COUNTER,
-       RX_MULTICAST_PACKET_COUNTER,
-       RX_BROADCAST_PACKET_COUNTER,
-       RX_CONTROL_FRAME_PACKET_COUNTER,
-       RX_PAUSE_FRAME_PACKET_COUNTER,
-       RX_UNKNOWN_OP_CODE_COUNTER,
-       RX_ALIGNMENT_ERROR_COUNTER,
-       RX_FRAME_LENGTH_ERROR_COUNTER,
-       RX_CODE_ERROR_COUNTER,
-       RX_CARRIER_SENSE_ERROR_COUNTER,
-       RX_UNDERSIZE_PACKET_COUNTER,
-       RX_OVERSIZE_PACKET_COUNTER,
-       RX_FRAGMENTS_COUNTER,
-       RX_JABBER_COUNTER,
-       RX_DROP_PACKET_COUNTER,
-
-       TX_BYTE_COUNTER   = 0x38,
-       TX_PACKET_COUNTER,
-       TX_MULTICAST_PACKET_COUNTER,
-       TX_BROADCAST_PACKET_COUNTER,
-       TX_PAUSE_CONTROL_FRAME_COUNTER,
-       TX_DEFERRAL_PACKET_COUNTER,
-       TX_EXCESSIVE_DEFERRAL_PACKET_COUNTER,
-       TX_SINGLE_COLLISION_PACKET_COUNTER,
-       TX_MULTI_COLLISION_PACKET_COUNTER,
-       TX_LATE_COLLISION_PACKET_COUNTER,
-       TX_EXCESSIVE_COLLISION_PACKET_COUNTER,
-       TX_TOTAL_COLLISION_COUNTER,
-       TX_PAUSE_FRAME_HONERED_COUNTER,
-       TX_DROP_FRAME_COUNTER,
-       TX_JABBER_FRAME_COUNTER,
-       TX_FCS_ERROR_COUNTER,
-       TX_CONTROL_FRAME_COUNTER,
-       TX_OVERSIZE_FRAME_COUNTER,
-       TX_UNDERSIZE_FRAME_COUNTER,
-       TX_FRAGMENT_FRAME_COUNTER,
-
-       CARRY_REG_1 = 0x4c,
-       CARRY_REG_2 = 0x4d,
-};
-
-struct xlr_adapter {
-       struct net_device *netdev[4];
-};
-
-struct xlr_net_priv {
-       u32 __iomem *base_addr;
-       struct net_device *ndev;
-       struct xlr_adapter *adapter;
-       struct mii_bus *mii_bus;
-       int num_rx_desc;
-       int phy_addr;   /* PHY addr on MDIO bus */
-       int pcs_id;     /* PCS id on MDIO bus */
-       int port_id;    /* Port(gmac/xgmac) number, i.e 0-7 */
-       int tx_stnid;
-       u32 __iomem *mii_addr;
-       u32 __iomem *serdes_addr;
-       u32 __iomem *pcs_addr;
-       u32 __iomem *gpio_addr;
-       int phy_speed;
-       int port_type;
-       struct timer_list queue_timer;
-       int wakeup_q;
-       struct platform_device *pdev;
-       struct xlr_net_data *nd;
-
-       u64 *frin_spill;
-       u64 *frout_spill;
-       u64 *class_0_spill;
-       u64 *class_1_spill;
-       u64 *class_2_spill;
-       u64 *class_3_spill;
-};
-
-void xlr_set_gmac_speed(struct xlr_net_priv *priv);
index 55c3d4a..b4820ad 100644 (file)
@@ -107,6 +107,7 @@ static struct rt_channel_plan_map   RTW_ChannelPlanMap[RT_CHANNEL_DOMAIN_MAX] = {
        {0x01}, /* 0x10, RT_CHANNEL_DOMAIN_JAPAN */
        {0x02}, /* 0x11, RT_CHANNEL_DOMAIN_FCC_NO_DFS */
        {0x01}, /* 0x12, RT_CHANNEL_DOMAIN_JAPAN_NO_DFS */
+       {0x00}, /* 0x13 */
        {0x02}, /* 0x14, RT_CHANNEL_DOMAIN_TAIWAN_NO_DFS */
        {0x00}, /* 0x15, RT_CHANNEL_DOMAIN_ETSI_NO_DFS */
        {0x00}, /* 0x16, RT_CHANNEL_DOMAIN_KOREA_NO_DFS */
@@ -118,6 +119,7 @@ static struct rt_channel_plan_map   RTW_ChannelPlanMap[RT_CHANNEL_DOMAIN_MAX] = {
        {0x00}, /* 0x1C, */
        {0x00}, /* 0x1D, */
        {0x00}, /* 0x1E, */
+       {0x00}, /* 0x1F, */
        /*  0x20 ~ 0x7F , New Define ===== */
        {0x00}, /* 0x20, RT_CHANNEL_DOMAIN_WORLD_NULL */
        {0x01}, /* 0x21, RT_CHANNEL_DOMAIN_ETSI1_NULL */
@@ -6845,12 +6847,12 @@ void report_del_sta_event(struct adapter *padapter, unsigned char *MacAddr, unsi
        struct mlme_ext_priv            *pmlmeext = &padapter->mlmeextpriv;
        struct cmd_priv *pcmdpriv = &padapter->cmdpriv;
 
-       pcmd_obj = kzalloc(sizeof(struct cmd_obj), GFP_KERNEL);
+       pcmd_obj = kzalloc(sizeof(*pcmd_obj), GFP_ATOMIC);
        if (!pcmd_obj)
                return;
 
        cmdsz = (sizeof(struct stadel_event) + sizeof(struct C2HEvent_Header));
-       pevtcmd = kzalloc(cmdsz, GFP_KERNEL);
+       pevtcmd = kzalloc(cmdsz, GFP_ATOMIC);
        if (!pevtcmd) {
                kfree(pcmd_obj);
                return;
index 52d42e5..9404355 100644 (file)
@@ -1980,6 +1980,7 @@ static int rtw_wx_read32(struct net_device *dev,
        u32 data32;
        u32 bytes;
        u8 *ptmp;
+       int ret;
 
        padapter = (struct adapter *)rtw_netdev_priv(dev);
        p = &wrqu->data;
@@ -2007,12 +2008,17 @@ static int rtw_wx_read32(struct net_device *dev,
                break;
        default:
                DBG_88E(KERN_INFO "%s: usage> read [bytes],[address(hex)]\n", __func__);
-               return -EINVAL;
+               ret = -EINVAL;
+               goto err_free_ptmp;
        }
        DBG_88E(KERN_INFO "%s: addr = 0x%08X data =%s\n", __func__, addr, extra);
 
        kfree(ptmp);
        return 0;
+
+err_free_ptmp:
+       kfree(ptmp);
+       return ret;
 }
 
 static int rtw_wx_write32(struct net_device *dev,
index a9b6ffd..f7ce724 100644 (file)
@@ -112,7 +112,7 @@ void rtw_report_sec_ie(struct adapter *adapter, u8 authmode, u8 *sec_ie)
 
        buff = NULL;
        if (authmode == _WPA_IE_ID_) {
-               buff = kzalloc(IW_CUSTOM_MAX, GFP_KERNEL);
+               buff = kzalloc(IW_CUSTOM_MAX, GFP_ATOMIC);
                if (!buff)
                        return;
                p = buff;
index d2e9df6..b9ce718 100644 (file)
@@ -2549,13 +2549,14 @@ static void _rtl92e_pci_disconnect(struct pci_dev *pdev)
                        free_irq(dev->irq, dev);
                        priv->irq = 0;
                }
-               free_rtllib(dev);
 
                if (dev->mem_start != 0) {
                        iounmap((void __iomem *)dev->mem_start);
                        release_mem_region(pci_resource_start(pdev, 1),
                                        pci_resource_len(pdev, 1));
                }
+
+               free_rtllib(dev);
        }
 
        pci_disable_device(pdev);
index 0b65de9..95a88f6 100644 (file)
@@ -520,7 +520,7 @@ static ssize_t target_fabric_port_alua_tg_pt_gp_show(struct config_item *item,
 {
        struct se_lun *lun = item_to_lun(item);
 
-       if (!lun || !lun->lun_se_dev)
+       if (!lun->lun_se_dev)
                return -ENODEV;
 
        return core_alua_show_tg_pt_gp_info(lun, page);
@@ -531,7 +531,7 @@ static ssize_t target_fabric_port_alua_tg_pt_gp_store(struct config_item *item,
 {
        struct se_lun *lun = item_to_lun(item);
 
-       if (!lun || !lun->lun_se_dev)
+       if (!lun->lun_se_dev)
                return -ENODEV;
 
        return core_alua_store_tg_pt_gp_info(lun, page, count);
@@ -542,7 +542,7 @@ static ssize_t target_fabric_port_alua_tg_pt_offline_show(
 {
        struct se_lun *lun = item_to_lun(item);
 
-       if (!lun || !lun->lun_se_dev)
+       if (!lun->lun_se_dev)
                return -ENODEV;
 
        return core_alua_show_offline_bit(lun, page);
@@ -553,7 +553,7 @@ static ssize_t target_fabric_port_alua_tg_pt_offline_store(
 {
        struct se_lun *lun = item_to_lun(item);
 
-       if (!lun || !lun->lun_se_dev)
+       if (!lun->lun_se_dev)
                return -ENODEV;
 
        return core_alua_store_offline_bit(lun, page, count);
@@ -564,7 +564,7 @@ static ssize_t target_fabric_port_alua_tg_pt_status_show(
 {
        struct se_lun *lun = item_to_lun(item);
 
-       if (!lun || !lun->lun_se_dev)
+       if (!lun->lun_se_dev)
                return -ENODEV;
 
        return core_alua_show_secondary_status(lun, page);
@@ -575,7 +575,7 @@ static ssize_t target_fabric_port_alua_tg_pt_status_store(
 {
        struct se_lun *lun = item_to_lun(item);
 
-       if (!lun || !lun->lun_se_dev)
+       if (!lun->lun_se_dev)
                return -ENODEV;
 
        return core_alua_store_secondary_status(lun, page, count);
@@ -586,7 +586,7 @@ static ssize_t target_fabric_port_alua_tg_pt_write_md_show(
 {
        struct se_lun *lun = item_to_lun(item);
 
-       if (!lun || !lun->lun_se_dev)
+       if (!lun->lun_se_dev)
                return -ENODEV;
 
        return core_alua_show_secondary_write_metadata(lun, page);
@@ -597,7 +597,7 @@ static ssize_t target_fabric_port_alua_tg_pt_write_md_store(
 {
        struct se_lun *lun = item_to_lun(item);
 
-       if (!lun || !lun->lun_se_dev)
+       if (!lun->lun_se_dev)
                return -ENODEV;
 
        return core_alua_store_secondary_write_metadata(lun, page, count);
index 22703a0..4c76498 100644 (file)
@@ -40,11 +40,11 @@ static void spc_fill_alua_data(struct se_lun *lun, unsigned char *buf)
         *
         * See spc4r17 section 6.4.2 Table 135
         */
-       spin_lock(&lun->lun_tg_pt_gp_lock);
-       tg_pt_gp = lun->lun_tg_pt_gp;
+       rcu_read_lock();
+       tg_pt_gp = rcu_dereference(lun->lun_tg_pt_gp);
        if (tg_pt_gp)
                buf[5] |= tg_pt_gp->tg_pt_gp_alua_access_type;
-       spin_unlock(&lun->lun_tg_pt_gp_lock);
+       rcu_read_unlock();
 }
 
 static u16
@@ -325,14 +325,14 @@ check_t10_vend_desc:
                 * Get the PROTOCOL IDENTIFIER as defined by spc4r17
                 * section 7.5.1 Table 362
                 */
-               spin_lock(&lun->lun_tg_pt_gp_lock);
-               tg_pt_gp = lun->lun_tg_pt_gp;
+               rcu_read_lock();
+               tg_pt_gp = rcu_dereference(lun->lun_tg_pt_gp);
                if (!tg_pt_gp) {
-                       spin_unlock(&lun->lun_tg_pt_gp_lock);
+                       rcu_read_unlock();
                        goto check_lu_gp;
                }
                tg_pt_gp_id = tg_pt_gp->tg_pt_gp_id;
-               spin_unlock(&lun->lun_tg_pt_gp_lock);
+               rcu_read_unlock();
 
                buf[off] = tpg->proto_id << 4;
                buf[off++] |= 0x1; /* CODE SET == Binary */
index 4542482..d8c8683 100644 (file)
@@ -810,10 +810,9 @@ static int optee_ffa_probe(struct ffa_device *ffa_dev)
                return -EINVAL;
 
        optee = kzalloc(sizeof(*optee), GFP_KERNEL);
-       if (!optee) {
-               rc = -ENOMEM;
-               goto err;
-       }
+       if (!optee)
+               return -ENOMEM;
+
        optee->pool = optee_ffa_config_dyn_shm();
        if (IS_ERR(optee->pool)) {
                rc = PTR_ERR(optee->pool);
index f0bf01e..71e0dd2 100644 (file)
@@ -522,6 +522,7 @@ static struct xenbus_driver xencons_driver = {
        .remove = xencons_remove,
        .resume = xencons_resume,
        .otherend_changed = xencons_backend_changed,
+       .not_essential = true,
 };
 #endif /* CONFIG_HVC_XEN_FRONTEND */
 
index 7f656fa..5163d60 100644 (file)
@@ -237,6 +237,7 @@ struct brcmuart_priv {
        u32             rx_err;
        u32             rx_timeout;
        u32             rx_abort;
+       u32             saved_mctrl;
 };
 
 static struct dentry *brcmuart_debugfs_root;
@@ -1133,16 +1134,27 @@ static int brcmuart_remove(struct platform_device *pdev)
 static int __maybe_unused brcmuart_suspend(struct device *dev)
 {
        struct brcmuart_priv *priv = dev_get_drvdata(dev);
+       struct uart_8250_port *up = serial8250_get_port(priv->line);
+       struct uart_port *port = &up->port;
 
        serial8250_suspend_port(priv->line);
        clk_disable_unprepare(priv->baud_mux_clk);
 
+       /*
+        * This will prevent resume from enabling RTS before the
+        * baud rate has been restored.
+        */
+       priv->saved_mctrl = port->mctrl;
+       port->mctrl = 0;
+
        return 0;
 }
 
 static int __maybe_unused brcmuart_resume(struct device *dev)
 {
        struct brcmuart_priv *priv = dev_get_drvdata(dev);
+       struct uart_8250_port *up = serial8250_get_port(priv->line);
+       struct uart_port *port = &up->port;
        int ret;
 
        ret = clk_prepare_enable(priv->baud_mux_clk);
@@ -1165,6 +1177,7 @@ static int __maybe_unused brcmuart_resume(struct device *dev)
                start_rx_dma(serial8250_get_port(priv->line));
        }
        serial8250_resume_port(priv->line);
+       port->mctrl = priv->saved_mctrl;
        return 0;
 }
 
index 5d43de1..60f8fff 100644 (file)
@@ -1324,29 +1324,33 @@ pericom_do_set_divisor(struct uart_port *port, unsigned int baud,
 {
        int scr;
        int lcr;
-       int actual_baud;
-       int tolerance;
 
-       for (scr = 5 ; scr <= 15 ; scr++) {
-               actual_baud = 921600 * 16 / scr;
-               tolerance = actual_baud / 50;
+       for (scr = 16; scr > 4; scr--) {
+               unsigned int maxrate = port->uartclk / scr;
+               unsigned int divisor = max(maxrate / baud, 1U);
+               int delta = maxrate / divisor - baud;
 
-               if ((baud < actual_baud + tolerance) &&
-                       (baud > actual_baud - tolerance)) {
+               if (baud > maxrate + baud / 50)
+                       continue;
 
+               if (delta > baud / 50)
+                       divisor++;
+
+               if (divisor > 0xffff)
+                       continue;
+
+               /* Update delta due to possible divisor change */
+               delta = maxrate / divisor - baud;
+               if (abs(delta) < baud / 50) {
                        lcr = serial_port_in(port, UART_LCR);
                        serial_port_out(port, UART_LCR, lcr | 0x80);
-
-                       serial_port_out(port, UART_DLL, 1);
-                       serial_port_out(port, UART_DLM, 0);
+                       serial_port_out(port, UART_DLL, divisor & 0xff);
+                       serial_port_out(port, UART_DLM, divisor >> 8 & 0xff);
                        serial_port_out(port, 2, 16 - scr);
                        serial_port_out(port, UART_LCR, lcr);
                        return;
-               } else if (baud > actual_baud) {
-                       break;
                }
        }
-       serial8250_do_set_divisor(port, baud, quot, quot_frac);
 }
 static int pci_pericom_setup(struct serial_private *priv,
                  const struct pciserial_board *board,
@@ -2291,12 +2295,19 @@ static struct pci_serial_quirk pci_serial_quirks[] = {
                .setup      = pci_pericom_setup_four_at_eight,
        },
        {
-               .vendor     = PCI_DEVICE_ID_ACCESIO_PCIE_ICM_4S,
+               .vendor     = PCI_VENDOR_ID_ACCESIO,
                .device     = PCI_DEVICE_ID_ACCESIO_PCIE_ICM232_4,
                .subvendor  = PCI_ANY_ID,
                .subdevice  = PCI_ANY_ID,
                .setup      = pci_pericom_setup_four_at_eight,
        },
+       {
+               .vendor     = PCI_VENDOR_ID_ACCESIO,
+               .device     = PCI_DEVICE_ID_ACCESIO_PCIE_ICM_4S,
+               .subvendor  = PCI_ANY_ID,
+               .subdevice  = PCI_ANY_ID,
+               .setup      = pci_pericom_setup_four_at_eight,
+       },
        {
                .vendor     = PCI_VENDOR_ID_ACCESIO,
                .device     = PCI_DEVICE_ID_ACCESIO_MPCIE_ICM232_4,
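/*
 * Standalone userspace sketch (not part of the patch) of the divisor search
 * in the pericom_do_set_divisor() rewrite above: walk the sample-clock
 * prescaler (scr) from 16 down to 5, derive a 16-bit divisor, and accept the
 * first combination whose actual rate lands within baud/50 (2%) of the
 * request. The 921600 * 16 base clock and the test baud rate are assumptions
 * chosen for illustration, not values taken from the patch.
 */
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	unsigned int uartclk = 921600 * 16;	/* assumed base clock */
	unsigned int baud = 230400;		/* requested rate */

	for (int scr = 16; scr > 4; scr--) {
		unsigned int maxrate = uartclk / scr;
		unsigned int divisor = maxrate / baud ? maxrate / baud : 1;
		int delta = maxrate / divisor - baud;

		if (baud > maxrate + baud / 50)
			continue;		/* too slow even with divisor 1 */
		if (delta > (int)(baud / 50))
			divisor++;		/* round toward the target rate */
		if (divisor > 0xffff)
			continue;

		/* Update delta due to possible divisor change */
		delta = maxrate / divisor - baud;
		if (abs(delta) < (int)(baud / 50)) {
			printf("scr=%d divisor=%u actual=%u delta=%d\n",
			       scr, divisor, maxrate / divisor, delta);
			return 0;
		}
	}
	printf("no divisor within tolerance\n");
	return 1;
}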
index 5775cbf..46e2079 100644 (file)
@@ -2024,13 +2024,6 @@ void serial8250_do_set_mctrl(struct uart_port *port, unsigned int mctrl)
        struct uart_8250_port *up = up_to_u8250p(port);
        unsigned char mcr;
 
-       if (port->rs485.flags & SER_RS485_ENABLED) {
-               if (serial8250_in_MCR(up) & UART_MCR_RTS)
-                       mctrl |= TIOCM_RTS;
-               else
-                       mctrl &= ~TIOCM_RTS;
-       }
-
        mcr = serial8250_TIOCM_to_MCR(mctrl);
 
        mcr = (mcr & up->mcr_mask) | up->mcr_force | up->mcr;
index 6ff94cf..fc543ac 100644 (file)
@@ -1533,7 +1533,7 @@ config SERIAL_LITEUART
        tristate "LiteUART serial port support"
        depends on HAS_IOMEM
        depends on OF || COMPILE_TEST
-       depends on LITEX
+       depends on LITEX || COMPILE_TEST
        select SERIAL_CORE
        help
          This driver is for the FPGA-based LiteUART serial controller from LiteX
index d361cd8..52518a6 100644 (file)
@@ -2947,6 +2947,7 @@ MODULE_DEVICE_TABLE(of, sbsa_uart_of_match);
 
 static const struct acpi_device_id __maybe_unused sbsa_uart_acpi_match[] = {
        { "ARMH0011", 0 },
+       { "ARMHB000", 0 },
        {},
 };
 MODULE_DEVICE_TABLE(acpi, sbsa_uart_acpi_match);
index b1e7190..ac5112d 100644 (file)
@@ -2625,6 +2625,7 @@ OF_EARLYCON_DECLARE(lpuart, "fsl,vf610-lpuart", lpuart_early_console_setup);
 OF_EARLYCON_DECLARE(lpuart32, "fsl,ls1021a-lpuart", lpuart32_early_console_setup);
 OF_EARLYCON_DECLARE(lpuart32, "fsl,ls1028a-lpuart", ls1028a_early_console_setup);
 OF_EARLYCON_DECLARE(lpuart32, "fsl,imx7ulp-lpuart", lpuart32_imx_early_console_setup);
+OF_EARLYCON_DECLARE(lpuart32, "fsl,imx8qxp-lpuart", lpuart32_imx_early_console_setup);
 EARLYCON_DECLARE(lpuart, lpuart_early_console_setup);
 EARLYCON_DECLARE(lpuart32, lpuart32_early_console_setup);
 
index dbc0559..2941659 100644 (file)
@@ -270,8 +270,10 @@ static int liteuart_probe(struct platform_device *pdev)
 
        /* get membase */
        port->membase = devm_platform_get_and_ioremap_resource(pdev, 0, NULL);
-       if (IS_ERR(port->membase))
-               return PTR_ERR(port->membase);
+       if (IS_ERR(port->membase)) {
+               ret = PTR_ERR(port->membase);
+               goto err_erase_id;
+       }
 
        /* values not from device tree */
        port->dev = &pdev->dev;
@@ -285,7 +287,18 @@ static int liteuart_probe(struct platform_device *pdev)
        port->line = dev_id;
        spin_lock_init(&port->lock);
 
-       return uart_add_one_port(&liteuart_driver, &uart->port);
+       platform_set_drvdata(pdev, port);
+
+       ret = uart_add_one_port(&liteuart_driver, &uart->port);
+       if (ret)
+               goto err_erase_id;
+
+       return 0;
+
+err_erase_id:
+       xa_erase(&liteuart_array, uart->id);
+
+       return ret;
 }
 
 static int liteuart_remove(struct platform_device *pdev)
@@ -293,6 +306,7 @@ static int liteuart_remove(struct platform_device *pdev)
        struct uart_port *port = platform_get_drvdata(pdev);
        struct liteuart_port *uart = to_liteuart_port(port);
 
+       uart_remove_one_port(&liteuart_driver, port);
        xa_erase(&liteuart_array, uart->id);
 
        return 0;
index fcef7a9..489d192 100644 (file)
@@ -598,6 +598,9 @@ static void msm_start_rx_dma(struct msm_port *msm_port)
        u32 val;
        int ret;
 
+       if (IS_ENABLED(CONFIG_CONSOLE_POLL))
+               return;
+
        if (!dma->chan)
                return;
 
index 45e2e41..b6223fa 100644 (file)
@@ -1506,7 +1506,7 @@ static struct tegra_uart_chip_data tegra20_uart_chip_data = {
        .fifo_mode_enable_status        = false,
        .uart_max_port                  = 5,
        .max_dma_burst_bytes            = 4,
-       .error_tolerance_low_range      = 0,
+       .error_tolerance_low_range      = -4,
        .error_tolerance_high_range     = 4,
 };
 
@@ -1517,7 +1517,7 @@ static struct tegra_uart_chip_data tegra30_uart_chip_data = {
        .fifo_mode_enable_status        = false,
        .uart_max_port                  = 5,
        .max_dma_burst_bytes            = 4,
-       .error_tolerance_low_range      = 0,
+       .error_tolerance_low_range      = -4,
        .error_tolerance_high_range     = 4,
 };
 
index 1e738f2..61e3dd0 100644 (file)
@@ -1075,6 +1075,11 @@ uart_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear)
                goto out;
 
        if (!tty_io_error(tty)) {
+               if (uport->rs485.flags & SER_RS485_ENABLED) {
+                       set &= ~TIOCM_RTS;
+                       clear &= ~TIOCM_RTS;
+               }
+
                uart_update_mctrl(uport, set, clear);
                ret = 0;
        }
@@ -1549,6 +1554,7 @@ static void uart_tty_port_shutdown(struct tty_port *port)
 {
        struct uart_state *state = container_of(port, struct uart_state, port);
        struct uart_port *uport = uart_port_check(state);
+       char *buf;
 
        /*
         * At this point, we stop accepting input.  To do this, we
@@ -1570,8 +1576,18 @@ static void uart_tty_port_shutdown(struct tty_port *port)
         */
        tty_port_set_suspended(port, 0);
 
-       uart_change_pm(state, UART_PM_STATE_OFF);
+       /*
+        * Free the transmit buffer.
+        */
+       spin_lock_irq(&uport->lock);
+       buf = state->xmit.buf;
+       state->xmit.buf = NULL;
+       spin_unlock_irq(&uport->lock);
 
+       if (buf)
+               free_page((unsigned long)buf);
+
+       uart_change_pm(state, UART_PM_STATE_OFF);
 }
 
 static void uart_wait_until_sent(struct tty_struct *tty, int timeout)
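/*
 * Standalone illustration (not the serial core itself) of the uart_tiocmset()
 * rule added above: when RS485 is enabled the driver owns RTS, so a
 * TIOCMSET/TIOCMBIS/TIOCMBIC request simply has the RTS bit stripped from
 * both masks before they are applied. The TIOCM_* values mirror the Linux
 * termios definitions; rs485_enabled stands in for the SER_RS485_ENABLED check.
 */
#include <stdio.h>

#define TIOCM_DTR	0x002
#define TIOCM_RTS	0x004

int main(void)
{
	unsigned int set = TIOCM_RTS | TIOCM_DTR;	/* what userspace asked for */
	unsigned int clear = 0;
	int rs485_enabled = 1;				/* assume RS485 mode is active */

	if (rs485_enabled) {
		set &= ~TIOCM_RTS;
		clear &= ~TIOCM_RTS;
	}

	printf("effective set=0x%x clear=0x%x\n", set, clear);
	return 0;
}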
index 1f3b4a1..f9af7eb 100644 (file)
@@ -337,19 +337,6 @@ static void cdns3_ep_inc_deq(struct cdns3_endpoint *priv_ep)
        cdns3_ep_inc_trb(&priv_ep->dequeue, &priv_ep->ccs, priv_ep->num_trbs);
 }
 
-static void cdns3_move_deq_to_next_trb(struct cdns3_request *priv_req)
-{
-       struct cdns3_endpoint *priv_ep = priv_req->priv_ep;
-       int current_trb = priv_req->start_trb;
-
-       while (current_trb != priv_req->end_trb) {
-               cdns3_ep_inc_deq(priv_ep);
-               current_trb = priv_ep->dequeue;
-       }
-
-       cdns3_ep_inc_deq(priv_ep);
-}
-
 /**
  * cdns3_allow_enable_l1 - enable/disable permits to transition to L1.
  * @priv_dev: Extended gadget object
@@ -1517,10 +1504,11 @@ static void cdns3_transfer_completed(struct cdns3_device *priv_dev,
 
                trb = priv_ep->trb_pool + priv_ep->dequeue;
 
-               /* Request was dequeued and TRB was changed to TRB_LINK. */
-               if (TRB_FIELD_TO_TYPE(le32_to_cpu(trb->control)) == TRB_LINK) {
+               /* The TRB was changed to a link TRB, and the request was handled in ep_dequeue */
+               while (TRB_FIELD_TO_TYPE(le32_to_cpu(trb->control)) == TRB_LINK) {
                        trace_cdns3_complete_trb(priv_ep, trb);
-                       cdns3_move_deq_to_next_trb(priv_req);
+                       cdns3_ep_inc_deq(priv_ep);
+                       trb = priv_ep->trb_pool + priv_ep->dequeue;
                }
 
                if (!request->stream_id) {
index ad9aee3..97866bf 100644 (file)
@@ -987,6 +987,9 @@ int cdnsp_endpoint_init(struct cdnsp_device *pdev,
 
        /* Set up the endpoint ring. */
        pep->ring = cdnsp_ring_alloc(pdev, 2, ring_type, max_packet, mem_flags);
+       if (!pep->ring)
+               return -ENOMEM;
+
        pep->skip = false;
 
        /* Fill the endpoint context */
index f1d1006..097142f 100644 (file)
@@ -420,15 +420,15 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev)
        data->phy = devm_usb_get_phy_by_phandle(dev, "fsl,usbphy", 0);
        if (IS_ERR(data->phy)) {
                ret = PTR_ERR(data->phy);
-               if (ret == -ENODEV) {
-                       data->phy = devm_usb_get_phy_by_phandle(dev, "phys", 0);
-                       if (IS_ERR(data->phy)) {
-                               ret = PTR_ERR(data->phy);
-                               if (ret == -ENODEV)
-                                       data->phy = NULL;
-                               else
-                                       goto err_clk;
-                       }
+               if (ret != -ENODEV)
+                       goto err_clk;
+               data->phy = devm_usb_get_phy_by_phandle(dev, "phys", 0);
+               if (IS_ERR(data->phy)) {
+                       ret = PTR_ERR(data->phy);
+                       if (ret == -ENODEV)
+                               data->phy = NULL;
+                       else
+                               goto err_clk;
                }
        }
 
index 86658a8..00070a8 100644 (file)
@@ -4700,8 +4700,6 @@ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1,
        if (oldspeed == USB_SPEED_LOW)
                delay = HUB_LONG_RESET_TIME;
 
-       mutex_lock(hcd->address0_mutex);
-
        /* Reset the device; full speed may morph to high speed */
        /* FIXME a USB 2.0 device may morph into SuperSpeed on reset. */
        retval = hub_port_reset(hub, port1, udev, delay, false);
@@ -5016,7 +5014,6 @@ fail:
                hub_port_disable(hub, port1, 0);
                update_devnum(udev, devnum);    /* for disconnect processing */
        }
-       mutex_unlock(hcd->address0_mutex);
        return retval;
 }
 
@@ -5191,6 +5188,7 @@ static void hub_port_connect(struct usb_hub *hub, int port1, u16 portstatus,
        struct usb_port *port_dev = hub->ports[port1 - 1];
        struct usb_device *udev = port_dev->child;
        static int unreliable_port = -1;
+       bool retry_locked;
 
        /* Disconnect any existing devices under this port */
        if (udev) {
@@ -5246,8 +5244,11 @@ static void hub_port_connect(struct usb_hub *hub, int port1, u16 portstatus,
                unit_load = 100;
 
        status = 0;
-       for (i = 0; i < PORT_INIT_TRIES; i++) {
 
+       for (i = 0; i < PORT_INIT_TRIES; i++) {
+               usb_lock_port(port_dev);
+               mutex_lock(hcd->address0_mutex);
+               retry_locked = true;
                /* reallocate for each attempt, since references
                 * to the previous one can escape in various ways
                 */
@@ -5255,6 +5256,8 @@ static void hub_port_connect(struct usb_hub *hub, int port1, u16 portstatus,
                if (!udev) {
                        dev_err(&port_dev->dev,
                                        "couldn't allocate usb_device\n");
+                       mutex_unlock(hcd->address0_mutex);
+                       usb_unlock_port(port_dev);
                        goto done;
                }
 
@@ -5276,12 +5279,14 @@ static void hub_port_connect(struct usb_hub *hub, int port1, u16 portstatus,
                }
 
                /* reset (non-USB 3.0 devices) and get descriptor */
-               usb_lock_port(port_dev);
                status = hub_port_init(hub, udev, port1, i);
-               usb_unlock_port(port_dev);
                if (status < 0)
                        goto loop;
 
+               mutex_unlock(hcd->address0_mutex);
+               usb_unlock_port(port_dev);
+               retry_locked = false;
+
                if (udev->quirks & USB_QUIRK_DELAY_INIT)
                        msleep(2000);
 
@@ -5374,6 +5379,10 @@ loop:
                usb_ep0_reinit(udev);
                release_devnum(udev);
                hub_free_dev(udev);
+               if (retry_locked) {
+                       mutex_unlock(hcd->address0_mutex);
+                       usb_unlock_port(port_dev);
+               }
                usb_put_dev(udev);
                if ((status == -ENOTCONN) || (status == -ENOTSUPP))
                        break;
@@ -5915,6 +5924,8 @@ static int usb_reset_and_verify_device(struct usb_device *udev)
        bos = udev->bos;
        udev->bos = NULL;
 
+       mutex_lock(hcd->address0_mutex);
+
        for (i = 0; i < PORT_INIT_TRIES; ++i) {
 
                /* ep0 maxpacket size may change; let the HCD know about it.
@@ -5924,6 +5935,7 @@ static int usb_reset_and_verify_device(struct usb_device *udev)
                if (ret >= 0 || ret == -ENOTCONN || ret == -ENODEV)
                        break;
        }
+       mutex_unlock(hcd->address0_mutex);
 
        if (ret < 0)
                goto re_enumerate;
index 8239fe7..019351c 100644 (file)
@@ -434,6 +434,9 @@ static const struct usb_device_id usb_quirk_list[] = {
        { USB_DEVICE(0x1532, 0x0116), .driver_info =
                        USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL },
 
+       /* Lenovo Powered USB-C Travel Hub (4X90S92381, RTL8153 GigE) */
+       { USB_DEVICE(0x17ef, 0x721e), .driver_info = USB_QUIRK_NO_LPM },
+
        /* Lenovo ThinkCenter A630Z TI024Gen3 usb-audio */
        { USB_DEVICE(0x17ef, 0xa012), .driver_info =
                        USB_QUIRK_DISCONNECT_SUSPEND },
index 4ab4a1d..ab8d7da 100644 (file)
@@ -1198,6 +1198,8 @@ static void dwc2_hsotg_start_req(struct dwc2_hsotg *hsotg,
                        }
                        ctrl |= DXEPCTL_CNAK;
                } else {
+                       hs_req->req.frame_number = hs_ep->target_frame;
+                       hs_req->req.actual = 0;
                        dwc2_hsotg_complete_request(hsotg, hs_ep, hs_req, -ENODATA);
                        return;
                }
@@ -2857,9 +2859,12 @@ static void dwc2_gadget_handle_ep_disabled(struct dwc2_hsotg_ep *hs_ep)
 
        do {
                hs_req = get_ep_head(hs_ep);
-               if (hs_req)
+               if (hs_req) {
+                       hs_req->req.frame_number = hs_ep->target_frame;
+                       hs_req->req.actual = 0;
                        dwc2_hsotg_complete_request(hsotg, hs_ep, hs_req,
                                                    -ENODATA);
+               }
                dwc2_gadget_incr_frame_num(hs_ep);
                /* Update current frame number value. */
                hsotg->frame_number = dwc2_hsotg_read_frameno(hsotg);
@@ -2912,8 +2917,11 @@ static void dwc2_gadget_handle_out_token_ep_disabled(struct dwc2_hsotg_ep *ep)
 
        while (dwc2_gadget_target_frame_elapsed(ep)) {
                hs_req = get_ep_head(ep);
-               if (hs_req)
+               if (hs_req) {
+                       hs_req->req.frame_number = ep->target_frame;
+                       hs_req->req.actual = 0;
                        dwc2_hsotg_complete_request(hsotg, ep, hs_req, -ENODATA);
+               }
 
                dwc2_gadget_incr_frame_num(ep);
                /* Update current frame number value. */
@@ -3002,8 +3010,11 @@ static void dwc2_gadget_handle_nak(struct dwc2_hsotg_ep *hs_ep)
 
        while (dwc2_gadget_target_frame_elapsed(hs_ep)) {
                hs_req = get_ep_head(hs_ep);
-               if (hs_req)
+               if (hs_req) {
+                       hs_req->req.frame_number = hs_ep->target_frame;
+                       hs_req->req.actual = 0;
                        dwc2_hsotg_complete_request(hsotg, hs_ep, hs_req, -ENODATA);
+               }
 
                dwc2_gadget_incr_frame_num(hs_ep);
                /* Update current frame number value. */
index 89a7883..24beff6 100644 (file)
@@ -59,7 +59,7 @@
 #define DWC2_UNRESERVE_DELAY (msecs_to_jiffies(5))
 
 /* If we get a NAK, wait this long before retrying */
-#define DWC2_RETRY_WAIT_DELAY (1 * 1E6L)
+#define DWC2_RETRY_WAIT_DELAY (1 * NSEC_PER_MSEC)
 
 /**
  * dwc2_periodic_channel_available() - Checks that a channel is available for a
index 643239d..f4c0995 100644 (file)
@@ -1594,9 +1594,11 @@ static int dwc3_probe(struct platform_device *pdev)
 
        dwc3_get_properties(dwc);
 
-       ret = dma_set_mask_and_coherent(dwc->sysdev, DMA_BIT_MASK(64));
-       if (ret)
-               return ret;
+       if (!dwc->sysdev_is_parent) {
+               ret = dma_set_mask_and_coherent(dwc->sysdev, DMA_BIT_MASK(64));
+               if (ret)
+                       return ret;
+       }
 
        dwc->reset = devm_reset_control_array_get_optional_shared(dev);
        if (IS_ERR(dwc->reset))
index 620c8d3..5c491d0 100644 (file)
 #define DWC3_GHWPARAMS8                0xc600
 #define DWC3_GUCTL3            0xc60c
 #define DWC3_GFLADJ            0xc630
-#define DWC3_GHWPARAMS9                0xc680
+#define DWC3_GHWPARAMS9                0xc6e0
 
 /* Device Registers */
 #define DWC3_DCFG              0xc700
index 23de2a5..7e3db00 100644 (file)
@@ -310,13 +310,24 @@ int dwc3_send_gadget_ep_cmd(struct dwc3_ep *dep, unsigned int cmd,
        if (DWC3_DEPCMD_CMD(cmd) == DWC3_DEPCMD_STARTTRANSFER) {
                int link_state;
 
+               /*
+                * Initiate remote wakeup if the link state is in U3 when
+                * operating in SS/SSP or L1/L2 when operating in HS/FS. If the
+                * link state is in U1/U2, no remote wakeup is needed. The Start
+                * Transfer command will initiate the link recovery.
+                */
                link_state = dwc3_gadget_get_link_state(dwc);
-               if (link_state == DWC3_LINK_STATE_U1 ||
-                   link_state == DWC3_LINK_STATE_U2 ||
-                   link_state == DWC3_LINK_STATE_U3) {
+               switch (link_state) {
+               case DWC3_LINK_STATE_U2:
+                       if (dwc->gadget->speed >= USB_SPEED_SUPER)
+                               break;
+
+                       fallthrough;
+               case DWC3_LINK_STATE_U3:
                        ret = __dwc3_gadget_wakeup(dwc);
                        dev_WARN_ONCE(dwc->dev, ret, "wakeup failed --> %d\n",
                                        ret);
+                       break;
                }
        }
 
@@ -3252,6 +3263,9 @@ static bool dwc3_gadget_endpoint_trbs_complete(struct dwc3_ep *dep,
        struct dwc3             *dwc = dep->dwc;
        bool                    no_started_trb = true;
 
+       if (!dep->endpoint.desc)
+               return no_started_trb;
+
        dwc3_gadget_ep_cleanup_completed_requests(dep, event, status);
 
        if (dep->flags & DWC3_EP_END_TRANSFER_PENDING)
@@ -3299,6 +3313,9 @@ static void dwc3_gadget_endpoint_transfer_in_progress(struct dwc3_ep *dep,
 {
        int status = 0;
 
+       if (!dep->endpoint.desc)
+               return;
+
        if (usb_endpoint_xfer_isoc(dep->endpoint.desc))
                dwc3_gadget_endpoint_frame_from_event(dep, event);
 
@@ -3352,6 +3369,14 @@ static void dwc3_gadget_endpoint_command_complete(struct dwc3_ep *dep,
        if (cmd != DWC3_DEPCMD_ENDTRANSFER)
                return;
 
+       /*
+        * The END_TRANSFER command will cause the controller to generate a
+        * NoStream Event, and it's not due to the host DP NoStream rejection.
+        * Ignore the next NoStream event.
+        */
+       if (dep->stream_capable)
+               dep->flags |= DWC3_EP_IGNORE_NEXT_NOSTREAM;
+
        dep->flags &= ~DWC3_EP_END_TRANSFER_PENDING;
        dep->flags &= ~DWC3_EP_TRANSFER_STARTED;
        dwc3_gadget_ep_cleanup_cancelled_requests(dep);
@@ -3574,14 +3599,6 @@ static void dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force,
        WARN_ON_ONCE(ret);
        dep->resource_index = 0;
 
-       /*
-        * The END_TRANSFER command will cause the controller to generate a
-        * NoStream Event, and it's not due to the host DP NoStream rejection.
-        * Ignore the next NoStream event.
-        */
-       if (dep->stream_capable)
-               dep->flags |= DWC3_EP_IGNORE_NEXT_NOSTREAM;
-
        if (!interrupt)
                dep->flags &= ~DWC3_EP_TRANSFER_STARTED;
        else
index f5ca670..857159d 100644 (file)
@@ -2136,7 +2136,7 @@ static int xudc_probe(struct platform_device *pdev)
 
        ret = usb_add_gadget_udc(&pdev->dev, &udc->gadget);
        if (ret)
-               goto fail;
+               goto err_disable_unprepare_clk;
 
        udc->dev = &udc->gadget.dev;
 
@@ -2155,6 +2155,9 @@ static int xudc_probe(struct platform_device *pdev)
                 udc->dma_enabled ? "with DMA" : "without DMA");
 
        return 0;
+
+err_disable_unprepare_clk:
+       clk_disable_unprepare(udc->clk);
 fail:
        dev_err(&pdev->dev, "probe failed, %d\n", ret);
        return ret;
index 311597b..eaa49ae 100644 (file)
@@ -366,7 +366,9 @@ static void xhci_handle_stopped_cmd_ring(struct xhci_hcd *xhci,
 /* Must be called with xhci->lock held, releases and acquires lock back */
 static int xhci_abort_cmd_ring(struct xhci_hcd *xhci, unsigned long flags)
 {
-       u32 temp_32;
+       struct xhci_segment *new_seg    = xhci->cmd_ring->deq_seg;
+       union xhci_trb *new_deq         = xhci->cmd_ring->dequeue;
+       u64 crcr;
        int ret;
 
        xhci_dbg(xhci, "Abort command ring\n");
@@ -375,13 +377,18 @@ static int xhci_abort_cmd_ring(struct xhci_hcd *xhci, unsigned long flags)
 
        /*
         * The control bits like command stop, abort are located in lower
-        * dword of the command ring control register. Limit the write
-        * to the lower dword to avoid corrupting the command ring pointer
-        * in case if the command ring is stopped by the time upper dword
-        * is written.
+        * dword of the command ring control register.
+        * Some controllers require all 64 bits to be written to abort the ring.
+        * Make sure the upper dword is valid, pointing to the next command,
+        * to avoid corrupting the command ring pointer in case the command ring
+        * is stopped by the time the upper dword is written.
         */
-       temp_32 = readl(&xhci->op_regs->cmd_ring);
-       writel(temp_32 | CMD_RING_ABORT, &xhci->op_regs->cmd_ring);
+       next_trb(xhci, NULL, &new_seg, &new_deq);
+       if (trb_is_link(new_deq))
+               next_trb(xhci, NULL, &new_seg, &new_deq);
+
+       crcr = xhci_trb_virt_to_dma(new_seg, new_deq);
+       xhci_write_64(xhci, crcr | CMD_RING_ABORT, &xhci->op_regs->cmd_ring);
 
        /* Section 4.6.1.2 of xHCI 1.0 spec says software should also time the
         * completion of the Command Abort operation. If CRR is not negated in 5
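/*
 * Minimal sketch (not driver code) of the point made in the comment above:
 * the abort request and the dequeue pointer share one 64-bit register, so
 * both halves are written together with a consistent value. The abort bit
 * position and the TRB address below are assumptions for illustration only.
 */
#include <stdint.h>
#include <stdio.h>

#define CMD_RING_ABORT	(1ULL << 2)	/* assumed abort-bit position for the sketch */

int main(void)
{
	uint64_t next_cmd_trb_dma = 0x12345f40ULL;	/* hypothetical next-command TRB address */

	/* Low bits of the register hold control flags, so the pointer is kept aligned. */
	uint64_t crcr = (next_cmd_trb_dma & ~0x3fULL) | CMD_RING_ABORT;

	printf("64-bit command ring control write: 0x%016llx\n",
	       (unsigned long long)crcr);
	return 0;
}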
index 1bf494b..c8af2cd 100644 (file)
@@ -1400,6 +1400,7 @@ static void tegra_xusb_deinit_usb_phy(struct tegra_xusb *tegra)
 
 static int tegra_xusb_probe(struct platform_device *pdev)
 {
+       struct of_phandle_args args;
        struct tegra_xusb *tegra;
        struct device_node *np;
        struct resource *regs;
@@ -1454,10 +1455,17 @@ static int tegra_xusb_probe(struct platform_device *pdev)
                goto put_padctl;
        }
 
-       tegra->padctl_irq = of_irq_get(np, 0);
-       if (tegra->padctl_irq <= 0) {
-               err = (tegra->padctl_irq == 0) ? -ENODEV : tegra->padctl_irq;
-               goto put_padctl;
+       /* Older device-trees don't have padctrl interrupt */
+       err = of_irq_parse_one(np, 0, &args);
+       if (!err) {
+               tegra->padctl_irq = of_irq_get(np, 0);
+               if (tegra->padctl_irq <= 0) {
+                       err = (tegra->padctl_irq == 0) ? -ENODEV : tegra->padctl_irq;
+                       goto put_padctl;
+               }
+       } else {
+               dev_dbg(&pdev->dev,
+                       "%pOF is missing an interrupt, disabling PM support\n", np);
        }
 
        tegra->host_clk = devm_clk_get(&pdev->dev, "xusb_host");
@@ -1696,11 +1704,15 @@ static int tegra_xusb_probe(struct platform_device *pdev)
                goto remove_usb3;
        }
 
-       err = devm_request_threaded_irq(&pdev->dev, tegra->padctl_irq, NULL, tegra_xusb_padctl_irq,
-                                       IRQF_ONESHOT, dev_name(&pdev->dev), tegra);
-       if (err < 0) {
-               dev_err(&pdev->dev, "failed to request padctl IRQ: %d\n", err);
-               goto remove_usb3;
+       if (tegra->padctl_irq) {
+               err = devm_request_threaded_irq(&pdev->dev, tegra->padctl_irq,
+                                               NULL, tegra_xusb_padctl_irq,
+                                               IRQF_ONESHOT, dev_name(&pdev->dev),
+                                               tegra);
+               if (err < 0) {
+                       dev_err(&pdev->dev, "failed to request padctl IRQ: %d\n", err);
+                       goto remove_usb3;
+               }
        }
 
        err = tegra_xusb_enable_firmware_messages(tegra);
@@ -1718,13 +1730,16 @@ static int tegra_xusb_probe(struct platform_device *pdev)
        /* Enable wake for both USB 2.0 and USB 3.0 roothubs */
        device_init_wakeup(&tegra->hcd->self.root_hub->dev, true);
        device_init_wakeup(&xhci->shared_hcd->self.root_hub->dev, true);
-       device_init_wakeup(tegra->dev, true);
 
        pm_runtime_use_autosuspend(tegra->dev);
        pm_runtime_set_autosuspend_delay(tegra->dev, 2000);
        pm_runtime_mark_last_busy(tegra->dev);
        pm_runtime_set_active(tegra->dev);
-       pm_runtime_enable(tegra->dev);
+
+       if (tegra->padctl_irq) {
+               device_init_wakeup(tegra->dev, true);
+               pm_runtime_enable(tegra->dev);
+       }
 
        return 0;
 
@@ -1772,7 +1787,9 @@ static int tegra_xusb_remove(struct platform_device *pdev)
        dma_free_coherent(&pdev->dev, tegra->fw.size, tegra->fw.virt,
                          tegra->fw.phys);
 
-       pm_runtime_disable(&pdev->dev);
+       if (tegra->padctl_irq)
+               pm_runtime_disable(&pdev->dev);
+
        pm_runtime_put(&pdev->dev);
 
        tegra_xusb_powergate_partitions(tegra);
index a484ff5..546fce4 100644 (file)
@@ -1267,6 +1267,8 @@ static const struct usb_device_id option_ids[] = {
          .driver_info = NCTRL(2) },
        { USB_DEVICE(TELIT_VENDOR_ID, 0x9010),                          /* Telit SBL FN980 flashing device */
          .driver_info = NCTRL(0) | ZLP },
+       { USB_DEVICE(TELIT_VENDOR_ID, 0x9200),                          /* Telit LE910S1 flashing device */
+         .driver_info = NCTRL(0) | ZLP },
        { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_MF622, 0xff, 0xff, 0xff) }, /* ZTE WCDMA products */
        { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0002, 0xff, 0xff, 0xff),
          .driver_info = RSVD(1) },
@@ -2094,6 +2096,9 @@ static const struct usb_device_id option_ids[] = {
        { USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x010b, 0xff, 0xff, 0x30) },    /* Fibocom FG150 Diag */
        { USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x010b, 0xff, 0, 0) },          /* Fibocom FG150 AT */
        { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a0, 0xff) },                   /* Fibocom NL668-AM/NL652-EU (laptop MBIM) */
+       { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a2, 0xff) },                   /* Fibocom FM101-GL (laptop MBIM) */
+       { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a4, 0xff),                     /* Fibocom FM101-GL (laptop MBIM) */
+         .driver_info = RSVD(4) },
        { USB_DEVICE_INTERFACE_CLASS(0x2df3, 0x9d03, 0xff) },                   /* LongSung M5710 */
        { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1404, 0xff) },                   /* GosunCn GM500 RNDIS */
        { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1405, 0xff) },                   /* GosunCn GM500 MBIM */
index f45ca7d..a70fd86 100644 (file)
@@ -432,6 +432,7 @@ static int pl2303_detect_type(struct usb_serial *serial)
        case 0x200:
                switch (bcdDevice) {
                case 0x100:
+               case 0x105:
                case 0x305:
                case 0x405:
                        /*
index 7a2a178..72f9001 100644 (file)
@@ -669,25 +669,27 @@ static int tcpm_set_cc(struct tcpc_dev *dev, enum typec_cc_status cc)
                ret = fusb302_i2c_mask_write(chip, FUSB_REG_MASK,
                                             FUSB_REG_MASK_BC_LVL |
                                             FUSB_REG_MASK_COMP_CHNG,
-                                            FUSB_REG_MASK_COMP_CHNG);
+                                            FUSB_REG_MASK_BC_LVL);
                if (ret < 0) {
                        fusb302_log(chip, "cannot set SRC interrupt, ret=%d",
                                    ret);
                        goto done;
                }
                chip->intr_comp_chng = true;
+               chip->intr_bc_lvl = false;
                break;
        case TYPEC_CC_RD:
                ret = fusb302_i2c_mask_write(chip, FUSB_REG_MASK,
                                             FUSB_REG_MASK_BC_LVL |
                                             FUSB_REG_MASK_COMP_CHNG,
-                                            FUSB_REG_MASK_BC_LVL);
+                                            FUSB_REG_MASK_COMP_CHNG);
                if (ret < 0) {
                        fusb302_log(chip, "cannot set SRC interrupt, ret=%d",
                                    ret);
                        goto done;
                }
                chip->intr_bc_lvl = true;
+               chip->intr_comp_chng = false;
                break;
        default:
                break;
index 7f2f3ff..6010b99 100644 (file)
@@ -4110,11 +4110,7 @@ static void run_state_machine(struct tcpm_port *port)
                                       tcpm_try_src(port) ? SRC_TRY
                                                          : SNK_ATTACHED,
                                       0);
-               else
-                       /* Wait for VBUS, but not forever */
-                       tcpm_set_state(port, PORT_RESET, PD_T_PS_SOURCE_ON);
                break;
-
        case SRC_TRY:
                port->try_src_count++;
                tcpm_set_cc(port, tcpm_rp_cc(port));
index fb8ef12..6d27a5b 100644 (file)
@@ -653,7 +653,7 @@ static int cd321x_switch_power_state(struct tps6598x *tps, u8 target_state)
        if (state == target_state)
                return 0;
 
-       ret = tps6598x_exec_cmd(tps, "SPSS", sizeof(u8), &target_state, 0, NULL);
+       ret = tps6598x_exec_cmd(tps, "SSPS", sizeof(u8), &target_state, 0, NULL);
        if (ret)
                return ret;
 
@@ -707,6 +707,7 @@ static int tps6598x_probe(struct i2c_client *client)
        u32 conf;
        u32 vid;
        int ret;
+       u64 mask1;
 
        tps = devm_kzalloc(&client->dev, sizeof(*tps), GFP_KERNEL);
        if (!tps)
@@ -730,11 +731,6 @@ static int tps6598x_probe(struct i2c_client *client)
        if (i2c_check_functionality(client->adapter, I2C_FUNC_I2C))
                tps->i2c_protocol = true;
 
-       /* Make sure the controller has application firmware running */
-       ret = tps6598x_check_mode(tps);
-       if (ret)
-               return ret;
-
        if (np && of_device_is_compatible(np, "apple,cd321x")) {
                /* Switch CD321X chips to the correct system power state */
                ret = cd321x_switch_power_state(tps, TPS_SYSTEM_POWER_STATE_S0);
@@ -742,24 +738,27 @@ static int tps6598x_probe(struct i2c_client *client)
                        return ret;
 
                /* CD321X chips have all interrupts masked initially */
-               ret = tps6598x_write64(tps, TPS_REG_INT_MASK1,
-                                       APPLE_CD_REG_INT_POWER_STATUS_UPDATE |
-                                       APPLE_CD_REG_INT_DATA_STATUS_UPDATE |
-                                       APPLE_CD_REG_INT_PLUG_EVENT);
-               if (ret)
-                       return ret;
+               mask1 = APPLE_CD_REG_INT_POWER_STATUS_UPDATE |
+                       APPLE_CD_REG_INT_DATA_STATUS_UPDATE |
+                       APPLE_CD_REG_INT_PLUG_EVENT;
 
                irq_handler = cd321x_interrupt;
        } else {
                /* Enable power status, data status and plug event interrupts */
-               ret = tps6598x_write64(tps, TPS_REG_INT_MASK1,
-                                      TPS_REG_INT_POWER_STATUS_UPDATE |
-                                      TPS_REG_INT_DATA_STATUS_UPDATE |
-                                      TPS_REG_INT_PLUG_EVENT);
-               if (ret)
-                       return ret;
+               mask1 = TPS_REG_INT_POWER_STATUS_UPDATE |
+                       TPS_REG_INT_DATA_STATUS_UPDATE |
+                       TPS_REG_INT_PLUG_EVENT;
        }
 
+       /* Make sure the controller has application firmware running */
+       ret = tps6598x_check_mode(tps);
+       if (ret)
+               return ret;
+
+       ret = tps6598x_write64(tps, TPS_REG_INT_MASK1, mask1);
+       if (ret)
+               return ret;
+
        ret = tps6598x_read32(tps, TPS_REG_STATUS, &status);
        if (ret < 0)
                return ret;
index 5f484ff..41b0cd1 100644 (file)
@@ -591,8 +591,11 @@ static void vdpasim_free(struct vdpa_device *vdpa)
                vringh_kiov_cleanup(&vdpasim->vqs[i].in_iov);
        }
 
-       put_iova_domain(&vdpasim->iova);
-       iova_cache_put();
+       if (vdpa_get_dma_dev(vdpa)) {
+               put_iova_domain(&vdpasim->iova);
+               iova_cache_put();
+       }
+
        kvfree(vdpasim->buffer);
        if (vdpasim->iommu)
                vhost_iotlb_free(vdpasim->iommu);
index 56cd551..362f91e 100644 (file)
@@ -98,7 +98,8 @@ static ssize_t vfio_pci_igd_rw(struct vfio_pci_core_device *vdev,
                        version = cpu_to_le16(0x0201);
 
                if (igd_opregion_shift_copy(buf, &off,
-                                           &version + (pos - OPREGION_VERSION),
+                                           (u8 *)&version +
+                                           (pos - OPREGION_VERSION),
                                            &pos, &remaining, bytes))
                        return -EFAULT;
        }
@@ -121,7 +122,7 @@ static ssize_t vfio_pci_igd_rw(struct vfio_pci_core_device *vdev,
                                          OPREGION_SIZE : 0);
 
                if (igd_opregion_shift_copy(buf, &off,
-                                           &rvda + (pos - OPREGION_RVDA),
+                                           (u8 *)&rvda + (pos - OPREGION_RVDA),
                                            &pos, &remaining, bytes))
                        return -EFAULT;
        }
index 82fb754..735d1d3 100644 (file)
@@ -232,7 +232,7 @@ static inline bool vfio_iommu_driver_allowed(struct vfio_container *container,
 }
 #endif /* CONFIG_VFIO_NOIOMMU */
 
-/**
+/*
  * IOMMU driver registration
  */
 int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
@@ -285,7 +285,7 @@ static int vfio_iommu_group_notifier(struct notifier_block *nb,
                                     unsigned long action, void *data);
 static void vfio_group_get(struct vfio_group *group);
 
-/**
+/*
  * Container objects - containers are created when /dev/vfio/vfio is
  * opened, but their lifecycle extends until the last user is done, so
  * it's freed via kref.  Must support container/group/device being
@@ -309,7 +309,7 @@ static void vfio_container_put(struct vfio_container *container)
        kref_put(&container->kref, vfio_container_release);
 }
 
-/**
+/*
  * Group objects - create, release, get, put, search
  */
 static struct vfio_group *
@@ -488,7 +488,7 @@ static struct vfio_group *vfio_group_get_from_dev(struct device *dev)
        return group;
 }
 
-/**
+/*
  * Device objects - create, release, get, put, search
  */
 /* Device reference always implies a group reference */
@@ -595,7 +595,7 @@ static int vfio_dev_viable(struct device *dev, void *data)
        return ret;
 }
 
-/**
+/*
  * Async device support
  */
 static int vfio_group_nb_add_dev(struct vfio_group *group, struct device *dev)
@@ -689,7 +689,7 @@ static int vfio_iommu_group_notifier(struct notifier_block *nb,
        return NOTIFY_OK;
 }
 
-/**
+/*
  * VFIO driver API
  */
 void vfio_init_group_dev(struct vfio_device *device, struct device *dev,
@@ -831,7 +831,7 @@ int vfio_register_emulated_iommu_dev(struct vfio_device *device)
 }
 EXPORT_SYMBOL_GPL(vfio_register_emulated_iommu_dev);
 
-/**
+/*
  * Get a reference to the vfio_device for a device.  Even if the
  * caller thinks they own the device, they could be racing with a
  * release call path, so we can't trust drvdata for the shortcut.
@@ -965,7 +965,7 @@ void vfio_unregister_group_dev(struct vfio_device *device)
 }
 EXPORT_SYMBOL_GPL(vfio_unregister_group_dev);
 
-/**
+/*
  * VFIO base fd, /dev/vfio/vfio
  */
 static long vfio_ioctl_check_extension(struct vfio_container *container,
@@ -1183,7 +1183,7 @@ static const struct file_operations vfio_fops = {
        .compat_ioctl   = compat_ptr_ioctl,
 };
 
-/**
+/*
  * VFIO Group fd, /dev/vfio/$GROUP
  */
 static void __vfio_group_unset_container(struct vfio_group *group)
@@ -1536,7 +1536,7 @@ static const struct file_operations vfio_group_fops = {
        .release        = vfio_group_fops_release,
 };
 
-/**
+/*
  * VFIO Device fd
  */
 static int vfio_device_fops_release(struct inode *inode, struct file *filep)
@@ -1611,7 +1611,7 @@ static const struct file_operations vfio_device_fops = {
        .mmap           = vfio_device_fops_mmap,
 };
 
-/**
+/*
  * External user API, exported by symbols to be linked dynamically.
  *
  * The protocol includes:
@@ -1659,7 +1659,7 @@ struct vfio_group *vfio_group_get_external_user(struct file *filep)
 }
 EXPORT_SYMBOL_GPL(vfio_group_get_external_user);
 
-/**
+/*
  * External user API, exported by symbols to be linked dynamically.
  * The external user passes in a device pointer
  * to verify that:
@@ -1725,7 +1725,7 @@ long vfio_external_check_extension(struct vfio_group *group, unsigned long arg)
 }
 EXPORT_SYMBOL_GPL(vfio_external_check_extension);
 
-/**
+/*
  * Sub-module support
  */
 /*
@@ -2272,7 +2272,7 @@ struct iommu_domain *vfio_group_iommu_domain(struct vfio_group *group)
 }
 EXPORT_SYMBOL_GPL(vfio_group_iommu_domain);
 
-/**
+/*
  * Module/class support
  */
 static char *vfio_devnode(struct device *dev, umode_t *mode)
index 01c59ce..29cced1 100644 (file)
@@ -1014,12 +1014,12 @@ static int vhost_vdpa_release(struct inode *inode, struct file *filep)
 
        mutex_lock(&d->mutex);
        filep->private_data = NULL;
+       vhost_vdpa_clean_irq(v);
        vhost_vdpa_reset(v);
        vhost_dev_stop(&v->vdev);
        vhost_vdpa_iotlb_free(v);
        vhost_vdpa_free_domain(v);
        vhost_vdpa_config_put(v);
-       vhost_vdpa_clean_irq(v);
        vhost_dev_cleanup(&v->vdev);
        kfree(v->vdev.vqs);
        mutex_unlock(&d->mutex);
index 938aefb..d6ca1c7 100644 (file)
@@ -511,8 +511,6 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
 
        vhost_disable_notify(&vsock->dev, vq);
        do {
-               u32 len;
-
                if (!vhost_vsock_more_replies(vsock)) {
                        /* Stop tx until the device processes already
                         * pending replies.  Leave tx virtqueue
@@ -540,7 +538,7 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
                        continue;
                }
 
-               len = pkt->len;
+               total_len += sizeof(pkt->hdr) + pkt->len;
 
                /* Deliver to monitoring devices all received packets */
                virtio_transport_deliver_tap_pkt(pkt);
@@ -553,9 +551,7 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
                else
                        virtio_transport_free_pkt(pkt);
 
-               len += sizeof(pkt->hdr);
-               vhost_add_used(vq, head, len);
-               total_len += len;
+               vhost_add_used(vq, head, 0);
                added = true;
        } while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len)));
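The vhost_vsock change above stops crediting per-packet used lengths to the guest (vhost_add_used() now reports 0) and instead charges the full header-plus-payload size to total_len, so vhost_exceeds_weight() still bounds how much one kick can process. A minimal standalone sketch of that weight-limited loop; the byte budget and packet sizes are made up for illustration:

#include <stddef.h>
#include <stdio.h>

#define WEIGHT_LIMIT 4096       /* hypothetical per-kick byte budget */

struct pkt { size_t hdr_len, payload_len; };

static size_t drain(const struct pkt *pkts, size_t n)
{
        size_t total_len = 0, i;

        for (i = 0; i < n; i++) {
                /* Charge the whole packet against the budget, regardless of
                 * how much is reported back as used. */
                total_len += pkts[i].hdr_len + pkts[i].payload_len;
                if (total_len >= WEIGHT_LIMIT)
                        break;  /* stop here; remaining work gets rescheduled */
        }
        return total_len;
}

int main(void)
{
        const struct pkt pkts[] = { { 44, 1500 }, { 44, 1500 }, { 44, 4000 } };

        printf("drained %zu bytes\n", drain(pkts, sizeof(pkts) / sizeof(pkts[0])));
        return 0;
}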
 
index ef9c57c..9a49ea6 100644 (file)
@@ -366,11 +366,17 @@ static void vgacon_init(struct vc_data *c, int init)
        struct uni_pagedir *p;
 
        /*
-        * We cannot be loaded as a module, therefore init is always 1,
-        * but vgacon_init can be called more than once, and init will
-        * not be 1.
+        * We cannot be loaded as a module, therefore init will be 1
+        * if we are the default console, however if we are a fallback
+        * console, for example if fbcon has failed registration, then
+        * init will be 0, so we need to make sure our boot parameters
+        * have been copied to the console structure for vgacon_resize
+        * ultimately called by vc_resize.  Any subsequent calls to
+        * vgacon_init will have init set to 0 too.
         */
        c->vc_can_do_color = vga_can_do_color;
+       c->vc_scan_lines = vga_scan_lines;
+       c->vc_font.height = c->vc_cell_height = vga_video_font_height;
 
        /* set dimensions manually if init != 0 since vc_resize() will fail */
        if (init) {
@@ -379,8 +385,6 @@ static void vgacon_init(struct vc_data *c, int init)
        } else
                vc_resize(c, vga_video_num_columns, vga_video_num_lines);
 
-       c->vc_scan_lines = vga_scan_lines;
-       c->vc_font.height = c->vc_cell_height = vga_video_font_height;
        c->vc_complement_mask = 0x7700;
        if (vga_512_chars)
                c->vc_hi_font_mask = 0x0800;
index 5ec5144..6826f98 100644 (file)
@@ -695,6 +695,7 @@ static struct xenbus_driver xenfb_driver = {
        .remove = xenfb_remove,
        .resume = xenfb_resume,
        .otherend_changed = xenfb_backend_changed,
+       .not_essential = true,
 };
 
 static int __init xenfb_init(void)
index 00f64f2..6d2614e 100644 (file)
@@ -14,9 +14,6 @@
 #include <linux/spinlock.h>
 #include <xen/xen.h>
 
-static bool force_used_validation = false;
-module_param(force_used_validation, bool, 0444);
-
 #ifdef DEBUG
 /* For development, we want to crash whenever the ring is screwed. */
 #define BAD_RING(_vq, fmt, args...)                            \
@@ -185,9 +182,6 @@ struct vring_virtqueue {
                } packed;
        };
 
-       /* Per-descriptor in buffer length */
-       u32 *buflen;
-
        /* How to notify other side. FIXME: commonalize hcalls! */
        bool (*notify)(struct virtqueue *vq);
 
@@ -496,7 +490,6 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
        unsigned int i, n, avail, descs_used, prev, err_idx;
        int head;
        bool indirect;
-       u32 buflen = 0;
 
        START_USE(vq);
 
@@ -578,7 +571,6 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
                                                     VRING_DESC_F_NEXT |
                                                     VRING_DESC_F_WRITE,
                                                     indirect);
-                       buflen += sg->length;
                }
        }
        /* Last one doesn't continue. */
@@ -618,10 +610,6 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
        else
                vq->split.desc_state[head].indir_desc = ctx;
 
-       /* Store in buffer length if necessary */
-       if (vq->buflen)
-               vq->buflen[head] = buflen;
-
        /* Put entry in available array (but don't update avail->idx until they
         * do sync). */
        avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
@@ -796,11 +784,6 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
                BAD_RING(vq, "id %u is not a head!\n", i);
                return NULL;
        }
-       if (vq->buflen && unlikely(*len > vq->buflen[i])) {
-               BAD_RING(vq, "used len %d is larger than in buflen %u\n",
-                       *len, vq->buflen[i]);
-               return NULL;
-       }
 
        /* detach_buf_split clears data, so grab it now. */
        ret = vq->split.desc_state[i].data;
@@ -1079,7 +1062,6 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
        unsigned int i, n, err_idx;
        u16 head, id;
        dma_addr_t addr;
-       u32 buflen = 0;
 
        head = vq->packed.next_avail_idx;
        desc = alloc_indirect_packed(total_sg, gfp);
@@ -1109,8 +1091,6 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
                        desc[i].addr = cpu_to_le64(addr);
                        desc[i].len = cpu_to_le32(sg->length);
                        i++;
-                       if (n >= out_sgs)
-                               buflen += sg->length;
                }
        }
 
@@ -1164,10 +1144,6 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
        vq->packed.desc_state[id].indir_desc = desc;
        vq->packed.desc_state[id].last = id;
 
-       /* Store in buffer length if necessary */
-       if (vq->buflen)
-               vq->buflen[id] = buflen;
-
        vq->num_added += 1;
 
        pr_debug("Added buffer head %i to %p\n", head, vq);
@@ -1203,7 +1179,6 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
        __le16 head_flags, flags;
        u16 head, id, prev, curr, avail_used_flags;
        int err;
-       u32 buflen = 0;
 
        START_USE(vq);
 
@@ -1283,8 +1258,6 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
                                        1 << VRING_PACKED_DESC_F_AVAIL |
                                        1 << VRING_PACKED_DESC_F_USED;
                        }
-                       if (n >= out_sgs)
-                               buflen += sg->length;
                }
        }
 
@@ -1304,10 +1277,6 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
        vq->packed.desc_state[id].indir_desc = ctx;
        vq->packed.desc_state[id].last = prev;
 
-       /* Store in buffer length if necessary */
-       if (vq->buflen)
-               vq->buflen[id] = buflen;
-
        /*
         * A driver MUST NOT make the first descriptor in the list
         * available before all subsequent descriptors comprising
@@ -1494,11 +1463,6 @@ static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
                BAD_RING(vq, "id %u is not a head!\n", id);
                return NULL;
        }
-       if (vq->buflen && unlikely(*len > vq->buflen[id])) {
-               BAD_RING(vq, "used len %d is larger than in buflen %u\n",
-                       *len, vq->buflen[id]);
-               return NULL;
-       }
 
        /* detach_buf_packed clears data, so grab it now. */
        ret = vq->packed.desc_state[id].data;
@@ -1704,7 +1668,6 @@ static struct virtqueue *vring_create_virtqueue_packed(
        struct vring_virtqueue *vq;
        struct vring_packed_desc *ring;
        struct vring_packed_desc_event *driver, *device;
-       struct virtio_driver *drv = drv_to_virtio(vdev->dev.driver);
        dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr;
        size_t ring_size_in_bytes, event_size_in_bytes;
 
@@ -1794,15 +1757,6 @@ static struct virtqueue *vring_create_virtqueue_packed(
        if (!vq->packed.desc_extra)
                goto err_desc_extra;
 
-       if (!drv->suppress_used_validation || force_used_validation) {
-               vq->buflen = kmalloc_array(num, sizeof(*vq->buflen),
-                                          GFP_KERNEL);
-               if (!vq->buflen)
-                       goto err_buflen;
-       } else {
-               vq->buflen = NULL;
-       }
-
        /* No callback?  Tell other side not to bother us. */
        if (!callback) {
                vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
@@ -1815,8 +1769,6 @@ static struct virtqueue *vring_create_virtqueue_packed(
        spin_unlock(&vdev->vqs_list_lock);
        return &vq->vq;
 
-err_buflen:
-       kfree(vq->packed.desc_extra);
 err_desc_extra:
        kfree(vq->packed.desc_state);
 err_desc_state:
@@ -2224,7 +2176,6 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
                                        void (*callback)(struct virtqueue *),
                                        const char *name)
 {
-       struct virtio_driver *drv = drv_to_virtio(vdev->dev.driver);
        struct vring_virtqueue *vq;
 
        if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
@@ -2284,15 +2235,6 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
        if (!vq->split.desc_extra)
                goto err_extra;
 
-       if (!drv->suppress_used_validation || force_used_validation) {
-               vq->buflen = kmalloc_array(vring.num, sizeof(*vq->buflen),
-                                          GFP_KERNEL);
-               if (!vq->buflen)
-                       goto err_buflen;
-       } else {
-               vq->buflen = NULL;
-       }
-
        /* Put everything in free lists. */
        vq->free_head = 0;
        memset(vq->split.desc_state, 0, vring.num *
@@ -2303,8 +2245,6 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
        spin_unlock(&vdev->vqs_list_lock);
        return &vq->vq;
 
-err_buflen:
-       kfree(vq->split.desc_extra);
 err_extra:
        kfree(vq->split.desc_state);
 err_state:
index a1b11c6..33e941e 100644 (file)
@@ -259,9 +259,15 @@ config XEN_SCSI_BACKEND
          if guests need generic access to SCSI devices.
 
 config XEN_PRIVCMD
-       tristate
+       tristate "Xen hypercall passthrough driver"
        depends on XEN
        default m
+       help
+         The hypercall passthrough driver allows privileged user programs to
+         perform Xen hypercalls. This driver is normally required for systems
+         running as Dom0 to perform privileged operations, but in some
+         disaggregated Xen setups this driver might be needed for other
+         domains, too.
 
 config XEN_ACPI_PROCESSOR
        tristate "Xen ACPI processor"
index 7984645..3c9ae15 100644 (file)
@@ -1275,6 +1275,7 @@ static struct xenbus_driver pvcalls_front_driver = {
        .probe = pvcalls_front_probe,
        .remove = pvcalls_front_remove,
        .otherend_changed = pvcalls_front_changed,
+       .not_essential = true,
 };
 
 static int __init pvcalls_frontend_init(void)
index bd003ca..fe360c3 100644 (file)
@@ -909,7 +909,7 @@ static struct notifier_block xenbus_resume_nb = {
 
 static int __init xenbus_init(void)
 {
-       int err = 0;
+       int err;
        uint64_t v = 0;
        xen_store_domain_type = XS_UNKNOWN;
 
@@ -949,6 +949,29 @@ static int __init xenbus_init(void)
                err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v);
                if (err)
                        goto out_error;
+               /*
+                * Uninitialized hvm_params are zero and return no error.
+                * Although it is theoretically possible to have
+                * HVM_PARAM_STORE_PFN set to zero on purpose, in reality it is
+                * not zero when valid. If zero, it means that Xenstore hasn't
+                * been properly initialized. Instead of attempting to map a
+                * wrong guest physical address, return an error.
+                *
+                * Also recognize all bits set as an invalid value.
+                */
+               if (!v || !~v) {
+                       err = -ENOENT;
+                       goto out_error;
+               }
+               /* Avoid truncation on 32-bit. */
+#if BITS_PER_LONG == 32
+               if (v > ULONG_MAX) {
+                       pr_err("%s: cannot handle HVM_PARAM_STORE_PFN=%llx > ULONG_MAX\n",
+                              __func__, v);
+                       err = -EINVAL;
+                       goto out_error;
+               }
+#endif
                xen_store_gfn = (unsigned long)v;
                xen_store_interface =
                        xen_remap(xen_store_gfn << XEN_PAGE_SHIFT,
@@ -983,8 +1006,10 @@ static int __init xenbus_init(void)
         */
        proc_create_mount_point("xen");
 #endif
+       return 0;
 
 out_error:
+       xen_store_domain_type = XS_UNKNOWN;
        return err;
 }
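The xenbus_init() hunk above treats an HVM_PARAM_STORE_PFN of zero or all-ones as invalid and, on 32-bit builds, rejects values that would truncate when cast to unsigned long, resetting xen_store_domain_type on every error path. A small standalone sketch of just that validation step; validate_store_pfn() is a hypothetical helper, and the real code obtains the value via hvm_get_parameter():

#include <errno.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>

/* Returns 0 and stores the frame number in *pfn, or a negative errno. */
static int validate_store_pfn(uint64_t v, unsigned long *pfn)
{
        /* Zero means Xenstore was never initialized; all-ones is equally bogus. */
        if (!v || !~v)
                return -ENOENT;

        /* On a 32-bit build the frame number must still fit in unsigned long. */
        if (v > ULONG_MAX)
                return -EINVAL;

        *pfn = (unsigned long)v;
        return 0;
}

int main(void)
{
        unsigned long pfn;

        printf("%d\n", validate_store_pfn(0, &pfn));       /* -ENOENT */
        printf("%d\n", validate_store_pfn(0x12345, &pfn)); /* 0 */
        return 0;
}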
 
index 4809446..07b010a 100644 (file)
@@ -211,19 +211,11 @@ static int is_device_connecting(struct device *dev, void *data, bool ignore_none
        if (drv && (dev->driver != drv))
                return 0;
 
-       if (ignore_nonessential) {
-               /* With older QEMU, for PVonHVM guests the guest config files
-                * could contain: vfb = [ 'vnc=1, vnclisten=0.0.0.0']
-                * which is nonsensical as there is no PV FB (there can be
-                * a PVKB) running as HVM guest. */
+       xendrv = to_xenbus_driver(dev->driver);
 
-               if ((strncmp(xendev->nodename, "device/vkbd", 11) == 0))
-                       return 0;
+       if (ignore_nonessential && xendrv->not_essential)
+               return 0;
 
-               if ((strncmp(xendev->nodename, "device/vfb", 10) == 0))
-                       return 0;
-       }
-       xendrv = to_xenbus_driver(dev->driver);
        return (xendev->state < XenbusStateConnected ||
                (xendev->state == XenbusStateConnected &&
                 xendrv->is_ready && !xendrv->is_ready(xendev)));
index 9febb80..0fb90cb 100644 (file)
@@ -290,6 +290,8 @@ int lzo_compress_pages(struct list_head *ws, struct address_space *mapping,
        *total_out = cur_out;
        *total_in = cur_in - start;
 out:
+       if (page_in)
+               put_page(page_in);
        *out_pages = DIV_ROUND_UP(cur_out, PAGE_SIZE);
        return ret;
 }
index b50da19..9e5d9e1 100644 (file)
@@ -152,5 +152,5 @@ extern struct dentry *cifs_smb3_do_mount(struct file_system_type *fs_type,
 extern const struct export_operations cifs_export_ops;
 #endif /* CONFIG_CIFS_NFSD_EXPORT */
 
-#define CIFS_VERSION   "2.33"
+#define CIFS_VERSION   "2.34"
 #endif                         /* _CIFSFS_H */
index 67e4c55..18448db 100644 (file)
@@ -1271,10 +1271,8 @@ static int match_server(struct TCP_Server_Info *server, struct smb3_fs_context *
 {
        struct sockaddr *addr = (struct sockaddr *)&ctx->dstaddr;
 
-       if (ctx->nosharesock) {
-               server->nosharesock = true;
+       if (ctx->nosharesock)
                return 0;
-       }
 
        /* this server does not share socket */
        if (server->nosharesock)
@@ -1438,6 +1436,9 @@ cifs_get_tcp_session(struct smb3_fs_context *ctx,
                goto out_err;
        }
 
+       if (ctx->nosharesock)
+               tcp_ses->nosharesock = true;
+
        tcp_ses->ops = ctx->ops;
        tcp_ses->vals = ctx->vals;
        cifs_set_net_ns(tcp_ses, get_net(current->nsproxy->net_ns));
@@ -1561,6 +1562,10 @@ smbd_connected:
        /* fscache server cookies are based on primary channel only */
        if (!CIFS_SERVER_IS_CHAN(tcp_ses))
                cifs_fscache_get_client_cookie(tcp_ses);
+#ifdef CONFIG_CIFS_FSCACHE
+       else
+               tcp_ses->fscache = tcp_ses->primary_server->fscache;
+#endif /* CONFIG_CIFS_FSCACHE */
 
        /* queue echo request delayed work */
        queue_delayed_work(cifsiod_wq, &tcp_ses->echo, tcp_ses->echo_interval);
@@ -3045,12 +3050,6 @@ static int mount_get_conns(struct mount_ctx *mnt_ctx)
                                cifs_dbg(VFS, "read only mount of RW share\n");
                        /* no need to log a RW mount of a typical RW share */
                }
-               /*
-                * The cookie is initialized from volume info returned above.
-                * Inside cifs_fscache_get_super_cookie it checks
-                * that we do not get super cookie twice.
-                */
-               cifs_fscache_get_super_cookie(tcon);
        }
 
        /*
@@ -3425,6 +3424,7 @@ static int connect_dfs_root(struct mount_ctx *mnt_ctx, struct dfs_cache_tgt_list
         */
        mount_put_conns(mnt_ctx);
        mount_get_dfs_conns(mnt_ctx);
+       set_root_ses(mnt_ctx);
 
        full_path = build_unc_path_to_root(ctx, cifs_sb, true);
        if (IS_ERR(full_path))
index 7e409a3..003c5f1 100644 (file)
  * Key layout of CIFS server cache index object
  */
 struct cifs_server_key {
-       struct {
-               uint16_t        family;         /* address family */
-               __be16          port;           /* IP port */
-       } hdr;
-       union {
-               struct in_addr  ipv4_addr;
-               struct in6_addr ipv6_addr;
-       };
+       __u64 conn_id;
 } __packed;
 
 /*
@@ -31,42 +24,23 @@ struct cifs_server_key {
  */
 void cifs_fscache_get_client_cookie(struct TCP_Server_Info *server)
 {
-       const struct sockaddr *sa = (struct sockaddr *) &server->dstaddr;
-       const struct sockaddr_in *addr = (struct sockaddr_in *) sa;
-       const struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *) sa;
        struct cifs_server_key key;
-       uint16_t key_len = sizeof(key.hdr);
-
-       memset(&key, 0, sizeof(key));
 
        /*
-        * Should not be a problem as sin_family/sin6_family overlays
-        * sa_family field
+        * Check if the cookie was already initialized so we don't reinitialize it.
+        * In the future, as we integrate with newer fscache features,
+        * we may want to instead add a check if cookie has changed
         */
-       key.hdr.family = sa->sa_family;
-       switch (sa->sa_family) {
-       case AF_INET:
-               key.hdr.port = addr->sin_port;
-               key.ipv4_addr = addr->sin_addr;
-               key_len += sizeof(key.ipv4_addr);
-               break;
-
-       case AF_INET6:
-               key.hdr.port = addr6->sin6_port;
-               key.ipv6_addr = addr6->sin6_addr;
-               key_len += sizeof(key.ipv6_addr);
-               break;
-
-       default:
-               cifs_dbg(VFS, "Unknown network family '%d'\n", sa->sa_family);
-               server->fscache = NULL;
+       if (server->fscache)
                return;
-       }
+
+       memset(&key, 0, sizeof(key));
+       key.conn_id = server->conn_id;
 
        server->fscache =
                fscache_acquire_cookie(cifs_fscache_netfs.primary_index,
                                       &cifs_fscache_server_index_def,
-                                      &key, key_len,
+                                      &key, sizeof(key),
                                       NULL, 0,
                                       server, 0, true);
        cifs_dbg(FYI, "%s: (0x%p/0x%p)\n",
@@ -92,7 +66,7 @@ void cifs_fscache_get_super_cookie(struct cifs_tcon *tcon)
         * In the future, as we integrate with newer fscache features,
         * we may want to instead add a check if cookie has changed
         */
-       if (tcon->fscache == NULL)
+       if (tcon->fscache)
                return;
 
        sharename = extract_sharename(tcon->treeName);
index 8284841..96d083d 100644 (file)
@@ -1376,6 +1376,13 @@ iget_no_retry:
                inode = ERR_PTR(rc);
        }
 
+       /*
+        * The cookie is initialized from volume info returned above.
+        * Inside cifs_fscache_get_super_cookie it checks
+        * that we do not get super cookie twice.
+        */
+       cifs_fscache_get_super_cookie(tcon);
+
 out:
        kfree(path);
        free_xid(xid);
index 8ad2993..af63548 100644 (file)
@@ -222,6 +222,7 @@ cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses,
        /* Auth */
        ctx.domainauto = ses->domainAuto;
        ctx.domainname = ses->domainName;
+       ctx.server_hostname = ses->server->hostname;
        ctx.username = ses->user_name;
        ctx.password = ses->password;
        ctx.sectype = ses->sectype;
index 2f5f2c4..8b36703 100644 (file)
@@ -142,7 +142,7 @@ static int
 smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon,
               struct TCP_Server_Info *server)
 {
-       int rc;
+       int rc = 0;
        struct nls_table *nls_codepage;
        struct cifs_ses *ses;
        int retries;
index 84da2c2..ec9a1d7 100644 (file)
@@ -150,7 +150,7 @@ static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
         * however in order to avoid some race conditions, add a
         * DBG_BUGON to observe this in advance.
         */
-       DBG_BUGON(xa_erase(&sbi->managed_pslots, grp->index) != grp);
+       DBG_BUGON(__xa_erase(&sbi->managed_pslots, grp->index) != grp);
 
        /* last refcount should be connected with its managed pslot.  */
        erofs_workgroup_unfreeze(grp, 0);
@@ -165,15 +165,19 @@ static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
        unsigned int freed = 0;
        unsigned long index;
 
+       xa_lock(&sbi->managed_pslots);
        xa_for_each(&sbi->managed_pslots, index, grp) {
                /* try to shrink each valid workgroup */
                if (!erofs_try_to_release_workgroup(sbi, grp))
                        continue;
+               xa_unlock(&sbi->managed_pslots);
 
                ++freed;
                if (!--nr_shrink)
-                       break;
+                       return freed;
+               xa_lock(&sbi->managed_pslots);
        }
+       xa_unlock(&sbi->managed_pslots);
        return freed;
 }
 
index 8627dac..ad4a8bf 100644 (file)
--- a/fs/file.c
+++ b/fs/file.c
@@ -858,6 +858,10 @@ loop:
                        file = NULL;
                else if (!get_file_rcu_many(file, refs))
                        goto loop;
+               else if (files_lookup_fd_raw(files, fd) != file) {
+                       fput_many(file, refs);
+                       goto loop;
+               }
        }
        rcu_read_unlock();
 
index 79f7eda..cd54a52 100644 (file)
@@ -847,17 +847,17 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
 
        replace_page_cache_page(oldpage, newpage);
 
+       get_page(newpage);
+
+       if (!(buf->flags & PIPE_BUF_FLAG_LRU))
+               lru_cache_add(newpage);
+
        /*
         * Release while we have extra ref on stolen page.  Otherwise
         * anon_pipe_buf_release() might think the page can be reused.
         */
        pipe_buf_release(cs->pipe, buf);
 
-       get_page(newpage);
-
-       if (!(buf->flags & PIPE_BUF_FLAG_LRU))
-               lru_cache_add(newpage);
-
        err = 0;
        spin_lock(&cs->req->waitq.lock);
        if (test_bit(FR_ABORTED, &cs->req->flags))
index 8dbd6fe..44a7a42 100644 (file)
@@ -1857,7 +1857,6 @@ void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs)
 
 void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
 {
-       struct gfs2_holder mock_gh = { .gh_gl = gl, .gh_state = state, };
        unsigned long delay = 0;
        unsigned long holdtime;
        unsigned long now = jiffies;
@@ -1890,8 +1889,13 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
         * keep the glock until the last strong holder is done with it.
         */
        if (!find_first_strong_holder(gl)) {
-               if (state == LM_ST_UNLOCKED)
-                       mock_gh.gh_state = LM_ST_EXCLUSIVE;
+               struct gfs2_holder mock_gh = {
+                       .gh_gl = gl,
+                       .gh_state = (state == LM_ST_UNLOCKED) ?
+                                   LM_ST_EXCLUSIVE : state,
+                       .gh_iflags = BIT(HIF_HOLDER)
+               };
+
                demote_incompat_holders(gl, &mock_gh);
        }
        handle_callback(gl, state, delay, true);
index 6424b90..89905f4 100644 (file)
@@ -40,37 +40,6 @@ static const struct inode_operations gfs2_file_iops;
 static const struct inode_operations gfs2_dir_iops;
 static const struct inode_operations gfs2_symlink_iops;
 
-static int iget_test(struct inode *inode, void *opaque)
-{
-       u64 no_addr = *(u64 *)opaque;
-
-       return GFS2_I(inode)->i_no_addr == no_addr;
-}
-
-static int iget_set(struct inode *inode, void *opaque)
-{
-       u64 no_addr = *(u64 *)opaque;
-
-       GFS2_I(inode)->i_no_addr = no_addr;
-       inode->i_ino = no_addr;
-       return 0;
-}
-
-static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr)
-{
-       struct inode *inode;
-
-repeat:
-       inode = iget5_locked(sb, no_addr, iget_test, iget_set, &no_addr);
-       if (!inode)
-               return inode;
-       if (is_bad_inode(inode)) {
-               iput(inode);
-               goto repeat;
-       }
-       return inode;
-}
-
 /**
  * gfs2_set_iop - Sets inode operations
  * @inode: The inode with correct i_mode filled in
@@ -104,6 +73,22 @@ static void gfs2_set_iop(struct inode *inode)
        }
 }
 
+static int iget_test(struct inode *inode, void *opaque)
+{
+       u64 no_addr = *(u64 *)opaque;
+
+       return GFS2_I(inode)->i_no_addr == no_addr;
+}
+
+static int iget_set(struct inode *inode, void *opaque)
+{
+       u64 no_addr = *(u64 *)opaque;
+
+       GFS2_I(inode)->i_no_addr = no_addr;
+       inode->i_ino = no_addr;
+       return 0;
+}
+
 /**
  * gfs2_inode_lookup - Lookup an inode
  * @sb: The super block
@@ -132,12 +117,11 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
 {
        struct inode *inode;
        struct gfs2_inode *ip;
-       struct gfs2_glock *io_gl = NULL;
        struct gfs2_holder i_gh;
        int error;
 
        gfs2_holder_mark_uninitialized(&i_gh);
-       inode = gfs2_iget(sb, no_addr);
+       inode = iget5_locked(sb, no_addr, iget_test, iget_set, &no_addr);
        if (!inode)
                return ERR_PTR(-ENOMEM);
 
@@ -145,22 +129,16 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
 
        if (inode->i_state & I_NEW) {
                struct gfs2_sbd *sdp = GFS2_SB(inode);
+               struct gfs2_glock *io_gl;
 
                error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl);
                if (unlikely(error))
                        goto fail;
-               flush_delayed_work(&ip->i_gl->gl_work);
-
-               error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl);
-               if (unlikely(error))
-                       goto fail;
-               if (blktype != GFS2_BLKST_UNLINKED)
-                       gfs2_cancel_delete_work(io_gl);
 
                if (type == DT_UNKNOWN || blktype != GFS2_BLKST_FREE) {
                        /*
                         * The GL_SKIP flag indicates to skip reading the inode
-                        * block.  We read the inode with gfs2_inode_refresh
+                        * block.  We read the inode when instantiating it
                         * after possibly checking the block type.
                         */
                        error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE,
@@ -181,24 +159,31 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
                        }
                }
 
-               glock_set_object(ip->i_gl, ip);
                set_bit(GLF_INSTANTIATE_NEEDED, &ip->i_gl->gl_flags);
-               error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh);
+
+               error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl);
                if (unlikely(error))
                        goto fail;
-               glock_set_object(ip->i_iopen_gh.gh_gl, ip);
+               if (blktype != GFS2_BLKST_UNLINKED)
+                       gfs2_cancel_delete_work(io_gl);
+               error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh);
                gfs2_glock_put(io_gl);
-               io_gl = NULL;
+               if (unlikely(error))
+                       goto fail;
 
                /* Lowest possible timestamp; will be overwritten in gfs2_dinode_in. */
                inode->i_atime.tv_sec = 1LL << (8 * sizeof(inode->i_atime.tv_sec) - 1);
                inode->i_atime.tv_nsec = 0;
 
+               glock_set_object(ip->i_gl, ip);
+
                if (type == DT_UNKNOWN) {
                        /* Inode glock must be locked already */
                        error = gfs2_instantiate(&i_gh);
-                       if (error)
+                       if (error) {
+                               glock_clear_object(ip->i_gl, ip);
                                goto fail;
+                       }
                } else {
                        ip->i_no_formal_ino = no_formal_ino;
                        inode->i_mode = DT2IF(type);
@@ -206,31 +191,23 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
 
                if (gfs2_holder_initialized(&i_gh))
                        gfs2_glock_dq_uninit(&i_gh);
+               glock_set_object(ip->i_iopen_gh.gh_gl, ip);
 
                gfs2_set_iop(inode);
+               unlock_new_inode(inode);
        }
 
        if (no_formal_ino && ip->i_no_formal_ino &&
            no_formal_ino != ip->i_no_formal_ino) {
-               error = -ESTALE;
-               if (inode->i_state & I_NEW)
-                       goto fail;
                iput(inode);
-               return ERR_PTR(error);
+               return ERR_PTR(-ESTALE);
        }
 
-       if (inode->i_state & I_NEW)
-               unlock_new_inode(inode);
-
        return inode;
 
 fail:
-       if (gfs2_holder_initialized(&ip->i_iopen_gh)) {
-               glock_clear_object(ip->i_iopen_gh.gh_gl, ip);
+       if (gfs2_holder_initialized(&ip->i_iopen_gh))
                gfs2_glock_dq_uninit(&ip->i_iopen_gh);
-       }
-       if (io_gl)
-               gfs2_glock_put(io_gl);
        if (gfs2_holder_initialized(&i_gh))
                gfs2_glock_dq_uninit(&i_gh);
        iget_failed(inode);
@@ -730,18 +707,19 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
        error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl);
        if (error)
                goto fail_free_inode;
-       flush_delayed_work(&ip->i_gl->gl_work);
 
        error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_iopen_glops, CREATE, &io_gl);
        if (error)
                goto fail_free_inode;
        gfs2_cancel_delete_work(io_gl);
 
+       error = insert_inode_locked4(inode, ip->i_no_addr, iget_test, &ip->i_no_addr);
+       BUG_ON(error);
+
        error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1);
        if (error)
                goto fail_gunlock2;
 
-       glock_set_object(ip->i_gl, ip);
        error = gfs2_trans_begin(sdp, blocks, 0);
        if (error)
                goto fail_gunlock2;
@@ -757,9 +735,9 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
        if (error)
                goto fail_gunlock2;
 
+       glock_set_object(ip->i_gl, ip);
        glock_set_object(io_gl, ip);
        gfs2_set_iop(inode);
-       insert_inode_hash(inode);
 
        free_vfs_inode = 0; /* After this point, the inode is no longer
                               considered free. Any failures need to undo
@@ -801,17 +779,17 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
        gfs2_glock_dq_uninit(ghs + 1);
        gfs2_glock_put(io_gl);
        gfs2_qa_put(dip);
+       unlock_new_inode(inode);
        return error;
 
 fail_gunlock3:
+       glock_clear_object(ip->i_gl, ip);
        glock_clear_object(io_gl, ip);
        gfs2_glock_dq_uninit(&ip->i_iopen_gh);
 fail_gunlock2:
-       glock_clear_object(io_gl, ip);
        gfs2_glock_put(io_gl);
 fail_free_inode:
        if (ip->i_gl) {
-               glock_clear_object(ip->i_gl, ip);
                if (free_vfs_inode) /* else evict will do the put for us */
                        gfs2_glock_put(ip->i_gl);
        }
@@ -829,7 +807,10 @@ fail_gunlock:
                        mark_inode_dirty(inode);
                set_bit(free_vfs_inode ? GIF_FREE_VFS_INODE : GIF_ALLOC_FAILED,
                        &GFS2_I(inode)->i_flags);
-               iput(inode);
+               if (inode->i_state & I_NEW)
+                       iget_failed(inode);
+               else
+                       iput(inode);
        }
        if (gfs2_holder_initialized(ghs + 1))
                gfs2_glock_dq_uninit(ghs + 1);
index 3eba094..6b80a51 100644 (file)
@@ -180,8 +180,6 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
        mapping->a_ops = &empty_aops;
        mapping->host = inode;
        mapping->flags = 0;
-       if (sb->s_type->fs_flags & FS_THP_SUPPORT)
-               __set_bit(AS_THP_SUPPORT, &mapping->flags);
        mapping->wb_err = 0;
        atomic_set(&mapping->i_mmap_writable, 0);
 #ifdef CONFIG_READ_ONLY_THP_FOR_FS
index 88202de..50cf9f9 100644 (file)
@@ -714,6 +714,13 @@ static bool io_wq_work_match_all(struct io_wq_work *work, void *data)
 
 static inline bool io_should_retry_thread(long err)
 {
+       /*
+        * Prevent perpetual task_work retry, if the task (or its group) is
+        * exiting.
+        */
+       if (fatal_signal_pending(current))
+               return false;
+
        switch (err) {
        case -EAGAIN:
        case -ERESTARTSYS:
index b07196b..c4f2176 100644 (file)
@@ -1278,6 +1278,7 @@ static void io_refs_resurrect(struct percpu_ref *ref, struct completion *compl)
 
 static bool io_match_task(struct io_kiocb *head, struct task_struct *task,
                          bool cancel_all)
+       __must_hold(&req->ctx->timeout_lock)
 {
        struct io_kiocb *req;
 
@@ -1293,6 +1294,44 @@ static bool io_match_task(struct io_kiocb *head, struct task_struct *task,
        return false;
 }
 
+static bool io_match_linked(struct io_kiocb *head)
+{
+       struct io_kiocb *req;
+
+       io_for_each_link(req, head) {
+               if (req->flags & REQ_F_INFLIGHT)
+                       return true;
+       }
+       return false;
+}
+
+/*
+ * As io_match_task() but protected against racing with linked timeouts.
+ * User must not hold timeout_lock.
+ */
+static bool io_match_task_safe(struct io_kiocb *head, struct task_struct *task,
+                              bool cancel_all)
+{
+       bool matched;
+
+       if (task && head->task != task)
+               return false;
+       if (cancel_all)
+               return true;
+
+       if (head->flags & REQ_F_LINK_TIMEOUT) {
+               struct io_ring_ctx *ctx = head->ctx;
+
+               /* protect against races with linked timeouts */
+               spin_lock_irq(&ctx->timeout_lock);
+               matched = io_match_linked(head);
+               spin_unlock_irq(&ctx->timeout_lock);
+       } else {
+               matched = io_match_linked(head);
+       }
+       return matched;
+}
+
 static inline bool req_has_async_data(struct io_kiocb *req)
 {
        return req->flags & REQ_F_ASYNC_DATA;
@@ -1502,10 +1541,10 @@ static void io_prep_async_link(struct io_kiocb *req)
        if (req->flags & REQ_F_LINK_TIMEOUT) {
                struct io_ring_ctx *ctx = req->ctx;
 
-               spin_lock(&ctx->completion_lock);
+               spin_lock_irq(&ctx->timeout_lock);
                io_for_each_link(cur, req)
                        io_prep_async_work(cur);
-               spin_unlock(&ctx->completion_lock);
+               spin_unlock_irq(&ctx->timeout_lock);
        } else {
                io_for_each_link(cur, req)
                        io_prep_async_work(cur);
@@ -4327,6 +4366,7 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx, struct io_buffer *buf,
                kfree(nxt);
                if (++i == nbufs)
                        return i;
+               cond_resched();
        }
        i++;
        kfree(buf);
@@ -5704,7 +5744,7 @@ static __cold bool io_poll_remove_all(struct io_ring_ctx *ctx,
 
                list = &ctx->cancel_hash[i];
                hlist_for_each_entry_safe(req, tmp, list, hash_node) {
-                       if (io_match_task(req, tsk, cancel_all))
+                       if (io_match_task_safe(req, tsk, cancel_all))
                                posted += io_poll_remove_one(req);
                }
        }
@@ -6156,6 +6196,9 @@ static int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
        if (get_timespec64(&data->ts, u64_to_user_ptr(sqe->addr)))
                return -EFAULT;
 
+       if (data->ts.tv_sec < 0 || data->ts.tv_nsec < 0)
+               return -EINVAL;
+
        data->mode = io_translate_timeout_mode(flags);
        hrtimer_init(&data->timer, io_timeout_get_clock(data), data->mode);
 
@@ -6880,10 +6923,11 @@ static inline struct file *io_file_get(struct io_ring_ctx *ctx,
 static void io_req_task_link_timeout(struct io_kiocb *req, bool *locked)
 {
        struct io_kiocb *prev = req->timeout.prev;
-       int ret;
+       int ret = -ENOENT;
 
        if (prev) {
-               ret = io_try_cancel_userdata(req, prev->user_data);
+               if (!(req->task->flags & PF_EXITING))
+                       ret = io_try_cancel_userdata(req, prev->user_data);
                io_req_complete_post(req, ret ?: -ETIME, 0);
                io_put_req(prev);
        } else {
@@ -9255,10 +9299,8 @@ static void io_destroy_buffers(struct io_ring_ctx *ctx)
        struct io_buffer *buf;
        unsigned long index;
 
-       xa_for_each(&ctx->io_buffers, index, buf) {
+       xa_for_each(&ctx->io_buffers, index, buf)
                __io_remove_buffers(ctx, buf, index, -1U);
-               cond_resched();
-       }
 }
 
 static void io_req_caches_free(struct io_ring_ctx *ctx)
@@ -9562,19 +9604,8 @@ static bool io_cancel_task_cb(struct io_wq_work *work, void *data)
 {
        struct io_kiocb *req = container_of(work, struct io_kiocb, work);
        struct io_task_cancel *cancel = data;
-       bool ret;
 
-       if (!cancel->all && (req->flags & REQ_F_LINK_TIMEOUT)) {
-               struct io_ring_ctx *ctx = req->ctx;
-
-               /* protect against races with linked timeouts */
-               spin_lock(&ctx->completion_lock);
-               ret = io_match_task(req, cancel->task, cancel->all);
-               spin_unlock(&ctx->completion_lock);
-       } else {
-               ret = io_match_task(req, cancel->task, cancel->all);
-       }
-       return ret;
+       return io_match_task_safe(req, cancel->task, cancel->all);
 }
 
 static __cold bool io_cancel_defer_files(struct io_ring_ctx *ctx,
@@ -9586,7 +9617,7 @@ static __cold bool io_cancel_defer_files(struct io_ring_ctx *ctx,
 
        spin_lock(&ctx->completion_lock);
        list_for_each_entry_reverse(de, &ctx->defer_list, list) {
-               if (io_match_task(de->req, task, cancel_all)) {
+               if (io_match_task_safe(de->req, task, cancel_all)) {
                        list_cut_position(&list, &ctx->defer_list, &de->list);
                        break;
                }
@@ -9764,7 +9795,7 @@ static __cold void io_uring_clean_tctx(struct io_uring_task *tctx)
        }
        if (wq) {
                /*
-                * Must be after io_uring_del_task_file() (removes nodes under
+                * Must be after io_uring_del_tctx_node() (removes nodes under
                 * uring_lock) to avoid race with io_uring_try_cancel_iowq().
                 */
                io_wq_put_and_exit(wq);
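io_match_task_safe() above takes timeout_lock only when the request carries REQ_F_LINK_TIMEOUT, so the common case stays lock-free while the case that can race with linked timeouts is serialized. A minimal pthread sketch of that "lock only when the racy flag is set" shape; the struct, flag and function names are illustrative, not io_uring's:

#include <pthread.h>
#include <stdbool.h>

#define F_LINK_TIMEOUT 0x1      /* illustrative flag */

struct req {
        unsigned int flags;
        bool inflight;
        pthread_mutex_t *timeout_lock;  /* guards state a linked timeout may touch */
};

static bool match_linked(const struct req *r)
{
        return r->inflight;
}

/* Caller must NOT hold timeout_lock; it is taken only when a race is possible. */
static bool match_req_safe(struct req *r)
{
        bool matched;

        if (r->flags & F_LINK_TIMEOUT) {
                pthread_mutex_lock(r->timeout_lock);
                matched = match_linked(r);
                pthread_mutex_unlock(r->timeout_lock);
        } else {
                matched = match_linked(r);
        }
        return matched;
}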
index 1753c26..71a36ae 100644 (file)
@@ -205,7 +205,16 @@ struct iomap_readpage_ctx {
        struct readahead_control *rac;
 };
 
-static loff_t iomap_read_inline_data(const struct iomap_iter *iter,
+/**
+ * iomap_read_inline_data - copy inline data into the page cache
+ * @iter: iteration structure
+ * @page: page to copy to
+ *
+ * Copy the inline data in @iter into @page and zero out the rest of the page.
+ * Only a single IOMAP_INLINE extent is allowed at the end of each file.
+ * Returns zero for success to complete the read, or the usual negative errno.
+ */
+static int iomap_read_inline_data(const struct iomap_iter *iter,
                struct page *page)
 {
        const struct iomap *iomap = iomap_iter_srcmap(iter);
@@ -214,7 +223,7 @@ static loff_t iomap_read_inline_data(const struct iomap_iter *iter,
        void *addr;
 
        if (PageUptodate(page))
-               return PAGE_SIZE - poff;
+               return 0;
 
        if (WARN_ON_ONCE(size > PAGE_SIZE - poff))
                return -EIO;
@@ -231,7 +240,7 @@ static loff_t iomap_read_inline_data(const struct iomap_iter *iter,
        memset(addr + size, 0, PAGE_SIZE - poff - size);
        kunmap_local(addr);
        iomap_set_range_uptodate(page, poff, PAGE_SIZE - poff);
-       return PAGE_SIZE - poff;
+       return 0;
 }
 
 static inline bool iomap_block_needs_zeroing(const struct iomap_iter *iter,
@@ -257,7 +266,7 @@ static loff_t iomap_readpage_iter(const struct iomap_iter *iter,
        sector_t sector;
 
        if (iomap->type == IOMAP_INLINE)
-               return min(iomap_read_inline_data(iter, page), length);
+               return iomap_read_inline_data(iter, page);
 
        /* zero post-eof blocks as the page may be mapped */
        iop = iomap_page_create(iter->inode, page);
@@ -370,6 +379,8 @@ static loff_t iomap_readahead_iter(const struct iomap_iter *iter,
                        ctx->cur_page_in_bio = false;
                }
                ret = iomap_readpage_iter(iter, ctx, done);
+               if (ret <= 0)
+                       return ret;
        }
 
        return done;
@@ -580,15 +591,10 @@ static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
 static int iomap_write_begin_inline(const struct iomap_iter *iter,
                struct page *page)
 {
-       int ret;
-
        /* needs more work for the tailpacking case; disable for now */
        if (WARN_ON_ONCE(iomap_iter_srcmap(iter)->offset != 0))
                return -EIO;
-       ret = iomap_read_inline_data(iter, page);
-       if (ret < 0)
-               return ret;
-       return 0;
+       return iomap_read_inline_data(iter, page);
 }
 
 static int iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
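With the hunks above, iomap_read_inline_data() copies the inline extent into the page, zero-fills the remainder, and returns 0 on success rather than a byte count. A tiny standalone sketch of the copy-then-zero step; the page size is illustrative, and the kernel version additionally checks PageUptodate and the in-page offset derived from i_size:

#include <errno.h>
#include <stddef.h>
#include <string.h>

#define PAGE_SZ 4096    /* illustrative page size */

/*
 * Copy `size` bytes of inline data to page offset `poff` and zero the
 * remainder of the page.  Returns 0 on success (nothing left to read)
 * or a negative errno, mirroring the new calling convention above.
 */
static int read_inline(unsigned char *page, size_t poff,
                       const void *inline_data, size_t size)
{
        if (size > PAGE_SZ - poff)
                return -EIO;    /* the inline extent must fit in the page */

        memcpy(page + poff, inline_data, size);
        memset(page + poff + size, 0, PAGE_SZ - poff - size);
        return 0;
}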
index 121f8e8..49c9da3 100644 (file)
@@ -1697,8 +1697,10 @@ int smb2_sess_setup(struct ksmbd_work *work)
        negblob_off = le16_to_cpu(req->SecurityBufferOffset);
        negblob_len = le16_to_cpu(req->SecurityBufferLength);
        if (negblob_off < offsetof(struct smb2_sess_setup_req, Buffer) ||
-           negblob_len < offsetof(struct negotiate_message, NegotiateFlags))
-               return -EINVAL;
+           negblob_len < offsetof(struct negotiate_message, NegotiateFlags)) {
+               rc = -EINVAL;
+               goto out_err;
+       }
 
        negblob = (struct negotiate_message *)((char *)&req->hdr.ProtocolId +
                        negblob_off);
@@ -4457,6 +4459,12 @@ static void get_file_stream_info(struct ksmbd_work *work,
                         &stat);
        file_info = (struct smb2_file_stream_info *)rsp->Buffer;
 
+       buf_free_len =
+               smb2_calc_max_out_buf_len(work, 8,
+                                         le32_to_cpu(req->OutputBufferLength));
+       if (buf_free_len < 0)
+               goto out;
+
        xattr_list_len = ksmbd_vfs_listxattr(path->dentry, &xattr_list);
        if (xattr_list_len < 0) {
                goto out;
@@ -4465,12 +4473,6 @@ static void get_file_stream_info(struct ksmbd_work *work,
                goto out;
        }
 
-       buf_free_len =
-               smb2_calc_max_out_buf_len(work, 8,
-                                         le32_to_cpu(req->OutputBufferLength));
-       if (buf_free_len < 0)
-               goto out;
-
        while (idx < xattr_list_len) {
                stream_name = xattr_list + idx;
                streamlen = strlen(stream_name);
@@ -4496,8 +4498,10 @@ static void get_file_stream_info(struct ksmbd_work *work,
                                     ":%s", &stream_name[XATTR_NAME_STREAM_LEN]);
 
                next = sizeof(struct smb2_file_stream_info) + streamlen * 2;
-               if (next > buf_free_len)
+               if (next > buf_free_len) {
+                       kfree(stream_buf);
                        break;
+               }
 
                file_info = (struct smb2_file_stream_info *)&rsp->Buffer[nbytes];
                streamlen  = smbConvertToUTF16((__le16 *)file_info->StreamName,
@@ -4514,6 +4518,7 @@ static void get_file_stream_info(struct ksmbd_work *work,
                file_info->NextEntryOffset = cpu_to_le32(next);
        }
 
+out:
        if (!S_ISDIR(stat.mode) &&
            buf_free_len >= sizeof(struct smb2_file_stream_info) + 7 * 2) {
                file_info = (struct smb2_file_stream_info *)
@@ -4522,14 +4527,13 @@ static void get_file_stream_info(struct ksmbd_work *work,
                                              "::$DATA", 7, conn->local_nls, 0);
                streamlen *= 2;
                file_info->StreamNameLength = cpu_to_le32(streamlen);
-               file_info->StreamSize = 0;
-               file_info->StreamAllocationSize = 0;
+               file_info->StreamSize = cpu_to_le64(stat.size);
+               file_info->StreamAllocationSize = cpu_to_le64(stat.blocks << 9);
                nbytes += sizeof(struct smb2_file_stream_info) + streamlen;
        }
 
        /* last entry offset should be 0 */
        file_info->NextEntryOffset = 0;
-out:
        kvfree(xattr_list);
 
        rsp->OutputBufferLength = cpu_to_le32(nbytes);
@@ -5068,7 +5072,7 @@ static int smb2_get_info_sec(struct ksmbd_work *work,
        if (addition_info & ~(OWNER_SECINFO | GROUP_SECINFO | DACL_SECINFO |
                              PROTECTED_DACL_SECINFO |
                              UNPROTECTED_DACL_SECINFO)) {
-               pr_err("Unsupported addition info: 0x%x)\n",
+               ksmbd_debug(SMB, "Unsupported addition info: 0x%x)\n",
                       addition_info);
 
                pntsd->revision = cpu_to_le16(1);
index 9320a42..7046f9b 100644 (file)
@@ -1008,8 +1008,8 @@ out:
 }
 EXPORT_SYMBOL(netfs_readpage);
 
-/**
- * netfs_skip_folio_read - prep a folio for writing without reading first
+/*
+ * Prepare a folio for writing without reading first
  * @folio: The folio being prepared
  * @pos: starting position for the write
  * @len: length of write
index dd53704..fda530d 100644 (file)
@@ -219,6 +219,7 @@ void nfs_set_cache_invalid(struct inode *inode, unsigned long flags)
                                          NFS_INO_DATA_INVAL_DEFER);
        else if (nfsi->cache_validity & NFS_INO_INVALID_DATA)
                nfsi->cache_validity &= ~NFS_INO_DATA_INVAL_DEFER;
+       trace_nfs_set_cache_invalid(inode, 0);
 }
 EXPORT_SYMBOL_GPL(nfs_set_cache_invalid);
 
index 08355b6..8b21ff1 100644 (file)
@@ -289,7 +289,9 @@ static void nfs42_copy_dest_done(struct inode *inode, loff_t pos, loff_t len)
        loff_t newsize = pos + len;
        loff_t end = newsize - 1;
 
-       truncate_pagecache_range(inode, pos, end);
+       WARN_ON_ONCE(invalidate_inode_pages2_range(inode->i_mapping,
+                               pos >> PAGE_SHIFT, end >> PAGE_SHIFT));
+
        spin_lock(&inode->i_lock);
        if (newsize > i_size_read(inode))
                i_size_write(inode, newsize);
index c8bad73..271e5f9 100644 (file)
@@ -1434,8 +1434,7 @@ static int nfs4_xdr_dec_clone(struct rpc_rqst *rqstp,
        status = decode_clone(xdr);
        if (status)
                goto out;
-       status = decode_getfattr(xdr, res->dst_fattr, res->server);
-
+       decode_getfattr(xdr, res->dst_fattr, res->server);
 out:
        res->rpc_status = status;
        return status;
index ecc4594..f63dfa0 100644 (file)
@@ -1998,6 +1998,10 @@ static int nfs4_handle_reclaim_lease_error(struct nfs_client *clp, int status)
                dprintk("%s: exit with error %d for server %s\n",
                                __func__, -EPROTONOSUPPORT, clp->cl_hostname);
                return -EPROTONOSUPPORT;
+       case -ENOSPC:
+               if (clp->cl_cons_state == NFS_CS_SESSION_INITING)
+                       nfs_mark_client_ready(clp, -EIO);
+               return -EIO;
        case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery
                                 * in nfs4_exchange_id */
        default:
index 21dac84..b3aee26 100644 (file)
@@ -162,6 +162,7 @@ DEFINE_NFS_INODE_EVENT_DONE(nfs_writeback_inode_exit);
 DEFINE_NFS_INODE_EVENT(nfs_fsync_enter);
 DEFINE_NFS_INODE_EVENT_DONE(nfs_fsync_exit);
 DEFINE_NFS_INODE_EVENT(nfs_access_enter);
+DEFINE_NFS_INODE_EVENT_DONE(nfs_set_cache_invalid);
 
 TRACE_EVENT(nfs_access_exit,
                TP_PROTO(
index 1667a7e..f93e69a 100644 (file)
@@ -52,6 +52,7 @@ config NTFS_DEBUG
 config NTFS_RW
        bool "NTFS write support"
        depends on NTFS_FS
+       depends on PAGE_SIZE_LESS_THAN_64KB
        help
          This enables the partial, but safe, write support in the NTFS driver.
 
index fbc9d81..23523b8 100644 (file)
@@ -1077,21 +1077,18 @@ xfs_attr_node_hasname(
 
        state = xfs_da_state_alloc(args);
        if (statep != NULL)
-               *statep = NULL;
+               *statep = state;
 
        /*
         * Search to see if name exists, and get back a pointer to it.
         */
        error = xfs_da3_node_lookup_int(state, &retval);
-       if (error) {
-               xfs_da_state_free(state);
-               return error;
-       }
+       if (error)
+               retval = error;
 
-       if (statep != NULL)
-               *statep = state;
-       else
+       if (!statep)
                xfs_da_state_free(state);
+
        return retval;
 }
 
@@ -1112,7 +1109,7 @@ xfs_attr_node_addname_find_attr(
         */
        retval = xfs_attr_node_hasname(args, &dac->da_state);
        if (retval != -ENOATTR && retval != -EEXIST)
-               return retval;
+               goto error;
 
        if (retval == -ENOATTR && (args->attr_flags & XATTR_REPLACE))
                goto error;
@@ -1337,7 +1334,7 @@ int xfs_attr_node_removename_setup(
 
        error = xfs_attr_node_hasname(args, state);
        if (error != -EEXIST)
-               return error;
+               goto out;
        error = 0;
 
        ASSERT((*state)->path.blk[(*state)->path.active - 1].bp != NULL);
index e147200..da4af21 100644 (file)
@@ -289,22 +289,6 @@ xfs_perag_clear_inode_tag(
        trace_xfs_perag_clear_inode_tag(mp, pag->pag_agno, tag, _RET_IP_);
 }
 
-static inline void
-xfs_inew_wait(
-       struct xfs_inode        *ip)
-{
-       wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_INEW_BIT);
-       DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_INEW_BIT);
-
-       do {
-               prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
-               if (!xfs_iflags_test(ip, XFS_INEW))
-                       break;
-               schedule();
-       } while (true);
-       finish_wait(wq, &wait.wq_entry);
-}
-
 /*
  * When we recycle a reclaimable inode, we need to re-initialise the VFS inode
  * part of the structure. This is made more complex by the fact we store
@@ -368,18 +352,13 @@ xfs_iget_recycle(
        ASSERT(!rwsem_is_locked(&inode->i_rwsem));
        error = xfs_reinit_inode(mp, inode);
        if (error) {
-               bool    wake;
-
                /*
                 * Re-initializing the inode failed, and we are in deep
                 * trouble.  Try to re-add it to the reclaim list.
                 */
                rcu_read_lock();
                spin_lock(&ip->i_flags_lock);
-               wake = !!__xfs_iflags_test(ip, XFS_INEW);
                ip->i_flags &= ~(XFS_INEW | XFS_IRECLAIM);
-               if (wake)
-                       wake_up_bit(&ip->i_flags, __XFS_INEW_BIT);
                ASSERT(ip->i_flags & XFS_IRECLAIMABLE);
                spin_unlock(&ip->i_flags_lock);
                rcu_read_unlock();
index 64b9bf3..6771f35 100644 (file)
@@ -3122,7 +3122,6 @@ xfs_rename(
         * appropriately.
         */
        if (flags & RENAME_WHITEOUT) {
-               ASSERT(!(flags & (RENAME_NOREPLACE | RENAME_EXCHANGE)));
                error = xfs_rename_alloc_whiteout(mnt_userns, target_dp, &wip);
                if (error)
                        return error;
index e635a3d..c447bf0 100644 (file)
@@ -231,8 +231,7 @@ static inline bool xfs_inode_has_bigtime(struct xfs_inode *ip)
 #define XFS_IRECLAIM           (1 << 0) /* started reclaiming this inode */
 #define XFS_ISTALE             (1 << 1) /* inode has been staled */
 #define XFS_IRECLAIMABLE       (1 << 2) /* inode can be reclaimed */
-#define __XFS_INEW_BIT         3        /* inode has just been allocated */
-#define XFS_INEW               (1 << __XFS_INEW_BIT)
+#define XFS_INEW               (1 << 3) /* inode has just been allocated */
 #define XFS_IPRESERVE_DM_FIELDS        (1 << 4) /* has legacy DMAPI fields set */
 #define XFS_ITRUNCATED         (1 << 5) /* truncated down so flush-on-close */
 #define XFS_IDIRTY_RELEASE     (1 << 6) /* dirty release already seen */
@@ -492,7 +491,6 @@ static inline void xfs_finish_inode_setup(struct xfs_inode *ip)
        xfs_iflags_clear(ip, XFS_INEW);
        barrier();
        unlock_new_inode(VFS_I(ip));
-       wake_up_bit(&ip->i_flags, __XFS_INEW_BIT);
 }
 
 static inline void xfs_setup_existing_inode(struct xfs_inode *ip)
index fedc0df..4f07afa 100644 (file)
@@ -50,13 +50,7 @@ static inline void flush_dcache_page(struct page *page)
 {
 }
 
-static inline void flush_dcache_folio(struct folio *folio) { }
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0
-#define ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO
-#endif
-
-#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO
-void flush_dcache_folio(struct folio *folio);
 #endif
 
 #ifndef flush_dcache_mmap_lock
index 668d007..b28f879 100644 (file)
@@ -1182,7 +1182,6 @@ int acpi_node_prop_get(const struct fwnode_handle *fwnode, const char *propname,
 
 struct fwnode_handle *acpi_get_next_subnode(const struct fwnode_handle *fwnode,
                                            struct fwnode_handle *child);
-struct fwnode_handle *acpi_node_get_parent(const struct fwnode_handle *fwnode);
 
 struct acpi_probe_entry;
 typedef bool (*acpi_probe_entry_validate_subtbl)(struct acpi_subtable_header *,
@@ -1287,12 +1286,6 @@ acpi_get_next_subnode(const struct fwnode_handle *fwnode,
        return NULL;
 }
 
-static inline struct fwnode_handle *
-acpi_node_get_parent(const struct fwnode_handle *fwnode)
-{
-       return NULL;
-}
-
 static inline struct fwnode_handle *
 acpi_graph_get_next_endpoint(const struct fwnode_handle *fwnode,
                             struct fwnode_handle *prev)
diff --git a/include/linux/cacheflush.h b/include/linux/cacheflush.h
new file mode 100644 (file)
index 0000000..fef8b60
--- /dev/null
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_CACHEFLUSH_H
+#define _LINUX_CACHEFLUSH_H
+
+#include <asm/cacheflush.h>
+
+#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
+#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO
+void flush_dcache_folio(struct folio *folio);
+#endif
+#else
+static inline void flush_dcache_folio(struct folio *folio)
+{
+}
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO 0
+#endif /* ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE */
+
+#endif /* _LINUX_CACHEFLUSH_H */
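
The new header gives every architecture a flush_dcache_folio() definition: an out-of-line implementation when the arch implements flush_dcache_page() but does not provide its own folio variant, and an inline no-op otherwise. A hedged usage sketch follows; my_fs_write_end() is a hypothetical helper, not part of this series, and assumes the caller holds a locked, up-to-date folio:

#include <linux/cacheflush.h>
#include <linux/pagemap.h>

/* Hypothetical write-completion helper: flush CPU caches, then publish the data. */
static void my_fs_write_end(struct folio *folio)
{
	flush_dcache_folio(folio);	/* no-op where ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE is 0 */
	folio_mark_dirty(folio);
	folio_unlock(folio);
}
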
index 1cb616f..bbf812c 100644 (file)
@@ -2518,7 +2518,6 @@ struct file_system_type {
 #define FS_USERNS_MOUNT                8       /* Can be mounted by userns root */
 #define FS_DISALLOW_NOTIFY_PERM        16      /* Disable fanotify permission events */
 #define FS_ALLOW_IDMAP         32      /* FS has been updated to handle vfs idmappings. */
-#define FS_THP_SUPPORT         8192    /* Remove once all fs converted */
 #define FS_RENAME_DOES_D_MOVE  32768   /* FS will handle d_move() during rename() internally. */
        int (*init_fs_context)(struct fs_context *);
        const struct fs_parameter_spec *parameters;
index 25aff0f..39bb9b4 100644 (file)
@@ -5,12 +5,11 @@
 #include <linux/fs.h>
 #include <linux/kernel.h>
 #include <linux/bug.h>
+#include <linux/cacheflush.h>
 #include <linux/mm.h>
 #include <linux/uaccess.h>
 #include <linux/hardirq.h>
 
-#include <asm/cacheflush.h>
-
 #include "highmem-internal.h"
 
 /**
@@ -231,10 +230,10 @@ static inline void tag_clear_highpage(struct page *page)
  * If we pass in a base or tail page, we can zero up to PAGE_SIZE.
  * If we pass in a head page, we can zero up to the size of the compound page.
  */
-#if defined(CONFIG_HIGHMEM) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
+#ifdef CONFIG_HIGHMEM
 void zero_user_segments(struct page *page, unsigned start1, unsigned end1,
                unsigned start2, unsigned end2);
-#else /* !HIGHMEM || !TRANSPARENT_HUGEPAGE */
+#else
 static inline void zero_user_segments(struct page *page,
                unsigned start1, unsigned end1,
                unsigned start2, unsigned end2)
@@ -254,7 +253,7 @@ static inline void zero_user_segments(struct page *page,
        for (i = 0; i < compound_nr(page); i++)
                flush_dcache_page(page + i);
 }
-#endif /* !HIGHMEM || !TRANSPARENT_HUGEPAGE */
+#endif
 
 static inline void zero_user_segment(struct page *page,
        unsigned start, unsigned end)
@@ -364,4 +363,42 @@ static inline void memzero_page(struct page *page, size_t offset, size_t len)
        kunmap_local(addr);
 }
 
+/**
+ * folio_zero_segments() - Zero two byte ranges in a folio.
+ * @folio: The folio to write to.
+ * @start1: The first byte to zero.
+ * @xend1: One more than the last byte in the first range.
+ * @start2: The first byte to zero in the second range.
+ * @xend2: One more than the last byte in the second range.
+ */
+static inline void folio_zero_segments(struct folio *folio,
+               size_t start1, size_t xend1, size_t start2, size_t xend2)
+{
+       zero_user_segments(&folio->page, start1, xend1, start2, xend2);
+}
+
+/**
+ * folio_zero_segment() - Zero a byte range in a folio.
+ * @folio: The folio to write to.
+ * @start: The first byte to zero.
+ * @xend: One more than the last byte to zero.
+ */
+static inline void folio_zero_segment(struct folio *folio,
+               size_t start, size_t xend)
+{
+       zero_user_segments(&folio->page, start, xend, 0, 0);
+}
+
+/**
+ * folio_zero_range() - Zero a byte range in a folio.
+ * @folio: The folio to write to.
+ * @start: The first byte to zero.
+ * @length: The number of bytes to zero.
+ */
+static inline void folio_zero_range(struct folio *folio,
+               size_t start, size_t length)
+{
+       zero_user_segments(&folio->page, start, start + length, 0, 0);
+}
+
 #endif /* _LINUX_HIGHMEM_H */
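
These helpers take byte offsets relative to the start of the (possibly multi-page) folio and forward to zero_user_segments(). A rough usage sketch (zero_folio_tail() is illustrative, not part of this series): a filesystem that must clear a folio from a given byte offset through its end could do:

#include <linux/highmem.h>
#include <linux/mm.h>

/* Hypothetical helper: zero the folio from 'offset' through its last byte. */
static void zero_folio_tail(struct folio *folio, size_t offset)
{
	if (offset < folio_size(folio))
		folio_zero_segment(folio, offset, folio_size(folio));
}
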
index aee8ff4..f45f133 100644 (file)
@@ -9,7 +9,7 @@
 #define _INTEL_ISH_CLIENT_IF_H_
 
 #include <linux/device.h>
-#include <linux/uuid.h>
+#include <linux/mod_devicetable.h>
 
 struct ishtp_cl_device;
 struct ishtp_device;
@@ -40,7 +40,7 @@ enum cl_state {
 struct ishtp_cl_driver {
        struct device_driver driver;
        const char *name;
-       const guid_t *guid;
+       const struct ishtp_device_id *id;
        int (*probe)(struct ishtp_cl_device *dev);
        void (*remove)(struct ishtp_cl_device *dev);
        int (*reset)(struct ishtp_cl_device *dev);
index e974caf..8c8f7a4 100644 (file)
@@ -153,6 +153,8 @@ struct kretprobe {
        struct kretprobe_holder *rph;
 };
 
+#define KRETPROBE_MAX_DATA_SIZE        4096
+
 struct kretprobe_instance {
        union {
                struct freelist_node freelist;
index 120e5e9..4da8d4a 100644 (file)
@@ -27,9 +27,9 @@ struct kvm_dirty_ring {
        int index;
 };
 
-#if (KVM_DIRTY_LOG_PAGE_OFFSET == 0)
+#ifndef CONFIG_HAVE_KVM_DIRTY_RING
 /*
- * If KVM_DIRTY_LOG_PAGE_OFFSET not defined, kvm_dirty_ring.o should
+ * If CONFIG_HAVE_KVM_DIRTY_RING is not defined, kvm_dirty_ring.o should
  * not be included as well, so define these nop functions for the arch.
  */
 static inline u32 kvm_dirty_ring_get_rsvd_entries(void)
@@ -74,7 +74,7 @@ static inline bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring)
        return true;
 }
 
-#else /* KVM_DIRTY_LOG_PAGE_OFFSET == 0 */
+#else /* CONFIG_HAVE_KVM_DIRTY_RING */
 
 u32 kvm_dirty_ring_get_rsvd_entries(void);
 int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring, int index, u32 size);
@@ -98,6 +98,6 @@ struct page *kvm_dirty_ring_get_page(struct kvm_dirty_ring *ring, u32 offset);
 void kvm_dirty_ring_free(struct kvm_dirty_ring *ring);
 bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring);
 
-#endif /* KVM_DIRTY_LOG_PAGE_OFFSET == 0 */
+#endif /* CONFIG_HAVE_KVM_DIRTY_RING */
 
 #endif /* KVM_DIRTY_RING_H */
index c310648..f8ed799 100644 (file)
 #include <linux/refcount.h>
 #include <linux/nospec.h>
 #include <linux/notifier.h>
+#include <linux/hashtable.h>
+#include <linux/interval_tree.h>
+#include <linux/rbtree.h>
+#include <linux/xarray.h>
 #include <asm/signal.h>
 
 #include <linux/kvm.h>
@@ -310,7 +314,9 @@ struct kvm_vcpu {
        struct mutex mutex;
        struct kvm_run *run;
 
+#ifndef __KVM_HAVE_ARCH_WQP
        struct rcuwait wait;
+#endif
        struct pid __rcu *pid;
        int sigset_active;
        sigset_t sigset;
@@ -355,11 +361,13 @@ struct kvm_vcpu {
        struct kvm_dirty_ring dirty_ring;
 
        /*
-        * The index of the most recently used memslot by this vCPU. It's ok
-        * if this becomes stale due to memslot changes since we always check
-        * it is a valid slot.
+        * The most recently used memslot by this vCPU and the slots generation
+        * for which it is valid.
+        * No wraparound protection is needed since generations won't overflow in
+        * thousands of years, even assuming 1M memslot operations per second.
         */
-       int last_used_slot;
+       struct kvm_memory_slot *last_used_slot;
+       u64 last_used_slot_gen;
 };
 
 /* must be called with irqs disabled */
@@ -424,7 +432,26 @@ static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)
  */
 #define KVM_MEM_MAX_NR_PAGES ((1UL << 31) - 1)
 
+/*
+ * Since at idle each memslot belongs to two memslot sets it has to contain
+ * two embedded nodes for each data structure that it forms a part of.
+ *
+ * Two memslot sets (one active and one inactive) are necessary so the VM
+ * continues to run on one memslot set while the other is being modified.
+ *
+ * These two memslot sets normally point to the same set of memslots.
+ * They can, however, be desynchronized when performing a memslot management
+ * operation by replacing the memslot to be modified by its copy.
+ * After the operation is complete, both memslot sets once again point to
+ * the same, common set of memslot data.
+ *
+ * The memslots themselves are independent of each other so they can be
+ * individually added or deleted.
+ */
 struct kvm_memory_slot {
+       struct hlist_node id_node[2];
+       struct interval_tree_node hva_node[2];
+       struct rb_node gfn_node[2];
        gfn_t base_gfn;
        unsigned long npages;
        unsigned long *dirty_bitmap;
@@ -435,7 +462,7 @@ struct kvm_memory_slot {
        u16 as_id;
 };
 
-static inline bool kvm_slot_dirty_track_enabled(struct kvm_memory_slot *slot)
+static inline bool kvm_slot_dirty_track_enabled(const struct kvm_memory_slot *slot)
 {
        return slot->flags & KVM_MEM_LOG_DIRTY_PAGES;
 }
@@ -519,18 +546,21 @@ static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu)
 }
 #endif
 
-/*
- * Note:
- * memslots are not sorted by id anymore, please use id_to_memslot()
- * to get the memslot by its id.
- */
 struct kvm_memslots {
        u64 generation;
-       /* The mapping table from slot id to the index in memslots[]. */
-       short id_to_index[KVM_MEM_SLOTS_NUM];
-       atomic_t last_used_slot;
-       int used_slots;
-       struct kvm_memory_slot memslots[];
+       atomic_long_t last_used_slot;
+       struct rb_root_cached hva_tree;
+       struct rb_root gfn_tree;
+       /*
+        * The mapping table from slot id to memslot.
+        *
+        * 7-bit bucket count matches the size of the old id to index array for
+        * 512 slots, while giving good performance with this slot count.
+        * Higher bucket counts bring only small performance improvements but
+        * always result in higher memory usage (even for lower memslot counts).
+        */
+       DECLARE_HASHTABLE(id_hash, 7);
+       int node_idx;
 };
 
 struct kvm {
@@ -551,8 +581,12 @@ struct kvm {
         */
        struct mutex slots_arch_lock;
        struct mm_struct *mm; /* userspace tied to this vm */
+       unsigned long nr_memslot_pages;
+       /* The two memslot sets - active and inactive (per address space) */
+       struct kvm_memslots __memslots[KVM_ADDRESS_SPACE_NUM][2];
+       /* The current active memslot set for each address space */
        struct kvm_memslots __rcu *memslots[KVM_ADDRESS_SPACE_NUM];
-       struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
+       struct xarray vcpu_array;
 
        /* Used to wait for completion of MMU notifiers.  */
        spinlock_t mn_invalidate_lock;
@@ -701,19 +735,17 @@ static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
 
        /* Pairs with smp_wmb() in kvm_vm_ioctl_create_vcpu.  */
        smp_rmb();
-       return kvm->vcpus[i];
+       return xa_load(&kvm->vcpu_array, i);
 }
 
-#define kvm_for_each_vcpu(idx, vcpup, kvm) \
-       for (idx = 0; \
-            idx < atomic_read(&kvm->online_vcpus) && \
-            (vcpup = kvm_get_vcpu(kvm, idx)) != NULL; \
-            idx++)
+#define kvm_for_each_vcpu(idx, vcpup, kvm)                \
+       xa_for_each_range(&kvm->vcpu_array, idx, vcpup, 0, \
+                         (atomic_read(&kvm->online_vcpus) - 1))
 
 static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id)
 {
        struct kvm_vcpu *vcpu = NULL;
-       int i;
+       unsigned long i;
 
        if (id < 0)
                return NULL;
@@ -727,13 +759,12 @@ static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id)
        return NULL;
 }
 
-#define kvm_for_each_memslot(memslot, slots)                           \
-       for (memslot = &slots->memslots[0];                             \
-            memslot < slots->memslots + slots->used_slots; memslot++)  \
-               if (WARN_ON_ONCE(!memslot->npages)) {                   \
-               } else
+static inline int kvm_vcpu_get_idx(struct kvm_vcpu *vcpu)
+{
+       return vcpu->vcpu_idx;
+}
 
-void kvm_vcpu_destroy(struct kvm_vcpu *vcpu);
+void kvm_destroy_vcpus(struct kvm *kvm);
 
 void vcpu_load(struct kvm_vcpu *vcpu);
 void vcpu_put(struct kvm_vcpu *vcpu);
@@ -793,21 +824,124 @@ static inline struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu)
        return __kvm_memslots(vcpu->kvm, as_id);
 }
 
+static inline bool kvm_memslots_empty(struct kvm_memslots *slots)
+{
+       return RB_EMPTY_ROOT(&slots->gfn_tree);
+}
+
+#define kvm_for_each_memslot(memslot, bkt, slots)                            \
+       hash_for_each(slots->id_hash, bkt, memslot, id_node[slots->node_idx]) \
+               if (WARN_ON_ONCE(!memslot->npages)) {                         \
+               } else
+
 static inline
 struct kvm_memory_slot *id_to_memslot(struct kvm_memslots *slots, int id)
 {
-       int index = slots->id_to_index[id];
        struct kvm_memory_slot *slot;
+       int idx = slots->node_idx;
 
-       if (index < 0)
-               return NULL;
+       hash_for_each_possible(slots->id_hash, slot, id_node[idx], id) {
+               if (slot->id == id)
+                       return slot;
+       }
+
+       return NULL;
+}
+
+/* Iterator used for walking memslots that overlap a gfn range. */
+struct kvm_memslot_iter {
+       struct kvm_memslots *slots;
+       struct rb_node *node;
+       struct kvm_memory_slot *slot;
+};
 
-       slot = &slots->memslots[index];
+static inline void kvm_memslot_iter_next(struct kvm_memslot_iter *iter)
+{
+       iter->node = rb_next(iter->node);
+       if (!iter->node)
+               return;
 
-       WARN_ON(slot->id != id);
-       return slot;
+       iter->slot = container_of(iter->node, struct kvm_memory_slot, gfn_node[iter->slots->node_idx]);
 }
 
+static inline void kvm_memslot_iter_start(struct kvm_memslot_iter *iter,
+                                         struct kvm_memslots *slots,
+                                         gfn_t start)
+{
+       int idx = slots->node_idx;
+       struct rb_node *tmp;
+       struct kvm_memory_slot *slot;
+
+       iter->slots = slots;
+
+       /*
+        * Find the so called "upper bound" of a key - the first node that has
+        * its key strictly greater than the searched one (the start gfn in our case).
+        */
+       iter->node = NULL;
+       for (tmp = slots->gfn_tree.rb_node; tmp; ) {
+               slot = container_of(tmp, struct kvm_memory_slot, gfn_node[idx]);
+               if (start < slot->base_gfn) {
+                       iter->node = tmp;
+                       tmp = tmp->rb_left;
+               } else {
+                       tmp = tmp->rb_right;
+               }
+       }
+
+       /*
+        * Find the slot with the lowest gfn that can possibly intersect with
+        * the range, so we'll ideally have slot start <= range start
+        */
+       if (iter->node) {
+               /*
+                * A NULL previous node means that the very first slot
+                * already has a higher start gfn.
+                * In this case slot start > range start.
+                */
+               tmp = rb_prev(iter->node);
+               if (tmp)
+                       iter->node = tmp;
+       } else {
+               /* a NULL node below means no slots */
+               iter->node = rb_last(&slots->gfn_tree);
+       }
+
+       if (iter->node) {
+               iter->slot = container_of(iter->node, struct kvm_memory_slot, gfn_node[idx]);
+
+               /*
+                * It is possible in the slot start < range start case that the
+                * found slot ends before or at range start (slot end <= range start)
+                * and so it does not overlap the requested range.
+                *
+                * In such a non-overlapping case the next slot (if it exists) will
+                * already have slot start > range start; otherwise the logic above
+                * would have found it instead of the current slot.
+                */
+               if (iter->slot->base_gfn + iter->slot->npages <= start)
+                       kvm_memslot_iter_next(iter);
+       }
+}
+
+static inline bool kvm_memslot_iter_is_valid(struct kvm_memslot_iter *iter, gfn_t end)
+{
+       if (!iter->node)
+               return false;
+
+       /*
+        * If this slot starts beyond or at the end of the range, so does
+        * every next one.
+        */
+       return iter->slot->base_gfn < end;
+}
+
+/* Iterate over each memslot at least partially intersecting [start, end) range */
+#define kvm_for_each_memslot_in_gfn_range(iter, slots, start, end)     \
+       for (kvm_memslot_iter_start(iter, slots, start);                \
+            kvm_memslot_iter_is_valid(iter, end);                      \
+            kvm_memslot_iter_next(iter))
+
 /*
  * KVM_SET_USER_MEMORY_REGION ioctl allows the following operations:
  * - create a new memory slot
@@ -833,11 +967,10 @@ int __kvm_set_memory_region(struct kvm *kvm,
 void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot);
 void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen);
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
-                               struct kvm_memory_slot *memslot,
-                               const struct kvm_userspace_memory_region *mem,
+                               const struct kvm_memory_slot *old,
+                               struct kvm_memory_slot *new,
                                enum kvm_mr_change change);
 void kvm_arch_commit_memory_region(struct kvm *kvm,
-                               const struct kvm_userspace_memory_region *mem,
                                struct kvm_memory_slot *old,
                                const struct kvm_memory_slot *new,
                                enum kvm_mr_change change);
@@ -863,9 +996,9 @@ void kvm_set_page_accessed(struct page *page);
 kvm_pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
 kvm_pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
                      bool *writable);
-kvm_pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
-kvm_pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn);
-kvm_pfn_t __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn,
+kvm_pfn_t gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn);
+kvm_pfn_t gfn_to_pfn_memslot_atomic(const struct kvm_memory_slot *slot, gfn_t gfn);
+kvm_pfn_t __gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn,
                               bool atomic, bool *async, bool write_fault,
                               bool *writable, hva_t *hva);
 
@@ -942,7 +1075,7 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
 bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn);
 bool kvm_vcpu_is_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn);
 unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn);
-void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot, gfn_t gfn);
+void mark_page_dirty_in_slot(struct kvm *kvm, const struct kvm_memory_slot *memslot, gfn_t gfn);
 void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
 
 struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu);
@@ -969,7 +1102,8 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn);
 void kvm_sigset_activate(struct kvm_vcpu *vcpu);
 void kvm_sigset_deactivate(struct kvm_vcpu *vcpu);
 
-void kvm_vcpu_block(struct kvm_vcpu *vcpu);
+void kvm_vcpu_halt(struct kvm_vcpu *vcpu);
+bool kvm_vcpu_block(struct kvm_vcpu *vcpu);
 void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu);
 void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu);
 bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu);
@@ -1152,6 +1286,20 @@ static inline struct rcuwait *kvm_arch_vcpu_get_wait(struct kvm_vcpu *vcpu)
 #endif
 }
 
+/*
+ * Wake a vCPU if necessary, but don't do any stats/metadata updates.  Returns
+ * true if the vCPU was blocking and was awakened, false otherwise.
+ */
+static inline bool __kvm_vcpu_wake_up(struct kvm_vcpu *vcpu)
+{
+       return !!rcuwait_wake_up(kvm_arch_vcpu_get_wait(vcpu));
+}
+
+static inline bool kvm_vcpu_is_blocking(struct kvm_vcpu *vcpu)
+{
+       return rcuwait_active(kvm_arch_vcpu_get_wait(vcpu));
+}
+
 #ifdef __KVM_HAVE_ARCH_INTC_INITIALIZED
 /*
  * returns true if the virtual interrupt controller is initialized and
@@ -1205,25 +1353,15 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
 bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args);
 
 /*
- * Returns a pointer to the memslot at slot_index if it contains gfn.
+ * Returns a pointer to the memslot if it contains gfn.
  * Otherwise returns NULL.
  */
 static inline struct kvm_memory_slot *
-try_get_memslot(struct kvm_memslots *slots, int slot_index, gfn_t gfn)
+try_get_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
 {
-       struct kvm_memory_slot *slot;
-
-       if (slot_index < 0 || slot_index >= slots->used_slots)
+       if (!slot)
                return NULL;
 
-       /*
-        * slot_index can come from vcpu->last_used_slot which is not kept
-        * in sync with userspace-controllable memslot deletion. So use nospec
-        * to prevent the CPU from speculating past the end of memslots[].
-        */
-       slot_index = array_index_nospec(slot_index, slots->used_slots);
-       slot = &slots->memslots[slot_index];
-
        if (gfn >= slot->base_gfn && gfn < slot->base_gfn + slot->npages)
                return slot;
        else
@@ -1231,63 +1369,63 @@ try_get_memslot(struct kvm_memslots *slots, int slot_index, gfn_t gfn)
 }
 
 /*
- * Returns a pointer to the memslot that contains gfn and records the index of
- * the slot in index. Otherwise returns NULL.
+ * Returns a pointer to the memslot that contains gfn. Otherwise returns NULL.
  *
- * IMPORTANT: Slots are sorted from highest GFN to lowest GFN!
+ * With "approx" set returns the memslot also when the address falls
+ * in a hole. In that case one of the memslots bordering the hole is
+ * returned.
  */
 static inline struct kvm_memory_slot *
-search_memslots(struct kvm_memslots *slots, gfn_t gfn, int *index)
+search_memslots(struct kvm_memslots *slots, gfn_t gfn, bool approx)
 {
-       int start = 0, end = slots->used_slots;
-       struct kvm_memory_slot *memslots = slots->memslots;
        struct kvm_memory_slot *slot;
-
-       if (unlikely(!slots->used_slots))
-               return NULL;
-
-       while (start < end) {
-               int slot = start + (end - start) / 2;
-
-               if (gfn >= memslots[slot].base_gfn)
-                       end = slot;
-               else
-                       start = slot + 1;
-       }
-
-       slot = try_get_memslot(slots, start, gfn);
-       if (slot) {
-               *index = start;
-               return slot;
+       struct rb_node *node;
+       int idx = slots->node_idx;
+
+       slot = NULL;
+       for (node = slots->gfn_tree.rb_node; node; ) {
+               slot = container_of(node, struct kvm_memory_slot, gfn_node[idx]);
+               if (gfn >= slot->base_gfn) {
+                       if (gfn < slot->base_gfn + slot->npages)
+                               return slot;
+                       node = node->rb_right;
+               } else
+                       node = node->rb_left;
        }
 
-       return NULL;
+       return approx ? slot : NULL;
 }
 
-/*
- * __gfn_to_memslot() and its descendants are here because it is called from
- * non-modular code in arch/powerpc/kvm/book3s_64_vio{,_hv}.c. gfn_to_memslot()
- * itself isn't here as an inline because that would bloat other code too much.
- */
 static inline struct kvm_memory_slot *
-__gfn_to_memslot(struct kvm_memslots *slots, gfn_t gfn)
+____gfn_to_memslot(struct kvm_memslots *slots, gfn_t gfn, bool approx)
 {
        struct kvm_memory_slot *slot;
-       int slot_index = atomic_read(&slots->last_used_slot);
 
-       slot = try_get_memslot(slots, slot_index, gfn);
+       slot = (struct kvm_memory_slot *)atomic_long_read(&slots->last_used_slot);
+       slot = try_get_memslot(slot, gfn);
        if (slot)
                return slot;
 
-       slot = search_memslots(slots, gfn, &slot_index);
+       slot = search_memslots(slots, gfn, approx);
        if (slot) {
-               atomic_set(&slots->last_used_slot, slot_index);
+               atomic_long_set(&slots->last_used_slot, (unsigned long)slot);
                return slot;
        }
 
        return NULL;
 }
 
+/*
+ * __gfn_to_memslot() and its descendants are here to allow arch code to inline
+ * the lookups in hot paths.  gfn_to_memslot() itself isn't here as an inline
+ * because that would bloat other code too much.
+ */
+static inline struct kvm_memory_slot *
+__gfn_to_memslot(struct kvm_memslots *slots, gfn_t gfn)
+{
+       return ____gfn_to_memslot(slots, gfn, false);
+}
+
 static inline unsigned long
 __gfn_to_hva_memslot(const struct kvm_memory_slot *slot, gfn_t gfn)
 {
@@ -1463,7 +1601,8 @@ struct _kvm_stats_desc {
        STATS_DESC_LOGHIST_TIME_NSEC(VCPU_GENERIC, halt_poll_fail_hist,        \
                        HALT_POLL_HIST_COUNT),                                 \
        STATS_DESC_LOGHIST_TIME_NSEC(VCPU_GENERIC, halt_wait_hist,             \
-                       HALT_POLL_HIST_COUNT)
+                       HALT_POLL_HIST_COUNT),                                 \
+       STATS_DESC_ICOUNTER(VCPU_GENERIC, blocking)
 
 extern struct dentry *kvm_debugfs_dir;
 
index 234eab0..888ef12 100644 (file)
@@ -87,6 +87,7 @@ struct kvm_vcpu_stat_generic {
        u64 halt_poll_success_hist[HALT_POLL_HIST_COUNT];
        u64 halt_poll_fail_hist[HALT_POLL_HIST_COUNT];
        u64 halt_wait_hist[HALT_POLL_HIST_COUNT];
+       u64 blocking;
 };
 
 #define KVM_STATS_NAME_SIZE    48
index 3636df9..fbaab44 100644 (file)
@@ -9698,7 +9698,10 @@ struct mlx5_ifc_mcam_access_reg_bits {
        u8         regs_84_to_68[0x11];
        u8         tracer_registers[0x4];
 
-       u8         regs_63_to_32[0x20];
+       u8         regs_63_to_46[0x12];
+       u8         mrtc[0x1];
+       u8         regs_44_to_32[0xd];
+
        u8         regs_31_to_0[0x20];
 };
 
index ae2e75d..4bb7197 100644 (file)
@@ -895,4 +895,18 @@ struct dfl_device_id {
        kernel_ulong_t driver_data;
 };
 
+/* ISHTP (Integrated Sensor Hub Transport Protocol) */
+
+#define ISHTP_MODULE_PREFIX    "ishtp:"
+
+/**
+ * struct ishtp_device_id - ISHTP device identifier
+ * @guid: GUID of the device.
+ * @driver_data: pointer to driver specific data
+ */
+struct ishtp_device_id {
+       guid_t guid;
+       kernel_ulong_t driver_data;
+};
+
 #endif /* LINUX_MOD_DEVICETABLE_H */
index 3ec4249..be5cb33 100644 (file)
@@ -4404,7 +4404,8 @@ static inline u32 netif_msg_init(int debug_value, int default_msg_enable_bits)
 static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu)
 {
        spin_lock(&txq->_xmit_lock);
-       txq->xmit_lock_owner = cpu;
+       /* Pairs with READ_ONCE() in __dev_queue_xmit() */
+       WRITE_ONCE(txq->xmit_lock_owner, cpu);
 }
 
 static inline bool __netif_tx_acquire(struct netdev_queue *txq)
@@ -4421,26 +4422,32 @@ static inline void __netif_tx_release(struct netdev_queue *txq)
 static inline void __netif_tx_lock_bh(struct netdev_queue *txq)
 {
        spin_lock_bh(&txq->_xmit_lock);
-       txq->xmit_lock_owner = smp_processor_id();
+       /* Pairs with READ_ONCE() in __dev_queue_xmit() */
+       WRITE_ONCE(txq->xmit_lock_owner, smp_processor_id());
 }
 
 static inline bool __netif_tx_trylock(struct netdev_queue *txq)
 {
        bool ok = spin_trylock(&txq->_xmit_lock);
-       if (likely(ok))
-               txq->xmit_lock_owner = smp_processor_id();
+
+       if (likely(ok)) {
+               /* Pairs with READ_ONCE() in __dev_queue_xmit() */
+               WRITE_ONCE(txq->xmit_lock_owner, smp_processor_id());
+       }
        return ok;
 }
 
 static inline void __netif_tx_unlock(struct netdev_queue *txq)
 {
-       txq->xmit_lock_owner = -1;
+       /* Pairs with READ_ONCE() in __dev_queue_xmit() */
+       WRITE_ONCE(txq->xmit_lock_owner, -1);
        spin_unlock(&txq->_xmit_lock);
 }
 
 static inline void __netif_tx_unlock_bh(struct netdev_queue *txq)
 {
-       txq->xmit_lock_owner = -1;
+       /* Pairs with READ_ONCE() in __dev_queue_xmit() */
+       WRITE_ONCE(txq->xmit_lock_owner, -1);
        spin_unlock_bh(&txq->_xmit_lock);
 }
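
These WRITE_ONCE() stores pair with a lockless READ_ONCE() of xmit_lock_owner in __dev_queue_xmit(), as the added comments note. A hedged sketch of the reader side (the real check lives in net/core/dev.c; the helper name below is illustrative):

#include <linux/netdevice.h>

/* Illustrative reader: is this CPU already holding the queue's xmit lock? */
static bool xmit_lock_owned_by_cpu(const struct netdev_queue *txq, int cpu)
{
	/* Pairs with the WRITE_ONCE() stores in __netif_tx_lock() and friends. */
	return READ_ONCE(txq->xmit_lock_owner) == cpu;
}
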
 
index 52ec4b5..b5f14d5 100644 (file)
@@ -686,13 +686,13 @@ static inline bool test_set_page_writeback(struct page *page)
 
 __PAGEFLAG(Head, head, PF_ANY) CLEARPAGEFLAG(Head, head, PF_ANY)
 
-/* Whether there are one or multiple pages in a folio */
-static inline bool folio_test_single(struct folio *folio)
-{
-       return !folio_test_head(folio);
-}
-
-static inline bool folio_test_multi(struct folio *folio)
+/**
+ * folio_test_large() - Does this folio contain more than one page?
+ * @folio: The folio to test.
+ *
+ * Return: True if the folio is larger than one page.
+ */
+static inline bool folio_test_large(struct folio *folio)
 {
        return folio_test_head(folio);
 }
index 1a0c646..6052464 100644 (file)
@@ -84,7 +84,7 @@ enum mapping_flags {
        AS_EXITING      = 4,    /* final truncate in progress */
        /* writeback related tags are not used */
        AS_NO_WRITEBACK_TAGS = 5,
-       AS_THP_SUPPORT = 6,     /* THPs supported */
+       AS_LARGE_FOLIO_SUPPORT = 6,
 };
 
 /**
@@ -176,9 +176,25 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask)
        m->gfp_mask = mask;
 }
 
-static inline bool mapping_thp_support(struct address_space *mapping)
+/**
+ * mapping_set_large_folios() - Indicate the file supports large folios.
+ * @mapping: The file.
+ *
+ * The filesystem should call this function in its inode constructor to
+ * indicate that the VFS can use large folios to cache the contents of
+ * the file.
+ *
+ * Context: This should not be called while the inode is active as it
+ * is non-atomic.
+ */
+static inline void mapping_set_large_folios(struct address_space *mapping)
+{
+       __set_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags);
+}
+
+static inline bool mapping_large_folio_support(struct address_space *mapping)
 {
-       return test_bit(AS_THP_SUPPORT, &mapping->flags);
+       return test_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags);
 }
 
 static inline int filemap_nr_thps(struct address_space *mapping)
@@ -193,7 +209,7 @@ static inline int filemap_nr_thps(struct address_space *mapping)
 static inline void filemap_nr_thps_inc(struct address_space *mapping)
 {
 #ifdef CONFIG_READ_ONLY_THP_FOR_FS
-       if (!mapping_thp_support(mapping))
+       if (!mapping_large_folio_support(mapping))
                atomic_inc(&mapping->nr_thps);
 #else
        WARN_ON_ONCE(1);
@@ -203,7 +219,7 @@ static inline void filemap_nr_thps_inc(struct address_space *mapping)
 static inline void filemap_nr_thps_dec(struct address_space *mapping)
 {
 #ifdef CONFIG_READ_ONLY_THP_FOR_FS
-       if (!mapping_thp_support(mapping))
+       if (!mapping_large_folio_support(mapping))
                atomic_dec(&mapping->nr_thps);
 #else
        WARN_ON_ONCE(1);
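
As the kerneldoc above notes, mapping_set_large_folios() is meant to be called once, from the filesystem's inode set-up path, before the mapping is reachable through the page cache. A hedged example of such a call site (the myfs_* names are hypothetical):

#include <linux/fs.h>
#include <linux/pagemap.h>

/* Hypothetical inode constructor opting the mapping in to large folios. */
static void myfs_init_mapping(struct inode *inode)
{
	inode->i_mapping->a_ops = &myfs_aops;	/* myfs_aops assumed to be defined elsewhere */
	mapping_set_large_folios(inode->i_mapping);
}
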
index ae04968..9afd34a 100644 (file)
@@ -37,6 +37,7 @@
 #define PTP_MSGTYPE_PDELAY_RESP 0x3
 
 #define PTP_EV_PORT 319
+#define PTP_GEN_PORT 320
 #define PTP_GEN_BIT 0x08 /* indicates general message, if set in message type */
 
 #define OFF_PTP_SOURCE_UUID    22 /* PTPv1 only */
index 6c9f19a..ce3c582 100644 (file)
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
-extern void task_cputime(struct task_struct *t,
+extern bool task_cputime(struct task_struct *t,
                         u64 *utime, u64 *stime);
 extern u64 task_gtime(struct task_struct *t);
 #else
-static inline void task_cputime(struct task_struct *t,
+static inline bool task_cputime(struct task_struct *t,
                                u64 *utime, u64 *stime)
 {
        *utime = t->utime;
        *stime = t->stime;
+       return false;
 }
 
 static inline u64 task_gtime(struct task_struct *t)
index bf21591..0cda618 100644 (file)
@@ -27,9 +27,7 @@ static inline bool siphash_key_is_zero(const siphash_key_t *key)
 }
 
 u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key);
-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key);
-#endif
 
 u64 siphash_1u64(const u64 a, const siphash_key_t *key);
 u64 siphash_2u64(const u64 a, const u64 b, const siphash_key_t *key);
@@ -82,10 +80,9 @@ static inline u64 ___siphash_aligned(const __le64 *data, size_t len,
 static inline u64 siphash(const void *data, size_t len,
                          const siphash_key_t *key)
 {
-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
-       if (!IS_ALIGNED((unsigned long)data, SIPHASH_ALIGNMENT))
+       if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ||
+           !IS_ALIGNED((unsigned long)data, SIPHASH_ALIGNMENT))
                return __siphash_unaligned(data, len, key);
-#endif
        return ___siphash_aligned(data, len, key);
 }
 
@@ -96,10 +93,8 @@ typedef struct {
 
 u32 __hsiphash_aligned(const void *data, size_t len,
                       const hsiphash_key_t *key);
-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 u32 __hsiphash_unaligned(const void *data, size_t len,
                         const hsiphash_key_t *key);
-#endif
 
 u32 hsiphash_1u32(const u32 a, const hsiphash_key_t *key);
 u32 hsiphash_2u32(const u32 a, const u32 b, const hsiphash_key_t *key);
@@ -135,10 +130,9 @@ static inline u32 ___hsiphash_aligned(const __le32 *data, size_t len,
 static inline u32 hsiphash(const void *data, size_t len,
                           const hsiphash_key_t *key)
 {
-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
-       if (!IS_ALIGNED((unsigned long)data, HSIPHASH_ALIGNMENT))
+       if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ||
+           !IS_ALIGNED((unsigned long)data, HSIPHASH_ALIGNMENT))
                return __hsiphash_unaligned(data, len, key);
-#endif
        return ___hsiphash_aligned(data, len, key);
 }
 
index 44d0e09..41edbc0 100644 (file)
@@ -152,7 +152,6 @@ size_t virtio_max_dma_size(struct virtio_device *vdev);
  * @feature_table_size: number of entries in the feature table array.
  * @feature_table_legacy: same as feature_table but when working in legacy mode.
  * @feature_table_size_legacy: number of entries in feature table legacy array.
- * @suppress_used_validation: set to not have core validate used length
  * @probe: the function to call when a device is found.  Returns 0 or -errno.
  * @scan: optional function to call after successful probe; intended
  *    for virtio-scsi to invoke a scan.
@@ -169,7 +168,6 @@ struct virtio_driver {
        unsigned int feature_table_size;
        const unsigned int *feature_table_legacy;
        unsigned int feature_table_size_legacy;
-       bool suppress_used_validation;
        int (*validate)(struct virtio_device *dev);
        int (*probe)(struct virtio_device *dev);
        void (*scan)(struct virtio_device *dev);
index 4202c60..7994455 100644 (file)
@@ -133,7 +133,7 @@ static inline void sk_mark_napi_id(struct sock *sk, const struct sk_buff *skb)
        if (unlikely(READ_ONCE(sk->sk_napi_id) != skb->napi_id))
                WRITE_ONCE(sk->sk_napi_id, skb->napi_id);
 #endif
-       sk_rx_queue_set(sk, skb);
+       sk_rx_queue_update(sk, skb);
 }
 
 static inline void __sk_mark_napi_id_once(struct sock *sk, unsigned int napi_id)
index 6763467..df6622a 100644 (file)
@@ -79,6 +79,17 @@ static inline void dst_cache_reset(struct dst_cache *dst_cache)
        dst_cache->reset_ts = jiffies;
 }
 
+/**
+ *     dst_cache_reset_now - invalidate the cache contents immediately
+ *     @dst_cache: the cache
+ *
+ *     The caller must be sure there are no concurrent users, as this frees
+ *     all dst_cache users immediately, rather than waiting for the next
+ *     per-cpu usage like dst_cache_reset does. Most callers should use the
+ *     higher speed lazily-freed dst_cache_reset function instead.
+ */
+void dst_cache_reset_now(struct dst_cache *dst_cache);
+
 /**
  *     dst_cache_init - initialize the cache, allocating the required storage
  *     @dst_cache: the cache
index 4b10676..bd07484 100644 (file)
@@ -69,7 +69,7 @@ struct fib_rules_ops {
        int                     (*action)(struct fib_rule *,
                                          struct flowi *, int,
                                          struct fib_lookup_arg *);
-       bool                    (*suppress)(struct fib_rule *,
+       bool                    (*suppress)(struct fib_rule *, int,
                                            struct fib_lookup_arg *);
        int                     (*match)(struct fib_rule *,
                                         struct flowi *, int);
@@ -218,7 +218,9 @@ INDIRECT_CALLABLE_DECLARE(int fib4_rule_action(struct fib_rule *rule,
                            struct fib_lookup_arg *arg));
 
 INDIRECT_CALLABLE_DECLARE(bool fib6_rule_suppress(struct fib_rule *rule,
+                                               int flags,
                                                struct fib_lookup_arg *arg));
 INDIRECT_CALLABLE_DECLARE(bool fib4_rule_suppress(struct fib_rule *rule,
+                                               int flags,
                                                struct fib_lookup_arg *arg));
 #endif
index c412dde..83b8070 100644 (file)
@@ -485,6 +485,7 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
                 struct fib6_config *cfg, gfp_t gfp_flags,
                 struct netlink_ext_ack *extack);
 void fib6_nh_release(struct fib6_nh *fib6_nh);
+void fib6_nh_release_dsts(struct fib6_nh *fib6_nh);
 
 int call_fib6_entry_notifiers(struct net *net,
                              enum fib_event_type event_type,
index ab5348e..3417ba2 100644 (file)
@@ -438,7 +438,7 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
 #ifdef CONFIG_IP_ROUTE_CLASSID
 static inline int fib_num_tclassid_users(struct net *net)
 {
-       return net->ipv4.fib_num_tclassid_users;
+       return atomic_read(&net->ipv4.fib_num_tclassid_users);
 }
 #else
 static inline int fib_num_tclassid_users(struct net *net)
index afbce90..45e0339 100644 (file)
@@ -47,6 +47,7 @@ struct ipv6_stub {
                            struct fib6_config *cfg, gfp_t gfp_flags,
                            struct netlink_ext_ack *extack);
        void (*fib6_nh_release)(struct fib6_nh *fib6_nh);
+       void (*fib6_nh_release_dsts)(struct fib6_nh *fib6_nh);
        void (*fib6_update_sernum)(struct net *net, struct fib6_info *rt);
        int (*ip6_del_rt)(struct net *net, struct fib6_info *rt, bool skip_notify);
        void (*fib6_rt_update)(struct net *net, struct fib6_info *rt,
index 2f65701..6c5b2ef 100644 (file)
@@ -65,7 +65,7 @@ struct netns_ipv4 {
        bool                    fib_has_custom_local_routes;
        bool                    fib_offload_disabled;
 #ifdef CONFIG_IP_ROUTE_CLASSID
-       int                     fib_num_tclassid_users;
+       atomic_t                fib_num_tclassid_users;
 #endif
        struct hlist_head       *fib_table_hash;
        struct sock             *fibnl;
index ddcee12..145acb8 100644 (file)
@@ -19,6 +19,8 @@
  *
  */
 
+#include <linux/types.h>
+
 #define NL802154_GENL_NAME "nl802154"
 
 enum nl802154_commands {
@@ -150,10 +152,9 @@ enum nl802154_attrs {
 };
 
 enum nl802154_iftype {
-       /* for backwards compatibility TODO */
-       NL802154_IFTYPE_UNSPEC = -1,
+       NL802154_IFTYPE_UNSPEC = (~(__u32)0),
 
-       NL802154_IFTYPE_NODE,
+       NL802154_IFTYPE_NODE = 0,
        NL802154_IFTYPE_MONITOR,
        NL802154_IFTYPE_COORD,
 
index b32906e..bea21ff 100644 (file)
@@ -1913,18 +1913,31 @@ static inline int sk_tx_queue_get(const struct sock *sk)
        return -1;
 }
 
-static inline void sk_rx_queue_set(struct sock *sk, const struct sk_buff *skb)
+static inline void __sk_rx_queue_set(struct sock *sk,
+                                    const struct sk_buff *skb,
+                                    bool force_set)
 {
 #ifdef CONFIG_SOCK_RX_QUEUE_MAPPING
        if (skb_rx_queue_recorded(skb)) {
                u16 rx_queue = skb_get_rx_queue(skb);
 
-               if (unlikely(READ_ONCE(sk->sk_rx_queue_mapping) != rx_queue))
+               if (force_set ||
+                   unlikely(READ_ONCE(sk->sk_rx_queue_mapping) != rx_queue))
                        WRITE_ONCE(sk->sk_rx_queue_mapping, rx_queue);
        }
 #endif
 }
 
+static inline void sk_rx_queue_set(struct sock *sk, const struct sk_buff *skb)
+{
+       __sk_rx_queue_set(sk, skb, true);
+}
+
+static inline void sk_rx_queue_update(struct sock *sk, const struct sk_buff *skb)
+{
+       __sk_rx_queue_set(sk, skb, false);
+}
+
 static inline void sk_rx_queue_clear(struct sock *sk)
 {
 #ifdef CONFIG_SOCK_RX_QUEUE_MAPPING
@@ -2430,19 +2443,22 @@ static inline void sk_stream_moderate_sndbuf(struct sock *sk)
  * @sk: socket
  *
  * Use the per task page_frag instead of the per socket one for
- * optimization when we know that we're in the normal context and owns
+ * optimization when we know that we're in process context and own
  * everything that's associated with %current.
  *
- * gfpflags_allow_blocking() isn't enough here as direct reclaim may nest
- * inside other socket operations and end up recursing into sk_page_frag()
- * while it's already in use.
+ * Both direct reclaim and page faults can nest inside other
+ * socket operations and end up recursing into sk_page_frag()
+ * while it's already in use: explicitly avoid task page_frag
+ * usage if the caller is potentially doing any of them.
+ * This assumes that page fault handlers use the GFP_NOFS flags.
  *
  * Return: a per task page_frag if context allows that,
  * otherwise a per socket one.
  */
 static inline struct page_frag *sk_page_frag(struct sock *sk)
 {
-       if (gfpflags_normal_context(sk->sk_allocation))
+       if ((sk->sk_allocation & (__GFP_DIRECT_RECLAIM | __GFP_MEMALLOC | __GFP_FS)) ==
+           (__GFP_DIRECT_RECLAIM | __GFP_FS))
                return &current->task_frag;
 
        return &sk->sk_frag;
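
The new test can be read as: use the per-task page_frag only when the allocation mode allows direct reclaim and FS re-entry, and is not a memalloc one. A hedged restatement of that rule, factored into a helper (sk_allocation_allows_task_frag() is illustrative, not part of the patch):

#include <linux/gfp.h>

/* Illustrative: GFP_KERNEL qualifies; GFP_ATOMIC, GFP_NOFS and __GFP_MEMALLOC users do not. */
static inline bool sk_allocation_allows_task_frag(gfp_t gfp)
{
	return (gfp & (__GFP_DIRECT_RECLAIM | __GFP_MEMALLOC | __GFP_FS)) ==
	       (__GFP_DIRECT_RECLAIM | __GFP_FS);
}
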
index eeb1142..4d1dfa1 100644 (file)
@@ -703,6 +703,8 @@ int ocelot_vcap_filter_add(struct ocelot *ocelot,
                           struct netlink_ext_ack *extack);
 int ocelot_vcap_filter_del(struct ocelot *ocelot,
                           struct ocelot_vcap_filter *rule);
+int ocelot_vcap_filter_replace(struct ocelot *ocelot,
+                              struct ocelot_vcap_filter *filter);
 struct ocelot_vcap_filter *
 ocelot_vcap_block_find_filter_by_id(struct ocelot_vcap_block *block,
                                    unsigned long cookie, bool tc_offload);
index 31f4c4f..ac0893d 100644 (file)
@@ -147,7 +147,7 @@ struct snd_soc_acpi_link_adr {
  */
 /* Descriptor for SST ASoC machine driver */
 struct snd_soc_acpi_mach {
-       const u8 id[ACPI_ID_LEN];
+       u8 id[ACPI_ID_LEN];
        const struct snd_soc_acpi_codecs *comp_ids;
        const u32 link_mask;
        const struct snd_soc_acpi_link_adr *links;
index 3ba6331..c9048f3 100644 (file)
@@ -8,7 +8,7 @@
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM rpcgss
 
-#if !defined(_TRACE_RPCRDMA_H) || defined(TRACE_HEADER_MULTI_READ)
+#if !defined(_TRACE_RPCGSS_H) || defined(TRACE_HEADER_MULTI_READ)
 #define _TRACE_RPCGSS_H
 
 #include <linux/tracepoint.h>
index a13e20c..0512fde 100644 (file)
@@ -196,6 +196,13 @@ struct drm_virtgpu_context_init {
        __u64 ctx_set_params;
 };
 
+/*
+ * Event code that's given when VIRTGPU_CONTEXT_PARAM_POLL_RINGS_MASK is in
+ * effect.  The event size is sizeof(drm_event), since there is no additional
+ * payload.
+ */
+#define VIRTGPU_EVENT_FENCE_SIGNALED 0x90000000
+
 #define DRM_IOCTL_VIRTGPU_MAP \
        DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_MAP, struct drm_virtgpu_map)
 
index 5da4ee2..c0c2f3e 100644 (file)
 #define ETH_P_IFE      0xED3E          /* ForCES inter-FE LFB type */
 #define ETH_P_AF_IUCV   0xFBFB         /* IBM af_iucv [ NOT AN OFFICIALLY REGISTERED ID ] */
 
-#define ETH_P_802_3_MIN        0x0600          /* If the value in the ethernet type is less than this value
+#define ETH_P_802_3_MIN        0x0600          /* If the value in the ethernet type is more than this value
                                         * then the frame is Ethernet II. Else it is 802.3 */
 
 /*
index b94074c..b13eb86 100644 (file)
@@ -112,6 +112,7 @@ struct xenbus_driver {
        const char *name;       /* defaults to ids[0].devicetype */
        const struct xenbus_device_id *ids;
        bool allow_rebind; /* avoid setting xenstore closed during remove */
+       bool not_essential;     /* is not mandatory for boot progress */
        int (*probe)(struct xenbus_device *dev,
                     const struct xenbus_device_id *id);
        void (*otherend_changed)(struct xenbus_device *dev,
index 192e43a..407a256 100644 (file)
@@ -31,6 +31,7 @@
 #include <linux/smpboot.h>
 #include <linux/relay.h>
 #include <linux/slab.h>
+#include <linux/scs.h>
 #include <linux/percpu-rwsem.h>
 #include <linux/cpuset.h>
 
@@ -587,6 +588,12 @@ static int bringup_cpu(unsigned int cpu)
        struct task_struct *idle = idle_thread_get(cpu);
        int ret;
 
+       /*
+        * Reset stale stack state from the last time this CPU was online.
+        */
+       scs_task_reset(idle);
+       kasan_unpoison_task_stack(idle);
+
        /*
         * Some architectures have to walk the irq descriptors to
         * setup the vector space for the cpu which comes online.
index 523106a..30d94f6 100644 (file)
@@ -9759,6 +9759,9 @@ void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size,
                                continue;
                        if (event->attr.config != entry->type)
                                continue;
+                       /* Cannot deliver synchronous signal to other task. */
+                       if (event->attr.sigtrap)
+                               continue;
                        if (perf_tp_event_match(event, &data, regs))
                                perf_swevent_event(event, count, &data, regs);
                }
index e9db0c8..21eccc9 100644 (file)
@@ -2086,6 +2086,9 @@ int register_kretprobe(struct kretprobe *rp)
                }
        }
 
+       if (rp->data_size > KRETPROBE_MAX_DATA_SIZE)
+               return -E2BIG;
+
        rp->kp.pre_handler = pre_handler_kretprobe;
        rp->kp.post_handler = NULL;
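
Together with the KRETPROBE_MAX_DATA_SIZE definition added to <linux/kprobes.h>, this turns an oversized per-instance data request into a clean failure rather than a huge allocation. A hedged sketch of a registration that now fails (the handler and probed symbol are illustrative):

#include <linux/kprobes.h>

static int my_ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
{
	return 0;
}

static struct kretprobe my_rp = {
	.kp.symbol_name	= "kernel_clone",		/* illustrative target */
	.handler	= my_ret_handler,
	.data_size	= 2 * KRETPROBE_MAX_DATA_SIZE,	/* register_kretprobe() now returns -E2BIG */
	.maxactive	= 16,
};
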
 
index c51387a..04a74d0 100644 (file)
  * atomic_long_cmpxchg() will be used to obtain writer lock.
  *
  * There are three places where the lock handoff bit may be set or cleared.
- * 1) rwsem_mark_wake() for readers.
- * 2) rwsem_try_write_lock() for writers.
- * 3) Error path of rwsem_down_write_slowpath().
+ * 1) rwsem_mark_wake() for readers            -- set, clear
+ * 2) rwsem_try_write_lock() for writers       -- set, clear
+ * 3) rwsem_del_waiter()                       -- clear
  *
  * For all the above cases, wait_lock will be held. A writer must also
  * be the first one in the wait_list to be eligible for setting the handoff
@@ -334,6 +334,9 @@ struct rwsem_waiter {
        struct task_struct *task;
        enum rwsem_waiter_type type;
        unsigned long timeout;
+
+       /* Writer only, not initialized in reader */
+       bool handoff_set;
 };
 #define rwsem_first_waiter(sem) \
        list_first_entry(&sem->wait_list, struct rwsem_waiter, list)
@@ -344,12 +347,6 @@ enum rwsem_wake_type {
        RWSEM_WAKE_READ_OWNED   /* Waker thread holds the read lock */
 };
 
-enum writer_wait_state {
-       WRITER_NOT_FIRST,       /* Writer is not first in wait list */
-       WRITER_FIRST,           /* Writer is first in wait list     */
-       WRITER_HANDOFF          /* Writer is first & handoff needed */
-};
-
 /*
  * The typical HZ value is either 250 or 1000. So set the minimum waiting
  * time to at least 4ms or 1 jiffy (if it is higher than 4ms) in the wait
@@ -365,6 +362,31 @@ enum writer_wait_state {
  */
 #define MAX_READERS_WAKEUP     0x100
 
+static inline void
+rwsem_add_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter)
+{
+       lockdep_assert_held(&sem->wait_lock);
+       list_add_tail(&waiter->list, &sem->wait_list);
+       /* caller will set RWSEM_FLAG_WAITERS */
+}
+
+/*
+ * Remove a waiter from the wait_list and clear flags.
+ *
+ * Both rwsem_mark_wake() and rwsem_try_write_lock() contain a full 'copy' of
+ * this function. Modify with care.
+ */
+static inline void
+rwsem_del_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter)
+{
+       lockdep_assert_held(&sem->wait_lock);
+       list_del(&waiter->list);
+       if (likely(!list_empty(&sem->wait_list)))
+               return;
+
+       atomic_long_andnot(RWSEM_FLAG_HANDOFF | RWSEM_FLAG_WAITERS, &sem->count);
+}
+
 /*
  * handle the lock release when processes blocked on it that can now run
  * - if we come here from up_xxxx(), then the RWSEM_FLAG_WAITERS bit must
@@ -376,6 +398,8 @@ enum writer_wait_state {
  *   preferably when the wait_lock is released
  * - woken process blocks are discarded from the list after having task zeroed
  * - writers are only marked woken if downgrading is false
+ *
+ * Implies rwsem_del_waiter() for all woken readers.
  */
 static void rwsem_mark_wake(struct rw_semaphore *sem,
                            enum rwsem_wake_type wake_type,
@@ -490,18 +514,25 @@ static void rwsem_mark_wake(struct rw_semaphore *sem,
 
        adjustment = woken * RWSEM_READER_BIAS - adjustment;
        lockevent_cond_inc(rwsem_wake_reader, woken);
+
+       oldcount = atomic_long_read(&sem->count);
        if (list_empty(&sem->wait_list)) {
-               /* hit end of list above */
+               /*
+                * Combined with list_move_tail() above, this implies
+                * rwsem_del_waiter().
+                */
                adjustment -= RWSEM_FLAG_WAITERS;
+               if (oldcount & RWSEM_FLAG_HANDOFF)
+                       adjustment -= RWSEM_FLAG_HANDOFF;
+       } else if (woken) {
+               /*
+                * When we've woken a reader, we no longer need to force
+                * writers to give up the lock and we can clear HANDOFF.
+                */
+               if (oldcount & RWSEM_FLAG_HANDOFF)
+                       adjustment -= RWSEM_FLAG_HANDOFF;
        }
 
-       /*
-        * When we've woken a reader, we no longer need to force writers
-        * to give up the lock and we can clear HANDOFF.
-        */
-       if (woken && (atomic_long_read(&sem->count) & RWSEM_FLAG_HANDOFF))
-               adjustment -= RWSEM_FLAG_HANDOFF;
-
        if (adjustment)
                atomic_long_add(adjustment, &sem->count);
 
@@ -532,12 +563,12 @@ static void rwsem_mark_wake(struct rw_semaphore *sem,
  * race conditions between checking the rwsem wait list and setting the
  * sem->count accordingly.
  *
- * If wstate is WRITER_HANDOFF, it will make sure that either the handoff
- * bit is set or the lock is acquired with handoff bit cleared.
+ * Implies rwsem_del_waiter() on success.
  */
 static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
-                                       enum writer_wait_state wstate)
+                                       struct rwsem_waiter *waiter)
 {
+       bool first = rwsem_first_waiter(sem) == waiter;
        long count, new;
 
        lockdep_assert_held(&sem->wait_lock);
@@ -546,13 +577,19 @@ static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
        do {
                bool has_handoff = !!(count & RWSEM_FLAG_HANDOFF);
 
-               if (has_handoff && wstate == WRITER_NOT_FIRST)
-                       return false;
+               if (has_handoff) {
+                       if (!first)
+                               return false;
+
+                       /* First waiter inherits a previously set handoff bit */
+                       waiter->handoff_set = true;
+               }
 
                new = count;
 
                if (count & RWSEM_LOCK_MASK) {
-                       if (has_handoff || (wstate != WRITER_HANDOFF))
+                       if (has_handoff || (!rt_task(waiter->task) &&
+                                           !time_after(jiffies, waiter->timeout)))
                                return false;
 
                        new |= RWSEM_FLAG_HANDOFF;
@@ -569,9 +606,17 @@ static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
         * We have either acquired the lock with handoff bit cleared or
         * set the handoff bit.
         */
-       if (new & RWSEM_FLAG_HANDOFF)
+       if (new & RWSEM_FLAG_HANDOFF) {
+               waiter->handoff_set = true;
+               lockevent_inc(rwsem_wlock_handoff);
                return false;
+       }
 
+       /*
+        * Have rwsem_try_write_lock() fully imply rwsem_del_waiter() on
+        * success.
+        */
+       list_del(&waiter->list);
        rwsem_set_owner(sem);
        return true;
 }
@@ -956,7 +1001,7 @@ queue:
                }
                adjustment += RWSEM_FLAG_WAITERS;
        }
-       list_add_tail(&waiter.list, &sem->wait_list);
+       rwsem_add_waiter(sem, &waiter);
 
        /* we're now waiting on the lock, but no longer actively locking */
        count = atomic_long_add_return(adjustment, &sem->count);
@@ -1002,11 +1047,7 @@ queue:
        return sem;
 
 out_nolock:
-       list_del(&waiter.list);
-       if (list_empty(&sem->wait_list)) {
-               atomic_long_andnot(RWSEM_FLAG_WAITERS|RWSEM_FLAG_HANDOFF,
-                                  &sem->count);
-       }
+       rwsem_del_waiter(sem, &waiter);
        raw_spin_unlock_irq(&sem->wait_lock);
        __set_current_state(TASK_RUNNING);
        lockevent_inc(rwsem_rlock_fail);
@@ -1020,9 +1061,7 @@ static struct rw_semaphore *
 rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
 {
        long count;
-       enum writer_wait_state wstate;
        struct rwsem_waiter waiter;
-       struct rw_semaphore *ret = sem;
        DEFINE_WAKE_Q(wake_q);
 
        /* do optimistic spinning and steal lock if possible */
@@ -1038,16 +1077,13 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
        waiter.task = current;
        waiter.type = RWSEM_WAITING_FOR_WRITE;
        waiter.timeout = jiffies + RWSEM_WAIT_TIMEOUT;
+       waiter.handoff_set = false;
 
        raw_spin_lock_irq(&sem->wait_lock);
-
-       /* account for this before adding a new element to the list */
-       wstate = list_empty(&sem->wait_list) ? WRITER_FIRST : WRITER_NOT_FIRST;
-
-       list_add_tail(&waiter.list, &sem->wait_list);
+       rwsem_add_waiter(sem, &waiter);
 
        /* we're now waiting on the lock */
-       if (wstate == WRITER_NOT_FIRST) {
+       if (rwsem_first_waiter(sem) != &waiter) {
                count = atomic_long_read(&sem->count);
 
                /*
@@ -1083,13 +1119,16 @@ wait:
        /* wait until we successfully acquire the lock */
        set_current_state(state);
        for (;;) {
-               if (rwsem_try_write_lock(sem, wstate)) {
+               if (rwsem_try_write_lock(sem, &waiter)) {
                        /* rwsem_try_write_lock() implies ACQUIRE on success */
                        break;
                }
 
                raw_spin_unlock_irq(&sem->wait_lock);
 
+               if (signal_pending_state(state, current))
+                       goto out_nolock;
+
                /*
                 * After setting the handoff bit and failing to acquire
                 * the lock, attempt to spin on owner to accelerate lock
@@ -1098,7 +1137,7 @@ wait:
                 * In this case, we attempt to acquire the lock again
                 * without sleeping.
                 */
-               if (wstate == WRITER_HANDOFF) {
+               if (waiter.handoff_set) {
                        enum owner_state owner_state;
 
                        preempt_disable();
@@ -1109,66 +1148,26 @@ wait:
                                goto trylock_again;
                }
 
-               /* Block until there are no active lockers. */
-               for (;;) {
-                       if (signal_pending_state(state, current))
-                               goto out_nolock;
-
-                       schedule();
-                       lockevent_inc(rwsem_sleep_writer);
-                       set_current_state(state);
-                       /*
-                        * If HANDOFF bit is set, unconditionally do
-                        * a trylock.
-                        */
-                       if (wstate == WRITER_HANDOFF)
-                               break;
-
-                       if ((wstate == WRITER_NOT_FIRST) &&
-                           (rwsem_first_waiter(sem) == &waiter))
-                               wstate = WRITER_FIRST;
-
-                       count = atomic_long_read(&sem->count);
-                       if (!(count & RWSEM_LOCK_MASK))
-                               break;
-
-                       /*
-                        * The setting of the handoff bit is deferred
-                        * until rwsem_try_write_lock() is called.
-                        */
-                       if ((wstate == WRITER_FIRST) && (rt_task(current) ||
-                           time_after(jiffies, waiter.timeout))) {
-                               wstate = WRITER_HANDOFF;
-                               lockevent_inc(rwsem_wlock_handoff);
-                               break;
-                       }
-               }
+               schedule();
+               lockevent_inc(rwsem_sleep_writer);
+               set_current_state(state);
 trylock_again:
                raw_spin_lock_irq(&sem->wait_lock);
        }
        __set_current_state(TASK_RUNNING);
-       list_del(&waiter.list);
        raw_spin_unlock_irq(&sem->wait_lock);
        lockevent_inc(rwsem_wlock);
-
-       return ret;
+       return sem;
 
 out_nolock:
        __set_current_state(TASK_RUNNING);
        raw_spin_lock_irq(&sem->wait_lock);
-       list_del(&waiter.list);
-
-       if (unlikely(wstate == WRITER_HANDOFF))
-               atomic_long_add(-RWSEM_FLAG_HANDOFF,  &sem->count);
-
-       if (list_empty(&sem->wait_list))
-               atomic_long_andnot(RWSEM_FLAG_WAITERS, &sem->count);
-       else
+       rwsem_del_waiter(sem, &waiter);
+       if (!list_empty(&sem->wait_list))
                rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
        raw_spin_unlock_irq(&sem->wait_lock);
        wake_up_q(&wake_q);
        lockevent_inc(rwsem_wlock_fail);
-
        return ERR_PTR(-EINTR);
 }
 
@@ -1249,17 +1248,14 @@ static inline int __down_read_trylock(struct rw_semaphore *sem)
 
        DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);
 
-       /*
-        * Optimize for the case when the rwsem is not locked at all.
-        */
-       tmp = RWSEM_UNLOCKED_VALUE;
-       do {
+       tmp = atomic_long_read(&sem->count);
+       while (!(tmp & RWSEM_READ_FAILED_MASK)) {
                if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp,
-                                       tmp + RWSEM_READER_BIAS)) {
+                                                   tmp + RWSEM_READER_BIAS)) {
                        rwsem_set_reader_owned(sem);
                        return 1;
                }
-       } while (!(tmp & RWSEM_READ_FAILED_MASK));
+       }
        return 0;
 }
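
The reworked __down_read_trylock() above samples sem->count once and retries the cmpxchg only while no reader-failed bits are set, instead of always starting from RWSEM_UNLOCKED_VALUE. A minimal user-space analogue of that optimistic-read pattern, with made-up bit values (RD_FAIL_MASK and RD_BIAS are illustrative, not the kernel's constants):

#include <stdatomic.h>
#include <stdbool.h>

#define RD_FAIL_MASK	0x3UL	/* hypothetical "writer/handoff present" bits */
#define RD_BIAS		0x100UL	/* hypothetical per-reader increment */

static bool read_trylock(atomic_ulong *count)
{
	/* Start from the value that is actually there right now. */
	unsigned long cur = atomic_load_explicit(count, memory_order_relaxed);

	while (!(cur & RD_FAIL_MASK)) {
		/* On failure, cur is refreshed with the current value. */
		if (atomic_compare_exchange_weak_explicit(count, &cur,
							  cur + RD_BIAS,
							  memory_order_acquire,
							  memory_order_relaxed))
			return true;
	}
	return false;
}

Reading the count first means the initial cmpxchg attempt is based on the real current value rather than assuming the lock is free.
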
 
index 9ed9b74..e6af502 100644 (file)
@@ -693,7 +693,7 @@ static int load_image_and_restore(void)
                goto Unlock;
 
        error = swsusp_read(&flags);
-       swsusp_close(FMODE_READ);
+       swsusp_close(FMODE_READ | FMODE_EXCL);
        if (!error)
                error = hibernation_restore(flags & SF_PLATFORM_MODE);
 
@@ -983,7 +983,7 @@ static int software_resume(void)
        /* The snapshot device should not be opened while we're running */
        if (!hibernate_acquire()) {
                error = -EBUSY;
-               swsusp_close(FMODE_READ);
+               swsusp_close(FMODE_READ | FMODE_EXCL);
                goto Unlock;
        }
 
@@ -1018,7 +1018,7 @@ static int software_resume(void)
        pm_pr_dbg("Hibernation image not present or could not be loaded.\n");
        return error;
  Close_Finish:
-       swsusp_close(FMODE_READ);
+       swsusp_close(FMODE_READ | FMODE_EXCL);
        goto Finish;
 }
 
index 740723b..ad241b4 100644 (file)
@@ -177,7 +177,7 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf,
                if (res <= 0)
                        goto unlock;
        } else {
-               res = PAGE_SIZE - pg_offp;
+               res = PAGE_SIZE;
        }
 
        if (!data_of(data->handle)) {
index 3c9b0fd..7756310 100644 (file)
@@ -1918,7 +1918,7 @@ static void __init init_uclamp_rq(struct rq *rq)
                };
        }
 
-       rq->uclamp_flags = 0;
+       rq->uclamp_flags = UCLAMP_FLAG_IDLE;
 }
 
 static void __init init_uclamp(void)
@@ -6617,11 +6617,11 @@ static int __init setup_preempt_mode(char *str)
        int mode = sched_dynamic_mode(str);
        if (mode < 0) {
                pr_warn("Dynamic Preempt: unsupported mode: %s\n", str);
-               return 1;
+               return 0;
        }
 
        sched_dynamic_update(mode);
-       return 0;
+       return 1;
 }
 __setup("preempt=", setup_preempt_mode);
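
For context on the swapped return values above: a __setup() handler is expected to return 1 when it consumed the option and 0 when it did not, so that unknown or malformed values get reported and passed along. A small illustrative handler following that convention (the "foo=" option and foo_value are made up):

#include <linux/init.h>
#include <linux/kernel.h>

static int foo_value __initdata;

static int __init setup_foo(char *str)
{
	if (kstrtoint(str, 0, &foo_value))
		return 0;	/* malformed: treat as not handled */
	return 1;		/* option consumed */
}
__setup("foo=", setup_foo);
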
 
@@ -8619,9 +8619,6 @@ void __init init_idle(struct task_struct *idle, int cpu)
        idle->flags |= PF_IDLE | PF_KTHREAD | PF_NO_SETAFFINITY;
        kthread_set_per_cpu(idle, cpu);
 
-       scs_task_reset(idle);
-       kasan_unpoison_task_stack(idle);
-
 #ifdef CONFIG_SMP
        /*
         * It's possible that init_idle() gets called multiple times on a task,
@@ -8777,7 +8774,6 @@ void idle_task_exit(void)
                finish_arch_post_lock_switch();
        }
 
-       scs_task_reset(current);
        /* finish_cpu(), as ran on the BP, will clean up the active_mm state */
 }
 
index 872e481..9392aea 100644 (file)
@@ -615,7 +615,8 @@ void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st)
                .sum_exec_runtime = p->se.sum_exec_runtime,
        };
 
-       task_cputime(p, &cputime.utime, &cputime.stime);
+       if (task_cputime(p, &cputime.utime, &cputime.stime))
+               cputime.sum_exec_runtime = task_sched_runtime(p);
        cputime_adjust(&cputime, &p->prev_cputime, ut, st);
 }
 EXPORT_SYMBOL_GPL(task_cputime_adjusted);
@@ -828,19 +829,21 @@ u64 task_gtime(struct task_struct *t)
  * add up the pending nohz execution time since the last
  * cputime snapshot.
  */
-void task_cputime(struct task_struct *t, u64 *utime, u64 *stime)
+bool task_cputime(struct task_struct *t, u64 *utime, u64 *stime)
 {
        struct vtime *vtime = &t->vtime;
        unsigned int seq;
        u64 delta;
+       int ret;
 
        if (!vtime_accounting_enabled()) {
                *utime = t->utime;
                *stime = t->stime;
-               return;
+               return false;
        }
 
        do {
+               ret = false;
                seq = read_seqcount_begin(&vtime->seqcount);
 
                *utime = t->utime;
@@ -850,6 +853,7 @@ void task_cputime(struct task_struct *t, u64 *utime, u64 *stime)
                if (vtime->state < VTIME_SYS)
                        continue;
 
+               ret = true;
                delta = vtime_delta(vtime);
 
                /*
@@ -861,6 +865,8 @@ void task_cputime(struct task_struct *t, u64 *utime, u64 *stime)
                else
                        *utime += vtime->utime + delta;
        } while (read_seqcount_retry(&vtime->seqcount, seq));
+
+       return ret;
 }
 
 static int vtime_state_fetch(struct vtime *vtime, int cpu)
index 322b65d..41f4709 100644 (file)
@@ -595,7 +595,8 @@ void irq_enter_rcu(void)
 {
        __irq_enter_raw();
 
-       if (is_idle_task(current) && (irq_count() == HARDIRQ_OFFSET))
+       if (tick_nohz_full_cpu(smp_processor_id()) ||
+           (is_idle_task(current) && (irq_count() == HARDIRQ_OFFSET)))
                tick_irq_enter();
 
        account_hardirq_enter(current);
index 6bffe5a..17a283c 100644 (file)
@@ -1375,6 +1375,13 @@ static inline void tick_nohz_irq_enter(void)
        now = ktime_get();
        if (ts->idle_active)
                tick_nohz_stop_idle(ts, now);
+       /*
+        * If all CPUs are idle, we may need to update a stale jiffies value.
+        * Note nohz_full is a special case: a timekeeper is guaranteed to stay
+        * alive but it might be busy looping with interrupts disabled in some
+        * rare case (typically stop machine). So we must make sure we have a
+        * last resort.
+        */
        if (ts->tick_stopped)
                tick_nohz_update_jiffies(now);
 }
index 6b60ab9..38715aa 100644 (file)
@@ -1366,14 +1366,26 @@ __event_trigger_test_discard(struct trace_event_file *file,
        if (eflags & EVENT_FILE_FL_TRIGGER_COND)
                *tt = event_triggers_call(file, buffer, entry, event);
 
-       if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
-           (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
-            !filter_match_preds(file->filter, entry))) {
-               __trace_event_discard_commit(buffer, event);
-               return true;
-       }
+       if (likely(!(file->flags & (EVENT_FILE_FL_SOFT_DISABLED |
+                                   EVENT_FILE_FL_FILTERED |
+                                   EVENT_FILE_FL_PID_FILTER))))
+               return false;
+
+       if (file->flags & EVENT_FILE_FL_SOFT_DISABLED)
+               goto discard;
+
+       if (file->flags & EVENT_FILE_FL_FILTERED &&
+           !filter_match_preds(file->filter, entry))
+               goto discard;
+
+       if ((file->flags & EVENT_FILE_FL_PID_FILTER) &&
+           trace_event_ignore_this_pid(file))
+               goto discard;
 
        return false;
+ discard:
+       __trace_event_discard_commit(buffer, event);
+       return true;
 }
 
 /**
index 4021b9a..92be9cb 100644 (file)
@@ -2678,12 +2678,24 @@ static struct trace_event_file *
 trace_create_new_event(struct trace_event_call *call,
                       struct trace_array *tr)
 {
+       struct trace_pid_list *no_pid_list;
+       struct trace_pid_list *pid_list;
        struct trace_event_file *file;
+       unsigned int first;
 
        file = kmem_cache_alloc(file_cachep, GFP_TRACE);
        if (!file)
                return NULL;
 
+       pid_list = rcu_dereference_protected(tr->filtered_pids,
+                                            lockdep_is_held(&event_mutex));
+       no_pid_list = rcu_dereference_protected(tr->filtered_no_pids,
+                                            lockdep_is_held(&event_mutex));
+
+       if (!trace_pid_list_first(pid_list, &first) ||
+           !trace_pid_list_first(no_pid_list, &first))
+               file->flags |= EVENT_FILE_FL_PID_FILTER;
+
        file->event_call = call;
        file->tr = tr;
        atomic_set(&file->sm_ref, 0);
index 9555b8e..319f9c8 100644 (file)
@@ -3757,7 +3757,7 @@ static int check_synth_field(struct synth_event *event,
 
        if (strcmp(field->type, hist_field->type) != 0) {
                if (field->size != hist_field->size ||
-                   field->is_signed != hist_field->is_signed)
+                   (!field->is_string && field->is_signed != hist_field->is_signed))
                        return -EINVAL;
        }
 
index 0a5c0db..f5f0039 100644 (file)
@@ -1313,6 +1313,7 @@ static int uprobe_perf_open(struct trace_event_call *call,
                return 0;
 
        list_for_each_entry(pos, trace_probe_probe_list(tp), list) {
+               tu = container_of(pos, struct trace_uprobe, tp);
                err = uprobe_apply(tu->inode, tu->offset, &tu->consumer, true);
                if (err) {
                        uprobe_perf_close(call, event);
index 39bb56d..9628b55 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/jhash.h>
 #include <linux/slab.h>
 #include <linux/sort.h>
+#include <linux/kmemleak.h>
 
 #include "tracing_map.h"
 #include "trace.h"
@@ -307,6 +308,7 @@ static void tracing_map_array_free(struct tracing_map_array *a)
        for (i = 0; i < a->n_pages; i++) {
                if (!a->pages[i])
                        break;
+               kmemleak_free(a->pages[i]);
                free_page((unsigned long)a->pages[i]);
        }
 
@@ -342,6 +344,7 @@ static struct tracing_map_array *tracing_map_array_alloc(unsigned int n_elts,
                a->pages[i] = (void *)get_zeroed_page(GFP_KERNEL);
                if (!a->pages[i])
                        goto free;
+               kmemleak_alloc(a->pages[i], PAGE_SIZE, 1, GFP_KERNEL);
        }
  out:
        return a;
index 9ef7ce1..5c12bde 100644 (file)
@@ -346,8 +346,9 @@ config FRAME_WARN
        int "Warn for stack frames larger than"
        range 0 8192
        default 2048 if GCC_PLUGIN_LATENT_ENTROPY
-       default 1536 if (!64BIT && (PARISC || XTENSA))
-       default 1024 if (!64BIT && !PARISC)
+       default 2048 if PARISC
+       default 1536 if (!64BIT && XTENSA)
+       default 1024 if !64BIT
        default 2048 if 64BIT
        help
          Tell gcc to warn at build time for stack frames larger than this.
index a90112e..72b9068 100644 (file)
@@ -49,6 +49,7 @@
        SIPROUND; \
        return (v0 ^ v1) ^ (v2 ^ v3);
 
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key)
 {
        const u8 *end = data + len - (len % sizeof(u64));
@@ -80,8 +81,8 @@ u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key)
        POSTAMBLE
 }
 EXPORT_SYMBOL(__siphash_aligned);
+#endif
 
-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key)
 {
        const u8 *end = data + len - (len % sizeof(u64));
@@ -113,7 +114,6 @@ u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key)
        POSTAMBLE
 }
 EXPORT_SYMBOL(__siphash_unaligned);
-#endif
 
 /**
  * siphash_1u64 - compute 64-bit siphash PRF value of a u64
@@ -250,6 +250,7 @@ EXPORT_SYMBOL(siphash_3u32);
        HSIPROUND; \
        return (v0 ^ v1) ^ (v2 ^ v3);
 
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key)
 {
        const u8 *end = data + len - (len % sizeof(u64));
@@ -280,8 +281,8 @@ u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key)
        HPOSTAMBLE
 }
 EXPORT_SYMBOL(__hsiphash_aligned);
+#endif
 
-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 u32 __hsiphash_unaligned(const void *data, size_t len,
                         const hsiphash_key_t *key)
 {
@@ -313,7 +314,6 @@ u32 __hsiphash_unaligned(const void *data, size_t len,
        HPOSTAMBLE
 }
 EXPORT_SYMBOL(__hsiphash_unaligned);
-#endif
 
 /**
  * hsiphash_1u32 - compute 64-bit hsiphash PRF value of a u32
@@ -418,6 +418,7 @@ EXPORT_SYMBOL(hsiphash_4u32);
        HSIPROUND; \
        return v1 ^ v3;
 
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key)
 {
        const u8 *end = data + len - (len % sizeof(u32));
@@ -438,8 +439,8 @@ u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key)
        HPOSTAMBLE
 }
 EXPORT_SYMBOL(__hsiphash_aligned);
+#endif
 
-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 u32 __hsiphash_unaligned(const void *data, size_t len,
                         const hsiphash_key_t *key)
 {
@@ -461,7 +462,6 @@ u32 __hsiphash_unaligned(const void *data, size_t len,
        HPOSTAMBLE
 }
 EXPORT_SYMBOL(__hsiphash_unaligned);
-#endif
 
 /**
  * hsiphash_1u32 - compute 32-bit hsiphash PRF value of a u32
index ca9fa8c..7626790 100644 (file)
@@ -359,7 +359,6 @@ void kunmap_high(struct page *page)
 }
 EXPORT_SYMBOL(kunmap_high);
 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 void zero_user_segments(struct page *page, unsigned start1, unsigned end1,
                unsigned start2, unsigned end2)
 {
@@ -416,7 +415,6 @@ void zero_user_segments(struct page *page, unsigned start1, unsigned end1,
        BUG_ON((start1 | start2 | end1 | end2) != 0);
 }
 EXPORT_SYMBOL(zero_user_segments);
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 #endif /* CONFIG_HIGHMEM */
 
 #ifdef CONFIG_KMAP_LOCAL
index f025d23..abcd178 100644 (file)
@@ -4919,9 +4919,9 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma,
 
                move_huge_pte(vma, old_addr, new_addr, src_pte);
        }
-       i_mmap_unlock_write(mapping);
        flush_tlb_range(vma, old_end - len, old_end);
        mmu_notifier_invalidate_range_end(&range);
+       i_mmap_unlock_write(mapping);
 
        return len + old_addr - old_end;
 }
@@ -4939,6 +4939,7 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
        struct hstate *h = hstate_vma(vma);
        unsigned long sz = huge_page_size(h);
        struct mmu_notifier_range range;
+       bool force_flush = false;
 
        WARN_ON(!is_vm_hugetlb_page(vma));
        BUG_ON(start & ~huge_page_mask(h));
@@ -4967,10 +4968,8 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
                ptl = huge_pte_lock(h, mm, ptep);
                if (huge_pmd_unshare(mm, vma, &address, ptep)) {
                        spin_unlock(ptl);
-                       /*
-                        * We just unmapped a page of PMDs by clearing a PUD.
-                        * The caller's TLB flush range should cover this area.
-                        */
+                       tlb_flush_pmd_range(tlb, address & PUD_MASK, PUD_SIZE);
+                       force_flush = true;
                        continue;
                }
 
@@ -5027,6 +5026,22 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
        }
        mmu_notifier_invalidate_range_end(&range);
        tlb_end_vma(tlb, vma);
+
+       /*
+        * If we unshared PMDs, the TLB flush was not recorded in mmu_gather. We
+        * could defer the flush until now, since by holding i_mmap_rwsem we
+        * guaranteed that the last reference would not be dropped. But we must
+        * do the flushing before we return, as otherwise i_mmap_rwsem will be
+        * dropped and the last reference to the shared PMDs page might be
+        * dropped as well.
+        *
+        * In theory we could defer the freeing of the PMD pages as well, but
+        * huge_pmd_unshare() relies on the exact page_count for the PMD page to
+        * detect sharing, so we cannot defer the release of the page either.
+        * Instead, do flush now.
+        */
+       if (force_flush)
+               tlb_flush_mmu_tlbonly(tlb);
 }
 
 void __unmap_hugepage_range_final(struct mmu_gather *tlb,
index 781605e..6863a83 100644 (file)
@@ -5558,7 +5558,7 @@ static int mem_cgroup_move_account(struct page *page,
 
        VM_BUG_ON(from == to);
        VM_BUG_ON_FOLIO(folio_test_lru(folio), folio);
-       VM_BUG_ON(compound && !folio_test_multi(folio));
+       VM_BUG_ON(compound && !folio_test_large(folio));
 
        /*
         * Prevent mem_cgroup_migrate() from looking at
index dc038ce..18f93c2 100644 (file)
@@ -2303,6 +2303,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode
                INIT_LIST_HEAD(&info->swaplist);
                simple_xattrs_init(&info->xattrs);
                cache_no_acl(inode);
+               mapping_set_large_folios(inode->i_mapping);
 
                switch (mode & S_IFMT) {
                default:
@@ -3870,7 +3871,7 @@ static struct file_system_type shmem_fs_type = {
        .parameters     = shmem_fs_parameters,
 #endif
        .kill_sb        = kill_litter_super,
-       .fs_flags       = FS_USERNS_MOUNT | FS_THP_SUPPORT,
+       .fs_flags       = FS_USERNS_MOUNT,
 };
 
 int __init shmem_init(void)
index e58151a..741ba32 100644 (file)
--- a/mm/util.c
+++ b/mm/util.c
@@ -670,7 +670,7 @@ bool folio_mapped(struct folio *folio)
 {
        long i, nr;
 
-       if (folio_test_single(folio))
+       if (!folio_test_large(folio))
                return atomic_read(&folio->_mapcount) >= 0;
        if (atomic_read(folio_mapcount_ptr(folio)) >= 0)
                return true;
index a3a0a5e..abaa5d9 100644 (file)
@@ -184,9 +184,6 @@ int register_vlan_dev(struct net_device *dev, struct netlink_ext_ack *extack)
        if (err)
                goto out_unregister_netdev;
 
-       /* Account for reference in struct vlan_dev_priv */
-       dev_hold(real_dev);
-
        vlan_stacked_transfer_operstate(real_dev, dev, vlan);
        linkwatch_fire_event(dev); /* _MUST_ call rfc2863_policy() */
 
index ab6dee2..a54535c 100644 (file)
@@ -615,6 +615,9 @@ static int vlan_dev_init(struct net_device *dev)
        if (!vlan->vlan_pcpu_stats)
                return -ENOMEM;
 
+       /* Get vlan's reference to real_dev */
+       dev_hold(real_dev);
+
        return 0;
 }
 
index 15ac064..2a352e6 100644 (file)
@@ -4210,7 +4210,10 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
        if (dev->flags & IFF_UP) {
                int cpu = smp_processor_id(); /* ok because BHs are off */
 
-               if (txq->xmit_lock_owner != cpu) {
+               /* Other cpus might concurrently change txq->xmit_lock_owner
+                * to -1 or to their cpu id, but not to our id.
+                */
+               if (READ_ONCE(txq->xmit_lock_owner) != cpu) {
                        if (dev_xmit_recursion())
                                goto recursion_alert;
 
index be74ab4..0ccfd5f 100644 (file)
@@ -162,3 +162,22 @@ void dst_cache_destroy(struct dst_cache *dst_cache)
        free_percpu(dst_cache->cache);
 }
 EXPORT_SYMBOL_GPL(dst_cache_destroy);
+
+void dst_cache_reset_now(struct dst_cache *dst_cache)
+{
+       int i;
+
+       if (!dst_cache->cache)
+               return;
+
+       dst_cache->reset_ts = jiffies;
+       for_each_possible_cpu(i) {
+               struct dst_cache_pcpu *idst = per_cpu_ptr(dst_cache->cache, i);
+               struct dst_entry *dst = idst->dst;
+
+               idst->cookie = 0;
+               idst->dst = NULL;
+               dst_release(dst);
+       }
+}
+EXPORT_SYMBOL_GPL(dst_cache_reset_now);
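
The new dst_cache_reset_now() releases every per-CPU cached dst immediately, instead of relying on the lazy reset_ts comparison at the next lookup. A hedged usage sketch, assuming the declaration is exported from net/dst_cache.h like the other helpers and using a hypothetical tunnel driver:

#include <net/dst_cache.h>

struct mytun_priv {
	struct dst_cache dst_cache;
	/* ... */
};

/* Called when the underlay configuration changes and stale cached
 * routes must not be reused even once more before the next lookup.
 */
static void mytun_underlay_changed(struct mytun_priv *priv)
{
	dst_cache_reset_now(&priv->dst_cache);
}
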
index 79df7cd..1bb567a 100644 (file)
@@ -323,7 +323,7 @@ jumped:
                if (!err && ops->suppress && INDIRECT_CALL_MT(ops->suppress,
                                                              fib6_rule_suppress,
                                                              fib4_rule_suppress,
-                                                             rule, arg))
+                                                             rule, flags, arg))
                        continue;
 
                if (err != -EAGAIN) {
index 47931c8..72ba027 100644 (file)
@@ -1779,6 +1779,7 @@ int neigh_table_clear(int index, struct neigh_table *tbl)
 {
        neigh_tables[index] = NULL;
        /* It is not clean... Fix it to unload IPv6 module safely */
+       cancel_delayed_work_sync(&tbl->managed_work);
        cancel_delayed_work_sync(&tbl->gc_work);
        del_timer_sync(&tbl->proxy_timer);
        pneigh_queue_purge(&tbl->proxy_queue);
index 65e9bc1..20bcf86 100644 (file)
@@ -1719,7 +1719,7 @@ static noinline_for_stack int ethtool_set_coalesce(struct net_device *dev,
        struct ethtool_coalesce coalesce;
        int ret;
 
-       if (!dev->ethtool_ops->set_coalesce && !dev->ethtool_ops->get_coalesce)
+       if (!dev->ethtool_ops->set_coalesce || !dev->ethtool_ops->get_coalesce)
                return -EOPNOTSUPP;
 
        ret = dev->ethtool_ops->get_coalesce(dev, &coalesce, &kernel_coalesce,
index 9fe13e4..4d61ddd 100644 (file)
@@ -1582,7 +1582,7 @@ static int __net_init fib_net_init(struct net *net)
        int error;
 
 #ifdef CONFIG_IP_ROUTE_CLASSID
-       net->ipv4.fib_num_tclassid_users = 0;
+       atomic_set(&net->ipv4.fib_num_tclassid_users, 0);
 #endif
        error = ip_fib_net_init(net);
        if (error < 0)
index ce54a30..d279cb8 100644 (file)
@@ -141,6 +141,7 @@ INDIRECT_CALLABLE_SCOPE int fib4_rule_action(struct fib_rule *rule,
 }
 
 INDIRECT_CALLABLE_SCOPE bool fib4_rule_suppress(struct fib_rule *rule,
+                                               int flags,
                                                struct fib_lookup_arg *arg)
 {
        struct fib_result *result = (struct fib_result *) arg->result;
@@ -263,7 +264,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
        if (tb[FRA_FLOW]) {
                rule4->tclassid = nla_get_u32(tb[FRA_FLOW]);
                if (rule4->tclassid)
-                       net->ipv4.fib_num_tclassid_users++;
+                       atomic_inc(&net->ipv4.fib_num_tclassid_users);
        }
 #endif
 
@@ -295,7 +296,7 @@ static int fib4_rule_delete(struct fib_rule *rule)
 
 #ifdef CONFIG_IP_ROUTE_CLASSID
        if (((struct fib4_rule *)rule)->tclassid)
-               net->ipv4.fib_num_tclassid_users--;
+               atomic_dec(&net->ipv4.fib_num_tclassid_users);
 #endif
        net->ipv4.fib_has_custom_rules = true;
 
index 3364cb9..fde7797 100644 (file)
@@ -220,7 +220,7 @@ void fib_nh_release(struct net *net, struct fib_nh *fib_nh)
 {
 #ifdef CONFIG_IP_ROUTE_CLASSID
        if (fib_nh->nh_tclassid)
-               net->ipv4.fib_num_tclassid_users--;
+               atomic_dec(&net->ipv4.fib_num_tclassid_users);
 #endif
        fib_nh_common_release(&fib_nh->nh_common);
 }
@@ -632,7 +632,7 @@ int fib_nh_init(struct net *net, struct fib_nh *nh,
 #ifdef CONFIG_IP_ROUTE_CLASSID
        nh->nh_tclassid = cfg->fc_flow;
        if (nh->nh_tclassid)
-               net->ipv4.fib_num_tclassid_users++;
+               atomic_inc(&net->ipv4.fib_num_tclassid_users);
 #endif
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
        nh->fib_nh_weight = nh_weight;
index 9e81007..5dbd4b5 100644 (file)
@@ -1899,15 +1899,36 @@ static void remove_nexthop(struct net *net, struct nexthop *nh,
 /* if any FIB entries reference this nexthop, any dst entries
  * need to be regenerated
  */
-static void nh_rt_cache_flush(struct net *net, struct nexthop *nh)
+static void nh_rt_cache_flush(struct net *net, struct nexthop *nh,
+                             struct nexthop *replaced_nh)
 {
        struct fib6_info *f6i;
+       struct nh_group *nhg;
+       int i;
 
        if (!list_empty(&nh->fi_list))
                rt_cache_flush(net);
 
        list_for_each_entry(f6i, &nh->f6i_list, nh_list)
                ipv6_stub->fib6_update_sernum(net, f6i);
+
+       /* if an IPv6 group was replaced, we have to release all old
+        * dsts to make sure all refcounts are released
+        */
+       if (!replaced_nh->is_group)
+               return;
+
+       /* new dsts must use only the new nexthop group */
+       synchronize_net();
+
+       nhg = rtnl_dereference(replaced_nh->nh_grp);
+       for (i = 0; i < nhg->num_nh; i++) {
+               struct nh_grp_entry *nhge = &nhg->nh_entries[i];
+               struct nh_info *nhi = rtnl_dereference(nhge->nh->nh_info);
+
+               if (nhi->family == AF_INET6)
+                       ipv6_stub->fib6_nh_release_dsts(&nhi->fib6_nh);
+       }
 }
 
 static int replace_nexthop_grp(struct net *net, struct nexthop *old,
@@ -2247,7 +2268,7 @@ static int replace_nexthop(struct net *net, struct nexthop *old,
                err = replace_nexthop_single(net, old, new, extack);
 
        if (!err) {
-               nh_rt_cache_flush(net, old);
+               nh_rt_cache_flush(net, old, new);
 
                __remove_nexthop(net, new, NULL);
                nexthop_put(new);
@@ -2544,11 +2565,15 @@ static int nh_create_ipv6(struct net *net,  struct nexthop *nh,
        /* sets nh_dev if successful */
        err = ipv6_stub->fib6_nh_init(net, fib6_nh, &fib6_cfg, GFP_KERNEL,
                                      extack);
-       if (err)
+       if (err) {
+               /* IPv6 is not enabled, don't call fib6_nh_release */
+               if (err == -EAFNOSUPPORT)
+                       goto out;
                ipv6_stub->fib6_nh_release(fib6_nh);
-       else
+       } else {
                nh->nh_flags = fib6_nh->fib_nh_flags;
-
+       }
+out:
        return err;
 }
 
index 5e9d9c5..e07837e 100644 (file)
@@ -330,8 +330,6 @@ static void cubictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
                return;
 
        if (tcp_in_slow_start(tp)) {
-               if (hystart && after(ack, ca->end_seq))
-                       bictcp_hystart_reset(sk);
                acked = tcp_slow_start(tp, acked);
                if (!acked)
                        return;
@@ -391,6 +389,9 @@ static void hystart_update(struct sock *sk, u32 delay)
        struct bictcp *ca = inet_csk_ca(sk);
        u32 threshold;
 
+       if (after(tp->snd_una, ca->end_seq))
+               bictcp_hystart_reset(sk);
+
        if (hystart_detect & HYSTART_ACK_TRAIN) {
                u32 now = bictcp_clock_us(sk);
 
index 0c4da16..dab4a04 100644 (file)
@@ -1026,6 +1026,7 @@ static const struct ipv6_stub ipv6_stub_impl = {
        .ip6_mtu_from_fib6 = ip6_mtu_from_fib6,
        .fib6_nh_init      = fib6_nh_init,
        .fib6_nh_release   = fib6_nh_release,
+       .fib6_nh_release_dsts = fib6_nh_release_dsts,
        .fib6_update_sernum = fib6_update_sernum_stub,
        .fib6_rt_update    = fib6_rt_update,
        .ip6_del_rt        = ip6_del_rt,
index 40f3e4f..dcedfe2 100644 (file)
@@ -267,6 +267,7 @@ INDIRECT_CALLABLE_SCOPE int fib6_rule_action(struct fib_rule *rule,
 }
 
 INDIRECT_CALLABLE_SCOPE bool fib6_rule_suppress(struct fib_rule *rule,
+                                               int flags,
                                                struct fib_lookup_arg *arg)
 {
        struct fib6_result *res = arg->result;
@@ -294,8 +295,7 @@ INDIRECT_CALLABLE_SCOPE bool fib6_rule_suppress(struct fib_rule *rule,
        return false;
 
 suppress_route:
-       if (!(arg->flags & FIB_LOOKUP_NOREF))
-               ip6_rt_put(rt);
+       ip6_rt_put_flags(rt, flags);
        return true;
 }
 
index 1b9827f..1cbd49d 100644 (file)
@@ -248,9 +248,9 @@ INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head,
                 * memcmp() alone below is sufficient, right?
                 */
                 if ((first_word & htonl(0xF00FFFFF)) ||
-                   !ipv6_addr_equal(&iph->saddr, &iph2->saddr) ||
-                   !ipv6_addr_equal(&iph->daddr, &iph2->daddr) ||
-                   *(u16 *)&iph->nexthdr != *(u16 *)&iph2->nexthdr) {
+                    !ipv6_addr_equal(&iph->saddr, &iph2->saddr) ||
+                    !ipv6_addr_equal(&iph->daddr, &iph2->daddr) ||
+                    *(u16 *)&iph->nexthdr != *(u16 *)&iph2->nexthdr) {
 not_same_flow:
                        NAPI_GRO_CB(p)->same_flow = 0;
                        continue;
index 2f044a4..ff4e83e 100644 (file)
@@ -174,7 +174,7 @@ static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff
 #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
        /* Policy lookup after SNAT yielded a new policy */
        if (skb_dst(skb)->xfrm) {
-               IPCB(skb)->flags |= IPSKB_REROUTED;
+               IP6CB(skb)->flags |= IP6SKB_REROUTED;
                return dst_output(net, sk, skb);
        }
 #endif
index 3ae25b8..42d60c7 100644 (file)
@@ -3680,6 +3680,25 @@ void fib6_nh_release(struct fib6_nh *fib6_nh)
        fib_nh_common_release(&fib6_nh->nh_common);
 }
 
+void fib6_nh_release_dsts(struct fib6_nh *fib6_nh)
+{
+       int cpu;
+
+       if (!fib6_nh->rt6i_pcpu)
+               return;
+
+       for_each_possible_cpu(cpu) {
+               struct rt6_info *pcpu_rt, **ppcpu_rt;
+
+               ppcpu_rt = per_cpu_ptr(fib6_nh->rt6i_pcpu, cpu);
+               pcpu_rt = xchg(ppcpu_rt, NULL);
+               if (pcpu_rt) {
+                       dst_dev_put(&pcpu_rt->dst);
+                       dst_release(&pcpu_rt->dst);
+               }
+       }
+}
+
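
fib6_nh_release_dsts() above empties each per-CPU rt6_info slot with xchg(), so a given cached dst is taken out of its slot and released at most once. The same pattern reduced to a generic sketch (struct obj, obj_put() and the percpu allocation are hypothetical):

#include <linux/atomic.h>
#include <linux/cpumask.h>
#include <linux/percpu.h>

struct obj;			/* hypothetical object type */
void obj_put(struct obj *obj);	/* hypothetical release helper */

static void drain_pcpu_slots(struct obj * __percpu *slots)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		struct obj **slot = per_cpu_ptr(slots, cpu);
		struct obj *old = xchg(slot, NULL);

		if (old)
			obj_put(old);
	}
}
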
 static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
                                              gfp_t gfp_flags,
                                              struct netlink_ext_ack *extack)
index 46c4482..cdf09c2 100644 (file)
@@ -952,7 +952,7 @@ static int mctp_route_add(struct mctp_dev *mdev, mctp_eid_t daddr_start,
 }
 
 static int mctp_route_remove(struct mctp_dev *mdev, mctp_eid_t daddr_start,
-                            unsigned int daddr_extent)
+                            unsigned int daddr_extent, unsigned char type)
 {
        struct net *net = dev_net(mdev->dev);
        struct mctp_route *rt, *tmp;
@@ -969,7 +969,8 @@ static int mctp_route_remove(struct mctp_dev *mdev, mctp_eid_t daddr_start,
 
        list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) {
                if (rt->dev == mdev &&
-                   rt->min == daddr_start && rt->max == daddr_end) {
+                   rt->min == daddr_start && rt->max == daddr_end &&
+                   rt->type == type) {
                        list_del_rcu(&rt->list);
                        /* TODO: immediate RTM_DELROUTE */
                        mctp_route_release(rt);
@@ -987,7 +988,7 @@ int mctp_route_add_local(struct mctp_dev *mdev, mctp_eid_t addr)
 
 int mctp_route_remove_local(struct mctp_dev *mdev, mctp_eid_t addr)
 {
-       return mctp_route_remove(mdev, addr, 0);
+       return mctp_route_remove(mdev, addr, 0, RTN_LOCAL);
 }
 
 /* removes all entries for a given device */
@@ -1195,7 +1196,7 @@ static int mctp_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
        if (rtm->rtm_type != RTN_UNICAST)
                return -EINVAL;
 
-       rc = mctp_route_remove(mdev, daddr_start, rtm->rtm_dst_len);
+       rc = mctp_route_remove(mdev, daddr_start, rtm->rtm_dst_len, RTN_UNICAST);
        return rc;
 }
 
index cc6b880..7b79187 100644 (file)
@@ -12,7 +12,7 @@
 static netdev_tx_t mctp_test_dev_tx(struct sk_buff *skb,
                                    struct net_device *ndev)
 {
-       kfree(skb);
+       kfree_skb(skb);
        return NETDEV_TX_OK;
 }
 
index ffeb2df..0c7bde1 100644 (file)
@@ -409,7 +409,7 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev,
                goto err;
 
        /* Find the output device */
-       out_dev = rcu_dereference(nh->nh_dev);
+       out_dev = nh->nh_dev;
        if (!mpls_output_possible(out_dev))
                goto tx_err;
 
@@ -698,7 +698,7 @@ static int mpls_nh_assign_dev(struct net *net, struct mpls_route *rt,
            (dev->addr_len != nh->nh_via_alen))
                goto errout;
 
-       RCU_INIT_POINTER(nh->nh_dev, dev);
+       nh->nh_dev = dev;
 
        if (!(dev->flags & IFF_UP)) {
                nh->nh_flags |= RTNH_F_DEAD;
@@ -1491,26 +1491,53 @@ static void mpls_dev_destroy_rcu(struct rcu_head *head)
        kfree(mdev);
 }
 
-static void mpls_ifdown(struct net_device *dev, int event)
+static int mpls_ifdown(struct net_device *dev, int event)
 {
        struct mpls_route __rcu **platform_label;
        struct net *net = dev_net(dev);
-       u8 alive, deleted;
        unsigned index;
 
        platform_label = rtnl_dereference(net->mpls.platform_label);
        for (index = 0; index < net->mpls.platform_labels; index++) {
                struct mpls_route *rt = rtnl_dereference(platform_label[index]);
+               bool nh_del = false;
+               u8 alive = 0;
 
                if (!rt)
                        continue;
 
-               alive = 0;
-               deleted = 0;
+               if (event == NETDEV_UNREGISTER) {
+                       u8 deleted = 0;
+
+                       for_nexthops(rt) {
+                               if (!nh->nh_dev || nh->nh_dev == dev)
+                                       deleted++;
+                               if (nh->nh_dev == dev)
+                                       nh_del = true;
+                       } endfor_nexthops(rt);
+
+                       /* if there are no more nexthops, delete the route */
+                       if (deleted == rt->rt_nhn) {
+                               mpls_route_update(net, index, NULL, NULL);
+                               continue;
+                       }
+
+                       if (nh_del) {
+                               size_t size = sizeof(*rt) + rt->rt_nhn *
+                                       rt->rt_nh_size;
+                               struct mpls_route *orig = rt;
+
+                               rt = kmalloc(size, GFP_KERNEL);
+                               if (!rt)
+                                       return -ENOMEM;
+                               memcpy(rt, orig, size);
+                       }
+               }
+
                change_nexthops(rt) {
                        unsigned int nh_flags = nh->nh_flags;
 
-                       if (rtnl_dereference(nh->nh_dev) != dev)
+                       if (nh->nh_dev != dev)
                                goto next;
 
                        switch (event) {
@@ -1523,23 +1550,22 @@ static void mpls_ifdown(struct net_device *dev, int event)
                                break;
                        }
                        if (event == NETDEV_UNREGISTER)
-                               RCU_INIT_POINTER(nh->nh_dev, NULL);
+                               nh->nh_dev = NULL;
 
                        if (nh->nh_flags != nh_flags)
                                WRITE_ONCE(nh->nh_flags, nh_flags);
 next:
                        if (!(nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)))
                                alive++;
-                       if (!rtnl_dereference(nh->nh_dev))
-                               deleted++;
                } endfor_nexthops(rt);
 
                WRITE_ONCE(rt->rt_nhn_alive, alive);
 
-               /* if there are no more nexthops, delete the route */
-               if (event == NETDEV_UNREGISTER && deleted == rt->rt_nhn)
-                       mpls_route_update(net, index, NULL, NULL);
+               if (nh_del)
+                       mpls_route_update(net, index, rt, NULL);
        }
+
+       return 0;
 }
 
 static void mpls_ifup(struct net_device *dev, unsigned int flags)
@@ -1559,14 +1585,12 @@ static void mpls_ifup(struct net_device *dev, unsigned int flags)
                alive = 0;
                change_nexthops(rt) {
                        unsigned int nh_flags = nh->nh_flags;
-                       struct net_device *nh_dev =
-                               rtnl_dereference(nh->nh_dev);
 
                        if (!(nh_flags & flags)) {
                                alive++;
                                continue;
                        }
-                       if (nh_dev != dev)
+                       if (nh->nh_dev != dev)
                                continue;
                        alive++;
                        nh_flags &= ~flags;
@@ -1597,8 +1621,12 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
                return NOTIFY_OK;
 
        switch (event) {
+               int err;
+
        case NETDEV_DOWN:
-               mpls_ifdown(dev, event);
+               err = mpls_ifdown(dev, event);
+               if (err)
+                       return notifier_from_errno(err);
                break;
        case NETDEV_UP:
                flags = dev_get_flags(dev);
@@ -1609,13 +1637,18 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
                break;
        case NETDEV_CHANGE:
                flags = dev_get_flags(dev);
-               if (flags & (IFF_RUNNING | IFF_LOWER_UP))
+               if (flags & (IFF_RUNNING | IFF_LOWER_UP)) {
                        mpls_ifup(dev, RTNH_F_DEAD | RTNH_F_LINKDOWN);
-               else
-                       mpls_ifdown(dev, event);
+               } else {
+                       err = mpls_ifdown(dev, event);
+                       if (err)
+                               return notifier_from_errno(err);
+               }
                break;
        case NETDEV_UNREGISTER:
-               mpls_ifdown(dev, event);
+               err = mpls_ifdown(dev, event);
+               if (err)
+                       return notifier_from_errno(err);
                mdev = mpls_dev_get(dev);
                if (mdev) {
                        mpls_dev_sysctl_unregister(dev, mdev);
@@ -1626,8 +1659,6 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
        case NETDEV_CHANGENAME:
                mdev = mpls_dev_get(dev);
                if (mdev) {
-                       int err;
-
                        mpls_dev_sysctl_unregister(dev, mdev);
                        err = mpls_dev_sysctl_register(dev, mdev);
                        if (err)
@@ -1994,7 +2025,7 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event,
                    nla_put_via(skb, nh->nh_via_table, mpls_nh_via(rt, nh),
                                nh->nh_via_alen))
                        goto nla_put_failure;
-               dev = rtnl_dereference(nh->nh_dev);
+               dev = nh->nh_dev;
                if (dev && nla_put_u32(skb, RTA_OIF, dev->ifindex))
                        goto nla_put_failure;
                if (nh->nh_flags & RTNH_F_LINKDOWN)
@@ -2012,7 +2043,7 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event,
                        goto nla_put_failure;
 
                for_nexthops(rt) {
-                       dev = rtnl_dereference(nh->nh_dev);
+                       dev = nh->nh_dev;
                        if (!dev)
                                continue;
 
@@ -2123,18 +2154,14 @@ static int mpls_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh,
 static bool mpls_rt_uses_dev(struct mpls_route *rt,
                             const struct net_device *dev)
 {
-       struct net_device *nh_dev;
-
        if (rt->rt_nhn == 1) {
                struct mpls_nh *nh = rt->rt_nh;
 
-               nh_dev = rtnl_dereference(nh->nh_dev);
-               if (dev == nh_dev)
+               if (nh->nh_dev == dev)
                        return true;
        } else {
                for_nexthops(rt) {
-                       nh_dev = rtnl_dereference(nh->nh_dev);
-                       if (nh_dev == dev)
+                       if (nh->nh_dev == dev)
                                return true;
                } endfor_nexthops(rt);
        }
@@ -2222,7 +2249,7 @@ static inline size_t lfib_nlmsg_size(struct mpls_route *rt)
                size_t nhsize = 0;
 
                for_nexthops(rt) {
-                       if (!rtnl_dereference(nh->nh_dev))
+                       if (!nh->nh_dev)
                                continue;
                        nhsize += nla_total_size(sizeof(struct rtnexthop));
                        /* RTA_VIA */
@@ -2468,7 +2495,7 @@ static int mpls_getroute(struct sk_buff *in_skb, struct nlmsghdr *in_nlh,
            nla_put_via(skb, nh->nh_via_table, mpls_nh_via(rt, nh),
                        nh->nh_via_alen))
                goto nla_put_failure;
-       dev = rtnl_dereference(nh->nh_dev);
+       dev = nh->nh_dev;
        if (dev && nla_put_u32(skb, RTA_OIF, dev->ifindex))
                goto nla_put_failure;
 
@@ -2507,7 +2534,7 @@ static int resize_platform_label_table(struct net *net, size_t limit)
                rt0 = mpls_rt_alloc(1, lo->addr_len, 0);
                if (IS_ERR(rt0))
                        goto nort0;
-               RCU_INIT_POINTER(rt0->rt_nh->nh_dev, lo);
+               rt0->rt_nh->nh_dev = lo;
                rt0->rt_protocol = RTPROT_KERNEL;
                rt0->rt_payload_type = MPT_IPV4;
                rt0->rt_ttl_propagate = MPLS_TTL_PROP_DEFAULT;
@@ -2521,7 +2548,7 @@ static int resize_platform_label_table(struct net *net, size_t limit)
                rt2 = mpls_rt_alloc(1, lo->addr_len, 0);
                if (IS_ERR(rt2))
                        goto nort2;
-               RCU_INIT_POINTER(rt2->rt_nh->nh_dev, lo);
+               rt2->rt_nh->nh_dev = lo;
                rt2->rt_protocol = RTPROT_KERNEL;
                rt2->rt_payload_type = MPT_IPV6;
                rt2->rt_ttl_propagate = MPLS_TTL_PROP_DEFAULT;
index 838cdfc..893df00 100644 (file)
@@ -87,7 +87,7 @@ enum mpls_payload_type {
 };
 
 struct mpls_nh { /* next hop label forwarding entry */
-       struct net_device __rcu *nh_dev;
+       struct net_device       *nh_dev;
 
        /* nh_flags is accessed under RCU in the packet path; it is
         * modified handling netdev events with rtnl lock held
index 7c3420a..fe98e4f 100644 (file)
@@ -422,28 +422,6 @@ bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb,
        return false;
 }
 
-/* MP_JOIN client subflow must wait for 4th ack before sending any data:
- * TCP can't schedule delack timer before the subflow is fully established.
- * MPTCP uses the delack timer to do 3rd ack retransmissions
- */
-static void schedule_3rdack_retransmission(struct sock *sk)
-{
-       struct inet_connection_sock *icsk = inet_csk(sk);
-       struct tcp_sock *tp = tcp_sk(sk);
-       unsigned long timeout;
-
-       /* reschedule with a timeout above RTT, as we must look only for drop */
-       if (tp->srtt_us)
-               timeout = tp->srtt_us << 1;
-       else
-               timeout = TCP_TIMEOUT_INIT;
-
-       WARN_ON_ONCE(icsk->icsk_ack.pending & ICSK_ACK_TIMER);
-       icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
-       icsk->icsk_ack.timeout = timeout;
-       sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
-}
-
 static void clear_3rdack_retransmission(struct sock *sk)
 {
        struct inet_connection_sock *icsk = inet_csk(sk);
@@ -526,7 +504,15 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
                *size = TCPOLEN_MPTCP_MPJ_ACK;
                pr_debug("subflow=%p", subflow);
 
-               schedule_3rdack_retransmission(sk);
+               /* We can use the full delegate action helper only from BH context.
+                * If we are in process context - sk is flushing the backlog at
+                * socket lock release time - just set the appropriate flag; it will
+                * be handled by the release callback.
+                */
+               if (sock_owned_by_user(sk))
+                       set_bit(MPTCP_DELEGATE_ACK, &subflow->delegated_status);
+               else
+                       mptcp_subflow_delegate(subflow, MPTCP_DELEGATE_ACK);
                return true;
        }
        return false;
index b7e32e3..c82a76d 100644 (file)
@@ -1596,7 +1596,8 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
                        if (!xmit_ssk)
                                goto out;
                        if (xmit_ssk != ssk) {
-                               mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk));
+                               mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk),
+                                                      MPTCP_DELEGATE_SEND);
                                goto out;
                        }
 
@@ -2943,7 +2944,7 @@ void __mptcp_check_push(struct sock *sk, struct sock *ssk)
                if (xmit_ssk == ssk)
                        __mptcp_subflow_push_pending(sk, ssk);
                else if (xmit_ssk)
-                       mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk));
+                       mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk), MPTCP_DELEGATE_SEND);
        } else {
                set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags);
        }
@@ -2993,18 +2994,50 @@ static void mptcp_release_cb(struct sock *sk)
        __mptcp_update_rmem(sk);
 }
 
+/* MP_JOIN client subflow must wait for 4th ack before sending any data:
+ * TCP can't schedule delack timer before the subflow is fully established.
+ * MPTCP uses the delack timer to do 3rd ack retransmissions
+ */
+static void schedule_3rdack_retransmission(struct sock *ssk)
+{
+       struct inet_connection_sock *icsk = inet_csk(ssk);
+       struct tcp_sock *tp = tcp_sk(ssk);
+       unsigned long timeout;
+
+       if (mptcp_subflow_ctx(ssk)->fully_established)
+               return;
+
+       /* reschedule with a timeout above RTT, as we must look only for drop */
+       if (tp->srtt_us)
+               timeout = usecs_to_jiffies(tp->srtt_us >> (3 - 1));
+       else
+               timeout = TCP_TIMEOUT_INIT;
+       timeout += jiffies;
+
+       WARN_ON_ONCE(icsk->icsk_ack.pending & ICSK_ACK_TIMER);
+       icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
+       icsk->icsk_ack.timeout = timeout;
+       sk_reset_timer(ssk, &icsk->icsk_delack_timer, timeout);
+}
+
 void mptcp_subflow_process_delegated(struct sock *ssk)
 {
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
        struct sock *sk = subflow->conn;
 
-       mptcp_data_lock(sk);
-       if (!sock_owned_by_user(sk))
-               __mptcp_subflow_push_pending(sk, ssk);
-       else
-               set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags);
-       mptcp_data_unlock(sk);
-       mptcp_subflow_delegated_done(subflow);
+       if (test_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status)) {
+               mptcp_data_lock(sk);
+               if (!sock_owned_by_user(sk))
+                       __mptcp_subflow_push_pending(sk, ssk);
+               else
+                       set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags);
+               mptcp_data_unlock(sk);
+               mptcp_subflow_delegated_done(subflow, MPTCP_DELEGATE_SEND);
+       }
+       if (test_bit(MPTCP_DELEGATE_ACK, &subflow->delegated_status)) {
+               schedule_3rdack_retransmission(ssk);
+               mptcp_subflow_delegated_done(subflow, MPTCP_DELEGATE_ACK);
+       }
 }
 
 static int mptcp_hash(struct sock *sk)
index 67a61ac..d87cc04 100644 (file)
@@ -387,6 +387,7 @@ struct mptcp_delegated_action {
 DECLARE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions);
 
 #define MPTCP_DELEGATE_SEND            0
+#define MPTCP_DELEGATE_ACK             1
 
 /* MPTCP subflow context */
 struct mptcp_subflow_context {
@@ -492,23 +493,23 @@ static inline void mptcp_add_pending_subflow(struct mptcp_sock *msk,
 
 void mptcp_subflow_process_delegated(struct sock *ssk);
 
-static inline void mptcp_subflow_delegate(struct mptcp_subflow_context *subflow)
+static inline void mptcp_subflow_delegate(struct mptcp_subflow_context *subflow, int action)
 {
        struct mptcp_delegated_action *delegated;
        bool schedule;
 
+       /* the caller held the subflow bh socket lock */
+       lockdep_assert_in_softirq();
+
        /* The implied barrier pairs with mptcp_subflow_delegated_done(), and
         * ensures the below list check sees list updates done prior to status
         * bit changes
         */
-       if (!test_and_set_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status)) {
+       if (!test_and_set_bit(action, &subflow->delegated_status)) {
                /* still on delegated list from previous scheduling */
                if (!list_empty(&subflow->delegated_node))
                        return;
 
-               /* the caller held the subflow bh socket lock */
-               lockdep_assert_in_softirq();
-
                delegated = this_cpu_ptr(&mptcp_delegated_actions);
                schedule = list_empty(&delegated->head);
                list_add_tail(&subflow->delegated_node, &delegated->head);
@@ -533,16 +534,16 @@ mptcp_subflow_delegated_next(struct mptcp_delegated_action *delegated)
 
 static inline bool mptcp_subflow_has_delegated_action(const struct mptcp_subflow_context *subflow)
 {
-       return test_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status);
+       return !!READ_ONCE(subflow->delegated_status);
 }
 
-static inline void mptcp_subflow_delegated_done(struct mptcp_subflow_context *subflow)
+static inline void mptcp_subflow_delegated_done(struct mptcp_subflow_context *subflow, int action)
 {
        /* pairs with mptcp_subflow_delegate, ensures delegate_node is updated before
         * touching the status bit
         */
        smp_wmb();
-       clear_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status);
+       clear_bit(action, &subflow->delegated_status);
 }
 
 int mptcp_is_enabled(const struct net *net);
index ba9ae48..dda8b76 100644 (file)
@@ -18,6 +18,8 @@
 #include "internal.h"
 #include "ncsi-pkt.h"
 
+static const int padding_bytes = 26;
+
 u32 ncsi_calculate_checksum(unsigned char *data, int len)
 {
        u32 checksum = 0;
@@ -213,12 +215,17 @@ static int ncsi_cmd_handler_oem(struct sk_buff *skb,
 {
        struct ncsi_cmd_oem_pkt *cmd;
        unsigned int len;
+       int payload;
+       /* NC-SI spec DSP_0222_1.2.0, section 8.2.2.2
+        * requires the payload to be padded with 0 to a
+        * 32-bit boundary before the checksum field.
+        * Ensure the padding bytes are accounted for in
+        * the skb allocation.
+        */
 
+       payload = ALIGN(nca->payload, 4);
        len = sizeof(struct ncsi_cmd_pkt_hdr) + 4;
-       if (nca->payload < 26)
-               len += 26;
-       else
-               len += nca->payload;
+       len += max(payload, padding_bytes);
 
        cmd = skb_put_zero(skb, len);
        memcpy(&cmd->mfr_id, nca->data, nca->payload);
@@ -272,6 +279,7 @@ static struct ncsi_request *ncsi_alloc_command(struct ncsi_cmd_arg *nca)
        struct net_device *dev = nd->dev;
        int hlen = LL_RESERVED_SPACE(dev);
        int tlen = dev->needed_tailroom;
+       int payload;
        int len = hlen + tlen;
        struct sk_buff *skb;
        struct ncsi_request *nr;
@@ -281,14 +289,14 @@ static struct ncsi_request *ncsi_alloc_command(struct ncsi_cmd_arg *nca)
                return NULL;
 
        /* NCSI command packet has 16-bytes header, payload, 4 bytes checksum.
+        * Payload needs padding so that the checksum field following payload is
+        * aligned to a 32-bit boundary.
         * The packet needs padding if its payload is less than 26 bytes to
         * meet 64 bytes minimal ethernet frame length.
         */
        len += sizeof(struct ncsi_cmd_pkt_hdr) + 4;
-       if (nca->payload < 26)
-               len += 26;
-       else
-               len += nca->payload;
+       payload = ALIGN(nca->payload, 4);
+       len += max(payload, padding_bytes);
 
        /* Allocate skb */
        skb = alloc_skb(len, GFP_ATOMIC);
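For illustration only, a standalone userspace sketch of the length rule the two hunks above now share: pad the payload to a 4-byte boundary for the checksum field, then keep the 26-byte floor needed for the 64-byte minimum Ethernet frame. The 16-byte header size is taken from the comment above; nothing here is kernel code.

#include <stdio.h>

#define ALIGN_UP(x, a)	(((x) + (a) - 1) & ~((a) - 1))
#define MAX(a, b)	((a) > (b) ? (a) : (b))

static unsigned int ncsi_cmd_len(unsigned int payload)
{
	unsigned int padded = ALIGN_UP(payload, 4);	/* checksum alignment */

	return 16 + 4 + MAX(padded, 26u);		/* header + checksum + body */
}

int main(void)
{
	printf("payload  4 -> len %u\n", ncsi_cmd_len(4));	/* 26-byte floor: 46 */
	printf("payload 30 -> len %u\n", ncsi_cmd_len(30));	/* padded to 32: 52 */
	return 0;
}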
index e93c937..51ad557 100644 (file)
@@ -1919,7 +1919,6 @@ ip_vs_in_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state
        struct ip_vs_proto_data *pd;
        struct ip_vs_conn *cp;
        int ret, pkts;
-       int conn_reuse_mode;
        struct sock *sk;
        int af = state->pf;
 
@@ -1997,15 +1996,16 @@ ip_vs_in_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state
        cp = INDIRECT_CALL_1(pp->conn_in_get, ip_vs_conn_in_get_proto,
                             ipvs, af, skb, &iph);
 
-       conn_reuse_mode = sysctl_conn_reuse_mode(ipvs);
-       if (conn_reuse_mode && !iph.fragoffs && is_new_conn(skb, &iph) && cp) {
+       if (!iph.fragoffs && is_new_conn(skb, &iph) && cp) {
+               int conn_reuse_mode = sysctl_conn_reuse_mode(ipvs);
                bool old_ct = false, resched = false;
 
                if (unlikely(sysctl_expire_nodest_conn(ipvs)) && cp->dest &&
                    unlikely(!atomic_read(&cp->dest->weight))) {
                        resched = true;
                        old_ct = ip_vs_conn_uses_old_conntrack(cp, skb);
-               } else if (is_new_conn_expected(cp, conn_reuse_mode)) {
+               } else if (conn_reuse_mode &&
+                          is_new_conn_expected(cp, conn_reuse_mode)) {
                        old_ct = ip_vs_conn_uses_old_conntrack(cp, skb);
                        if (!atomic_read(&cp->n_control)) {
                                resched = true;
index f1e5443..c7708bd 100644 (file)
@@ -1011,11 +1011,9 @@ ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family)
                                                   CTA_TUPLE_REPLY,
                                                   filter->family,
                                                   &filter->zone,
-                                                  filter->orig_flags);
-               if (err < 0) {
-                       err = -EINVAL;
+                                                  filter->reply_flags);
+               if (err < 0)
                        goto err_filter;
-               }
        }
 
        return filter;
index d6bf1b2..b561e0a 100644 (file)
@@ -65,11 +65,11 @@ static void nf_flow_rule_lwt_match(struct nf_flow_match *match,
                       sizeof(struct in6_addr));
                if (memcmp(&key->enc_ipv6.src, &in6addr_any,
                           sizeof(struct in6_addr)))
-                       memset(&key->enc_ipv6.src, 0xff,
+                       memset(&mask->enc_ipv6.src, 0xff,
                               sizeof(struct in6_addr));
                if (memcmp(&key->enc_ipv6.dst, &in6addr_any,
                           sizeof(struct in6_addr)))
-                       memset(&key->enc_ipv6.dst, 0xff,
+                       memset(&mask->enc_ipv6.dst, 0xff,
                               sizeof(struct in6_addr));
                enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS);
                key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
index cbfe4e4..bd68993 100644 (file)
@@ -22,7 +22,6 @@
 #include <linux/icmpv6.h>
 #include <linux/ip.h>
 #include <linux/ipv6.h>
-#include <linux/ip.h>
 #include <net/sctp/checksum.h>
 
 static bool nft_payload_rebuild_vlan_hdr(const struct sk_buff *skb, int mac_off,
index 2f7cf5e..0f8bb0b 100644 (file)
@@ -85,9 +85,9 @@ static ssize_t idletimer_tg_show(struct device *dev,
        mutex_unlock(&list_mutex);
 
        if (time_after(expires, jiffies) || ktimespec.tv_sec > 0)
-               return snprintf(buf, PAGE_SIZE, "%ld\n", time_diff);
+               return sysfs_emit(buf, "%ld\n", time_diff);
 
-       return snprintf(buf, PAGE_SIZE, "0\n");
+       return sysfs_emit(buf, "0\n");
 }
 
 static void idletimer_tg_work(struct work_struct *work)
index 4c57532..9eba2e6 100644 (file)
@@ -1852,6 +1852,11 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
        if (msg->msg_flags & MSG_OOB)
                return -EOPNOTSUPP;
 
+       if (len == 0) {
+               pr_warn_once("Zero length message leads to an empty skb\n");
+               return -ENODATA;
+       }
+
        err = scm_send(sock, msg, &scm, true);
        if (err < 0)
                return err;
index abf19c0..5327d13 100644 (file)
@@ -500,7 +500,7 @@ void rds_tcp_tune(struct socket *sock)
                sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
        }
        if (rtn->rcvbuf_size > 0) {
-               sk->sk_sndbuf = rtn->rcvbuf_size;
+               sk->sk_rcvbuf = rtn->rcvbuf_size;
                sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
        }
        release_sock(sk);
index dbea0bf..8120138 100644 (file)
@@ -135,16 +135,20 @@ struct rxrpc_bundle *rxrpc_get_bundle(struct rxrpc_bundle *bundle)
        return bundle;
 }
 
+static void rxrpc_free_bundle(struct rxrpc_bundle *bundle)
+{
+       rxrpc_put_peer(bundle->params.peer);
+       kfree(bundle);
+}
+
 void rxrpc_put_bundle(struct rxrpc_bundle *bundle)
 {
        unsigned int d = bundle->debug_id;
        unsigned int u = atomic_dec_return(&bundle->usage);
 
        _debug("PUT B=%x %u", d, u);
-       if (u == 0) {
-               rxrpc_put_peer(bundle->params.peer);
-               kfree(bundle);
-       }
+       if (u == 0)
+               rxrpc_free_bundle(bundle);
 }
 
 /*
@@ -328,7 +332,7 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *c
        return candidate;
 
 found_bundle_free:
-       kfree(candidate);
+       rxrpc_free_bundle(candidate);
 found_bundle:
        rxrpc_get_bundle(bundle);
        spin_unlock(&local->client_bundles_lock);
index 68396d0..0298fe2 100644 (file)
@@ -299,6 +299,12 @@ static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_sock *rx,
        return peer;
 }
 
+static void rxrpc_free_peer(struct rxrpc_peer *peer)
+{
+       rxrpc_put_local(peer->local);
+       kfree_rcu(peer, rcu);
+}
+
 /*
  * Set up a new incoming peer.  There shouldn't be any other matching peers
  * since we've already done a search in the list from the non-reentrant context
@@ -365,7 +371,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx,
                spin_unlock_bh(&rxnet->peer_hash_lock);
 
                if (peer)
-                       kfree(candidate);
+                       rxrpc_free_peer(candidate);
                else
                        peer = candidate;
        }
@@ -420,8 +426,7 @@ static void __rxrpc_put_peer(struct rxrpc_peer *peer)
        list_del_init(&peer->keepalive_link);
        spin_unlock_bh(&rxnet->peer_hash_lock);
 
-       rxrpc_put_local(peer->local);
-       kfree_rcu(peer, rcu);
+       rxrpc_free_peer(peer);
 }
 
 /*
@@ -457,8 +462,7 @@ void rxrpc_put_peer_locked(struct rxrpc_peer *peer)
        if (n == 0) {
                hash_del_rcu(&peer->hash_link);
                list_del_init(&peer->keepalive_link);
-               rxrpc_put_local(peer->local);
-               kfree_rcu(peer, rcu);
+               rxrpc_free_peer(peer);
        }
 }
 
index 0eae9ff..e007fc7 100644 (file)
@@ -665,12 +665,14 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt,
                        q->classes[i].deficit = quanta[i];
                }
        }
+       for (i = q->nbands; i < oldbands; i++) {
+               qdisc_tree_flush_backlog(q->classes[i].qdisc);
+               if (i >= q->nstrict)
+                       list_del(&q->classes[i].alist);
+       }
        q->nstrict = nstrict;
        memcpy(q->prio2band, priomap, sizeof(priomap));
 
-       for (i = q->nbands; i < oldbands; i++)
-               qdisc_tree_flush_backlog(q->classes[i].qdisc);
-
        for (i = 0; i < q->nbands; i++)
                q->classes[i].quantum = quanta[i];
 
index b61c802..230072f 100644 (file)
@@ -585,7 +585,7 @@ static void smc_switch_to_fallback(struct smc_sock *smc, int reason_code)
                 * to clcsocket->wq during the fallback.
                 */
                spin_lock_irqsave(&smc_wait->lock, flags);
-               spin_lock(&clc_wait->lock);
+               spin_lock_nested(&clc_wait->lock, SINGLE_DEPTH_NESTING);
                list_splice_init(&smc_wait->head, &clc_wait->head);
                spin_unlock(&clc_wait->lock);
                spin_unlock_irqrestore(&smc_wait->lock, flags);
@@ -2134,8 +2134,10 @@ static int smc_listen(struct socket *sock, int backlog)
        smc->clcsock->sk->sk_user_data =
                (void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY);
        rc = kernel_listen(smc->clcsock, backlog);
-       if (rc)
+       if (rc) {
+               smc->clcsock->sk->sk_data_ready = smc->clcsk_data_ready;
                goto out;
+       }
        sk->sk_max_ack_backlog = backlog;
        sk->sk_ack_backlog = 0;
        sk->sk_state = SMC_LISTEN;
@@ -2368,8 +2370,10 @@ static __poll_t smc_poll(struct file *file, struct socket *sock,
 static int smc_shutdown(struct socket *sock, int how)
 {
        struct sock *sk = sock->sk;
+       bool do_shutdown = true;
        struct smc_sock *smc;
        int rc = -EINVAL;
+       int old_state;
        int rc1 = 0;
 
        smc = smc_sk(sk);
@@ -2396,7 +2400,11 @@ static int smc_shutdown(struct socket *sock, int how)
        }
        switch (how) {
        case SHUT_RDWR:         /* shutdown in both directions */
+               old_state = sk->sk_state;
                rc = smc_close_active(smc);
+               if (old_state == SMC_ACTIVE &&
+                   sk->sk_state == SMC_PEERCLOSEWAIT1)
+                       do_shutdown = false;
                break;
        case SHUT_WR:
                rc = smc_close_shutdown_write(smc);
@@ -2406,7 +2414,7 @@ static int smc_shutdown(struct socket *sock, int how)
                /* nothing more to do because peer is not involved */
                break;
        }
-       if (smc->clcsock)
+       if (do_shutdown && smc->clcsock)
                rc1 = kernel_sock_shutdown(smc->clcsock, how);
        /* map sock_shutdown_cmd constants to sk_shutdown value range */
        sk->sk_shutdown |= how + 1;
index 0f9ffba..292e4d9 100644 (file)
@@ -195,6 +195,7 @@ int smc_close_active(struct smc_sock *smc)
        int old_state;
        long timeout;
        int rc = 0;
+       int rc1 = 0;
 
        timeout = current->flags & PF_EXITING ?
                  0 : sock_flag(sk, SOCK_LINGER) ?
@@ -228,6 +229,15 @@ again:
                        /* send close request */
                        rc = smc_close_final(conn);
                        sk->sk_state = SMC_PEERCLOSEWAIT1;
+
+                       /* actively shut down the clcsock before the peer closes
+                        * it, to prevent the peer from entering TIME_WAIT state.
+                        */
+                       if (smc->clcsock && smc->clcsock->sk) {
+                               rc1 = kernel_sock_shutdown(smc->clcsock,
+                                                          SHUT_RDWR);
+                               rc = rc ? rc : rc1;
+                       }
                } else {
                        /* peer event has changed the state */
                        goto again;
@@ -354,9 +364,9 @@ static void smc_close_passive_work(struct work_struct *work)
        if (rxflags->peer_conn_abort) {
                /* peer has not received all data */
                smc_close_passive_abort_received(smc);
-               release_sock(&smc->sk);
+               release_sock(sk);
                cancel_delayed_work_sync(&conn->tx_work);
-               lock_sock(&smc->sk);
+               lock_sock(sk);
                goto wakeup;
        }
 
index 25ebd30..387d28b 100644 (file)
@@ -625,18 +625,17 @@ int smcd_nl_get_lgr(struct sk_buff *skb, struct netlink_callback *cb)
 void smc_lgr_cleanup_early(struct smc_connection *conn)
 {
        struct smc_link_group *lgr = conn->lgr;
-       struct list_head *lgr_list;
        spinlock_t *lgr_lock;
 
        if (!lgr)
                return;
 
        smc_conn_free(conn);
-       lgr_list = smc_lgr_list_head(lgr, &lgr_lock);
+       smc_lgr_list_head(lgr, &lgr_lock);
        spin_lock_bh(lgr_lock);
        /* do not use this link group for new connections */
-       if (!list_empty(lgr_list))
-               list_del_init(lgr_list);
+       if (!list_empty(&lgr->list))
+               list_del_init(&lgr->list);
        spin_unlock_bh(lgr_lock);
        __smc_lgr_terminate(lgr, true);
 }
@@ -1672,14 +1671,26 @@ static void smc_link_down_work(struct work_struct *work)
        mutex_unlock(&lgr->llc_conf_mutex);
 }
 
-/* Determine vlan of internal TCP socket.
- * @vlan_id: address to store the determined vlan id into
- */
+static int smc_vlan_by_tcpsk_walk(struct net_device *lower_dev,
+                                 struct netdev_nested_priv *priv)
+{
+       unsigned short *vlan_id = (unsigned short *)priv->data;
+
+       if (is_vlan_dev(lower_dev)) {
+               *vlan_id = vlan_dev_vlan_id(lower_dev);
+               return 1;
+       }
+
+       return 0;
+}
+
+/* Determine vlan of internal TCP socket. */
 int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini)
 {
        struct dst_entry *dst = sk_dst_get(clcsock->sk);
+       struct netdev_nested_priv priv;
        struct net_device *ndev;
-       int i, nest_lvl, rc = 0;
+       int rc = 0;
 
        ini->vlan_id = 0;
        if (!dst) {
@@ -1697,20 +1708,9 @@ int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini)
                goto out_rel;
        }
 
+       priv.data = (void *)&ini->vlan_id;
        rtnl_lock();
-       nest_lvl = ndev->lower_level;
-       for (i = 0; i < nest_lvl; i++) {
-               struct list_head *lower = &ndev->adj_list.lower;
-
-               if (list_empty(lower))
-                       break;
-               lower = lower->next;
-               ndev = (struct net_device *)netdev_lower_get_next(ndev, &lower);
-               if (is_vlan_dev(ndev)) {
-                       ini->vlan_id = vlan_dev_vlan_id(ndev);
-                       break;
-               }
-       }
+       netdev_walk_all_lower_dev(ndev, smc_vlan_by_tcpsk_walk, &priv);
        rtnl_unlock();
 
 out_rel:
index ae48c9c..d8ee06a 100644 (file)
@@ -1720,15 +1720,15 @@ static void xs_local_set_port(struct rpc_xprt *xprt, unsigned short port)
 }
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
-static struct lock_class_key xs_key[2];
-static struct lock_class_key xs_slock_key[2];
+static struct lock_class_key xs_key[3];
+static struct lock_class_key xs_slock_key[3];
 
 static inline void xs_reclassify_socketu(struct socket *sock)
 {
        struct sock *sk = sock->sk;
 
        sock_lock_init_class_and_name(sk, "slock-AF_LOCAL-RPC",
-               &xs_slock_key[1], "sk_lock-AF_LOCAL-RPC", &xs_key[1]);
+               &xs_slock_key[0], "sk_lock-AF_LOCAL-RPC", &xs_key[0]);
 }
 
 static inline void xs_reclassify_socket4(struct socket *sock)
@@ -1736,7 +1736,7 @@ static inline void xs_reclassify_socket4(struct socket *sock)
        struct sock *sk = sock->sk;
 
        sock_lock_init_class_and_name(sk, "slock-AF_INET-RPC",
-               &xs_slock_key[0], "sk_lock-AF_INET-RPC", &xs_key[0]);
+               &xs_slock_key[1], "sk_lock-AF_INET-RPC", &xs_key[1]);
 }
 
 static inline void xs_reclassify_socket6(struct socket *sock)
@@ -1744,7 +1744,7 @@ static inline void xs_reclassify_socket6(struct socket *sock)
        struct sock *sk = sock->sk;
 
        sock_lock_init_class_and_name(sk, "slock-AF_INET6-RPC",
-               &xs_slock_key[1], "sk_lock-AF_INET6-RPC", &xs_key[1]);
+               &xs_slock_key[2], "sk_lock-AF_INET6-RPC", &xs_key[2]);
 }
 
 static inline void xs_reclassify_socket(int family, struct socket *sock)
index acfba9f..6bc2879 100644 (file)
@@ -61,7 +61,7 @@ static DEFINE_MUTEX(tcpv6_prot_mutex);
 static const struct proto *saved_tcpv4_prot;
 static DEFINE_MUTEX(tcpv4_prot_mutex);
 static struct proto tls_prots[TLS_NUM_PROTS][TLS_NUM_CONFIG][TLS_NUM_CONFIG];
-static struct proto_ops tls_sw_proto_ops;
+static struct proto_ops tls_proto_ops[TLS_NUM_PROTS][TLS_NUM_CONFIG][TLS_NUM_CONFIG];
 static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG],
                         const struct proto *base);
 
@@ -71,6 +71,8 @@ void update_sk_prot(struct sock *sk, struct tls_context *ctx)
 
        WRITE_ONCE(sk->sk_prot,
                   &tls_prots[ip_ver][ctx->tx_conf][ctx->rx_conf]);
+       WRITE_ONCE(sk->sk_socket->ops,
+                  &tls_proto_ops[ip_ver][ctx->tx_conf][ctx->rx_conf]);
 }
 
 int wait_on_pending_writer(struct sock *sk, long *timeo)
@@ -669,8 +671,6 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval,
        if (tx) {
                ctx->sk_write_space = sk->sk_write_space;
                sk->sk_write_space = tls_write_space;
-       } else {
-               sk->sk_socket->ops = &tls_sw_proto_ops;
        }
        goto out;
 
@@ -728,6 +728,39 @@ struct tls_context *tls_ctx_create(struct sock *sk)
        return ctx;
 }
 
+static void build_proto_ops(struct proto_ops ops[TLS_NUM_CONFIG][TLS_NUM_CONFIG],
+                           const struct proto_ops *base)
+{
+       ops[TLS_BASE][TLS_BASE] = *base;
+
+       ops[TLS_SW  ][TLS_BASE] = ops[TLS_BASE][TLS_BASE];
+       ops[TLS_SW  ][TLS_BASE].sendpage_locked = tls_sw_sendpage_locked;
+
+       ops[TLS_BASE][TLS_SW  ] = ops[TLS_BASE][TLS_BASE];
+       ops[TLS_BASE][TLS_SW  ].splice_read     = tls_sw_splice_read;
+
+       ops[TLS_SW  ][TLS_SW  ] = ops[TLS_SW  ][TLS_BASE];
+       ops[TLS_SW  ][TLS_SW  ].splice_read     = tls_sw_splice_read;
+
+#ifdef CONFIG_TLS_DEVICE
+       ops[TLS_HW  ][TLS_BASE] = ops[TLS_BASE][TLS_BASE];
+       ops[TLS_HW  ][TLS_BASE].sendpage_locked = NULL;
+
+       ops[TLS_HW  ][TLS_SW  ] = ops[TLS_BASE][TLS_SW  ];
+       ops[TLS_HW  ][TLS_SW  ].sendpage_locked = NULL;
+
+       ops[TLS_BASE][TLS_HW  ] = ops[TLS_BASE][TLS_SW  ];
+
+       ops[TLS_SW  ][TLS_HW  ] = ops[TLS_SW  ][TLS_SW  ];
+
+       ops[TLS_HW  ][TLS_HW  ] = ops[TLS_HW  ][TLS_SW  ];
+       ops[TLS_HW  ][TLS_HW  ].sendpage_locked = NULL;
+#endif
+#ifdef CONFIG_TLS_TOE
+       ops[TLS_HW_RECORD][TLS_HW_RECORD] = *base;
+#endif
+}
+
 static void tls_build_proto(struct sock *sk)
 {
        int ip_ver = sk->sk_family == AF_INET6 ? TLSV6 : TLSV4;
@@ -739,6 +772,8 @@ static void tls_build_proto(struct sock *sk)
                mutex_lock(&tcpv6_prot_mutex);
                if (likely(prot != saved_tcpv6_prot)) {
                        build_protos(tls_prots[TLSV6], prot);
+                       build_proto_ops(tls_proto_ops[TLSV6],
+                                       sk->sk_socket->ops);
                        smp_store_release(&saved_tcpv6_prot, prot);
                }
                mutex_unlock(&tcpv6_prot_mutex);
@@ -749,6 +784,8 @@ static void tls_build_proto(struct sock *sk)
                mutex_lock(&tcpv4_prot_mutex);
                if (likely(prot != saved_tcpv4_prot)) {
                        build_protos(tls_prots[TLSV4], prot);
+                       build_proto_ops(tls_proto_ops[TLSV4],
+                                       sk->sk_socket->ops);
                        smp_store_release(&saved_tcpv4_prot, prot);
                }
                mutex_unlock(&tcpv4_prot_mutex);
@@ -959,10 +996,6 @@ static int __init tls_register(void)
        if (err)
                return err;
 
-       tls_sw_proto_ops = inet_stream_ops;
-       tls_sw_proto_ops.splice_read = tls_sw_splice_read;
-       tls_sw_proto_ops.sendpage_locked   = tls_sw_sendpage_locked;
-
        tls_device_init();
        tcp_register_ulp(&tcp_tls_ulp_ops);
 
index d815640..dfe623a 100644 (file)
@@ -521,7 +521,7 @@ static int tls_do_encryption(struct sock *sk,
        memcpy(&rec->iv_data[iv_offset], tls_ctx->tx.iv,
               prot->iv_size + prot->salt_size);
 
-       xor_iv_with_seq(prot, rec->iv_data, tls_ctx->tx.rec_seq);
+       xor_iv_with_seq(prot, rec->iv_data + iv_offset, tls_ctx->tx.rec_seq);
 
        sge->offset += prot->prepend_size;
        sge->length -= prot->prepend_size;
@@ -1499,7 +1499,7 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
        else
                memcpy(iv + iv_offset, tls_ctx->rx.iv, prot->salt_size);
 
-       xor_iv_with_seq(prot, iv, tls_ctx->rx.rec_seq);
+       xor_iv_with_seq(prot, iv + iv_offset, tls_ctx->rx.rec_seq);
 
        /* Prepare AAD */
        tls_make_aad(aad, rxm->full_len - prot->overhead_size +
@@ -2005,6 +2005,7 @@ ssize_t tls_sw_splice_read(struct socket *sock,  loff_t *ppos,
        struct sock *sk = sock->sk;
        struct sk_buff *skb;
        ssize_t copied = 0;
+       bool from_queue;
        int err = 0;
        long timeo;
        int chunk;
@@ -2014,25 +2015,28 @@ ssize_t tls_sw_splice_read(struct socket *sock,  loff_t *ppos,
 
        timeo = sock_rcvtimeo(sk, flags & SPLICE_F_NONBLOCK);
 
-       skb = tls_wait_data(sk, NULL, flags & SPLICE_F_NONBLOCK, timeo, &err);
-       if (!skb)
-               goto splice_read_end;
-
-       if (!ctx->decrypted) {
-               err = decrypt_skb_update(sk, skb, NULL, &chunk, &zc, false);
-
-               /* splice does not support reading control messages */
-               if (ctx->control != TLS_RECORD_TYPE_DATA) {
-                       err = -EINVAL;
+       from_queue = !skb_queue_empty(&ctx->rx_list);
+       if (from_queue) {
+               skb = __skb_dequeue(&ctx->rx_list);
+       } else {
+               skb = tls_wait_data(sk, NULL, flags & SPLICE_F_NONBLOCK, timeo,
+                                   &err);
+               if (!skb)
                        goto splice_read_end;
-               }
 
+               err = decrypt_skb_update(sk, skb, NULL, &chunk, &zc, false);
                if (err < 0) {
                        tls_err_abort(sk, -EBADMSG);
                        goto splice_read_end;
                }
-               ctx->decrypted = 1;
        }
+
+       /* splice does not support reading control messages */
+       if (ctx->control != TLS_RECORD_TYPE_DATA) {
+               err = -EINVAL;
+               goto splice_read_end;
+       }
+
        rxm = strp_msg(skb);
 
        chunk = min_t(unsigned int, rxm->full_len, len);
@@ -2040,7 +2044,17 @@ ssize_t tls_sw_splice_read(struct socket *sock,  loff_t *ppos,
        if (copied < 0)
                goto splice_read_end;
 
-       tls_sw_advance_skb(sk, skb, copied);
+       if (!from_queue) {
+               ctx->recv_pkt = NULL;
+               __strp_unpause(&ctx->strp);
+       }
+       if (chunk < rxm->full_len) {
+               __skb_queue_head(&ctx->rx_list, skb);
+               rxm->offset += len;
+               rxm->full_len -= len;
+       } else {
+               consume_skb(skb);
+       }
 
 splice_read_end:
        release_sock(sk);
index 78e08e8..b0bfc78 100644 (file)
@@ -2882,9 +2882,6 @@ static int unix_shutdown(struct socket *sock, int mode)
 
        unix_state_lock(sk);
        sk->sk_shutdown |= mode;
-       if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
-           mode == SHUTDOWN_MASK)
-               sk->sk_state = TCP_CLOSE;
        other = unix_peer(sk);
        if (other)
                sock_hold(other);
index cc36256..c0d3bcb 100644 (file)
@@ -259,5 +259,8 @@ int main(void)
        DEVID_FIELD(dfl_device_id, type);
        DEVID_FIELD(dfl_device_id, feature_id);
 
+       DEVID(ishtp_device_id);
+       DEVID_FIELD(ishtp_device_id, guid);
+
        return 0;
 }
index 49aba86..5258247 100644 (file)
@@ -115,6 +115,17 @@ static inline void add_uuid(char *str, uuid_le uuid)
                uuid.b[12], uuid.b[13], uuid.b[14], uuid.b[15]);
 }
 
+static inline void add_guid(char *str, guid_t guid)
+{
+       int len = strlen(str);
+
+       sprintf(str + len, "%02X%02X%02X%02X-%02X%02X-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X",
+               guid.b[3], guid.b[2], guid.b[1], guid.b[0],
+               guid.b[5], guid.b[4], guid.b[7], guid.b[6],
+               guid.b[8], guid.b[9], guid.b[10], guid.b[11],
+               guid.b[12], guid.b[13], guid.b[14], guid.b[15]);
+}
+
 /**
  * Check that sizeof(device_id type) are consistent with size of section
  * in the .o file. If inconsistent, userspace and the kernel do not agree
@@ -1380,6 +1391,18 @@ static int do_mhi_entry(const char *filename, void *symval, char *alias)
        return 1;
 }
 
+/* Looks like: ishtp:{guid} */
+static int do_ishtp_entry(const char *filename, void *symval, char *alias)
+{
+       DEF_FIELD(symval, ishtp_device_id, guid);
+
+       strcpy(alias, ISHTP_MODULE_PREFIX "{");
+       add_guid(alias, guid);
+       strcat(alias, "}");
+
+       return 1;
+}
+
 static int do_auxiliary_entry(const char *filename, void *symval, char *alias)
 {
        DEF_FIELD_ADDR(symval, auxiliary_device_id, name);
@@ -1499,6 +1522,7 @@ static const struct devtable devtable[] = {
        {"auxiliary", SIZE_auxiliary_device_id, do_auxiliary_entry},
        {"ssam", SIZE_ssam_device_id, do_ssam_entry},
        {"dfl", SIZE_dfl_device_id, do_dfl_entry},
+       {"ishtp", SIZE_ishtp_device_id, do_ishtp_entry},
 };
 
 /* Create MODULE_ALIAS() statements.
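A hedged, standalone sketch of the alias string do_ishtp_entry() above would emit, reproducing the byte reordering done by add_guid(); the GUID bytes are made up for the example.

#include <stdio.h>

int main(void)
{
	/* made-up raw GUID bytes b[0]..b[15] */
	unsigned char b[16] = { 0x33, 0x22, 0x11, 0x00, 0x55, 0x44, 0x77, 0x66,
				0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff };

	/* same ordering as add_guid(): the first three fields are little endian */
	printf("ishtp:{%02X%02X%02X%02X-%02X%02X-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X}\n",
	       b[3], b[2], b[1], b[0], b[5], b[4], b[7], b[6],
	       b[8], b[9], b[10], b[11], b[12], b[13], b[14], b[15]);
	/* prints: ishtp:{00112233-4455-6677-8899-AABBCCDDEEFF} */
	return 0;
}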
index 727c3b4..0ae4e4e 100644 (file)
@@ -31,13 +31,20 @@ static u32 hashtab_compute_size(u32 nel)
 
 int hashtab_init(struct hashtab *h, u32 nel_hint)
 {
-       h->size = hashtab_compute_size(nel_hint);
+       u32 size = hashtab_compute_size(nel_hint);
+
+       /* should already be zeroed, but better be safe */
        h->nel = 0;
-       if (!h->size)
-               return 0;
+       h->size = 0;
+       h->htable = NULL;
 
-       h->htable = kcalloc(h->size, sizeof(*h->htable), GFP_KERNEL);
-       return h->htable ? 0 : -ENOMEM;
+       if (size) {
+               h->htable = kcalloc(size, sizeof(*h->htable), GFP_KERNEL);
+               if (!h->htable)
+                       return -ENOMEM;
+               h->size = size;
+       }
+       return 0;
 }
 
 int __hashtab_insert(struct hashtab *h, struct hashtab_node **dst,
index b9ac9e9..4208fa8 100644 (file)
@@ -252,6 +252,11 @@ static const struct config_entry config_table[] = {
                .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE,
                .device = 0x02c8,
        },
+       {
+               .flags = FLAG_SOF,
+               .device = 0x02c8,
+               .codec_hid = "ESSX8336",
+       },
 /* Cometlake-H */
        {
                .flags = FLAG_SOF,
@@ -276,6 +281,11 @@ static const struct config_entry config_table[] = {
                .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE,
                .device = 0x06c8,
        },
+       {
+               .flags = FLAG_SOF,
+               .device = 0x06c8,
+               .codec_hid = "ESSX8336",
+       },
 #endif
 
 /* Icelake */
@@ -299,6 +309,15 @@ static const struct config_entry config_table[] = {
        },
 #endif
 
+/* JasperLake */
+#if IS_ENABLED(CONFIG_SND_SOC_SOF_JASPERLAKE)
+       {
+               .flags = FLAG_SOF,
+               .device = 0x4dc8,
+               .codec_hid = "ESSX8336",
+       },
+#endif
+
 /* Tigerlake */
 #if IS_ENABLED(CONFIG_SND_SOC_SOF_TIGERLAKE)
        {
index ea20236..9a678b5 100644 (file)
@@ -3218,7 +3218,6 @@ static int snd_cmipci_probe(struct pci_dev *pci,
 {
        static int dev;
        struct snd_card *card;
-       struct cmipci *cm;
        int err;
 
        if (dev >= SNDRV_CARDS)
@@ -3229,10 +3228,9 @@ static int snd_cmipci_probe(struct pci_dev *pci,
        }
 
        err = snd_devm_card_new(&pci->dev, index[dev], id[dev], THIS_MODULE,
-                               sizeof(*cm), &card);
+                               sizeof(struct cmipci), &card);
        if (err < 0)
                return err;
-       cm = card->private_data;
        
        switch (pci->device) {
        case PCI_DEVICE_ID_CMEDIA_CM8738:
index da6e635..d074727 100644 (file)
 
 #define BLANK_SLOT             4094
 
-static int amixer_master(struct rsc *rsc)
+static void amixer_master(struct rsc *rsc)
 {
        rsc->conj = 0;
-       return rsc->idx = container_of(rsc, struct amixer, rsc)->idx[0];
+       rsc->idx = container_of(rsc, struct amixer, rsc)->idx[0];
 }
 
-static int amixer_next_conj(struct rsc *rsc)
+static void amixer_next_conj(struct rsc *rsc)
 {
        rsc->conj++;
-       return container_of(rsc, struct amixer, rsc)->idx[rsc->conj];
 }
 
 static int amixer_index(const struct rsc *rsc)
@@ -331,16 +330,15 @@ int amixer_mgr_destroy(struct amixer_mgr *amixer_mgr)
 
 /* SUM resource management */
 
-static int sum_master(struct rsc *rsc)
+static void sum_master(struct rsc *rsc)
 {
        rsc->conj = 0;
-       return rsc->idx = container_of(rsc, struct sum, rsc)->idx[0];
+       rsc->idx = container_of(rsc, struct sum, rsc)->idx[0];
 }
 
-static int sum_next_conj(struct rsc *rsc)
+static void sum_next_conj(struct rsc *rsc)
 {
        rsc->conj++;
-       return container_of(rsc, struct sum, rsc)->idx[rsc->conj];
 }
 
 static int sum_index(const struct rsc *rsc)
index f589da0..7fc7200 100644 (file)
@@ -51,12 +51,12 @@ static const struct daio_rsc_idx idx_20k2[NUM_DAIOTYP] = {
        [SPDIFIO] = {.left = 0x05, .right = 0x85},
 };
 
-static int daio_master(struct rsc *rsc)
+static void daio_master(struct rsc *rsc)
 {
        /* Actually, this is not the resource index of DAIO.
         * For DAO, it is the input mapper index. And, for DAI,
         * it is the output time-slot index. */
-       return rsc->conj = rsc->idx;
+       rsc->conj = rsc->idx;
 }
 
 static int daio_index(const struct rsc *rsc)
@@ -64,19 +64,19 @@ static int daio_index(const struct rsc *rsc)
        return rsc->conj;
 }
 
-static int daio_out_next_conj(struct rsc *rsc)
+static void daio_out_next_conj(struct rsc *rsc)
 {
-       return rsc->conj += 2;
+       rsc->conj += 2;
 }
 
-static int daio_in_next_conj_20k1(struct rsc *rsc)
+static void daio_in_next_conj_20k1(struct rsc *rsc)
 {
-       return rsc->conj += 0x200;
+       rsc->conj += 0x200;
 }
 
-static int daio_in_next_conj_20k2(struct rsc *rsc)
+static void daio_in_next_conj_20k2(struct rsc *rsc)
 {
-       return rsc->conj += 0x100;
+       rsc->conj += 0x100;
 }
 
 static const struct rsc_ops daio_out_rsc_ops = {
index 81ad269..be1d3e6 100644 (file)
@@ -109,18 +109,17 @@ static int audio_ring_slot(const struct rsc *rsc)
     return (rsc->conj << 4) + offset_in_audio_slot_block[rsc->type];
 }
 
-static int rsc_next_conj(struct rsc *rsc)
+static void rsc_next_conj(struct rsc *rsc)
 {
        unsigned int i;
        for (i = 0; (i < 8) && (!(rsc->msr & (0x1 << i))); )
                i++;
        rsc->conj += (AUDIO_SLOT_BLOCK_NUM >> i);
-       return rsc->conj;
 }
 
-static int rsc_master(struct rsc *rsc)
+static void rsc_master(struct rsc *rsc)
 {
-       return rsc->conj = rsc->idx;
+       rsc->conj = rsc->idx;
 }
 
 static const struct rsc_ops rsc_generic_ops = {
index fdbfd80..58553bd 100644 (file)
@@ -39,8 +39,8 @@ struct rsc {
 };
 
 struct rsc_ops {
-       int (*master)(struct rsc *rsc); /* Move to master resource */
-       int (*next_conj)(struct rsc *rsc); /* Move to next conjugate resource */
+       void (*master)(struct rsc *rsc); /* Move to master resource */
+       void (*next_conj)(struct rsc *rsc); /* Move to next conjugate resource */
        int (*index)(const struct rsc *rsc); /* Return the index of resource */
        /* Return the output slot number */
        int (*output_slot)(const struct rsc *rsc);
index bd4697b..4a94b47 100644 (file)
@@ -590,16 +590,15 @@ int src_mgr_destroy(struct src_mgr *src_mgr)
 
 /* SRCIMP resource manager operations */
 
-static int srcimp_master(struct rsc *rsc)
+static void srcimp_master(struct rsc *rsc)
 {
        rsc->conj = 0;
-       return rsc->idx = container_of(rsc, struct srcimp, rsc)->idx[0];
+       rsc->idx = container_of(rsc, struct srcimp, rsc)->idx[0];
 }
 
-static int srcimp_next_conj(struct rsc *rsc)
+static void srcimp_next_conj(struct rsc *rsc)
 {
        rsc->conj++;
-       return container_of(rsc, struct srcimp, rsc)->idx[rsc->conj];
 }
 
 static int srcimp_index(const struct rsc *rsc)
index fe51163..1b46b59 100644 (file)
@@ -335,7 +335,10 @@ enum {
                                        ((pci)->device == 0x0c0c) || \
                                        ((pci)->device == 0x0d0c) || \
                                        ((pci)->device == 0x160c) || \
-                                       ((pci)->device == 0x490d))
+                                       ((pci)->device == 0x490d) || \
+                                       ((pci)->device == 0x4f90) || \
+                                       ((pci)->device == 0x4f91) || \
+                                       ((pci)->device == 0x4f92))
 
 #define IS_BXT(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x5a98)
 
@@ -2473,6 +2476,13 @@ static const struct pci_device_id azx_ids[] = {
        /* DG1 */
        { PCI_DEVICE(0x8086, 0x490d),
          .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE},
+       /* DG2 */
+       { PCI_DEVICE(0x8086, 0x4f90),
+         .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE},
+       { PCI_DEVICE(0x8086, 0x4f91),
+         .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE},
+       { PCI_DEVICE(0x8086, 0x4f92),
+         .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE},
        /* Alderlake-S */
        { PCI_DEVICE(0x8086, 0x7ad0),
          .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE},
index ea8ab8b..d22c96e 100644 (file)
@@ -438,6 +438,15 @@ int snd_hda_codec_set_pin_target(struct hda_codec *codec, hda_nid_t nid,
 #define for_each_hda_codec_node(nid, codec) \
        for ((nid) = (codec)->core.start_nid; (nid) < (codec)->core.end_nid; (nid)++)
 
+/* Set the codec power_state flag to indicate that unsol event handling is
+ * allowed; see hda_codec_unsol_event() in hda_bind.c.  Calling this might
+ * confuse the state tracking, so use with care.
+ */
+static inline void snd_hda_codec_allow_unsol_events(struct hda_codec *codec)
+{
+       codec->core.dev.power.power_state = PMSG_ON;
+}
+
 /*
  * get widget capabilities
  */
index 31ff11a..039b9f2 100644 (file)
@@ -750,6 +750,11 @@ static void cs42l42_resume(struct sub_codec *cs42l42)
        if (cs42l42->full_scale_vol)
                cs8409_i2c_write(cs42l42, 0x2001, 0x01);
 
+       /* we have to explicitly allow unsol event handling even during the
+        * resume phase so that the jack event is processed properly
+        */
+       snd_hda_codec_allow_unsol_events(cs42l42->codec);
+
        cs42l42_enable_jack_detect(cs42l42);
 }
 
index 65d2c55..415701b 100644 (file)
@@ -4380,10 +4380,11 @@ HDA_CODEC_ENTRY(0x8086280f, "Icelake HDMI",     patch_i915_icl_hdmi),
 HDA_CODEC_ENTRY(0x80862812, "Tigerlake HDMI",  patch_i915_tgl_hdmi),
 HDA_CODEC_ENTRY(0x80862814, "DG1 HDMI",        patch_i915_tgl_hdmi),
 HDA_CODEC_ENTRY(0x80862815, "Alderlake HDMI",  patch_i915_tgl_hdmi),
-HDA_CODEC_ENTRY(0x8086281c, "Alderlake-P HDMI", patch_i915_tgl_hdmi),
 HDA_CODEC_ENTRY(0x80862816, "Rocketlake HDMI", patch_i915_tgl_hdmi),
+HDA_CODEC_ENTRY(0x80862819, "DG2 HDMI",        patch_i915_tgl_hdmi),
 HDA_CODEC_ENTRY(0x8086281a, "Jasperlake HDMI", patch_i915_icl_hdmi),
 HDA_CODEC_ENTRY(0x8086281b, "Elkhartlake HDMI",        patch_i915_icl_hdmi),
+HDA_CODEC_ENTRY(0x8086281c, "Alderlake-P HDMI", patch_i915_tgl_hdmi),
 HDA_CODEC_ENTRY(0x80862880, "CedarTrail HDMI", patch_generic_hdmi),
 HDA_CODEC_ENTRY(0x80862882, "Valleyview2 HDMI",        patch_i915_byt_hdmi),
 HDA_CODEC_ENTRY(0x80862883, "Braswell HDMI",   patch_i915_byt_hdmi),
index 2f1727f..9ce7457 100644 (file)
@@ -6521,6 +6521,27 @@ static void alc256_fixup_tongfang_reset_persistent_settings(struct hda_codec *co
        alc_write_coef_idx(codec, 0x45, 0x5089);
 }
 
+static const struct coef_fw alc233_fixup_no_audio_jack_coefs[] = {
+       WRITE_COEF(0x1a, 0x9003), WRITE_COEF(0x1b, 0x0e2b), WRITE_COEF(0x37, 0xfe06),
+       WRITE_COEF(0x38, 0x4981), WRITE_COEF(0x45, 0xd489), WRITE_COEF(0x46, 0x0074),
+       WRITE_COEF(0x49, 0x0149),
+       {}
+};
+
+static void alc233_fixup_no_audio_jack(struct hda_codec *codec,
+                                      const struct hda_fixup *fix,
+                                      int action)
+{
+       /*
+        * The audio jack input and output are not detected on the ASRock NUC Box
+        * 1100 series when cold booting without this fix. Warm rebooting from a
+        * certain other OS makes the audio functional, as COEF settings are
+        * preserved in this case. This fix sets these altered COEF values as
+        * the default.
+        */
+       alc_process_coef_fw(codec, alc233_fixup_no_audio_jack_coefs);
+}
+
 enum {
        ALC269_FIXUP_GPIO2,
        ALC269_FIXUP_SONY_VAIO,
@@ -6740,6 +6761,7 @@ enum {
        ALC287_FIXUP_13S_GEN2_SPEAKERS,
        ALC256_FIXUP_TONGFANG_RESET_PERSISTENT_SETTINGS,
        ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE,
+       ALC233_FIXUP_NO_AUDIO_JACK,
 };
 
 static const struct hda_fixup alc269_fixups[] = {
@@ -8460,6 +8482,10 @@ static const struct hda_fixup alc269_fixups[] = {
                .chained = true,
                .chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC,
        },
+       [ALC233_FIXUP_NO_AUDIO_JACK] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = alc233_fixup_no_audio_jack,
+       },
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -8639,6 +8665,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x103c, 0x8728, "HP EliteBook 840 G7", ALC285_FIXUP_HP_GPIO_LED),
        SND_PCI_QUIRK(0x103c, 0x8729, "HP", ALC285_FIXUP_HP_GPIO_LED),
        SND_PCI_QUIRK(0x103c, 0x8730, "HP ProBook 445 G7", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
+       SND_PCI_QUIRK(0x103c, 0x8735, "HP ProBook 435 G7", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
        SND_PCI_QUIRK(0x103c, 0x8736, "HP", ALC285_FIXUP_HP_GPIO_AMP_INIT),
        SND_PCI_QUIRK(0x103c, 0x8760, "HP", ALC285_FIXUP_HP_MUTE_LED),
        SND_PCI_QUIRK(0x103c, 0x877a, "HP", ALC285_FIXUP_HP_MUTE_LED),
@@ -8894,6 +8921,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x17aa, 0x511e, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
        SND_PCI_QUIRK(0x17aa, 0x511f, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
        SND_PCI_QUIRK(0x17aa, 0x9e54, "LENOVO NB", ALC269_FIXUP_LENOVO_EAPD),
+       SND_PCI_QUIRK(0x1849, 0x1233, "ASRock NUC Box 1100", ALC233_FIXUP_NO_AUDIO_JACK),
        SND_PCI_QUIRK(0x19e5, 0x3204, "Huawei MACH-WX9", ALC256_FIXUP_HUAWEI_MACH_WX9_PINS),
        SND_PCI_QUIRK(0x1b35, 0x1235, "CZC B20", ALC269_FIXUP_CZC_B20),
        SND_PCI_QUIRK(0x1b35, 0x1236, "CZC TMI", ALC269_FIXUP_CZC_TMI),
index 90a921f..3fa9974 100644 (file)
@@ -42,34 +42,6 @@ static const struct spi_device_id cs35l41_id_spi[] = {
 
 MODULE_DEVICE_TABLE(spi, cs35l41_id_spi);
 
-static void cs35l41_spi_otp_setup(struct cs35l41_private *cs35l41,
-                                 bool is_pre_setup, unsigned int *freq)
-{
-       struct spi_device *spi;
-       u32 orig_spi_freq;
-
-       spi = to_spi_device(cs35l41->dev);
-
-       if (!spi) {
-               dev_err(cs35l41->dev, "%s: No SPI device\n", __func__);
-               return;
-       }
-
-       if (is_pre_setup) {
-               orig_spi_freq = spi->max_speed_hz;
-               if (orig_spi_freq > CS35L41_SPI_MAX_FREQ_OTP) {
-                       spi->max_speed_hz = CS35L41_SPI_MAX_FREQ_OTP;
-                       spi_setup(spi);
-               }
-               *freq = orig_spi_freq;
-       } else {
-               if (spi->max_speed_hz != *freq) {
-                       spi->max_speed_hz = *freq;
-                       spi_setup(spi);
-               }
-       }
-}
-
 static int cs35l41_spi_probe(struct spi_device *spi)
 {
        const struct regmap_config *regmap_config = &cs35l41_regmap_spi;
@@ -81,6 +53,9 @@ static int cs35l41_spi_probe(struct spi_device *spi)
        if (!cs35l41)
                return -ENOMEM;
 
+       spi->max_speed_hz = CS35L41_SPI_MAX_FREQ;
+       spi_setup(spi);
+
        spi_set_drvdata(spi, cs35l41);
        cs35l41->regmap = devm_regmap_init_spi(spi, regmap_config);
        if (IS_ERR(cs35l41->regmap)) {
@@ -91,7 +66,6 @@ static int cs35l41_spi_probe(struct spi_device *spi)
 
        cs35l41->dev = &spi->dev;
        cs35l41->irq = spi->irq;
-       cs35l41->otp_setup = cs35l41_spi_otp_setup;
 
        return cs35l41_probe(cs35l41, pdata);
 }
index 94ed21d..9c4d481 100644 (file)
@@ -302,7 +302,6 @@ static int cs35l41_otp_unpack(void *data)
        const struct cs35l41_otp_packed_element_t *otp_map;
        struct cs35l41_private *cs35l41 = data;
        int bit_offset, word_offset, ret, i;
-       unsigned int orig_spi_freq;
        unsigned int bit_sum = 8;
        u32 otp_val, otp_id_reg;
        u32 *otp_mem;
@@ -326,9 +325,6 @@ static int cs35l41_otp_unpack(void *data)
                goto err_otp_unpack;
        }
 
-       if (cs35l41->otp_setup)
-               cs35l41->otp_setup(cs35l41, true, &orig_spi_freq);
-
        ret = regmap_bulk_read(cs35l41->regmap, CS35L41_OTP_MEM0, otp_mem,
                               CS35L41_OTP_SIZE_WORDS);
        if (ret < 0) {
@@ -336,9 +332,6 @@ static int cs35l41_otp_unpack(void *data)
                goto err_otp_unpack;
        }
 
-       if (cs35l41->otp_setup)
-               cs35l41->otp_setup(cs35l41, false, &orig_spi_freq);
-
        otp_map = otp_map_match->map;
 
        bit_offset = otp_map_match->bit_offset;
@@ -612,6 +605,12 @@ static const struct snd_soc_dapm_widget cs35l41_dapm_widgets[] = {
        SND_SOC_DAPM_AIF_OUT("ASPTX3", NULL, 0, CS35L41_SP_ENABLES, 2, 0),
        SND_SOC_DAPM_AIF_OUT("ASPTX4", NULL, 0, CS35L41_SP_ENABLES, 3, 0),
 
+       SND_SOC_DAPM_SIGGEN("VSENSE"),
+       SND_SOC_DAPM_SIGGEN("ISENSE"),
+       SND_SOC_DAPM_SIGGEN("VP"),
+       SND_SOC_DAPM_SIGGEN("VBST"),
+       SND_SOC_DAPM_SIGGEN("TEMP"),
+
        SND_SOC_DAPM_ADC("VMON ADC", NULL, CS35L41_PWR_CTRL2, 12, 0),
        SND_SOC_DAPM_ADC("IMON ADC", NULL, CS35L41_PWR_CTRL2, 13, 0),
        SND_SOC_DAPM_ADC("VPMON ADC", NULL, CS35L41_PWR_CTRL2, 8, 0),
@@ -623,12 +622,6 @@ static const struct snd_soc_dapm_widget cs35l41_dapm_widgets[] = {
                               cs35l41_main_amp_event,
                               SND_SOC_DAPM_POST_PMD |  SND_SOC_DAPM_POST_PMU),
 
-       SND_SOC_DAPM_INPUT("VP"),
-       SND_SOC_DAPM_INPUT("VBST"),
-       SND_SOC_DAPM_INPUT("ISENSE"),
-       SND_SOC_DAPM_INPUT("VSENSE"),
-       SND_SOC_DAPM_INPUT("TEMP"),
-
        SND_SOC_DAPM_MUX("ASP TX1 Source", SND_SOC_NOPM, 0, 0, &asp_tx1_mux),
        SND_SOC_DAPM_MUX("ASP TX2 Source", SND_SOC_NOPM, 0, 0, &asp_tx2_mux),
        SND_SOC_DAPM_MUX("ASP TX3 Source", SND_SOC_NOPM, 0, 0, &asp_tx3_mux),
@@ -674,8 +667,8 @@ static const struct snd_soc_dapm_route cs35l41_audio_map[] = {
        {"VMON ADC", NULL, "VSENSE"},
        {"IMON ADC", NULL, "ISENSE"},
        {"VPMON ADC", NULL, "VP"},
-       {"TEMPMON ADC", NULL, "TEMP"},
        {"VBSTMON ADC", NULL, "VBST"},
+       {"TEMPMON ADC", NULL, "TEMP"},
 
        {"ASPRX1", NULL, "AMP Playback"},
        {"ASPRX2", NULL, "AMP Playback"},
index 6cffe8a..48485b0 100644 (file)
 #define CS35L41_FS2_WINDOW_MASK                0x00FFF800
 #define CS35L41_FS2_WINDOW_SHIFT       12
 
-#define CS35L41_SPI_MAX_FREQ_OTP       4000000
+#define CS35L41_SPI_MAX_FREQ           4000000
 
 #define CS35L41_RX_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S24_LE)
 #define CS35L41_TX_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S24_LE)
@@ -764,8 +764,6 @@ struct cs35l41_private {
        int irq;
        /* GPIO for /RST */
        struct gpio_desc *reset_gpio;
-       void (*otp_setup)(struct cs35l41_private *cs35l41, bool is_pre_setup,
-                         unsigned int *freq);
 };
 
 int cs35l41_probe(struct cs35l41_private *cs35l41,
index 2bed5cf..aec5127 100644 (file)
@@ -2188,7 +2188,7 @@ static int rx_macro_config_classh(struct snd_soc_component *component,
                snd_soc_component_update_bits(component,
                                CDC_RX_CLSH_DECAY_CTRL,
                                CDC_RX_CLSH_DECAY_RATE_MASK, 0x0);
-               snd_soc_component_update_bits(component,
+               snd_soc_component_write_field(component,
                                CDC_RX_RX1_RX_PATH_CFG0,
                                CDC_RX_RXn_CLSH_EN_MASK, 0x1);
                break;
index 943d7d9..03f24ed 100644 (file)
@@ -539,3 +539,4 @@ module_platform_driver(rk817_codec_driver);
 MODULE_DESCRIPTION("ASoC RK817 codec driver");
 MODULE_AUTHOR("binyuan <kevan.lan@rock-chips.com>");
 MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:rk817-codec");
index 297af7f..b62301a 100644 (file)
@@ -1311,13 +1311,54 @@ static int rt1011_r0_load_info(struct snd_kcontrol *kcontrol,
        .put = rt1011_r0_load_mode_put \
 }
 
-static const char * const rt1011_i2s_ref_texts[] = {
-       "Left Channel", "Right Channel"
+static const char * const rt1011_i2s_ref[] = {
+       "None", "Left Channel", "Right Channel"
 };
 
-static SOC_ENUM_SINGLE_DECL(rt1011_i2s_ref_enum,
-                           RT1011_TDM1_SET_1, 7,
-                           rt1011_i2s_ref_texts);
+static SOC_ENUM_SINGLE_DECL(rt1011_i2s_ref_enum, 0, 0,
+       rt1011_i2s_ref);
+
+static int rt1011_i2s_ref_put(struct snd_kcontrol *kcontrol,
+               struct snd_ctl_elem_value *ucontrol)
+{
+       struct snd_soc_component *component =
+               snd_soc_kcontrol_component(kcontrol);
+       struct rt1011_priv *rt1011 =
+               snd_soc_component_get_drvdata(component);
+
+       rt1011->i2s_ref = ucontrol->value.enumerated.item[0];
+       switch (rt1011->i2s_ref) {
+       case RT1011_I2S_REF_LEFT_CH:
+               regmap_write(rt1011->regmap, RT1011_TDM_TOTAL_SET, 0x0240);
+               regmap_write(rt1011->regmap, RT1011_TDM1_SET_2, 0x8);
+               regmap_write(rt1011->regmap, RT1011_TDM1_SET_1, 0x1022);
+               regmap_write(rt1011->regmap, RT1011_ADCDAT_OUT_SOURCE, 0x4);
+               break;
+       case RT1011_I2S_REF_RIGHT_CH:
+               regmap_write(rt1011->regmap, RT1011_TDM_TOTAL_SET, 0x0240);
+               regmap_write(rt1011->regmap, RT1011_TDM1_SET_2, 0x8);
+               regmap_write(rt1011->regmap, RT1011_TDM1_SET_1, 0x10a2);
+               regmap_write(rt1011->regmap, RT1011_ADCDAT_OUT_SOURCE, 0x4);
+               break;
+       default:
+               dev_info(component->dev, "I2S Reference: Do nothing\n");
+       }
+
+       return 0;
+}
+
+static int rt1011_i2s_ref_get(struct snd_kcontrol *kcontrol,
+               struct snd_ctl_elem_value *ucontrol)
+{
+       struct snd_soc_component *component =
+               snd_soc_kcontrol_component(kcontrol);
+       struct rt1011_priv *rt1011 =
+               snd_soc_component_get_drvdata(component);
+
+       ucontrol->value.enumerated.item[0] = rt1011->i2s_ref;
+
+       return 0;
+}
 
 static const struct snd_kcontrol_new rt1011_snd_controls[] = {
        /* I2S Data In Selection */
@@ -1358,7 +1399,8 @@ static const struct snd_kcontrol_new rt1011_snd_controls[] = {
        SOC_SINGLE("R0 Temperature", RT1011_STP_INITIAL_RESISTANCE_TEMP,
                2, 255, 0),
        /* I2S Reference */
-       SOC_ENUM("I2S Reference", rt1011_i2s_ref_enum),
+       SOC_ENUM_EXT("I2S Reference", rt1011_i2s_ref_enum,
+               rt1011_i2s_ref_get, rt1011_i2s_ref_put),
 };
 
 static int rt1011_is_sys_clk_from_pll(struct snd_soc_dapm_widget *source,
@@ -2017,6 +2059,7 @@ static int rt1011_probe(struct snd_soc_component *component)
 
        schedule_work(&rt1011->cali_work);
 
+       rt1011->i2s_ref = 0;
        rt1011->bq_drc_params = devm_kcalloc(component->dev,
                RT1011_ADVMODE_NUM, sizeof(struct rt1011_bq_drc_params *),
                GFP_KERNEL);
index 68fadc1..4d6e749 100644 (file)
@@ -654,6 +654,12 @@ enum {
        RT1011_AIFS
 };
 
+enum {
+       RT1011_I2S_REF_NONE,
+       RT1011_I2S_REF_LEFT_CH,
+       RT1011_I2S_REF_RIGHT_CH,
+};
+
 /* BiQual & DRC related settings */
 #define RT1011_BQ_DRC_NUM 128
 struct rt1011_bq_drc_params {
@@ -692,6 +698,7 @@ struct rt1011_priv {
        unsigned int r0_reg, cali_done;
        unsigned int r0_calib, temperature_calib;
        int recv_spk_mode;
+       int i2s_ref;
 };
 
 #endif         /* end of _RT1011_H_ */
index 983347b..20e0f90 100644 (file)
@@ -198,6 +198,7 @@ static int rt5682_i2c_probe(struct i2c_client *i2c,
        }
 
        mutex_init(&rt5682->calibrate_mutex);
+       mutex_init(&rt5682->jdet_mutex);
        rt5682_calibrate(rt5682);
 
        rt5682_apply_patch_list(rt5682, &i2c->dev);
index 78b4cb5..04cb747 100644 (file)
@@ -48,6 +48,8 @@ static const struct reg_sequence patch_list[] = {
        {RT5682_SAR_IL_CMD_6, 0x0110},
        {RT5682_CHARGE_PUMP_1, 0x0210},
        {RT5682_HP_LOGIC_CTRL_2, 0x0007},
+       {RT5682_SAR_IL_CMD_2, 0xac00},
+       {RT5682_CBJ_CTRL_7, 0x0104},
 };
 
 void rt5682_apply_patch_list(struct rt5682_priv *rt5682, struct device *dev)
@@ -940,6 +942,10 @@ int rt5682_headset_detect(struct snd_soc_component *component, int jack_insert)
                snd_soc_component_update_bits(component,
                        RT5682_HP_CHARGE_PUMP_1,
                        RT5682_OSW_L_MASK | RT5682_OSW_R_MASK, 0);
+               rt5682_enable_push_button_irq(component, false);
+               snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_1,
+                       RT5682_TRIG_JD_MASK, RT5682_TRIG_JD_LOW);
+               usleep_range(55000, 60000);
                snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_1,
                        RT5682_TRIG_JD_MASK, RT5682_TRIG_JD_HIGH);
 
@@ -1092,6 +1098,7 @@ void rt5682_jack_detect_handler(struct work_struct *work)
        while (!rt5682->component->card->instantiated)
                usleep_range(10000, 15000);
 
+       mutex_lock(&rt5682->jdet_mutex);
        mutex_lock(&rt5682->calibrate_mutex);
 
        val = snd_soc_component_read(rt5682->component, RT5682_AJD1_CTRL)
@@ -1165,6 +1172,7 @@ void rt5682_jack_detect_handler(struct work_struct *work)
        }
 
        mutex_unlock(&rt5682->calibrate_mutex);
+       mutex_unlock(&rt5682->jdet_mutex);
 }
 EXPORT_SYMBOL_GPL(rt5682_jack_detect_handler);
 
@@ -1514,6 +1522,7 @@ static int rt5682_hp_event(struct snd_soc_dapm_widget *w,
 {
        struct snd_soc_component *component =
                snd_soc_dapm_to_component(w->dapm);
+       struct rt5682_priv *rt5682 = snd_soc_component_get_drvdata(component);
 
        switch (event) {
        case SND_SOC_DAPM_PRE_PMU:
@@ -1525,12 +1534,17 @@ static int rt5682_hp_event(struct snd_soc_dapm_widget *w,
                        RT5682_DEPOP_1, 0x60, 0x60);
                snd_soc_component_update_bits(component,
                        RT5682_DAC_ADC_DIG_VOL1, 0x00c0, 0x0080);
+
+               mutex_lock(&rt5682->jdet_mutex);
+
                snd_soc_component_update_bits(component, RT5682_HP_CTRL_2,
                        RT5682_HP_C2_DAC_L_EN | RT5682_HP_C2_DAC_R_EN,
                        RT5682_HP_C2_DAC_L_EN | RT5682_HP_C2_DAC_R_EN);
                usleep_range(5000, 10000);
                snd_soc_component_update_bits(component, RT5682_CHARGE_PUMP_1,
                        RT5682_CP_SW_SIZE_MASK, RT5682_CP_SW_SIZE_L);
+
+               mutex_unlock(&rt5682->jdet_mutex);
                break;
 
        case SND_SOC_DAPM_POST_PMD:
@@ -2942,10 +2956,7 @@ static int rt5682_suspend(struct snd_soc_component *component)
 
        cancel_delayed_work_sync(&rt5682->jack_detect_work);
        cancel_delayed_work_sync(&rt5682->jd_check_work);
-       if (rt5682->hs_jack && rt5682->jack_type == SND_JACK_HEADSET) {
-               snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_1,
-                       RT5682_MB1_PATH_MASK | RT5682_MB2_PATH_MASK,
-                       RT5682_CTRL_MB1_REG | RT5682_CTRL_MB2_REG);
+       if (rt5682->hs_jack && (rt5682->jack_type & SND_JACK_HEADSET) == SND_JACK_HEADSET) {
                val = snd_soc_component_read(component,
                                RT5682_CBJ_CTRL_2) & RT5682_JACK_TYPE_MASK;
 
@@ -2967,10 +2978,17 @@ static int rt5682_suspend(struct snd_soc_component *component)
                /* enter SAR ADC power saving mode */
                snd_soc_component_update_bits(component, RT5682_SAR_IL_CMD_1,
                        RT5682_SAR_BUTT_DET_MASK | RT5682_SAR_BUTDET_MODE_MASK |
-                       RT5682_SAR_BUTDET_RST_MASK | RT5682_SAR_SEL_MB1_MB2_MASK, 0);
+                       RT5682_SAR_SEL_MB1_MB2_MASK, 0);
+               usleep_range(5000, 6000);
+               snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_1,
+                       RT5682_MB1_PATH_MASK | RT5682_MB2_PATH_MASK,
+                       RT5682_CTRL_MB1_REG | RT5682_CTRL_MB2_REG);
+               usleep_range(10000, 12000);
                snd_soc_component_update_bits(component, RT5682_SAR_IL_CMD_1,
-                       RT5682_SAR_BUTT_DET_MASK | RT5682_SAR_BUTDET_MODE_MASK | RT5682_SAR_BUTDET_RST_MASK,
-                       RT5682_SAR_BUTT_DET_EN | RT5682_SAR_BUTDET_POW_SAV | RT5682_SAR_BUTDET_RST_NORMAL);
+                       RT5682_SAR_BUTT_DET_MASK | RT5682_SAR_BUTDET_MODE_MASK,
+                       RT5682_SAR_BUTT_DET_EN | RT5682_SAR_BUTDET_POW_SAV);
+               snd_soc_component_update_bits(component, RT5682_HP_CHARGE_PUMP_1,
+                       RT5682_OSW_L_MASK | RT5682_OSW_R_MASK, 0);
        }
 
        regcache_cache_only(rt5682->regmap, true);
@@ -2988,10 +3006,11 @@ static int rt5682_resume(struct snd_soc_component *component)
        regcache_cache_only(rt5682->regmap, false);
        regcache_sync(rt5682->regmap);
 
-       if (rt5682->hs_jack && rt5682->jack_type == SND_JACK_HEADSET) {
+       if (rt5682->hs_jack && (rt5682->jack_type & SND_JACK_HEADSET) == SND_JACK_HEADSET) {
                snd_soc_component_update_bits(component, RT5682_SAR_IL_CMD_1,
                        RT5682_SAR_BUTDET_MODE_MASK | RT5682_SAR_SEL_MB1_MB2_MASK,
                        RT5682_SAR_BUTDET_POW_NORM | RT5682_SAR_SEL_MB1_MB2_AUTO);
+               usleep_range(5000, 6000);
                snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_1,
                        RT5682_MB1_PATH_MASK | RT5682_MB2_PATH_MASK,
                        RT5682_CTRL_MB1_FSM | RT5682_CTRL_MB2_FSM);
@@ -2999,8 +3018,9 @@ static int rt5682_resume(struct snd_soc_component *component)
                        RT5682_PWR_CBJ, RT5682_PWR_CBJ);
        }
 
+       rt5682->jack_type = 0;
        mod_delayed_work(system_power_efficient_wq,
-               &rt5682->jack_detect_work, msecs_to_jiffies(250));
+               &rt5682->jack_detect_work, msecs_to_jiffies(0));
 
        return 0;
 }
index d93829c..c917c76 100644 (file)
@@ -1463,6 +1463,7 @@ struct rt5682_priv {
 
        int jack_type;
        int irq_work_delay_time;
+       struct mutex jdet_mutex;
 };
 
 extern const char *rt5682_supply_names[RT5682_NUM_SUPPLIES];
index f957498..7aa1772 100644 (file)
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 
 #include <linux/bits.h>
+#include <linux/bitfield.h>
 #include <linux/delay.h>
 #include <linux/gpio/consumer.h>
 #include <linux/i2c.h>
 #define RT9120_REG_ERRRPT      0x10
 #define RT9120_REG_MSVOL       0x20
 #define RT9120_REG_SWRESET     0x40
+#define RT9120_REG_INTERCFG    0x63
 #define RT9120_REG_INTERNAL0   0x65
 #define RT9120_REG_INTERNAL1   0x69
 #define RT9120_REG_UVPOPT      0x6C
+#define RT9120_REG_DIGCFG      0xF8
 
 #define RT9120_VID_MASK                GENMASK(15, 8)
 #define RT9120_SWRST_MASK      BIT(7)
 #define RT9120_CFG_WORDLEN_24  24
 #define RT9120_CFG_WORDLEN_32  32
 #define RT9120_DVDD_UVSEL_MASK GENMASK(5, 4)
+#define RT9120_AUTOSYNC_MASK   BIT(6)
 
-#define RT9120_VENDOR_ID       0x4200
+#define RT9120_VENDOR_ID       0x42
+#define RT9120S_VENDOR_ID      0x43
 #define RT9120_RESET_WAITMS    20
 #define RT9120_CHIPON_WAITMS   20
 #define RT9120_AMPON_WAITMS    50
                                 SNDRV_PCM_FMTBIT_S24_LE |\
                                 SNDRV_PCM_FMTBIT_S32_LE)
 
+enum {
+       CHIP_IDX_RT9120 = 0,
+       CHIP_IDX_RT9120S,
+       CHIP_IDX_MAX
+};
+
 struct rt9120_data {
        struct device *dev;
        struct regmap *regmap;
+       int chip_idx;
 };
 
 /* 11bit [min,max,step] = [-103.9375dB, 24dB, 0.0625dB] */
@@ -149,8 +161,12 @@ static int rt9120_codec_probe(struct snd_soc_component *comp)
        snd_soc_component_init_regmap(comp, data->regmap);
 
        /* Internal setting */
-       snd_soc_component_write(comp, RT9120_REG_INTERNAL1, 0x03);
-       snd_soc_component_write(comp, RT9120_REG_INTERNAL0, 0x69);
+       if (data->chip_idx == CHIP_IDX_RT9120S) {
+               snd_soc_component_write(comp, RT9120_REG_INTERCFG, 0xde);
+               snd_soc_component_write(comp, RT9120_REG_INTERNAL0, 0x66);
+       } else
+               snd_soc_component_write(comp, RT9120_REG_INTERNAL0, 0x04);
+
        return 0;
 }
 
@@ -201,8 +217,8 @@ static int rt9120_hw_params(struct snd_pcm_substream *substream,
                            struct snd_soc_dai *dai)
 {
        struct snd_soc_component *comp = dai->component;
-       unsigned int param_width, param_slot_width;
-       int width;
+       unsigned int param_width, param_slot_width, auto_sync;
+       int width, fs;
 
        switch (width = params_width(param)) {
        case 16:
@@ -240,6 +256,16 @@ static int rt9120_hw_params(struct snd_pcm_substream *substream,
 
        snd_soc_component_update_bits(comp, RT9120_REG_I2SWL,
                                      RT9120_AUDWL_MASK, param_slot_width);
+
+       fs = width * params_channels(param);
+       /* If fs is divisible by 48, disable auto sync */
+       if (fs % 48 == 0)
+               auto_sync = 0;
+       else
+               auto_sync = RT9120_AUTOSYNC_MASK;
+
+       snd_soc_component_update_bits(comp, RT9120_REG_DIGCFG,
+                                     RT9120_AUTOSYNC_MASK, auto_sync);
        return 0;
 }
 
@@ -279,9 +305,11 @@ static const struct regmap_range rt9120_rd_yes_ranges[] = {
        regmap_reg_range(0x20, 0x27),
        regmap_reg_range(0x30, 0x38),
        regmap_reg_range(0x3A, 0x40),
+       regmap_reg_range(0x63, 0x63),
        regmap_reg_range(0x65, 0x65),
        regmap_reg_range(0x69, 0x69),
-       regmap_reg_range(0x6C, 0x6C)
+       regmap_reg_range(0x6C, 0x6C),
+       regmap_reg_range(0xF8, 0xF8)
 };
 
 static const struct regmap_access_table rt9120_rd_table = {
@@ -297,9 +325,11 @@ static const struct regmap_range rt9120_wr_yes_ranges[] = {
        regmap_reg_range(0x30, 0x38),
        regmap_reg_range(0x3A, 0x3D),
        regmap_reg_range(0x40, 0x40),
+       regmap_reg_range(0x63, 0x63),
        regmap_reg_range(0x65, 0x65),
        regmap_reg_range(0x69, 0x69),
-       regmap_reg_range(0x6C, 0x6C)
+       regmap_reg_range(0x6C, 0x6C),
+       regmap_reg_range(0xF8, 0xF8)
 };
 
 static const struct regmap_access_table rt9120_wr_table = {
@@ -370,7 +400,7 @@ static int rt9120_reg_write(void *context, unsigned int reg, unsigned int val)
 static const struct regmap_config rt9120_regmap_config = {
        .reg_bits = 8,
        .val_bits = 32,
-       .max_register = RT9120_REG_UVPOPT,
+       .max_register = RT9120_REG_DIGCFG,
 
        .reg_read = rt9120_reg_read,
        .reg_write = rt9120_reg_write,
@@ -388,8 +418,16 @@ static int rt9120_check_vendor_info(struct rt9120_data *data)
        if (ret)
                return ret;
 
-       if ((devid & RT9120_VID_MASK) != RT9120_VENDOR_ID) {
-               dev_err(data->dev, "DEVID not correct [0x%04x]\n", devid);
+       devid = FIELD_GET(RT9120_VID_MASK, devid);
+       switch (devid) {
+       case RT9120_VENDOR_ID:
+               data->chip_idx = CHIP_IDX_RT9120;
+               break;
+       case RT9120S_VENDOR_ID:
+               data->chip_idx = CHIP_IDX_RT9120S;
+               break;
+       default:
+               dev_err(data->dev, "DEVID not correct [0x%0x]\n", devid);
                return -ENODEV;
        }
 
index c496b35..4f568ab 100644 (file)
@@ -1896,9 +1896,8 @@ static int wcd934x_hw_params(struct snd_pcm_substream *substream,
        }
 
        wcd->dai[dai->id].sconfig.rate = params_rate(params);
-       wcd934x_slim_set_hw_params(wcd, &wcd->dai[dai->id], substream->stream);
 
-       return 0;
+       return wcd934x_slim_set_hw_params(wcd, &wcd->dai[dai->id], substream->stream);
 }
 
 static int wcd934x_hw_free(struct snd_pcm_substream *substream,
index 52de7d1..67151c7 100644 (file)
@@ -1174,6 +1174,9 @@ static bool wcd938x_readonly_register(struct device *dev, unsigned int reg)
        case WCD938X_DIGITAL_INTR_STATUS_0:
        case WCD938X_DIGITAL_INTR_STATUS_1:
        case WCD938X_DIGITAL_INTR_STATUS_2:
+       case WCD938X_DIGITAL_INTR_CLEAR_0:
+       case WCD938X_DIGITAL_INTR_CLEAR_1:
+       case WCD938X_DIGITAL_INTR_CLEAR_2:
        case WCD938X_DIGITAL_SWR_HM_TEST_0:
        case WCD938X_DIGITAL_SWR_HM_TEST_1:
        case WCD938X_DIGITAL_EFUSE_T_DATA_0:
index d4f0d72..6cb01a8 100644 (file)
@@ -617,8 +617,9 @@ static int wm_adsp_control_add(struct cs_dsp_coeff_ctl *cs_ctl)
        switch (cs_dsp->fw_ver) {
        case 0:
        case 1:
-               snprintf(name, SNDRV_CTL_ELEM_ID_NAME_MAXLEN, "%s %s %x",
-                        cs_dsp->name, region_name, cs_ctl->alg_region.alg);
+               ret = scnprintf(name, SNDRV_CTL_ELEM_ID_NAME_MAXLEN,
+                               "%s %s %x", cs_dsp->name, region_name,
+                               cs_ctl->alg_region.alg);
                break;
        case 2:
                ret = scnprintf(name, SNDRV_CTL_ELEM_ID_NAME_MAXLEN,
index f104962..77219c3 100644 (file)
@@ -248,6 +248,75 @@ static const struct dmi_system_id sof_sdw_quirk_table[] = {
                                        SOF_BT_OFFLOAD_SSP(2) |
                                        SOF_SSP_BT_OFFLOAD_PRESENT),
        },
+       {
+               .callback = sof_sdw_quirk_cb,
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc"),
+                       DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "0AF3"),
+               },
+               /* No Jack */
+               .driver_data = (void *)(SOF_SDW_TGL_HDMI |
+                                       SOF_SDW_FOUR_SPK),
+       },
+       {
+               .callback = sof_sdw_quirk_cb,
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc"),
+                       DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "0B00")
+               },
+               .driver_data = (void *)(SOF_SDW_TGL_HDMI |
+                                       RT711_JD2 |
+                                       SOF_SDW_FOUR_SPK),
+       },
+       {
+               .callback = sof_sdw_quirk_cb,
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc"),
+                       DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "0B01")
+               },
+               .driver_data = (void *)(SOF_SDW_TGL_HDMI |
+                                       RT711_JD2 |
+                                       SOF_SDW_FOUR_SPK),
+       },
+       {
+               .callback = sof_sdw_quirk_cb,
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc"),
+                       DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "0B11")
+               },
+               .driver_data = (void *)(SOF_SDW_TGL_HDMI |
+                                       RT711_JD2 |
+                                       SOF_SDW_FOUR_SPK),
+       },
+       {
+               .callback = sof_sdw_quirk_cb,
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc"),
+                       DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "0B12")
+               },
+               .driver_data = (void *)(SOF_SDW_TGL_HDMI |
+                                       RT711_JD2 |
+                                       SOF_SDW_FOUR_SPK),
+       },
+       {
+               .callback = sof_sdw_quirk_cb,
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc"),
+                       DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "0B13"),
+               },
+               /* No Jack */
+               .driver_data = (void *)SOF_SDW_TGL_HDMI,
+       },
+       {
+               .callback = sof_sdw_quirk_cb,
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc"),
+                       DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "0B29"),
+               },
+               .driver_data = (void *)(SOF_SDW_TGL_HDMI |
+                                       RT711_JD2 |
+                                       SOF_SDW_FOUR_SPK),
+       },
        {}
 };
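
For orientation, the quirk table extended above is a sentinel-terminated list: the first entry whose DMI vendor and product SKU both match supplies the quirk flags via driver_data. A minimal user-space sketch of that lookup; the flag values are made up and a plain strcmp() stands in for the kernel's DMI matching.

#include <stdio.h>
#include <string.h>

/* made-up flag values, only to show how driver_data is combined */
#define QUIRK_TGL_HDMI  0x1
#define QUIRK_JD2       0x2
#define QUIRK_FOUR_SPK  0x4

struct quirk_entry {
	const char *vendor;
	const char *sku;
	unsigned long driver_data;
};

static const struct quirk_entry quirk_table[] = {
	{ "Dell Inc", "0AF3", QUIRK_TGL_HDMI | QUIRK_FOUR_SPK },
	{ "Dell Inc", "0B00", QUIRK_TGL_HDMI | QUIRK_JD2 | QUIRK_FOUR_SPK },
	{ }	/* sentinel ends the walk */
};

static unsigned long lookup_quirk(const char *vendor, const char *sku)
{
	const struct quirk_entry *e;

	for (e = quirk_table; e->vendor; e++)
		if (!strcmp(e->vendor, vendor) && !strcmp(e->sku, sku))
			return e->driver_data;
	return 0;	/* no match: defaults apply */
}

int main(void)
{
	printf("quirks = 0x%lx\n", lookup_quirk("Dell Inc", "0B00"));	/* 0x7 */
	return 0;
}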
 
index 06f5034..b61a778 100644 (file)
@@ -74,6 +74,15 @@ static const struct snd_soc_acpi_adr_device rt711_sdca_0_adr[] = {
        }
 };
 
+static const struct snd_soc_acpi_adr_device rt711_sdca_2_adr[] = {
+       {
+               .adr = 0x000230025D071101ull,
+               .num_endpoints = 1,
+               .endpoints = &single_endpoint,
+               .name_prefix = "rt711"
+       }
+};
+
 static const struct snd_soc_acpi_adr_device rt1316_1_group1_adr[] = {
        {
                .adr = 0x000131025D131601ull, /* unique ID is set for some reason */
@@ -101,6 +110,24 @@ static const struct snd_soc_acpi_adr_device rt1316_3_group1_adr[] = {
        }
 };
 
+static const struct snd_soc_acpi_adr_device rt1316_0_group2_adr[] = {
+       {
+               .adr = 0x000031025D131601ull,
+               .num_endpoints = 1,
+               .endpoints = &spk_l_endpoint,
+               .name_prefix = "rt1316-1"
+       }
+};
+
+static const struct snd_soc_acpi_adr_device rt1316_1_group2_adr[] = {
+       {
+               .adr = 0x000130025D131601ull,
+               .num_endpoints = 1,
+               .endpoints = &spk_r_endpoint,
+               .name_prefix = "rt1316-2"
+       }
+};
+
 static const struct snd_soc_acpi_adr_device rt1316_2_single_adr[] = {
        {
                .adr = 0x000230025D131601ull,
@@ -209,6 +236,63 @@ static const struct snd_soc_acpi_link_adr adl_sdca_3_in_1[] = {
        {}
 };
 
+static const struct snd_soc_acpi_link_adr adl_sdw_rt711_link2_rt1316_link01_rt714_link3[] = {
+       {
+               .mask = BIT(2),
+               .num_adr = ARRAY_SIZE(rt711_sdca_2_adr),
+               .adr_d = rt711_sdca_2_adr,
+       },
+       {
+               .mask = BIT(0),
+               .num_adr = ARRAY_SIZE(rt1316_0_group2_adr),
+               .adr_d = rt1316_0_group2_adr,
+       },
+       {
+               .mask = BIT(1),
+               .num_adr = ARRAY_SIZE(rt1316_1_group2_adr),
+               .adr_d = rt1316_1_group2_adr,
+       },
+       {
+               .mask = BIT(3),
+               .num_adr = ARRAY_SIZE(rt714_3_adr),
+               .adr_d = rt714_3_adr,
+       },
+       {}
+};
+
+static const struct snd_soc_acpi_link_adr adl_sdw_rt1316_link12_rt714_link0[] = {
+       {
+               .mask = BIT(1),
+               .num_adr = ARRAY_SIZE(rt1316_1_group1_adr),
+               .adr_d = rt1316_1_group1_adr,
+       },
+       {
+               .mask = BIT(2),
+               .num_adr = ARRAY_SIZE(rt1316_2_group1_adr),
+               .adr_d = rt1316_2_group1_adr,
+       },
+       {
+               .mask = BIT(0),
+               .num_adr = ARRAY_SIZE(rt714_0_adr),
+               .adr_d = rt714_0_adr,
+       },
+       {}
+};
+
+static const struct snd_soc_acpi_link_adr adl_sdw_rt1316_link2_rt714_link3[] = {
+       {
+               .mask = BIT(2),
+               .num_adr = ARRAY_SIZE(rt1316_2_single_adr),
+               .adr_d = rt1316_2_single_adr,
+       },
+       {
+               .mask = BIT(3),
+               .num_adr = ARRAY_SIZE(rt714_3_adr),
+               .adr_d = rt714_3_adr,
+       },
+       {}
+};
+
 static const struct snd_soc_acpi_link_adr adl_sdw_rt1316_link2_rt714_link0[] = {
        {
                .mask = BIT(2),
@@ -339,6 +423,27 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_adl_sdw_machines[] = {
                .drv_name = "sof_sdw",
                .sof_tplg_filename = "sof-adl-rt711-l0-rt1316-l13-rt714-l2.tplg",
        },
+       {
+               .link_mask = 0xF, /* 4 active links required */
+               .links = adl_sdw_rt711_link2_rt1316_link01_rt714_link3,
+               .drv_name = "sof_sdw",
+               .sof_fw_filename = "sof-adl.ri",
+               .sof_tplg_filename = "sof-adl-rt711-l2-rt1316-l01-rt714-l3.tplg",
+       },
+       {
+               .link_mask = 0xC, /* rt1316 on link2 & rt714 on link3 */
+               .links = adl_sdw_rt1316_link2_rt714_link3,
+               .drv_name = "sof_sdw",
+               .sof_fw_filename = "sof-adl.ri",
+               .sof_tplg_filename = "sof-adl-rt1316-l2-mono-rt714-l3.tplg",
+       },
+       {
+               .link_mask = 0x7, /* rt714 on link0 & two rt1316s on link1 and link2 */
+               .links = adl_sdw_rt1316_link12_rt714_link0,
+               .drv_name = "sof_sdw",
+               .sof_fw_filename = "sof-adl.ri",
+               .sof_tplg_filename = "sof-adl-rt1316-l12-rt714-l0.tplg",
+       },
        {
                .link_mask = 0x5, /* 2 active links required */
                .links = adl_sdw_rt1316_link2_rt714_link0,
index b4eb0c9..4eebc79 100644 (file)
@@ -81,6 +81,12 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_cml_machines[] = {
                .sof_fw_filename = "sof-cml.ri",
                .sof_tplg_filename = "sof-cml-da7219-max98390.tplg",
        },
+       {
+               .id = "ESSX8336",
+               .drv_name = "sof-essx8336",
+               .sof_fw_filename = "sof-cml.ri",
+               .sof_tplg_filename = "sof-cml-es8336.tplg",
+       },
        {},
 };
 EXPORT_SYMBOL_GPL(snd_soc_acpi_intel_cml_machines);
index 6350390..3149493 100644 (file)
@@ -1054,6 +1054,7 @@ static int mt8173_afe_pcm_dev_probe(struct platform_device *pdev)
        int irq_id;
        struct mtk_base_afe *afe;
        struct mt8173_afe_private *afe_priv;
+       struct snd_soc_component *comp_pcm, *comp_hdmi;
 
        ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(33));
        if (ret)
@@ -1142,23 +1143,55 @@ static int mt8173_afe_pcm_dev_probe(struct platform_device *pdev)
        if (ret)
                goto err_pm_disable;
 
-       ret = devm_snd_soc_register_component(&pdev->dev,
-                                        &mt8173_afe_pcm_dai_component,
-                                        mt8173_afe_pcm_dais,
-                                        ARRAY_SIZE(mt8173_afe_pcm_dais));
+       comp_pcm = devm_kzalloc(&pdev->dev, sizeof(*comp_pcm), GFP_KERNEL);
+       if (!comp_pcm) {
+               ret = -ENOMEM;
+               goto err_pm_disable;
+       }
+
+       ret = snd_soc_component_initialize(comp_pcm,
+                                          &mt8173_afe_pcm_dai_component,
+                                          &pdev->dev);
        if (ret)
                goto err_pm_disable;
 
-       ret = devm_snd_soc_register_component(&pdev->dev,
-                                        &mt8173_afe_hdmi_dai_component,
-                                        mt8173_afe_hdmi_dais,
-                                        ARRAY_SIZE(mt8173_afe_hdmi_dais));
+#ifdef CONFIG_DEBUG_FS
+       comp_pcm->debugfs_prefix = "pcm";
+#endif
+
+       ret = snd_soc_add_component(comp_pcm,
+                                   mt8173_afe_pcm_dais,
+                                   ARRAY_SIZE(mt8173_afe_pcm_dais));
+       if (ret)
+               goto err_pm_disable;
+
+       comp_hdmi = devm_kzalloc(&pdev->dev, sizeof(*comp_hdmi), GFP_KERNEL);
+       if (!comp_hdmi) {
+               ret = -ENOMEM;
+               goto err_pm_disable;
+       }
+
+       ret = snd_soc_component_initialize(comp_hdmi,
+                                          &mt8173_afe_hdmi_dai_component,
+                                          &pdev->dev);
        if (ret)
                goto err_pm_disable;
 
+#ifdef CONFIG_DEBUG_FS
+       comp_hdmi->debugfs_prefix = "hdmi";
+#endif
+
+       ret = snd_soc_add_component(comp_hdmi,
+                                   mt8173_afe_hdmi_dais,
+                                   ARRAY_SIZE(mt8173_afe_hdmi_dais));
+       if (ret)
+               goto err_cleanup_components;
+
        dev_info(&pdev->dev, "MT8173 AFE driver initialized.\n");
        return 0;
 
+err_cleanup_components:
+       snd_soc_unregister_component(&pdev->dev);
 err_pm_disable:
        pm_runtime_disable(&pdev->dev);
        return ret;
@@ -1166,6 +1199,8 @@ err_pm_disable:
 
 static int mt8173_afe_pcm_dev_remove(struct platform_device *pdev)
 {
+       snd_soc_unregister_component(&pdev->dev);
+
        pm_runtime_disable(&pdev->dev);
        if (!pm_runtime_status_suspended(&pdev->dev))
                mt8173_afe_runtime_suspend(&pdev->dev);
index c28ebf8..2cbf679 100644 (file)
@@ -30,15 +30,15 @@ static struct mt8173_rt5650_platform_data mt8173_rt5650_priv = {
 };
 
 static const struct snd_soc_dapm_widget mt8173_rt5650_widgets[] = {
-       SND_SOC_DAPM_SPK("Speaker", NULL),
+       SND_SOC_DAPM_SPK("Ext Spk", NULL),
        SND_SOC_DAPM_MIC("Int Mic", NULL),
        SND_SOC_DAPM_HP("Headphone", NULL),
        SND_SOC_DAPM_MIC("Headset Mic", NULL),
 };
 
 static const struct snd_soc_dapm_route mt8173_rt5650_routes[] = {
-       {"Speaker", NULL, "SPOL"},
-       {"Speaker", NULL, "SPOR"},
+       {"Ext Spk", NULL, "SPOL"},
+       {"Ext Spk", NULL, "SPOR"},
        {"DMIC L1", NULL, "Int Mic"},
        {"DMIC R1", NULL, "Int Mic"},
        {"Headphone", NULL, "HPOL"},
@@ -48,7 +48,7 @@ static const struct snd_soc_dapm_route mt8173_rt5650_routes[] = {
 };
 
 static const struct snd_kcontrol_new mt8173_rt5650_controls[] = {
-       SOC_DAPM_PIN_SWITCH("Speaker"),
+       SOC_DAPM_PIN_SWITCH("Ext Spk"),
        SOC_DAPM_PIN_SWITCH("Int Mic"),
        SOC_DAPM_PIN_SWITCH("Headphone"),
        SOC_DAPM_PIN_SWITCH("Headset Mic"),
index 4f693a2..3ee8bfc 100644 (file)
@@ -550,6 +550,10 @@ struct audio_hw_clk_cfg {
        uint32_t clock_root;
 } __packed;
 
+struct audio_hw_clk_rel_cfg {
+       uint32_t clock_id;
+} __packed;
+
 #define PARAM_ID_HW_EP_POWER_MODE_CFG  0x8001176
 #define AR_HW_EP_POWER_MODE_0  0 /* default */
 #define AR_HW_EP_POWER_MODE_1  1 /* XO Shutdown allowed */
index 3d831b6..72c5719 100644 (file)
@@ -390,7 +390,7 @@ struct q6copp *q6adm_open(struct device *dev, int port_id, int path, int rate,
        int ret = 0;
 
        if (port_id < 0) {
-               dev_err(dev, "Invalid port_id 0x%x\n", port_id);
+               dev_err(dev, "Invalid port_id %d\n", port_id);
                return ERR_PTR(-EINVAL);
        }
 
@@ -508,7 +508,7 @@ int q6adm_matrix_map(struct device *dev, int path,
                int port_idx = payload_map.port_id[i];
 
                if (port_idx < 0) {
-                       dev_err(dev, "Invalid port_id 0x%x\n",
+                       dev_err(dev, "Invalid port_id %d\n",
                                payload_map.port_id[i]);
                        kfree(pkt);
                        return -EINVAL;
index 46f3655..b74b677 100644 (file)
@@ -269,9 +269,7 @@ static int q6asm_dai_prepare(struct snd_soc_component *component,
 
        if (ret < 0) {
                dev_err(dev, "%s: q6asm_open_write failed\n", __func__);
-               q6asm_audio_client_free(prtd->audio_client);
-               prtd->audio_client = NULL;
-               return -ENOMEM;
+               goto open_err;
        }
 
        prtd->session_id = q6asm_get_session_id(prtd->audio_client);
@@ -279,7 +277,7 @@ static int q6asm_dai_prepare(struct snd_soc_component *component,
                              prtd->session_id, substream->stream);
        if (ret) {
                dev_err(dev, "%s: stream reg failed ret:%d\n", __func__, ret);
-               return ret;
+               goto routing_err;
        }
 
        if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
@@ -301,10 +299,19 @@ static int q6asm_dai_prepare(struct snd_soc_component *component,
        }
        if (ret < 0)
                dev_info(dev, "%s: CMD Format block failed\n", __func__);
+       else
+               prtd->state = Q6ASM_STREAM_RUNNING;
 
-       prtd->state = Q6ASM_STREAM_RUNNING;
+       return ret;
 
-       return 0;
+routing_err:
+       q6asm_cmd(prtd->audio_client, prtd->stream_id,  CMD_CLOSE);
+open_err:
+       q6asm_unmap_memory_regions(substream->stream, prtd->audio_client);
+       q6asm_audio_client_free(prtd->audio_client);
+       prtd->audio_client = NULL;
+
+       return ret;
 }
 
 static int q6asm_dai_trigger(struct snd_soc_component *component,
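
The reworked prepare error handling above is the usual goto-unwind pattern: each failure jumps to a label that undoes only what is already set up, and the labels fall through into the common cleanup (close the stream first, then unmap the regions and free the client). A minimal standalone sketch of that ordering; open_write() and routing_open() are hypothetical stand-ins for the q6asm/q6routing calls.

#include <stdio.h>

/* hypothetical stand-ins: fail at the requested step */
static int open_write(int fail_at)   { return fail_at == 1 ? -1 : 0; }
static int routing_open(int fail_at) { return fail_at == 2 ? -1 : 0; }

static int prepare(int fail_at)
{
	int ret;

	ret = open_write(fail_at);
	if (ret < 0)
		goto open_err;		/* nothing extra to undo yet */

	ret = routing_open(fail_at);
	if (ret < 0)
		goto routing_err;	/* the opened stream must be closed */

	return 0;

routing_err:
	printf("close stream\n");
open_err:				/* fall-through into the common cleanup */
	printf("unmap regions, free client\n");
	return ret;
}

int main(void)
{
	return prepare(2) ? 1 : 0;	/* prints both cleanup lines, latest step first */
}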
index 82c40f2..cda33de 100644 (file)
@@ -42,6 +42,12 @@ struct prm_cmd_request_rsc {
        struct audio_hw_clk_cfg clock_id;
 } __packed;
 
+struct prm_cmd_release_rsc {
+       struct apm_module_param_data param_data;
+       uint32_t num_clk_id;
+       struct audio_hw_clk_rel_cfg clock_id;
+} __packed;
+
 static int q6prm_send_cmd_sync(struct q6prm *prm, struct gpr_pkt *pkt, uint32_t rsp_opcode)
 {
        return audioreach_send_cmd_sync(prm->dev, prm->gdev, &prm->result, &prm->lock,
@@ -102,8 +108,8 @@ int q6prm_unvote_lpass_core_hw(struct device *dev, uint32_t hw_block_id, uint32_
 }
 EXPORT_SYMBOL_GPL(q6prm_unvote_lpass_core_hw);
 
-int q6prm_set_lpass_clock(struct device *dev, int clk_id, int clk_attr, int clk_root,
-                         unsigned int freq)
+static int q6prm_request_lpass_clock(struct device *dev, int clk_id, int clk_attr, int clk_root,
+                                    unsigned int freq)
 {
        struct q6prm *prm = dev_get_drvdata(dev->parent);
        struct apm_module_param_data *param_data;
@@ -138,6 +144,49 @@ int q6prm_set_lpass_clock(struct device *dev, int clk_id, int clk_attr, int clk_
 
        return rc;
 }
+
+static int q6prm_release_lpass_clock(struct device *dev, int clk_id, int clk_attr, int clk_root,
+                         unsigned int freq)
+{
+       struct q6prm *prm = dev_get_drvdata(dev->parent);
+       struct apm_module_param_data *param_data;
+       struct prm_cmd_release_rsc *rel;
+       gpr_device_t *gdev = prm->gdev;
+       struct gpr_pkt *pkt;
+       int rc;
+
+       pkt = audioreach_alloc_cmd_pkt(sizeof(*rel), PRM_CMD_RELEASE_HW_RSC, 0, gdev->svc.id,
+                                      GPR_PRM_MODULE_IID);
+       if (IS_ERR(pkt))
+               return PTR_ERR(pkt);
+
+       rel = (void *)pkt + GPR_HDR_SIZE + APM_CMD_HDR_SIZE;
+
+       param_data = &rel->param_data;
+
+       param_data->module_instance_id = GPR_PRM_MODULE_IID;
+       param_data->error_code = 0;
+       param_data->param_id = PARAM_ID_RSC_AUDIO_HW_CLK;
+       param_data->param_size = sizeof(*rel) - APM_MODULE_PARAM_DATA_SIZE;
+
+       rel->num_clk_id = 1;
+       rel->clock_id.clock_id = clk_id;
+
+       rc = q6prm_send_cmd_sync(prm, pkt, PRM_CMD_RSP_RELEASE_HW_RSC);
+
+       kfree(pkt);
+
+       return rc;
+}
+
+int q6prm_set_lpass_clock(struct device *dev, int clk_id, int clk_attr, int clk_root,
+                         unsigned int freq)
+{
+       if (freq)
+               return q6prm_request_lpass_clock(dev, clk_id, clk_attr, clk_root, freq);
+
+       return q6prm_release_lpass_clock(dev, clk_id, clk_attr, clk_root, freq);
+}
 EXPORT_SYMBOL_GPL(q6prm_set_lpass_clock);
 
 static int prm_callback(struct gpr_resp_pkt *data, void *priv, int op)
index 3390ebe..cd74681 100644 (file)
@@ -372,6 +372,12 @@ int q6routing_stream_open(int fedai_id, int perf_mode,
        }
 
        session = &routing_data->sessions[stream_id - 1];
+       if (session->port_id < 0) {
+               dev_err(routing_data->dev, "Routing not setup for MultiMedia%d Session\n",
+                       session->fedai_id);
+               return -EINVAL;
+       }
+
        pdata = &routing_data->port_data[session->port_id];
 
        mutex_lock(&routing_data->lock);
@@ -495,7 +501,11 @@ static int msm_routing_put_audio_mixer(struct snd_kcontrol *kcontrol,
                session->port_id = be_id;
                snd_soc_dapm_mixer_update_power(dapm, kcontrol, 1, update);
        } else {
-               session->port_id = -1;
+               if (session->port_id == be_id) {
+                       session->port_id = -1;
+                       return 0;
+               }
+
                snd_soc_dapm_mixer_update_power(dapm, kcontrol, 0, update);
        }
 
index 16c6e02..03e0d4e 100644 (file)
@@ -102,7 +102,7 @@ static int rsnd_dmaen_stop(struct rsnd_mod *mod,
        struct rsnd_dmaen *dmaen = rsnd_dma_to_dmaen(dma);
 
        if (dmaen->chan)
-               dmaengine_terminate_sync(dmaen->chan);
+               dmaengine_terminate_async(dmaen->chan);
 
        return 0;
 }
index 2ae99b4..cbd7ea4 100644 (file)
@@ -20,8 +20,10 @@ static bool snd_soc_acpi_id_present(struct snd_soc_acpi_mach *machine)
 
        if (comp_ids) {
                for (i = 0; i < comp_ids->num_codecs; i++) {
-                       if (acpi_dev_present(comp_ids->codecs[i], NULL, -1))
+                       if (acpi_dev_present(comp_ids->codecs[i], NULL, -1)) {
+                               strscpy(machine->id, comp_ids->codecs[i], ACPI_ID_LEN);
                                return true;
+                       }
                }
        }
 
index 2892b0a..b06c568 100644 (file)
@@ -2559,8 +2559,13 @@ static struct snd_soc_dapm_widget *dapm_find_widget(
        return NULL;
 }
 
-static int snd_soc_dapm_set_pin(struct snd_soc_dapm_context *dapm,
-                               const char *pin, int status)
+/*
+ * set the DAPM pin status:
+ * returns 1 when the value has been updated, 0 when unchanged, or a negative
+ * error code; called from kcontrol put callback
+ */
+static int __snd_soc_dapm_set_pin(struct snd_soc_dapm_context *dapm,
+                                 const char *pin, int status)
 {
        struct snd_soc_dapm_widget *w = dapm_find_widget(dapm, pin, true);
        int ret = 0;
@@ -2586,6 +2591,18 @@ static int snd_soc_dapm_set_pin(struct snd_soc_dapm_context *dapm,
        return ret;
 }
 
+/*
+ * similar to __snd_soc_dapm_set_pin(), but returns 0 when successful;
+ * called from several API functions below
+ */
+static int snd_soc_dapm_set_pin(struct snd_soc_dapm_context *dapm,
+                               const char *pin, int status)
+{
+       int ret = __snd_soc_dapm_set_pin(dapm, pin, status);
+
+       return ret < 0 ? ret : 0;
+}
+
 /**
  * snd_soc_dapm_sync_unlocked - scan and power dapm paths
  * @dapm: DAPM context
@@ -3589,10 +3606,10 @@ int snd_soc_dapm_put_pin_switch(struct snd_kcontrol *kcontrol,
        const char *pin = (const char *)kcontrol->private_value;
        int ret;
 
-       if (ucontrol->value.integer.value[0])
-               ret = snd_soc_dapm_enable_pin(&card->dapm, pin);
-       else
-               ret = snd_soc_dapm_disable_pin(&card->dapm, pin);
+       mutex_lock_nested(&card->dapm_mutex, SND_SOC_DAPM_CLASS_RUNTIME);
+       ret = __snd_soc_dapm_set_pin(&card->dapm, pin,
+                                    !!ucontrol->value.integer.value[0]);
+       mutex_unlock(&card->dapm_mutex);
 
        snd_soc_dapm_sync(&card->dapm);
        return ret;
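
The split into __snd_soc_dapm_set_pin() lets the pin-switch put callback report whether the pin actually changed: as the comment above notes, a kcontrol .put is expected to return 1 when the value was updated, 0 when unchanged, and a negative error code otherwise, and that return value drives control-change notification. A minimal standalone sketch of that contract, with a plain int standing in for the widget's connected flag.

#include <stdio.h>

static int pin_connected;	/* stand-in for the DAPM widget's state */

static int set_pin(int status)
{
	if (pin_connected == status)
		return 0;	/* unchanged: no notification needed */

	pin_connected = status;
	return 1;		/* changed: caller notifies user space */
}

int main(void)
{
	printf("%d\n", set_pin(1));	/* 1: first enable changes the state */
	printf("%d\n", set_pin(1));	/* 0: enabling again is a no-op */
	printf("%d\n", set_pin(0));	/* 1: disabling changes it back */
	return 0;
}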
index 557e22c..f5b9e66 100644 (file)
@@ -2700,6 +2700,7 @@ EXPORT_SYMBOL_GPL(snd_soc_tplg_component_load);
 /* remove dynamic controls from the component driver */
 int snd_soc_tplg_component_remove(struct snd_soc_component *comp)
 {
+       struct snd_card *card = comp->card->snd_card;
        struct snd_soc_dobj *dobj, *next_dobj;
        int pass = SOC_TPLG_PASS_END;
 
@@ -2707,6 +2708,7 @@ int snd_soc_tplg_component_remove(struct snd_soc_component *comp)
        while (pass >= SOC_TPLG_PASS_START) {
 
                /* remove mixer controls */
+               down_write(&card->controls_rwsem);
                list_for_each_entry_safe(dobj, next_dobj, &comp->dobj_list,
                        list) {
 
@@ -2745,6 +2747,7 @@ int snd_soc_tplg_component_remove(struct snd_soc_component *comp)
                                break;
                        }
                }
+               up_write(&card->controls_rwsem);
                pass--;
        }
 
index 6bb4db8..041c546 100644 (file)
@@ -47,7 +47,7 @@ config SND_SOC_SOF_OF
          Say Y if you need this option. If unsure select "N".
 
 config SND_SOC_SOF_COMPRESS
-       tristate
+       bool
        select SND_SOC_COMPRESS
 
 config SND_SOC_SOF_DEBUG_PROBES
index 58bb89a..bb1dfe4 100644 (file)
@@ -69,7 +69,7 @@ static void snd_sof_refresh_control(struct snd_sof_control *scontrol)
 {
        struct sof_ipc_ctrl_data *cdata = scontrol->control_data;
        struct snd_soc_component *scomp = scontrol->scomp;
-       enum sof_ipc_ctrl_type ctrl_type;
+       u32 ipc_cmd;
        int ret;
 
        if (!scontrol->comp_data_dirty)
@@ -79,9 +79,9 @@ static void snd_sof_refresh_control(struct snd_sof_control *scontrol)
                return;
 
        if (scontrol->cmd == SOF_CTRL_CMD_BINARY)
-               ctrl_type = SOF_IPC_COMP_GET_DATA;
+               ipc_cmd = SOF_IPC_COMP_GET_DATA;
        else
-               ctrl_type = SOF_IPC_COMP_GET_VALUE;
+               ipc_cmd = SOF_IPC_COMP_GET_VALUE;
 
        /* set the ABI header values */
        cdata->data->magic = SOF_ABI_MAGIC;
@@ -89,7 +89,7 @@ static void snd_sof_refresh_control(struct snd_sof_control *scontrol)
 
        /* refresh the component data from DSP */
        scontrol->comp_data_dirty = false;
-       ret = snd_sof_ipc_set_get_comp_data(scontrol, ctrl_type,
+       ret = snd_sof_ipc_set_get_comp_data(scontrol, ipc_cmd,
                                            SOF_CTRL_TYPE_VALUE_CHAN_GET,
                                            scontrol->cmd, false);
        if (ret < 0) {
index 30025d3..0862ff8 100644 (file)
@@ -10,6 +10,8 @@
 #include <linux/io.h>
 #include <sound/hdaudio.h>
 #include <sound/hda_i915.h>
+#include <sound/hda_codec.h>
+#include <sound/hda_register.h>
 #include "../sof-priv.h"
 #include "hda.h"
 
 #endif
 
 #if IS_ENABLED(CONFIG_SND_SOC_SOF_HDA)
+static void update_codec_wake_enable(struct hdac_bus *bus, unsigned int addr, bool link_power)
+{
+       unsigned int mask = snd_hdac_chip_readw(bus, WAKEEN);
+
+       if (link_power)
+               mask &= ~BIT(addr);
+       else
+               mask |= BIT(addr);
+
+       snd_hdac_chip_updatew(bus, WAKEEN, STATESTS_INT_MASK, mask);
+}
+
 static void sof_hda_bus_link_power(struct hdac_device *codec, bool enable)
 {
        struct hdac_bus *bus = codec->bus;
@@ -41,6 +55,9 @@ static void sof_hda_bus_link_power(struct hdac_device *codec, bool enable)
         */
        if (codec->addr == HDA_IDISP_ADDR && !enable)
                snd_hdac_display_power(bus, HDA_CODEC_IDX_CONTROLLER, false);
+
+       /* WAKEEN needs to be set for disabled links */
+       update_codec_wake_enable(bus, codec->addr, enable);
 }
 
 static const struct hdac_bus_ops bus_core_ops = {
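
The update_codec_wake_enable() helper added above keeps a codec's WAKEEN bit set only while its link is powered down, so events from a codec on a disabled link can still wake the controller and be picked up by the STATESTS handling added later in this merge. A minimal standalone sketch of that read-modify-write; a plain variable stands in for the snd_hdac_chip_readw()/updatew() accessors, and STATESTS_INT_MASK is assumed to cover the low codec-address bits as in hda_register.h.

#include <stdio.h>

#define BIT(n)             (1u << (n))
#define STATESTS_INT_MASK  0x7		/* assumed: wake bits for codec addresses 0..2 */

static unsigned short wakeen;		/* stand-in for the controller's WAKEEN register */

static void update_codec_wake_enable(unsigned int addr, int link_power)
{
	unsigned int mask = wakeen;

	if (link_power)
		mask &= ~BIT(addr);	/* link up: codec wake not needed */
	else
		mask |= BIT(addr);	/* link down: let this codec wake us */

	/* only touch the bits covered by the mask, like snd_hdac_chip_updatew() */
	wakeen = (wakeen & ~STATESTS_INT_MASK) | (mask & STATESTS_INT_MASK);
}

int main(void)
{
	update_codec_wake_enable(0, 0);
	update_codec_wake_enable(2, 0);
	printf("WAKEEN = 0x%x\n", wakeen);	/* 0x5 */
	update_codec_wake_enable(0, 1);
	printf("WAKEEN = 0x%x\n", wakeen);	/* 0x4 */
	return 0;
}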
index 058baca..287dc0e 100644 (file)
@@ -622,8 +622,7 @@ static int hda_suspend(struct snd_sof_dev *sdev, bool runtime_suspend)
        hda_dsp_ipc_int_disable(sdev);
 
 #if IS_ENABLED(CONFIG_SND_SOC_SOF_HDA)
-       if (runtime_suspend)
-               hda_codec_jack_wake_enable(sdev, true);
+       hda_codec_jack_wake_enable(sdev, runtime_suspend);
 
        /* power down all hda link */
        snd_hdac_ext_bus_link_power_down_all(bus);
index 883d78d..2c0d4d0 100644 (file)
@@ -58,6 +58,13 @@ int hda_ctrl_dai_widget_setup(struct snd_soc_dapm_widget *w)
                return -EINVAL;
        }
 
+       /* DAI already configured, reset it before reconfiguring it */
+       if (sof_dai->configured) {
+               ret = hda_ctrl_dai_widget_free(w);
+               if (ret < 0)
+                       return ret;
+       }
+
        config = &sof_dai->dai_config[sof_dai->current_config];
 
        /*
@@ -810,6 +817,20 @@ skip_soundwire:
        return 0;
 }
 
+static void hda_check_for_state_change(struct snd_sof_dev *sdev)
+{
+#if IS_ENABLED(CONFIG_SND_SOC_SOF_HDA)
+       struct hdac_bus *bus = sof_to_bus(sdev);
+       unsigned int codec_mask;
+
+       codec_mask = snd_hdac_chip_readw(bus, STATESTS);
+       if (codec_mask) {
+               hda_codec_jack_check(sdev);
+               snd_hdac_chip_writew(bus, STATESTS, codec_mask);
+       }
+#endif
+}
+
 static irqreturn_t hda_dsp_interrupt_handler(int irq, void *context)
 {
        struct snd_sof_dev *sdev = context;
@@ -851,6 +872,8 @@ static irqreturn_t hda_dsp_interrupt_thread(int irq, void *context)
        if (hda_sdw_check_wakeen_irq(sdev))
                hda_sdw_process_wakeen(sdev);
 
+       hda_check_for_state_change(sdev);
+
        /* enable GIE interrupt */
        snd_sof_dsp_update_bits(sdev, HDA_DSP_HDA_BAR,
                                SOF_HDA_INTCTL,
index 6254bac..717f45a 100644 (file)
@@ -700,7 +700,7 @@ static int stm32_i2s_configure_clock(struct snd_soc_dai *cpu_dai,
                if (ret < 0)
                        return ret;
 
-               nb_bits = frame_len * ((cgfr & I2S_CGFR_CHLEN) + 1);
+               nb_bits = frame_len * (FIELD_GET(I2S_CGFR_CHLEN, cgfr) + 1);
                ret = stm32_i2s_calc_clk_div(i2s, i2s_clock_rate,
                                             (nb_bits * rate));
                if (ret)
index 8ee9a77..a74c980 100644 (file)
@@ -26,51 +26,162 @@ static const struct reg_default tegra186_dspk_reg_defaults[] = {
        { TEGRA186_DSPK_CODEC_CTRL,  0x03000000 },
 };
 
-static int tegra186_dspk_get_control(struct snd_kcontrol *kcontrol,
+static int tegra186_dspk_get_fifo_th(struct snd_kcontrol *kcontrol,
                                     struct snd_ctl_elem_value *ucontrol)
 {
        struct snd_soc_component *codec = snd_soc_kcontrol_component(kcontrol);
        struct tegra186_dspk *dspk = snd_soc_component_get_drvdata(codec);
 
-       if (strstr(kcontrol->id.name, "FIFO Threshold"))
-               ucontrol->value.integer.value[0] = dspk->rx_fifo_th;
-       else if (strstr(kcontrol->id.name, "OSR Value"))
-               ucontrol->value.integer.value[0] = dspk->osr_val;
-       else if (strstr(kcontrol->id.name, "LR Polarity Select"))
-               ucontrol->value.integer.value[0] = dspk->lrsel;
-       else if (strstr(kcontrol->id.name, "Channel Select"))
-               ucontrol->value.integer.value[0] = dspk->ch_sel;
-       else if (strstr(kcontrol->id.name, "Mono To Stereo"))
-               ucontrol->value.integer.value[0] = dspk->mono_to_stereo;
-       else if (strstr(kcontrol->id.name, "Stereo To Mono"))
-               ucontrol->value.integer.value[0] = dspk->stereo_to_mono;
+       ucontrol->value.integer.value[0] = dspk->rx_fifo_th;
 
        return 0;
 }
 
-static int tegra186_dspk_put_control(struct snd_kcontrol *kcontrol,
+static int tegra186_dspk_put_fifo_th(struct snd_kcontrol *kcontrol,
                                     struct snd_ctl_elem_value *ucontrol)
 {
        struct snd_soc_component *codec = snd_soc_kcontrol_component(kcontrol);
        struct tegra186_dspk *dspk = snd_soc_component_get_drvdata(codec);
-       int val = ucontrol->value.integer.value[0];
-
-       if (strstr(kcontrol->id.name, "FIFO Threshold"))
-               dspk->rx_fifo_th = val;
-       else if (strstr(kcontrol->id.name, "OSR Value"))
-               dspk->osr_val = val;
-       else if (strstr(kcontrol->id.name, "LR Polarity Select"))
-               dspk->lrsel = val;
-       else if (strstr(kcontrol->id.name, "Channel Select"))
-               dspk->ch_sel = val;
-       else if (strstr(kcontrol->id.name, "Mono To Stereo"))
-               dspk->mono_to_stereo = val;
-       else if (strstr(kcontrol->id.name, "Stereo To Mono"))
-               dspk->stereo_to_mono = val;
+       int value = ucontrol->value.integer.value[0];
+
+       if (value == dspk->rx_fifo_th)
+               return 0;
+
+       dspk->rx_fifo_th = value;
+
+       return 1;
+}
+
+static int tegra186_dspk_get_osr_val(struct snd_kcontrol *kcontrol,
+                                    struct snd_ctl_elem_value *ucontrol)
+{
+       struct snd_soc_component *codec = snd_soc_kcontrol_component(kcontrol);
+       struct tegra186_dspk *dspk = snd_soc_component_get_drvdata(codec);
+
+       ucontrol->value.enumerated.item[0] = dspk->osr_val;
 
        return 0;
 }
 
+static int tegra186_dspk_put_osr_val(struct snd_kcontrol *kcontrol,
+                                    struct snd_ctl_elem_value *ucontrol)
+{
+       struct snd_soc_component *codec = snd_soc_kcontrol_component(kcontrol);
+       struct tegra186_dspk *dspk = snd_soc_component_get_drvdata(codec);
+       unsigned int value = ucontrol->value.enumerated.item[0];
+
+       if (value == dspk->osr_val)
+               return 0;
+
+       dspk->osr_val = value;
+
+       return 1;
+}
+
+static int tegra186_dspk_get_pol_sel(struct snd_kcontrol *kcontrol,
+                                    struct snd_ctl_elem_value *ucontrol)
+{
+       struct snd_soc_component *codec = snd_soc_kcontrol_component(kcontrol);
+       struct tegra186_dspk *dspk = snd_soc_component_get_drvdata(codec);
+
+       ucontrol->value.enumerated.item[0] = dspk->lrsel;
+
+       return 0;
+}
+
+static int tegra186_dspk_put_pol_sel(struct snd_kcontrol *kcontrol,
+                                    struct snd_ctl_elem_value *ucontrol)
+{
+       struct snd_soc_component *codec = snd_soc_kcontrol_component(kcontrol);
+       struct tegra186_dspk *dspk = snd_soc_component_get_drvdata(codec);
+       unsigned int value = ucontrol->value.enumerated.item[0];
+
+       if (value == dspk->lrsel)
+               return 0;
+
+       dspk->lrsel = value;
+
+       return 1;
+}
+
+static int tegra186_dspk_get_ch_sel(struct snd_kcontrol *kcontrol,
+                                   struct snd_ctl_elem_value *ucontrol)
+{
+       struct snd_soc_component *codec = snd_soc_kcontrol_component(kcontrol);
+       struct tegra186_dspk *dspk = snd_soc_component_get_drvdata(codec);
+
+       ucontrol->value.enumerated.item[0] = dspk->ch_sel;
+
+       return 0;
+}
+
+static int tegra186_dspk_put_ch_sel(struct snd_kcontrol *kcontrol,
+                                   struct snd_ctl_elem_value *ucontrol)
+{
+       struct snd_soc_component *codec = snd_soc_kcontrol_component(kcontrol);
+       struct tegra186_dspk *dspk = snd_soc_component_get_drvdata(codec);
+       unsigned int value = ucontrol->value.enumerated.item[0];
+
+       if (value == dspk->ch_sel)
+               return 0;
+
+       dspk->ch_sel = value;
+
+       return 1;
+}
+
+static int tegra186_dspk_get_mono_to_stereo(struct snd_kcontrol *kcontrol,
+                                           struct snd_ctl_elem_value *ucontrol)
+{
+       struct snd_soc_component *codec = snd_soc_kcontrol_component(kcontrol);
+       struct tegra186_dspk *dspk = snd_soc_component_get_drvdata(codec);
+
+       ucontrol->value.enumerated.item[0] = dspk->mono_to_stereo;
+
+       return 0;
+}
+
+static int tegra186_dspk_put_mono_to_stereo(struct snd_kcontrol *kcontrol,
+                                           struct snd_ctl_elem_value *ucontrol)
+{
+       struct snd_soc_component *codec = snd_soc_kcontrol_component(kcontrol);
+       struct tegra186_dspk *dspk = snd_soc_component_get_drvdata(codec);
+       unsigned int value = ucontrol->value.enumerated.item[0];
+
+       if (value == dspk->mono_to_stereo)
+               return 0;
+
+       dspk->mono_to_stereo = value;
+
+       return 1;
+}
+
+static int tegra186_dspk_get_stereo_to_mono(struct snd_kcontrol *kcontrol,
+                                           struct snd_ctl_elem_value *ucontrol)
+{
+       struct snd_soc_component *codec = snd_soc_kcontrol_component(kcontrol);
+       struct tegra186_dspk *dspk = snd_soc_component_get_drvdata(codec);
+
+       ucontrol->value.enumerated.item[0] = dspk->stereo_to_mono;
+
+       return 0;
+}
+
+static int tegra186_dspk_put_stereo_to_mono(struct snd_kcontrol *kcontrol,
+                                           struct snd_ctl_elem_value *ucontrol)
+{
+       struct snd_soc_component *codec = snd_soc_kcontrol_component(kcontrol);
+       struct tegra186_dspk *dspk = snd_soc_component_get_drvdata(codec);
+       unsigned int value = ucontrol->value.enumerated.item[0];
+
+       if (value == dspk->stereo_to_mono)
+               return 0;
+
+       dspk->stereo_to_mono = value;
+
+       return 1;
+}
+
 static int __maybe_unused tegra186_dspk_runtime_suspend(struct device *dev)
 {
        struct tegra186_dspk *dspk = dev_get_drvdata(dev);
@@ -279,17 +390,19 @@ static const struct soc_enum tegra186_dspk_lrsel_enum =
 static const struct snd_kcontrol_new tegrat186_dspk_controls[] = {
        SOC_SINGLE_EXT("FIFO Threshold", SND_SOC_NOPM, 0,
                       TEGRA186_DSPK_RX_FIFO_DEPTH - 1, 0,
-                      tegra186_dspk_get_control, tegra186_dspk_put_control),
+                      tegra186_dspk_get_fifo_th, tegra186_dspk_put_fifo_th),
        SOC_ENUM_EXT("OSR Value", tegra186_dspk_osr_enum,
-                    tegra186_dspk_get_control, tegra186_dspk_put_control),
+                    tegra186_dspk_get_osr_val, tegra186_dspk_put_osr_val),
        SOC_ENUM_EXT("LR Polarity Select", tegra186_dspk_lrsel_enum,
-                    tegra186_dspk_get_control, tegra186_dspk_put_control),
+                    tegra186_dspk_get_pol_sel, tegra186_dspk_put_pol_sel),
        SOC_ENUM_EXT("Channel Select", tegra186_dspk_ch_sel_enum,
-                    tegra186_dspk_get_control, tegra186_dspk_put_control),
+                    tegra186_dspk_get_ch_sel, tegra186_dspk_put_ch_sel),
        SOC_ENUM_EXT("Mono To Stereo", tegra186_dspk_mono_conv_enum,
-                    tegra186_dspk_get_control, tegra186_dspk_put_control),
+                    tegra186_dspk_get_mono_to_stereo,
+                    tegra186_dspk_put_mono_to_stereo),
        SOC_ENUM_EXT("Stereo To Mono", tegra186_dspk_stereo_conv_enum,
-                    tegra186_dspk_get_control, tegra186_dspk_put_control),
+                    tegra186_dspk_get_stereo_to_mono,
+                    tegra186_dspk_put_stereo_to_mono),
 };
 
 static const struct snd_soc_component_driver tegra186_dspk_cmpnt = {
index bcccdf3..1a2e868 100644 (file)
@@ -424,46 +424,122 @@ static const struct snd_soc_dai_ops tegra_admaif_dai_ops = {
        .trigger        = tegra_admaif_trigger,
 };
 
-static int tegra_admaif_get_control(struct snd_kcontrol *kcontrol,
-                                   struct snd_ctl_elem_value *ucontrol)
+static int tegra210_admaif_pget_mono_to_stereo(struct snd_kcontrol *kcontrol,
+       struct snd_ctl_elem_value *ucontrol)
 {
        struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol);
+       struct tegra_admaif *admaif = snd_soc_component_get_drvdata(cmpnt);
+       struct soc_enum *ec = (struct soc_enum *)kcontrol->private_value;
+
+       ucontrol->value.enumerated.item[0] =
+               admaif->mono_to_stereo[ADMAIF_TX_PATH][ec->reg];
+
+       return 0;
+}
+
+static int tegra210_admaif_pput_mono_to_stereo(struct snd_kcontrol *kcontrol,
+       struct snd_ctl_elem_value *ucontrol)
+{
+       struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol);
+       struct tegra_admaif *admaif = snd_soc_component_get_drvdata(cmpnt);
+       struct soc_enum *ec = (struct soc_enum *)kcontrol->private_value;
+       unsigned int value = ucontrol->value.enumerated.item[0];
+
+       if (value == admaif->mono_to_stereo[ADMAIF_TX_PATH][ec->reg])
+               return 0;
+
+       admaif->mono_to_stereo[ADMAIF_TX_PATH][ec->reg] = value;
+
+       return 1;
+}
+
+static int tegra210_admaif_cget_mono_to_stereo(struct snd_kcontrol *kcontrol,
+       struct snd_ctl_elem_value *ucontrol)
+{
+       struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol);
+       struct tegra_admaif *admaif = snd_soc_component_get_drvdata(cmpnt);
+       struct soc_enum *ec = (struct soc_enum *)kcontrol->private_value;
+
+       ucontrol->value.enumerated.item[0] =
+               admaif->mono_to_stereo[ADMAIF_RX_PATH][ec->reg];
+
+       return 0;
+}
+
+static int tegra210_admaif_cput_mono_to_stereo(struct snd_kcontrol *kcontrol,
+       struct snd_ctl_elem_value *ucontrol)
+{
+       struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol);
+       struct tegra_admaif *admaif = snd_soc_component_get_drvdata(cmpnt);
        struct soc_enum *ec = (struct soc_enum *)kcontrol->private_value;
+       unsigned int value = ucontrol->value.enumerated.item[0];
+
+       if (value == admaif->mono_to_stereo[ADMAIF_RX_PATH][ec->reg])
+               return 0;
+
+       admaif->mono_to_stereo[ADMAIF_RX_PATH][ec->reg] = value;
+
+       return 1;
+}
+
+static int tegra210_admaif_pget_stereo_to_mono(struct snd_kcontrol *kcontrol,
+       struct snd_ctl_elem_value *ucontrol)
+{
+       struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol);
        struct tegra_admaif *admaif = snd_soc_component_get_drvdata(cmpnt);
-       long *uctl_val = &ucontrol->value.integer.value[0];
+       struct soc_enum *ec = (struct soc_enum *)kcontrol->private_value;
 
-       if (strstr(kcontrol->id.name, "Playback Mono To Stereo"))
-               *uctl_val = admaif->mono_to_stereo[ADMAIF_TX_PATH][ec->reg];
-       else if (strstr(kcontrol->id.name, "Capture Mono To Stereo"))
-               *uctl_val = admaif->mono_to_stereo[ADMAIF_RX_PATH][ec->reg];
-       else if (strstr(kcontrol->id.name, "Playback Stereo To Mono"))
-               *uctl_val = admaif->stereo_to_mono[ADMAIF_TX_PATH][ec->reg];
-       else if (strstr(kcontrol->id.name, "Capture Stereo To Mono"))
-               *uctl_val = admaif->stereo_to_mono[ADMAIF_RX_PATH][ec->reg];
+       ucontrol->value.enumerated.item[0] =
+               admaif->stereo_to_mono[ADMAIF_TX_PATH][ec->reg];
 
        return 0;
 }
 
-static int tegra_admaif_put_control(struct snd_kcontrol *kcontrol,
-                                   struct snd_ctl_elem_value *ucontrol)
+static int tegra210_admaif_pput_stereo_to_mono(struct snd_kcontrol *kcontrol,
+       struct snd_ctl_elem_value *ucontrol)
 {
        struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol);
+       struct tegra_admaif *admaif = snd_soc_component_get_drvdata(cmpnt);
        struct soc_enum *ec = (struct soc_enum *)kcontrol->private_value;
+       unsigned int value = ucontrol->value.enumerated.item[0];
+
+       if (value == admaif->stereo_to_mono[ADMAIF_TX_PATH][ec->reg])
+               return 0;
+
+       admaif->stereo_to_mono[ADMAIF_TX_PATH][ec->reg] = value;
+
+       return 1;
+}
+
+static int tegra210_admaif_cget_stereo_to_mono(struct snd_kcontrol *kcontrol,
+       struct snd_ctl_elem_value *ucontrol)
+{
+       struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol);
        struct tegra_admaif *admaif = snd_soc_component_get_drvdata(cmpnt);
-       int value = ucontrol->value.integer.value[0];
+       struct soc_enum *ec = (struct soc_enum *)kcontrol->private_value;
 
-       if (strstr(kcontrol->id.name, "Playback Mono To Stereo"))
-               admaif->mono_to_stereo[ADMAIF_TX_PATH][ec->reg] = value;
-       else if (strstr(kcontrol->id.name, "Capture Mono To Stereo"))
-               admaif->mono_to_stereo[ADMAIF_RX_PATH][ec->reg] = value;
-       else if (strstr(kcontrol->id.name, "Playback Stereo To Mono"))
-               admaif->stereo_to_mono[ADMAIF_TX_PATH][ec->reg] = value;
-       else if (strstr(kcontrol->id.name, "Capture Stereo To Mono"))
-               admaif->stereo_to_mono[ADMAIF_RX_PATH][ec->reg] = value;
+       ucontrol->value.enumerated.item[0] =
+               admaif->stereo_to_mono[ADMAIF_RX_PATH][ec->reg];
 
        return 0;
 }
 
+static int tegra210_admaif_cput_stereo_to_mono(struct snd_kcontrol *kcontrol,
+       struct snd_ctl_elem_value *ucontrol)
+{
+       struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol);
+       struct tegra_admaif *admaif = snd_soc_component_get_drvdata(cmpnt);
+       struct soc_enum *ec = (struct soc_enum *)kcontrol->private_value;
+       unsigned int value = ucontrol->value.enumerated.item[0];
+
+       if (value == admaif->stereo_to_mono[ADMAIF_RX_PATH][ec->reg])
+               return 0;
+
+       admaif->stereo_to_mono[ADMAIF_RX_PATH][ec->reg] = value;
+
+       return 1;
+}
+
 static int tegra_admaif_dai_probe(struct snd_soc_dai *dai)
 {
        struct tegra_admaif *admaif = snd_soc_dai_get_drvdata(dai);
@@ -559,17 +635,21 @@ static const char * const tegra_admaif_mono_conv_text[] = {
 }
 
 #define TEGRA_ADMAIF_CIF_CTRL(reg)                                            \
-       NV_SOC_ENUM_EXT("ADMAIF" #reg " Playback Mono To Stereo", reg - 1,\
-                       tegra_admaif_get_control, tegra_admaif_put_control,    \
+       NV_SOC_ENUM_EXT("ADMAIF" #reg " Playback Mono To Stereo", reg - 1,     \
+                       tegra210_admaif_pget_mono_to_stereo,                   \
+                       tegra210_admaif_pput_mono_to_stereo,                   \
                        tegra_admaif_mono_conv_text),                          \
-       NV_SOC_ENUM_EXT("ADMAIF" #reg " Playback Stereo To Mono", reg - 1,\
-                       tegra_admaif_get_control, tegra_admaif_put_control,    \
+       NV_SOC_ENUM_EXT("ADMAIF" #reg " Playback Stereo To Mono", reg - 1,     \
+                       tegra210_admaif_pget_stereo_to_mono,                   \
+                       tegra210_admaif_pput_stereo_to_mono,                   \
                        tegra_admaif_stereo_conv_text),                        \
-       NV_SOC_ENUM_EXT("ADMAIF" #reg " Capture Mono To Stereo", reg - 1, \
-                       tegra_admaif_get_control, tegra_admaif_put_control,    \
+       NV_SOC_ENUM_EXT("ADMAIF" #reg " Capture Mono To Stereo", reg - 1,      \
+                       tegra210_admaif_cget_mono_to_stereo,                   \
+                       tegra210_admaif_cput_mono_to_stereo,                   \
                        tegra_admaif_mono_conv_text),                          \
-       NV_SOC_ENUM_EXT("ADMAIF" #reg " Capture Stereo To Mono", reg - 1, \
-                       tegra_admaif_get_control, tegra_admaif_put_control,    \
+       NV_SOC_ENUM_EXT("ADMAIF" #reg " Capture Stereo To Mono", reg - 1,      \
+                       tegra210_admaif_cget_stereo_to_mono,                   \
+                       tegra210_admaif_cput_stereo_to_mono,                   \
                        tegra_admaif_stereo_conv_text)
 
 static struct snd_kcontrol_new tegra210_admaif_controls[] = {
index d7c7849..933c450 100644 (file)
@@ -193,6 +193,9 @@ static int tegra210_adx_put_byte_map(struct snd_kcontrol *kcontrol,
        struct soc_mixer_control *mc =
                (struct soc_mixer_control *)kcontrol->private_value;;
 
+       if (value == bytes_map[mc->reg])
+               return 0;
+
        if (value >= 0 && value <= 255) {
                /* update byte map and enable slot */
                bytes_map[mc->reg] = value;
index a1989ea..388b815 100644 (file)
@@ -62,6 +62,7 @@ static int tegra_ahub_put_value_enum(struct snd_kcontrol *kctl,
        unsigned int *item = uctl->value.enumerated.item;
        unsigned int value = e->values[item[0]];
        unsigned int i, bit_pos, reg_idx = 0, reg_val = 0;
+       int change = 0;
 
        if (item[0] >= e->items)
                return -EINVAL;
@@ -86,12 +87,14 @@ static int tegra_ahub_put_value_enum(struct snd_kcontrol *kctl,
 
                /* Update widget power if state has changed */
                if (snd_soc_component_test_bits(cmpnt, update[i].reg,
-                                               update[i].mask, update[i].val))
-                       snd_soc_dapm_mux_update_power(dapm, kctl, item[0], e,
-                                                     &update[i]);
+                                               update[i].mask,
+                                               update[i].val))
+                       change |= snd_soc_dapm_mux_update_power(dapm, kctl,
+                                                               item[0], e,
+                                                               &update[i]);
        }
 
-       return 0;
+       return change;
 }
 
 static struct snd_soc_dai_driver tegra210_ahub_dais[] = {
index af9bddf..6895763 100644 (file)
@@ -222,6 +222,9 @@ static int tegra210_amx_put_byte_map(struct snd_kcontrol *kcontrol,
        int reg = mc->reg;
        int value = ucontrol->value.integer.value[0];
 
+       if (value == bytes_map[reg])
+               return 0;
+
        if (value >= 0 && value <= 255) {
                /* Update byte map and enable slot */
                bytes_map[reg] = value;
index b096478..db95794 100644 (file)
@@ -156,51 +156,162 @@ static int tegra210_dmic_hw_params(struct snd_pcm_substream *substream,
        return 0;
 }
 
-static int tegra210_dmic_get_control(struct snd_kcontrol *kcontrol,
+static int tegra210_dmic_get_boost_gain(struct snd_kcontrol *kcontrol,
+                                       struct snd_ctl_elem_value *ucontrol)
+{
+       struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol);
+       struct tegra210_dmic *dmic = snd_soc_component_get_drvdata(comp);
+
+       ucontrol->value.integer.value[0] = dmic->boost_gain;
+
+       return 0;
+}
+
+static int tegra210_dmic_put_boost_gain(struct snd_kcontrol *kcontrol,
+                                       struct snd_ctl_elem_value *ucontrol)
+{
+       struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol);
+       struct tegra210_dmic *dmic = snd_soc_component_get_drvdata(comp);
+       int value = ucontrol->value.integer.value[0];
+
+       if (value == dmic->boost_gain)
+               return 0;
+
+       dmic->boost_gain = value;
+
+       return 1;
+}
+
+static int tegra210_dmic_get_ch_select(struct snd_kcontrol *kcontrol,
+                                      struct snd_ctl_elem_value *ucontrol)
+{
+       struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol);
+       struct tegra210_dmic *dmic = snd_soc_component_get_drvdata(comp);
+
+       ucontrol->value.enumerated.item[0] = dmic->ch_select;
+
+       return 0;
+}
+
+static int tegra210_dmic_put_ch_select(struct snd_kcontrol *kcontrol,
+                                      struct snd_ctl_elem_value *ucontrol)
+{
+       struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol);
+       struct tegra210_dmic *dmic = snd_soc_component_get_drvdata(comp);
+       unsigned int value = ucontrol->value.enumerated.item[0];
+
+       if (value == dmic->ch_select)
+               return 0;
+
+       dmic->ch_select = value;
+
+       return 1;
+}
+
+static int tegra210_dmic_get_mono_to_stereo(struct snd_kcontrol *kcontrol,
+                                           struct snd_ctl_elem_value *ucontrol)
+{
+       struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol);
+       struct tegra210_dmic *dmic = snd_soc_component_get_drvdata(comp);
+
+       ucontrol->value.enumerated.item[0] = dmic->mono_to_stereo;
+
+       return 0;
+}
+
+static int tegra210_dmic_put_mono_to_stereo(struct snd_kcontrol *kcontrol,
+                                           struct snd_ctl_elem_value *ucontrol)
+{
+       struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol);
+       struct tegra210_dmic *dmic = snd_soc_component_get_drvdata(comp);
+       unsigned int value = ucontrol->value.enumerated.item[0];
+
+       if (value == dmic->mono_to_stereo)
+               return 0;
+
+       dmic->mono_to_stereo = value;
+
+       return 1;
+}
+
+static int tegra210_dmic_get_stereo_to_mono(struct snd_kcontrol *kcontrol,
+                                           struct snd_ctl_elem_value *ucontrol)
+{
+       struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol);
+       struct tegra210_dmic *dmic = snd_soc_component_get_drvdata(comp);
+
+       ucontrol->value.enumerated.item[0] = dmic->stereo_to_mono;
+
+       return 0;
+}
+
+static int tegra210_dmic_put_stereo_to_mono(struct snd_kcontrol *kcontrol,
+                                           struct snd_ctl_elem_value *ucontrol)
+{
+       struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol);
+       struct tegra210_dmic *dmic = snd_soc_component_get_drvdata(comp);
+       unsigned int value = ucontrol->value.enumerated.item[0];
+
+       if (value == dmic->stereo_to_mono)
+               return 0;
+
+       dmic->stereo_to_mono = value;
+
+       return 1;
+}
+
+static int tegra210_dmic_get_osr_val(struct snd_kcontrol *kcontrol,
                                     struct snd_ctl_elem_value *ucontrol)
 {
        struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol);
        struct tegra210_dmic *dmic = snd_soc_component_get_drvdata(comp);
 
-       if (strstr(kcontrol->id.name, "Boost Gain Volume"))
-               ucontrol->value.integer.value[0] = dmic->boost_gain;
-       else if (strstr(kcontrol->id.name, "Channel Select"))
-               ucontrol->value.integer.value[0] = dmic->ch_select;
-       else if (strstr(kcontrol->id.name, "Mono To Stereo"))
-               ucontrol->value.integer.value[0] = dmic->mono_to_stereo;
-       else if (strstr(kcontrol->id.name, "Stereo To Mono"))
-               ucontrol->value.integer.value[0] = dmic->stereo_to_mono;
-       else if (strstr(kcontrol->id.name, "OSR Value"))
-               ucontrol->value.integer.value[0] = dmic->osr_val;
-       else if (strstr(kcontrol->id.name, "LR Polarity Select"))
-               ucontrol->value.integer.value[0] = dmic->lrsel;
+       ucontrol->value.enumerated.item[0] = dmic->osr_val;
 
        return 0;
 }
 
-static int tegra210_dmic_put_control(struct snd_kcontrol *kcontrol,
+static int tegra210_dmic_put_osr_val(struct snd_kcontrol *kcontrol,
                                     struct snd_ctl_elem_value *ucontrol)
 {
        struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol);
        struct tegra210_dmic *dmic = snd_soc_component_get_drvdata(comp);
-       int value = ucontrol->value.integer.value[0];
+       unsigned int value = ucontrol->value.enumerated.item[0];
 
-       if (strstr(kcontrol->id.name, "Boost Gain Volume"))
-               dmic->boost_gain = value;
-       else if (strstr(kcontrol->id.name, "Channel Select"))
-               dmic->ch_select = ucontrol->value.integer.value[0];
-       else if (strstr(kcontrol->id.name, "Mono To Stereo"))
-               dmic->mono_to_stereo = value;
-       else if (strstr(kcontrol->id.name, "Stereo To Mono"))
-               dmic->stereo_to_mono = value;
-       else if (strstr(kcontrol->id.name, "OSR Value"))
-               dmic->osr_val = value;
-       else if (strstr(kcontrol->id.name, "LR Polarity Select"))
-               dmic->lrsel = value;
+       if (value == dmic->osr_val)
+               return 0;
+
+       dmic->osr_val = value;
+
+       return 1;
+}
+
+static int tegra210_dmic_get_pol_sel(struct snd_kcontrol *kcontrol,
+                                    struct snd_ctl_elem_value *ucontrol)
+{
+       struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol);
+       struct tegra210_dmic *dmic = snd_soc_component_get_drvdata(comp);
+
+       ucontrol->value.enumerated.item[0] = dmic->lrsel;
 
        return 0;
 }
 
+static int tegra210_dmic_put_pol_sel(struct snd_kcontrol *kcontrol,
+                                    struct snd_ctl_elem_value *ucontrol)
+{
+       struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol);
+       struct tegra210_dmic *dmic = snd_soc_component_get_drvdata(comp);
+       unsigned int value = ucontrol->value.enumerated.item[0];
+
+       if (value == dmic->lrsel)
+               return 0;
+
+       dmic->lrsel = value;
+
+       return 1;
+}
+
 static const struct snd_soc_dai_ops tegra210_dmic_dai_ops = {
        .hw_params      = tegra210_dmic_hw_params,
 };
@@ -287,19 +398,22 @@ static const struct soc_enum tegra210_dmic_lrsel_enum =
 
 static const struct snd_kcontrol_new tegra210_dmic_controls[] = {
        SOC_SINGLE_EXT("Boost Gain Volume", 0, 0, MAX_BOOST_GAIN, 0,
-                      tegra210_dmic_get_control, tegra210_dmic_put_control),
+                      tegra210_dmic_get_boost_gain,
+                      tegra210_dmic_put_boost_gain),
        SOC_ENUM_EXT("Channel Select", tegra210_dmic_ch_enum,
-                    tegra210_dmic_get_control, tegra210_dmic_put_control),
+                    tegra210_dmic_get_ch_select, tegra210_dmic_put_ch_select),
        SOC_ENUM_EXT("Mono To Stereo",
-                    tegra210_dmic_mono_conv_enum, tegra210_dmic_get_control,
-                    tegra210_dmic_put_control),
+                    tegra210_dmic_mono_conv_enum,
+                    tegra210_dmic_get_mono_to_stereo,
+                    tegra210_dmic_put_mono_to_stereo),
        SOC_ENUM_EXT("Stereo To Mono",
-                    tegra210_dmic_stereo_conv_enum, tegra210_dmic_get_control,
-                    tegra210_dmic_put_control),
+                    tegra210_dmic_stereo_conv_enum,
+                    tegra210_dmic_get_stereo_to_mono,
+                    tegra210_dmic_put_stereo_to_mono),
        SOC_ENUM_EXT("OSR Value", tegra210_dmic_osr_enum,
-                    tegra210_dmic_get_control, tegra210_dmic_put_control),
+                    tegra210_dmic_get_osr_val, tegra210_dmic_put_osr_val),
        SOC_ENUM_EXT("LR Polarity Select", tegra210_dmic_lrsel_enum,
-                    tegra210_dmic_get_control, tegra210_dmic_put_control),
+                    tegra210_dmic_get_pol_sel, tegra210_dmic_put_pol_sel),
 };
 
 static const struct snd_soc_component_driver tegra210_dmic_compnt = {
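
[Editor's note] The per-control callbacks added above all follow one shape: the get handler copies the cached driver state into the control value, and the put handler returns 0 when nothing changed and 1 only after updating the cache, so ALSA emits a control-changed notification only for real updates. A minimal sketch of that pattern (foo_priv and its mode field are illustrative, not part of the driver):

static int foo_get_mode(struct snd_kcontrol *kcontrol,
			struct snd_ctl_elem_value *ucontrol)
{
	struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol);
	struct foo_priv *priv = snd_soc_component_get_drvdata(comp);

	ucontrol->value.enumerated.item[0] = priv->mode;

	return 0;
}

static int foo_put_mode(struct snd_kcontrol *kcontrol,
			struct snd_ctl_elem_value *ucontrol)
{
	struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol);
	struct foo_priv *priv = snd_soc_component_get_drvdata(comp);
	unsigned int value = ucontrol->value.enumerated.item[0];

	if (value == priv->mode)
		return 0;	/* unchanged: no event generated */

	priv->mode = value;

	return 1;		/* changed: ALSA notifies listeners */
}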
index 45f31cc..9552bbb 100644 (file)
@@ -302,85 +302,235 @@ static int tegra210_i2s_set_tdm_slot(struct snd_soc_dai *dai,
        return 0;
 }
 
-static int tegra210_i2s_set_dai_bclk_ratio(struct snd_soc_dai *dai,
-                                          unsigned int ratio)
+static int tegra210_i2s_get_loopback(struct snd_kcontrol *kcontrol,
+                                    struct snd_ctl_elem_value *ucontrol)
 {
-       struct tegra210_i2s *i2s = snd_soc_dai_get_drvdata(dai);
+       struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol);
+       struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt);
 
-       i2s->bclk_ratio = ratio;
+       ucontrol->value.integer.value[0] = i2s->loopback;
 
        return 0;
 }
 
-static int tegra210_i2s_get_control(struct snd_kcontrol *kcontrol,
-                                   struct snd_ctl_elem_value *ucontrol)
+static int tegra210_i2s_put_loopback(struct snd_kcontrol *kcontrol,
+                                    struct snd_ctl_elem_value *ucontrol)
+{
+       struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol);
+       struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt);
+       int value = ucontrol->value.integer.value[0];
+
+       if (value == i2s->loopback)
+               return 0;
+
+       i2s->loopback = value;
+
+       regmap_update_bits(i2s->regmap, TEGRA210_I2S_CTRL, I2S_CTRL_LPBK_MASK,
+                          i2s->loopback << I2S_CTRL_LPBK_SHIFT);
+
+       return 1;
+}
+
+static int tegra210_i2s_get_fsync_width(struct snd_kcontrol *kcontrol,
+                                       struct snd_ctl_elem_value *ucontrol)
 {
        struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol);
        struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt);
-       long *uctl_val = &ucontrol->value.integer.value[0];
-
-       if (strstr(kcontrol->id.name, "Loopback"))
-               *uctl_val = i2s->loopback;
-       else if (strstr(kcontrol->id.name, "FSYNC Width"))
-               *uctl_val = i2s->fsync_width;
-       else if (strstr(kcontrol->id.name, "Capture Stereo To Mono"))
-               *uctl_val = i2s->stereo_to_mono[I2S_TX_PATH];
-       else if (strstr(kcontrol->id.name, "Capture Mono To Stereo"))
-               *uctl_val = i2s->mono_to_stereo[I2S_TX_PATH];
-       else if (strstr(kcontrol->id.name, "Playback Stereo To Mono"))
-               *uctl_val = i2s->stereo_to_mono[I2S_RX_PATH];
-       else if (strstr(kcontrol->id.name, "Playback Mono To Stereo"))
-               *uctl_val = i2s->mono_to_stereo[I2S_RX_PATH];
-       else if (strstr(kcontrol->id.name, "Playback FIFO Threshold"))
-               *uctl_val = i2s->rx_fifo_th;
-       else if (strstr(kcontrol->id.name, "BCLK Ratio"))
-               *uctl_val = i2s->bclk_ratio;
+
+       ucontrol->value.integer.value[0] = i2s->fsync_width;
 
        return 0;
 }
 
-static int tegra210_i2s_put_control(struct snd_kcontrol *kcontrol,
-                                   struct snd_ctl_elem_value *ucontrol)
+static int tegra210_i2s_put_fsync_width(struct snd_kcontrol *kcontrol,
+                                       struct snd_ctl_elem_value *ucontrol)
 {
        struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol);
        struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt);
        int value = ucontrol->value.integer.value[0];
 
-       if (strstr(kcontrol->id.name, "Loopback")) {
-               i2s->loopback = value;
+       if (value == i2s->fsync_width)
+               return 0;
 
-               regmap_update_bits(i2s->regmap, TEGRA210_I2S_CTRL,
-                                  I2S_CTRL_LPBK_MASK,
-                                  i2s->loopback << I2S_CTRL_LPBK_SHIFT);
+       i2s->fsync_width = value;
 
-       } else if (strstr(kcontrol->id.name, "FSYNC Width")) {
-               /*
-                * Frame sync width is used only for FSYNC modes and not
-                * applicable for LRCK modes. Reset value for this field is "0",
-                * which means the width is one bit clock wide.
-                * The width requirement may depend on the codec and in such
-                * cases mixer control is used to update custom values. A value
-                * of "N" here means, width is "N + 1" bit clock wide.
-                */
-               i2s->fsync_width = value;
-
-               regmap_update_bits(i2s->regmap, TEGRA210_I2S_CTRL,
-                                  I2S_CTRL_FSYNC_WIDTH_MASK,
-                                  i2s->fsync_width << I2S_FSYNC_WIDTH_SHIFT);
-
-       } else if (strstr(kcontrol->id.name, "Capture Stereo To Mono")) {
-               i2s->stereo_to_mono[I2S_TX_PATH] = value;
-       } else if (strstr(kcontrol->id.name, "Capture Mono To Stereo")) {
-               i2s->mono_to_stereo[I2S_TX_PATH] = value;
-       } else if (strstr(kcontrol->id.name, "Playback Stereo To Mono")) {
-               i2s->stereo_to_mono[I2S_RX_PATH] = value;
-       } else if (strstr(kcontrol->id.name, "Playback Mono To Stereo")) {
-               i2s->mono_to_stereo[I2S_RX_PATH] = value;
-       } else if (strstr(kcontrol->id.name, "Playback FIFO Threshold")) {
-               i2s->rx_fifo_th = value;
-       } else if (strstr(kcontrol->id.name, "BCLK Ratio")) {
-               i2s->bclk_ratio = value;
-       }
+       /*
+        * Frame sync width is used only for FSYNC modes and not
+        * applicable for LRCK modes. Reset value for this field is "0",
+        * which means the width is one bit clock wide.
+        * The width requirement may depend on the codec and in such
+        * cases mixer control is used to update custom values. A value
+        * of "N" here means, width is "N + 1" bit clock wide.
+        * of "N" here means the width is "N + 1" bit clocks wide.
+       regmap_update_bits(i2s->regmap, TEGRA210_I2S_CTRL,
+                          I2S_CTRL_FSYNC_WIDTH_MASK,
+                          i2s->fsync_width << I2S_FSYNC_WIDTH_SHIFT);
+
+       return 1;
+}
+
+static int tegra210_i2s_cget_stereo_to_mono(struct snd_kcontrol *kcontrol,
+                                           struct snd_ctl_elem_value *ucontrol)
+{
+       struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol);
+       struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt);
+
+       ucontrol->value.enumerated.item[0] = i2s->stereo_to_mono[I2S_TX_PATH];
+
+       return 0;
+}
+
+static int tegra210_i2s_cput_stereo_to_mono(struct snd_kcontrol *kcontrol,
+                                           struct snd_ctl_elem_value *ucontrol)
+{
+       struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol);
+       struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt);
+       unsigned int value = ucontrol->value.enumerated.item[0];
+
+       if (value == i2s->stereo_to_mono[I2S_TX_PATH])
+               return 0;
+
+       i2s->stereo_to_mono[I2S_TX_PATH] = value;
+
+       return 1;
+}
+
+static int tegra210_i2s_cget_mono_to_stereo(struct snd_kcontrol *kcontrol,
+                                           struct snd_ctl_elem_value *ucontrol)
+{
+       struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol);
+       struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt);
+
+       ucontrol->value.enumerated.item[0] = i2s->mono_to_stereo[I2S_TX_PATH];
+
+       return 0;
+}
+
+static int tegra210_i2s_cput_mono_to_stereo(struct snd_kcontrol *kcontrol,
+                                           struct snd_ctl_elem_value *ucontrol)
+{
+       struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol);
+       struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt);
+       unsigned int value = ucontrol->value.enumerated.item[0];
+
+       if (value == i2s->mono_to_stereo[I2S_TX_PATH])
+               return 0;
+
+       i2s->mono_to_stereo[I2S_TX_PATH] = value;
+
+       return 1;
+}
+
+static int tegra210_i2s_pget_stereo_to_mono(struct snd_kcontrol *kcontrol,
+                                           struct snd_ctl_elem_value *ucontrol)
+{
+       struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol);
+       struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt);
+
+       ucontrol->value.enumerated.item[0] = i2s->stereo_to_mono[I2S_RX_PATH];
+
+       return 0;
+}
+
+static int tegra210_i2s_pput_stereo_to_mono(struct snd_kcontrol *kcontrol,
+                                           struct snd_ctl_elem_value *ucontrol)
+{
+       struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol);
+       struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt);
+       unsigned int value = ucontrol->value.enumerated.item[0];
+
+       if (value == i2s->stereo_to_mono[I2S_RX_PATH])
+               return 0;
+
+       i2s->stereo_to_mono[I2S_RX_PATH] = value;
+
+       return 1;
+}
+
+static int tegra210_i2s_pget_mono_to_stereo(struct snd_kcontrol *kcontrol,
+                                           struct snd_ctl_elem_value *ucontrol)
+{
+       struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol);
+       struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt);
+
+       ucontrol->value.enumerated.item[0] = i2s->mono_to_stereo[I2S_RX_PATH];
+
+       return 0;
+}
+
+static int tegra210_i2s_pput_mono_to_stereo(struct snd_kcontrol *kcontrol,
+                                           struct snd_ctl_elem_value *ucontrol)
+{
+       struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol);
+       struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt);
+       unsigned int value = ucontrol->value.enumerated.item[0];
+
+       if (value == i2s->mono_to_stereo[I2S_RX_PATH])
+               return 0;
+
+       i2s->mono_to_stereo[I2S_RX_PATH] = value;
+
+       return 1;
+}
+
+static int tegra210_i2s_pget_fifo_th(struct snd_kcontrol *kcontrol,
+                                    struct snd_ctl_elem_value *ucontrol)
+{
+       struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol);
+       struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt);
+
+       ucontrol->value.integer.value[0] = i2s->rx_fifo_th;
+
+       return 0;
+}
+
+static int tegra210_i2s_pput_fifo_th(struct snd_kcontrol *kcontrol,
+                                    struct snd_ctl_elem_value *ucontrol)
+{
+       struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol);
+       struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt);
+       int value = ucontrol->value.integer.value[0];
+
+       if (value == i2s->rx_fifo_th)
+               return 0;
+
+       i2s->rx_fifo_th = value;
+
+       return 1;
+}
+
+static int tegra210_i2s_get_bclk_ratio(struct snd_kcontrol *kcontrol,
+                                      struct snd_ctl_elem_value *ucontrol)
+{
+       struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol);
+       struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt);
+
+       ucontrol->value.integer.value[0] = i2s->bclk_ratio;
+
+       return 0;
+}
+
+static int tegra210_i2s_put_bclk_ratio(struct snd_kcontrol *kcontrol,
+                                      struct snd_ctl_elem_value *ucontrol)
+{
+       struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol);
+       struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt);
+       int value = ucontrol->value.integer.value[0];
+
+       if (value == i2s->bclk_ratio)
+               return 0;
+
+       i2s->bclk_ratio = value;
+
+       return 1;
+}
+
+static int tegra210_i2s_set_dai_bclk_ratio(struct snd_soc_dai *dai,
+                                          unsigned int ratio)
+{
+       struct tegra210_i2s *i2s = snd_soc_dai_get_drvdata(dai);
+
+       i2s->bclk_ratio = ratio;
 
        return 0;
 }
@@ -598,22 +748,28 @@ static const struct soc_enum tegra210_i2s_stereo_conv_enum =
                        tegra210_i2s_stereo_conv_text);
 
 static const struct snd_kcontrol_new tegra210_i2s_controls[] = {
-       SOC_SINGLE_EXT("Loopback", 0, 0, 1, 0, tegra210_i2s_get_control,
-                      tegra210_i2s_put_control),
-       SOC_SINGLE_EXT("FSYNC Width", 0, 0, 255, 0, tegra210_i2s_get_control,
-                      tegra210_i2s_put_control),
+       SOC_SINGLE_EXT("Loopback", 0, 0, 1, 0, tegra210_i2s_get_loopback,
+                      tegra210_i2s_put_loopback),
+       SOC_SINGLE_EXT("FSYNC Width", 0, 0, 255, 0,
+                      tegra210_i2s_get_fsync_width,
+                      tegra210_i2s_put_fsync_width),
        SOC_ENUM_EXT("Capture Stereo To Mono", tegra210_i2s_stereo_conv_enum,
-                    tegra210_i2s_get_control, tegra210_i2s_put_control),
+                    tegra210_i2s_cget_stereo_to_mono,
+                    tegra210_i2s_cput_stereo_to_mono),
        SOC_ENUM_EXT("Capture Mono To Stereo", tegra210_i2s_mono_conv_enum,
-                    tegra210_i2s_get_control, tegra210_i2s_put_control),
+                    tegra210_i2s_cget_mono_to_stereo,
+                    tegra210_i2s_cput_mono_to_stereo),
        SOC_ENUM_EXT("Playback Stereo To Mono", tegra210_i2s_stereo_conv_enum,
-                    tegra210_i2s_get_control, tegra210_i2s_put_control),
+                    tegra210_i2s_pget_stereo_to_mono,
+                    tegra210_i2s_pput_stereo_to_mono),
        SOC_ENUM_EXT("Playback Mono To Stereo", tegra210_i2s_mono_conv_enum,
-                    tegra210_i2s_get_control, tegra210_i2s_put_control),
+                    tegra210_i2s_pget_mono_to_stereo,
+                    tegra210_i2s_pput_mono_to_stereo),
        SOC_SINGLE_EXT("Playback FIFO Threshold", 0, 0, I2S_RX_FIFO_DEPTH - 1,
-                      0, tegra210_i2s_get_control, tegra210_i2s_put_control),
-       SOC_SINGLE_EXT("BCLK Ratio", 0, 0, INT_MAX, 0, tegra210_i2s_get_control,
-                      tegra210_i2s_put_control),
+                      0, tegra210_i2s_pget_fifo_th, tegra210_i2s_pput_fifo_th),
+       SOC_SINGLE_EXT("BCLK Ratio", 0, 0, INT_MAX, 0,
+                      tegra210_i2s_get_bclk_ratio,
+                      tegra210_i2s_put_bclk_ratio),
 };
 
 static const struct snd_soc_dapm_widget tegra210_i2s_widgets[] = {
index 55e6177..51d3755 100644 (file)
@@ -192,24 +192,24 @@ static int tegra210_mixer_get_gain(struct snd_kcontrol *kcontrol,
        return 0;
 }
 
-static int tegra210_mixer_put_gain(struct snd_kcontrol *kcontrol,
-                                  struct snd_ctl_elem_value *ucontrol)
+static int tegra210_mixer_apply_gain(struct snd_kcontrol *kcontrol,
+                                    struct snd_ctl_elem_value *ucontrol,
+                                    bool instant_gain)
 {
        struct soc_mixer_control *mc =
                (struct soc_mixer_control *)kcontrol->private_value;
        struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol);
        struct tegra210_mixer *mixer = snd_soc_component_get_drvdata(cmpnt);
        unsigned int reg = mc->reg, id;
-       bool instant_gain = false;
        int err;
 
-       if (strstr(kcontrol->id.name, "Instant Gain Volume"))
-               instant_gain = true;
-
        /* Save gain value for specific MIXER input */
        id = (reg - TEGRA210_MIXER_GAIN_CFG_RAM_ADDR_0) /
             TEGRA210_MIXER_GAIN_CFG_RAM_ADDR_STRIDE;
 
+       if (mixer->gain_value[id] == ucontrol->value.integer.value[0])
+               return 0;
+
        mixer->gain_value[id] = ucontrol->value.integer.value[0];
 
        err = tegra210_mixer_configure_gain(cmpnt, id, instant_gain);
@@ -221,6 +221,18 @@ static int tegra210_mixer_put_gain(struct snd_kcontrol *kcontrol,
        return 1;
 }
 
+static int tegra210_mixer_put_gain(struct snd_kcontrol *kcontrol,
+                                  struct snd_ctl_elem_value *ucontrol)
+{
+       return tegra210_mixer_apply_gain(kcontrol, ucontrol, false);
+}
+
+static int tegra210_mixer_put_instant_gain(struct snd_kcontrol *kcontrol,
+                                          struct snd_ctl_elem_value *ucontrol)
+{
+       return tegra210_mixer_apply_gain(kcontrol, ucontrol, true);
+}
+
 static int tegra210_mixer_set_audio_cif(struct tegra210_mixer *mixer,
                                        struct snd_pcm_hw_params *params,
                                        unsigned int reg,
@@ -388,7 +400,7 @@ ADDER_CTRL_DECL(adder5, TEGRA210_MIXER_TX5_ADDER_CONFIG);
        SOC_SINGLE_EXT("RX" #id " Instant Gain Volume",         \
                       MIXER_GAIN_CFG_RAM_ADDR((id) - 1), 0,    \
                       0x20000, 0, tegra210_mixer_get_gain,     \
-                      tegra210_mixer_put_gain),
+                      tegra210_mixer_put_instant_gain),
 
 /* Volume controls for all MIXER inputs */
 static const struct snd_kcontrol_new tegra210_mixer_gain_ctls[] = {
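
[Editor's note] As a worked example of the id computation in tegra210_mixer_apply_gain() above (with illustrative numbers, not the real Tegra offsets): if the gain CFG RAM base were 0x0200 and the per-input stride 0x10, a control whose reg is 0x0230 would map to id (0x0230 - 0x0200) / 0x10 = 3, so gain_value[3] caches that input's last requested gain.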
index 7b9c700..85b1558 100644 (file)
@@ -136,7 +136,7 @@ static int tegra210_mvc_put_mute(struct snd_kcontrol *kcontrol,
        struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol);
        struct tegra210_mvc *mvc = snd_soc_component_get_drvdata(cmpnt);
        unsigned int value;
-       u8 mute_mask;
+       u8 new_mask, old_mask;
        int err;
 
        pm_runtime_get_sync(cmpnt->dev);
@@ -148,11 +148,19 @@ static int tegra210_mvc_put_mute(struct snd_kcontrol *kcontrol,
        if (err < 0)
                goto end;
 
-       mute_mask = ucontrol->value.integer.value[0];
+       regmap_read(mvc->regmap, TEGRA210_MVC_CTRL, &value);
+
+       old_mask = (value >> TEGRA210_MVC_MUTE_SHIFT) & TEGRA210_MUTE_MASK_EN;
+       new_mask = ucontrol->value.integer.value[0];
+
+       if (new_mask == old_mask) {
+               err = 0;
+               goto end;
+       }
 
        err = regmap_update_bits(mvc->regmap, mc->reg,
                                 TEGRA210_MVC_MUTE_MASK,
-                                mute_mask << TEGRA210_MVC_MUTE_SHIFT);
+                                new_mask << TEGRA210_MVC_MUTE_SHIFT);
        if (err < 0)
                goto end;
 
@@ -195,7 +203,7 @@ static int tegra210_mvc_put_vol(struct snd_kcontrol *kcontrol,
        unsigned int reg = mc->reg;
        unsigned int value;
        u8 chan;
-       int err;
+       int err, old_volume;
 
        pm_runtime_get_sync(cmpnt->dev);
 
@@ -207,10 +215,16 @@ static int tegra210_mvc_put_vol(struct snd_kcontrol *kcontrol,
                goto end;
 
        chan = (reg - TEGRA210_MVC_TARGET_VOL) / REG_SIZE;
+       old_volume = mvc->volume[chan];
 
        tegra210_mvc_conv_vol(mvc, chan,
                              ucontrol->value.integer.value[0]);
 
+       if (mvc->volume[chan] == old_volume) {
+               err = 0;
+               goto end;
+       }
+
        /* Configure init volume same as target volume */
        regmap_write(mvc->regmap,
                TEGRA210_MVC_REG_OFFSET(TEGRA210_MVC_INIT_VOL, chan),
@@ -275,7 +289,7 @@ static int tegra210_mvc_get_curve_type(struct snd_kcontrol *kcontrol,
        struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol);
        struct tegra210_mvc *mvc = snd_soc_component_get_drvdata(cmpnt);
 
-       ucontrol->value.integer.value[0] = mvc->curve_type;
+       ucontrol->value.enumerated.item[0] = mvc->curve_type;
 
        return 0;
 }
@@ -285,7 +299,7 @@ static int tegra210_mvc_put_curve_type(struct snd_kcontrol *kcontrol,
 {
        struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol);
        struct tegra210_mvc *mvc = snd_soc_component_get_drvdata(cmpnt);
-       int value;
+       unsigned int value;
 
        regmap_read(mvc->regmap, TEGRA210_MVC_ENABLE, &value);
        if (value & TEGRA210_MVC_EN) {
@@ -294,10 +308,10 @@ static int tegra210_mvc_put_curve_type(struct snd_kcontrol *kcontrol,
                return -EINVAL;
        }
 
-       if (mvc->curve_type == ucontrol->value.integer.value[0])
+       if (mvc->curve_type == ucontrol->value.enumerated.item[0])
                return 0;
 
-       mvc->curve_type = ucontrol->value.integer.value[0];
+       mvc->curve_type = ucontrol->value.enumerated.item[0];
 
        tegra210_mvc_reset_vol_settings(mvc, cmpnt->dev);
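
[Editor's note] The curve-type accessors above also switch from value.integer.value[] to value.enumerated.item[]. In struct snd_ctl_elem_value those members share a union: integer carries long values for SOC_SINGLE_EXT-style controls, while enumerated carries the unsigned int item index that userspace actually fills in for an SOC_ENUM_EXT control. Reading the wrong member can appear to work for small values on little-endian builds, which is presumably why the mismatch went unnoticed, but the enumerated member is the correct one for an enum control.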
 
index dc477ee..7a2227e 100644 (file)
@@ -3244,46 +3244,107 @@ static int tegra210_sfc_init(struct snd_soc_dapm_widget *w,
        return tegra210_sfc_write_coeff_ram(cmpnt);
 }
 
-static int tegra210_sfc_get_control(struct snd_kcontrol *kcontrol,
+static int tegra210_sfc_iget_stereo_to_mono(struct snd_kcontrol *kcontrol,
                                    struct snd_ctl_elem_value *ucontrol)
 {
        struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol);
        struct tegra210_sfc *sfc = snd_soc_component_get_drvdata(cmpnt);
 
-       if (strstr(kcontrol->id.name, "Input Stereo To Mono"))
-               ucontrol->value.integer.value[0] =
-                       sfc->stereo_to_mono[SFC_RX_PATH];
-       else if (strstr(kcontrol->id.name, "Input Mono To Stereo"))
-               ucontrol->value.integer.value[0] =
-                       sfc->mono_to_stereo[SFC_RX_PATH];
-       else if (strstr(kcontrol->id.name, "Output Stereo To Mono"))
-               ucontrol->value.integer.value[0] =
-                       sfc->stereo_to_mono[SFC_TX_PATH];
-       else if (strstr(kcontrol->id.name, "Output Mono To Stereo"))
-               ucontrol->value.integer.value[0] =
-                       sfc->mono_to_stereo[SFC_TX_PATH];
+       ucontrol->value.enumerated.item[0] = sfc->stereo_to_mono[SFC_RX_PATH];
 
        return 0;
 }
 
-static int tegra210_sfc_put_control(struct snd_kcontrol *kcontrol,
+static int tegra210_sfc_iput_stereo_to_mono(struct snd_kcontrol *kcontrol,
                                    struct snd_ctl_elem_value *ucontrol)
 {
        struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol);
        struct tegra210_sfc *sfc = snd_soc_component_get_drvdata(cmpnt);
-       int value = ucontrol->value.integer.value[0];
-
-       if (strstr(kcontrol->id.name, "Input Stereo To Mono"))
-               sfc->stereo_to_mono[SFC_RX_PATH] = value;
-       else if (strstr(kcontrol->id.name, "Input Mono To Stereo"))
-               sfc->mono_to_stereo[SFC_RX_PATH] = value;
-       else if (strstr(kcontrol->id.name, "Output Stereo To Mono"))
-               sfc->stereo_to_mono[SFC_TX_PATH] = value;
-       else if (strstr(kcontrol->id.name, "Output Mono To Stereo"))
-               sfc->mono_to_stereo[SFC_TX_PATH] = value;
-       else
+       unsigned int value = ucontrol->value.enumerated.item[0];
+
+       if (value == sfc->stereo_to_mono[SFC_RX_PATH])
+               return 0;
+
+       sfc->stereo_to_mono[SFC_RX_PATH] = value;
+
+       return 1;
+}
+
+static int tegra210_sfc_iget_mono_to_stereo(struct snd_kcontrol *kcontrol,
+                                   struct snd_ctl_elem_value *ucontrol)
+{
+       struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol);
+       struct tegra210_sfc *sfc = snd_soc_component_get_drvdata(cmpnt);
+
+       ucontrol->value.enumerated.item[0] = sfc->mono_to_stereo[SFC_RX_PATH];
+
+       return 0;
+}
+
+static int tegra210_sfc_iput_mono_to_stereo(struct snd_kcontrol *kcontrol,
+                                   struct snd_ctl_elem_value *ucontrol)
+{
+       struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol);
+       struct tegra210_sfc *sfc = snd_soc_component_get_drvdata(cmpnt);
+       unsigned int value = ucontrol->value.enumerated.item[0];
+
+       if (value == sfc->mono_to_stereo[SFC_RX_PATH])
                return 0;
 
+       sfc->mono_to_stereo[SFC_RX_PATH] = value;
+
+       return 1;
+}
+
+static int tegra210_sfc_oget_stereo_to_mono(struct snd_kcontrol *kcontrol,
+                                   struct snd_ctl_elem_value *ucontrol)
+{
+       struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol);
+       struct tegra210_sfc *sfc = snd_soc_component_get_drvdata(cmpnt);
+
+       ucontrol->value.enumerated.item[0] = sfc->stereo_to_mono[SFC_TX_PATH];
+
+       return 0;
+}
+
+static int tegra210_sfc_oput_stereo_to_mono(struct snd_kcontrol *kcontrol,
+                                   struct snd_ctl_elem_value *ucontrol)
+{
+       struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol);
+       struct tegra210_sfc *sfc = snd_soc_component_get_drvdata(cmpnt);
+       unsigned int value = ucontrol->value.enumerated.item[0];
+
+       if (value == sfc->stereo_to_mono[SFC_TX_PATH])
+               return 0;
+
+       sfc->stereo_to_mono[SFC_TX_PATH] = value;
+
+       return 1;
+}
+
+static int tegra210_sfc_oget_mono_to_stereo(struct snd_kcontrol *kcontrol,
+                                   struct snd_ctl_elem_value *ucontrol)
+{
+       struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol);
+       struct tegra210_sfc *sfc = snd_soc_component_get_drvdata(cmpnt);
+
+       ucontrol->value.enumerated.item[0] = sfc->mono_to_stereo[SFC_TX_PATH];
+
+       return 0;
+}
+
+static int tegra210_sfc_oput_mono_to_stereo(struct snd_kcontrol *kcontrol,
+                                   struct snd_ctl_elem_value *ucontrol)
+{
+       struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol);
+       struct tegra210_sfc *sfc = snd_soc_component_get_drvdata(cmpnt);
+       unsigned int value = ucontrol->value.enumerated.item[0];
+
+       if (value == sfc->mono_to_stereo[SFC_TX_PATH])
+               return 0;
+
+       sfc->mono_to_stereo[SFC_TX_PATH] = value;
+
        return 1;
 }
 
@@ -3384,13 +3445,17 @@ static const struct soc_enum tegra210_sfc_mono_conv_enum =
 
 static const struct snd_kcontrol_new tegra210_sfc_controls[] = {
        SOC_ENUM_EXT("Input Stereo To Mono", tegra210_sfc_stereo_conv_enum,
-               tegra210_sfc_get_control, tegra210_sfc_put_control),
+                    tegra210_sfc_iget_stereo_to_mono,
+                    tegra210_sfc_iput_stereo_to_mono),
        SOC_ENUM_EXT("Input Mono To Stereo", tegra210_sfc_mono_conv_enum,
-               tegra210_sfc_get_control, tegra210_sfc_put_control),
+                    tegra210_sfc_iget_mono_to_stereo,
+                    tegra210_sfc_iput_mono_to_stereo),
        SOC_ENUM_EXT("Output Stereo To Mono", tegra210_sfc_stereo_conv_enum,
-               tegra210_sfc_get_control, tegra210_sfc_put_control),
+                    tegra210_sfc_oget_stereo_to_mono,
+                    tegra210_sfc_oput_stereo_to_mono),
        SOC_ENUM_EXT("Output Mono To Stereo", tegra210_sfc_mono_conv_enum,
-               tegra210_sfc_get_control, tegra210_sfc_put_control),
+                    tegra210_sfc_oget_mono_to_stereo,
+                    tegra210_sfc_oput_mono_to_stereo),
 };
 
 static const struct snd_soc_component_driver tegra210_sfc_cmpnt = {
index 95ec8ee..cec6e91 100644 (file)
@@ -581,6 +581,12 @@ static int snd_usb_hw_free(struct snd_pcm_substream *substream)
        return 0;
 }
 
+/* free-wheeling mode? (e.g. dmix) */
+static int in_free_wheeling_mode(struct snd_pcm_runtime *runtime)
+{
+       return runtime->stop_threshold > runtime->buffer_size;
+}
+
 /* check whether early start is needed for playback stream */
 static int lowlatency_playback_available(struct snd_pcm_runtime *runtime,
                                         struct snd_usb_substream *subs)
@@ -592,8 +598,7 @@ static int lowlatency_playback_available(struct snd_pcm_runtime *runtime,
        /* disabled via module option? */
        if (!chip->lowlatency)
                return false;
-       /* free-wheeling mode? (e.g. dmix) */
-       if (runtime->stop_threshold > runtime->buffer_size)
+       if (in_free_wheeling_mode(runtime))
                return false;
        /* implicit feedback mode has own operation mode */
        if (snd_usb_endpoint_implicit_feedback_sink(subs->data_endpoint))
@@ -635,7 +640,8 @@ static int snd_usb_pcm_prepare(struct snd_pcm_substream *substream)
        runtime->delay = 0;
 
        subs->lowlatency_playback = lowlatency_playback_available(runtime, subs);
-       if (!subs->lowlatency_playback)
+       if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK &&
+           !subs->lowlatency_playback)
                ret = start_endpoints(subs);
 
  unlock:
@@ -1552,6 +1558,8 @@ static int snd_usb_substream_playback_trigger(struct snd_pcm_substream *substrea
                                              subs);
                if (subs->lowlatency_playback &&
                    cmd == SNDRV_PCM_TRIGGER_START) {
+                       if (in_free_wheeling_mode(substream->runtime))
+                               subs->lowlatency_playback = false;
                        err = start_endpoints(subs);
                        if (err < 0) {
                                snd_usb_endpoint_set_callback(subs->data_endpoint,
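
[Editor's note] The new in_free_wheeling_mode() helper keys off the software stop threshold: plugins such as dmix raise stop_threshold past buffer_size (typically to the boundary) so the stream never stops on underrun, and low-latency playback is now disabled for such streams both at prepare time and, with the trigger hunk above, at start time. A hedged alsa-lib sketch of how userspace ends up in that mode (pcm handle setup assumed):

#include <alsa/asoundlib.h>

static int enable_free_wheeling(snd_pcm_t *pcm)
{
	snd_pcm_sw_params_t *sw;
	snd_pcm_uframes_t boundary;
	int err;

	snd_pcm_sw_params_alloca(&sw);

	err = snd_pcm_sw_params_current(pcm, sw);
	if (err < 0)
		return err;

	err = snd_pcm_sw_params_get_boundary(sw, &boundary);
	if (err < 0)
		return err;

	/* boundary is far larger than buffer_size, so stop_threshold >
	 * buffer_size and the kernel treats the stream as free-wheeling */
	err = snd_pcm_sw_params_set_stop_threshold(pcm, sw, boundary);
	if (err < 0)
		return err;

	return snd_pcm_sw_params(pcm, sw);
}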
index 2cb0a19..4041748 100644 (file)
@@ -358,6 +358,7 @@ static struct xenbus_driver xen_driver = {
        .probe = xen_drv_probe,
        .remove = xen_drv_remove,
        .otherend_changed = sndback_changed,
+       .not_essential = true,
 };
 
 static int __init xen_drv_init(void)
index a7e54a0..3e8df50 100644 (file)
@@ -7,6 +7,7 @@
 #include <assert.h>
 #include <linux/build_bug.h>
 #include <linux/compiler.h>
+#include <linux/math.h>
 #include <endian.h>
 #include <byteswap.h>
 
@@ -14,8 +15,6 @@
 #define UINT_MAX       (~0U)
 #endif
 
-#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
-
 #define PERF_ALIGN(x, a)       __PERF_ALIGN_MASK(x, (typeof(x))(a)-1)
 #define __PERF_ALIGN_MASK(x, mask)     (((x)+(mask))&~(mask))
 
        _min1 < _min2 ? _min1 : _min2; })
 #endif
 
-#ifndef roundup
-#define roundup(x, y) (                                \
-{                                                      \
-       const typeof(y) __y = y;                       \
-       (((x) + (__y - 1)) / __y) * __y;               \
-}                                                      \
-)
-#endif
-
 #ifndef BUG_ON
 #ifdef NDEBUG
 #define BUG_ON(cond) do { if (cond) {} } while (0)
@@ -104,16 +94,6 @@ int scnprintf_pad(char * buf, size_t size, const char * fmt, ...);
 
 #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr))
 
-/*
- * This looks more complex than it should be. But we need to
- * get the type for the ~ right in round_down (it needs to be
- * as wide as the result!), and we want to evaluate the macro
- * arguments just once each.
- */
-#define __round_mask(x, y) ((__typeof__(x))((y)-1))
-#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1)
-#define round_down(x, y) ((x) & ~__round_mask(x, y))
-
 #define current_gfp_context(k) 0
 #define synchronize_rcu()
 
diff --git a/tools/include/linux/math.h b/tools/include/linux/math.h
new file mode 100644 (file)
index 0000000..4e7af99
--- /dev/null
@@ -0,0 +1,25 @@
+#ifndef _TOOLS_MATH_H
+#define _TOOLS_MATH_H
+
+/*
+ * This looks more complex than it should be. But we need to
+ * get the type for the ~ right in round_down (it needs to be
+ * as wide as the result!), and we want to evaluate the macro
+ * arguments just once each.
+ */
+#define __round_mask(x, y) ((__typeof__(x))((y)-1))
+#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1)
+#define round_down(x, y) ((x) & ~__round_mask(x, y))
+
+#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
+
+#ifndef roundup
+#define roundup(x, y) (                                \
+{                                                      \
+       const typeof(y) __y = y;                       \
+       (((x) + (__y - 1)) / __y) * __y;               \
+}                                                      \
+)
+#endif
+
+#endif
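
[Editor's note] The helpers consolidated here keep their usual semantics: round_up() and round_down() require a power-of-two alignment, while roundup() and DIV_ROUND_UP() accept any positive divisor. A small self-check, assuming it is built inside the tools tree so that <linux/math.h> resolves to this header:

#include <linux/math.h>
#include <assert.h>

int main(void)
{
	assert(round_up(13, 8) == 16);		/* power-of-two alignment */
	assert(round_down(13, 8) == 8);
	assert(roundup(13, 5) == 15);		/* any positive divisor */
	assert(DIV_ROUND_UP(13, 5) == 3);
	return 0;
}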
index b3610fd..eebd389 100644 (file)
@@ -7,24 +7,23 @@
 
 /* This struct should be in sync with struct rtnl_link_stats64 */
 struct rtnl_link_stats {
-       __u32   rx_packets;             /* total packets received       */
-       __u32   tx_packets;             /* total packets transmitted    */
-       __u32   rx_bytes;               /* total bytes received         */
-       __u32   tx_bytes;               /* total bytes transmitted      */
-       __u32   rx_errors;              /* bad packets received         */
-       __u32   tx_errors;              /* packet transmit problems     */
-       __u32   rx_dropped;             /* no space in linux buffers    */
-       __u32   tx_dropped;             /* no space available in linux  */
-       __u32   multicast;              /* multicast packets received   */
+       __u32   rx_packets;
+       __u32   tx_packets;
+       __u32   rx_bytes;
+       __u32   tx_bytes;
+       __u32   rx_errors;
+       __u32   tx_errors;
+       __u32   rx_dropped;
+       __u32   tx_dropped;
+       __u32   multicast;
        __u32   collisions;
-
        /* detailed rx_errors: */
        __u32   rx_length_errors;
-       __u32   rx_over_errors;         /* receiver ring buff overflow  */
-       __u32   rx_crc_errors;          /* recved pkt with crc error    */
-       __u32   rx_frame_errors;        /* recv'd frame alignment error */
-       __u32   rx_fifo_errors;         /* recv'r fifo overrun          */
-       __u32   rx_missed_errors;       /* receiver missed packet       */
+       __u32   rx_over_errors;
+       __u32   rx_crc_errors;
+       __u32   rx_frame_errors;
+       __u32   rx_fifo_errors;
+       __u32   rx_missed_errors;
 
        /* detailed tx_errors */
        __u32   tx_aborted_errors;
@@ -37,29 +36,201 @@ struct rtnl_link_stats {
        __u32   rx_compressed;
        __u32   tx_compressed;
 
-       __u32   rx_nohandler;           /* dropped, no handler found    */
+       __u32   rx_nohandler;
 };
 
-/* The main device statistics structure */
+/**
+ * struct rtnl_link_stats64 - The main device statistics structure.
+ *
+ * @rx_packets: Number of good packets received by the interface.
+ *   For hardware interfaces counts all good packets received from the device
+ *   by the host, including packets which host had to drop at various stages
+ *   of processing (even in the driver).
+ *
+ * @tx_packets: Number of packets successfully transmitted.
+ *   For hardware interfaces counts packets which host was able to successfully
+ *   hand over to the device, which does not necessarily mean that packets
+ *   had been successfully transmitted out of the device, only that device
+ *   acknowledged it copied them out of host memory.
+ *
+ * @rx_bytes: Number of good received bytes, corresponding to @rx_packets.
+ *
+ *   For IEEE 802.3 devices should count the length of Ethernet Frames
+ *   excluding the FCS.
+ *
+ * @tx_bytes: Number of good transmitted bytes, corresponding to @tx_packets.
+ *
+ *   For IEEE 802.3 devices should count the length of Ethernet Frames
+ *   excluding the FCS.
+ *
+ * @rx_errors: Total number of bad packets received on this network device.
+ *   This counter must include events counted by @rx_length_errors,
+ *   @rx_crc_errors, @rx_frame_errors and other errors not otherwise
+ *   counted.
+ *
+ * @tx_errors: Total number of transmit problems.
+ *   This counter must include events counted by @tx_aborted_errors,
+ *   @tx_carrier_errors, @tx_fifo_errors, @tx_heartbeat_errors,
+ *   @tx_window_errors and other errors not otherwise counted.
+ *
+ * @rx_dropped: Number of packets received but not processed,
+ *   e.g. due to lack of resources or unsupported protocol.
+ *   For hardware interfaces this counter may include packets discarded
+ *   due to L2 address filtering but should not include packets dropped
+ *   by the device due to buffer exhaustion which are counted separately in
+ *   @rx_missed_errors (since procfs folds those two counters together).
+ *
+ * @tx_dropped: Number of packets dropped on their way to transmission,
+ *   e.g. due to lack of resources.
+ *
+ * @multicast: Multicast packets received.
+ *   For hardware interfaces this statistic is commonly calculated
+ *   at the device level (unlike @rx_packets) and therefore may include
+ *   packets which did not reach the host.
+ *
+ *   For IEEE 802.3 devices this counter may be equivalent to:
+ *
+ *    - 30.3.1.1.21 aMulticastFramesReceivedOK
+ *
+ * @collisions: Number of collisions during packet transmissions.
+ *
+ * @rx_length_errors: Number of packets dropped due to invalid length.
+ *   Part of aggregate "frame" errors in `/proc/net/dev`.
+ *
+ *   For IEEE 802.3 devices this counter should be equivalent to a sum
+ *   of the following attributes:
+ *
+ *    - 30.3.1.1.23 aInRangeLengthErrors
+ *    - 30.3.1.1.24 aOutOfRangeLengthField
+ *    - 30.3.1.1.25 aFrameTooLongErrors
+ *
+ * @rx_over_errors: Receiver FIFO overflow event counter.
+ *
+ *   Historically the count of overflow events. Such events may be
+ *   reported in the receive descriptors or via interrupts, and may
+ *   not correspond one-to-one with dropped packets.
+ *
+ *   The recommended interpretation for high speed interfaces is -
+ *   number of packets dropped because they did not fit into buffers
+ *   provided by the host, e.g. packets larger than MTU or next buffer
+ *   in the ring was not available for a scatter transfer.
+ *
+ *   Part of aggregate "frame" errors in `/proc/net/dev`.
+ *
+ *   This statistic was historically used interchangeably with
+ *   @rx_fifo_errors.
+ *
+ *   This statistic corresponds to hardware events and is not commonly used
+ *   on software devices.
+ *
+ * @rx_crc_errors: Number of packets received with a CRC error.
+ *   Part of aggregate "frame" errors in `/proc/net/dev`.
+ *
+ *   For IEEE 802.3 devices this counter must be equivalent to:
+ *
+ *    - 30.3.1.1.6 aFrameCheckSequenceErrors
+ *
+ * @rx_frame_errors: Receiver frame alignment errors.
+ *   Part of aggregate "frame" errors in `/proc/net/dev`.
+ *
+ *   For IEEE 802.3 devices this counter should be equivalent to:
+ *
+ *    - 30.3.1.1.7 aAlignmentErrors
+ *
+ * @rx_fifo_errors: Receiver FIFO error counter.
+ *
+ *   Historically the count of overflow events. Those events may be
+ *   reported in the receive descriptors or via interrupts, and may
+ *   not correspond one-to-one with dropped packets.
+ *
+ *   This statistic was used interchangeably with @rx_over_errors.
+ *   Not recommended for use in drivers for high speed interfaces.
+ *
+ *   This statistic is used on software devices, e.g. to count software
+ *   packet queue overflow (e.g. CAN) or sequencing errors (GRE).
+ *
+ * @rx_missed_errors: Count of packets missed by the host.
+ *   Folded into the "drop" counter in `/proc/net/dev`.
+ *
+ *   Counts number of packets dropped by the device due to lack
+ *   of buffer space. This usually indicates that the host interface
+ *   is slower than the network interface, or host is not keeping up
+ *   with the receive packet rate.
+ *
+ *   This statistic corresponds to hardware events and is not used
+ *   on software devices.
+ *
+ * @tx_aborted_errors:
+ *   Part of aggregate "carrier" errors in `/proc/net/dev`.
+ *   For IEEE 802.3 devices capable of half-duplex operation this counter
+ *   must be equivalent to:
+ *
+ *    - 30.3.1.1.11 aFramesAbortedDueToXSColls
+ *
+ *   High speed interfaces may use this counter as a general device
+ *   discard counter.
+ *
+ * @tx_carrier_errors: Number of frame transmission errors due to loss
+ *   of carrier during transmission.
+ *   Part of aggregate "carrier" errors in `/proc/net/dev`.
+ *
+ *   For IEEE 802.3 devices this counter must be equivalent to:
+ *
+ *    - 30.3.1.1.13 aCarrierSenseErrors
+ *
+ * @tx_fifo_errors: Number of frame transmission errors due to device
+ *   FIFO underrun / underflow. This condition occurs when the device
+ *   begins transmission of a frame but is unable to deliver the
+ *   entire frame to the transmitter in time for transmission.
+ *   Part of aggregate "carrier" errors in `/proc/net/dev`.
+ *
+ * @tx_heartbeat_errors: Number of Heartbeat / SQE Test errors for
+ *   old half-duplex Ethernet.
+ *   Part of aggregate "carrier" errors in `/proc/net/dev`.
+ *
+ *   For IEEE 802.3 devices possibly equivalent to:
+ *
+ *    - 30.3.2.1.4 aSQETestErrors
+ *
+ * @tx_window_errors: Number of frame transmission errors due
+ *   to late collisions (for Ethernet - after the first 64B of transmission).
+ *   Part of aggregate "carrier" errors in `/proc/net/dev`.
+ *
+ *   For IEEE 802.3 devices this counter must be equivalent to:
+ *
+ *    - 30.3.1.1.10 aLateCollisions
+ *
+ * @rx_compressed: Number of correctly received compressed packets.
+ *   This counter is only meaningful for interfaces which support
+ *   packet compression (e.g. CSLIP, PPP).
+ *
+ * @tx_compressed: Number of transmitted compressed packets.
+ *   This counter is only meaningful for interfaces which support
+ *   packet compression (e.g. CSLIP, PPP).
+ *
+ * @rx_nohandler: Number of packets received on the interface
+ *   but dropped by the networking stack because the device is
+ *   not designated to receive packets (e.g. backup link in a bond).
+ */
 struct rtnl_link_stats64 {
-       __u64   rx_packets;             /* total packets received       */
-       __u64   tx_packets;             /* total packets transmitted    */
-       __u64   rx_bytes;               /* total bytes received         */
-       __u64   tx_bytes;               /* total bytes transmitted      */
-       __u64   rx_errors;              /* bad packets received         */
-       __u64   tx_errors;              /* packet transmit problems     */
-       __u64   rx_dropped;             /* no space in linux buffers    */
-       __u64   tx_dropped;             /* no space available in linux  */
-       __u64   multicast;              /* multicast packets received   */
+       __u64   rx_packets;
+       __u64   tx_packets;
+       __u64   rx_bytes;
+       __u64   tx_bytes;
+       __u64   rx_errors;
+       __u64   tx_errors;
+       __u64   rx_dropped;
+       __u64   tx_dropped;
+       __u64   multicast;
        __u64   collisions;
 
        /* detailed rx_errors: */
        __u64   rx_length_errors;
-       __u64   rx_over_errors;         /* receiver ring buff overflow  */
-       __u64   rx_crc_errors;          /* recved pkt with crc error    */
-       __u64   rx_frame_errors;        /* recv'd frame alignment error */
-       __u64   rx_fifo_errors;         /* recv'r fifo overrun          */
-       __u64   rx_missed_errors;       /* receiver missed packet       */
+       __u64   rx_over_errors;
+       __u64   rx_crc_errors;
+       __u64   rx_frame_errors;
+       __u64   rx_fifo_errors;
+       __u64   rx_missed_errors;
 
        /* detailed tx_errors */
        __u64   tx_aborted_errors;
@@ -71,8 +242,7 @@ struct rtnl_link_stats64 {
        /* for cslip etc */
        __u64   rx_compressed;
        __u64   tx_compressed;
-
-       __u64   rx_nohandler;           /* dropped, no handler found    */
+       __u64   rx_nohandler;
 };
 
 /* The struct should be in sync with struct ifmap */
@@ -170,12 +340,29 @@ enum {
        IFLA_PROP_LIST,
        IFLA_ALT_IFNAME, /* Alternative ifname */
        IFLA_PERM_ADDRESS,
+       IFLA_PROTO_DOWN_REASON,
+
+       /* device (sysfs) name as parent, used instead
+        * of IFLA_LINK where there's no parent netdev
+        */
+       IFLA_PARENT_DEV_NAME,
+       IFLA_PARENT_DEV_BUS_NAME,
+
        __IFLA_MAX
 };
 
 
 #define IFLA_MAX (__IFLA_MAX - 1)
 
+enum {
+       IFLA_PROTO_DOWN_REASON_UNSPEC,
+       IFLA_PROTO_DOWN_REASON_MASK,    /* u32, mask for reason bits */
+       IFLA_PROTO_DOWN_REASON_VALUE,   /* u32, reason bit value */
+
+       __IFLA_PROTO_DOWN_REASON_CNT,
+       IFLA_PROTO_DOWN_REASON_MAX = __IFLA_PROTO_DOWN_REASON_CNT - 1
+};
+
 /* backwards compatibility for userspace */
 #ifndef __KERNEL__
 #define IFLA_RTA(r)  ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ifinfomsg))))
@@ -293,6 +480,7 @@ enum {
        IFLA_BR_MCAST_MLD_VERSION,
        IFLA_BR_VLAN_STATS_PER_PORT,
        IFLA_BR_MULTI_BOOLOPT,
+       IFLA_BR_MCAST_QUERIER_STATE,
        __IFLA_BR_MAX,
 };
 
@@ -346,6 +534,8 @@ enum {
        IFLA_BRPORT_BACKUP_PORT,
        IFLA_BRPORT_MRP_RING_OPEN,
        IFLA_BRPORT_MRP_IN_OPEN,
+       IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT,
+       IFLA_BRPORT_MCAST_EHT_HOSTS_CNT,
        __IFLA_BRPORT_MAX
 };
 #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
@@ -433,6 +623,7 @@ enum macvlan_macaddr_mode {
 };
 
 #define MACVLAN_FLAG_NOPROMISC 1
+#define MACVLAN_FLAG_NODST     2 /* skip dst macvlan if matching src macvlan */
 
 /* VRF section */
 enum {
@@ -597,6 +788,18 @@ enum ifla_geneve_df {
        GENEVE_DF_MAX = __GENEVE_DF_END - 1,
 };
 
+/* Bareudp section  */
+enum {
+       IFLA_BAREUDP_UNSPEC,
+       IFLA_BAREUDP_PORT,
+       IFLA_BAREUDP_ETHERTYPE,
+       IFLA_BAREUDP_SRCPORT_MIN,
+       IFLA_BAREUDP_MULTIPROTO_MODE,
+       __IFLA_BAREUDP_MAX
+};
+
+#define IFLA_BAREUDP_MAX (__IFLA_BAREUDP_MAX - 1)
+
 /* PPP section */
 enum {
        IFLA_PPP_UNSPEC,
@@ -899,7 +1102,14 @@ enum {
 #define IFLA_IPOIB_MAX (__IFLA_IPOIB_MAX - 1)
 
 
-/* HSR section */
+/* HSR/PRP section, both uses same interface */
+
+/* Different redundancy protocols for hsr device */
+enum {
+       HSR_PROTOCOL_HSR,
+       HSR_PROTOCOL_PRP,
+       HSR_PROTOCOL_MAX,
+};
 
 enum {
        IFLA_HSR_UNSPEC,
@@ -909,6 +1119,9 @@ enum {
        IFLA_HSR_SUPERVISION_ADDR,      /* Supervision frame multicast addr */
        IFLA_HSR_SEQ_NR,
        IFLA_HSR_VERSION,               /* HSR version */
+       IFLA_HSR_PROTOCOL,              /* Indicate different protocol than
+                                        * HSR. For example PRP.
+                                        */
        __IFLA_HSR_MAX,
 };
 
@@ -1033,6 +1246,8 @@ enum {
 #define RMNET_FLAGS_INGRESS_MAP_COMMANDS          (1U << 1)
 #define RMNET_FLAGS_INGRESS_MAP_CKSUMV4           (1U << 2)
 #define RMNET_FLAGS_EGRESS_MAP_CKSUMV4            (1U << 3)
+#define RMNET_FLAGS_INGRESS_MAP_CKSUMV5           (1U << 4)
+#define RMNET_FLAGS_EGRESS_MAP_CKSUMV5            (1U << 5)
 
 enum {
        IFLA_RMNET_UNSPEC,
@@ -1048,4 +1263,14 @@ struct ifla_rmnet_flags {
        __u32   mask;
 };
 
+/* MCTP section */
+
+enum {
+       IFLA_MCTP_UNSPEC,
+       IFLA_MCTP_NET,
+       __IFLA_MCTP_MAX,
+};
+
+#define IFLA_MCTP_MAX (__IFLA_MCTP_MAX - 1)
+
 #endif /* _UAPI_LINUX_IF_LINK_H */
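
[Editor's note] The kernel-doc added above draws a line drivers often blur: packets the host dropped after receiving them belong in rx_dropped, packets the device dropped for lack of buffer space belong in rx_missed_errors (procfs folds the two together), and rx_errors must aggregate the detailed receive error counters. A hedged driver-side sketch; foo_priv and its hardware counters are invented for illustration:

#include <linux/netdevice.h>

static void foo_get_stats64(struct net_device *dev,
			    struct rtnl_link_stats64 *stats)
{
	struct foo_priv *priv = netdev_priv(dev);

	stats->rx_packets       = priv->hw.rx_ok;
	stats->rx_bytes         = priv->hw.rx_octets;		/* excluding FCS */
	stats->rx_dropped       = priv->sw_rx_dropped;		/* dropped by the host */
	stats->rx_missed_errors = priv->hw.rx_no_buffer;	/* dropped by the device */
	stats->rx_length_errors = priv->hw.rx_bad_length;
	stats->rx_crc_errors    = priv->hw.rx_fcs_errors;
	stats->rx_frame_errors  = priv->hw.rx_align_errors;
	stats->rx_errors        = stats->rx_length_errors +
				  stats->rx_crc_errors +
				  stats->rx_frame_errors;
}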
index 81a4c54..4b384c9 100644 (file)
@@ -375,6 +375,7 @@ static int read_symbols(struct elf *elf)
                        return -1;
                }
                memset(sym, 0, sizeof(*sym));
+               INIT_LIST_HEAD(&sym->pv_target);
                sym->alias = sym;
 
                sym->idx = i;
index c90c708..bdf699f 100644 (file)
@@ -153,6 +153,10 @@ void objtool_pv_add(struct objtool_file *f, int idx, struct symbol *func)
            !strcmp(func->name, "_paravirt_ident_64"))
                return;
 
+       /* already added this function */
+       if (!list_empty(&func->pv_target))
+               return;
+
        list_add(&func->pv_target, &f->pv_ops[idx].targets);
        f->pv_ops[idx].clean = false;
 }
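
[Editor's note] The two objtool hunks above work together: read_symbols() now initializes every symbol's pv_target list head, and objtool_pv_add() uses list_empty() as an "already linked" check, since calling list_add() twice on the same node would corrupt the pv_ops target list. The idiom in isolation (illustrative types; the node must have been INIT_LIST_HEAD()'d first, which is exactly what the read_symbols() change guarantees):

#include <linux/list.h>

struct item {
	struct list_head node;	/* empty (self-pointing) means "not tracked yet" */
};

static void track_once(struct list_head *head, struct item *it)
{
	if (!list_empty(&it->node))
		return;		/* already on some list */

	list_add(&it->node, head);
}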
index 565fccd..016cff4 100644 (file)
@@ -1,5 +1,8 @@
 #ifndef _LINUX_LOCKDEP_H
 #define _LINUX_LOCKDEP_H
+
+#include <linux/spinlock.h>
+
 struct lock_class_key {
        unsigned int a;
 };
index 3763105..3cb5ac5 100644 (file)
 /x86_64/svm_int_ctl_test
 /x86_64/sync_regs_test
 /x86_64/tsc_msrs_test
+/x86_64/userspace_io_test
 /x86_64/userspace_msr_exit_test
 /x86_64/vmx_apic_access_test
 /x86_64/vmx_close_while_nested_test
 /x86_64/vmx_dirty_log_test
+/x86_64/vmx_invalid_nested_guest_state
 /x86_64/vmx_preemption_timer_test
 /x86_64/vmx_set_nested_state_test
 /x86_64/vmx_tsc_adjust_test
index c4e3471..17342b5 100644 (file)
@@ -59,10 +59,12 @@ TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test
 TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test
 TEST_GEN_PROGS_x86_64 += x86_64/svm_int_ctl_test
 TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test
+TEST_GEN_PROGS_x86_64 += x86_64/userspace_io_test
 TEST_GEN_PROGS_x86_64 += x86_64/userspace_msr_exit_test
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_apic_access_test
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test
+TEST_GEN_PROGS_x86_64 += x86_64/vmx_invalid_nested_guest_state
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_nested_tsc_scaling_test
index 6a1a37f..2d62edc 100644 (file)
@@ -321,6 +321,7 @@ bool vm_is_unrestricted_guest(struct kvm_vm *vm);
 
 unsigned int vm_get_page_size(struct kvm_vm *vm);
 unsigned int vm_get_page_shift(struct kvm_vm *vm);
+unsigned long vm_compute_max_gfn(struct kvm_vm *vm);
 uint64_t vm_get_max_gfn(struct kvm_vm *vm);
 int vm_get_fd(struct kvm_vm *vm);
 
index f968dfd..aed9dc3 100644 (file)
@@ -12,6 +12,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <sys/resource.h>
 
 #include "test_util.h"
 
@@ -40,10 +41,39 @@ int main(int argc, char *argv[])
 {
        int kvm_max_vcpu_id = kvm_check_cap(KVM_CAP_MAX_VCPU_ID);
        int kvm_max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
+       /*
+        * Number of file descriptors required, KVM_CAP_MAX_VCPUS for vCPU fds +
+        * an arbitrary number for everything else.
+        */
+       int nr_fds_wanted = kvm_max_vcpus + 100;
+       struct rlimit rl;
 
        pr_info("KVM_CAP_MAX_VCPU_ID: %d\n", kvm_max_vcpu_id);
        pr_info("KVM_CAP_MAX_VCPUS: %d\n", kvm_max_vcpus);
 
+       /*
+        * Check that we're allowed to open nr_fds_wanted file descriptors and
+        * try raising the limits if needed.
+        */
+       TEST_ASSERT(!getrlimit(RLIMIT_NOFILE, &rl), "getrlimit() failed!");
+
+       if (rl.rlim_cur < nr_fds_wanted) {
+               rl.rlim_cur = nr_fds_wanted;
+               if (rl.rlim_max < nr_fds_wanted) {
+                       int old_rlim_max = rl.rlim_max;
+                       rl.rlim_max = nr_fds_wanted;
+
+                       int r = setrlimit(RLIMIT_NOFILE, &rl);
+                       if (r < 0) {
+                               printf("RLIMIT_NOFILE hard limit is too low (%d, wanted %d)\n",
+                                      old_rlim_max, nr_fds_wanted);
+                               exit(KSFT_SKIP);
+                       }
+               } else {
+                       TEST_ASSERT(!setrlimit(RLIMIT_NOFILE, &rl), "setrlimit() failed!");
+               }
+       }
+
        /*
         * Upstream KVM prior to 4.8 does not support KVM_CAP_MAX_VCPU_ID.
         * Userspace is supposed to use KVM_CAP_MAX_VCPUS as the maximum ID
index 3836322..ba1fdc3 100644 (file)
@@ -280,7 +280,7 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg)
 #ifdef __s390x__
        alignment = max(0x100000, alignment);
 #endif
-       guest_test_phys_mem = align_down(guest_test_virt_mem, alignment);
+       guest_test_phys_mem = align_down(guest_test_phys_mem, alignment);
 
        /* Set up the shared data structure test_args */
        test_args.vm = vm;
index 8f2e0bb..53d2b5d 100644 (file)
@@ -302,7 +302,7 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
                (1ULL << (vm->va_bits - 1)) >> vm->page_shift);
 
        /* Limit physical addresses to PA-bits. */
-       vm->max_gfn = ((1ULL << vm->pa_bits) >> vm->page_shift) - 1;
+       vm->max_gfn = vm_compute_max_gfn(vm);
 
        /* Allocate and setup memory for guest. */
        vm->vpages_mapped = sparsebit_alloc();
@@ -2328,6 +2328,11 @@ unsigned int vm_get_page_shift(struct kvm_vm *vm)
        return vm->page_shift;
 }
 
+unsigned long __attribute__((weak)) vm_compute_max_gfn(struct kvm_vm *vm)
+{
+       return ((1ULL << vm->pa_bits) >> vm->page_shift) - 1;
+}
+
 uint64_t vm_get_max_gfn(struct kvm_vm *vm)
 {
        return vm->max_gfn;
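
[Editor's note] vm_compute_max_gfn() is added as a weak default here so an architecture can override it simply by providing a strong definition with the same name, which the x86 code in the next file does to dodge the AMD HyperTransport hole. A minimal sketch of the pattern with illustrative names:

/* lib/common.c - weak default, used unless some other object overrides it */
unsigned long __attribute__((weak)) compute_limit(void)
{
	return 1024;		/* conservative generic value */
}

/* lib/x86_64/arch.c - strong definition with the same name wins at link time */
unsigned long compute_limit(void)
{
	return 4096;		/* arch-specific value */
}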
index 82c39db..eef7b34 100644 (file)
@@ -1431,3 +1431,71 @@ struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vm *vm, uint32_t vcpui
 
        return cpuid;
 }
+
+#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx 0x68747541
+#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx 0x444d4163
+#define X86EMUL_CPUID_VENDOR_AuthenticAMD_edx 0x69746e65
+
+static inline unsigned x86_family(unsigned int eax)
+{
+        unsigned int x86;
+
+        x86 = (eax >> 8) & 0xf;
+
+        if (x86 == 0xf)
+                x86 += (eax >> 20) & 0xff;
+
+        return x86;
+}
+
+unsigned long vm_compute_max_gfn(struct kvm_vm *vm)
+{
+       const unsigned long num_ht_pages = 12 << (30 - vm->page_shift); /* 12 GiB */
+       unsigned long ht_gfn, max_gfn, max_pfn;
+       uint32_t eax, ebx, ecx, edx, max_ext_leaf;
+
+       max_gfn = (1ULL << (vm->pa_bits - vm->page_shift)) - 1;
+
+       /* Avoid reserved HyperTransport region on AMD processors.  */
+       eax = ecx = 0;
+       cpuid(&eax, &ebx, &ecx, &edx);
+       if (ebx != X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx ||
+           ecx != X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx ||
+           edx != X86EMUL_CPUID_VENDOR_AuthenticAMD_edx)
+               return max_gfn;
+
+       /* On parts with <40 physical address bits, the area is fully hidden */
+       if (vm->pa_bits < 40)
+               return max_gfn;
+
+       /* Before family 17h, the HyperTransport area is just below 1T.  */
+       ht_gfn = (1 << 28) - num_ht_pages;
+       eax = 1;
+       cpuid(&eax, &ebx, &ecx, &edx);
+       if (x86_family(eax) < 0x17)
+               goto done;
+
+       /*
+        * Otherwise it's at the top of the physical address space, possibly
+        * reduced due to SME by bits 11:6 of CPUID[0x8000001f].EBX.  Use
+        * the old conservative value if MAXPHYADDR is not enumerated.
+        */
+       eax = 0x80000000;
+       cpuid(&eax, &ebx, &ecx, &edx);
+       max_ext_leaf = eax;
+       if (max_ext_leaf < 0x80000008)
+               goto done;
+
+       eax = 0x80000008;
+       cpuid(&eax, &ebx, &ecx, &edx);
+       max_pfn = (1ULL << ((eax & 0xff) - vm->page_shift)) - 1;
+       if (max_ext_leaf >= 0x8000001f) {
+               eax = 0x8000001f;
+               cpuid(&eax, &ebx, &ecx, &edx);
+               max_pfn >>= (ebx >> 6) & 0x3f;
+       }
+
+       ht_gfn = max_pfn - num_ht_pages;
+done:
+       return min(max_gfn, ht_gfn - 1);
+}
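This x86 version overrides the weak library default added earlier. To make the arithmetic concrete, a hedged worked example with hypothetical numbers (48-bit MAXPHYADDR, 4 KiB pages, no SME reduction; not taken from any particular CPU):

#include <stdio.h>

int main(void)
{
	unsigned long long page_shift   = 12;
	unsigned long long num_ht_pages = 12ULL << (30 - page_shift);       /* 12 GiB in pages      */
	unsigned long long max_pfn      = (1ULL << (48 - page_shift)) - 1;  /* last addressable PFN */
	unsigned long long ht_gfn       = max_pfn - num_ht_pages;           /* start of the HT hole */

	printf("cap guest frames at %llu (ht_gfn - 1)\n", ht_gfn - 1);
	return 0;
}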
index 91d88aa..672915c 100644 (file)
@@ -165,10 +165,10 @@ static void hv_set_cpuid(struct kvm_vm *vm, struct kvm_cpuid2 *cpuid,
        vcpu_set_cpuid(vm, VCPU_ID, cpuid);
 }
 
-static void guest_test_msrs_access(struct kvm_vm *vm, struct msr_data *msr,
-                                  struct kvm_cpuid2 *best)
+static void guest_test_msrs_access(void)
 {
        struct kvm_run *run;
+       struct kvm_vm *vm;
        struct ucall uc;
        int stage = 0, r;
        struct kvm_cpuid_entry2 feat = {
@@ -180,11 +180,34 @@ static void guest_test_msrs_access(struct kvm_vm *vm, struct msr_data *msr,
        struct kvm_cpuid_entry2 dbg = {
                .function = HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES
        };
-       struct kvm_enable_cap cap = {0};
-
-       run = vcpu_state(vm, VCPU_ID);
+       struct kvm_cpuid2 *best;
+       vm_vaddr_t msr_gva;
+       struct kvm_enable_cap cap = {
+               .cap = KVM_CAP_HYPERV_ENFORCE_CPUID,
+               .args = {1}
+       };
+       struct msr_data *msr;
 
        while (true) {
+               vm = vm_create_default(VCPU_ID, 0, guest_msr);
+
+               msr_gva = vm_vaddr_alloc_page(vm);
+               memset(addr_gva2hva(vm, msr_gva), 0x0, getpagesize());
+               msr = addr_gva2hva(vm, msr_gva);
+
+               vcpu_args_set(vm, VCPU_ID, 1, msr_gva);
+               vcpu_enable_cap(vm, VCPU_ID, &cap);
+
+               vcpu_set_hv_cpuid(vm, VCPU_ID);
+
+               best = kvm_get_supported_hv_cpuid();
+
+               vm_init_descriptor_tables(vm);
+               vcpu_init_descriptor_tables(vm, VCPU_ID);
+               vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
+
+               run = vcpu_state(vm, VCPU_ID);
+
                switch (stage) {
                case 0:
                        /*
@@ -315,6 +338,7 @@ static void guest_test_msrs_access(struct kvm_vm *vm, struct msr_data *msr,
                         * capability enabled and guest visible CPUID bit unset.
                         */
                        cap.cap = KVM_CAP_HYPERV_SYNIC2;
+                       cap.args[0] = 0;
                        vcpu_enable_cap(vm, VCPU_ID, &cap);
                        break;
                case 22:
@@ -461,9 +485,9 @@ static void guest_test_msrs_access(struct kvm_vm *vm, struct msr_data *msr,
 
                switch (get_ucall(vm, VCPU_ID, &uc)) {
                case UCALL_SYNC:
-                       TEST_ASSERT(uc.args[1] == stage,
-                                   "Unexpected stage: %ld (%d expected)\n",
-                                   uc.args[1], stage);
+                       TEST_ASSERT(uc.args[1] == 0,
+                                   "Unexpected stage: %ld (0 expected)\n",
+                                   uc.args[1]);
                        break;
                case UCALL_ABORT:
                        TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
@@ -474,13 +498,14 @@ static void guest_test_msrs_access(struct kvm_vm *vm, struct msr_data *msr,
                }
 
                stage++;
+               kvm_vm_free(vm);
        }
 }
 
-static void guest_test_hcalls_access(struct kvm_vm *vm, struct hcall_data *hcall,
-                                    void *input, void *output, struct kvm_cpuid2 *best)
+static void guest_test_hcalls_access(void)
 {
        struct kvm_run *run;
+       struct kvm_vm *vm;
        struct ucall uc;
        int stage = 0, r;
        struct kvm_cpuid_entry2 feat = {
@@ -493,10 +518,38 @@ static void guest_test_hcalls_access(struct kvm_vm *vm, struct hcall_data *hcall
        struct kvm_cpuid_entry2 dbg = {
                .function = HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES
        };
-
-       run = vcpu_state(vm, VCPU_ID);
+       struct kvm_enable_cap cap = {
+               .cap = KVM_CAP_HYPERV_ENFORCE_CPUID,
+               .args = {1}
+       };
+       vm_vaddr_t hcall_page, hcall_params;
+       struct hcall_data *hcall;
+       struct kvm_cpuid2 *best;
 
        while (true) {
+               vm = vm_create_default(VCPU_ID, 0, guest_hcall);
+
+               vm_init_descriptor_tables(vm);
+               vcpu_init_descriptor_tables(vm, VCPU_ID);
+               vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
+
+               /* Hypercall input/output */
+               hcall_page = vm_vaddr_alloc_pages(vm, 2);
+               hcall = addr_gva2hva(vm, hcall_page);
+               memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize());
+
+               hcall_params = vm_vaddr_alloc_page(vm);
+               memset(addr_gva2hva(vm, hcall_params), 0x0, getpagesize());
+
+               vcpu_args_set(vm, VCPU_ID, 2, addr_gva2gpa(vm, hcall_page), hcall_params);
+               vcpu_enable_cap(vm, VCPU_ID, &cap);
+
+               vcpu_set_hv_cpuid(vm, VCPU_ID);
+
+               best = kvm_get_supported_hv_cpuid();
+
+               run = vcpu_state(vm, VCPU_ID);
+
                switch (stage) {
                case 0:
                        hcall->control = 0xdeadbeef;
@@ -606,9 +659,9 @@ static void guest_test_hcalls_access(struct kvm_vm *vm, struct hcall_data *hcall
 
                switch (get_ucall(vm, VCPU_ID, &uc)) {
                case UCALL_SYNC:
-                       TEST_ASSERT(uc.args[1] == stage,
-                                   "Unexpected stage: %ld (%d expected)\n",
-                                   uc.args[1], stage);
+                       TEST_ASSERT(uc.args[1] == 0,
+                                   "Unexpected stage: %ld (0 expected)\n",
+                                   uc.args[1]);
                        break;
                case UCALL_ABORT:
                        TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
@@ -619,66 +672,15 @@ static void guest_test_hcalls_access(struct kvm_vm *vm, struct hcall_data *hcall
                }
 
                stage++;
+               kvm_vm_free(vm);
        }
 }
 
 int main(void)
 {
-       struct kvm_cpuid2 *best;
-       struct kvm_vm *vm;
-       vm_vaddr_t msr_gva, hcall_page, hcall_params;
-       struct kvm_enable_cap cap = {
-               .cap = KVM_CAP_HYPERV_ENFORCE_CPUID,
-               .args = {1}
-       };
-
-       /* Test MSRs */
-       vm = vm_create_default(VCPU_ID, 0, guest_msr);
-
-       msr_gva = vm_vaddr_alloc_page(vm);
-       memset(addr_gva2hva(vm, msr_gva), 0x0, getpagesize());
-       vcpu_args_set(vm, VCPU_ID, 1, msr_gva);
-       vcpu_enable_cap(vm, VCPU_ID, &cap);
-
-       vcpu_set_hv_cpuid(vm, VCPU_ID);
-
-       best = kvm_get_supported_hv_cpuid();
-
-       vm_init_descriptor_tables(vm);
-       vcpu_init_descriptor_tables(vm, VCPU_ID);
-       vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
-
        pr_info("Testing access to Hyper-V specific MSRs\n");
-       guest_test_msrs_access(vm, addr_gva2hva(vm, msr_gva),
-                              best);
-       kvm_vm_free(vm);
-
-       /* Test hypercalls */
-       vm = vm_create_default(VCPU_ID, 0, guest_hcall);
-
-       vm_init_descriptor_tables(vm);
-       vcpu_init_descriptor_tables(vm, VCPU_ID);
-       vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
-
-       /* Hypercall input/output */
-       hcall_page = vm_vaddr_alloc_pages(vm, 2);
-       memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize());
-
-       hcall_params = vm_vaddr_alloc_page(vm);
-       memset(addr_gva2hva(vm, hcall_params), 0x0, getpagesize());
-
-       vcpu_args_set(vm, VCPU_ID, 2, addr_gva2gpa(vm, hcall_page), hcall_params);
-       vcpu_enable_cap(vm, VCPU_ID, &cap);
-
-       vcpu_set_hv_cpuid(vm, VCPU_ID);
-
-       best = kvm_get_supported_hv_cpuid();
+       guest_test_msrs_access();
 
        pr_info("Testing access to Hyper-V hypercalls\n");
-       guest_test_hcalls_access(vm, addr_gva2hva(vm, hcall_params),
-                                addr_gva2hva(vm, hcall_page),
-                                addr_gva2hva(vm, hcall_page) + getpagesize(),
-                                best);
-
-       kvm_vm_free(vm);
+       guest_test_hcalls_access();
 }
index 5ba325c..29b18d5 100644 (file)
@@ -54,12 +54,15 @@ static struct kvm_vm *sev_vm_create(bool es)
        return vm;
 }
 
-static struct kvm_vm *__vm_create(void)
+static struct kvm_vm *aux_vm_create(bool with_vcpus)
 {
        struct kvm_vm *vm;
        int i;
 
        vm = vm_create(VM_MODE_DEFAULT, 0, O_RDWR);
+       if (!with_vcpus)
+               return vm;
+
        for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i)
                vm_vcpu_add(vm, i);
 
@@ -89,11 +92,11 @@ static void test_sev_migrate_from(bool es)
 {
        struct kvm_vm *src_vm;
        struct kvm_vm *dst_vms[NR_MIGRATE_TEST_VMS];
-       int i;
+       int i, ret;
 
        src_vm = sev_vm_create(es);
        for (i = 0; i < NR_MIGRATE_TEST_VMS; ++i)
-               dst_vms[i] = __vm_create();
+               dst_vms[i] = aux_vm_create(true);
 
        /* Initial migration from the src to the first dst. */
        sev_migrate_from(dst_vms[0]->fd, src_vm->fd);
@@ -102,7 +105,10 @@ static void test_sev_migrate_from(bool es)
                sev_migrate_from(dst_vms[i]->fd, dst_vms[i - 1]->fd);
 
        /* Migrate the guest back to the original VM. */
-       sev_migrate_from(src_vm->fd, dst_vms[NR_MIGRATE_TEST_VMS - 1]->fd);
+       ret = __sev_migrate_from(src_vm->fd, dst_vms[NR_MIGRATE_TEST_VMS - 1]->fd);
+       TEST_ASSERT(ret == -1 && errno == EIO,
+                   "VM that was migrated from should be dead. ret %d, errno: %d\n", ret,
+                   errno);
 
        kvm_vm_free(src_vm);
        for (i = 0; i < NR_MIGRATE_TEST_VMS; ++i)
@@ -146,6 +152,8 @@ static void test_sev_migrate_locking(void)
 
        for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i)
                pthread_join(pt[i], NULL);
+       for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i)
+               kvm_vm_free(input[i].vm);
 }
 
 static void test_sev_migrate_parameters(void)
@@ -157,12 +165,11 @@ static void test_sev_migrate_parameters(void)
        sev_vm = sev_vm_create(/* es= */ false);
        sev_es_vm = sev_vm_create(/* es= */ true);
        vm_no_vcpu = vm_create(VM_MODE_DEFAULT, 0, O_RDWR);
-       vm_no_sev = __vm_create();
+       vm_no_sev = aux_vm_create(true);
        sev_es_vm_no_vmsa = vm_create(VM_MODE_DEFAULT, 0, O_RDWR);
        sev_ioctl(sev_es_vm_no_vmsa->fd, KVM_SEV_ES_INIT, NULL);
        vm_vcpu_add(sev_es_vm_no_vmsa, 1);
 
-
        ret = __sev_migrate_from(sev_vm->fd, sev_es_vm->fd);
        TEST_ASSERT(
                ret == -1 && errno == EINVAL,
@@ -191,13 +198,151 @@ static void test_sev_migrate_parameters(void)
        TEST_ASSERT(ret == -1 && errno == EINVAL,
                    "Migrations require SEV enabled. ret %d, errno: %d\n", ret,
                    errno);
+
+       kvm_vm_free(sev_vm);
+       kvm_vm_free(sev_es_vm);
+       kvm_vm_free(sev_es_vm_no_vmsa);
+       kvm_vm_free(vm_no_vcpu);
+       kvm_vm_free(vm_no_sev);
+}
+
+static int __sev_mirror_create(int dst_fd, int src_fd)
+{
+       struct kvm_enable_cap cap = {
+               .cap = KVM_CAP_VM_COPY_ENC_CONTEXT_FROM,
+               .args = { src_fd }
+       };
+
+       return ioctl(dst_fd, KVM_ENABLE_CAP, &cap);
+}
+
+
+static void sev_mirror_create(int dst_fd, int src_fd)
+{
+       int ret;
+
+       ret = __sev_mirror_create(dst_fd, src_fd);
+       TEST_ASSERT(!ret, "Copying context failed, ret: %d, errno: %d\n", ret, errno);
+}
+
+static void test_sev_mirror(bool es)
+{
+       struct kvm_vm *src_vm, *dst_vm;
+       struct kvm_sev_launch_start start = {
+               .policy = es ? SEV_POLICY_ES : 0
+       };
+       int i;
+
+       src_vm = sev_vm_create(es);
+       dst_vm = aux_vm_create(false);
+
+       sev_mirror_create(dst_vm->fd, src_vm->fd);
+
+       /* Check that we can complete creation of the mirror VM.  */
+       for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i)
+               vm_vcpu_add(dst_vm, i);
+       sev_ioctl(dst_vm->fd, KVM_SEV_LAUNCH_START, &start);
+       if (es)
+               sev_ioctl(dst_vm->fd, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
+
+       kvm_vm_free(src_vm);
+       kvm_vm_free(dst_vm);
+}
+
+static void test_sev_mirror_parameters(void)
+{
+       struct kvm_vm *sev_vm, *sev_es_vm, *vm_no_vcpu, *vm_with_vcpu;
+       int ret;
+
+       sev_vm = sev_vm_create(/* es= */ false);
+       sev_es_vm = sev_vm_create(/* es= */ true);
+       vm_with_vcpu = aux_vm_create(true);
+       vm_no_vcpu = aux_vm_create(false);
+
+       ret = __sev_mirror_create(sev_vm->fd, sev_vm->fd);
+       TEST_ASSERT(
+               ret == -1 && errno == EINVAL,
+               "Should not be able to copy context to self. ret: %d, errno: %d\n",
+               ret, errno);
+
+       ret = __sev_mirror_create(sev_vm->fd, sev_es_vm->fd);
+       TEST_ASSERT(
+               ret == -1 && errno == EINVAL,
+               "Should not be able to copy context to SEV-enabled VM. ret: %d, errno: %d\n",
+               ret, errno);
+
+       ret = __sev_mirror_create(sev_es_vm->fd, sev_vm->fd);
+       TEST_ASSERT(
+               ret == -1 && errno == EINVAL,
+               "Should not be able to copy context to SEV-ES enabled VM. ret: %d, errno: %d\n",
+               ret, errno);
+
+       ret = __sev_mirror_create(vm_no_vcpu->fd, vm_with_vcpu->fd);
+       TEST_ASSERT(ret == -1 && errno == EINVAL,
+                   "Copy context requires SEV enabled. ret %d, errno: %d\n", ret,
+                   errno);
+
+       ret = __sev_mirror_create(vm_with_vcpu->fd, sev_vm->fd);
+       TEST_ASSERT(
+               ret == -1 && errno == EINVAL,
+               "SEV copy context requires no vCPUs on the destination. ret: %d, errno: %d\n",
+               ret, errno);
+
+       kvm_vm_free(sev_vm);
+       kvm_vm_free(sev_es_vm);
+       kvm_vm_free(vm_with_vcpu);
+       kvm_vm_free(vm_no_vcpu);
+}
+
+static void test_sev_move_copy(void)
+{
+       struct kvm_vm *dst_vm, *sev_vm, *mirror_vm, *dst_mirror_vm;
+       int ret;
+
+       sev_vm = sev_vm_create(/* es= */ false);
+       dst_vm = aux_vm_create(true);
+       mirror_vm = aux_vm_create(false);
+       dst_mirror_vm = aux_vm_create(false);
+
+       sev_mirror_create(mirror_vm->fd, sev_vm->fd);
+       ret = __sev_migrate_from(dst_vm->fd, sev_vm->fd);
+       TEST_ASSERT(ret == -1 && errno == EBUSY,
+                   "Cannot migrate VM that has mirrors. ret %d, errno: %d\n", ret,
+                   errno);
+
+       /* The mirror itself can be migrated.  */
+       sev_migrate_from(dst_mirror_vm->fd, mirror_vm->fd);
+       ret = __sev_migrate_from(dst_vm->fd, sev_vm->fd);
+       TEST_ASSERT(ret == -1 && errno == EBUSY,
+                   "Cannot migrate VM that has mirrors. ret %d, errno: %d\n", ret,
+                   errno);
+
+       /*
+        * mirror_vm is not a mirror anymore, dst_mirror_vm is.  Thus,
+        * the owner can be migrated as soon as dst_mirror_vm is gone.
+        */
+       kvm_vm_free(dst_mirror_vm);
+       sev_migrate_from(dst_vm->fd, sev_vm->fd);
+
+       kvm_vm_free(mirror_vm);
+       kvm_vm_free(dst_vm);
+       kvm_vm_free(sev_vm);
 }
 
 int main(int argc, char *argv[])
 {
-       test_sev_migrate_from(/* es= */ false);
-       test_sev_migrate_from(/* es= */ true);
-       test_sev_migrate_locking();
-       test_sev_migrate_parameters();
+       if (kvm_check_cap(KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM)) {
+               test_sev_migrate_from(/* es= */ false);
+               test_sev_migrate_from(/* es= */ true);
+               test_sev_migrate_locking();
+               test_sev_migrate_parameters();
+               if (kvm_check_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM))
+                       test_sev_move_copy();
+       }
+       if (kvm_check_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM)) {
+               test_sev_mirror(/* es= */ false);
+               test_sev_mirror(/* es= */ true);
+               test_sev_mirror_parameters();
+       }
        return 0;
 }
index df04f56..30a8103 100644 (file)
@@ -75,7 +75,7 @@ static void l1_guest_code(struct svm_test_data *svm)
        vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
 
        /* No intercepts for real and virtual interrupts */
-       vmcb->control.intercept &= ~(1ULL << INTERCEPT_INTR | INTERCEPT_VINTR);
+       vmcb->control.intercept &= ~(BIT(INTERCEPT_INTR) | BIT(INTERCEPT_VINTR));
 
        /* Make a virtual interrupt VINTR_IRQ_NUMBER pending */
        vmcb->control.int_ctl |= V_IRQ_MASK | (0x1 << V_INTR_PRIO_SHIFT);
diff --git a/tools/testing/selftests/kvm/x86_64/userspace_io_test.c b/tools/testing/selftests/kvm/x86_64/userspace_io_test.c
new file mode 100644 (file)
index 0000000..e4bef2e
--- /dev/null
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+
+#define VCPU_ID                        1
+
+static void guest_ins_port80(uint8_t *buffer, unsigned int count)
+{
+       unsigned long end;
+
+       if (count == 2)
+               end = (unsigned long)buffer + 1;
+       else
+               end = (unsigned long)buffer + 8192;
+
+       asm volatile("cld; rep; insb" : "+D"(buffer), "+c"(count) : "d"(0x80) : "memory");
+       GUEST_ASSERT_1(count == 0, count);
+       GUEST_ASSERT_2((unsigned long)buffer == end, buffer, end);
+}
+
+static void guest_code(void)
+{
+       uint8_t buffer[8192];
+       int i;
+
+       /*
+        * Special case tests.  main() will adjust RCX 2 => 1 and 3 => 8192 to
+        * test that KVM doesn't explode when userspace modifies the "count" on
+        * a userspace I/O exit.  KVM isn't required to play nice with the I/O
+        * itself, as KVM doesn't support manipulating the count; it just needs
+        * to not explode or overflow a buffer.
+        */
+       guest_ins_port80(buffer, 2);
+       guest_ins_port80(buffer, 3);
+
+       /* Verify KVM fills the buffer correctly when not stuffing RCX. */
+       memset(buffer, 0, sizeof(buffer));
+       guest_ins_port80(buffer, 8192);
+       for (i = 0; i < 8192; i++)
+               GUEST_ASSERT_2(buffer[i] == 0xaa, i, buffer[i]);
+
+       GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+       struct kvm_regs regs;
+       struct kvm_run *run;
+       struct kvm_vm *vm;
+       struct ucall uc;
+       int rc;
+
+       /* Tell stdout not to buffer its content */
+       setbuf(stdout, NULL);
+
+       /* Create VM */
+       vm = vm_create_default(VCPU_ID, 0, guest_code);
+       run = vcpu_state(vm, VCPU_ID);
+
+       memset(&regs, 0, sizeof(regs));
+
+       while (1) {
+               rc = _vcpu_run(vm, VCPU_ID);
+
+               TEST_ASSERT(rc == 0, "vcpu_run failed: %d\n", rc);
+               TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+                           "Unexpected exit reason: %u (%s),\n",
+                           run->exit_reason,
+                           exit_reason_str(run->exit_reason));
+
+               if (get_ucall(vm, VCPU_ID, &uc))
+                       break;
+
+               TEST_ASSERT(run->io.port == 0x80,
+                           "Expected I/O at port 0x80, got port 0x%x\n", run->io.port);
+
+               /*
+                * Modify the rep string count in RCX: 2 => 1 and 3 => 8192.
+                * Note, this abuses KVM's batching of rep string I/O to avoid
+                * getting stuck in an infinite loop.  That behavior isn't in
+                * scope from a testing perspective as it's not ABI in any way,
+                * i.e. it really is abusing internal KVM knowledge.
+                */
+               vcpu_regs_get(vm, VCPU_ID, &regs);
+               if (regs.rcx == 2)
+                       regs.rcx = 1;
+               if (regs.rcx == 3)
+                       regs.rcx = 8192;
+               memset((void *)run + run->io.data_offset, 0xaa, 4096);
+               vcpu_regs_set(vm, VCPU_ID, &regs);
+       }
+
+       switch (uc.cmd) {
+       case UCALL_DONE:
+               break;
+       case UCALL_ABORT:
+               TEST_FAIL("%s at %s:%ld : argN+1 = 0x%lx, argN+2 = 0x%lx",
+                         (const char *)uc.args[0], __FILE__, uc.args[1],
+                         uc.args[2], uc.args[3]);
+       default:
+               TEST_FAIL("Unknown ucall %lu", uc.cmd);
+       }
+
+       kvm_vm_free(vm);
+       return 0;
+}
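For readers less familiar with x86 string I/O, a rough software model of what "cld; rep insb" does (a simplified sketch that ignores faults, segmentation and the backward direction; rep_insb_model and my_inb are made-up names, not part of the test):

#include <stdint.h>

/*
 * One byte is read from the port per iteration, stored at the buffer
 * pointer (RDI), and the count (RCX) is decremented until it hits zero.
 */
static void rep_insb_model(uint8_t *rdi, uint64_t *rcx, uint16_t port,
			   uint8_t (*my_inb)(uint16_t))
{
	while (*rcx) {
		*rdi++ = my_inb(port);
		(*rcx)--;
	}
}

Hence the guest's assertions that the count reaches zero and that the buffer pointer lands exactly on end; when main() rewrites RCX between exits, the final pointer simply reflects the rewritten count.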
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c b/tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c
new file mode 100644 (file)
index 0000000..489fbed
--- /dev/null
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "kselftest.h"
+
+#define VCPU_ID        0
+#define ARBITRARY_IO_PORT 0x2000
+
+static struct kvm_vm *vm;
+
+static void l2_guest_code(void)
+{
+       /*
+        * Generate an exit to L0 userspace, i.e. main(), via I/O to an
+        * arbitrary port.
+        */
+       asm volatile("inb %%dx, %%al"
+                    : : [port] "d" (ARBITRARY_IO_PORT) : "rax");
+}
+
+static void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+       unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+       GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+       GUEST_ASSERT(load_vmcs(vmx_pages));
+
+       /* Prepare the VMCS for L2 execution. */
+       prepare_vmcs(vmx_pages, l2_guest_code,
+                    &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+       /*
+        * L2 must be run without unrestricted guest, verify that the selftests
+        * library hasn't enabled it.  Because KVM selftests jump directly to
+        * 64-bit mode, unrestricted guest support isn't required.
+        */
+       GUEST_ASSERT(!(vmreadz(CPU_BASED_VM_EXEC_CONTROL) & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) ||
+                    !(vmreadz(SECONDARY_VM_EXEC_CONTROL) & SECONDARY_EXEC_UNRESTRICTED_GUEST));
+
+       GUEST_ASSERT(!vmlaunch());
+
+       /* L2 should triple fault after main() stuffs invalid guest state. */
+       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_TRIPLE_FAULT);
+       GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+       vm_vaddr_t vmx_pages_gva;
+       struct kvm_sregs sregs;
+       struct kvm_run *run;
+       struct ucall uc;
+
+       nested_vmx_check_supported();
+
+       vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
+
+       /* Allocate VMX pages and shared descriptors (vmx_pages). */
+       vcpu_alloc_vmx(vm, &vmx_pages_gva);
+       vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
+
+       vcpu_run(vm, VCPU_ID);
+
+       run = vcpu_state(vm, VCPU_ID);
+
+       /*
+        * The first exit to L0 userspace should be an I/O access from L2.
+        * Running L1 should launch L2 without triggering an exit to userspace.
+        */
+       TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+                   "Expected KVM_EXIT_IO, got: %u (%s)\n",
+                   run->exit_reason, exit_reason_str(run->exit_reason));
+
+       TEST_ASSERT(run->io.port == ARBITRARY_IO_PORT,
+                   "Expected IN from port %d from L2, got port %d",
+                   ARBITRARY_IO_PORT, run->io.port);
+
+       /*
+        * Stuff invalid guest state for L2 by making TR unusable.  The next
+        * KVM_RUN should induce a TRIPLE_FAULT in L2 as KVM doesn't support
+        * emulating invalid guest state for L2.
+        */
+       memset(&sregs, 0, sizeof(sregs));
+       vcpu_sregs_get(vm, VCPU_ID, &sregs);
+       sregs.tr.unusable = 1;
+       vcpu_sregs_set(vm, VCPU_ID, &sregs);
+
+       vcpu_run(vm, VCPU_ID);
+
+       switch (get_ucall(vm, VCPU_ID, &uc)) {
+       case UCALL_DONE:
+               break;
+       case UCALL_ABORT:
+               TEST_FAIL("%s", (const char *)uc.args[0]);
+       default:
+               TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+       }
+}
index 23051d8..2454a1f 100644 (file)
@@ -110,22 +110,5 @@ int main(int argc, char *argv[])
        ret = _vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, PMU_CAP_LBR_FMT);
        TEST_ASSERT(ret == 0, "Bad PERF_CAPABILITIES didn't fail.");
 
-       /* testcase 4, set capabilities when we don't have PDCM bit */
-       entry_1_0->ecx &= ~X86_FEATURE_PDCM;
-       vcpu_set_cpuid(vm, VCPU_ID, cpuid);
-       ret = _vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
-       TEST_ASSERT(ret == 0, "Bad PERF_CAPABILITIES didn't fail.");
-
-       /* testcase 5, set capabilities when we don't have PMU version bits */
-       entry_1_0->ecx |= X86_FEATURE_PDCM;
-       eax.split.version_id = 0;
-       entry_1_0->ecx = eax.full;
-       vcpu_set_cpuid(vm, VCPU_ID, cpuid);
-       ret = _vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, PMU_CAP_FW_WRITES);
-       TEST_ASSERT(ret == 0, "Bad PERF_CAPABILITIES didn't fail.");
-
-       vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, 0);
-       ASSERT_EQ(vcpu_get_msr(vm, VCPU_ID, MSR_IA32_PERF_CAPABILITIES), 0);
-
        kvm_vm_free(vm);
 }
index 7615f29..9897fa9 100644 (file)
@@ -34,6 +34,7 @@ TEST_PROGS += srv6_end_dt46_l3vpn_test.sh
 TEST_PROGS += srv6_end_dt4_l3vpn_test.sh
 TEST_PROGS += srv6_end_dt6_l3vpn_test.sh
 TEST_PROGS += vrf_strict_mode_test.sh
+TEST_PROGS += arp_ndisc_evict_nocarrier.sh
 TEST_PROGS_EXTENDED := in_netns.sh setup_loopback.sh setup_veth.sh
 TEST_PROGS_EXTENDED += toeplitz_client.sh toeplitz.sh
 TEST_GEN_FILES =  socket nettest
index 3313566..7f5b265 100755 (executable)
@@ -4002,8 +4002,8 @@ EOF
 ################################################################################
 # main
 
-TESTS_IPV4="ipv4_ping ipv4_tcp ipv4_udp ipv4_addr_bind ipv4_runtime ipv4_netfilter"
-TESTS_IPV6="ipv6_ping ipv6_tcp ipv6_udp ipv6_addr_bind ipv6_runtime ipv6_netfilter"
+TESTS_IPV4="ipv4_ping ipv4_tcp ipv4_udp ipv4_bind ipv4_runtime ipv4_netfilter"
+TESTS_IPV6="ipv6_ping ipv6_tcp ipv6_udp ipv6_bind ipv6_runtime ipv6_netfilter"
 TESTS_OTHER="use_cases"
 
 PAUSE_ON_FAIL=no
index b5a69ad..d444ee6 100755 (executable)
@@ -629,6 +629,66 @@ ipv6_fcnal()
        log_test $? 0 "Nexthops removed on admin down"
 }
 
+ipv6_grp_refs()
+{
+       if [ ! -x "$(command -v mausezahn)" ]; then
+               echo "SKIP: Could not run test; need mausezahn tool"
+               return
+       fi
+
+       run_cmd "$IP link set dev veth1 up"
+       run_cmd "$IP link add veth1.10 link veth1 up type vlan id 10"
+       run_cmd "$IP link add veth1.20 link veth1 up type vlan id 20"
+       run_cmd "$IP -6 addr add 2001:db8:91::1/64 dev veth1.10"
+       run_cmd "$IP -6 addr add 2001:db8:92::1/64 dev veth1.20"
+       run_cmd "$IP -6 neigh add 2001:db8:91::2 lladdr 00:11:22:33:44:55 dev veth1.10"
+       run_cmd "$IP -6 neigh add 2001:db8:92::2 lladdr 00:11:22:33:44:55 dev veth1.20"
+       run_cmd "$IP nexthop add id 100 via 2001:db8:91::2 dev veth1.10"
+       run_cmd "$IP nexthop add id 101 via 2001:db8:92::2 dev veth1.20"
+       run_cmd "$IP nexthop add id 102 group 100"
+       run_cmd "$IP route add 2001:db8:101::1/128 nhid 102"
+
+       # create per-cpu dsts through nh 100
+       run_cmd "ip netns exec me mausezahn -6 veth1.10 -B 2001:db8:101::1 -A 2001:db8:91::1 -c 5 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1"
+
+       # remove nh 100 from the group to delete the route, potentially leaving
+       # a stale per-cpu dst which holds a reference to the nexthop's net
+       # device and to the IPv6 route
+       run_cmd "$IP nexthop replace id 102 group 101"
+       run_cmd "$IP route del 2001:db8:101::1/128"
+
+       # add both nexthops to the group so a reference is taken on them
+       run_cmd "$IP nexthop replace id 102 group 100/101"
+
+       # if the bug described in commit "net: nexthop: release IPv6 per-cpu
+       # dsts when replacing a nexthop group" exists, at this point we have
+       # an unlinked IPv6 route (not yet freed due to the stale dst) holding
+       # a reference on the group, so we delete the group, which again will
+       # only unlink it due to the route reference
+       run_cmd "$IP nexthop del id 102"
+
+       # delete the nexthop with the stale dst; since we have an unlinked
+       # group with a ref to it and an unlinked IPv6 route with a ref to the
+       # group, the nh will only be unlinked and not freed, so the stale dst
+       # remains forever and we get a net device refcount imbalance
+       run_cmd "$IP nexthop del id 100"
+
+       # if a reference was lost this command will hang because the net device
+       # cannot be removed
+       timeout -s KILL 5 ip netns exec me ip link del veth1.10 >/dev/null 2>&1
+
+       # we can't cleanup if the command is hung trying to delete the netdev
+       if [ $? -eq 137 ]; then
+               return 1
+       fi
+
+       # cleanup
+       run_cmd "$IP link del veth1.20"
+       run_cmd "$IP nexthop flush"
+
+       return 0
+}
+
 ipv6_grp_fcnal()
 {
        local rc
@@ -734,6 +794,9 @@ ipv6_grp_fcnal()
 
        run_cmd "$IP nexthop add id 108 group 31/24"
        log_test $? 2 "Nexthop group can not have a blackhole and another nexthop"
+
+       ipv6_grp_refs
+       log_test $? 0 "Nexthop group replace refcounts"
 }
 
 ipv6_res_grp_fcnal()
index e61fc4c..8a22db0 100644 (file)
@@ -78,26 +78,21 @@ static void memrnd(void *s, size_t n)
                *byte++ = rand();
 }
 
-FIXTURE(tls_basic)
-{
-       int fd, cfd;
-       bool notls;
-};
-
-FIXTURE_SETUP(tls_basic)
+static void ulp_sock_pair(struct __test_metadata *_metadata,
+                         int *fd, int *cfd, bool *notls)
 {
        struct sockaddr_in addr;
        socklen_t len;
        int sfd, ret;
 
-       self->notls = false;
+       *notls = false;
        len = sizeof(addr);
 
        addr.sin_family = AF_INET;
        addr.sin_addr.s_addr = htonl(INADDR_ANY);
        addr.sin_port = 0;
 
-       self->fd = socket(AF_INET, SOCK_STREAM, 0);
+       *fd = socket(AF_INET, SOCK_STREAM, 0);
        sfd = socket(AF_INET, SOCK_STREAM, 0);
 
        ret = bind(sfd, &addr, sizeof(addr));
@@ -108,26 +103,96 @@ FIXTURE_SETUP(tls_basic)
        ret = getsockname(sfd, &addr, &len);
        ASSERT_EQ(ret, 0);
 
-       ret = connect(self->fd, &addr, sizeof(addr));
+       ret = connect(*fd, &addr, sizeof(addr));
        ASSERT_EQ(ret, 0);
 
-       self->cfd = accept(sfd, &addr, &len);
-       ASSERT_GE(self->cfd, 0);
+       *cfd = accept(sfd, &addr, &len);
+       ASSERT_GE(*cfd, 0);
 
        close(sfd);
 
-       ret = setsockopt(self->fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
+       ret = setsockopt(*fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
        if (ret != 0) {
                ASSERT_EQ(errno, ENOENT);
-               self->notls = true;
+               *notls = true;
                printf("Failure setting TCP_ULP, testing without tls\n");
                return;
        }
 
-       ret = setsockopt(self->cfd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
+       ret = setsockopt(*cfd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
        ASSERT_EQ(ret, 0);
 }
 
+/* Produce a basic cmsg */
+static int tls_send_cmsg(int fd, unsigned char record_type,
+                        void *data, size_t len, int flags)
+{
+       char cbuf[CMSG_SPACE(sizeof(char))];
+       int cmsg_len = sizeof(char);
+       struct cmsghdr *cmsg;
+       struct msghdr msg;
+       struct iovec vec;
+
+       vec.iov_base = data;
+       vec.iov_len = len;
+       memset(&msg, 0, sizeof(struct msghdr));
+       msg.msg_iov = &vec;
+       msg.msg_iovlen = 1;
+       msg.msg_control = cbuf;
+       msg.msg_controllen = sizeof(cbuf);
+       cmsg = CMSG_FIRSTHDR(&msg);
+       cmsg->cmsg_level = SOL_TLS;
+       /* test sending non-record types. */
+       cmsg->cmsg_type = TLS_SET_RECORD_TYPE;
+       cmsg->cmsg_len = CMSG_LEN(cmsg_len);
+       *CMSG_DATA(cmsg) = record_type;
+       msg.msg_controllen = cmsg->cmsg_len;
+
+       return sendmsg(fd, &msg, flags);
+}
+
+static int tls_recv_cmsg(struct __test_metadata *_metadata,
+                        int fd, unsigned char record_type,
+                        void *data, size_t len, int flags)
+{
+       char cbuf[CMSG_SPACE(sizeof(char))];
+       struct cmsghdr *cmsg;
+       unsigned char ctype;
+       struct msghdr msg;
+       struct iovec vec;
+       int n;
+
+       vec.iov_base = data;
+       vec.iov_len = len;
+       memset(&msg, 0, sizeof(struct msghdr));
+       msg.msg_iov = &vec;
+       msg.msg_iovlen = 1;
+       msg.msg_control = cbuf;
+       msg.msg_controllen = sizeof(cbuf);
+
+       n = recvmsg(fd, &msg, flags);
+
+       cmsg = CMSG_FIRSTHDR(&msg);
+       EXPECT_NE(cmsg, NULL);
+       EXPECT_EQ(cmsg->cmsg_level, SOL_TLS);
+       EXPECT_EQ(cmsg->cmsg_type, TLS_GET_RECORD_TYPE);
+       ctype = *((unsigned char *)CMSG_DATA(cmsg));
+       EXPECT_EQ(ctype, record_type);
+
+       return n;
+}
+
+FIXTURE(tls_basic)
+{
+       int fd, cfd;
+       bool notls;
+};
+
+FIXTURE_SETUP(tls_basic)
+{
+       ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls);
+}
+
 FIXTURE_TEARDOWN(tls_basic)
 {
        close(self->fd);
@@ -199,60 +264,21 @@ FIXTURE_VARIANT_ADD(tls, 13_sm4_ccm)
 FIXTURE_SETUP(tls)
 {
        struct tls_crypto_info_keys tls12;
-       struct sockaddr_in addr;
-       socklen_t len;
-       int sfd, ret;
-
-       self->notls = false;
-       len = sizeof(addr);
+       int ret;
 
        tls_crypto_info_init(variant->tls_version, variant->cipher_type,
                             &tls12);
 
-       addr.sin_family = AF_INET;
-       addr.sin_addr.s_addr = htonl(INADDR_ANY);
-       addr.sin_port = 0;
+       ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls);
 
-       self->fd = socket(AF_INET, SOCK_STREAM, 0);
-       sfd = socket(AF_INET, SOCK_STREAM, 0);
-
-       ret = bind(sfd, &addr, sizeof(addr));
-       ASSERT_EQ(ret, 0);
-       ret = listen(sfd, 10);
-       ASSERT_EQ(ret, 0);
+       if (self->notls)
+               return;
 
-       ret = getsockname(sfd, &addr, &len);
+       ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len);
        ASSERT_EQ(ret, 0);
 
-       ret = connect(self->fd, &addr, sizeof(addr));
+       ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len);
        ASSERT_EQ(ret, 0);
-
-       ret = setsockopt(self->fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
-       if (ret != 0) {
-               self->notls = true;
-               printf("Failure setting TCP_ULP, testing without tls\n");
-       }
-
-       if (!self->notls) {
-               ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12,
-                                tls12.len);
-               ASSERT_EQ(ret, 0);
-       }
-
-       self->cfd = accept(sfd, &addr, &len);
-       ASSERT_GE(self->cfd, 0);
-
-       if (!self->notls) {
-               ret = setsockopt(self->cfd, IPPROTO_TCP, TCP_ULP, "tls",
-                                sizeof("tls"));
-               ASSERT_EQ(ret, 0);
-
-               ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12,
-                                tls12.len);
-               ASSERT_EQ(ret, 0);
-       }
-
-       close(sfd);
 }
 
 FIXTURE_TEARDOWN(tls)
@@ -613,6 +639,95 @@ TEST_F(tls, splice_to_pipe)
        EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
 }
 
+TEST_F(tls, splice_cmsg_to_pipe)
+{
+       char *test_str = "test_read";
+       char record_type = 100;
+       int send_len = 10;
+       char buf[10];
+       int p[2];
+
+       ASSERT_GE(pipe(p), 0);
+       EXPECT_EQ(tls_send_cmsg(self->fd, 100, test_str, send_len, 0), 10);
+       EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, send_len, 0), -1);
+       EXPECT_EQ(errno, EINVAL);
+       EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1);
+       EXPECT_EQ(errno, EIO);
+       EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, record_type,
+                               buf, sizeof(buf), MSG_WAITALL),
+                 send_len);
+       EXPECT_EQ(memcmp(test_str, buf, send_len), 0);
+}
+
+TEST_F(tls, splice_dec_cmsg_to_pipe)
+{
+       char *test_str = "test_read";
+       char record_type = 100;
+       int send_len = 10;
+       char buf[10];
+       int p[2];
+
+       ASSERT_GE(pipe(p), 0);
+       EXPECT_EQ(tls_send_cmsg(self->fd, 100, test_str, send_len, 0), 10);
+       EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1);
+       EXPECT_EQ(errno, EIO);
+       EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, send_len, 0), -1);
+       EXPECT_EQ(errno, EINVAL);
+       EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, record_type,
+                               buf, sizeof(buf), MSG_WAITALL),
+                 send_len);
+       EXPECT_EQ(memcmp(test_str, buf, send_len), 0);
+}
+
+TEST_F(tls, recv_and_splice)
+{
+       int send_len = TLS_PAYLOAD_MAX_LEN;
+       char mem_send[TLS_PAYLOAD_MAX_LEN];
+       char mem_recv[TLS_PAYLOAD_MAX_LEN];
+       int half = send_len / 2;
+       int p[2];
+
+       ASSERT_GE(pipe(p), 0);
+       EXPECT_EQ(send(self->fd, mem_send, send_len, 0), send_len);
+       /* Recv half of the record, splice the other half */
+       EXPECT_EQ(recv(self->cfd, mem_recv, half, MSG_WAITALL), half);
+       EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, half, SPLICE_F_NONBLOCK),
+                 half);
+       EXPECT_EQ(read(p[0], &mem_recv[half], half), half);
+       EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
+}
+
+TEST_F(tls, peek_and_splice)
+{
+       int send_len = TLS_PAYLOAD_MAX_LEN;
+       char mem_send[TLS_PAYLOAD_MAX_LEN];
+       char mem_recv[TLS_PAYLOAD_MAX_LEN];
+       int chunk = TLS_PAYLOAD_MAX_LEN / 4;
+       int n, i, p[2];
+
+       memrnd(mem_send, sizeof(mem_send));
+
+       ASSERT_GE(pipe(p), 0);
+       for (i = 0; i < 4; i++)
+               EXPECT_EQ(send(self->fd, &mem_send[chunk * i], chunk, 0),
+                         chunk);
+
+       EXPECT_EQ(recv(self->cfd, mem_recv, chunk * 5 / 2,
+                      MSG_WAITALL | MSG_PEEK),
+                 chunk * 5 / 2);
+       EXPECT_EQ(memcmp(mem_send, mem_recv, chunk * 5 / 2), 0);
+
+       n = 0;
+       while (n < send_len) {
+               i = splice(self->cfd, NULL, p[1], NULL, send_len - n, 0);
+               EXPECT_GT(i, 0);
+               n += i;
+       }
+       EXPECT_EQ(n, send_len);
+       EXPECT_EQ(read(p[0], mem_recv, send_len), send_len);
+       EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
+}
+
 TEST_F(tls, recvmsg_single)
 {
        char const *test_str = "test_recvmsg_single";
@@ -1193,60 +1308,30 @@ TEST_F(tls, mutliproc_sendpage_writers)
 
 TEST_F(tls, control_msg)
 {
-       if (self->notls)
-               return;
-
-       char cbuf[CMSG_SPACE(sizeof(char))];
-       char const *test_str = "test_read";
-       int cmsg_len = sizeof(char);
+       char *test_str = "test_read";
        char record_type = 100;
-       struct cmsghdr *cmsg;
-       struct msghdr msg;
        int send_len = 10;
-       struct iovec vec;
        char buf[10];
 
-       vec.iov_base = (char *)test_str;
-       vec.iov_len = 10;
-       memset(&msg, 0, sizeof(struct msghdr));
-       msg.msg_iov = &vec;
-       msg.msg_iovlen = 1;
-       msg.msg_control = cbuf;
-       msg.msg_controllen = sizeof(cbuf);
-       cmsg = CMSG_FIRSTHDR(&msg);
-       cmsg->cmsg_level = SOL_TLS;
-       /* test sending non-record types. */
-       cmsg->cmsg_type = TLS_SET_RECORD_TYPE;
-       cmsg->cmsg_len = CMSG_LEN(cmsg_len);
-       *CMSG_DATA(cmsg) = record_type;
-       msg.msg_controllen = cmsg->cmsg_len;
+       if (self->notls)
+               SKIP(return, "no TLS support");
 
-       EXPECT_EQ(sendmsg(self->fd, &msg, 0), send_len);
+       EXPECT_EQ(tls_send_cmsg(self->fd, record_type, test_str, send_len, 0),
+                 send_len);
        /* Should fail because we didn't provide a control message */
        EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1);
 
-       vec.iov_base = buf;
-       EXPECT_EQ(recvmsg(self->cfd, &msg, MSG_WAITALL | MSG_PEEK), send_len);
-
-       cmsg = CMSG_FIRSTHDR(&msg);
-       EXPECT_NE(cmsg, NULL);
-       EXPECT_EQ(cmsg->cmsg_level, SOL_TLS);
-       EXPECT_EQ(cmsg->cmsg_type, TLS_GET_RECORD_TYPE);
-       record_type = *((unsigned char *)CMSG_DATA(cmsg));
-       EXPECT_EQ(record_type, 100);
+       EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, record_type,
+                               buf, sizeof(buf), MSG_WAITALL | MSG_PEEK),
+                 send_len);
        EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
 
        /* Recv the message again without MSG_PEEK */
-       record_type = 0;
        memset(buf, 0, sizeof(buf));
 
-       EXPECT_EQ(recvmsg(self->cfd, &msg, MSG_WAITALL), send_len);
-       cmsg = CMSG_FIRSTHDR(&msg);
-       EXPECT_NE(cmsg, NULL);
-       EXPECT_EQ(cmsg->cmsg_level, SOL_TLS);
-       EXPECT_EQ(cmsg->cmsg_type, TLS_GET_RECORD_TYPE);
-       record_type = *((unsigned char *)CMSG_DATA(cmsg));
-       EXPECT_EQ(record_type, 100);
+       EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, record_type,
+                               buf, sizeof(buf), MSG_WAITALL),
+                 send_len);
        EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
 }
 
@@ -1301,6 +1386,160 @@ TEST_F(tls, shutdown_reuse)
        EXPECT_EQ(errno, EISCONN);
 }
 
+FIXTURE(tls_err)
+{
+       int fd, cfd;
+       int fd2, cfd2;
+       bool notls;
+};
+
+FIXTURE_VARIANT(tls_err)
+{
+       uint16_t tls_version;
+};
+
+FIXTURE_VARIANT_ADD(tls_err, 12_aes_gcm)
+{
+       .tls_version = TLS_1_2_VERSION,
+};
+
+FIXTURE_VARIANT_ADD(tls_err, 13_aes_gcm)
+{
+       .tls_version = TLS_1_3_VERSION,
+};
+
+FIXTURE_SETUP(tls_err)
+{
+       struct tls_crypto_info_keys tls12;
+       int ret;
+
+       tls_crypto_info_init(variant->tls_version, TLS_CIPHER_AES_GCM_128,
+                            &tls12);
+
+       ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls);
+       ulp_sock_pair(_metadata, &self->fd2, &self->cfd2, &self->notls);
+       if (self->notls)
+               return;
+
+       ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len);
+       ASSERT_EQ(ret, 0);
+
+       ret = setsockopt(self->cfd2, SOL_TLS, TLS_RX, &tls12, tls12.len);
+       ASSERT_EQ(ret, 0);
+}
+
+FIXTURE_TEARDOWN(tls_err)
+{
+       close(self->fd);
+       close(self->cfd);
+       close(self->fd2);
+       close(self->cfd2);
+}
+
+TEST_F(tls_err, bad_rec)
+{
+       char buf[64];
+
+       if (self->notls)
+               SKIP(return, "no TLS support");
+
+       memset(buf, 0x55, sizeof(buf));
+       EXPECT_EQ(send(self->fd2, buf, sizeof(buf), 0), sizeof(buf));
+       EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1);
+       EXPECT_EQ(errno, EMSGSIZE);
+       EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), MSG_DONTWAIT), -1);
+       EXPECT_EQ(errno, EAGAIN);
+}
+
+TEST_F(tls_err, bad_auth)
+{
+       char buf[128];
+       int n;
+
+       if (self->notls)
+               SKIP(return, "no TLS support");
+
+       memrnd(buf, sizeof(buf) / 2);
+       EXPECT_EQ(send(self->fd, buf, sizeof(buf) / 2, 0), sizeof(buf) / 2);
+       n = recv(self->cfd, buf, sizeof(buf), 0);
+       EXPECT_GT(n, sizeof(buf) / 2);
+
+       buf[n - 1]++;
+
+       EXPECT_EQ(send(self->fd2, buf, n, 0), n);
+       EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1);
+       EXPECT_EQ(errno, EBADMSG);
+       EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1);
+       EXPECT_EQ(errno, EBADMSG);
+}
+
+TEST_F(tls_err, bad_in_large_read)
+{
+       char txt[3][64];
+       char cip[3][128];
+       char buf[3 * 128];
+       int i, n;
+
+       if (self->notls)
+               SKIP(return, "no TLS support");
+
+       /* Put 3 records in the sockets */
+       for (i = 0; i < 3; i++) {
+               memrnd(txt[i], sizeof(txt[i]));
+               EXPECT_EQ(send(self->fd, txt[i], sizeof(txt[i]), 0),
+                         sizeof(txt[i]));
+               n = recv(self->cfd, cip[i], sizeof(cip[i]), 0);
+               EXPECT_GT(n, sizeof(txt[i]));
+               /* Break the third message */
+               if (i == 2)
+                       cip[2][n - 1]++;
+               EXPECT_EQ(send(self->fd2, cip[i], n, 0), n);
+       }
+
+       /* We should be able to receive the first two messages */
+       EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), sizeof(txt[0]) * 2);
+       EXPECT_EQ(memcmp(buf, txt[0], sizeof(txt[0])), 0);
+       EXPECT_EQ(memcmp(buf + sizeof(txt[0]), txt[1], sizeof(txt[1])), 0);
+       /* Third message is bad */
+       EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1);
+       EXPECT_EQ(errno, EBADMSG);
+       EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1);
+       EXPECT_EQ(errno, EBADMSG);
+}
+
+TEST_F(tls_err, bad_cmsg)
+{
+       char *test_str = "test_read";
+       int send_len = 10;
+       char cip[128];
+       char buf[128];
+       char txt[64];
+       int n;
+
+       if (self->notls)
+               SKIP(return, "no TLS support");
+
+       /* Queue up one data record */
+       memrnd(txt, sizeof(txt));
+       EXPECT_EQ(send(self->fd, txt, sizeof(txt), 0), sizeof(txt));
+       n = recv(self->cfd, cip, sizeof(cip), 0);
+       EXPECT_GT(n, sizeof(txt));
+       EXPECT_EQ(send(self->fd2, cip, n, 0), n);
+
+       EXPECT_EQ(tls_send_cmsg(self->fd, 100, test_str, send_len, 0), 10);
+       n = recv(self->cfd, cip, sizeof(cip), 0);
+       cip[n - 1]++; /* Break it */
+       EXPECT_GT(n, send_len);
+       EXPECT_EQ(send(self->fd2, cip, n, 0), n);
+
+       EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), sizeof(txt));
+       EXPECT_EQ(memcmp(buf, txt, sizeof(txt)), 0);
+       EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1);
+       EXPECT_EQ(errno, EBADMSG);
+       EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1);
+       EXPECT_EQ(errno, EBADMSG);
+}
+
 TEST(non_established) {
        struct tls12_crypto_info_aes_gcm_256 tls12;
        struct sockaddr_in addr;
@@ -1355,64 +1594,82 @@ TEST(non_established) {
 
 TEST(keysizes) {
        struct tls12_crypto_info_aes_gcm_256 tls12;
-       struct sockaddr_in addr;
-       int sfd, ret, fd, cfd;
-       socklen_t len;
+       int ret, fd, cfd;
        bool notls;
 
-       notls = false;
-       len = sizeof(addr);
-
        memset(&tls12, 0, sizeof(tls12));
        tls12.info.version = TLS_1_2_VERSION;
        tls12.info.cipher_type = TLS_CIPHER_AES_GCM_256;
 
-       addr.sin_family = AF_INET;
-       addr.sin_addr.s_addr = htonl(INADDR_ANY);
-       addr.sin_port = 0;
+       ulp_sock_pair(_metadata, &fd, &cfd, &notls);
 
-       fd = socket(AF_INET, SOCK_STREAM, 0);
-       sfd = socket(AF_INET, SOCK_STREAM, 0);
+       if (!notls) {
+               ret = setsockopt(fd, SOL_TLS, TLS_TX, &tls12,
+                                sizeof(tls12));
+               EXPECT_EQ(ret, 0);
+
+               ret = setsockopt(cfd, SOL_TLS, TLS_RX, &tls12,
+                                sizeof(tls12));
+               EXPECT_EQ(ret, 0);
+       }
+
+       close(fd);
+       close(cfd);
+}
+
+TEST(tls_v6ops) {
+       struct tls_crypto_info_keys tls12;
+       struct sockaddr_in6 addr, addr2;
+       int sfd, ret, fd;
+       socklen_t len, len2;
+
+       tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_128, &tls12);
+
+       addr.sin6_family = AF_INET6;
+       addr.sin6_addr = in6addr_any;
+       addr.sin6_port = 0;
+
+       fd = socket(AF_INET6, SOCK_STREAM, 0);
+       sfd = socket(AF_INET6, SOCK_STREAM, 0);
 
        ret = bind(sfd, &addr, sizeof(addr));
        ASSERT_EQ(ret, 0);
        ret = listen(sfd, 10);
        ASSERT_EQ(ret, 0);
 
+       len = sizeof(addr);
        ret = getsockname(sfd, &addr, &len);
        ASSERT_EQ(ret, 0);
 
        ret = connect(fd, &addr, sizeof(addr));
        ASSERT_EQ(ret, 0);
 
+       len = sizeof(addr);
+       ret = getsockname(fd, &addr, &len);
+       ASSERT_EQ(ret, 0);
+
        ret = setsockopt(fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
-       if (ret != 0) {
-               notls = true;
-               printf("Failure setting TCP_ULP, testing without tls\n");
+       if (ret) {
+               ASSERT_EQ(errno, ENOENT);
+               SKIP(return, "no TLS support");
        }
+       ASSERT_EQ(ret, 0);
 
-       if (!notls) {
-               ret = setsockopt(fd, SOL_TLS, TLS_TX, &tls12,
-                                sizeof(tls12));
-               EXPECT_EQ(ret, 0);
-       }
+       ret = setsockopt(fd, SOL_TLS, TLS_TX, &tls12, tls12.len);
+       ASSERT_EQ(ret, 0);
 
-       cfd = accept(sfd, &addr, &len);
-       ASSERT_GE(cfd, 0);
+       ret = setsockopt(fd, SOL_TLS, TLS_RX, &tls12, tls12.len);
+       ASSERT_EQ(ret, 0);
 
-       if (!notls) {
-               ret = setsockopt(cfd, IPPROTO_TCP, TCP_ULP, "tls",
-                                sizeof("tls"));
-               EXPECT_EQ(ret, 0);
+       len2 = sizeof(addr2);
+       ret = getsockname(fd, &addr2, &len2);
+       ASSERT_EQ(ret, 0);
 
-               ret = setsockopt(cfd, SOL_TLS, TLS_RX, &tls12,
-                                sizeof(tls12));
-               EXPECT_EQ(ret, 0);
-       }
+       EXPECT_EQ(len2, len);
+       EXPECT_EQ(memcmp(&addr, &addr2, len), 0);
 
-       close(sfd);
        close(fd);
-       close(cfd);
+       close(sfd);
 }
 
 TEST_HARNESS_MAIN
index 8748199..ffca314 100644 (file)
@@ -5,7 +5,8 @@ TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \
        conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \
        nft_concat_range.sh nft_conntrack_helper.sh \
        nft_queue.sh nft_meta.sh nf_nat_edemux.sh \
-       ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh
+       ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh \
+       conntrack_vrf.sh
 
 LDLIBS = -lmnl
 TEST_GEN_FILES =  nf-queue
diff --git a/tools/testing/selftests/netfilter/conntrack_vrf.sh b/tools/testing/selftests/netfilter/conntrack_vrf.sh
new file mode 100755 (executable)
index 0000000..91f3ef0
--- /dev/null
@@ -0,0 +1,219 @@
+#!/bin/sh
+
+# This script demonstrates interaction of conntrack and vrf.
+# The vrf driver calls the netfilter hooks again, with oif/iif
+# pointing at the VRF device.
+#
+# For ingress, this means the first iteration has the iifname of the
+# lower/real device; in this script, that's veth0.
+# The second iteration has iifname set to the vrf device, tvrf in this script.
+#
+# For egress, this is reversed: first iteration has the vrf device,
+# second iteration is done with the lower/real/veth0 device.
+#
+# test_ct_zone_in demonstrates an unexpected change of nftables
+# behavior caused by commit 09e856d54bda5f28 "vrf: Reset skb conntrack
+# connection on VRF rcv".
+#
+# It was possible to assign conntrack zone to a packet (or mark it for
+# `notracking`) in the prerouting chain before conntrack, based on real iif.
+#
+# After the change, the zone assignment based on the real iif is lost;
+# instead, the zone is assigned based on the VRF master interface (in case
+# such a rule exists).  Thus it is impossible to distinguish packets based
+# on the original interface.
+#
+# test_masquerade_vrf and test_masquerade_veth0 demonstrate the problem
+# that was supposed to be fixed by the commit mentioned above to make sure
+# that any fix to test case 1 won't break masquerade again.
+
+ksft_skip=4
+
+IP0=172.30.30.1
+IP1=172.30.30.2
+PFXL=30
+ret=0
+
+sfx=$(mktemp -u "XXXXXXXX")
+ns0="ns0-$sfx"
+ns1="ns1-$sfx"
+
+cleanup()
+{
+       ip netns pids $ns0 | xargs kill 2>/dev/null
+       ip netns pids $ns1 | xargs kill 2>/dev/null
+
+       ip netns del $ns0 $ns1
+}
+
+nft --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+       echo "SKIP: Could not run test without nft tool"
+       exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+       echo "SKIP: Could not run test without ip tool"
+       exit $ksft_skip
+fi
+
+ip netns add "$ns0"
+if [ $? -ne 0 ];then
+       echo "SKIP: Could not create net namespace $ns0"
+       exit $ksft_skip
+fi
+ip netns add "$ns1"
+
+trap cleanup EXIT
+
+ip netns exec $ns0 sysctl -q -w net.ipv4.conf.default.rp_filter=0
+ip netns exec $ns0 sysctl -q -w net.ipv4.conf.all.rp_filter=0
+ip netns exec $ns0 sysctl -q -w net.ipv4.conf.all.rp_filter=0
+
+ip link add veth0 netns "$ns0" type veth peer name veth0 netns "$ns1" > /dev/null 2>&1
+if [ $? -ne 0 ];then
+       echo "SKIP: Could not add veth device"
+       exit $ksft_skip
+fi
+
+ip -net $ns0 li add tvrf type vrf table 9876
+if [ $? -ne 0 ];then
+       echo "SKIP: Could not add vrf device"
+       exit $ksft_skip
+fi
+
+ip -net $ns0 li set lo up
+
+ip -net $ns0 li set veth0 master tvrf
+ip -net $ns0 li set tvrf up
+ip -net $ns0 li set veth0 up
+ip -net $ns1 li set veth0 up
+
+ip -net $ns0 addr add $IP0/$PFXL dev veth0
+ip -net $ns1 addr add $IP1/$PFXL dev veth0
+
+ip netns exec $ns1 iperf3 -s > /dev/null 2>&1&
+if [ $? -ne 0 ];then
+       echo "SKIP: Could not start iperf3"
+       exit $ksft_skip
+fi
+
+# test vrf ingress handling.
+# The incoming connection should be placed in conntrack zone 1,
+# as decided by the first iteration of the ruleset.
+test_ct_zone_in()
+{
+ip netns exec $ns0 nft -f - <<EOF
+table testct {
+       chain rawpre {
+               type filter hook prerouting priority raw;
+
+               iif { veth0, tvrf } counter meta nftrace set 1
+               iif veth0 counter ct zone set 1 counter return
+               iif tvrf counter ct zone set 2 counter return
+               ip protocol icmp counter
+               notrack counter
+       }
+
+       chain rawout {
+               type filter hook output priority raw;
+
+               oif veth0 counter ct zone set 1 counter return
+               oif tvrf counter ct zone set 2 counter return
+               notrack counter
+       }
+}
+EOF
+       ip netns exec $ns1 ping -W 1 -c 1 -I veth0 $IP0 > /dev/null
+
+       # should be in zone 1, not zone 2
+       count=$(ip netns exec $ns0 conntrack -L -s $IP1 -d $IP0 -p icmp --zone 1 2>/dev/null | wc -l)
+       if [ $count -eq 1 ]; then
+               echo "PASS: entry found in conntrack zone 1"
+       else
+               echo "FAIL: entry not found in conntrack zone 1"
+               count=$(ip netns exec $ns0 conntrack -L -s $IP1 -d $IP0 -p icmp --zone 2 2> /dev/null | wc -l)
+               if [ $count -eq 1 ]; then
+                       echo "FAIL: entry found in zone 2 instead"
+               else
+                       echo "FAIL: entry not in zone 1 or 2, dumping table"
+                       ip netns exec $ns0 conntrack -L
+                       ip netns exec $ns0 nft list ruleset
+               fi
+       fi
+}
+
+# add masq rule that gets evaluated w. outif set to vrf device.
+# This tests the first iteration of the packet through conntrack,
+# oifname is the vrf device.
+test_masquerade_vrf()
+{
+       ip netns exec $ns0 conntrack -F 2>/dev/null
+
+ip netns exec $ns0 nft -f - <<EOF
+flush ruleset
+table ip nat {
+       chain postrouting {
+               type nat hook postrouting priority 0;
+               # NB: masquerade should always be combined with 'oif(name) bla',
+               # lack of this is intentional here; we want to exercise double-snat.
+               ip saddr 172.30.30.0/30 counter masquerade random
+       }
+}
+EOF
+       ip netns exec $ns0 ip vrf exec tvrf iperf3 -t 1 -c $IP1 >/dev/null
+       if [ $? -ne 0 ]; then
+               echo "FAIL: iperf3 connect failure with masquerade + sport rewrite on vrf device"
+               ret=1
+               return
+       fi
+
+       # must also check that nat table was evaluated on second (lower device) iteration.
+       ip netns exec $ns0 nft list table ip nat |grep -q 'counter packets 2'
+       if [ $? -eq 0 ]; then
+               echo "PASS: iperf3 connect with masquerade + sport rewrite on vrf device"
+       else
+               echo "FAIL: vrf masq rule has unexpected counter value"
+               ret=1
+       fi
+}
+
+# Add a masquerade rule that is evaluated with oif set to the veth device.
+# This tests the second iteration of the packet through conntrack,
+# where oifname is the lower device (veth0 in this case).
+test_masquerade_veth()
+{
+       ip netns exec $ns0 conntrack -F 2>/dev/null
+ip netns exec $ns0 nft -f - <<EOF
+flush ruleset
+table ip nat {
+       chain postrouting {
+               type nat hook postrouting priority 0;
+               meta oif veth0 ip saddr 172.30.30.0/30 counter masquerade random
+       }
+}
+EOF
+       ip netns exec $ns0 ip vrf exec tvrf iperf3 -t 1 -c $IP1 > /dev/null
+       if [ $? -ne 0 ]; then
+               echo "FAIL: iperf3 connect failure with masquerade + sport rewrite on veth device"
+               ret=1
+               return
+       fi
+
+       # must also check that nat table was evaluated on second (lower device) iteration.
+       ip netns exec $ns0 nft list table ip nat |grep -q 'counter packets 2'
+       if [ $? -eq 0 ]; then
+               echo "PASS: iperf3 connect with masquerade + sport rewrite on veth device"
+       else
+               echo "FAIL: vrf masq rule has unexpected counter value"
+               ret=1
+       fi
+}
+
+test_ct_zone_in
+test_masquerade_vrf
+test_masquerade_veth
+
+exit $ret
index da1c1e4..d88867d 100755 (executable)
@@ -759,19 +759,21 @@ test_port_shadow()
        local result=""
        local logmsg=""
 
-       echo ROUTER | ip netns exec "$ns0" nc -w 5 -u -l -p 1405 >/dev/null 2>&1 &
-       nc_r=$!
+       # make shadow entry, from client (ns2), going to (ns1), port 41404, sport 1405.
+       echo "fake-entry" | ip netns exec "$ns2" timeout 1 socat -u STDIN UDP:"$daddrc":41404,sourceport=1405
 
-       echo CLIENT | ip netns exec "$ns2" nc -w 5 -u -l -p 1405 >/dev/null 2>&1 &
-       nc_c=$!
+       echo ROUTER | ip netns exec "$ns0" timeout 5 socat -u STDIN UDP4-LISTEN:1405 &
+       sc_r=$!
 
-       # make shadow entry, from client (ns2), going to (ns1), port 41404, sport 1405.
-       echo "fake-entry" | ip netns exec "$ns2" nc -w 1 -p 1405 -u "$daddrc" 41404 > /dev/null
+       echo CLIENT | ip netns exec "$ns2" timeout 5 socat -u STDIN UDP4-LISTEN:1405,reuseport &
+       sc_c=$!
+
+       sleep 0.3
 
        # ns1 tries to connect to ns0:1405.  With default settings this should connect
        # to client, it matches the conntrack entry created above.
 
-       result=$(echo "" | ip netns exec "$ns1" nc -w 1 -p 41404 -u "$daddrs" 1405)
+       result=$(echo "data" | ip netns exec "$ns1" timeout 1 socat - UDP:"$daddrs":1405,sourceport=41404)
 
        if [ "$result" = "$expect" ] ;then
                echo "PASS: portshadow test $test: got reply from ${expect}${logmsg}"
@@ -780,7 +782,7 @@ test_port_shadow()
                ret=1
        fi
 
-       kill $nc_r $nc_c 2>/dev/null
+       kill $sc_r $sc_c 2>/dev/null
 
        # flush udp entries for next test round, if any
        ip netns exec "$ns0" conntrack -F >/dev/null 2>&1
@@ -816,11 +818,10 @@ table $family raw {
        chain prerouting {
                type filter hook prerouting priority -300; policy accept;
                meta iif veth0 udp dport 1405 notrack
-               udp dport 1405 notrack
        }
        chain output {
                type filter hook output priority -300; policy accept;
-               udp sport 1405 notrack
+               meta oif veth0 udp sport 1405 notrack
        }
 }
 EOF
@@ -851,6 +852,18 @@ test_port_shadowing()
 {
        local family="ip"
 
+       conntrack -h >/dev/null 2>&1
+       if [ $? -ne 0 ];then
+               echo "SKIP: Could not run nat port shadowing test without conntrack tool"
+               return
+       fi
+
+       socat -h > /dev/null 2>&1
+       if [ $? -ne 0 ];then
+               echo "SKIP: Could not run nat port shadowing test without socat tool"
+               return
+       fi
+
        ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
        ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
 
index 3d202b9..7d27f1f 100755 (executable)
@@ -16,6 +16,10 @@ timeout=4
 
 cleanup()
 {
+       ip netns pids ${ns1} | xargs kill 2>/dev/null
+       ip netns pids ${ns2} | xargs kill 2>/dev/null
+       ip netns pids ${nsrouter} | xargs kill 2>/dev/null
+
        ip netns del ${ns1}
        ip netns del ${ns2}
        ip netns del ${nsrouter}
@@ -332,6 +336,55 @@ EOF
        echo "PASS: tcp via loopback and re-queueing"
 }
 
+test_icmp_vrf() {
+       ip -net $ns1 link add tvrf type vrf table 9876
+       if [ $? -ne 0 ];then
+               echo "SKIP: Could not add vrf device"
+               return
+       fi
+
+       ip -net $ns1 li set eth0 master tvrf
+       ip -net $ns1 li set tvrf up
+
+       ip -net $ns1 route add 10.0.2.0/24 via 10.0.1.1 dev eth0 table 9876
+ip netns exec ${ns1} nft -f /dev/stdin <<EOF
+flush ruleset
+table inet filter {
+       chain output {
+               type filter hook output priority 0; policy accept;
+               meta oifname "tvrf" icmp type echo-request counter queue num 1
+               meta oifname "eth0" icmp type echo-request counter queue num 1
+       }
+       chain post {
+               type filter hook postrouting priority 0; policy accept;
+               meta oifname "tvrf" icmp type echo-request counter queue num 1
+               meta oifname "eth0" icmp type echo-request counter queue num 1
+       }
+}
+EOF
+       ip netns exec ${ns1} ./nf-queue -q 1 -t $timeout &
+       local nfqpid=$!
+
+       sleep 1
+       ip netns exec ${ns1} ip vrf exec tvrf ping -c 1 10.0.2.99 > /dev/null
+
+       for n in output post; do
+               for d in tvrf eth0; do
+                       ip netns exec ${ns1} nft list chain inet filter $n | grep -q "oifname \"$d\" icmp type echo-request counter packets 1"
+                       if [ $? -ne 0 ] ; then
+                               echo "FAIL: chain $n: icmp packet counter mismatch for device $d" 1>&2
+                               ip netns exec ${ns1} nft list ruleset
+                               ret=1
+                               return
+                       fi
+               done
+       done
+
+       wait $nfqpid
+       [ $? -eq 0 ] && echo "PASS: icmp+nfqueue via vrf"
+       wait 2>/dev/null
+}
+
 ip netns exec ${nsrouter} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
 ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
 ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
@@ -372,5 +425,6 @@ test_queue 20
 test_tcp_forward
 test_tcp_localhost
 test_tcp_localhost_requeue
+test_icmp_vrf
 
 exit $ret
index 503982b..9183240 100644 (file)
@@ -68,7 +68,7 @@
         "cmdUnderTest": "$TC action add action bpf object-file $EBPFDIR/action.o section action-ok index 667",
         "expExitCode": "0",
         "verifyCmd": "$TC action get action bpf index 667",
-        "matchPattern": "action order [0-9]*: bpf action.o:\\[action-ok\\] id [0-9]* tag [0-9a-f]{16}( jited)? default-action pipe.*index 667 ref",
+        "matchPattern": "action order [0-9]*: bpf action.o:\\[action-ok\\] id [0-9].* tag [0-9a-f]{16}( jited)? default-action pipe.*index 667 ref",
         "matchCount": "1",
         "teardown": [
             "$TC action flush action bpf"
index 88a20c7..c604609 100644 (file)
@@ -15,7 +15,7 @@
            "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq",
            "expExitCode": "0",
            "verifyCmd": "$TC qdisc show dev $ETH",
-           "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+           "matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent 1:[1-4]",
            "matchCount": "4",
            "teardown": [
                    "echo \"1\" > /sys/bus/netdevsim/del_device"
@@ -37,7 +37,7 @@
            "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq",
            "expExitCode": "0",
            "verifyCmd": "$TC qdisc show dev $ETH",
-           "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-9,a-f][0-9,a-f]{0,2} bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+           "matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent 1:[1-9,a-f][0-9,a-f]{0,2}",
            "matchCount": "256",
            "teardown": [
                    "echo \"1\" > /sys/bus/netdevsim/del_device"
@@ -60,7 +60,7 @@
            "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq",
            "expExitCode": "2",
            "verifyCmd": "$TC qdisc show dev $ETH",
-           "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+           "matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent 1:[1-4]",
            "matchCount": "4",
            "teardown": [
                    "echo \"1\" > /sys/bus/netdevsim/del_device"
@@ -82,7 +82,7 @@
            "cmdUnderTest": "$TC qdisc del dev $ETH root handle 1: mq",
            "expExitCode": "2",
            "verifyCmd": "$TC qdisc show dev $ETH",
-           "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+           "matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent 1:[1-4]",
            "matchCount": "0",
            "teardown": [
                    "echo \"1\" > /sys/bus/netdevsim/del_device"
            "cmdUnderTest": "$TC qdisc del dev $ETH root handle 1: mq",
            "expExitCode": "2",
            "verifyCmd": "$TC qdisc show dev $ETH",
-           "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+           "matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent 1:[1-4]",
            "matchCount": "0",
            "teardown": [
                    "echo \"1\" > /sys/bus/netdevsim/del_device"
            "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq",
            "expExitCode": "2",
            "verifyCmd": "$TC qdisc show dev $ETH",
-           "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+           "matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent 1:[1-4]",
            "matchCount": "0",
            "teardown": [
                    "echo \"1\" > /sys/bus/netdevsim/del_device"
index ebc4ee0..8a9461a 100755 (executable)
@@ -276,7 +276,11 @@ n0 ping -W 1 -c 1 192.168.241.2
 n1 wg set wg0 peer "$pub2" endpoint 192.168.241.2:7
 ip2 link del wg0
 ip2 link del wg1
-! n0 ping -W 1 -c 10 -f 192.168.241.2 || false # Should not crash kernel
+read _ _ tx_bytes_before < <(n0 wg show wg1 transfer)
+! n0 ping -W 1 -c 10 -f 192.168.241.2 || false
+sleep 1
+read _ _ tx_bytes_after < <(n0 wg show wg1 transfer)
+(( tx_bytes_after - tx_bytes_before < 70000 ))
 
 ip0 link del wg1
 ip1 link del wg0
@@ -609,6 +613,28 @@ ip0 link set wg0 up
 kill $ncat_pid
 ip0 link del wg0
 
+# Ensure that dst_cache references don't outlive netns lifetime
+ip1 link add dev wg0 type wireguard
+ip2 link add dev wg0 type wireguard
+configure_peers
+ip1 link add veth1 type veth peer name veth2
+ip1 link set veth2 netns $netns2
+ip1 addr add fd00:aa::1/64 dev veth1
+ip2 addr add fd00:aa::2/64 dev veth2
+ip1 link set veth1 up
+ip2 link set veth2 up
+waitiface $netns1 veth1
+waitiface $netns2 veth2
+ip1 -6 route add default dev veth1 via fd00:aa::2
+ip2 -6 route add default dev veth2 via fd00:aa::1
+n1 wg set wg0 peer "$pub2" endpoint [fd00:aa::2]:2
+n2 wg set wg0 peer "$pub1" endpoint [fd00:aa::1]:1
+n1 ping6 -c 1 fd00::2
+pp ip netns delete $netns1
+pp ip netns delete $netns2
+pp ip netns add $netns1
+pp ip netns add $netns2
+
 # Ensure there aren't circular reference loops
 ip1 link add wg1 type wireguard
 ip2 link add wg2 type wireguard
@@ -627,7 +653,7 @@ while read -t 0.1 -r line 2>/dev/null || [[ $? -ne 142 ]]; do
 done < /dev/kmsg
 alldeleted=1
 for object in "${!objects[@]}"; do
-       if [[ ${objects["$object"]} != *createddestroyed ]]; then
+       if [[ ${objects["$object"]} != *createddestroyed && ${objects["$object"]} != *createdcreateddestroyeddestroyed ]]; then
                echo "Error: $object: merely ${objects["$object"]}" >&3
                alldeleted=0
        fi
index fe07d97..2b321b8 100644 (file)
@@ -47,7 +47,7 @@ CONFIG_DEBUG_ATOMIC_SLEEP=y
 CONFIG_TRACE_IRQFLAGS=y
 CONFIG_DEBUG_BUGVERBOSE=y
 CONFIG_DEBUG_LIST=y
-CONFIG_DEBUG_PI_LIST=y
+CONFIG_DEBUG_PLIST=y
 CONFIG_PROVE_RCU=y
 CONFIG_SPARSE_RCU_POINTER=y
 CONFIG_RCU_CPU_STALL_TIMEOUT=21
index 74db83a..a9b5a52 100644 (file)
@@ -66,6 +66,7 @@ CONFIG_PROC_SYSCTL=y
 CONFIG_SYSFS=y
 CONFIG_TMPFS=y
 CONFIG_CONSOLE_LOGLEVEL_DEFAULT=15
+CONFIG_LOG_BUF_SHIFT=18
 CONFIG_PRINTK_TIME=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_LEGACY_VSYSCALL_NONE=y
index 62b3914..97cf541 100644 (file)
@@ -13,6 +13,9 @@ config HAVE_KVM_IRQFD
 config HAVE_KVM_IRQ_ROUTING
        bool
 
+config HAVE_KVM_DIRTY_RING
+       bool
+
 config HAVE_KVM_EVENTFD
        bool
        select EVENTFD
diff --git a/virt/kvm/Makefile.kvm b/virt/kvm/Makefile.kvm
new file mode 100644 (file)
index 0000000..ffdcad3
--- /dev/null
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for Kernel-based Virtual Machine module
+#
+
+KVM ?= ../../../virt/kvm
+
+kvm-y := $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/binary_stats.o
+kvm-$(CONFIG_KVM_VFIO) += $(KVM)/vfio.o
+kvm-$(CONFIG_KVM_MMIO) += $(KVM)/coalesced_mmio.o
+kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o
+kvm-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(KVM)/irqchip.o
+kvm-$(CONFIG_HAVE_KVM_DIRTY_RING) += $(KVM)/dirty_ring.o
index dd77768..9bfe1d6 100644 (file)
@@ -85,7 +85,7 @@ static void async_pf_execute(struct work_struct *work)
 
        trace_kvm_async_pf_completed(addr, cr2_or_gpa);
 
-       rcuwait_wake_up(&vcpu->wait);
+       __kvm_vcpu_wake_up(vcpu);
 
        mmput(mm);
        kvm_put_kvm(vcpu->kvm);
index 9646bb9..b0f7e6e 100644 (file)
@@ -305,8 +305,9 @@ bool kvm_make_all_cpus_request_except(struct kvm *kvm, unsigned int req,
 {
        struct kvm_vcpu *vcpu;
        struct cpumask *cpus;
+       unsigned long i;
        bool called;
-       int i, me;
+       int me;
 
        me = get_cpu();
 
@@ -421,7 +422,9 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
        vcpu->kvm = kvm;
        vcpu->vcpu_id = id;
        vcpu->pid = NULL;
+#ifndef __KVM_HAVE_ARCH_WQP
        rcuwait_init(&vcpu->wait);
+#endif
        kvm_async_pf_vcpu_init(vcpu);
 
        vcpu->pre_pcpu = -1;
@@ -432,10 +435,10 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
        vcpu->preempted = false;
        vcpu->ready = false;
        preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops);
-       vcpu->last_used_slot = 0;
+       vcpu->last_used_slot = NULL;
 }
 
-void kvm_vcpu_destroy(struct kvm_vcpu *vcpu)
+static void kvm_vcpu_destroy(struct kvm_vcpu *vcpu)
 {
        kvm_dirty_ring_free(&vcpu->dirty_ring);
        kvm_arch_vcpu_destroy(vcpu);
@@ -450,7 +453,20 @@ void kvm_vcpu_destroy(struct kvm_vcpu *vcpu)
        free_page((unsigned long)vcpu->run);
        kmem_cache_free(kvm_vcpu_cache, vcpu);
 }
-EXPORT_SYMBOL_GPL(kvm_vcpu_destroy);
+
+void kvm_destroy_vcpus(struct kvm *kvm)
+{
+       unsigned long i;
+       struct kvm_vcpu *vcpu;
+
+       kvm_for_each_vcpu(i, vcpu, kvm) {
+               kvm_vcpu_destroy(vcpu);
+               xa_erase(&kvm->vcpu_array, i);
+       }
+
+       atomic_set(&kvm->online_vcpus, 0);
+}
+EXPORT_SYMBOL_GPL(kvm_destroy_vcpus);
 
 #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
 static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn)
@@ -498,6 +514,12 @@ static void kvm_null_fn(void)
 }
 #define IS_KVM_NULL_FN(fn) ((fn) == (void *)kvm_null_fn)
 
+/* Iterate over each memslot intersecting [start, last] (inclusive) range */
+#define kvm_for_each_memslot_in_hva_range(node, slots, start, last)         \
+       for (node = interval_tree_iter_first(&slots->hva_tree, start, last); \
+            node;                                                           \
+            node = interval_tree_iter_next(node, start, last))
+
 static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
                                                  const struct kvm_hva_range *range)
 {
@@ -507,6 +529,9 @@ static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
        struct kvm_memslots *slots;
        int i, idx;
 
+       if (WARN_ON_ONCE(range->end <= range->start))
+               return 0;
+
        /* A null handler is allowed if and only if on_lock() is provided. */
        if (WARN_ON_ONCE(IS_KVM_NULL_FN(range->on_lock) &&
                         IS_KVM_NULL_FN(range->handler)))
@@ -515,15 +540,17 @@ static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
        idx = srcu_read_lock(&kvm->srcu);
 
        for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
+               struct interval_tree_node *node;
+
                slots = __kvm_memslots(kvm, i);
-               kvm_for_each_memslot(slot, slots) {
+               kvm_for_each_memslot_in_hva_range(node, slots,
+                                                 range->start, range->end - 1) {
                        unsigned long hva_start, hva_end;
 
+                       slot = container_of(node, struct kvm_memory_slot, hva_node[slots->node_idx]);
                        hva_start = max(range->start, slot->userspace_addr);
                        hva_end = min(range->end, slot->userspace_addr +
                                                  (slot->npages << PAGE_SHIFT));
-                       if (hva_start >= hva_end)
-                               continue;
 
                        /*
                         * To optimize for the likely case where the address
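The two hunks above swap the walk over every memslot for an interval tree query over the inclusive range [start, end - 1], keeping the same max/min clamping of the overlap. Below is a minimal userspace sketch of those semantics, not part of the patch: a plain array stands in for the interval tree, and struct slot, handle_hva_range and the sample addresses are invented for illustration.

#include <stdio.h>

struct slot { unsigned long userspace_addr, size; const char *name; };

/*
 * Visit only slots whose [userspace_addr, userspace_addr + size) range
 * intersects [start, end), clamping to the overlap like hva_start/hva_end.
 */
static void handle_hva_range(struct slot *slots, int n,
                             unsigned long start, unsigned long end)
{
        for (int i = 0; i < n; i++) {
                unsigned long s_start = slots[i].userspace_addr;
                unsigned long s_end = s_start + slots[i].size;

                if (s_end <= start || s_start >= end)
                        continue;       /* no intersection, the tree would skip it */

                unsigned long hva_start = start > s_start ? start : s_start;
                unsigned long hva_end = end < s_end ? end : s_end;

                printf("%s: handle [%#lx, %#lx)\n",
                       slots[i].name, hva_start, hva_end);
        }
}

int main(void)
{
        struct slot slots[] = {
                { 0x10000, 0x4000, "slot A" },
                { 0x20000, 0x8000, "slot B" },
        };

        handle_hva_range(slots, 2, 0x13000, 0x22000);
        return 0;
}

Because the tree query takes an inclusive last (end - 1) and only returns intersecting nodes, the old "hva_start >= hva_end" skip becomes unnecessary, which is what the hunk above deletes.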
@@ -851,21 +878,6 @@ static void kvm_destroy_pm_notifier(struct kvm *kvm)
 }
 #endif /* CONFIG_HAVE_KVM_PM_NOTIFIER */
 
-static struct kvm_memslots *kvm_alloc_memslots(void)
-{
-       int i;
-       struct kvm_memslots *slots;
-
-       slots = kvzalloc(sizeof(struct kvm_memslots), GFP_KERNEL_ACCOUNT);
-       if (!slots)
-               return NULL;
-
-       for (i = 0; i < KVM_MEM_SLOTS_NUM; i++)
-               slots->id_to_index[i] = -1;
-
-       return slots;
-}
-
 static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
 {
        if (!memslot->dirty_bitmap)
@@ -875,27 +887,33 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
        memslot->dirty_bitmap = NULL;
 }
 
+/* This does not remove the slot from struct kvm_memslots data structures */
 static void kvm_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
 {
        kvm_destroy_dirty_bitmap(slot);
 
        kvm_arch_free_memslot(kvm, slot);
 
-       slot->flags = 0;
-       slot->npages = 0;
+       kfree(slot);
 }
 
 static void kvm_free_memslots(struct kvm *kvm, struct kvm_memslots *slots)
 {
+       struct hlist_node *idnode;
        struct kvm_memory_slot *memslot;
+       int bkt;
 
-       if (!slots)
+       /*
+        * The same memslot objects live in both active and inactive sets,
+        * arbitrarily free using index '1' so the second invocation of this
+        * function isn't operating over a structure with dangling pointers
+        * (even though this function isn't actually touching them).
+        */
+       if (!slots->node_idx)
                return;
 
-       kvm_for_each_memslot(memslot, slots)
+       hash_for_each_safe(slots->id_hash, bkt, idnode, memslot, id_node[1])
                kvm_free_memslot(kvm, memslot);
-
-       kvfree(slots);
 }
 
 static umode_t kvm_stats_debugfs_mode(const struct _kvm_stats_desc *pdesc)
@@ -1034,8 +1052,9 @@ int __weak kvm_arch_create_vm_debugfs(struct kvm *kvm)
 static struct kvm *kvm_create_vm(unsigned long type)
 {
        struct kvm *kvm = kvm_arch_alloc_vm();
+       struct kvm_memslots *slots;
        int r = -ENOMEM;
-       int i;
+       int i, j;
 
        if (!kvm)
                return ERR_PTR(-ENOMEM);
@@ -1050,6 +1069,7 @@ static struct kvm *kvm_create_vm(unsigned long type)
        mutex_init(&kvm->slots_arch_lock);
        spin_lock_init(&kvm->mn_invalidate_lock);
        rcuwait_init(&kvm->mn_memslots_update_rcuwait);
+       xa_init(&kvm->vcpu_array);
 
        INIT_LIST_HEAD(&kvm->devices);
 
@@ -1062,13 +1082,20 @@ static struct kvm *kvm_create_vm(unsigned long type)
 
        refcount_set(&kvm->users_count, 1);
        for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
-               struct kvm_memslots *slots = kvm_alloc_memslots();
+               for (j = 0; j < 2; j++) {
+                       slots = &kvm->__memslots[i][j];
 
-               if (!slots)
-                       goto out_err_no_arch_destroy_vm;
-               /* Generations must be different for each address space. */
-               slots->generation = i;
-               rcu_assign_pointer(kvm->memslots[i], slots);
+                       atomic_long_set(&slots->last_used_slot, (unsigned long)NULL);
+                       slots->hva_tree = RB_ROOT_CACHED;
+                       slots->gfn_tree = RB_ROOT;
+                       hash_init(slots->id_hash);
+                       slots->node_idx = j;
+
+                       /* Generations must be different for each address space. */
+                       slots->generation = i;
+               }
+
+               rcu_assign_pointer(kvm->memslots[i], &kvm->__memslots[i][0]);
        }
 
        for (i = 0; i < KVM_NR_BUSES; i++) {
@@ -1122,8 +1149,6 @@ out_err_no_arch_destroy_vm:
        WARN_ON_ONCE(!refcount_dec_and_test(&kvm->users_count));
        for (i = 0; i < KVM_NR_BUSES; i++)
                kfree(kvm_get_bus(kvm, i));
-       for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
-               kvm_free_memslots(kvm, __kvm_memslots(kvm, i));
        cleanup_srcu_struct(&kvm->irq_srcu);
 out_err_no_irq_srcu:
        cleanup_srcu_struct(&kvm->srcu);
@@ -1188,8 +1213,10 @@ static void kvm_destroy_vm(struct kvm *kvm)
 #endif
        kvm_arch_destroy_vm(kvm);
        kvm_destroy_devices(kvm);
-       for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
-               kvm_free_memslots(kvm, __kvm_memslots(kvm, i));
+       for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
+               kvm_free_memslots(kvm, &kvm->__memslots[i][0]);
+               kvm_free_memslots(kvm, &kvm->__memslots[i][1]);
+       }
        cleanup_srcu_struct(&kvm->irq_srcu);
        cleanup_srcu_struct(&kvm->srcu);
        kvm_arch_free_vm(kvm);
@@ -1259,165 +1286,136 @@ static int kvm_alloc_dirty_bitmap(struct kvm_memory_slot *memslot)
        return 0;
 }
 
-/*
- * Delete a memslot by decrementing the number of used slots and shifting all
- * other entries in the array forward one spot.
- */
-static inline void kvm_memslot_delete(struct kvm_memslots *slots,
-                                     struct kvm_memory_slot *memslot)
+static struct kvm_memslots *kvm_get_inactive_memslots(struct kvm *kvm, int as_id)
 {
-       struct kvm_memory_slot *mslots = slots->memslots;
-       int i;
-
-       if (WARN_ON(slots->id_to_index[memslot->id] == -1))
-               return;
-
-       slots->used_slots--;
+       struct kvm_memslots *active = __kvm_memslots(kvm, as_id);
+       int node_idx_inactive = active->node_idx ^ 1;
 
-       if (atomic_read(&slots->last_used_slot) >= slots->used_slots)
-               atomic_set(&slots->last_used_slot, 0);
-
-       for (i = slots->id_to_index[memslot->id]; i < slots->used_slots; i++) {
-               mslots[i] = mslots[i + 1];
-               slots->id_to_index[mslots[i].id] = i;
-       }
-       mslots[i] = *memslot;
-       slots->id_to_index[memslot->id] = -1;
+       return &kvm->__memslots[as_id][node_idx_inactive];
 }
 
 /*
- * "Insert" a new memslot by incrementing the number of used slots.  Returns
- * the new slot's initial index into the memslots array.
+ * Helper to get the address space ID when one of memslot pointers may be NULL.
+ * This also serves as a sanity check that at least one of the pointers is non-NULL,
+ * and that their address space IDs don't diverge.
  */
-static inline int kvm_memslot_insert_back(struct kvm_memslots *slots)
+static int kvm_memslots_get_as_id(struct kvm_memory_slot *a,
+                                 struct kvm_memory_slot *b)
 {
-       return slots->used_slots++;
-}
+       if (WARN_ON_ONCE(!a && !b))
+               return 0;
 
-/*
- * Move a changed memslot backwards in the array by shifting existing slots
- * with a higher GFN toward the front of the array.  Note, the changed memslot
- * itself is not preserved in the array, i.e. not swapped at this time, only
- * its new index into the array is tracked.  Returns the changed memslot's
- * current index into the memslots array.
- */
-static inline int kvm_memslot_move_backward(struct kvm_memslots *slots,
-                                           struct kvm_memory_slot *memslot)
-{
-       struct kvm_memory_slot *mslots = slots->memslots;
-       int i;
+       if (!a)
+               return b->as_id;
+       if (!b)
+               return a->as_id;
 
-       if (WARN_ON_ONCE(slots->id_to_index[memslot->id] == -1) ||
-           WARN_ON_ONCE(!slots->used_slots))
-               return -1;
+       WARN_ON_ONCE(a->as_id != b->as_id);
+       return a->as_id;
+}
 
-       /*
-        * Move the target memslot backward in the array by shifting existing
-        * memslots with a higher GFN (than the target memslot) towards the
-        * front of the array.
-        */
-       for (i = slots->id_to_index[memslot->id]; i < slots->used_slots - 1; i++) {
-               if (memslot->base_gfn > mslots[i + 1].base_gfn)
-                       break;
+static void kvm_insert_gfn_node(struct kvm_memslots *slots,
+                               struct kvm_memory_slot *slot)
+{
+       struct rb_root *gfn_tree = &slots->gfn_tree;
+       struct rb_node **node, *parent;
+       int idx = slots->node_idx;
 
-               WARN_ON_ONCE(memslot->base_gfn == mslots[i + 1].base_gfn);
+       parent = NULL;
+       for (node = &gfn_tree->rb_node; *node; ) {
+               struct kvm_memory_slot *tmp;
 
-               /* Shift the next memslot forward one and update its index. */
-               mslots[i] = mslots[i + 1];
-               slots->id_to_index[mslots[i].id] = i;
+               tmp = container_of(*node, struct kvm_memory_slot, gfn_node[idx]);
+               parent = *node;
+               if (slot->base_gfn < tmp->base_gfn)
+                       node = &(*node)->rb_left;
+               else if (slot->base_gfn > tmp->base_gfn)
+                       node = &(*node)->rb_right;
+               else
+                       BUG();
        }
-       return i;
+
+       rb_link_node(&slot->gfn_node[idx], parent, node);
+       rb_insert_color(&slot->gfn_node[idx], gfn_tree);
 }
 
-/*
- * Move a changed memslot forwards in the array by shifting existing slots with
- * a lower GFN toward the back of the array.  Note, the changed memslot itself
- * is not preserved in the array, i.e. not swapped at this time, only its new
- * index into the array is tracked.  Returns the changed memslot's final index
- * into the memslots array.
- */
-static inline int kvm_memslot_move_forward(struct kvm_memslots *slots,
-                                          struct kvm_memory_slot *memslot,
-                                          int start)
+static void kvm_erase_gfn_node(struct kvm_memslots *slots,
+                              struct kvm_memory_slot *slot)
 {
-       struct kvm_memory_slot *mslots = slots->memslots;
-       int i;
+       rb_erase(&slot->gfn_node[slots->node_idx], &slots->gfn_tree);
+}
 
-       for (i = start; i > 0; i--) {
-               if (memslot->base_gfn < mslots[i - 1].base_gfn)
-                       break;
+static void kvm_replace_gfn_node(struct kvm_memslots *slots,
+                                struct kvm_memory_slot *old,
+                                struct kvm_memory_slot *new)
+{
+       int idx = slots->node_idx;
 
-               WARN_ON_ONCE(memslot->base_gfn == mslots[i - 1].base_gfn);
+       WARN_ON_ONCE(old->base_gfn != new->base_gfn);
 
-               /* Shift the next memslot back one and update its index. */
-               mslots[i] = mslots[i - 1];
-               slots->id_to_index[mslots[i].id] = i;
-       }
-       return i;
+       rb_replace_node(&old->gfn_node[idx], &new->gfn_node[idx],
+                       &slots->gfn_tree);
 }
 
 /*
- * Re-sort memslots based on their GFN to account for an added, deleted, or
- * moved memslot.  Sorting memslots by GFN allows using a binary search during
- * memslot lookup.
+ * Replace @old with @new in the inactive memslots.
  *
- * IMPORTANT: Slots are sorted from highest GFN to lowest GFN!  I.e. the entry
- * at memslots[0] has the highest GFN.
+ * With NULL @old this simply adds @new.
+ * With NULL @new this simply removes @old.
  *
- * The sorting algorithm takes advantage of having initially sorted memslots
- * and knowing the position of the changed memslot.  Sorting is also optimized
- * by not swapping the updated memslot and instead only shifting other memslots
- * and tracking the new index for the update memslot.  Only once its final
- * index is known is the updated memslot copied into its position in the array.
- *
- *  - When deleting a memslot, the deleted memslot simply needs to be moved to
- *    the end of the array.
- *
- *  - When creating a memslot, the algorithm "inserts" the new memslot at the
- *    end of the array and then it forward to its correct location.
- *
- *  - When moving a memslot, the algorithm first moves the updated memslot
- *    backward to handle the scenario where the memslot's GFN was changed to a
- *    lower value.  update_memslots() then falls through and runs the same flow
- *    as creating a memslot to move the memslot forward to handle the scenario
- *    where its GFN was changed to a higher value.
- *
- * Note, slots are sorted from highest->lowest instead of lowest->highest for
- * historical reasons.  Originally, invalid memslots where denoted by having
- * GFN=0, thus sorting from highest->lowest naturally sorted invalid memslots
- * to the end of the array.  The current algorithm uses dedicated logic to
- * delete a memslot and thus does not rely on invalid memslots having GFN=0.
- *
- * The other historical motiviation for highest->lowest was to improve the
- * performance of memslot lookup.  KVM originally used a linear search starting
- * at memslots[0].  On x86, the largest memslot usually has one of the highest,
- * if not *the* highest, GFN, as the bulk of the guest's RAM is located in a
- * single memslot above the 4gb boundary.  As the largest memslot is also the
- * most likely to be referenced, sorting it to the front of the array was
- * advantageous.  The current binary search starts from the middle of the array
- * and uses an LRU pointer to improve performance for all memslots and GFNs.
+ * If @new is non-NULL its hva_node[slots->node_idx] range has to be set
+ * appropriately.
  */
-static void update_memslots(struct kvm_memslots *slots,
-                           struct kvm_memory_slot *memslot,
-                           enum kvm_mr_change change)
+static void kvm_replace_memslot(struct kvm *kvm,
+                               struct kvm_memory_slot *old,
+                               struct kvm_memory_slot *new)
 {
-       int i;
+       int as_id = kvm_memslots_get_as_id(old, new);
+       struct kvm_memslots *slots = kvm_get_inactive_memslots(kvm, as_id);
+       int idx = slots->node_idx;
 
-       if (change == KVM_MR_DELETE) {
-               kvm_memslot_delete(slots, memslot);
-       } else {
-               if (change == KVM_MR_CREATE)
-                       i = kvm_memslot_insert_back(slots);
-               else
-                       i = kvm_memslot_move_backward(slots, memslot);
-               i = kvm_memslot_move_forward(slots, memslot, i);
+       if (old) {
+               hash_del(&old->id_node[idx]);
+               interval_tree_remove(&old->hva_node[idx], &slots->hva_tree);
 
-               /*
-                * Copy the memslot to its new position in memslots and update
-                * its index accordingly.
-                */
-               slots->memslots[i] = *memslot;
-               slots->id_to_index[memslot->id] = i;
+               if ((long)old == atomic_long_read(&slots->last_used_slot))
+                       atomic_long_set(&slots->last_used_slot, (long)new);
+
+               if (!new) {
+                       kvm_erase_gfn_node(slots, old);
+                       return;
+               }
+       }
+
+       /*
+        * Initialize @new's hva range.  Do this even when replacing an @old
+        * slot; kvm_copy_memslot() deliberately does not touch node data.
+        */
+       new->hva_node[idx].start = new->userspace_addr;
+       new->hva_node[idx].last = new->userspace_addr +
+                                 (new->npages << PAGE_SHIFT) - 1;
+
+       /*
+        * (Re)Add the new memslot.  There is no O(1) interval_tree_replace(), so
+        * hva_node needs to be swapped with remove+insert even though hva can't
+        * change when replacing an existing slot.
+        */
+       hash_add(slots->id_hash, &new->id_node[idx], new->id);
+       interval_tree_insert(&new->hva_node[idx], &slots->hva_tree);
+
+       /*
+        * If the memslot gfn is unchanged, rb_replace_node() can be used to
+        * switch the node in the gfn tree instead of removing the old and
+        * inserting the new as two separate operations. Replacement is a
+        * single O(1) operation versus two O(log(n)) operations for
+        * remove+insert.
+        */
+       if (old && old->base_gfn == new->base_gfn) {
+               kvm_replace_gfn_node(slots, old, new);
+       } else {
+               if (old)
+                       kvm_erase_gfn_node(slots, old);
+               kvm_insert_gfn_node(slots, new);
        }
 }
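kvm_insert_gfn_node() above descends by base_gfn and BUGs on an equal key, since two memslots can never share a base gfn. A minimal userspace counterpart, not part of the patch, showing the same descend logic together with the lookup it enables; an ordinary unbalanced binary search tree stands in for the kernel rbtree, and struct slot, insert_slot and find_slot are invented names.

#include <stdio.h>
#include <stdlib.h>

struct slot {
        unsigned long base_gfn, npages;
        struct slot *left, *right;
};

static void insert_slot(struct slot **root, struct slot *new)
{
        while (*root) {
                if (new->base_gfn < (*root)->base_gfn)
                        root = &(*root)->left;
                else if (new->base_gfn > (*root)->base_gfn)
                        root = &(*root)->right;
                else
                        abort();        /* duplicate base_gfn: slots may not overlap */
        }
        *root = new;
}

static struct slot *find_slot(struct slot *node, unsigned long gfn)
{
        while (node) {
                if (gfn < node->base_gfn)
                        node = node->left;
                else if (gfn >= node->base_gfn + node->npages)
                        node = node->right;
                else
                        return node;    /* gfn falls inside this slot */
        }
        return NULL;
}

int main(void)
{
        struct slot a = { .base_gfn = 0x000, .npages = 0x100 };
        struct slot b = { .base_gfn = 0x400, .npages = 0x200 };
        struct slot *root = NULL;

        insert_slot(&root, &a);
        insert_slot(&root, &b);
        printf("gfn 0x450 -> slot at base 0x%lx\n", find_slot(root, 0x450)->base_gfn);
        printf("gfn 0x300 -> %p\n", (void *)find_slot(root, 0x300));
        return 0;
}

The replace-in-place path in the hunk exists because rb_replace_node() can swap a node with an equal key in O(1), whereas erase plus insert costs two O(log n) walks.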
 
@@ -1435,11 +1433,12 @@ static int check_memory_region_flags(const struct kvm_userspace_memory_region *m
        return 0;
 }
 
-static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
-               int as_id, struct kvm_memslots *slots)
+static void kvm_swap_active_memslots(struct kvm *kvm, int as_id)
 {
-       struct kvm_memslots *old_memslots = __kvm_memslots(kvm, as_id);
-       u64 gen = old_memslots->generation;
+       struct kvm_memslots *slots = kvm_get_inactive_memslots(kvm, as_id);
+
+       /* Grab the generation from the active memslots. */
+       u64 gen = __kvm_memslots(kvm, as_id)->generation;
 
        WARN_ON(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS);
        slots->generation = gen | KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS;
@@ -1490,61 +1489,226 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
        kvm_arch_memslots_updated(kvm, gen);
 
        slots->generation = gen;
-
-       return old_memslots;
 }
 
-static size_t kvm_memslots_size(int slots)
+static int kvm_prepare_memory_region(struct kvm *kvm,
+                                    const struct kvm_memory_slot *old,
+                                    struct kvm_memory_slot *new,
+                                    enum kvm_mr_change change)
 {
-       return sizeof(struct kvm_memslots) +
-              (sizeof(struct kvm_memory_slot) * slots);
+       int r;
+
+       /*
+        * If dirty logging is disabled, nullify the bitmap; the old bitmap
+        * will be freed on "commit".  If logging is enabled in both old and
+        * new, reuse the existing bitmap.  If logging is enabled only in the
+        * new and KVM isn't using a ring buffer, allocate and initialize a
+        * new bitmap.
+        */
+       if (change != KVM_MR_DELETE) {
+               if (!(new->flags & KVM_MEM_LOG_DIRTY_PAGES))
+                       new->dirty_bitmap = NULL;
+               else if (old && old->dirty_bitmap)
+                       new->dirty_bitmap = old->dirty_bitmap;
+               else if (!kvm->dirty_ring_size) {
+                       r = kvm_alloc_dirty_bitmap(new);
+                       if (r)
+                               return r;
+
+                       if (kvm_dirty_log_manual_protect_and_init_set(kvm))
+                               bitmap_set(new->dirty_bitmap, 0, new->npages);
+               }
+       }
+
+       r = kvm_arch_prepare_memory_region(kvm, old, new, change);
+
+       /* Free the bitmap on failure if it was allocated above. */
+       if (r && new && new->dirty_bitmap && old && !old->dirty_bitmap)
+               kvm_destroy_dirty_bitmap(new);
+
+       return r;
 }
 
-static void kvm_copy_memslots(struct kvm_memslots *to,
-                             struct kvm_memslots *from)
+static void kvm_commit_memory_region(struct kvm *kvm,
+                                    struct kvm_memory_slot *old,
+                                    const struct kvm_memory_slot *new,
+                                    enum kvm_mr_change change)
 {
-       memcpy(to, from, kvm_memslots_size(from->used_slots));
+       /*
+        * Update the total number of memslot pages before calling the arch
+        * hook so that architectures can consume the result directly.
+        */
+       if (change == KVM_MR_DELETE)
+               kvm->nr_memslot_pages -= old->npages;
+       else if (change == KVM_MR_CREATE)
+               kvm->nr_memslot_pages += new->npages;
+
+       kvm_arch_commit_memory_region(kvm, old, new, change);
+
+       switch (change) {
+       case KVM_MR_CREATE:
+               /* Nothing more to do. */
+               break;
+       case KVM_MR_DELETE:
+               /* Free the old memslot and all its metadata. */
+               kvm_free_memslot(kvm, old);
+               break;
+       case KVM_MR_MOVE:
+       case KVM_MR_FLAGS_ONLY:
+               /*
+                * Free the dirty bitmap as needed; the check below encompasses
+                * both the flags and whether a ring buffer is being used.
+                */
+               if (old->dirty_bitmap && !new->dirty_bitmap)
+                       kvm_destroy_dirty_bitmap(old);
+
+               /*
+                * The final quirk.  Free the detached, old slot, but only its
+                * memory, not any metadata.  Metadata, including arch specific
+                * data, may be reused by @new.
+                */
+               kfree(old);
+               break;
+       default:
+               BUG();
+       }
 }
 
 /*
- * Note, at a minimum, the current number of used slots must be allocated, even
- * when deleting a memslot, as we need a complete duplicate of the memslots for
- * use when invalidating a memslot prior to deleting/moving the memslot.
+ * Activate @new, which must be installed in the inactive slots by the caller,
+ * by swapping the active slots and then propagating @new to @old once @old is
+ * unreachable and can be safely modified.
+ *
+ * With NULL @old this simply adds @new to @active (while swapping the sets).
+ * With NULL @new this simply removes @old from @active and frees it
+ * (while also swapping the sets).
  */
-static struct kvm_memslots *kvm_dup_memslots(struct kvm_memslots *old,
-                                            enum kvm_mr_change change)
+static void kvm_activate_memslot(struct kvm *kvm,
+                                struct kvm_memory_slot *old,
+                                struct kvm_memory_slot *new)
 {
-       struct kvm_memslots *slots;
-       size_t new_size;
+       int as_id = kvm_memslots_get_as_id(old, new);
 
-       if (change == KVM_MR_CREATE)
-               new_size = kvm_memslots_size(old->used_slots + 1);
-       else
-               new_size = kvm_memslots_size(old->used_slots);
+       kvm_swap_active_memslots(kvm, as_id);
 
-       slots = kvzalloc(new_size, GFP_KERNEL_ACCOUNT);
-       if (likely(slots))
-               kvm_copy_memslots(slots, old);
+       /* Propagate the new memslot to the now inactive memslots. */
+       kvm_replace_memslot(kvm, old, new);
+}
 
-       return slots;
+static void kvm_copy_memslot(struct kvm_memory_slot *dest,
+                            const struct kvm_memory_slot *src)
+{
+       dest->base_gfn = src->base_gfn;
+       dest->npages = src->npages;
+       dest->dirty_bitmap = src->dirty_bitmap;
+       dest->arch = src->arch;
+       dest->userspace_addr = src->userspace_addr;
+       dest->flags = src->flags;
+       dest->id = src->id;
+       dest->as_id = src->as_id;
+}
+
+static void kvm_invalidate_memslot(struct kvm *kvm,
+                                  struct kvm_memory_slot *old,
+                                  struct kvm_memory_slot *invalid_slot)
+{
+       /*
+        * Mark the current slot INVALID.  As with all memslot modifications,
+        * this must be done on an unreachable slot to avoid modifying the
+        * current slot in the active tree.
+        */
+       kvm_copy_memslot(invalid_slot, old);
+       invalid_slot->flags |= KVM_MEMSLOT_INVALID;
+       kvm_replace_memslot(kvm, old, invalid_slot);
+
+       /*
+        * Activate the slot that is now marked INVALID, but don't propagate
+        * the slot to the now inactive slots. The slot is either going to be
+        * deleted or recreated as a new slot.
+        */
+       kvm_swap_active_memslots(kvm, old->as_id);
+
+       /*
+        * From this point no new shadow pages pointing to a deleted, or moved,
+        * memslot will be created.  Validation of sp->gfn happens in:
+        *      - gfn_to_hva (kvm_read_guest, gfn_to_pfn)
+        *      - kvm_is_visible_gfn (mmu_check_root)
+        */
+       kvm_arch_flush_shadow_memslot(kvm, old);
+
+       /* Was released by kvm_swap_active_memslots, reacquire. */
+       mutex_lock(&kvm->slots_arch_lock);
+
+       /*
+        * Copy the arch-specific field of the newly-installed slot back to the
+        * old slot as the arch data could have changed between releasing
+        * slots_arch_lock in kvm_swap_active_memslots() and re-acquiring the lock
+        * above.  Writers are required to retrieve memslots *after* acquiring
+        * slots_arch_lock, thus the active slot's data is guaranteed to be fresh.
+        */
+       old->arch = invalid_slot->arch;
+}
+
+static void kvm_create_memslot(struct kvm *kvm,
+                              struct kvm_memory_slot *new)
+{
+       /* Add the new memslot to the inactive set and activate. */
+       kvm_replace_memslot(kvm, NULL, new);
+       kvm_activate_memslot(kvm, NULL, new);
+}
+
+static void kvm_delete_memslot(struct kvm *kvm,
+                              struct kvm_memory_slot *old,
+                              struct kvm_memory_slot *invalid_slot)
+{
+       /*
+        * Remove the old memslot (in the inactive memslots) by passing NULL as
+        * the "new" slot, and for the invalid version in the active slots.
+        */
+       kvm_replace_memslot(kvm, old, NULL);
+       kvm_activate_memslot(kvm, invalid_slot, NULL);
+}
+
+static void kvm_move_memslot(struct kvm *kvm,
+                            struct kvm_memory_slot *old,
+                            struct kvm_memory_slot *new,
+                            struct kvm_memory_slot *invalid_slot)
+{
+       /*
+        * Replace the old memslot in the inactive slots, and then swap slots
+        * and replace the current INVALID with the new as well.
+        */
+       kvm_replace_memslot(kvm, old, new);
+       kvm_activate_memslot(kvm, invalid_slot, new);
+}
+
+static void kvm_update_flags_memslot(struct kvm *kvm,
+                                    struct kvm_memory_slot *old,
+                                    struct kvm_memory_slot *new)
+{
+       /*
+        * Similar to the MOVE case, but the slot doesn't need to be zapped as
+        * an intermediate step. Instead, the old memslot is simply replaced
+        * with a new, updated copy in both memslot sets.
+        */
+       kvm_replace_memslot(kvm, old, new);
+       kvm_activate_memslot(kvm, old, new);
 }
 
 static int kvm_set_memslot(struct kvm *kvm,
-                          const struct kvm_userspace_memory_region *mem,
                           struct kvm_memory_slot *old,
-                          struct kvm_memory_slot *new, int as_id,
+                          struct kvm_memory_slot *new,
                           enum kvm_mr_change change)
 {
-       struct kvm_memory_slot *slot;
-       struct kvm_memslots *slots;
+       struct kvm_memory_slot *invalid_slot;
        int r;
 
        /*
-        * Released in install_new_memslots.
+        * Released in kvm_swap_active_memslots.
         *
         * Must be held from before the current memslots are copied until
         * after the new memslots are installed with rcu_assign_pointer,
-        * then released before the synchronize srcu in install_new_memslots.
+        * then released before the synchronize srcu in kvm_swap_active_memslots.
         *
         * When modifying memslots outside of the slots_lock, must be held
         * before reading the pointer to the current memslots until after all
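The helpers above implement the dual-set scheme that replaces the old duplicate-and-install flow: every modification is made on the set readers cannot reach, the active pointer is swapped, and the modification is then mirrored so the two sets stay identical. A single-threaded userspace sketch of that ordering, not part of the patch, with SRCU, generations and the gfn/hva trees omitted; struct slots, inactive() and set_slot() are invented names.

#include <stdio.h>

#define NR_SLOTS 4

struct slots {
        unsigned long base_gfn[NR_SLOTS];
        int node_idx;                   /* which copy this is: 0 or 1 */
};

static struct slots sets[2] = { { .node_idx = 0 }, { .node_idx = 1 } };
static struct slots *active = &sets[0];

static struct slots *inactive(void)
{
        return &sets[active->node_idx ^ 1];
}

static void set_slot(int id, unsigned long base_gfn)
{
        /* 1) modify the copy no reader can see */
        inactive()->base_gfn[id] = base_gfn;
        /* 2) publish it (the pointer swap done by kvm_swap_active_memslots()) */
        active = inactive();
        /* 3) propagate to the now-inactive copy so the sets stay in sync */
        inactive()->base_gfn[id] = base_gfn;
}

int main(void)
{
        set_slot(0, 0x100);
        set_slot(1, 0x400);
        printf("active copy %d: slot0=%#lx slot1=%#lx\n",
               active->node_idx, active->base_gfn[0], active->base_gfn[1]);
        printf("inactive copy %d mirrors it: slot0=%#lx slot1=%#lx\n",
               inactive()->node_idx, inactive()->base_gfn[0], inactive()->base_gfn[1]);
        return 0;
}

In KVM the swap in step 2 also waits for SRCU readers before step 3 touches the retired set, which is why the now-inactive copy can be edited safely.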
@@ -1555,96 +1719,88 @@ static int kvm_set_memslot(struct kvm *kvm,
         */
        mutex_lock(&kvm->slots_arch_lock);
 
-       slots = kvm_dup_memslots(__kvm_memslots(kvm, as_id), change);
-       if (!slots) {
-               mutex_unlock(&kvm->slots_arch_lock);
-               return -ENOMEM;
-       }
-
+       /*
+        * Invalidate the old slot if it's being deleted or moved.  This is
+        * done prior to actually deleting/moving the memslot to allow vCPUs to
+        * continue running by ensuring there are no mappings or shadow pages
+        * for the memslot when it is deleted/moved.  Without pre-invalidation
+        * (and without a lock), a window would exist between effecting the
+        * delete/move and committing the changes in arch code where KVM or a
+        * guest could access a non-existent memslot.
+        *
+        * Modifications are done on a temporary, unreachable slot.  The old
+        * slot needs to be preserved in case a later step fails and the
+        * invalidation needs to be reverted.
+        */
        if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) {
-               /*
-                * Note, the INVALID flag needs to be in the appropriate entry
-                * in the freshly allocated memslots, not in @old or @new.
-                */
-               slot = id_to_memslot(slots, old->id);
-               slot->flags |= KVM_MEMSLOT_INVALID;
-
-               /*
-                * We can re-use the memory from the old memslots.
-                * It will be overwritten with a copy of the new memslots
-                * after reacquiring the slots_arch_lock below.
-                */
-               slots = install_new_memslots(kvm, as_id, slots);
-
-               /* From this point no new shadow pages pointing to a deleted,
-                * or moved, memslot will be created.
-                *
-                * validation of sp->gfn happens in:
-                *      - gfn_to_hva (kvm_read_guest, gfn_to_pfn)
-                *      - kvm_is_visible_gfn (mmu_check_root)
-                */
-               kvm_arch_flush_shadow_memslot(kvm, slot);
-
-               /* Released in install_new_memslots. */
-               mutex_lock(&kvm->slots_arch_lock);
+               invalid_slot = kzalloc(sizeof(*invalid_slot), GFP_KERNEL_ACCOUNT);
+               if (!invalid_slot) {
+                       mutex_unlock(&kvm->slots_arch_lock);
+                       return -ENOMEM;
+               }
+               kvm_invalidate_memslot(kvm, old, invalid_slot);
+       }
 
+       r = kvm_prepare_memory_region(kvm, old, new, change);
+       if (r) {
                /*
-                * The arch-specific fields of the memslots could have changed
-                * between releasing the slots_arch_lock in
-                * install_new_memslots and here, so get a fresh copy of the
-                * slots.
+                * For DELETE/MOVE, revert the above INVALID change.  No
+                * modifications required since the original slot was preserved
+                * in the inactive slots.  Changing the active memslots also
+                * releases slots_arch_lock.
                 */
-               kvm_copy_memslots(slots, __kvm_memslots(kvm, as_id));
+               if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) {
+                       kvm_activate_memslot(kvm, invalid_slot, old);
+                       kfree(invalid_slot);
+               } else {
+                       mutex_unlock(&kvm->slots_arch_lock);
+               }
+               return r;
        }
 
-       r = kvm_arch_prepare_memory_region(kvm, new, mem, change);
-       if (r)
-               goto out_slots;
+       /*
+        * For DELETE and MOVE, invalid_slot is now active as the INVALID
+        * version of the old slot.  MOVE is particularly special as it reuses
+        * the old slot and returns a copy of the old slot (in invalid_slot).
+        * For CREATE, there is no old slot.  For DELETE and FLAGS_ONLY, the
+        * old slot is detached but otherwise preserved.
+        */
+       if (change == KVM_MR_CREATE)
+               kvm_create_memslot(kvm, new);
+       else if (change == KVM_MR_DELETE)
+               kvm_delete_memslot(kvm, old, invalid_slot);
+       else if (change == KVM_MR_MOVE)
+               kvm_move_memslot(kvm, old, new, invalid_slot);
+       else if (change == KVM_MR_FLAGS_ONLY)
+               kvm_update_flags_memslot(kvm, old, new);
+       else
+               BUG();
 
-       update_memslots(slots, new, change);
-       slots = install_new_memslots(kvm, as_id, slots);
+       /* Free the temporary INVALID slot used for DELETE and MOVE. */
+       if (change == KVM_MR_DELETE || change == KVM_MR_MOVE)
+               kfree(invalid_slot);
 
-       kvm_arch_commit_memory_region(kvm, mem, old, new, change);
+       /*
+        * No need to refresh new->arch; changes after dropping slots_arch_lock
+        * will directly hit the final, active memslot.  Architectures are
+        * responsible for knowing that new->arch may be stale.
+        */
+       kvm_commit_memory_region(kvm, old, new, change);
 
-       kvfree(slots);
        return 0;
-
-out_slots:
-       if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) {
-               slot = id_to_memslot(slots, old->id);
-               slot->flags &= ~KVM_MEMSLOT_INVALID;
-               slots = install_new_memslots(kvm, as_id, slots);
-       } else {
-               mutex_unlock(&kvm->slots_arch_lock);
-       }
-       kvfree(slots);
-       return r;
 }
 
-static int kvm_delete_memslot(struct kvm *kvm,
-                             const struct kvm_userspace_memory_region *mem,
-                             struct kvm_memory_slot *old, int as_id)
+static bool kvm_check_memslot_overlap(struct kvm_memslots *slots, int id,
+                                     gfn_t start, gfn_t end)
 {
-       struct kvm_memory_slot new;
-       int r;
-
-       if (!old->npages)
-               return -EINVAL;
+       struct kvm_memslot_iter iter;
 
-       memset(&new, 0, sizeof(new));
-       new.id = old->id;
-       /*
-        * This is only for debugging purpose; it should never be referenced
-        * for a removed memslot.
-        */
-       new.as_id = as_id;
-
-       r = kvm_set_memslot(kvm, mem, old, &new, as_id, KVM_MR_DELETE);
-       if (r)
-               return r;
+       kvm_for_each_memslot_in_gfn_range(&iter, slots, start, end) {
+               if (iter.slot->id != id)
+                       return true;
+       }
 
-       kvm_free_memslot(kvm, old);
-       return 0;
+       return false;
 }
 
 /*
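kvm_check_memslot_overlap() above only narrows the candidates with the gfn-tree walk; the collision rule it enforces is the usual half-open interval test, sketched below as a standalone illustration (ranges_overlap is a made-up helper, not part of the patch).

#include <assert.h>
#include <stdbool.h>

/* Two half-open gfn ranges [s1, e1) and [s2, e2) collide iff each one
 * starts before the other ends. */
static bool ranges_overlap(unsigned long s1, unsigned long e1,
                           unsigned long s2, unsigned long e2)
{
        return s1 < e2 && s2 < e1;
}

int main(void)
{
        assert(!ranges_overlap(0x000, 0x100, 0x100, 0x200));   /* adjacent is fine */
        assert(ranges_overlap(0x000, 0x101, 0x100, 0x200));    /* one page shared */
        assert(ranges_overlap(0x120, 0x140, 0x100, 0x200));    /* fully contained */
        return 0;
}

The gfn-range iterator only has to hand this predicate slots that can possibly collide, so the check stays cheap even with many memslots.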
@@ -1658,9 +1814,11 @@ static int kvm_delete_memslot(struct kvm *kvm,
 int __kvm_set_memory_region(struct kvm *kvm,
                            const struct kvm_userspace_memory_region *mem)
 {
-       struct kvm_memory_slot old, new;
-       struct kvm_memory_slot *tmp;
+       struct kvm_memory_slot *old, *new;
+       struct kvm_memslots *slots;
        enum kvm_mr_change change;
+       unsigned long npages;
+       gfn_t base_gfn;
        int as_id, id;
        int r;
 
@@ -1672,7 +1830,8 @@ int __kvm_set_memory_region(struct kvm *kvm,
        id = (u16)mem->slot;
 
        /* General sanity checks */
-       if (mem->memory_size & (PAGE_SIZE - 1))
+       if ((mem->memory_size & (PAGE_SIZE - 1)) ||
+           (mem->memory_size != (unsigned long)mem->memory_size))
                return -EINVAL;
        if (mem->guest_phys_addr & (PAGE_SIZE - 1))
                return -EINVAL;
@@ -1686,91 +1845,72 @@ int __kvm_set_memory_region(struct kvm *kvm,
                return -EINVAL;
        if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
                return -EINVAL;
+       if ((mem->memory_size >> PAGE_SHIFT) > KVM_MEM_MAX_NR_PAGES)
+               return -EINVAL;
+
+       slots = __kvm_memslots(kvm, as_id);
 
        /*
-        * Make a full copy of the old memslot, the pointer will become stale
-        * when the memslots are re-sorted by update_memslots(), and the old
-        * memslot needs to be referenced after calling update_memslots(), e.g.
-        * to free its resources and for arch specific behavior.
+        * Note, the old memslot (and the pointer itself!) may be invalidated
+        * and/or destroyed by kvm_set_memslot().
         */
-       tmp = id_to_memslot(__kvm_memslots(kvm, as_id), id);
-       if (tmp) {
-               old = *tmp;
-               tmp = NULL;
-       } else {
-               memset(&old, 0, sizeof(old));
-               old.id = id;
-       }
+       old = id_to_memslot(slots, id);
 
-       if (!mem->memory_size)
-               return kvm_delete_memslot(kvm, mem, &old, as_id);
+       if (!mem->memory_size) {
+               if (!old || !old->npages)
+                       return -EINVAL;
 
-       new.as_id = as_id;
-       new.id = id;
-       new.base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
-       new.npages = mem->memory_size >> PAGE_SHIFT;
-       new.flags = mem->flags;
-       new.userspace_addr = mem->userspace_addr;
+               if (WARN_ON_ONCE(kvm->nr_memslot_pages < old->npages))
+                       return -EIO;
 
-       if (new.npages > KVM_MEM_MAX_NR_PAGES)
-               return -EINVAL;
+               return kvm_set_memslot(kvm, old, NULL, KVM_MR_DELETE);
+       }
+
+       base_gfn = (mem->guest_phys_addr >> PAGE_SHIFT);
+       npages = (mem->memory_size >> PAGE_SHIFT);
 
-       if (!old.npages) {
+       if (!old || !old->npages) {
                change = KVM_MR_CREATE;
-               new.dirty_bitmap = NULL;
-               memset(&new.arch, 0, sizeof(new.arch));
+
+               /*
+                * To simplify KVM internals, the total number of pages across
+                * all memslots must fit in an unsigned long.
+                */
+               if ((kvm->nr_memslot_pages + npages) < kvm->nr_memslot_pages)
+                       return -EINVAL;
        } else { /* Modify an existing slot. */
-               if ((new.userspace_addr != old.userspace_addr) ||
-                   (new.npages != old.npages) ||
-                   ((new.flags ^ old.flags) & KVM_MEM_READONLY))
+               if ((mem->userspace_addr != old->userspace_addr) ||
+                   (npages != old->npages) ||
+                   ((mem->flags ^ old->flags) & KVM_MEM_READONLY))
                        return -EINVAL;
 
-               if (new.base_gfn != old.base_gfn)
+               if (base_gfn != old->base_gfn)
                        change = KVM_MR_MOVE;
-               else if (new.flags != old.flags)
+               else if (mem->flags != old->flags)
                        change = KVM_MR_FLAGS_ONLY;
                else /* Nothing to change. */
                        return 0;
-
-               /* Copy dirty_bitmap and arch from the current memslot. */
-               new.dirty_bitmap = old.dirty_bitmap;
-               memcpy(&new.arch, &old.arch, sizeof(new.arch));
        }
 
-       if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
-               /* Check for overlaps */
-               kvm_for_each_memslot(tmp, __kvm_memslots(kvm, as_id)) {
-                       if (tmp->id == id)
-                               continue;
-                       if (!((new.base_gfn + new.npages <= tmp->base_gfn) ||
-                             (new.base_gfn >= tmp->base_gfn + tmp->npages)))
-                               return -EEXIST;
-               }
-       }
+       if ((change == KVM_MR_CREATE || change == KVM_MR_MOVE) &&
+           kvm_check_memslot_overlap(slots, id, base_gfn, base_gfn + npages))
+               return -EEXIST;
 
-       /* Allocate/free page dirty bitmap as needed */
-       if (!(new.flags & KVM_MEM_LOG_DIRTY_PAGES))
-               new.dirty_bitmap = NULL;
-       else if (!new.dirty_bitmap && !kvm->dirty_ring_size) {
-               r = kvm_alloc_dirty_bitmap(&new);
-               if (r)
-                       return r;
+       /* Allocate a slot that will persist in the memslots. */
+       new = kzalloc(sizeof(*new), GFP_KERNEL_ACCOUNT);
+       if (!new)
+               return -ENOMEM;
 
-               if (kvm_dirty_log_manual_protect_and_init_set(kvm))
-                       bitmap_set(new.dirty_bitmap, 0, new.npages);
-       }
+       new->as_id = as_id;
+       new->id = id;
+       new->base_gfn = base_gfn;
+       new->npages = npages;
+       new->flags = mem->flags;
+       new->userspace_addr = mem->userspace_addr;
 
-       r = kvm_set_memslot(kvm, mem, &old, &new, as_id, change);
+       r = kvm_set_memslot(kvm, old, new, change);
        if (r)
-               goto out_bitmap;
-
-       if (old.dirty_bitmap && !new.dirty_bitmap)
-               kvm_destroy_dirty_bitmap(&old);
-       return 0;
-
-out_bitmap:
-       if (new.dirty_bitmap && !old.dirty_bitmap)
-               kvm_destroy_dirty_bitmap(&new);
+               kfree(new);
        return r;
 }
 EXPORT_SYMBOL_GPL(__kvm_set_memory_region);
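The rewritten __kvm_set_memory_region() delegates the overlap test to kvm_check_memslot_overlap(slots, id, base_gfn, base_gfn + npages). A minimal, standalone sketch of that half-open-interval check follows; the struct layout, the flat array walk, and every name here are illustrative only, not KVM's internal representation:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

typedef uint64_t gfn_t;

struct slot {
	uint32_t id;
	gfn_t base_gfn;
	uint64_t npages;
};

/* Two half-open gfn ranges [a_start, a_end) and [b_start, b_end) overlap
 * unless one ends at or before the point where the other begins. */
static bool ranges_overlap(gfn_t a_start, gfn_t a_end, gfn_t b_start, gfn_t b_end)
{
	return a_start < b_end && b_start < a_end;
}

/* Reject a created/moved slot that intersects any slot other than the one
 * being replaced (identified by id). */
static bool check_overlap(const struct slot *slots, size_t n, uint32_t id,
			  gfn_t start, gfn_t end)
{
	size_t i;

	for (i = 0; i < n; i++) {
		if (slots[i].id == id)
			continue;
		if (ranges_overlap(start, end, slots[i].base_gfn,
				   slots[i].base_gfn + slots[i].npages))
			return true;
	}
	return false;
}

Treating [base_gfn, base_gfn + npages) as half-open means slots that merely touch are not considered overlapping, matching the old "<="/">=" test removed above.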
@@ -2076,21 +2216,30 @@ EXPORT_SYMBOL_GPL(gfn_to_memslot);
 struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn)
 {
        struct kvm_memslots *slots = kvm_vcpu_memslots(vcpu);
+       u64 gen = slots->generation;
        struct kvm_memory_slot *slot;
-       int slot_index;
 
-       slot = try_get_memslot(slots, vcpu->last_used_slot, gfn);
+       /*
+        * This also protects against using a memslot from a different address space,
+        * since different address spaces have different generation numbers.
+        */
+       if (unlikely(gen != vcpu->last_used_slot_gen)) {
+               vcpu->last_used_slot = NULL;
+               vcpu->last_used_slot_gen = gen;
+       }
+
+       slot = try_get_memslot(vcpu->last_used_slot, gfn);
        if (slot)
                return slot;
 
        /*
         * Fall back to searching all memslots. We purposely use
         * search_memslots() instead of __gfn_to_memslot() to avoid
-        * thrashing the VM-wide last_used_index in kvm_memslots.
+        * thrashing the VM-wide last_used_slot in kvm_memslots.
         */
-       slot = search_memslots(slots, gfn, &slot_index);
+       slot = search_memslots(slots, gfn, false);
        if (slot) {
-               vcpu->last_used_slot = slot_index;
+               vcpu->last_used_slot = slot;
                return slot;
        }
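The per-vCPU cache now stores the memslot pointer itself together with the memslots generation it was taken from; any mismatch (including a switch to a different address space, which carries its own generation) drops the cached pointer before it can be dereferenced. A minimal sketch of that generation-validated cache pattern, with purely illustrative types:

#include <stddef.h>
#include <stdint.h>

/* A cached pointer is trusted only while the generation recorded at caching
 * time still matches the container's current generation; any update to the
 * container bumps the generation and implicitly invalidates the cache. */
struct container {
	uint64_t generation;
	/* ... entries ... */
};

struct lookup_cache {
	const void *last_used;
	uint64_t last_gen;
};

static const void *cached_lookup(struct lookup_cache *c, const struct container *src)
{
	if (c->last_gen != src->generation) {
		/* Stale: drop the pointer rather than risk dereferencing it. */
		c->last_used = NULL;
		c->last_gen = src->generation;
	}
	return c->last_used;	/* NULL means: fall back to a full search */
}

On a miss the caller repopulates the cache from the full lookup, exactly as the hunk above does after search_memslots().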
 
@@ -2138,12 +2287,12 @@ out:
        return size;
 }
 
-static bool memslot_is_readonly(struct kvm_memory_slot *slot)
+static bool memslot_is_readonly(const struct kvm_memory_slot *slot)
 {
        return slot->flags & KVM_MEM_READONLY;
 }
 
-static unsigned long __gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn,
+static unsigned long __gfn_to_hva_many(const struct kvm_memory_slot *slot, gfn_t gfn,
                                       gfn_t *nr_pages, bool write)
 {
        if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
@@ -2438,7 +2587,7 @@ exit:
        return pfn;
 }
 
-kvm_pfn_t __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn,
+kvm_pfn_t __gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn,
                               bool atomic, bool *async, bool write_fault,
                               bool *writable, hva_t *hva)
 {
@@ -2478,13 +2627,13 @@ kvm_pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
 }
 EXPORT_SYMBOL_GPL(gfn_to_pfn_prot);
 
-kvm_pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
+kvm_pfn_t gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn)
 {
        return __gfn_to_pfn_memslot(slot, gfn, false, NULL, true, NULL, NULL);
 }
 EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot);
 
-kvm_pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn)
+kvm_pfn_t gfn_to_pfn_memslot_atomic(const struct kvm_memory_slot *slot, gfn_t gfn)
 {
        return __gfn_to_pfn_memslot(slot, gfn, true, NULL, true, NULL, NULL);
 }
@@ -2915,7 +3064,8 @@ int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
        int r;
        gpa_t gpa = ghc->gpa + offset;
 
-       BUG_ON(len + offset > ghc->len);
+       if (WARN_ON_ONCE(len + offset > ghc->len))
+               return -EINVAL;
 
        if (slots->generation != ghc->generation) {
                if (__kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len))
@@ -2952,7 +3102,8 @@ int kvm_read_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
        int r;
        gpa_t gpa = ghc->gpa + offset;
 
-       BUG_ON(len + offset > ghc->len);
+       if (WARN_ON_ONCE(len + offset > ghc->len))
+               return -EINVAL;
 
        if (slots->generation != ghc->generation) {
                if (__kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len))
@@ -3001,7 +3152,7 @@ int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len)
 EXPORT_SYMBOL_GPL(kvm_clear_guest);
 
 void mark_page_dirty_in_slot(struct kvm *kvm,
-                            struct kvm_memory_slot *memslot,
+                            const struct kvm_memory_slot *memslot,
                             gfn_t gfn)
 {
        if (memslot && kvm_slot_dirty_track_enabled(memslot)) {
@@ -3121,68 +3272,93 @@ out:
        return ret;
 }
 
-static inline void
-update_halt_poll_stats(struct kvm_vcpu *vcpu, u64 poll_ns, bool waited)
+/*
+ * Block the vCPU until the vCPU is runnable, an event arrives, or a signal is
+ * pending.  This is mostly used when halting a vCPU, but may also be used
+ * directly for other vCPU non-runnable states, e.g. x86's Wait-For-SIPI.
+ */
+bool kvm_vcpu_block(struct kvm_vcpu *vcpu)
 {
-       if (waited)
-               vcpu->stat.generic.halt_poll_fail_ns += poll_ns;
-       else
-               vcpu->stat.generic.halt_poll_success_ns += poll_ns;
+       struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu);
+       bool waited = false;
+
+       vcpu->stat.generic.blocking = 1;
+
+       kvm_arch_vcpu_blocking(vcpu);
+
+       prepare_to_rcuwait(wait);
+       for (;;) {
+               set_current_state(TASK_INTERRUPTIBLE);
+
+               if (kvm_vcpu_check_block(vcpu) < 0)
+                       break;
+
+               waited = true;
+               schedule();
+       }
+       finish_rcuwait(wait);
+
+       kvm_arch_vcpu_unblocking(vcpu);
+
+       vcpu->stat.generic.blocking = 0;
+
+       return waited;
+}
+
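kvm_vcpu_block() is now the low-level primitive: it sleeps until kvm_vcpu_check_block() reports the vCPU is runnable (or a signal is pending) and returns whether the task was actually scheduled out, while halt-polling lives in kvm_vcpu_halt() below. A purely illustrative caller, assuming a hypothetical arch-side helper and flag (only the two KVM functions are real):

/* Hypothetical arch helper; everything except kvm_vcpu_halt() and
 * kvm_vcpu_block() is illustrative. */
static void arch_wait_for_wake_event(struct kvm_vcpu *vcpu, bool is_halt)
{
	if (is_halt)
		kvm_vcpu_halt(vcpu);	/* halt-poll first, then block */
	else
		kvm_vcpu_block(vcpu);	/* block immediately, no polling */
}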
+static inline void update_halt_poll_stats(struct kvm_vcpu *vcpu, ktime_t start,
+                                         ktime_t end, bool success)
+{
+       struct kvm_vcpu_stat_generic *stats = &vcpu->stat.generic;
+       u64 poll_ns = ktime_to_ns(ktime_sub(end, start));
+
+       ++vcpu->stat.generic.halt_attempted_poll;
+
+       if (success) {
+               ++vcpu->stat.generic.halt_successful_poll;
+
+               if (!vcpu_valid_wakeup(vcpu))
+                       ++vcpu->stat.generic.halt_poll_invalid;
+
+               stats->halt_poll_success_ns += poll_ns;
+               KVM_STATS_LOG_HIST_UPDATE(stats->halt_poll_success_hist, poll_ns);
+       } else {
+               stats->halt_poll_fail_ns += poll_ns;
+               KVM_STATS_LOG_HIST_UPDATE(stats->halt_poll_fail_hist, poll_ns);
+       }
 }
 
 /*
- * The vCPU has executed a HLT instruction with in-kernel mode enabled.
+ * Emulate a vCPU halt condition, e.g. HLT on x86, WFI on arm, etc...  If halt
+ * polling is enabled, busy wait for a short time before blocking to avoid the
+ * expensive block+unblock sequence if a wake event arrives soon after the vCPU
+ * is halted.
  */
-void kvm_vcpu_block(struct kvm_vcpu *vcpu)
+void kvm_vcpu_halt(struct kvm_vcpu *vcpu)
 {
+       bool halt_poll_allowed = !kvm_arch_no_poll(vcpu);
+       bool do_halt_poll = halt_poll_allowed && vcpu->halt_poll_ns;
        ktime_t start, cur, poll_end;
        bool waited = false;
-       u64 block_ns;
-
-       kvm_arch_vcpu_blocking(vcpu);
+       u64 halt_ns;
 
        start = cur = poll_end = ktime_get();
-       if (vcpu->halt_poll_ns && !kvm_arch_no_poll(vcpu)) {
-               ktime_t stop = ktime_add_ns(ktime_get(), vcpu->halt_poll_ns);
+       if (do_halt_poll) {
+               ktime_t stop = ktime_add_ns(start, vcpu->halt_poll_ns);
 
-               ++vcpu->stat.generic.halt_attempted_poll;
                do {
                        /*
                         * This sets KVM_REQ_UNHALT if an interrupt
                         * arrives.
                         */
-                       if (kvm_vcpu_check_block(vcpu) < 0) {
-                               ++vcpu->stat.generic.halt_successful_poll;
-                               if (!vcpu_valid_wakeup(vcpu))
-                                       ++vcpu->stat.generic.halt_poll_invalid;
-
-                               KVM_STATS_LOG_HIST_UPDATE(
-                                     vcpu->stat.generic.halt_poll_success_hist,
-                                     ktime_to_ns(ktime_get()) -
-                                     ktime_to_ns(start));
+                       if (kvm_vcpu_check_block(vcpu) < 0)
                                goto out;
-                       }
                        cpu_relax();
                        poll_end = cur = ktime_get();
                } while (kvm_vcpu_can_poll(cur, stop));
-
-               KVM_STATS_LOG_HIST_UPDATE(
-                               vcpu->stat.generic.halt_poll_fail_hist,
-                               ktime_to_ns(ktime_get()) - ktime_to_ns(start));
        }
 
+       waited = kvm_vcpu_block(vcpu);
 
-       prepare_to_rcuwait(&vcpu->wait);
-       for (;;) {
-               set_current_state(TASK_INTERRUPTIBLE);
-
-               if (kvm_vcpu_check_block(vcpu) < 0)
-                       break;
-
-               waited = true;
-               schedule();
-       }
-       finish_rcuwait(&vcpu->wait);
        cur = ktime_get();
        if (waited) {
                vcpu->stat.generic.halt_wait_ns +=
@@ -3191,42 +3367,43 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
                                ktime_to_ns(cur) - ktime_to_ns(poll_end));
        }
 out:
-       kvm_arch_vcpu_unblocking(vcpu);
-       block_ns = ktime_to_ns(cur) - ktime_to_ns(start);
+       /* The total time the vCPU was "halted", including polling time. */
+       halt_ns = ktime_to_ns(cur) - ktime_to_ns(start);
 
-       update_halt_poll_stats(
-               vcpu, ktime_to_ns(ktime_sub(poll_end, start)), waited);
+       /*
+        * Note, halt-polling is considered successful so long as the vCPU was
+        * never actually scheduled out, i.e. even if the wake event arrived
+        * after the halt-polling loop itself, but before the full wait.
+        */
+       if (do_halt_poll)
+               update_halt_poll_stats(vcpu, start, poll_end, !waited);
 
-       if (!kvm_arch_no_poll(vcpu)) {
+       if (halt_poll_allowed) {
                if (!vcpu_valid_wakeup(vcpu)) {
                        shrink_halt_poll_ns(vcpu);
                } else if (vcpu->kvm->max_halt_poll_ns) {
-                       if (block_ns <= vcpu->halt_poll_ns)
+                       if (halt_ns <= vcpu->halt_poll_ns)
                                ;
                        /* we had a long block, shrink polling */
                        else if (vcpu->halt_poll_ns &&
-                                       block_ns > vcpu->kvm->max_halt_poll_ns)
+                                halt_ns > vcpu->kvm->max_halt_poll_ns)
                                shrink_halt_poll_ns(vcpu);
                        /* we had a short halt and our poll time is too small */
                        else if (vcpu->halt_poll_ns < vcpu->kvm->max_halt_poll_ns &&
-                                       block_ns < vcpu->kvm->max_halt_poll_ns)
+                                halt_ns < vcpu->kvm->max_halt_poll_ns)
                                grow_halt_poll_ns(vcpu);
                } else {
                        vcpu->halt_poll_ns = 0;
                }
        }
 
-       trace_kvm_vcpu_wakeup(block_ns, waited, vcpu_valid_wakeup(vcpu));
-       kvm_arch_vcpu_block_finish(vcpu);
+       trace_kvm_vcpu_wakeup(halt_ns, waited, vcpu_valid_wakeup(vcpu));
 }
-EXPORT_SYMBOL_GPL(kvm_vcpu_block);
+EXPORT_SYMBOL_GPL(kvm_vcpu_halt);
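The tail of kvm_vcpu_halt() adapts the per-vCPU poll window: an invalid wakeup or a block longer than the per-VM cap shrinks it, a genuine halt shorter than the cap grows it, and a zero cap disables polling. A standalone sketch of that policy; the grow/shrink arithmetic and the seed value are placeholders (in KVM they come from grow_halt_poll_ns()/shrink_halt_poll_ns() and module parameters), only the branch structure follows the code above:

#include <stdbool.h>
#include <stdint.h>

struct poll_state {
	uint64_t halt_poll_ns;		/* current per-vCPU poll window */
	uint64_t max_halt_poll_ns;	/* per-VM cap; 0 disables polling */
};

static void adjust_poll_window(struct poll_state *p, uint64_t halt_ns,
			       bool valid_wakeup, unsigned int grow,
			       unsigned int shrink)
{
	if (!valid_wakeup) {
		/* Spurious wakeup: polling clearly did not help. */
		p->halt_poll_ns = shrink ? p->halt_poll_ns / shrink : 0;
		return;
	}
	if (!p->max_halt_poll_ns) {
		p->halt_poll_ns = 0;	/* halt polling disabled for this VM */
		return;
	}
	if (halt_ns <= p->halt_poll_ns)
		return;			/* the poll window already covered the halt */
	if (p->halt_poll_ns && halt_ns > p->max_halt_poll_ns)
		p->halt_poll_ns = shrink ? p->halt_poll_ns / shrink : 0;	/* long block: shrink */
	else if (p->halt_poll_ns < p->max_halt_poll_ns && halt_ns < p->max_halt_poll_ns)
		p->halt_poll_ns = p->halt_poll_ns ? p->halt_poll_ns * grow : 10000;	/* short halt: grow */
}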
 
 bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu)
 {
-       struct rcuwait *waitp;
-
-       waitp = kvm_arch_vcpu_get_wait(vcpu);
-       if (rcuwait_wake_up(waitp)) {
+       if (__kvm_vcpu_wake_up(vcpu)) {
                WRITE_ONCE(vcpu->ready, true);
                ++vcpu->stat.generic.halt_wakeup;
                return true;
@@ -3247,6 +3424,19 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
        if (kvm_vcpu_wake_up(vcpu))
                return;
 
+       me = get_cpu();
+       /*
+        * The only state change done outside the vcpu mutex is IN_GUEST_MODE
+        * to EXITING_GUEST_MODE.  Therefore the moderately expensive "should
+        * kick" check does not need atomic operations if kvm_vcpu_kick is used
+        * within the vCPU thread itself.
+        */
+       if (vcpu == __this_cpu_read(kvm_running_vcpu)) {
+               if (vcpu->mode == IN_GUEST_MODE)
+                       WRITE_ONCE(vcpu->mode, EXITING_GUEST_MODE);
+               goto out;
+       }
+
        /*
         * Note, the vCPU could get migrated to a different pCPU at any point
         * after kvm_arch_vcpu_should_kick(), which could result in sending an
@@ -3254,12 +3444,12 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
         * IPI is to force the vCPU to leave IN_GUEST_MODE, and migrating the
         * vCPU also requires it to leave IN_GUEST_MODE.
         */
-       me = get_cpu();
        if (kvm_arch_vcpu_should_kick(vcpu)) {
                cpu = READ_ONCE(vcpu->cpu);
                if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu))
                        smp_send_reschedule(cpu);
        }
+out:
        put_cpu();
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_kick);
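With the fast path above, a vCPU kicking itself never sends an IPI: flipping its own mode to EXITING_GUEST_MODE is enough, because the transition is observed before the next guest entry. A remote vCPU only gets a reschedule IPI when it is genuinely in guest mode on another online pCPU. A reduced, illustrative decision helper (the online/valid-CPU test is collapsed into a simple bounds check; names and values are not KVM's):

#include <stdbool.h>

enum vcpu_mode { OUTSIDE_GUEST_MODE, IN_GUEST_MODE, EXITING_GUEST_MODE };

static bool kick_needs_ipi(enum vcpu_mode *mode, int vcpu_cpu, int this_cpu, bool self)
{
	if (self) {
		/* The vCPU thread notices EXITING_GUEST_MODE before it
		 * re-enters the guest; no interrupt needed. */
		if (*mode == IN_GUEST_MODE)
			*mode = EXITING_GUEST_MODE;
		return false;
	}
	/* Remote vCPU: only worth an IPI if it is actually running guest
	 * code on a different, valid CPU. */
	return *mode == IN_GUEST_MODE && vcpu_cpu != this_cpu && vcpu_cpu >= 0;
}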
@@ -3357,10 +3547,10 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
        struct kvm *kvm = me->kvm;
        struct kvm_vcpu *vcpu;
        int last_boosted_vcpu = me->kvm->last_boosted_vcpu;
+       unsigned long i;
        int yielded = 0;
        int try = 3;
        int pass;
-       int i;
 
        kvm_vcpu_set_in_spin_loop(me, true);
        /*
@@ -3381,8 +3571,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
                                continue;
                        if (vcpu == me)
                                continue;
-                       if (rcuwait_active(&vcpu->wait) &&
-                           !vcpu_dy_runnable(vcpu))
+                       if (kvm_vcpu_is_blocking(vcpu) && !vcpu_dy_runnable(vcpu))
                                continue;
                        if (READ_ONCE(vcpu->preempted) && yield_to_kernel_mode &&
                            !kvm_arch_dy_has_pending_interrupt(vcpu) &&
@@ -3411,7 +3600,7 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_on_spin);
 
 static bool kvm_page_in_dirty_ring(struct kvm *kvm, unsigned long pgoff)
 {
-#if KVM_DIRTY_LOG_PAGE_OFFSET > 0
+#ifdef CONFIG_HAVE_KVM_DIRTY_RING
        return (pgoff >= KVM_DIRTY_LOG_PAGE_OFFSET) &&
            (pgoff < KVM_DIRTY_LOG_PAGE_OFFSET +
             kvm->dirty_ring_size / PAGE_SIZE);
@@ -3567,7 +3756,10 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
        }
 
        vcpu->vcpu_idx = atomic_read(&kvm->online_vcpus);
-       BUG_ON(kvm->vcpus[vcpu->vcpu_idx]);
+       r = xa_insert(&kvm->vcpu_array, vcpu->vcpu_idx, vcpu, GFP_KERNEL_ACCOUNT);
+       BUG_ON(r == -EBUSY);
+       if (r)
+               goto unlock_vcpu_destroy;
 
        /* Fill the stats id string for the vcpu */
        snprintf(vcpu->stats_id, sizeof(vcpu->stats_id), "kvm-%d/vcpu-%d",
@@ -3577,15 +3769,14 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
        kvm_get_kvm(kvm);
        r = create_vcpu_fd(vcpu);
        if (r < 0) {
+               xa_erase(&kvm->vcpu_array, vcpu->vcpu_idx);
                kvm_put_kvm_no_destroy(kvm);
                goto unlock_vcpu_destroy;
        }
 
-       kvm->vcpus[vcpu->vcpu_idx] = vcpu;
-
        /*
-        * Pairs with smp_rmb() in kvm_get_vcpu.  Write kvm->vcpus
-        * before kvm->online_vcpu's incremented value.
+        * Pairs with smp_rmb() in kvm_get_vcpu.  Store the vcpu
+        * pointer before kvm->online_vcpus is incremented.
         */
        smp_wmb();
        atomic_inc(&kvm->online_vcpus);
@@ -4114,7 +4305,7 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
        case KVM_CAP_NR_MEMSLOTS:
                return KVM_USER_MEM_SLOTS;
        case KVM_CAP_DIRTY_LOG_RING:
-#if KVM_DIRTY_LOG_PAGE_OFFSET > 0
+#ifdef CONFIG_HAVE_KVM_DIRTY_RING
                return KVM_DIRTY_RING_MAX_ENTRIES * sizeof(struct kvm_dirty_gfn);
 #else
                return 0;
@@ -4167,7 +4358,7 @@ static int kvm_vm_ioctl_enable_dirty_log_ring(struct kvm *kvm, u32 size)
 
 static int kvm_vm_ioctl_reset_dirty_pages(struct kvm *kvm)
 {
-       int i;
+       unsigned long i;
        struct kvm_vcpu *vcpu;
        int cleared = 0;
 
@@ -5086,7 +5277,7 @@ static int kvm_clear_stat_per_vm(struct kvm *kvm, size_t offset)
 
 static int kvm_get_stat_per_vcpu(struct kvm *kvm, size_t offset, u64 *val)
 {
-       int i;
+       unsigned long i;
        struct kvm_vcpu *vcpu;
 
        *val = 0;
@@ -5099,7 +5290,7 @@ static int kvm_get_stat_per_vcpu(struct kvm *kvm, size_t offset, u64 *val)
 
 static int kvm_clear_stat_per_vcpu(struct kvm *kvm, size_t offset)
 {
-       int i;
+       unsigned long i;
        struct kvm_vcpu *vcpu;
 
        kvm_for_each_vcpu(i, vcpu, kvm)