Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
author David S. Miller <davem@davemloft.net>
Sun, 17 Nov 2019 02:47:31 +0000 (18:47 -0800)
committer David S. Miller <davem@davemloft.net>
Sun, 17 Nov 2019 05:51:42 +0000 (21:51 -0800)
Lots of overlapping changes and parallel additions, stuff
like that.

Signed-off-by: David S. Miller <davem@davemloft.net>
245 files changed:
.mailmap
Documentation/ABI/testing/sysfs-devices-system-cpu
Documentation/admin-guide/hw-vuln/index.rst
Documentation/admin-guide/hw-vuln/multihit.rst [new file with mode: 0644]
Documentation/admin-guide/hw-vuln/tsx_async_abort.rst [new file with mode: 0644]
Documentation/admin-guide/kernel-parameters.txt
Documentation/x86/index.rst
Documentation/x86/tsx_async_abort.rst [new file with mode: 0644]
MAINTAINERS
Makefile
arch/arm/boot/dts/imx6-logicpd-baseboard.dtsi
arch/arm/boot/dts/imx6qdl-sabreauto.dtsi
arch/arm/boot/dts/stm32mp157c-ev1.dts
arch/arm/boot/dts/stm32mp157c.dtsi
arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts
arch/arm/mach-sunxi/mc_smp.c
arch/arm64/boot/dts/freescale/fsl-ls1028a-qds.dts
arch/arm64/boot/dts/freescale/imx8mm.dtsi
arch/arm64/boot/dts/freescale/imx8mn.dtsi
arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi
arch/arm64/include/asm/vdso/vsyscall.h
arch/mips/include/asm/vdso/vsyscall.h
arch/mips/sgi-ip27/Kconfig
arch/mips/sgi-ip27/ip27-init.c
arch/mips/sgi-ip27/ip27-memory.c
arch/sparc/vdso/Makefile
arch/x86/Kconfig
arch/x86/include/asm/cpufeatures.h
arch/x86/include/asm/kvm_host.h
arch/x86/include/asm/msr-index.h
arch/x86/include/asm/nospec-branch.h
arch/x86/include/asm/processor.h
arch/x86/kernel/apic/apic.c
arch/x86/kernel/cpu/Makefile
arch/x86/kernel/cpu/bugs.c
arch/x86/kernel/cpu/common.c
arch/x86/kernel/cpu/cpu.h
arch/x86/kernel/cpu/intel.c
arch/x86/kernel/cpu/resctrl/ctrlmondata.c
arch/x86/kernel/cpu/resctrl/rdtgroup.c
arch/x86/kernel/cpu/tsx.c [new file with mode: 0644]
arch/x86/kernel/dumpstack_64.c
arch/x86/kernel/early-quirks.c
arch/x86/kernel/tsc.c
arch/x86/kvm/mmu.c
arch/x86/kvm/mmu.h
arch/x86/kvm/paging_tmpl.h
arch/x86/kvm/vmx/vmx.c
arch/x86/kvm/vmx/vmx.h
arch/x86/kvm/x86.c
block/bfq-iosched.c
block/bio.c
block/blk-iocost.c
drivers/base/cpu.c
drivers/base/memory.c
drivers/block/rbd.c
drivers/block/rsxx/core.c
drivers/char/hw_random/core.c
drivers/char/random.c
drivers/clocksource/sh_mtu2.c
drivers/clocksource/timer-mediatek.c
drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
drivers/gpu/drm/i915/display/intel_display_power.c
drivers/gpu/drm/i915/gem/i915_gem_context.c
drivers/gpu/drm/i915/gem/i915_gem_context_types.h
drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
drivers/gpu/drm/i915/gt/intel_engine_types.h
drivers/gpu/drm/i915/gt/intel_gt_pm.c
drivers/gpu/drm/i915/gt/intel_mocs.c
drivers/gpu/drm/i915/gvt/dmabuf.c
drivers/gpu/drm/i915/i915_cmd_parser.c
drivers/gpu/drm/i915/i915_drv.c
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_getparam.c
drivers/gpu/drm/i915/i915_reg.h
drivers/gpu/drm/i915/intel_pm.c
drivers/gpu/drm/i915/intel_pm.h
drivers/gpu/drm/sun4i/sun4i_tcon.c
drivers/hwtracing/intel_th/gth.c
drivers/hwtracing/intel_th/msu.c
drivers/hwtracing/intel_th/pci.c
drivers/iio/adc/stm32-adc.c
drivers/iio/imu/adis16480.c
drivers/iio/imu/inv_mpu6050/inv_mpu_core.c
drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h
drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c
drivers/iio/proximity/srf04.c
drivers/infiniband/hw/hfi1/init.c
drivers/infiniband/hw/hfi1/pcie.c
drivers/infiniband/hw/hfi1/rc.c
drivers/infiniband/hw/hfi1/tid_rdma.c
drivers/infiniband/hw/hfi1/tid_rdma.h
drivers/infiniband/hw/hns/hns_roce_hem.h
drivers/infiniband/hw/hns/hns_roce_srq.c
drivers/input/ff-memless.c
drivers/input/mouse/synaptics.c
drivers/input/rmi4/rmi_f11.c
drivers/input/rmi4/rmi_f12.c
drivers/input/rmi4/rmi_f54.c
drivers/input/touchscreen/cyttsp4_core.c
drivers/interconnect/core.c
drivers/interconnect/qcom/qcs404.c
drivers/interconnect/qcom/sdm845.c
drivers/mmc/host/sdhci-of-at91.c
drivers/net/can/slcan.c
drivers/net/dsa/mv88e6xxx/ptp.c
drivers/net/ethernet/broadcom/tg3.c
drivers/net/ethernet/cirrus/ep93xx_eth.c
drivers/net/ethernet/cortina/gemini.c
drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
drivers/net/ethernet/intel/igb/igb_ptp.c
drivers/net/ethernet/marvell/octeontx2/af/cgx.h
drivers/net/ethernet/marvell/octeontx2/af/cgx_fw_if.h
drivers/net/ethernet/marvell/octeontx2/af/common.h
drivers/net/ethernet/marvell/octeontx2/af/mbox.h
drivers/net/ethernet/marvell/octeontx2/af/npc.h
drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h
drivers/net/ethernet/marvell/octeontx2/af/rvu.h
drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h
drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h
drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
drivers/net/ethernet/microchip/lan743x_ptp.c
drivers/net/ethernet/renesas/ravb.h
drivers/net/ethernet/renesas/ravb_main.c
drivers/net/ethernet/renesas/ravb_ptp.c
drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
drivers/net/ethernet/stmicro/stmmac/dwmac5.h
drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
drivers/net/ethernet/stmicro/stmmac/hwif.h
drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
drivers/net/phy/dp83640.c
drivers/net/phy/mdio_bus.c
drivers/net/slip/slip.c
drivers/net/usb/ax88172a.c
drivers/net/usb/cdc_ncm.c
drivers/net/usb/qmi_wwan.c
drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
drivers/nfc/nxp-nci/i2c.c
drivers/pinctrl/intel/pinctrl-cherryview.c
drivers/pinctrl/intel/pinctrl-intel.c
drivers/pinctrl/pinctrl-stmfx.c
drivers/ptp/ptp_chardev.c
drivers/reset/core.c
drivers/scsi/qla2xxx/qla_mid.c
drivers/scsi/qla2xxx/qla_os.c
drivers/scsi/scsi_lib.c
drivers/scsi/sd_zbc.c
drivers/soc/imx/gpc.c
drivers/soundwire/Kconfig
drivers/soundwire/intel.c
drivers/soundwire/slave.c
drivers/thunderbolt/nhi_ops.c
drivers/thunderbolt/switch.c
drivers/watchdog/bd70528_wdt.c
drivers/watchdog/cpwd.c
drivers/watchdog/imx_sc_wdt.c
drivers/watchdog/meson_gxbb_wdt.c
drivers/watchdog/pm8916_wdt.c
fs/afs/dir.c
fs/aio.c
fs/autofs/expire.c
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/space-info.c
fs/btrfs/tree-checker.c
fs/btrfs/volumes.c
fs/ceph/file.c
fs/cifs/smb2pdu.h
fs/configfs/symlink.c
fs/ecryptfs/inode.c
fs/exportfs/expfs.c
fs/io_uring.c
fs/namespace.c
include/asm-generic/vdso/vsyscall.h
include/linux/can/core.h
include/linux/cpu.h
include/linux/kvm_host.h
include/linux/memory.h
include/linux/reset-controller.h
include/linux/reset.h
include/trace/events/tcp.h
include/uapi/linux/devlink.h
include/uapi/linux/ptp_clock.h
kernel/audit_watch.c
kernel/cgroup/cgroup.c
kernel/cpu.c
kernel/events/core.c
kernel/irq/irqdomain.c
kernel/sched/core.c
kernel/sched/deadline.c
kernel/sched/fair.c
kernel/sched/idle.c
kernel/sched/rt.c
kernel/sched/sched.h
kernel/sched/stop_task.c
kernel/signal.c
kernel/stacktrace.c
kernel/time/ntp.c
kernel/time/vsyscall.c
lib/Kconfig
lib/xz/xz_dec_lzma2.c
mm/debug.c
mm/hugetlb_cgroup.c
mm/khugepaged.c
mm/madvise.c
mm/memcontrol.c
mm/memory_hotplug.c
mm/mempolicy.c
mm/page_io.c
mm/slub.c
net/can/af_can.c
net/can/j1939/main.c
net/can/j1939/socket.c
net/can/j1939/transport.c
net/core/devlink.c
net/dsa/tag_8021q.c
net/ipv4/ipmr.c
net/ipv6/seg6_local.c
net/rds/ib_cm.c
net/smc/af_smc.c
net/tipc/core.c
net/tipc/core.h
net/xfrm/xfrm_input.c
net/xfrm/xfrm_state.c
scripts/tools-support-relr.sh
sound/core/pcm_lib.c
sound/pci/hda/hda_intel.c
sound/pci/hda/patch_hdmi.c
sound/usb/endpoint.c
sound/usb/mixer.c
sound/usb/quirks.c
sound/usb/validate.c
tools/perf/util/hist.c
tools/perf/util/scripting-engines/trace-event-perl.c
tools/perf/util/scripting-engines/trace-event-python.c
tools/perf/util/trace-event-parse.c
tools/perf/util/trace-event.h
tools/testing/selftests/drivers/net/mlxsw/vxlan.sh
tools/testing/selftests/kvm/lib/assert.c
tools/testing/selftests/ptp/testptp.c
virt/kvm/kvm_main.c

index 83d7e75..fd62192 100644 (file)
--- a/.mailmap
+++ b/.mailmap
@@ -108,6 +108,10 @@ Jason Gunthorpe <jgg@ziepe.ca> <jgg@mellanox.com>
 Jason Gunthorpe <jgg@ziepe.ca> <jgunthorpe@obsidianresearch.com>
 Javi Merino <javi.merino@kernel.org> <javi.merino@arm.com>
 <javier@osg.samsung.com> <javier.martinez@collabora.co.uk>
+Jayachandran C <c.jayachandran@gmail.com> <jayachandranc@netlogicmicro.com>
+Jayachandran C <c.jayachandran@gmail.com> <jchandra@broadcom.com>
+Jayachandran C <c.jayachandran@gmail.com> <jchandra@digeo.com>
+Jayachandran C <c.jayachandran@gmail.com> <jnair@caviumnetworks.com>
 Jean Tourrilhes <jt@hpl.hp.com>
 <jean-philippe@linaro.org> <jean-philippe.brucker@arm.com>
 Jeff Garzik <jgarzik@pretzel.yyz.us>
index 06d0931..fc20cde 100644 (file)
@@ -486,6 +486,8 @@ What:               /sys/devices/system/cpu/vulnerabilities
                /sys/devices/system/cpu/vulnerabilities/spec_store_bypass
                /sys/devices/system/cpu/vulnerabilities/l1tf
                /sys/devices/system/cpu/vulnerabilities/mds
+               /sys/devices/system/cpu/vulnerabilities/tsx_async_abort
+               /sys/devices/system/cpu/vulnerabilities/itlb_multihit
 Date:          January 2018
 Contact:       Linux kernel mailing list <linux-kernel@vger.kernel.org>
 Description:   Information about CPU vulnerabilities
index 49311f3..0795e3c 100644 (file)
@@ -12,3 +12,5 @@ are configurable at compile, boot or run time.
    spectre
    l1tf
    mds
+   tsx_async_abort
+   multihit.rst
diff --git a/Documentation/admin-guide/hw-vuln/multihit.rst b/Documentation/admin-guide/hw-vuln/multihit.rst
new file mode 100644 (file)
index 0000000..ba9988d
--- /dev/null
@@ -0,0 +1,163 @@
+iTLB multihit
+=============
+
+iTLB multihit is an erratum where some processors may incur a machine check
+error, possibly resulting in an unrecoverable CPU lockup, when an
+instruction fetch hits multiple entries in the instruction TLB. This can
+occur when the page size is changed along with either the physical address
+or cache type. A malicious guest running on a virtualized system can
+exploit this erratum to perform a denial of service attack.
+
+
+Affected processors
+-------------------
+
+Variations of this erratum are present on most Intel Core and Xeon processor
+models. The erratum is not present on:
+
+   - non-Intel processors
+
+   - Some Atoms (Airmont, Bonnell, Goldmont, GoldmontPlus, Saltwell, Silvermont)
+
+   - Intel processors that have the PSCHANGE_MC_NO bit set in the
+     IA32_ARCH_CAPABILITIES MSR.
+
+
+Related CVEs
+------------
+
+The following CVE entry is related to this issue:
+
+   ==============  =================================================
+   CVE-2018-12207  Machine Check Error Avoidance on Page Size Change
+   ==============  =================================================
+
+
+Problem
+-------
+
+Privileged software, including the OS and virtual machine managers (VMMs), is
+in charge of memory management. A key component in memory management is the control
+of the page tables. Modern processors use virtual memory, a technique that creates
+the illusion of a very large memory space. This virtual space is split
+into pages of a given size. Page tables translate virtual addresses to physical
+addresses.
+
+To reduce latency when performing a virtual to physical address translation,
+processors include a structure, called TLB, that caches recent translations.
+There are separate TLBs for instruction (iTLB) and data (dTLB).
+
+Under this erratum, instructions are fetched from a linear address translated
+using a 4 KB translation cached in the iTLB. Privileged software then modifies
+the paging structures so that the same linear address is mapped using a large
+page size (2 MB, 4 MB, 1 GB) with a different physical address or memory type.
+After the paging structure modification, but before the software invalidates
+any iTLB entries for the linear address, a code fetch on the same linear
+address may cause a machine-check error which can result in a system hang or
+shutdown.
+
+
+Attack scenarios
+----------------
+
+Attacks against the iTLB multihit erratum can be mounted from malicious
+guests in a virtualized system.
+
+
+iTLB multihit system information
+--------------------------------
+
+The Linux kernel provides a sysfs interface to enumerate the current iTLB
+multihit status of the system: whether the system is vulnerable and which
+mitigations are active. The relevant sysfs file is:
+
+/sys/devices/system/cpu/vulnerabilities/itlb_multihit
+
+The possible values in this file are:
+
+.. list-table::
+
+     * - Not affected
+       - The processor is not vulnerable.
+     * - KVM: Mitigation: Split huge pages
+       - Software changes mitigate this issue.
+     * - KVM: Vulnerable
+       - The processor is vulnerable, but no mitigation is enabled.
+
+
+Enumeration of the erratum
+--------------------------------
+
+A new bit has been allocated in the IA32_ARCH_CAPABILITIES MSR (PSCHANGE_MC_NO)
+and will be set on CPUs which are mitigated against this issue.
+
+   =======================================   ===========   ===============================
+   IA32_ARCH_CAPABILITIES MSR                Not present   Possibly vulnerable, check model
+   IA32_ARCH_CAPABILITIES[PSCHANGE_MC_NO]    '0'           Likely vulnerable, check model
+   IA32_ARCH_CAPABILITIES[PSCHANGE_MC_NO]    '1'           Not vulnerable
+   =======================================   ===========   ===============================
+
+
+Mitigation mechanism
+-------------------------
+
+This erratum can be mitigated by restricting the use of large page sizes to
+non-executable pages.  This forces all iTLB entries to be 4K, and removes
+the possibility of multiple hits.
+
+In order to mitigate the vulnerability, KVM initially marks all huge pages
+as non-executable. If the guest attempts to execute in one of those pages,
+the page is broken down into 4K pages, which are then marked executable.
+
+If EPT is disabled or not available on the host, KVM is in control of TLB
+flushes and the problematic situation cannot happen.  However, the shadow
+EPT paging mechanism used by nested virtualization is vulnerable, because
+the nested guest can trigger multiple iTLB hits by modifying its own
+(non-nested) page tables.  For simplicity, KVM will make large pages
+non-executable in all shadow paging modes.
+
+Mitigation control on the kernel command line and KVM - module parameter
+------------------------------------------------------------------------
+
+The KVM hypervisor mitigation mechanism for marking huge pages as
+non-executable can be controlled with a module parameter "nx_huge_pages=".
+The kernel command line allows controlling the iTLB multihit mitigations at
+boot time with the option "kvm.nx_huge_pages=".
+
+The valid arguments for these options are:
+
+  ==========  ================================================================
+  force       Mitigation is enabled. In this case, the mitigation implements
+              non-executable huge pages in Linux kernel KVM module. All huge
+              pages in the EPT are marked as non-executable.
+              If a guest attempts to execute in one of those pages, the page is
+              broken down into 4K pages, which are then marked executable.
+
+  off         Mitigation is disabled.
+
+  auto        Enable mitigation only if the platform is affected and the kernel
+              was not booted with the "mitigations=off" command line parameter.
+              This is the default option.
+  ==========  ================================================================
+
+
+Mitigation selection guide
+--------------------------
+
+1. No virtualization in use
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+   The system is protected by the kernel unconditionally and no further
+   action is required.
+
+2. Virtualization with trusted guests
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+   If the guest comes from a trusted source, you may assume that the guest will
+not attempt to maliciously exploit this erratum and no further action is
+   required.
+
+3. Virtualization with untrusted guests
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+   If the guest comes from an untrusted source, the host kernel will need to
+   apply the iTLB multihit mitigation via the kernel command line or the kvm
+   module parameter.
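To make the interfaces described above concrete, here is a minimal userspace
sketch (not part of this merge): it prints the iTLB multihit state exported
through the sysfs vulnerabilities file and, assuming the kvm module is loaded
and exposes its parameters in the usual place, the current nx_huge_pages
setting. Both paths come from the text above; the helper itself is
illustrative only.

    /* Print the itlb_multihit mitigation state and, if available, the
     * kvm.nx_huge_pages module parameter.  The parameter path
     * /sys/module/kvm/parameters/nx_huge_pages is an assumption. */
    #include <stdio.h>

    static void print_file(const char *path)
    {
            char buf[128];
            FILE *f = fopen(path, "r");

            if (!f) {
                    printf("%s: <not available>\n", path);
                    return;
            }
            if (fgets(buf, sizeof(buf), f))
                    printf("%s: %s", path, buf);
            fclose(f);
    }

    int main(void)
    {
            print_file("/sys/devices/system/cpu/vulnerabilities/itlb_multihit");
            print_file("/sys/module/kvm/parameters/nx_huge_pages");
            return 0;
    }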
diff --git a/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst b/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst
new file mode 100644 (file)
index 0000000..fddbd75
--- /dev/null
@@ -0,0 +1,276 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+TAA - TSX Asynchronous Abort
+======================================
+
+TAA is a hardware vulnerability that allows unprivileged speculative access to
+data which is available in various CPU internal buffers by using asynchronous
+aborts within an Intel TSX transactional region.
+
+Affected processors
+-------------------
+
+This vulnerability only affects Intel processors that support Intel
+Transactional Synchronization Extensions (TSX) when the TAA_NO bit (bit 8)
+is 0 in the IA32_ARCH_CAPABILITIES MSR.  On processors where the MDS_NO bit
+(bit 5) is 0 in the IA32_ARCH_CAPABILITIES MSR, the existing MDS mitigations
+also mitigate against TAA.
+
+Whether a processor is affected or not can be read out from the TAA
+vulnerability file in sysfs. See :ref:`tsx_async_abort_sys_info`.
+
+Related CVEs
+------------
+
+The following CVE entry is related to this TAA issue:
+
+   ==============  =====  ===================================================
+   CVE-2019-11135  TAA    TSX Asynchronous Abort (TAA) condition on some
+                          microprocessors utilizing speculative execution may
+                          allow an authenticated user to potentially enable
+                          information disclosure via a side channel with
+                          local access.
+   ==============  =====  ===================================================
+
+Problem
+-------
+
+When performing store, load or L1 refill operations, processors write
+data into temporary microarchitectural structures (buffers). The data in
+those buffers can be forwarded to load operations as an optimization.
+
+Intel TSX is an extension to the x86 instruction set architecture that adds
+hardware transactional memory support to improve performance of multi-threaded
+software. TSX lets the processor expose and exploit concurrency hidden in an
+application by dynamically avoiding unnecessary synchronization.
+
+TSX supports atomic memory transactions that are either committed (success) or
+aborted. During an abort, operations that happened within the transactional region
+are rolled back. An asynchronous abort takes place, among other reasons, when a
+different thread accesses a cache line that is also used within the transactional
+region and that access might lead to a data race.
+
+Immediately after an uncompleted asynchronous abort, certain speculatively
+executed loads may read data from those internal buffers and pass it to dependent
+operations. This can be then used to infer the value via a cache side channel
+attack.
+
+Because the buffers are potentially shared between Hyper-Threads, cross
+Hyper-Thread attacks are possible.
+
+The victim of a malicious actor does not need to make use of TSX. Only the
+attacker needs to begin a TSX transaction and raise an asynchronous abort
+which in turn potentially leaks data stored in the buffers.
+
+More detailed technical information is available in the TAA specific x86
+architecture section: :ref:`Documentation/x86/tsx_async_abort.rst <tsx_async_abort>`.
+
+
+Attack scenarios
+----------------
+
+Attacks against the TAA vulnerability can be implemented from unprivileged
+applications running on hosts or guests.
+
+As for MDS, the attacker has no control over the memory addresses that can
+be leaked. Only the victim is responsible for bringing data to the CPU. As
+a result, the malicious actor has to sample as much data as possible and
+then postprocess it to try to infer any useful information from it.
+
+A potential attacker only has read access to the data. Also, there is no direct
+privilege escalation by using this technique.
+
+
+.. _tsx_async_abort_sys_info:
+
+TAA system information
+-----------------------
+
+The Linux kernel provides a sysfs interface to enumerate the current TAA status
+of mitigated systems. The relevant sysfs file is:
+
+/sys/devices/system/cpu/vulnerabilities/tsx_async_abort
+
+The possible values in this file are:
+
+.. list-table::
+
+   * - 'Vulnerable'
+     - The CPU is affected by this vulnerability and the microcode and kernel mitigation are not applied.
+   * - 'Vulnerable: Clear CPU buffers attempted, no microcode'
+     - The system tries to clear the buffers but the microcode might not support the operation.
+   * - 'Mitigation: Clear CPU buffers'
+     - The microcode has been updated to clear the buffers. TSX is still enabled.
+   * - 'Mitigation: TSX disabled'
+     - TSX is disabled.
+   * - 'Not affected'
+     - The CPU is not affected by this issue.
+
+.. _ucode_needed:
+
+Best effort mitigation mode
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+If the processor is vulnerable, but the availability of the microcode-based
+mitigation mechanism is not advertised via CPUID, the kernel selects a best
+effort mitigation mode.  This mode invokes the mitigation instructions
+without a guarantee that they clear the CPU buffers.
+
+This is done to address virtualization scenarios where the host has the
+microcode update applied, but the hypervisor is not yet updated to expose the
+CPUID to the guest. If the host has updated microcode the protection takes
+effect; otherwise a few CPU cycles are wasted pointlessly.
+
+The state in the tsx_async_abort sysfs file reflects this situation
+accordingly.
+
+
+Mitigation mechanism
+--------------------
+
+The kernel detects the affected CPUs and the presence of the microcode which is
+required. If a CPU is affected and the microcode is available, then the kernel
+enables the mitigation by default.
+
+
+The mitigation can be controlled at boot time via a kernel command line option.
+See :ref:`taa_mitigation_control_command_line`.
+
+.. _virt_mechanism:
+
+Virtualization mitigation
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Affected systems where the host has the TAA microcode and TAA is mitigated by
+having disabled TSX previously are not vulnerable regardless of the status
+of the VMs.
+
+In all other cases, if the host either does not have the TAA microcode or
+the kernel is not mitigated, the system might be vulnerable.
+
+
+.. _taa_mitigation_control_command_line:
+
+Mitigation control on the kernel command line
+---------------------------------------------
+
+The kernel command line allows controlling the TAA mitigations at boot time with
+the option "tsx_async_abort=". The valid arguments for this option are:
+
+  ============  =============================================================
+  off           This option disables the TAA mitigation on affected platforms.
+                If the system has TSX enabled (see next parameter) and the CPU
+                is affected, the system is vulnerable.
+
+  full          TAA mitigation is enabled. If TSX is enabled, on an affected
+                system it will clear CPU buffers on ring transitions. On
+                systems which are MDS-affected and deploy MDS mitigation,
+                TAA is also mitigated. Specifying this option on those
+                systems will have no effect.
+
+  full,nosmt    The same as tsx_async_abort=full, with SMT disabled on
+                vulnerable CPUs that have TSX enabled. This is the complete
+                mitigation. When TSX is disabled, SMT is not disabled because
+                the CPU is not vulnerable to cross-thread TAA attacks.
+  ============  =============================================================
+
+Not specifying this option is equivalent to "tsx_async_abort=full".
+
+The kernel command line also allows controlling the TSX feature using the
+parameter "tsx=" on CPUs which support TSX control. MSR_IA32_TSX_CTRL is used
+to control the TSX feature and the enumeration of the TSX feature bits (RTM
+and HLE) in CPUID.
+
+The valid options are:
+
+  ============  =============================================================
+  off           Disables TSX on the system.
+
+                Note that this option takes effect only on newer CPUs which are
+                not vulnerable to MDS, i.e., have MSR_IA32_ARCH_CAPABILITIES.MDS_NO=1
+                and which get the new IA32_TSX_CTRL MSR through a microcode
+                update. This new MSR allows for the reliable deactivation of
+                the TSX functionality.
+
+  on            Enables TSX.
+
+                Although there are mitigations for all known security
+                vulnerabilities, TSX has been known to be an accelerator for
+                several previous speculation-related CVEs, and so there may be
+                unknown security risks associated with leaving it enabled.
+
+  auto          Disables TSX if X86_BUG_TAA is present, otherwise enables TSX
+                on the system.
+  ============  =============================================================
+
+Not specifying this option is equivalent to "tsx=off".
+
+The following combinations of the "tsx_async_abort" and "tsx" options are
+possible. For affected platforms tsx=auto is equivalent to tsx=off and the
+result will be:
+
+  =========  ==========================   =========================================
+  tsx=on     tsx_async_abort=full         The system will use VERW to clear CPU
+                                          buffers. Cross-thread attacks are still
+                                          possible on SMT machines.
+  tsx=on     tsx_async_abort=full,nosmt   As above, cross-thread attacks on SMT
+                                          mitigated.
+  tsx=on     tsx_async_abort=off          The system is vulnerable.
+  tsx=off    tsx_async_abort=full         TSX might be disabled if microcode
+                                          provides a TSX control MSR. If so,
+                                          system is not vulnerable.
+  tsx=off    tsx_async_abort=full,nosmt   Ditto
+  tsx=off    tsx_async_abort=off          Ditto
+  =========  ==========================   =========================================
+
+
+For unaffected platforms "tsx=on" and "tsx_async_abort=full" do not clear CPU
+buffers.  For platforms without TSX control (MSR_IA32_ARCH_CAPABILITIES.MDS_NO=0)
+the "tsx" command line argument has no effect.
+
+For the affected platforms, the table below indicates the mitigation status
+for the combinations of the CPUID bit MD_CLEAR and IA32_ARCH_CAPABILITIES MSR
+bits MDS_NO
+and TSX_CTRL_MSR.
+
+  =======  =========  =============  ========================================
+  MDS_NO   MD_CLEAR   TSX_CTRL_MSR   Status
+  =======  =========  =============  ========================================
+    0          0            0        Vulnerable (needs microcode)
+    0          1            0        MDS and TAA mitigated via VERW
+    1          1            0        MDS fixed, TAA vulnerable if TSX enabled
+                                     because MD_CLEAR has no meaning and
+                                     VERW is not guaranteed to clear buffers
+    1          X            1        MDS fixed, TAA can be mitigated by
+                                     VERW or TSX_CTRL_MSR
+  =======  =========  =============  ========================================
+
+Mitigation selection guide
+--------------------------
+
+1. Trusted userspace and guests
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+If all user space applications are from a trusted source and do not execute
+untrusted code which is supplied externally, then the mitigation can be
+disabled. The same applies to virtualized environments with trusted guests.
+
+
+2. Untrusted userspace and guests
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+If there are untrusted applications or guests on the system, enabling TSX
+might allow a malicious actor to leak data from the host or from other
+processes running on the same physical core.
+
+If the microcode is available and TSX is disabled on the host, attacks
+are prevented in a virtualized environment as well, even if the VMs do not
+explicitly enable the mitigation.
+
+
+.. _taa_default_mitigations:
+
+Default mitigations
+-------------------
+
+The kernel's default action for vulnerable processors is:
+
+  - Deploy TSX disable mitigation (tsx_async_abort=full tsx=off).
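The sysfs strings and command line choices documented above are ultimately
driven by a handful of IA32_ARCH_CAPABILITIES bits (MDS_NO, PSCHANGE_MC_NO,
TSX_CTRL, TAA_NO). A minimal userspace sketch for inspecting them is shown
below; it is not part of this merge, requires root and the msr driver
("modprobe msr"), and assumes CPU 0 is representative of the system.

    /* Decode the IA32_ARCH_CAPABILITIES (0x10a) bits referenced by the TAA
     * and iTLB multihit documents via the /dev/cpu/<n>/msr interface. */
    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <unistd.h>

    #define MSR_IA32_ARCH_CAPABILITIES 0x10a

    int main(void)
    {
            uint64_t caps = 0;
            int fd = open("/dev/cpu/0/msr", O_RDONLY);

            if (fd < 0 ||
                pread(fd, &caps, sizeof(caps), MSR_IA32_ARCH_CAPABILITIES) != sizeof(caps)) {
                    perror("reading IA32_ARCH_CAPABILITIES failed");
                    return 1;
            }
            printf("MDS_NO         (bit 5): %lu\n", (unsigned long)(caps >> 5) & 1);
            printf("PSCHANGE_MC_NO (bit 6): %lu\n", (unsigned long)(caps >> 6) & 1);
            printf("TSX_CTRL       (bit 7): %lu\n", (unsigned long)(caps >> 7) & 1);
            printf("TAA_NO         (bit 8): %lu\n", (unsigned long)(caps >> 8) & 1);
            close(fd);
            return 0;
    }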
index a84a83f..8dee8f6 100644 (file)
                        KVM MMU at runtime.
                        Default is 0 (off)
 
+       kvm.nx_huge_pages=
+                       [KVM] Controls the software workaround for the
+                       X86_BUG_ITLB_MULTIHIT bug.
+                       force   : Always deploy workaround.
+                       off     : Never deploy workaround.
+                       auto    : Deploy workaround based on the presence of
+                                 X86_BUG_ITLB_MULTIHIT.
+
+                       Default is 'auto'.
+
+                       If the software workaround is enabled for the host,
+                       guests do not need to enable it for nested guests.
+
+       kvm.nx_huge_pages_recovery_ratio=
+                       [KVM] Controls how many 4KiB pages are periodically zapped
+                       back to huge pages.  0 disables the recovery, otherwise if
+                       the value is N KVM will zap 1/Nth of the 4KiB pages every
+                       minute.  The default is 60.
+
        kvm-amd.nested= [KVM,AMD] Allow nested virtualization in KVM/SVM.
                        Default is 1 (enabled)
 
                                               ssbd=force-off [ARM64]
                                               l1tf=off [X86]
                                               mds=off [X86]
+                                              tsx_async_abort=off [X86]
+                                              kvm.nx_huge_pages=off [X86]
+
+                               Exceptions:
+                                              This does not have any effect on
+                                              kvm.nx_huge_pages when
+                                              kvm.nx_huge_pages=force.
 
                        auto (default)
                                Mitigate all CPU vulnerabilities, but leave SMT
                                be fully mitigated, even if it means losing SMT.
                                Equivalent to: l1tf=flush,nosmt [X86]
                                               mds=full,nosmt [X86]
+                                              tsx_async_abort=full,nosmt [X86]
 
        mminit_loglevel=
                        [KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this
                        interruptions from clocksource watchdog are not
                        acceptable).
 
+       tsx=            [X86] Control Transactional Synchronization
+                       Extensions (TSX) feature in Intel processors that
+                       support TSX control.
+
+                       This parameter controls the TSX feature. The options are:
+
+                       on      - Enable TSX on the system. Although there are
+                               mitigations for all known security vulnerabilities,
+                               TSX has been known to be an accelerator for
+                               several previous speculation-related CVEs, and
+                               so there may be unknown security risks associated
+                               with leaving it enabled.
+
+                       off     - Disable TSX on the system. (Note that this
+                               option takes effect only on newer CPUs which are
+                               not vulnerable to MDS, i.e., have
+                               MSR_IA32_ARCH_CAPABILITIES.MDS_NO=1 and which get
+                               the new IA32_TSX_CTRL MSR through a microcode
+                               update. This new MSR allows for the reliable
+                               deactivation of the TSX functionality.)
+
+                       auto    - Disable TSX if X86_BUG_TAA is present,
+                                 otherwise enable TSX on the system.
+
+                       Not specifying this option is equivalent to tsx=off.
+
+                       See Documentation/admin-guide/hw-vuln/tsx_async_abort.rst
+                       for more details.
+
+       tsx_async_abort= [X86,INTEL] Control mitigation for the TSX Async
+                       Abort (TAA) vulnerability.
+
+                       Similar to Micro-architectural Data Sampling (MDS),
+                       certain CPUs that support Transactional
+                       Synchronization Extensions (TSX) are vulnerable to an
+                       exploit against CPU internal buffers which can forward
+                       information to a disclosure gadget under certain
+                       conditions.
+
+                       In vulnerable processors, the speculatively forwarded
+                       data can be used in a cache side channel attack, to
+                       access data to which the attacker does not have direct
+                       access.
+
+                       This parameter controls the TAA mitigation.  The
+                       options are:
+
+                       full       - Enable TAA mitigation on vulnerable CPUs
+                                    if TSX is enabled.
+
+                       full,nosmt - Enable TAA mitigation and disable SMT on
+                                    vulnerable CPUs. If TSX is disabled, SMT
+                                    is not disabled because the CPU is not
+                                    vulnerable to cross-thread TAA attacks.
+                       off        - Unconditionally disable TAA mitigation
+
+                       Not specifying this option is equivalent to
+                       tsx_async_abort=full.  On CPUs which are MDS affected
+                       and deploy MDS mitigation, TAA mitigation is not
+                       required and doesn't provide any additional
+                       mitigation.
+
+                       For details see:
+                       Documentation/admin-guide/hw-vuln/tsx_async_abort.rst
+
        turbografx.map[2|3]=    [HW,JOY]
                        TurboGraFX parallel port interface
                        Format:
index af64c4b..a8de2fb 100644 (file)
@@ -27,6 +27,7 @@ x86-specific Documentation
    mds
    microcode
    resctrl_ui
+   tsx_async_abort
    usb-legacy-support
    i386/index
    x86_64/index
diff --git a/Documentation/x86/tsx_async_abort.rst b/Documentation/x86/tsx_async_abort.rst
new file mode 100644 (file)
index 0000000..583ddc1
--- /dev/null
@@ -0,0 +1,117 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+TSX Async Abort (TAA) mitigation
+================================
+
+.. _tsx_async_abort:
+
+Overview
+--------
+
+TSX Async Abort (TAA) is a side channel attack on internal buffers in some
+Intel processors similar to Microarchitectural Data Sampling (MDS).  In this
+case certain loads may speculatively pass invalid data to dependent operations
+when an asynchronous abort condition is pending in a Transactional
+Synchronization Extensions (TSX) transaction.  This includes loads with no
+fault or assist condition. Such loads may speculatively expose stale data from
+the same uarch data structures as in MDS, with same scope of exposure i.e.
+same-thread and cross-thread. This issue affects all current processors that
+support TSX.
+
+Mitigation strategy
+-------------------
+
+a) TSX disable - one of the mitigations is to disable TSX. A new MSR,
+IA32_TSX_CTRL, will be available on future processors and on current
+processors after a microcode update, and can be used to disable TSX. In
+addition, it controls the enumeration of the TSX feature bits (RTM and HLE)
+in CPUID.
+
+b) Clear CPU buffers - similar to MDS, clearing the CPU buffers mitigates this
+vulnerability. More details on this approach can be found in
+:ref:`Documentation/admin-guide/hw-vuln/mds.rst <mds>`.
+
+Kernel internal mitigation modes
+--------------------------------
+
+ =============    ============================================================
+ off              Mitigation is disabled. Either the CPU is not affected or
+                  tsx_async_abort=off is supplied on the kernel command line.
+
+ tsx disabled     Mitigation is enabled. TSX feature is disabled by default at
+                  bootup on processors that support TSX control.
+
+ verw             Mitigation is enabled. CPU is affected and MD_CLEAR is
+                  advertised in CPUID.
+
+ ucode needed     Mitigation is enabled. CPU is affected and MD_CLEAR is not
+                  advertised in CPUID. That is mainly for virtualization
+                  scenarios where the host has the updated microcode but the
+                  hypervisor does not expose MD_CLEAR in CPUID. It's a best
+                  effort approach without guarantee.
+ =============    ============================================================
+
+If the CPU is affected and the "tsx_async_abort" kernel command line parameter
+is not provided, then the kernel selects an appropriate mitigation depending on
+the status of the RTM and MD_CLEAR CPUID bits.
+
+The tables below indicate the impact of the tsx=on|off|auto cmdline options on
+the state of the TAA mitigation, VERW behavior and the TSX feature for various
+combinations of MSR_IA32_ARCH_CAPABILITIES bits.
+
+1. "tsx=off"
+
+=========  =========  ============  ============  ==============  ===================  ======================
+MSR_IA32_ARCH_CAPABILITIES bits     Result with cmdline tsx=off
+----------------------------------  -------------------------------------------------------------------------
+TAA_NO     MDS_NO     TSX_CTRL_MSR  TSX state     VERW can clear  TAA mitigation       TAA mitigation
+                                    after bootup  CPU buffers     tsx_async_abort=off  tsx_async_abort=full
+=========  =========  ============  ============  ==============  ===================  ======================
+    0          0           0         HW default         Yes           Same as MDS           Same as MDS
+    0          0           1        Invalid case   Invalid case       Invalid case          Invalid case
+    0          1           0         HW default         No         Need ucode update     Need ucode update
+    0          1           1          Disabled          Yes           TSX disabled          TSX disabled
+    1          X           1          Disabled           X             None needed           None needed
+=========  =========  ============  ============  ==============  ===================  ======================
+
+2. "tsx=on"
+
+=========  =========  ============  ============  ==============  ===================  ======================
+MSR_IA32_ARCH_CAPABILITIES bits     Result with cmdline tsx=on
+----------------------------------  -------------------------------------------------------------------------
+TAA_NO     MDS_NO     TSX_CTRL_MSR  TSX state     VERW can clear  TAA mitigation       TAA mitigation
+                                    after bootup  CPU buffers     tsx_async_abort=off  tsx_async_abort=full
+=========  =========  ============  ============  ==============  ===================  ======================
+    0          0           0         HW default        Yes            Same as MDS          Same as MDS
+    0          0           1        Invalid case   Invalid case       Invalid case         Invalid case
+    0          1           0         HW default        No          Need ucode update     Need ucode update
+    0          1           1          Enabled          Yes               None              Same as MDS
+    1          X           1          Enabled          X              None needed          None needed
+=========  =========  ============  ============  ==============  ===================  ======================
+
+3. "tsx=auto"
+
+=========  =========  ============  ============  ==============  ===================  ======================
+MSR_IA32_ARCH_CAPABILITIES bits     Result with cmdline tsx=auto
+----------------------------------  -------------------------------------------------------------------------
+TAA_NO     MDS_NO     TSX_CTRL_MSR  TSX state     VERW can clear  TAA mitigation       TAA mitigation
+                                    after bootup  CPU buffers     tsx_async_abort=off  tsx_async_abort=full
+=========  =========  ============  ============  ==============  ===================  ======================
+    0          0           0         HW default    Yes                Same as MDS           Same as MDS
+    0          0           1        Invalid case  Invalid case        Invalid case          Invalid case
+    0          1           0         HW default    No              Need ucode update     Need ucode update
+    0          1           1          Disabled      Yes               TSX disabled          TSX disabled
+    1          X           1          Enabled       X                 None needed           None needed
+=========  =========  ============  ============  ==============  ===================  ======================
+
+In the tables, TSX_CTRL_MSR is a new bit in MSR_IA32_ARCH_CAPABILITIES that
+indicates whether MSR_IA32_TSX_CTRL is supported.
+
+There are two control bits in IA32_TSX_CTRL MSR:
+
+      Bit 0: When set it disables the Restricted Transactional Memory (RTM)
+             sub-feature of TSX (will force all transactions to abort on the
+             XBEGIN instruction).
+
+      Bit 1: When set it disables the enumeration of the RTM and HLE feature
+             (i.e. it will make CPUID(EAX=7).EBX{bit4} and
+             CPUID(EAX=7).EBX{bit11} read as 0).
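For reference, the two IA32_TSX_CTRL bits described above are all a kernel
needs in order to switch TSX off. The sketch below shows the idea in kernel
style; it is illustrative only, the authoritative implementation is the new
arch/x86/kernel/cpu/tsx.c added in this merge, and the MSR/bit names follow
the additions to arch/x86/include/asm/msr-index.h.

    /* Illustrative only: force RTM transactions to abort and hide the
     * RTM/HLE CPUID bits, using the IA32_TSX_CTRL MSR. */
    #include <asm/msr.h>

    static void tsx_disable_sketch(void)
    {
            u64 tsx;

            rdmsrl(MSR_IA32_TSX_CTRL, tsx);
            tsx |= TSX_CTRL_RTM_DISABLE;    /* abort all RTM transactions on XBEGIN */
            tsx |= TSX_CTRL_CPUID_CLEAR;    /* clear RTM and HLE enumeration in CPUID */
            wrmsrl(MSR_IA32_TSX_CTRL, tsx);
    }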
index 39681b3..7600494 100644 (file)
@@ -3268,7 +3268,6 @@ S:        Maintained
 F:     drivers/cpufreq/bmips-cpufreq.c
 
 BROADCOM BMIPS MIPS ARCHITECTURE
-M:     Kevin Cernekee <cernekee@gmail.com>
 M:     Florian Fainelli <f.fainelli@gmail.com>
 L:     bcm-kernel-feedback-list@broadcom.com
 L:     linux-mips@vger.kernel.org
@@ -3745,7 +3744,6 @@ F:        drivers/crypto/cavium/cpt/
 
 CAVIUM THUNDERX2 ARM64 SOC
 M:     Robert Richter <rrichter@cavium.com>
-M:     Jayachandran C <jnair@caviumnetworks.com>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 F:     arch/arm64/boot/dts/cavium/thunder2-99xx*
index b37d0e8..42bfda2 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 5
 PATCHLEVEL = 4
 SUBLEVEL = 0
-EXTRAVERSION = -rc6
+EXTRAVERSION = -rc7
 NAME = Kleptomaniac Octopus
 
 # *DOCUMENTATION*
@@ -917,6 +917,9 @@ ifeq ($(CONFIG_RELR),y)
 LDFLAGS_vmlinux        += --pack-dyn-relocs=relr
 endif
 
+# make the checker run with the right architecture
+CHECKFLAGS += --arch=$(ARCH)
+
 # insure the checker run with the right endianness
 CHECKFLAGS += $(if $(CONFIG_CPU_BIG_ENDIAN),-mbig-endian,-mlittle-endian)
 
index 2a6ce87..9e027b9 100644 (file)
        pinctrl-0 = <&pinctrl_pwm3>;
 };
 
+&snvs_pwrkey {
+       status = "okay";
+};
+
 &ssi2 {
        status = "okay";
 };
index f3404dd..cf62846 100644 (file)
                        accelerometer@1c {
                                compatible = "fsl,mma8451";
                                reg = <0x1c>;
+                               pinctrl-names = "default";
+                               pinctrl-0 = <&pinctrl_mma8451_int>;
                                interrupt-parent = <&gpio6>;
                                interrupts = <31 IRQ_TYPE_LEVEL_LOW>;
                        };
                        >;
                };
 
+               pinctrl_mma8451_int: mma8451intgrp {
+                       fsl,pins = <
+                               MX6QDL_PAD_EIM_BCLK__GPIO6_IO31         0xb0b1
+                       >;
+               };
+
                pinctrl_pwm3: pwm1grp {
                        fsl,pins = <
                                MX6QDL_PAD_SD4_DAT1__PWM3_OUT           0x1b0b1
index 89d29b5..91fc0a3 100644 (file)
 
        ov5640: camera@3c {
                compatible = "ovti,ov5640";
-               pinctrl-names = "default";
-               pinctrl-0 = <&ov5640_pins>;
                reg = <0x3c>;
                clocks = <&clk_ext_camera>;
                clock-names = "xclk";
                DOVDD-supply = <&v2v8>;
-               powerdown-gpios = <&stmfx_pinctrl 18 GPIO_ACTIVE_HIGH>;
-               reset-gpios = <&stmfx_pinctrl 19 GPIO_ACTIVE_LOW>;
+               powerdown-gpios = <&stmfx_pinctrl 18 (GPIO_ACTIVE_HIGH | GPIO_PUSH_PULL)>;
+               reset-gpios = <&stmfx_pinctrl 19 (GPIO_ACTIVE_LOW | GPIO_PUSH_PULL)>;
                rotation = <180>;
                status = "okay";
 
 
                        joystick_pins: joystick {
                                pins = "gpio0", "gpio1", "gpio2", "gpio3", "gpio4";
-                               drive-push-pull;
                                bias-pull-down;
                        };
-
-                       ov5640_pins: camera {
-                               pins = "agpio2", "agpio3"; /* stmfx pins 18 & 19 */
-                               drive-push-pull;
-                               output-low;
-                       };
                };
        };
 };
index 9b11654..f98e037 100644 (file)
                        interrupt-names = "int0", "int1";
                        clocks = <&rcc CK_HSE>, <&rcc FDCAN_K>;
                        clock-names = "hclk", "cclk";
-                       bosch,mram-cfg = <0x1400 0 0 32 0 0 2 2>;
+                       bosch,mram-cfg = <0x0 0 0 32 0 0 2 2>;
                        status = "disabled";
                };
 
                        interrupt-names = "int0", "int1";
                        clocks = <&rcc CK_HSE>, <&rcc FDCAN_K>;
                        clock-names = "hclk", "cclk";
-                       bosch,mram-cfg = <0x0 0 0 32 0 0 2 2>;
+                       bosch,mram-cfg = <0x1400 0 0 32 0 0 2 2>;
                        status = "disabled";
                };
 
index 568b90e..3bec3e0 100644 (file)
        vqmmc-supply = <&reg_dldo1>;
        non-removable;
        wakeup-source;
+       keep-power-in-suspend;
        status = "okay";
 
        brcmf: wifi@1 {
index 239084c..26cbce1 100644 (file)
@@ -481,14 +481,18 @@ static void sunxi_mc_smp_cpu_die(unsigned int l_cpu)
 static int sunxi_cpu_powerdown(unsigned int cpu, unsigned int cluster)
 {
        u32 reg;
+       int gating_bit = cpu;
 
        pr_debug("%s: cluster %u cpu %u\n", __func__, cluster, cpu);
        if (cpu >= SUNXI_CPUS_PER_CLUSTER || cluster >= SUNXI_NR_CLUSTERS)
                return -EINVAL;
 
+       if (is_a83t && cpu == 0)
+               gating_bit = 4;
+
        /* gate processor power */
        reg = readl(prcm_base + PRCM_PWROFF_GATING_REG(cluster));
-       reg |= PRCM_PWROFF_GATING_REG_CORE(cpu);
+       reg |= PRCM_PWROFF_GATING_REG_CORE(gating_bit);
        writel(reg, prcm_base + PRCM_PWROFF_GATING_REG(cluster));
        udelay(20);
 
index d98346d..078a501 100644 (file)
        status = "okay";
 
        i2c-mux@77 {
-               compatible = "nxp,pca9847";
+               compatible = "nxp,pca9547";
                reg = <0x77>;
                #address-cells = <1>;
                #size-cells = <0>;
index 58b8cd0..23c8fad 100644 (file)
                        };
 
                        sdma2: dma-controller@302c0000 {
-                               compatible = "fsl,imx8mm-sdma", "fsl,imx7d-sdma";
+                               compatible = "fsl,imx8mm-sdma", "fsl,imx8mq-sdma";
                                reg = <0x302c0000 0x10000>;
                                interrupts = <GIC_SPI 103 IRQ_TYPE_LEVEL_HIGH>;
                                clocks = <&clk IMX8MM_CLK_SDMA2_ROOT>,
                        };
 
                        sdma3: dma-controller@302b0000 {
-                               compatible = "fsl,imx8mm-sdma", "fsl,imx7d-sdma";
+                               compatible = "fsl,imx8mm-sdma", "fsl,imx8mq-sdma";
                                reg = <0x302b0000 0x10000>;
                                interrupts = <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>;
                                clocks = <&clk IMX8MM_CLK_SDMA3_ROOT>,
                        };
 
                        sdma1: dma-controller@30bd0000 {
-                               compatible = "fsl,imx8mm-sdma", "fsl,imx7d-sdma";
+                               compatible = "fsl,imx8mm-sdma", "fsl,imx8mq-sdma";
                                reg = <0x30bd0000 0x10000>;
                                interrupts = <GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>;
                                clocks = <&clk IMX8MM_CLK_SDMA1_ROOT>,
index 98496f5..43c4db3 100644 (file)
                        };
 
                        sdma3: dma-controller@302b0000 {
-                               compatible = "fsl,imx8mn-sdma", "fsl,imx7d-sdma";
+                               compatible = "fsl,imx8mn-sdma", "fsl,imx8mq-sdma";
                                reg = <0x302b0000 0x10000>;
                                interrupts = <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>;
                                clocks = <&clk IMX8MN_CLK_SDMA3_ROOT>,
                        };
 
                        sdma2: dma-controller@302c0000 {
-                               compatible = "fsl,imx8mn-sdma", "fsl,imx7d-sdma";
+                               compatible = "fsl,imx8mn-sdma", "fsl,imx8mq-sdma";
                                reg = <0x302c0000 0x10000>;
                                interrupts = <GIC_SPI 103 IRQ_TYPE_LEVEL_HIGH>;
                                clocks = <&clk IMX8MN_CLK_SDMA2_ROOT>,
                        };
 
                        sdma1: dma-controller@30bd0000 {
-                               compatible = "fsl,imx8mn-sdma", "fsl,imx7d-sdma";
+                               compatible = "fsl,imx8mn-sdma", "fsl,imx8mq-sdma";
                                reg = <0x30bd0000 0x10000>;
                                interrupts = <GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>;
                                clocks = <&clk IMX8MN_CLK_SDMA1_ROOT>,
index 087b5b6..32ce149 100644 (file)
@@ -88,7 +88,7 @@
                regulator-name = "0V9_ARM";
                regulator-min-microvolt = <900000>;
                regulator-max-microvolt = <1000000>;
-               gpios = <&gpio3 19 GPIO_ACTIVE_HIGH>;
+               gpios = <&gpio3 16 GPIO_ACTIVE_HIGH>;
                states = <1000000 0x1
                           900000 0x0>;
                regulator-always-on;
index 0c731bf..0c20a7c 100644 (file)
@@ -30,13 +30,6 @@ int __arm64_get_clock_mode(struct timekeeper *tk)
 }
 #define __arch_get_clock_mode __arm64_get_clock_mode
 
-static __always_inline
-int __arm64_use_vsyscall(struct vdso_data *vdata)
-{
-       return !vdata[CS_HRES_COARSE].clock_mode;
-}
-#define __arch_use_vsyscall __arm64_use_vsyscall
-
 static __always_inline
 void __arm64_update_vsyscall(struct vdso_data *vdata, struct timekeeper *tk)
 {
index 1953147..00d41b9 100644 (file)
@@ -28,13 +28,6 @@ int __mips_get_clock_mode(struct timekeeper *tk)
 }
 #define __arch_get_clock_mode __mips_get_clock_mode
 
-static __always_inline
-int __mips_use_vsyscall(struct vdso_data *vdata)
-{
-       return (vdata[CS_HRES_COARSE].clock_mode != VDSO_CLOCK_NONE);
-}
-#define __arch_use_vsyscall __mips_use_vsyscall
-
 /* The asm-generic header needs to be included after the definitions above */
 #include <asm-generic/vdso/vsyscall.h>
 
index ef3847e..e5b6cad 100644 (file)
@@ -38,10 +38,3 @@ config REPLICATE_KTEXT
          Say Y here to enable replicating the kernel text across multiple
          nodes in a NUMA cluster.  This trades memory for speed.
 
-config REPLICATE_EXHANDLERS
-       bool "Exception handler replication support"
-       depends on SGI_IP27
-       help
-         Say Y here to enable replicating the kernel exception handlers
-         across multiple nodes in a NUMA cluster. This trades memory for
-         speed.
index 59d5375..79a52c4 100644 (file)
@@ -69,23 +69,14 @@ static void per_hub_init(cnodeid_t cnode)
 
        hub_rtc_init(cnode);
 
-#ifdef CONFIG_REPLICATE_EXHANDLERS
-       /*
-        * If this is not a headless node initialization,
-        * copy over the caliased exception handlers.
-        */
-       if (get_compact_nodeid() == cnode) {
-               extern char except_vec2_generic, except_vec3_generic;
-               extern void build_tlb_refill_handler(void);
-
-               memcpy((void *)(CKSEG0 + 0x100), &except_vec2_generic, 0x80);
-               memcpy((void *)(CKSEG0 + 0x180), &except_vec3_generic, 0x80);
-               build_tlb_refill_handler();
-               memcpy((void *)(CKSEG0 + 0x100), (void *) CKSEG0, 0x80);
-               memcpy((void *)(CKSEG0 + 0x180), &except_vec3_generic, 0x100);
+       if (nasid) {
+               /* copy exception handlers from first node to current node */
+               memcpy((void *)NODE_OFFSET_TO_K0(nasid, 0),
+                      (void *)CKSEG0, 0x200);
                __flush_cache_all();
+               /* switch to node local exception handlers */
+               REMOTE_HUB_S(nasid, PI_CALIAS_SIZE, PI_CALIAS_SIZE_8K);
        }
-#endif
 }
 
 void per_cpu_init(void)
index fb077a9..8624a88 100644 (file)
@@ -332,11 +332,7 @@ static void __init mlreset(void)
                 * thinks it is a node 0 address.
                 */
                REMOTE_HUB_S(nasid, PI_REGION_PRESENT, (region_mask | 1));
-#ifdef CONFIG_REPLICATE_EXHANDLERS
-               REMOTE_HUB_S(nasid, PI_CALIAS_SIZE, PI_CALIAS_SIZE_8K);
-#else
                REMOTE_HUB_S(nasid, PI_CALIAS_SIZE, PI_CALIAS_SIZE_0);
-#endif
 
 #ifdef LATER
                /*
index 324a239..997ffe4 100644 (file)
@@ -65,14 +65,14 @@ $(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS) $(SPARC_REG_CFLAGS
 #
 # vDSO code runs in userspace and -pg doesn't help with profiling anyway.
 #
-CFLAGS_REMOVE_vdso-note.o = -pg
 CFLAGS_REMOVE_vclock_gettime.o = -pg
+CFLAGS_REMOVE_vdso32/vclock_gettime.o = -pg
 
 $(obj)/%.so: OBJCOPYFLAGS := -S
 $(obj)/%.so: $(obj)/%.so.dbg FORCE
        $(call if_changed,objcopy)
 
-CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds)
+CPPFLAGS_vdso32/vdso32.lds = $(CPPFLAGS_vdso.lds)
 VDSO_LDFLAGS_vdso32.lds = -m elf32_sparc -soname linux-gate.so.1
 
 #This makes sure the $(obj) subdirectory exists even though vdso32/
index d6e1faa..8ef8513 100644 (file)
@@ -1940,6 +1940,51 @@ config X86_INTEL_MEMORY_PROTECTION_KEYS
 
          If unsure, say y.
 
+choice
+       prompt "TSX enable mode"
+       depends on CPU_SUP_INTEL
+       default X86_INTEL_TSX_MODE_OFF
+       help
+         Intel's TSX (Transactional Synchronization Extensions) feature
+         allows optimizing locking protocols through lock elision which
+         can lead to a noticeable performance boost.
+
+         On the other hand it has been shown that TSX can be exploited
+         to form side channel attacks (e.g. TAA) and chances are there
+         will be more of those attacks discovered in the future.
+
+         Therefore TSX is not enabled by default (aka tsx=off). An admin
+         might override this decision with the tsx=on command line parameter.
+         Even with TSX enabled, the kernel will attempt to enable the best
+         possible TAA mitigation setting depending on the microcode available
+         for the particular machine.
+
+         This option allows setting the default tsx mode between tsx=on, =off
+         and =auto. See Documentation/admin-guide/kernel-parameters.txt for more
+         details.
+
+         Say off if not sure; say auto if TSX is in use but should only be
+         enabled on safe platforms; say on if TSX is in use and the security
+         aspect of tsx is not relevant.
+
+config X86_INTEL_TSX_MODE_OFF
+       bool "off"
+       help
+         TSX is disabled if possible - equals the tsx=off command line parameter.
+
+config X86_INTEL_TSX_MODE_ON
+       bool "on"
+       help
+         TSX is always enabled on TSX capable HW - equals the tsx=on command
+         line parameter.
+
+config X86_INTEL_TSX_MODE_AUTO
+       bool "auto"
+       help
+         TSX is enabled on TSX capable HW that is believed to be safe against
+         side channel attacks - equals the tsx=auto command line parameter.
+endchoice
+
 config EFI
        bool "EFI runtime service support"
        depends on ACPI
index 0652d3e..c4fbe37 100644 (file)
 #define X86_BUG_MDS                    X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */
 #define X86_BUG_MSBDS_ONLY             X86_BUG(20) /* CPU is only affected by the MSBDS variant of BUG_MDS */
 #define X86_BUG_SWAPGS                 X86_BUG(21) /* CPU is affected by speculation through SWAPGS */
+#define X86_BUG_TAA                    X86_BUG(22) /* CPU is affected by TSX Async Abort (TAA) */
+#define X86_BUG_ITLB_MULTIHIT          X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */
 
 #endif /* _ASM_X86_CPUFEATURES_H */
index 24d6598..4fc6148 100644 (file)
@@ -312,9 +312,12 @@ struct kvm_rmap_head {
 struct kvm_mmu_page {
        struct list_head link;
        struct hlist_node hash_link;
+       struct list_head lpage_disallowed_link;
+
        bool unsync;
        u8 mmu_valid_gen;
        bool mmio_cached;
+       bool lpage_disallowed; /* Can't be replaced by an equiv large page */
 
        /*
         * The following two entries are used to key the shadow page in the
@@ -859,6 +862,7 @@ struct kvm_arch {
         */
        struct list_head active_mmu_pages;
        struct list_head zapped_obsolete_pages;
+       struct list_head lpage_disallowed_mmu_pages;
        struct kvm_page_track_notifier_node mmu_sp_tracker;
        struct kvm_page_track_notifier_head track_notifier_head;
 
@@ -933,6 +937,7 @@ struct kvm_arch {
        bool exception_payload_enabled;
 
        struct kvm_pmu_event_filter *pmu_event_filter;
+       struct task_struct *nx_lpage_recovery_thread;
 };
 
 struct kvm_vm_stat {
@@ -946,6 +951,7 @@ struct kvm_vm_stat {
        ulong mmu_unsync;
        ulong remote_tlb_flush;
        ulong lpages;
+       ulong nx_lpage_splits;
        ulong max_mmu_page_hash_collisions;
 };
 
index 20ce682..6a31246 100644 (file)
                                                  * Microarchitectural Data
                                                  * Sampling (MDS) vulnerabilities.
                                                  */
+#define ARCH_CAP_PSCHANGE_MC_NO                BIT(6)   /*
+                                                 * The processor is not susceptible to a
+                                                 * machine check error due to modifying the
+                                                 * code page size along with either the
+                                                 * physical address or cache type
+                                                 * without TLB invalidation.
+                                                 */
+#define ARCH_CAP_TSX_CTRL_MSR          BIT(7)  /* MSR for TSX control is available. */
+#define ARCH_CAP_TAA_NO                        BIT(8)  /*
+                                                * Not susceptible to
+                                                * TSX Async Abort (TAA) vulnerabilities.
+                                                */
 
 #define MSR_IA32_FLUSH_CMD             0x0000010b
 #define L1D_FLUSH                      BIT(0)  /*
 #define MSR_IA32_BBL_CR_CTL            0x00000119
 #define MSR_IA32_BBL_CR_CTL3           0x0000011e
 
+#define MSR_IA32_TSX_CTRL              0x00000122
+#define TSX_CTRL_RTM_DISABLE           BIT(0)  /* Disable RTM feature */
+#define TSX_CTRL_CPUID_CLEAR           BIT(1)  /* Disable TSX enumeration */
+
 #define MSR_IA32_SYSENTER_CS           0x00000174
 #define MSR_IA32_SYSENTER_ESP          0x00000175
 #define MSR_IA32_SYSENTER_EIP          0x00000176
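For reference, once a TAA-capable microcode exposes MSR_IA32_TSX_CTRL (0x122), it can be inspected from userspace through the msr driver's read-at-offset interface. A minimal sketch, assuming the msr module is loaded and the program runs as root; on CPUs without the MSR the read simply fails:

/* Illustrative only: requires CONFIG_X86_MSR (or "modprobe msr") and root. */
#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	uint64_t val;
	int fd = open("/dev/cpu/0/msr", O_RDONLY);

	if (fd < 0) {
		perror("open /dev/cpu/0/msr");
		return 1;
	}
	/* The msr device reads the MSR whose index equals the file offset. */
	if (pread(fd, &val, sizeof(val), 0x122) != sizeof(val)) {
		perror("rdmsr 0x122 (MSR_IA32_TSX_CTRL)");
		close(fd);
		return 1;
	}
	close(fd);
	printf("TSX_CTRL=%#llx RTM_DISABLE=%llu CPUID_CLEAR=%llu\n",
	       (unsigned long long)val,
	       (unsigned long long)(val & 1),
	       (unsigned long long)((val >> 1) & 1));
	return 0;
}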
index 80bc209..5c24a7b 100644 (file)
@@ -314,7 +314,7 @@ DECLARE_STATIC_KEY_FALSE(mds_idle_clear);
 #include <asm/segment.h>
 
 /**
- * mds_clear_cpu_buffers - Mitigation for MDS vulnerability
+ * mds_clear_cpu_buffers - Mitigation for MDS and TAA vulnerabilities
  *
  * This uses the otherwise unused and obsolete VERW instruction in
  * combination with microcode which triggers a CPU buffer flush when the
@@ -337,7 +337,7 @@ static inline void mds_clear_cpu_buffers(void)
 }
 
 /**
- * mds_user_clear_cpu_buffers - Mitigation for MDS vulnerability
+ * mds_user_clear_cpu_buffers - Mitigation for MDS and TAA vulnerabilities
  *
  * Clear CPU buffers if the corresponding static key is enabled
  */
index 6e0a3b4..54f5d54 100644 (file)
@@ -988,4 +988,11 @@ enum mds_mitigations {
        MDS_MITIGATION_VMWERV,
 };
 
+enum taa_mitigations {
+       TAA_MITIGATION_OFF,
+       TAA_MITIGATION_UCODE_NEEDED,
+       TAA_MITIGATION_VERW,
+       TAA_MITIGATION_TSX_DISABLED,
+};
+
 #endif /* _ASM_X86_PROCESSOR_H */
index 9e2dd2b..2b0faf8 100644 (file)
@@ -1586,9 +1586,6 @@ static void setup_local_APIC(void)
 {
        int cpu = smp_processor_id();
        unsigned int value;
-#ifdef CONFIG_X86_32
-       int logical_apicid, ldr_apicid;
-#endif
 
        if (disable_apic) {
                disable_ioapic_support();
@@ -1626,16 +1623,21 @@ static void setup_local_APIC(void)
        apic->init_apic_ldr();
 
 #ifdef CONFIG_X86_32
-       /*
-        * APIC LDR is initialized.  If logical_apicid mapping was
-        * initialized during get_smp_config(), make sure it matches the
-        * actual value.
-        */
-       logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
-       ldr_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
-       WARN_ON(logical_apicid != BAD_APICID && logical_apicid != ldr_apicid);
-       /* always use the value from LDR */
-       early_per_cpu(x86_cpu_to_logical_apicid, cpu) = ldr_apicid;
+       if (apic->dest_logical) {
+               int logical_apicid, ldr_apicid;
+
+               /*
+                * APIC LDR is initialized.  If logical_apicid mapping was
+                * initialized during get_smp_config(), make sure it matches
+                * the actual value.
+                */
+               logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
+               ldr_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
+               if (logical_apicid != BAD_APICID)
+                       WARN_ON(logical_apicid != ldr_apicid);
+               /* Always use the value from LDR. */
+               early_per_cpu(x86_cpu_to_logical_apicid, cpu) = ldr_apicid;
+       }
 #endif
 
        /*
index d7a1e5a..890f600 100644 (file)
@@ -30,7 +30,7 @@ obj-$(CONFIG_PROC_FS) += proc.o
 obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
 
 ifdef CONFIG_CPU_SUP_INTEL
-obj-y                  += intel.o intel_pconfig.o
+obj-y                  += intel.o intel_pconfig.o tsx.o
 obj-$(CONFIG_PM)       += intel_epb.o
 endif
 obj-$(CONFIG_CPU_SUP_AMD)              += amd.o
index 91c2561..4c7b0fa 100644 (file)
@@ -39,6 +39,7 @@ static void __init spectre_v2_select_mitigation(void);
 static void __init ssb_select_mitigation(void);
 static void __init l1tf_select_mitigation(void);
 static void __init mds_select_mitigation(void);
+static void __init taa_select_mitigation(void);
 
 /* The base value of the SPEC_CTRL MSR that always has to be preserved. */
 u64 x86_spec_ctrl_base;
@@ -105,6 +106,7 @@ void __init check_bugs(void)
        ssb_select_mitigation();
        l1tf_select_mitigation();
        mds_select_mitigation();
+       taa_select_mitigation();
 
        arch_smt_update();
 
@@ -268,6 +270,100 @@ static int __init mds_cmdline(char *str)
 }
 early_param("mds", mds_cmdline);
 
+#undef pr_fmt
+#define pr_fmt(fmt)    "TAA: " fmt
+
+/* Default mitigation for TAA-affected CPUs */
+static enum taa_mitigations taa_mitigation __ro_after_init = TAA_MITIGATION_VERW;
+static bool taa_nosmt __ro_after_init;
+
+static const char * const taa_strings[] = {
+       [TAA_MITIGATION_OFF]            = "Vulnerable",
+       [TAA_MITIGATION_UCODE_NEEDED]   = "Vulnerable: Clear CPU buffers attempted, no microcode",
+       [TAA_MITIGATION_VERW]           = "Mitigation: Clear CPU buffers",
+       [TAA_MITIGATION_TSX_DISABLED]   = "Mitigation: TSX disabled",
+};
+
+static void __init taa_select_mitigation(void)
+{
+       u64 ia32_cap;
+
+       if (!boot_cpu_has_bug(X86_BUG_TAA)) {
+               taa_mitigation = TAA_MITIGATION_OFF;
+               return;
+       }
+
+       /* TSX previously disabled by tsx=off */
+       if (!boot_cpu_has(X86_FEATURE_RTM)) {
+               taa_mitigation = TAA_MITIGATION_TSX_DISABLED;
+               goto out;
+       }
+
+       if (cpu_mitigations_off()) {
+               taa_mitigation = TAA_MITIGATION_OFF;
+               return;
+       }
+
+       /* TAA mitigation is turned off on the cmdline (tsx_async_abort=off) */
+       if (taa_mitigation == TAA_MITIGATION_OFF)
+               goto out;
+
+       if (boot_cpu_has(X86_FEATURE_MD_CLEAR))
+               taa_mitigation = TAA_MITIGATION_VERW;
+       else
+               taa_mitigation = TAA_MITIGATION_UCODE_NEEDED;
+
+       /*
+        * VERW doesn't clear the CPU buffers when MD_CLEAR=1 and MDS_NO=1.
+        * A microcode update fixes this behavior to clear CPU buffers. It also
+        * adds support for MSR_IA32_TSX_CTRL which is enumerated by the
+        * ARCH_CAP_TSX_CTRL_MSR bit.
+        *
+        * On MDS_NO=1 CPUs, if ARCH_CAP_TSX_CTRL_MSR is not set, a microcode
+        * update is required.
+        */
+       ia32_cap = x86_read_arch_cap_msr();
+       if ( (ia32_cap & ARCH_CAP_MDS_NO) &&
+           !(ia32_cap & ARCH_CAP_TSX_CTRL_MSR))
+               taa_mitigation = TAA_MITIGATION_UCODE_NEEDED;
+
+       /*
+        * TSX is enabled, select alternate mitigation for TAA which is
+        * the same as MDS. Enable MDS static branch to clear CPU buffers.
+        *
+        * For guests that can't determine whether the correct microcode is
+        * present on the host, enable the mitigation for UCODE_NEEDED as well.
+        */
+       static_branch_enable(&mds_user_clear);
+
+       if (taa_nosmt || cpu_mitigations_auto_nosmt())
+               cpu_smt_disable(false);
+
+out:
+       pr_info("%s\n", taa_strings[taa_mitigation]);
+}
+
+static int __init tsx_async_abort_parse_cmdline(char *str)
+{
+       if (!boot_cpu_has_bug(X86_BUG_TAA))
+               return 0;
+
+       if (!str)
+               return -EINVAL;
+
+       if (!strcmp(str, "off")) {
+               taa_mitigation = TAA_MITIGATION_OFF;
+       } else if (!strcmp(str, "full")) {
+               taa_mitigation = TAA_MITIGATION_VERW;
+       } else if (!strcmp(str, "full,nosmt")) {
+               taa_mitigation = TAA_MITIGATION_VERW;
+               taa_nosmt = true;
+       }
+
+       return 0;
+}
+early_param("tsx_async_abort", tsx_async_abort_parse_cmdline);
+
 #undef pr_fmt
 #define pr_fmt(fmt)     "Spectre V1 : " fmt
 
@@ -786,13 +882,10 @@ static void update_mds_branch_idle(void)
 }
 
 #define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n"
+#define TAA_MSG_SMT "TAA CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html for more details.\n"
 
 void cpu_bugs_smt_update(void)
 {
-       /* Enhanced IBRS implies STIBP. No update required. */
-       if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
-               return;
-
        mutex_lock(&spec_ctrl_mutex);
 
        switch (spectre_v2_user) {
@@ -819,6 +912,17 @@ void cpu_bugs_smt_update(void)
                break;
        }
 
+       switch (taa_mitigation) {
+       case TAA_MITIGATION_VERW:
+       case TAA_MITIGATION_UCODE_NEEDED:
+               if (sched_smt_active())
+                       pr_warn_once(TAA_MSG_SMT);
+               break;
+       case TAA_MITIGATION_TSX_DISABLED:
+       case TAA_MITIGATION_OFF:
+               break;
+       }
+
        mutex_unlock(&spec_ctrl_mutex);
 }
 
@@ -1149,6 +1253,9 @@ void x86_spec_ctrl_setup_ap(void)
                x86_amd_ssb_disable();
 }
 
+bool itlb_multihit_kvm_mitigation;
+EXPORT_SYMBOL_GPL(itlb_multihit_kvm_mitigation);
+
 #undef pr_fmt
 #define pr_fmt(fmt)    "L1TF: " fmt
 
@@ -1304,11 +1411,24 @@ static ssize_t l1tf_show_state(char *buf)
                       l1tf_vmx_states[l1tf_vmx_mitigation],
                       sched_smt_active() ? "vulnerable" : "disabled");
 }
+
+static ssize_t itlb_multihit_show_state(char *buf)
+{
+       if (itlb_multihit_kvm_mitigation)
+               return sprintf(buf, "KVM: Mitigation: Split huge pages\n");
+       else
+               return sprintf(buf, "KVM: Vulnerable\n");
+}
 #else
 static ssize_t l1tf_show_state(char *buf)
 {
        return sprintf(buf, "%s\n", L1TF_DEFAULT_MSG);
 }
+
+static ssize_t itlb_multihit_show_state(char *buf)
+{
+       return sprintf(buf, "Processor vulnerable\n");
+}
 #endif
 
 static ssize_t mds_show_state(char *buf)
@@ -1328,6 +1448,21 @@ static ssize_t mds_show_state(char *buf)
                       sched_smt_active() ? "vulnerable" : "disabled");
 }
 
+static ssize_t tsx_async_abort_show_state(char *buf)
+{
+       if ((taa_mitigation == TAA_MITIGATION_TSX_DISABLED) ||
+           (taa_mitigation == TAA_MITIGATION_OFF))
+               return sprintf(buf, "%s\n", taa_strings[taa_mitigation]);
+
+       if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
+               return sprintf(buf, "%s; SMT Host state unknown\n",
+                              taa_strings[taa_mitigation]);
+       }
+
+       return sprintf(buf, "%s; SMT %s\n", taa_strings[taa_mitigation],
+                      sched_smt_active() ? "vulnerable" : "disabled");
+}
+
 static char *stibp_state(void)
 {
        if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
@@ -1398,6 +1533,12 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
        case X86_BUG_MDS:
                return mds_show_state(buf);
 
+       case X86_BUG_TAA:
+               return tsx_async_abort_show_state(buf);
+
+       case X86_BUG_ITLB_MULTIHIT:
+               return itlb_multihit_show_state(buf);
+
        default:
                break;
        }
@@ -1434,4 +1575,14 @@ ssize_t cpu_show_mds(struct device *dev, struct device_attribute *attr, char *bu
 {
        return cpu_show_common(dev, attr, buf, X86_BUG_MDS);
 }
+
+ssize_t cpu_show_tsx_async_abort(struct device *dev, struct device_attribute *attr, char *buf)
+{
+       return cpu_show_common(dev, attr, buf, X86_BUG_TAA);
+}
+
+ssize_t cpu_show_itlb_multihit(struct device *dev, struct device_attribute *attr, char *buf)
+{
+       return cpu_show_common(dev, attr, buf, X86_BUG_ITLB_MULTIHIT);
+}
 #endif
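The cpu_show_tsx_async_abort() and cpu_show_itlb_multihit() handlers above back the new per-vulnerability sysfs files. A minimal userspace sketch for reading the reported state, assuming the standard /sys/devices/system/cpu/vulnerabilities/ layout documented under Documentation/ABI:

/* Illustrative reader for the new vulnerability files. */
#include <stdio.h>

static void show(const char *name)
{
	char path[128], line[256];
	FILE *f;

	snprintf(path, sizeof(path),
		 "/sys/devices/system/cpu/vulnerabilities/%s", name);
	f = fopen(path, "r");
	if (!f) {
		printf("%s: <not exposed by this kernel>\n", name);
		return;
	}
	if (fgets(line, sizeof(line), f))
		printf("%s: %s", name, line);
	fclose(f);
}

int main(void)
{
	show("tsx_async_abort");
	show("itlb_multihit");
	return 0;
}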
index 9ae7d1b..fffe219 100644 (file)
@@ -1016,13 +1016,14 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
 #endif
 }
 
-#define NO_SPECULATION BIT(0)
-#define NO_MELTDOWN    BIT(1)
-#define NO_SSB         BIT(2)
-#define NO_L1TF                BIT(3)
-#define NO_MDS         BIT(4)
-#define MSBDS_ONLY     BIT(5)
-#define NO_SWAPGS      BIT(6)
+#define NO_SPECULATION         BIT(0)
+#define NO_MELTDOWN            BIT(1)
+#define NO_SSB                 BIT(2)
+#define NO_L1TF                        BIT(3)
+#define NO_MDS                 BIT(4)
+#define MSBDS_ONLY             BIT(5)
+#define NO_SWAPGS              BIT(6)
+#define NO_ITLB_MULTIHIT       BIT(7)
 
 #define VULNWL(_vendor, _family, _model, _whitelist)   \
        { X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist }
@@ -1043,27 +1044,27 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
        VULNWL(NSC,     5, X86_MODEL_ANY,       NO_SPECULATION),
 
        /* Intel Family 6 */
-       VULNWL_INTEL(ATOM_SALTWELL,             NO_SPECULATION),
-       VULNWL_INTEL(ATOM_SALTWELL_TABLET,      NO_SPECULATION),
-       VULNWL_INTEL(ATOM_SALTWELL_MID,         NO_SPECULATION),
-       VULNWL_INTEL(ATOM_BONNELL,              NO_SPECULATION),
-       VULNWL_INTEL(ATOM_BONNELL_MID,          NO_SPECULATION),
-
-       VULNWL_INTEL(ATOM_SILVERMONT,           NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
-       VULNWL_INTEL(ATOM_SILVERMONT_D,         NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
-       VULNWL_INTEL(ATOM_SILVERMONT_MID,       NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
-       VULNWL_INTEL(ATOM_AIRMONT,              NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
-       VULNWL_INTEL(XEON_PHI_KNL,              NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
-       VULNWL_INTEL(XEON_PHI_KNM,              NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
+       VULNWL_INTEL(ATOM_SALTWELL,             NO_SPECULATION | NO_ITLB_MULTIHIT),
+       VULNWL_INTEL(ATOM_SALTWELL_TABLET,      NO_SPECULATION | NO_ITLB_MULTIHIT),
+       VULNWL_INTEL(ATOM_SALTWELL_MID,         NO_SPECULATION | NO_ITLB_MULTIHIT),
+       VULNWL_INTEL(ATOM_BONNELL,              NO_SPECULATION | NO_ITLB_MULTIHIT),
+       VULNWL_INTEL(ATOM_BONNELL_MID,          NO_SPECULATION | NO_ITLB_MULTIHIT),
+
+       VULNWL_INTEL(ATOM_SILVERMONT,           NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
+       VULNWL_INTEL(ATOM_SILVERMONT_D,         NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
+       VULNWL_INTEL(ATOM_SILVERMONT_MID,       NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
+       VULNWL_INTEL(ATOM_AIRMONT,              NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
+       VULNWL_INTEL(XEON_PHI_KNL,              NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
+       VULNWL_INTEL(XEON_PHI_KNM,              NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
 
        VULNWL_INTEL(CORE_YONAH,                NO_SSB),
 
-       VULNWL_INTEL(ATOM_AIRMONT_MID,          NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
-       VULNWL_INTEL(ATOM_AIRMONT_NP,           NO_L1TF | NO_SWAPGS),
+       VULNWL_INTEL(ATOM_AIRMONT_MID,          NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
+       VULNWL_INTEL(ATOM_AIRMONT_NP,           NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
 
-       VULNWL_INTEL(ATOM_GOLDMONT,             NO_MDS | NO_L1TF | NO_SWAPGS),
-       VULNWL_INTEL(ATOM_GOLDMONT_D,           NO_MDS | NO_L1TF | NO_SWAPGS),
-       VULNWL_INTEL(ATOM_GOLDMONT_PLUS,        NO_MDS | NO_L1TF | NO_SWAPGS),
+       VULNWL_INTEL(ATOM_GOLDMONT,             NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
+       VULNWL_INTEL(ATOM_GOLDMONT_D,           NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
+       VULNWL_INTEL(ATOM_GOLDMONT_PLUS,        NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
 
        /*
         * Technically, swapgs isn't serializing on AMD (despite it previously
@@ -1073,15 +1074,17 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
         * good enough for our purposes.
         */
 
+       VULNWL_INTEL(ATOM_TREMONT_D,            NO_ITLB_MULTIHIT),
+
        /* AMD Family 0xf - 0x12 */
-       VULNWL_AMD(0x0f,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS),
-       VULNWL_AMD(0x10,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS),
-       VULNWL_AMD(0x11,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS),
-       VULNWL_AMD(0x12,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS),
+       VULNWL_AMD(0x0f,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
+       VULNWL_AMD(0x10,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
+       VULNWL_AMD(0x11,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
+       VULNWL_AMD(0x12,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
 
        /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */
-       VULNWL_AMD(X86_FAMILY_ANY,      NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS),
-       VULNWL_HYGON(X86_FAMILY_ANY,    NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS),
+       VULNWL_AMD(X86_FAMILY_ANY,      NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
+       VULNWL_HYGON(X86_FAMILY_ANY,    NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
        {}
 };
 
@@ -1092,19 +1095,30 @@ static bool __init cpu_matches(unsigned long which)
        return m && !!(m->driver_data & which);
 }
 
-static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
+u64 x86_read_arch_cap_msr(void)
 {
        u64 ia32_cap = 0;
 
+       if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
+               rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
+
+       return ia32_cap;
+}
+
+static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
+{
+       u64 ia32_cap = x86_read_arch_cap_msr();
+
+       /* Set ITLB_MULTIHIT bug if cpu is not in the whitelist and not mitigated */
+       if (!cpu_matches(NO_ITLB_MULTIHIT) && !(ia32_cap & ARCH_CAP_PSCHANGE_MC_NO))
+               setup_force_cpu_bug(X86_BUG_ITLB_MULTIHIT);
+
        if (cpu_matches(NO_SPECULATION))
                return;
 
        setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
        setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
 
-       if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES))
-               rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
-
        if (!cpu_matches(NO_SSB) && !(ia32_cap & ARCH_CAP_SSB_NO) &&
           !cpu_has(c, X86_FEATURE_AMD_SSB_NO))
                setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS);
@@ -1121,6 +1135,21 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
        if (!cpu_matches(NO_SWAPGS))
                setup_force_cpu_bug(X86_BUG_SWAPGS);
 
+       /*
+        * When the CPU is not mitigated for TAA (TAA_NO=0), set the TAA bug if:
+        *      - TSX is supported, or
+        *      - TSX_CTRL is present.
+        *
+        * The TSX_CTRL check is needed because TSX could have been disabled
+        * before the kernel booted, e.g. by kexec.
+        * The TSX_CTRL check alone is not sufficient when the microcode update
+        * is not present or when running as a guest that doesn't get TSX_CTRL.
+        */
+       if (!(ia32_cap & ARCH_CAP_TAA_NO) &&
+           (cpu_has(c, X86_FEATURE_RTM) ||
+            (ia32_cap & ARCH_CAP_TSX_CTRL_MSR)))
+               setup_force_cpu_bug(X86_BUG_TAA);
+
        if (cpu_matches(NO_MELTDOWN))
                return;
 
@@ -1554,6 +1583,8 @@ void __init identify_boot_cpu(void)
 #endif
        cpu_detect_tlb(&boot_cpu_data);
        setup_cr_pinning();
+
+       tsx_init();
 }
 
 void identify_secondary_cpu(struct cpuinfo_x86 *c)
index c0e2407..38ab6e1 100644 (file)
@@ -44,6 +44,22 @@ struct _tlb_table {
 extern const struct cpu_dev *const __x86_cpu_dev_start[],
                            *const __x86_cpu_dev_end[];
 
+#ifdef CONFIG_CPU_SUP_INTEL
+enum tsx_ctrl_states {
+       TSX_CTRL_ENABLE,
+       TSX_CTRL_DISABLE,
+       TSX_CTRL_NOT_SUPPORTED,
+};
+
+extern __ro_after_init enum tsx_ctrl_states tsx_ctrl_state;
+
+extern void __init tsx_init(void);
+extern void tsx_enable(void);
+extern void tsx_disable(void);
+#else
+static inline void tsx_init(void) { }
+#endif /* CONFIG_CPU_SUP_INTEL */
+
 extern void get_cpu_cap(struct cpuinfo_x86 *c);
 extern void get_cpu_address_sizes(struct cpuinfo_x86 *c);
 extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
@@ -62,4 +78,6 @@ unsigned int aperfmperf_get_khz(int cpu);
 
 extern void x86_spec_ctrl_setup_ap(void);
 
+extern u64 x86_read_arch_cap_msr(void);
+
 #endif /* ARCH_X86_CPU_H */
index c2fdc00..11d5c59 100644 (file)
@@ -762,6 +762,11 @@ static void init_intel(struct cpuinfo_x86 *c)
                detect_tme(c);
 
        init_intel_misc_features(c);
+
+       if (tsx_ctrl_state == TSX_CTRL_ENABLE)
+               tsx_enable();
+       if (tsx_ctrl_state == TSX_CTRL_DISABLE)
+               tsx_disable();
 }
 
 #ifdef CONFIG_X86_32
index efbd54c..055c861 100644 (file)
@@ -522,6 +522,10 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)
        int ret = 0;
 
        rdtgrp = rdtgroup_kn_lock_live(of->kn);
+       if (!rdtgrp) {
+               ret = -ENOENT;
+               goto out;
+       }
 
        md.priv = of->kn->priv;
        resid = md.u.rid;
index a46dee8..2e3b06d 100644 (file)
@@ -461,10 +461,8 @@ static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of,
        }
 
        rdtgrp = rdtgroup_kn_lock_live(of->kn);
-       rdt_last_cmd_clear();
        if (!rdtgrp) {
                ret = -ENOENT;
-               rdt_last_cmd_puts("Directory was removed\n");
                goto unlock;
        }
 
@@ -2648,10 +2646,8 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
        int ret;
 
        prdtgrp = rdtgroup_kn_lock_live(prgrp_kn);
-       rdt_last_cmd_clear();
        if (!prdtgrp) {
                ret = -ENODEV;
-               rdt_last_cmd_puts("Directory was removed\n");
                goto out_unlock;
        }
 
diff --git a/arch/x86/kernel/cpu/tsx.c b/arch/x86/kernel/cpu/tsx.c
new file mode 100644 (file)
index 0000000..3e20d32
--- /dev/null
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Intel Transactional Synchronization Extensions (TSX) control.
+ *
+ * Copyright (C) 2019 Intel Corporation
+ *
+ * Author:
+ *     Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+ */
+
+#include <linux/cpufeature.h>
+
+#include <asm/cmdline.h>
+
+#include "cpu.h"
+
+enum tsx_ctrl_states tsx_ctrl_state __ro_after_init = TSX_CTRL_NOT_SUPPORTED;
+
+void tsx_disable(void)
+{
+       u64 tsx;
+
+       rdmsrl(MSR_IA32_TSX_CTRL, tsx);
+
+       /* Force all transactions to immediately abort */
+       tsx |= TSX_CTRL_RTM_DISABLE;
+
+       /*
+        * Ensure TSX support is not enumerated in CPUID.
+        * This is visible to userspace and will ensure they
+        * do not waste resources trying TSX transactions that
+        * will always abort.
+        */
+       tsx |= TSX_CTRL_CPUID_CLEAR;
+
+       wrmsrl(MSR_IA32_TSX_CTRL, tsx);
+}
+
+void tsx_enable(void)
+{
+       u64 tsx;
+
+       rdmsrl(MSR_IA32_TSX_CTRL, tsx);
+
+       /* Enable the RTM feature in the cpu */
+       tsx &= ~TSX_CTRL_RTM_DISABLE;
+
+       /*
+        * Ensure TSX support is enumerated in CPUID.
+        * This is visible to userspace and will ensure they
+        * can enumerate and use the TSX feature.
+        */
+       tsx &= ~TSX_CTRL_CPUID_CLEAR;
+
+       wrmsrl(MSR_IA32_TSX_CTRL, tsx);
+}
+
+static bool __init tsx_ctrl_is_supported(void)
+{
+       u64 ia32_cap = x86_read_arch_cap_msr();
+
+       /*
+        * TSX is controlled via MSR_IA32_TSX_CTRL.  However, support for this
+        * MSR is enumerated by the ARCH_CAP_TSX_CTRL_MSR bit in
+        * MSR_IA32_ARCH_CAPABILITIES.
+        *
+        * TSX control (aka MSR_IA32_TSX_CTRL) is only available after a
+        * microcode update on CPUs that have their MSR_IA32_ARCH_CAPABILITIES
+        * bit MDS_NO=1. CPUs with MDS_NO=0 are not planned to get
+        * MSR_IA32_TSX_CTRL support even after a microcode update. Thus,
+        * tsx= cmdline requests will do nothing on CPUs without
+        * MSR_IA32_TSX_CTRL support.
+        */
+       return !!(ia32_cap & ARCH_CAP_TSX_CTRL_MSR);
+}
+
+static enum tsx_ctrl_states x86_get_tsx_auto_mode(void)
+{
+       if (boot_cpu_has_bug(X86_BUG_TAA))
+               return TSX_CTRL_DISABLE;
+
+       return TSX_CTRL_ENABLE;
+}
+
+void __init tsx_init(void)
+{
+       char arg[5] = {};
+       int ret;
+
+       if (!tsx_ctrl_is_supported())
+               return;
+
+       ret = cmdline_find_option(boot_command_line, "tsx", arg, sizeof(arg));
+       if (ret >= 0) {
+               if (!strcmp(arg, "on")) {
+                       tsx_ctrl_state = TSX_CTRL_ENABLE;
+               } else if (!strcmp(arg, "off")) {
+                       tsx_ctrl_state = TSX_CTRL_DISABLE;
+               } else if (!strcmp(arg, "auto")) {
+                       tsx_ctrl_state = x86_get_tsx_auto_mode();
+               } else {
+                       tsx_ctrl_state = TSX_CTRL_DISABLE;
+                       pr_err("tsx: invalid option, defaulting to off\n");
+               }
+       } else {
+               /* tsx= not provided */
+               if (IS_ENABLED(CONFIG_X86_INTEL_TSX_MODE_AUTO))
+                       tsx_ctrl_state = x86_get_tsx_auto_mode();
+               else if (IS_ENABLED(CONFIG_X86_INTEL_TSX_MODE_OFF))
+                       tsx_ctrl_state = TSX_CTRL_DISABLE;
+               else
+                       tsx_ctrl_state = TSX_CTRL_ENABLE;
+       }
+
+       if (tsx_ctrl_state == TSX_CTRL_DISABLE) {
+               tsx_disable();
+
+               /*
+                * tsx_disable() will change the state of the
+                * RTM CPUID bit.  Clear it here since it is now
+                * expected to be not set.
+                */
+               setup_clear_cpu_cap(X86_FEATURE_RTM);
+       } else if (tsx_ctrl_state == TSX_CTRL_ENABLE) {
+
+               /*
+                * HW defaults TSX to be enabled at bootup.
+                * We may still need the TSX enable support
+                * during init for special cases like
+                * kexec after TSX is disabled.
+                */
+               tsx_enable();
+
+               /*
+                * tsx_enable() will change the state of the
+                * RTM CPUID bit.  Force it here since it is now
+                * expected to be set.
+                */
+               setup_force_cpu_cap(X86_FEATURE_RTM);
+       }
+}
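Because tsx_disable() sets TSX_CTRL_CPUID_CLEAR in addition to TSX_CTRL_RTM_DISABLE, a tsx=off boot should leave RTM un-enumerated to userspace. A hedged sketch that checks the RTM bit from userspace (CPUID leaf 7, sub-leaf 0, EBX bit 11 per the SDM; treat the bit position as an assumption of this example, not something defined in this patch):

/* Illustrative userspace check of RTM enumeration. */
#include <stdio.h>
#include <cpuid.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx)) {
		puts("CPUID leaf 7 not supported");
		return 1;
	}
	printf("RTM %s enumerated\n", (ebx & (1u << 11)) ? "is" : "is not");
	return 0;
}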
index 753b8cf..87b9789 100644 (file)
@@ -94,6 +94,13 @@ static bool in_exception_stack(unsigned long *stack, struct stack_info *info)
        BUILD_BUG_ON(N_EXCEPTION_STACKS != 6);
 
        begin = (unsigned long)__this_cpu_read(cea_exception_stacks);
+       /*
+        * Handle the case where stack trace is collected _before_
+        * cea_exception_stacks had been initialized.
+        */
+       if (!begin)
+               return false;
+
        end = begin + sizeof(struct cea_exception_stacks);
        /* Bail if @stack is outside the exception stack area. */
        if (stk < begin || stk >= end)
index 6f6b1d0..4cba91e 100644 (file)
@@ -710,6 +710,8 @@ static struct chipset early_qrk[] __initdata = {
         */
        { PCI_VENDOR_ID_INTEL, 0x0f00,
                PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet},
+       { PCI_VENDOR_ID_INTEL, 0x3ec4,
+               PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet},
        { PCI_VENDOR_ID_BROADCOM, 0x4331,
          PCI_CLASS_NETWORK_OTHER, PCI_ANY_ID, 0, apple_airport_reset},
        {}
index c59454c..7e322e2 100644 (file)
@@ -1505,6 +1505,9 @@ void __init tsc_init(void)
                return;
        }
 
+       if (tsc_clocksource_reliable || no_tsc_watchdog)
+               clocksource_tsc_early.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
+
        clocksource_register_khz(&clocksource_tsc_early, tsc_khz);
        detect_art();
 }
index 24c23c6..2ce9da5 100644 (file)
@@ -37,6 +37,7 @@
 #include <linux/uaccess.h>
 #include <linux/hash.h>
 #include <linux/kern_levels.h>
+#include <linux/kthread.h>
 
 #include <asm/page.h>
 #include <asm/pat.h>
 #include <asm/kvm_page_track.h>
 #include "trace.h"
 
+extern bool itlb_multihit_kvm_mitigation;
+
+static int __read_mostly nx_huge_pages = -1;
+#ifdef CONFIG_PREEMPT_RT
+/* Recovery can cause latency spikes, disable it for PREEMPT_RT.  */
+static uint __read_mostly nx_huge_pages_recovery_ratio = 0;
+#else
+static uint __read_mostly nx_huge_pages_recovery_ratio = 60;
+#endif
+
+static int set_nx_huge_pages(const char *val, const struct kernel_param *kp);
+static int set_nx_huge_pages_recovery_ratio(const char *val, const struct kernel_param *kp);
+
+static struct kernel_param_ops nx_huge_pages_ops = {
+       .set = set_nx_huge_pages,
+       .get = param_get_bool,
+};
+
+static struct kernel_param_ops nx_huge_pages_recovery_ratio_ops = {
+       .set = set_nx_huge_pages_recovery_ratio,
+       .get = param_get_uint,
+};
+
+module_param_cb(nx_huge_pages, &nx_huge_pages_ops, &nx_huge_pages, 0644);
+__MODULE_PARM_TYPE(nx_huge_pages, "bool");
+module_param_cb(nx_huge_pages_recovery_ratio, &nx_huge_pages_recovery_ratio_ops,
+               &nx_huge_pages_recovery_ratio, 0644);
+__MODULE_PARM_TYPE(nx_huge_pages_recovery_ratio, "uint");
+
 /*
  * When setting this variable to true it enables Two-Dimensional-Paging
  * where the hardware walks 2 page tables:
@@ -352,6 +382,11 @@ static inline bool spte_ad_need_write_protect(u64 spte)
        return (spte & SPTE_SPECIAL_MASK) != SPTE_AD_ENABLED_MASK;
 }
 
+static bool is_nx_huge_page_enabled(void)
+{
+       return READ_ONCE(nx_huge_pages);
+}
+
 static inline u64 spte_shadow_accessed_mask(u64 spte)
 {
        MMU_WARN_ON(is_mmio_spte(spte));
@@ -1190,6 +1225,17 @@ static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
        kvm_mmu_gfn_disallow_lpage(slot, gfn);
 }
 
+static void account_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+       if (sp->lpage_disallowed)
+               return;
+
+       ++kvm->stat.nx_lpage_splits;
+       list_add_tail(&sp->lpage_disallowed_link,
+                     &kvm->arch.lpage_disallowed_mmu_pages);
+       sp->lpage_disallowed = true;
+}
+
 static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
        struct kvm_memslots *slots;
@@ -1207,6 +1253,13 @@ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
        kvm_mmu_gfn_allow_lpage(slot, gfn);
 }
 
+static void unaccount_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+       --kvm->stat.nx_lpage_splits;
+       sp->lpage_disallowed = false;
+       list_del(&sp->lpage_disallowed_link);
+}
+
 static bool __mmu_gfn_lpage_is_disallowed(gfn_t gfn, int level,
                                          struct kvm_memory_slot *slot)
 {
@@ -2792,6 +2845,9 @@ static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm,
                        kvm_reload_remote_mmus(kvm);
        }
 
+       if (sp->lpage_disallowed)
+               unaccount_huge_nx_page(kvm, sp);
+
        sp->role.invalid = 1;
        return list_unstable;
 }
@@ -3013,6 +3069,11 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
        if (!speculative)
                spte |= spte_shadow_accessed_mask(spte);
 
+       if (level > PT_PAGE_TABLE_LEVEL && (pte_access & ACC_EXEC_MASK) &&
+           is_nx_huge_page_enabled()) {
+               pte_access &= ~ACC_EXEC_MASK;
+       }
+
        if (pte_access & ACC_EXEC_MASK)
                spte |= shadow_x_mask;
        else
@@ -3233,9 +3294,32 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep)
        __direct_pte_prefetch(vcpu, sp, sptep);
 }
 
+static void disallowed_hugepage_adjust(struct kvm_shadow_walk_iterator it,
+                                      gfn_t gfn, kvm_pfn_t *pfnp, int *levelp)
+{
+       int level = *levelp;
+       u64 spte = *it.sptep;
+
+       if (it.level == level && level > PT_PAGE_TABLE_LEVEL &&
+           is_nx_huge_page_enabled() &&
+           is_shadow_present_pte(spte) &&
+           !is_large_pte(spte)) {
+               /*
+                * A small SPTE exists for this pfn, but FNAME(fetch)
+                * and __direct_map would like to create a large PTE
+                * instead: just force them to go down another level,
+                * patching the next 9 bits of the address back into
+                * pfn for them.
+                */
+               u64 page_mask = KVM_PAGES_PER_HPAGE(level) - KVM_PAGES_PER_HPAGE(level - 1);
+               *pfnp |= gfn & page_mask;
+               (*levelp)--;
+       }
+}
+
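As a worked example of the adjustment above (standalone, not kernel code; pages_per_hpage() mirrors the 9-bits-per-level assumption behind KVM_PAGES_PER_HPAGE on x86): demoting a would-be 2MiB mapping (level 2) to 4KiB computes page_mask = 512 - 1 and ORs the low 9 bits of the gfn back into the pfn, so the 4KiB entry targets the exact frame.

/* Hypothetical standalone demo of the pfn adjustment. */
#include <stdio.h>
#include <stdint.h>

static uint64_t pages_per_hpage(int level)
{
	/* level 1 = 4KiB, level 2 = 2MiB, level 3 = 1GiB (9 bits per level) */
	return 1ULL << ((level - 1) * 9);
}

int main(void)
{
	int level = 2;			/* caller wanted a 2MiB mapping */
	uint64_t gfn = 0x12345;		/* guest frame number of the fault */
	uint64_t pfn = gfn & ~0x1ffULL;	/* huge-page-aligned host frame */

	/* Same arithmetic as disallowed_hugepage_adjust(). */
	uint64_t page_mask = pages_per_hpage(level) - pages_per_hpage(level - 1);
	pfn |= gfn & page_mask;
	level--;

	printf("level=%d pfn=%#llx (low 9 bits restored: %#llx)\n",
	       level, (unsigned long long)pfn,
	       (unsigned long long)(gfn & page_mask));
	return 0;
}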
 static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write,
                        int map_writable, int level, kvm_pfn_t pfn,
-                       bool prefault)
+                       bool prefault, bool lpage_disallowed)
 {
        struct kvm_shadow_walk_iterator it;
        struct kvm_mmu_page *sp;
@@ -3248,6 +3332,12 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write,
 
        trace_kvm_mmu_spte_requested(gpa, level, pfn);
        for_each_shadow_entry(vcpu, gpa, it) {
+               /*
+                * We cannot overwrite existing page tables with an NX
+                * large page, as the leaf could be executable.
+                */
+               disallowed_hugepage_adjust(it, gfn, &pfn, &level);
+
                base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
                if (it.level == level)
                        break;
@@ -3258,6 +3348,8 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write,
                                              it.level - 1, true, ACC_ALL);
 
                        link_shadow_page(vcpu, it.sptep, sp);
+                       if (lpage_disallowed)
+                               account_huge_nx_page(vcpu->kvm, sp);
                }
        }
 
@@ -3306,7 +3398,7 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
         * here.
         */
        if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn) &&
-           level == PT_PAGE_TABLE_LEVEL &&
+           !kvm_is_zone_device_pfn(pfn) && level == PT_PAGE_TABLE_LEVEL &&
            PageTransCompoundMap(pfn_to_page(pfn)) &&
            !mmu_gfn_lpage_is_disallowed(vcpu, gfn, PT_DIRECTORY_LEVEL)) {
                unsigned long mask;
@@ -3550,11 +3642,14 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
 {
        int r;
        int level;
-       bool force_pt_level = false;
+       bool force_pt_level;
        kvm_pfn_t pfn;
        unsigned long mmu_seq;
        bool map_writable, write = error_code & PFERR_WRITE_MASK;
+       bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) &&
+                               is_nx_huge_page_enabled();
 
+       force_pt_level = lpage_disallowed;
        level = mapping_level(vcpu, gfn, &force_pt_level);
        if (likely(!force_pt_level)) {
                /*
@@ -3588,7 +3683,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
                goto out_unlock;
        if (likely(!force_pt_level))
                transparent_hugepage_adjust(vcpu, gfn, &pfn, &level);
-       r = __direct_map(vcpu, v, write, map_writable, level, pfn, prefault);
+       r = __direct_map(vcpu, v, write, map_writable, level, pfn,
+                        prefault, false);
 out_unlock:
        spin_unlock(&vcpu->kvm->mmu_lock);
        kvm_release_pfn_clean(pfn);
@@ -4174,6 +4270,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
        unsigned long mmu_seq;
        int write = error_code & PFERR_WRITE_MASK;
        bool map_writable;
+       bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) &&
+                               is_nx_huge_page_enabled();
 
        MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa));
 
@@ -4184,8 +4282,9 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
        if (r)
                return r;
 
-       force_pt_level = !check_hugepage_cache_consistency(vcpu, gfn,
-                                                          PT_DIRECTORY_LEVEL);
+       force_pt_level =
+               lpage_disallowed ||
+               !check_hugepage_cache_consistency(vcpu, gfn, PT_DIRECTORY_LEVEL);
        level = mapping_level(vcpu, gfn, &force_pt_level);
        if (likely(!force_pt_level)) {
                if (level > PT_DIRECTORY_LEVEL &&
@@ -4214,7 +4313,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
                goto out_unlock;
        if (likely(!force_pt_level))
                transparent_hugepage_adjust(vcpu, gfn, &pfn, &level);
-       r = __direct_map(vcpu, gpa, write, map_writable, level, pfn, prefault);
+       r = __direct_map(vcpu, gpa, write, map_writable, level, pfn,
+                        prefault, lpage_disallowed);
 out_unlock:
        spin_unlock(&vcpu->kvm->mmu_lock);
        kvm_release_pfn_clean(pfn);
@@ -5914,9 +6014,9 @@ restart:
                 * the guest, and the guest page table is using 4K page size
                 * mapping if the indirect sp has level = 1.
                 */
-               if (sp->role.direct &&
-                       !kvm_is_reserved_pfn(pfn) &&
-                       PageTransCompoundMap(pfn_to_page(pfn))) {
+               if (sp->role.direct && !kvm_is_reserved_pfn(pfn) &&
+                   !kvm_is_zone_device_pfn(pfn) &&
+                   PageTransCompoundMap(pfn_to_page(pfn))) {
                        pte_list_remove(rmap_head, sptep);
 
                        if (kvm_available_flush_tlb_with_range())
@@ -6155,10 +6255,59 @@ static void kvm_set_mmio_spte_mask(void)
        kvm_mmu_set_mmio_spte_mask(mask, mask, ACC_WRITE_MASK | ACC_USER_MASK);
 }
 
+static bool get_nx_auto_mode(void)
+{
+       /* Return true when CPU has the bug, and mitigations are ON */
+       return boot_cpu_has_bug(X86_BUG_ITLB_MULTIHIT) && !cpu_mitigations_off();
+}
+
+static void __set_nx_huge_pages(bool val)
+{
+       nx_huge_pages = itlb_multihit_kvm_mitigation = val;
+}
+
+static int set_nx_huge_pages(const char *val, const struct kernel_param *kp)
+{
+       bool old_val = nx_huge_pages;
+       bool new_val;
+
+       /* In "auto" mode deploy workaround only if CPU has the bug. */
+       if (sysfs_streq(val, "off"))
+               new_val = 0;
+       else if (sysfs_streq(val, "force"))
+               new_val = 1;
+       else if (sysfs_streq(val, "auto"))
+               new_val = get_nx_auto_mode();
+       else if (strtobool(val, &new_val) < 0)
+               return -EINVAL;
+
+       __set_nx_huge_pages(new_val);
+
+       if (new_val != old_val) {
+               struct kvm *kvm;
+
+               mutex_lock(&kvm_lock);
+
+               list_for_each_entry(kvm, &vm_list, vm_list) {
+                       mutex_lock(&kvm->slots_lock);
+                       kvm_mmu_zap_all_fast(kvm);
+                       mutex_unlock(&kvm->slots_lock);
+
+                       wake_up_process(kvm->arch.nx_lpage_recovery_thread);
+               }
+               mutex_unlock(&kvm_lock);
+       }
+
+       return 0;
+}
+
 int kvm_mmu_module_init(void)
 {
        int ret = -ENOMEM;
 
+       if (nx_huge_pages == -1)
+               __set_nx_huge_pages(get_nx_auto_mode());
+
        /*
         * MMU roles use union aliasing which is, generally speaking, an
         * undefined behavior. However, we supposedly know how compilers behave
@@ -6238,3 +6387,116 @@ void kvm_mmu_module_exit(void)
        unregister_shrinker(&mmu_shrinker);
        mmu_audit_disable();
 }
+
+static int set_nx_huge_pages_recovery_ratio(const char *val, const struct kernel_param *kp)
+{
+       unsigned int old_val;
+       int err;
+
+       old_val = nx_huge_pages_recovery_ratio;
+       err = param_set_uint(val, kp);
+       if (err)
+               return err;
+
+       if (READ_ONCE(nx_huge_pages) &&
+           !old_val && nx_huge_pages_recovery_ratio) {
+               struct kvm *kvm;
+
+               mutex_lock(&kvm_lock);
+
+               list_for_each_entry(kvm, &vm_list, vm_list)
+                       wake_up_process(kvm->arch.nx_lpage_recovery_thread);
+
+               mutex_unlock(&kvm_lock);
+       }
+
+       return err;
+}
+
+static void kvm_recover_nx_lpages(struct kvm *kvm)
+{
+       int rcu_idx;
+       struct kvm_mmu_page *sp;
+       unsigned int ratio;
+       LIST_HEAD(invalid_list);
+       ulong to_zap;
+
+       rcu_idx = srcu_read_lock(&kvm->srcu);
+       spin_lock(&kvm->mmu_lock);
+
+       ratio = READ_ONCE(nx_huge_pages_recovery_ratio);
+       to_zap = ratio ? DIV_ROUND_UP(kvm->stat.nx_lpage_splits, ratio) : 0;
+       while (to_zap && !list_empty(&kvm->arch.lpage_disallowed_mmu_pages)) {
+               /*
+                * We use a separate list instead of just using active_mmu_pages
+                * because the number of lpage_disallowed pages is expected to
+                * be relatively small compared to the total.
+                */
+               sp = list_first_entry(&kvm->arch.lpage_disallowed_mmu_pages,
+                                     struct kvm_mmu_page,
+                                     lpage_disallowed_link);
+               WARN_ON_ONCE(!sp->lpage_disallowed);
+               kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
+               WARN_ON_ONCE(sp->lpage_disallowed);
+
+               if (!--to_zap || need_resched() || spin_needbreak(&kvm->mmu_lock)) {
+                       kvm_mmu_commit_zap_page(kvm, &invalid_list);
+                       if (to_zap)
+                               cond_resched_lock(&kvm->mmu_lock);
+               }
+       }
+
+       spin_unlock(&kvm->mmu_lock);
+       srcu_read_unlock(&kvm->srcu, rcu_idx);
+}
+
+static long get_nx_lpage_recovery_timeout(u64 start_time)
+{
+       return READ_ONCE(nx_huge_pages) && READ_ONCE(nx_huge_pages_recovery_ratio)
+               ? start_time + 60 * HZ - get_jiffies_64()
+               : MAX_SCHEDULE_TIMEOUT;
+}
+
+static int kvm_nx_lpage_recovery_worker(struct kvm *kvm, uintptr_t data)
+{
+       u64 start_time;
+       long remaining_time;
+
+       while (true) {
+               start_time = get_jiffies_64();
+               remaining_time = get_nx_lpage_recovery_timeout(start_time);
+
+               set_current_state(TASK_INTERRUPTIBLE);
+               while (!kthread_should_stop() && remaining_time > 0) {
+                       schedule_timeout(remaining_time);
+                       remaining_time = get_nx_lpage_recovery_timeout(start_time);
+                       set_current_state(TASK_INTERRUPTIBLE);
+               }
+
+               set_current_state(TASK_RUNNING);
+
+               if (kthread_should_stop())
+                       return 0;
+
+               kvm_recover_nx_lpages(kvm);
+       }
+}
+
+int kvm_mmu_post_init_vm(struct kvm *kvm)
+{
+       int err;
+
+       err = kvm_vm_create_worker_thread(kvm, kvm_nx_lpage_recovery_worker, 0,
+                                         "kvm-nx-lpage-recovery",
+                                         &kvm->arch.nx_lpage_recovery_thread);
+       if (!err)
+               kthread_unpark(kvm->arch.nx_lpage_recovery_thread);
+
+       return err;
+}
+
+void kvm_mmu_pre_destroy_vm(struct kvm *kvm)
+{
+       if (kvm->arch.nx_lpage_recovery_thread)
+               kthread_stop(kvm->arch.nx_lpage_recovery_thread);
+               kthread_stop(kvm->arch.nx_lpage_recovery_thread);
+}
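The nx_huge_pages and nx_huge_pages_recovery_ratio knobs added earlier in this file are ordinary module parameters, so once kvm.ko is loaded they should show up under /sys/module/kvm/parameters/ (the exact paths, like the 1000-page split count below, are assumptions of this sketch). The example reads both and applies the same DIV_ROUND_UP(splits, ratio) rule as kvm_recover_nx_lpages() to estimate how many disallowed pages one recovery cycle would zap:

/* Illustrative only; not kernel code. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static void read_param(const char *name, char *buf, size_t len)
{
	char path[256];
	FILE *f;

	snprintf(path, sizeof(path), "/sys/module/kvm/parameters/%s", name);
	f = fopen(path, "r");
	if (!f || !fgets(buf, len, f))
		buf[0] = '\0';
	if (f)
		fclose(f);
	buf[strcspn(buf, "\n")] = '\0';
}

int main(void)
{
	char enabled[32], ratio_str[32];
	unsigned long splits = 1000;	/* hypothetical nx_lpage_splits value */
	unsigned long ratio;

	read_param("nx_huge_pages", enabled, sizeof(enabled));
	read_param("nx_huge_pages_recovery_ratio", ratio_str, sizeof(ratio_str));
	ratio = strtoul(ratio_str, NULL, 10);

	printf("nx_huge_pages=%s recovery_ratio=%lu\n", enabled, ratio);
	/* Same rule as kvm_recover_nx_lpages(): DIV_ROUND_UP(splits, ratio) */
	if (ratio)
		printf("recovery worker would zap %lu of %lu pages per cycle\n",
		       (splits + ratio - 1) / ratio, splits);
	return 0;
}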
index 11f8ec8..d55674f 100644 (file)
@@ -210,4 +210,8 @@ void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn);
 bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
                                    struct kvm_memory_slot *slot, u64 gfn);
 int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu);
+
+int kvm_mmu_post_init_vm(struct kvm *kvm);
+void kvm_mmu_pre_destroy_vm(struct kvm *kvm);
+
 #endif
index 7d5cdb3..97b21e7 100644 (file)
@@ -614,13 +614,14 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
 static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
                         struct guest_walker *gw,
                         int write_fault, int hlevel,
-                        kvm_pfn_t pfn, bool map_writable, bool prefault)
+                        kvm_pfn_t pfn, bool map_writable, bool prefault,
+                        bool lpage_disallowed)
 {
        struct kvm_mmu_page *sp = NULL;
        struct kvm_shadow_walk_iterator it;
        unsigned direct_access, access = gw->pt_access;
        int top_level, ret;
-       gfn_t base_gfn;
+       gfn_t gfn, base_gfn;
 
        direct_access = gw->pte_access;
 
@@ -665,13 +666,25 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
                        link_shadow_page(vcpu, it.sptep, sp);
        }
 
-       base_gfn = gw->gfn;
+       /*
+        * FNAME(page_fault) might have clobbered the bottom bits of
+        * gw->gfn, restore them from the virtual address.
+        */
+       gfn = gw->gfn | ((addr & PT_LVL_OFFSET_MASK(gw->level)) >> PAGE_SHIFT);
+       base_gfn = gfn;
 
        trace_kvm_mmu_spte_requested(addr, gw->level, pfn);
 
        for (; shadow_walk_okay(&it); shadow_walk_next(&it)) {
                clear_sp_write_flooding_count(it.sptep);
-               base_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
+
+               /*
+                * We cannot overwrite existing page tables with an NX
+                * large page, as the leaf could be executable.
+                */
+               disallowed_hugepage_adjust(it, gfn, &pfn, &hlevel);
+
+               base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
                if (it.level == hlevel)
                        break;
 
@@ -683,6 +696,8 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
                        sp = kvm_mmu_get_page(vcpu, base_gfn, addr,
                                              it.level - 1, true, direct_access);
                        link_shadow_page(vcpu, it.sptep, sp);
+                       if (lpage_disallowed)
+                               account_huge_nx_page(vcpu->kvm, sp);
                }
        }
 
@@ -759,9 +774,11 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
        int r;
        kvm_pfn_t pfn;
        int level = PT_PAGE_TABLE_LEVEL;
-       bool force_pt_level = false;
        unsigned long mmu_seq;
        bool map_writable, is_self_change_mapping;
+       bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) &&
+                               is_nx_huge_page_enabled();
+       bool force_pt_level = lpage_disallowed;
 
        pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
 
@@ -851,7 +868,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
        if (!force_pt_level)
                transparent_hugepage_adjust(vcpu, walker.gfn, &pfn, &level);
        r = FNAME(fetch)(vcpu, addr, &walker, write_fault,
-                        level, pfn, map_writable, prefault);
+                        level, pfn, map_writable, prefault, lpage_disallowed);
        kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT);
 
 out_unlock:
index 5d21a4a..04a8212 100644 (file)
@@ -1268,6 +1268,18 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
        if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu)
                return;
 
+       /*
+        * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change
+        * PI.NDST: pi_post_block is the one expected to change PID.NDST and the
+        * wakeup handler expects the vCPU to be on the blocked_vcpu_list that
+        * matches PI.NDST. Otherwise, a vcpu may not be able to be woken up
+        * correctly.
+        */
+       if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR || vcpu->cpu == cpu) {
+               pi_clear_sn(pi_desc);
+               goto after_clear_sn;
+       }
+
        /* The full case.  */
        do {
                old.control = new.control = pi_desc->control;
@@ -1283,6 +1295,8 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
        } while (cmpxchg64(&pi_desc->control, old.control,
                           new.control) != old.control);
 
+after_clear_sn:
+
        /*
         * Clear SN before reading the bitmap.  The VT-d firmware
         * writes the bitmap and reads SN atomically (5.2.3 in the
@@ -1291,7 +1305,7 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
         */
        smp_mb__after_atomic();
 
-       if (!bitmap_empty((unsigned long *)pi_desc->pir, NR_VECTORS))
+       if (!pi_is_pir_empty(pi_desc))
                pi_set_on(pi_desc);
 }
 
@@ -6137,7 +6151,7 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
        if (pi_test_on(&vmx->pi_desc)) {
                pi_clear_on(&vmx->pi_desc);
                /*
-                * IOMMU can write to PIR.ON, so the barrier matters even on UP.
+                * IOMMU can write to PID.ON, so the barrier matters even on UP.
                 * But on x86 this is just a compiler barrier anyway.
                 */
                smp_mb__after_atomic();
@@ -6167,7 +6181,10 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
 
 static bool vmx_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu)
 {
-       return pi_test_on(vcpu_to_pi_desc(vcpu));
+       struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+
+       return pi_test_on(pi_desc) ||
+               (pi_test_sn(pi_desc) && !pi_is_pir_empty(pi_desc));
 }
 
 static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
index bee1668..5a0f34b 100644 (file)
@@ -355,6 +355,11 @@ static inline int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc)
        return test_and_set_bit(vector, (unsigned long *)pi_desc->pir);
 }
 
+static inline bool pi_is_pir_empty(struct pi_desc *pi_desc)
+{
+       return bitmap_empty((unsigned long *)pi_desc->pir, NR_VECTORS);
+}
+
 static inline void pi_set_sn(struct pi_desc *pi_desc)
 {
        set_bit(POSTED_INTR_SN,
@@ -373,6 +378,12 @@ static inline void pi_clear_on(struct pi_desc *pi_desc)
                (unsigned long *)&pi_desc->control);
 }
 
+static inline void pi_clear_sn(struct pi_desc *pi_desc)
+{
+       clear_bit(POSTED_INTR_SN,
+               (unsigned long *)&pi_desc->control);
+}
+
 static inline int pi_test_on(struct pi_desc *pi_desc)
 {
        return test_bit(POSTED_INTR_ON,
index ff395f8..5d53052 100644 (file)
@@ -213,6 +213,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "mmu_unsync", VM_STAT(mmu_unsync) },
        { "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
        { "largepages", VM_STAT(lpages, .mode = 0444) },
+       { "nx_largepages_splitted", VM_STAT(nx_lpage_splits, .mode = 0444) },
        { "max_mmu_page_hash_collisions",
                VM_STAT(max_mmu_page_hash_collisions) },
        { NULL }
@@ -1132,13 +1133,15 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
  * List of msr numbers which we expose to userspace through KVM_GET_MSRS
  * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
  *
- * This list is modified at module load time to reflect the
+ * The three MSR lists (msrs_to_save, emulated_msrs, msr_based_features)
+ * extract the supported MSRs from their related const lists.
+ * msrs_to_save is selected from msrs_to_save_all to reflect the
  * capabilities of the host cpu. This capabilities test skips MSRs that are
- * kvm-specific. Those are put in emulated_msrs; filtering of emulated_msrs
+ * kvm-specific. Those are put in emulated_msrs_all; filtering of emulated_msrs
  * may depend on host virtualization features rather than host cpu features.
  */
 
-static u32 msrs_to_save[] = {
+static const u32 msrs_to_save_all[] = {
        MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
        MSR_STAR,
 #ifdef CONFIG_X86_64
@@ -1179,9 +1182,10 @@ static u32 msrs_to_save[] = {
        MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17,
 };
 
+static u32 msrs_to_save[ARRAY_SIZE(msrs_to_save_all)];
 static unsigned num_msrs_to_save;
 
-static u32 emulated_msrs[] = {
+static const u32 emulated_msrs_all[] = {
        MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
        MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
        HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
@@ -1220,7 +1224,7 @@ static u32 emulated_msrs[] = {
         * by arch/x86/kvm/vmx/nested.c based on CPUID or other MSRs.
         * We always support the "true" VMX control MSRs, even if the host
         * processor does not, so I am putting these registers here rather
-        * than in msrs_to_save.
+        * than in msrs_to_save_all.
         */
        MSR_IA32_VMX_BASIC,
        MSR_IA32_VMX_TRUE_PINBASED_CTLS,
@@ -1239,13 +1243,14 @@ static u32 emulated_msrs[] = {
        MSR_KVM_POLL_CONTROL,
 };
 
+static u32 emulated_msrs[ARRAY_SIZE(emulated_msrs_all)];
 static unsigned num_emulated_msrs;
 
 /*
  * List of msr numbers which are used to expose MSR-based features that
  * can be used by a hypervisor to validate requested CPU features.
  */
-static u32 msr_based_features[] = {
+static const u32 msr_based_features_all[] = {
        MSR_IA32_VMX_BASIC,
        MSR_IA32_VMX_TRUE_PINBASED_CTLS,
        MSR_IA32_VMX_PINBASED_CTLS,
@@ -1270,6 +1275,7 @@ static u32 msr_based_features[] = {
        MSR_IA32_ARCH_CAPABILITIES,
 };
 
+static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all)];
 static unsigned int num_msr_based_features;
 
 static u64 kvm_get_arch_capabilities(void)
@@ -1279,6 +1285,14 @@ static u64 kvm_get_arch_capabilities(void)
        if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
                rdmsrl(MSR_IA32_ARCH_CAPABILITIES, data);
 
+       /*
+        * If nx_huge_pages is enabled, KVM's shadow paging will ensure that
+        * the nested hypervisor runs with NX huge pages.  If it is not,
+        * L1 is anyway vulnerable to ITLB_MULTIHIT exploits from other
+        * L1 guests, so it need not worry about its own (L2) guests.
+        */
+       data |= ARCH_CAP_PSCHANGE_MC_NO;
+
        /*
         * If we're doing cache flushes (either "always" or "cond")
         * we will do one whenever the guest does a vmlaunch/vmresume.
@@ -1298,6 +1312,25 @@ static u64 kvm_get_arch_capabilities(void)
        if (!boot_cpu_has_bug(X86_BUG_MDS))
                data |= ARCH_CAP_MDS_NO;
 
+       /*
+        * On TAA affected systems, export MDS_NO=0 when:
+        *      - TSX is enabled on the host, i.e. X86_FEATURE_RTM=1.
+        *      - Updated microcode is present. This is detected by
+        *        the presence of ARCH_CAP_TSX_CTRL_MSR and ensures
+        *        that VERW clears CPU buffers.
+        *
+        * When MDS_NO=0 is exported, guests deploy clear CPU buffer
+        * mitigation and don't complain:
+        *
+        *      "Vulnerable: Clear CPU buffers attempted, no microcode"
+        *
+        * If TSX is disabled on the system, guests are also mitigated against
+        * TAA and clear CPU buffer mitigation is not required for guests.
+        */
+       if (boot_cpu_has_bug(X86_BUG_TAA) && boot_cpu_has(X86_FEATURE_RTM) &&
+           (data & ARCH_CAP_TSX_CTRL_MSR))
+               data &= ~ARCH_CAP_MDS_NO;
+
        return data;
 }
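
For illustration, the two ARCH_CAPABILITIES adjustments above can be condensed into a standalone model. The sketch below is hypothetical userspace code, not kernel source: adjust_guest_arch_caps() is a made-up name and the bit values are reproduced locally so the snippet compiles on its own.

    #include <stdint.h>
    #include <stdbool.h>

    /* Subset of the MSR_IA32_ARCH_CAPABILITIES bit layout used above. */
    #define ARCH_CAP_MDS_NO          (1ULL << 5)
    #define ARCH_CAP_PSCHANGE_MC_NO  (1ULL << 6)
    #define ARCH_CAP_TSX_CTRL_MSR    (1ULL << 7)

    /* PSCHANGE_MC_NO is always advertised because KVM's shadow paging keeps
     * a nested hypervisor safe from ITLB multihit; MDS_NO is withdrawn on
     * TAA-affected hosts that keep TSX enabled but carry the TSX_CTRL
     * microcode, so guests fall back to VERW-based buffer clearing.
     */
    static uint64_t adjust_guest_arch_caps(uint64_t data, bool bug_taa,
                                           bool has_rtm)
    {
            data |= ARCH_CAP_PSCHANGE_MC_NO;

            if (bug_taa && has_rtm && (data & ARCH_CAP_TSX_CTRL_MSR))
                    data &= ~ARCH_CAP_MDS_NO;

            return data;
    }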
 
@@ -5090,22 +5123,26 @@ static void kvm_init_msr_list(void)
 {
        struct x86_pmu_capability x86_pmu;
        u32 dummy[2];
-       unsigned i, j;
+       unsigned i;
 
        BUILD_BUG_ON_MSG(INTEL_PMC_MAX_FIXED != 4,
-                        "Please update the fixed PMCs in msrs_to_save[]");
+                        "Please update the fixed PMCs in msrs_to_save_all[]");
 
        perf_get_x86_pmu_capability(&x86_pmu);
 
-       for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) {
-               if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
+       num_msrs_to_save = 0;
+       num_emulated_msrs = 0;
+       num_msr_based_features = 0;
+
+       for (i = 0; i < ARRAY_SIZE(msrs_to_save_all); i++) {
+               if (rdmsr_safe(msrs_to_save_all[i], &dummy[0], &dummy[1]) < 0)
                        continue;
 
                /*
                 * Even MSRs that are valid in the host may not be exposed
                 * to the guests in some cases.
                 */
-               switch (msrs_to_save[i]) {
+               switch (msrs_to_save_all[i]) {
                case MSR_IA32_BNDCFGS:
                        if (!kvm_mpx_supported())
                                continue;
@@ -5133,17 +5170,17 @@ static void kvm_init_msr_list(void)
                        break;
                case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B: {
                        if (!kvm_x86_ops->pt_supported() ||
-                               msrs_to_save[i] - MSR_IA32_RTIT_ADDR0_A >=
+                               msrs_to_save_all[i] - MSR_IA32_RTIT_ADDR0_A >=
                                intel_pt_validate_hw_cap(PT_CAP_num_address_ranges) * 2)
                                continue;
                        break;
                case MSR_ARCH_PERFMON_PERFCTR0 ... MSR_ARCH_PERFMON_PERFCTR0 + 17:
-                       if (msrs_to_save[i] - MSR_ARCH_PERFMON_PERFCTR0 >=
+                       if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_PERFCTR0 >=
                            min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp))
                                continue;
                        break;
                case MSR_ARCH_PERFMON_EVENTSEL0 ... MSR_ARCH_PERFMON_EVENTSEL0 + 17:
-                       if (msrs_to_save[i] - MSR_ARCH_PERFMON_EVENTSEL0 >=
+                       if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_EVENTSEL0 >=
                            min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp))
                                continue;
                }
@@ -5151,34 +5188,25 @@ static void kvm_init_msr_list(void)
                        break;
                }
 
-               if (j < i)
-                       msrs_to_save[j] = msrs_to_save[i];
-               j++;
+               msrs_to_save[num_msrs_to_save++] = msrs_to_save_all[i];
        }
-       num_msrs_to_save = j;
 
-       for (i = j = 0; i < ARRAY_SIZE(emulated_msrs); i++) {
-               if (!kvm_x86_ops->has_emulated_msr(emulated_msrs[i]))
+       for (i = 0; i < ARRAY_SIZE(emulated_msrs_all); i++) {
+               if (!kvm_x86_ops->has_emulated_msr(emulated_msrs_all[i]))
                        continue;
 
-               if (j < i)
-                       emulated_msrs[j] = emulated_msrs[i];
-               j++;
+               emulated_msrs[num_emulated_msrs++] = emulated_msrs_all[i];
        }
-       num_emulated_msrs = j;
 
-       for (i = j = 0; i < ARRAY_SIZE(msr_based_features); i++) {
+       for (i = 0; i < ARRAY_SIZE(msr_based_features_all); i++) {
                struct kvm_msr_entry msr;
 
-               msr.index = msr_based_features[i];
+               msr.index = msr_based_features_all[i];
                if (kvm_get_msr_feature(&msr))
                        continue;
 
-               if (j < i)
-                       msr_based_features[j] = msr_based_features[i];
-               j++;
+               msr_based_features[num_msr_based_features++] = msr_based_features_all[i];
        }
-       num_msr_based_features = j;
 }
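
The three rebuilt loops above share one pattern: instead of compacting each array in place with the old j < i shuffle, supported entries are now copied from a const source table into a separately sized runtime array while a counter records how many were kept. A generic sketch of that pattern, with hypothetical names and plain C rather than kernel code:

    #include <stddef.h>

    /* Copy the entries of a const table that pass a predicate into a
     * destination array sized for the worst case, returning the number
     * kept, the same shape as msrs_to_save being filled from
     * msrs_to_save_all.
     */
    static size_t filter_supported(const unsigned int *all, size_t n_all,
                                   unsigned int *out,
                                   int (*supported)(unsigned int))
    {
            size_t n_out = 0;

            for (size_t i = 0; i < n_all; i++)
                    if (supported(all[i]))
                            out[n_out++] = all[i];

            return n_out;
    }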
 
 static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
@@ -9428,6 +9456,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
        INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list);
        INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
        INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
+       INIT_LIST_HEAD(&kvm->arch.lpage_disallowed_mmu_pages);
        INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
        atomic_set(&kvm->arch.noncoherent_dma_count, 0);
 
@@ -9456,6 +9485,11 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
        return kvm_x86_ops->vm_init(kvm);
 }
 
+int kvm_arch_post_init_vm(struct kvm *kvm)
+{
+       return kvm_mmu_post_init_vm(kvm);
+}
+
 static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
 {
        vcpu_load(vcpu);
@@ -9557,6 +9591,11 @@ int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
 }
 EXPORT_SYMBOL_GPL(x86_set_memory_region);
 
+void kvm_arch_pre_destroy_vm(struct kvm *kvm)
+{
+       kvm_mmu_pre_destroy_vm(kvm);
+}
+
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
        if (current->mm == kvm->mm) {
index 0319d63..0c62144 100644 (file)
@@ -2713,6 +2713,28 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq)
        }
 }
 
+
+static
+void bfq_release_process_ref(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+{
+       /*
+        * To prevent bfqq's service guarantees from being violated,
+        * bfqq may be left busy, i.e., queued for service, even if
+        * empty (see comments in __bfq_bfqq_expire() for
+        * details). But, if no process will send requests to bfqq any
+        * longer, then there is no point in keeping bfqq queued for
+        * service. In addition, keeping bfqq queued for service, but
+        * with no process ref any longer, may have caused bfqq to be
+        * freed when dequeued from service. But this is assumed to
+        * never happen.
+        */
+       if (bfq_bfqq_busy(bfqq) && RB_EMPTY_ROOT(&bfqq->sort_list) &&
+           bfqq != bfqd->in_service_queue)
+               bfq_del_bfqq_busy(bfqd, bfqq, false);
+
+       bfq_put_queue(bfqq);
+}
+
 static void
 bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
                struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
@@ -2783,8 +2805,7 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
         */
        new_bfqq->pid = -1;
        bfqq->bic = NULL;
-       /* release process reference to bfqq */
-       bfq_put_queue(bfqq);
+       bfq_release_process_ref(bfqd, bfqq);
 }
 
 static bool bfq_allow_bio_merge(struct request_queue *q, struct request *rq,
@@ -4899,7 +4920,7 @@ static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
 
        bfq_put_cooperator(bfqq);
 
-       bfq_put_queue(bfqq); /* release process reference */
+       bfq_release_process_ref(bfqd, bfqq);
 }
 
 static void bfq_exit_icq_bfqq(struct bfq_io_cq *bic, bool is_sync)
@@ -5001,8 +5022,7 @@ static void bfq_check_ioprio_change(struct bfq_io_cq *bic, struct bio *bio)
 
        bfqq = bic_to_bfqq(bic, false);
        if (bfqq) {
-               /* release process reference on this queue */
-               bfq_put_queue(bfqq);
+               bfq_release_process_ref(bfqd, bfqq);
                bfqq = bfq_get_queue(bfqd, bio, BLK_RW_ASYNC, bic);
                bic_set_bfqq(bic, bfqq, false);
        }
@@ -5963,7 +5983,7 @@ bfq_split_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq)
 
        bfq_put_cooperator(bfqq);
 
-       bfq_put_queue(bfqq);
+       bfq_release_process_ref(bfqq->bfqd, bfqq);
        return NULL;
 }
 
index 8f0ed62..b1170ec 100644 (file)
@@ -751,7 +751,7 @@ bool __bio_try_merge_page(struct bio *bio, struct page *page,
        if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
                return false;
 
-       if (bio->bi_vcnt > 0) {
+       if (bio->bi_vcnt > 0 && !bio_full(bio, len)) {
                struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
 
                if (page_is_mergeable(bv, page, len, off, same_page)) {
index a7ed434..e01267f 100644 (file)
@@ -1057,9 +1057,12 @@ static bool iocg_activate(struct ioc_gq *iocg, struct ioc_now *now)
        atomic64_set(&iocg->active_period, cur_period);
 
        /* already activated or breaking leaf-only constraint? */
-       for (i = iocg->level; i > 0; i--)
-               if (!list_empty(&iocg->active_list))
+       if (!list_empty(&iocg->active_list))
+               goto succeed_unlock;
+       for (i = iocg->level - 1; i > 0; i--)
+               if (!list_empty(&iocg->ancestors[i]->active_list))
                        goto fail_unlock;
+
        if (iocg->child_active_sum)
                goto fail_unlock;
 
@@ -1101,6 +1104,7 @@ static bool iocg_activate(struct ioc_gq *iocg, struct ioc_now *now)
                ioc_start_period(ioc, now);
        }
 
+succeed_unlock:
        spin_unlock_irq(&ioc->lock);
        return true;
 
index cc37511..6265871 100644 (file)
@@ -554,12 +554,27 @@ ssize_t __weak cpu_show_mds(struct device *dev,
        return sprintf(buf, "Not affected\n");
 }
 
+ssize_t __weak cpu_show_tsx_async_abort(struct device *dev,
+                                       struct device_attribute *attr,
+                                       char *buf)
+{
+       return sprintf(buf, "Not affected\n");
+}
+
+ssize_t __weak cpu_show_itlb_multihit(struct device *dev,
+                           struct device_attribute *attr, char *buf)
+{
+       return sprintf(buf, "Not affected\n");
+}
+
 static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
 static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
 static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
 static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL);
 static DEVICE_ATTR(l1tf, 0444, cpu_show_l1tf, NULL);
 static DEVICE_ATTR(mds, 0444, cpu_show_mds, NULL);
+static DEVICE_ATTR(tsx_async_abort, 0444, cpu_show_tsx_async_abort, NULL);
+static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL);
 
 static struct attribute *cpu_root_vulnerabilities_attrs[] = {
        &dev_attr_meltdown.attr,
@@ -568,6 +583,8 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = {
        &dev_attr_spec_store_bypass.attr,
        &dev_attr_l1tf.attr,
        &dev_attr_mds.attr,
+       &dev_attr_tsx_async_abort.attr,
+       &dev_attr_itlb_multihit.attr,
        NULL
 };
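
The two cpu_show_*() functions added above are __weak stubs: an unaffected architecture keeps the "Not affected" default, while an affected one supplies a strong definition of the same name that wins at link time. A minimal, hypothetical sketch of such an override follows; the mitigation string is illustrative, not what any particular architecture prints.

    /* Hypothetical arch-side strong definition; it overrides the __weak
     * stub above purely by having the same symbol name.
     */
    ssize_t cpu_show_tsx_async_abort(struct device *dev,
                                     struct device_attribute *attr, char *buf)
    {
            return sprintf(buf, "Mitigation: Clear CPU buffers\n");
    }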
 
index 55907c2..84c4e1f 100644 (file)
@@ -872,3 +872,39 @@ int walk_memory_blocks(unsigned long start, unsigned long size,
        }
        return ret;
 }
+
+struct for_each_memory_block_cb_data {
+       walk_memory_blocks_func_t func;
+       void *arg;
+};
+
+static int for_each_memory_block_cb(struct device *dev, void *data)
+{
+       struct memory_block *mem = to_memory_block(dev);
+       struct for_each_memory_block_cb_data *cb_data = data;
+
+       return cb_data->func(mem, cb_data->arg);
+}
+
+/**
+ * for_each_memory_block - walk through all present memory blocks
+ *
+ * @arg: argument passed to func
+ * @func: callback for each memory block walked
+ *
+ * This function walks through all present memory blocks, calling func on
+ * each memory block.
+ *
+ * In case func() returns an error, walking is aborted and the error is
+ * returned.
+ */
+int for_each_memory_block(void *arg, walk_memory_blocks_func_t func)
+{
+       struct for_each_memory_block_cb_data cb_data = {
+               .func = func,
+               .arg = arg,
+       };
+
+       return bus_for_each_dev(&memory_subsys, NULL, &cb_data,
+                               for_each_memory_block_cb);
+}
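
for_each_memory_block() above is a thin adapter from bus_for_each_dev() to the walk_memory_blocks_func_t callback type. A hypothetical caller might look like the sketch below; count_cb() and count_memory_blocks() are illustrative names only.

    /* Count all present memory blocks. Returning non-zero from the
     * callback would abort the walk and be propagated to the caller.
     */
    static int count_cb(struct memory_block *mem, void *arg)
    {
            unsigned int *count = arg;

            (*count)++;
            return 0;
    }

    static unsigned int count_memory_blocks(void)
    {
            unsigned int count = 0;

            for_each_memory_block(&count, count_cb);
            return count;
    }
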
index 3913667..13527a0 100644 (file)
@@ -2087,7 +2087,7 @@ static int rbd_object_map_update_finish(struct rbd_obj_request *obj_req,
        struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
        struct ceph_osd_data *osd_data;
        u64 objno;
-       u8 state, new_state, current_state;
+       u8 state, new_state, uninitialized_var(current_state);
        bool has_current_state;
        void *p;
 
index 76b73dd..10f6368 100644 (file)
@@ -1000,8 +1000,10 @@ static void rsxx_pci_remove(struct pci_dev *dev)
 
        cancel_work_sync(&card->event_work);
 
+       destroy_workqueue(card->event_wq);
        rsxx_destroy_dev(card);
        rsxx_dma_destroy(card);
+       destroy_workqueue(card->creg_ctrl.creg_wq);
 
        spin_lock_irqsave(&card->irq_lock, flags);
        rsxx_disable_ier_and_isr(card, CR_INTR_ALL);
index 80b850e..8d53b8e 100644 (file)
@@ -13,7 +13,6 @@
 #include <linux/delay.h>
 #include <linux/device.h>
 #include <linux/err.h>
-#include <linux/freezer.h>
 #include <linux/fs.h>
 #include <linux/hw_random.h>
 #include <linux/kernel.h>
@@ -422,9 +421,7 @@ static int hwrng_fillfn(void *unused)
 {
        long rc;
 
-       set_freezable();
-
-       while (!kthread_freezable_should_stop(NULL)) {
+       while (!kthread_should_stop()) {
                struct hwrng *rng;
 
                rng = get_current_rng();
index de434fe..01b8868 100644 (file)
 #include <linux/percpu.h>
 #include <linux/cryptohash.h>
 #include <linux/fips.h>
-#include <linux/freezer.h>
 #include <linux/ptrace.h>
 #include <linux/workqueue.h>
 #include <linux/irq.h>
@@ -2500,8 +2499,7 @@ void add_hwgenerator_randomness(const char *buffer, size_t count,
         * We'll be woken up again once below random_write_wakeup_thresh,
         * or when the calling thread is about to terminate.
         */
-       wait_event_freezable(random_write_wait,
-                       kthread_should_stop() ||
+       wait_event_interruptible(random_write_wait, kthread_should_stop() ||
                        ENTROPY_BITS(&input_pool) <= random_write_wakeup_bits);
        mix_pool_bytes(poolp, buffer, count);
        credit_entropy_bits(poolp, entropy);
index 354b27d..62812f8 100644 (file)
@@ -328,12 +328,13 @@ static int sh_mtu2_register(struct sh_mtu2_channel *ch, const char *name)
        return 0;
 }
 
+static const unsigned int sh_mtu2_channel_offsets[] = {
+       0x300, 0x380, 0x000,
+};
+
 static int sh_mtu2_setup_channel(struct sh_mtu2_channel *ch, unsigned int index,
                                 struct sh_mtu2_device *mtu)
 {
-       static const unsigned int channel_offsets[] = {
-               0x300, 0x380, 0x000,
-       };
        char name[6];
        int irq;
        int ret;
@@ -356,7 +357,7 @@ static int sh_mtu2_setup_channel(struct sh_mtu2_channel *ch, unsigned int index,
                return ret;
        }
 
-       ch->base = mtu->mapbase + channel_offsets[index];
+       ch->base = mtu->mapbase + sh_mtu2_channel_offsets[index];
        ch->index = index;
 
        return sh_mtu2_register(ch, dev_name(&mtu->pdev->dev));
@@ -408,7 +409,12 @@ static int sh_mtu2_setup(struct sh_mtu2_device *mtu,
        }
 
        /* Allocate and setup the channels. */
-       mtu->num_channels = 3;
+       ret = platform_irq_count(pdev);
+       if (ret < 0)
+               goto err_unmap;
+
+       mtu->num_channels = min_t(unsigned int, ret,
+                                 ARRAY_SIZE(sh_mtu2_channel_offsets));
 
        mtu->channels = kcalloc(mtu->num_channels, sizeof(*mtu->channels),
                                GFP_KERNEL);
index a562f49..9318edc 100644 (file)
@@ -268,15 +268,12 @@ static int __init mtk_syst_init(struct device_node *node)
 
        ret = timer_of_init(node, &to);
        if (ret)
-               goto err;
+               return ret;
 
        clockevents_config_and_register(&to.clkevt, timer_of_rate(&to),
                                        TIMER_SYNC_TICKS, 0xffffffff);
 
        return 0;
-err:
-       timer_of_cleanup(&to);
-       return ret;
 }
 
 static int __init mtk_gpt_init(struct device_node *node)
@@ -293,7 +290,7 @@ static int __init mtk_gpt_init(struct device_node *node)
 
        ret = timer_of_init(node, &to);
        if (ret)
-               goto err;
+               return ret;
 
        /* Configure clock source */
        mtk_gpt_setup(&to, TIMER_CLK_SRC, GPT_CTRL_OP_FREERUN);
@@ -311,9 +308,6 @@ static int __init mtk_gpt_init(struct device_node *node)
        mtk_gpt_enable_irq(&to, TIMER_CLK_EVT);
 
        return 0;
-err:
-       timer_of_cleanup(&to);
-       return ret;
 }
 TIMER_OF_DECLARE(mtk_mt6577, "mediatek,mt6577-timer", mtk_gpt_init);
 TIMER_OF_DECLARE(mtk_mt6765, "mediatek,mt6765-timer", mtk_syst_init);
index 4d71537..a460900 100644 (file)
@@ -950,21 +950,7 @@ static void psp_print_fw_hdr(struct psp_context *psp,
                             struct amdgpu_firmware_info *ucode)
 {
        struct amdgpu_device *adev = psp->adev;
-       const struct sdma_firmware_header_v1_0 *sdma_hdr =
-               (const struct sdma_firmware_header_v1_0 *)
-               adev->sdma.instance[ucode->ucode_id - AMDGPU_UCODE_ID_SDMA0].fw->data;
-       const struct gfx_firmware_header_v1_0 *ce_hdr =
-               (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
-       const struct gfx_firmware_header_v1_0 *pfp_hdr =
-               (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
-       const struct gfx_firmware_header_v1_0 *me_hdr =
-               (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
-       const struct gfx_firmware_header_v1_0 *mec_hdr =
-               (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
-       const struct rlc_firmware_header_v2_0 *rlc_hdr =
-               (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
-       const struct smc_firmware_header_v1_0 *smc_hdr =
-               (const struct smc_firmware_header_v1_0 *)adev->pm.fw->data;
+       struct common_firmware_header *hdr;
 
        switch (ucode->ucode_id) {
        case AMDGPU_UCODE_ID_SDMA0:
@@ -975,25 +961,33 @@ static void psp_print_fw_hdr(struct psp_context *psp,
        case AMDGPU_UCODE_ID_SDMA5:
        case AMDGPU_UCODE_ID_SDMA6:
        case AMDGPU_UCODE_ID_SDMA7:
-               amdgpu_ucode_print_sdma_hdr(&sdma_hdr->header);
+               hdr = (struct common_firmware_header *)
+                       adev->sdma.instance[ucode->ucode_id - AMDGPU_UCODE_ID_SDMA0].fw->data;
+               amdgpu_ucode_print_sdma_hdr(hdr);
                break;
        case AMDGPU_UCODE_ID_CP_CE:
-               amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
+               hdr = (struct common_firmware_header *)adev->gfx.ce_fw->data;
+               amdgpu_ucode_print_gfx_hdr(hdr);
                break;
        case AMDGPU_UCODE_ID_CP_PFP:
-               amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
+               hdr = (struct common_firmware_header *)adev->gfx.pfp_fw->data;
+               amdgpu_ucode_print_gfx_hdr(hdr);
                break;
        case AMDGPU_UCODE_ID_CP_ME:
-               amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
+               hdr = (struct common_firmware_header *)adev->gfx.me_fw->data;
+               amdgpu_ucode_print_gfx_hdr(hdr);
                break;
        case AMDGPU_UCODE_ID_CP_MEC1:
-               amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
+               hdr = (struct common_firmware_header *)adev->gfx.mec_fw->data;
+               amdgpu_ucode_print_gfx_hdr(hdr);
                break;
        case AMDGPU_UCODE_ID_RLC_G:
-               amdgpu_ucode_print_rlc_hdr(&rlc_hdr->header);
+               hdr = (struct common_firmware_header *)adev->gfx.rlc_fw->data;
+               amdgpu_ucode_print_rlc_hdr(hdr);
                break;
        case AMDGPU_UCODE_ID_SMC:
-               amdgpu_ucode_print_smc_hdr(&smc_hdr->header);
+               hdr = (struct common_firmware_header *)adev->pm.fw->data;
+               amdgpu_ucode_print_smc_hdr(hdr);
                break;
        default:
                break;
index 1209976..c002f23 100644 (file)
@@ -4896,6 +4896,9 @@ void intel_power_domains_init_hw(struct drm_i915_private *i915, bool resume)
 
        power_domains->initializing = true;
 
+       /* Must happen before power domain init on VLV/CHV */
+       intel_update_rawclk(i915);
+
        if (INTEL_GEN(i915) >= 11) {
                icl_display_core_init(i915, resume);
        } else if (IS_CANNONLAKE(i915)) {
index 1cdfe05..e41fd94 100644 (file)
@@ -319,6 +319,8 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
        free_engines(rcu_access_pointer(ctx->engines));
        mutex_destroy(&ctx->engines_mutex);
 
+       kfree(ctx->jump_whitelist);
+
        if (ctx->timeline)
                intel_timeline_put(ctx->timeline);
 
@@ -441,6 +443,9 @@ __create_context(struct drm_i915_private *i915)
        for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++)
                ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;
 
+       ctx->jump_whitelist = NULL;
+       ctx->jump_whitelist_cmds = 0;
+
        return ctx;
 
 err_free:
index 260d59c..00537b9 100644 (file)
@@ -192,6 +192,13 @@ struct i915_gem_context {
         * per vm, which may be one per context or shared with the global GTT)
         */
        struct radix_tree_root handles_vma;
+
+       /** jump_whitelist: Bit array for tracking cmds during cmdparsing
+        *  Guarded by struct_mutex
+        */
+       unsigned long *jump_whitelist;
+       /** jump_whitelist_cmds: Number of cmd slots available */
+       u32 jump_whitelist_cmds;
 };
 
 #endif /* __I915_GEM_CONTEXT_TYPES_H__ */
index b5f6937..e635e1e 100644 (file)
@@ -296,7 +296,9 @@ static inline u64 gen8_noncanonical_addr(u64 address)
 
 static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
 {
-       return intel_engine_needs_cmd_parser(eb->engine) && eb->batch_len;
+       return intel_engine_requires_cmd_parser(eb->engine) ||
+               (intel_engine_using_cmd_parser(eb->engine) &&
+                eb->args->batch_len);
 }
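
The reworked eb_use_cmdparser() above separates two cases: an engine that requires the parser (the gen9 blitter, flagged later in this series) must always be scanned, while an engine merely using it as a gen7-era optimisation is scanned only when a batch length was supplied. Restated as a small decision table, as commentary only rather than new behaviour:

    REQUIRES_CMD_PARSER   USING_CMD_PARSER   args->batch_len   parse batch?
    set                   -                  -                 yes
    clear                 set                non-zero          yes
    clear                 set                zero              no
    clear                 clear              -                 no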
 
 static int eb_create(struct i915_execbuffer *eb)
@@ -1955,40 +1957,94 @@ static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
        return 0;
 }
 
-static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master)
+static struct i915_vma *
+shadow_batch_pin(struct i915_execbuffer *eb, struct drm_i915_gem_object *obj)
+{
+       struct drm_i915_private *dev_priv = eb->i915;
+       struct i915_vma * const vma = *eb->vma;
+       struct i915_address_space *vm;
+       u64 flags;
+
+       /*
+        * PPGTT backed shadow buffers must be mapped RO, to prevent
+        * post-scan tampering
+        */
+       if (CMDPARSER_USES_GGTT(dev_priv)) {
+               flags = PIN_GLOBAL;
+               vm = &dev_priv->ggtt.vm;
+       } else if (vma->vm->has_read_only) {
+               flags = PIN_USER;
+               vm = vma->vm;
+               i915_gem_object_set_readonly(obj);
+       } else {
+               DRM_DEBUG("Cannot prevent post-scan tampering without RO capable vm\n");
+               return ERR_PTR(-EINVAL);
+       }
+
+       return i915_gem_object_pin(obj, vm, NULL, 0, 0, flags);
+}
+
+static struct i915_vma *eb_parse(struct i915_execbuffer *eb)
 {
        struct intel_engine_pool_node *pool;
        struct i915_vma *vma;
+       u64 batch_start;
+       u64 shadow_batch_start;
        int err;
 
        pool = intel_engine_pool_get(&eb->engine->pool, eb->batch_len);
        if (IS_ERR(pool))
                return ERR_CAST(pool);
 
-       err = intel_engine_cmd_parser(eb->engine,
+       vma = shadow_batch_pin(eb, pool->obj);
+       if (IS_ERR(vma))
+               goto err;
+
+       batch_start = gen8_canonical_addr(eb->batch->node.start) +
+                     eb->batch_start_offset;
+
+       shadow_batch_start = gen8_canonical_addr(vma->node.start);
+
+       err = intel_engine_cmd_parser(eb->gem_context,
+                                     eb->engine,
                                      eb->batch->obj,
-                                     pool->obj,
+                                     batch_start,
                                      eb->batch_start_offset,
                                      eb->batch_len,
-                                     is_master);
+                                     pool->obj,
+                                     shadow_batch_start);
+
        if (err) {
-               if (err == -EACCES) /* unhandled chained batch */
+               i915_vma_unpin(vma);
+
+               /*
+                * Unsafe GGTT-backed buffers can still be submitted safely
+                * as non-secure.
+                * For PPGTT backing, however, we have no choice but to forcibly
+                * reject unsafe buffers.
+                */
+               if (CMDPARSER_USES_GGTT(eb->i915) && (err == -EACCES))
+                       /* Execute original buffer non-secure */
                        vma = NULL;
                else
                        vma = ERR_PTR(err);
                goto err;
        }
 
-       vma = i915_gem_object_ggtt_pin(pool->obj, NULL, 0, 0, 0);
-       if (IS_ERR(vma))
-               goto err;
-
        eb->vma[eb->buffer_count] = i915_vma_get(vma);
        eb->flags[eb->buffer_count] =
                __EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF;
        vma->exec_flags = &eb->flags[eb->buffer_count];
        eb->buffer_count++;
 
+       eb->batch_start_offset = 0;
+       eb->batch = vma;
+
+       if (CMDPARSER_USES_GGTT(eb->i915))
+               eb->batch_flags |= I915_DISPATCH_SECURE;
+
+       /* eb->batch_len unchanged */
+
        vma->private = pool;
        return vma;
 
@@ -2421,6 +2477,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
                       struct drm_i915_gem_exec_object2 *exec,
                       struct drm_syncobj **fences)
 {
+       struct drm_i915_private *i915 = to_i915(dev);
        struct i915_execbuffer eb;
        struct dma_fence *in_fence = NULL;
        struct dma_fence *exec_fence = NULL;
@@ -2432,7 +2489,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
        BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS &
                     ~__EXEC_OBJECT_UNKNOWN_FLAGS);
 
-       eb.i915 = to_i915(dev);
+       eb.i915 = i915;
        eb.file = file;
        eb.args = args;
        if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC))
@@ -2452,8 +2509,15 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 
        eb.batch_flags = 0;
        if (args->flags & I915_EXEC_SECURE) {
+               if (INTEL_GEN(i915) >= 11)
+                       return -ENODEV;
+
+               /* Return -EPERM to trigger fallback code on old binaries. */
+               if (!HAS_SECURE_BATCHES(i915))
+                       return -EPERM;
+
                if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN))
-                   return -EPERM;
+                       return -EPERM;
 
                eb.batch_flags |= I915_DISPATCH_SECURE;
        }
@@ -2530,34 +2594,19 @@ i915_gem_do_execbuffer(struct drm_device *dev,
                goto err_vma;
        }
 
+       if (eb.batch_len == 0)
+               eb.batch_len = eb.batch->size - eb.batch_start_offset;
+
        if (eb_use_cmdparser(&eb)) {
                struct i915_vma *vma;
 
-               vma = eb_parse(&eb, drm_is_current_master(file));
+               vma = eb_parse(&eb);
                if (IS_ERR(vma)) {
                        err = PTR_ERR(vma);
                        goto err_vma;
                }
-
-               if (vma) {
-                       /*
-                        * Batch parsed and accepted:
-                        *
-                        * Set the DISPATCH_SECURE bit to remove the NON_SECURE
-                        * bit from MI_BATCH_BUFFER_START commands issued in
-                        * the dispatch_execbuffer implementations. We
-                        * specifically don't want that set on batches the
-                        * command parser has accepted.
-                        */
-                       eb.batch_flags |= I915_DISPATCH_SECURE;
-                       eb.batch_start_offset = 0;
-                       eb.batch = vma;
-               }
        }
 
-       if (eb.batch_len == 0)
-               eb.batch_len = eb.batch->size - eb.batch_start_offset;
-
        /*
         * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
         * batch" bit. Hence we need to pin secure batches into the global gtt.
index a82cea9..9dd8c29 100644 (file)
@@ -475,12 +475,13 @@ struct intel_engine_cs {
 
        struct intel_engine_hangcheck hangcheck;
 
-#define I915_ENGINE_NEEDS_CMD_PARSER BIT(0)
+#define I915_ENGINE_USING_CMD_PARSER BIT(0)
 #define I915_ENGINE_SUPPORTS_STATS   BIT(1)
 #define I915_ENGINE_HAS_PREEMPTION   BIT(2)
 #define I915_ENGINE_HAS_SEMAPHORES   BIT(3)
 #define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4)
 #define I915_ENGINE_IS_VIRTUAL       BIT(5)
+#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7)
        unsigned int flags;
 
        /*
@@ -541,9 +542,15 @@ struct intel_engine_cs {
 };
 
 static inline bool
-intel_engine_needs_cmd_parser(const struct intel_engine_cs *engine)
+intel_engine_using_cmd_parser(const struct intel_engine_cs *engine)
 {
-       return engine->flags & I915_ENGINE_NEEDS_CMD_PARSER;
+       return engine->flags & I915_ENGINE_USING_CMD_PARSER;
+}
+
+static inline bool
+intel_engine_requires_cmd_parser(const struct intel_engine_cs *engine)
+{
+       return engine->flags & I915_ENGINE_REQUIRES_CMD_PARSER;
 }
 
 static inline bool
index 1363e06..fac75af 100644 (file)
@@ -38,6 +38,9 @@ static int __gt_unpark(struct intel_wakeref *wf)
        gt->awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
        GEM_BUG_ON(!gt->awake);
 
+       if (NEEDS_RC6_CTX_CORRUPTION_WA(i915))
+               intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL);
+
        intel_enable_gt_powersave(i915);
 
        i915_update_gfx_val(i915);
@@ -67,6 +70,11 @@ static int __gt_park(struct intel_wakeref *wf)
        if (INTEL_GEN(i915) >= 6)
                gen6_rps_idle(i915);
 
+       if (NEEDS_RC6_CTX_CORRUPTION_WA(i915)) {
+               i915_rc6_ctx_wa_check(i915);
+               intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
+       }
+
        /* Everything switched off, flush any residual interrupt just in case */
        intel_synchronize_irq(i915);
 
index 728704b..cea184a 100644 (file)
@@ -199,14 +199,6 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
        MOCS_ENTRY(15, \
                   LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \
                   L3_3_WB), \
-       /* Bypass LLC - Uncached (EHL+) */ \
-       MOCS_ENTRY(16, \
-                  LE_1_UC | LE_TC_1_LLC | LE_SCF(1), \
-                  L3_1_UC), \
-       /* Bypass LLC - L3 (Read-Only) (EHL+) */ \
-       MOCS_ENTRY(17, \
-                  LE_1_UC | LE_TC_1_LLC | LE_SCF(1), \
-                  L3_3_WB), \
        /* Self-Snoop - L3 + LLC */ \
        MOCS_ENTRY(18, \
                   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SSE(3), \
@@ -270,7 +262,7 @@ static const struct drm_i915_mocs_entry tigerlake_mocs_table[] = {
                   L3_1_UC),
        /* HW Special Case (Displayable) */
        MOCS_ENTRY(61,
-                  LE_1_UC | LE_TC_1_LLC | LE_SCF(1),
+                  LE_1_UC | LE_TC_1_LLC,
                   L3_3_WB),
 };
 
index 13044c0..4bfaefd 100644 (file)
@@ -498,8 +498,6 @@ int intel_vgpu_get_dmabuf(struct intel_vgpu *vgpu, unsigned int dmabuf_id)
                goto out_free_gem;
        }
 
-       i915_gem_object_put(obj);
-
        ret = dma_buf_fd(dmabuf, DRM_CLOEXEC | DRM_RDWR);
        if (ret < 0) {
                gvt_vgpu_err("create dma-buf fd failed ret:%d\n", ret);
@@ -524,6 +522,8 @@ int intel_vgpu_get_dmabuf(struct intel_vgpu *vgpu, unsigned int dmabuf_id)
                    file_count(dmabuf->file),
                    kref_read(&obj->base.refcount));
 
+       i915_gem_object_put(obj);
+
        return dmabuf_fd;
 
 out_free_dmabuf:
index 2455510..f24096e 100644 (file)
  * granting userspace undue privileges. There are three categories of privilege.
  *
  * First, commands which are explicitly defined as privileged or which should
- * only be used by the kernel driver. The parser generally rejects such
- * commands, though it may allow some from the drm master process.
+ * only be used by the kernel driver. The parser rejects such commands.
  *
  * Second, commands which access registers. To support correct/enhanced
  * userspace functionality, particularly certain OpenGL extensions, the parser
- * provides a whitelist of registers which userspace may safely access (for both
- * normal and drm master processes).
+ * provides a whitelist of registers which userspace may safely access.
  *
  * Third, commands which access privileged memory (i.e. GGTT, HWS page, etc).
  * The parser always rejects such commands.
@@ -84,9 +82,9 @@
  * in the per-engine command tables.
  *
  * Other command table entries map fairly directly to high level categories
- * mentioned above: rejected, master-only, register whitelist. The parser
- * implements a number of checks, including the privileged memory checks, via a
- * general bitmasking mechanism.
+ * mentioned above: rejected, register whitelist. The parser implements a number
+ * of checks, including the privileged memory checks, via a general bitmasking
+ * mechanism.
  */
 
 /*
@@ -104,8 +102,6 @@ struct drm_i915_cmd_descriptor {
         * CMD_DESC_REJECT: The command is never allowed
         * CMD_DESC_REGISTER: The command should be checked against the
         *                    register whitelist for the appropriate ring
-        * CMD_DESC_MASTER: The command is allowed if the submitting process
-        *                  is the DRM master
         */
        u32 flags;
 #define CMD_DESC_FIXED    (1<<0)
@@ -113,7 +109,6 @@ struct drm_i915_cmd_descriptor {
 #define CMD_DESC_REJECT   (1<<2)
 #define CMD_DESC_REGISTER (1<<3)
 #define CMD_DESC_BITMASK  (1<<4)
-#define CMD_DESC_MASTER   (1<<5)
 
        /*
         * The command's unique identification bits and the bitmask to get them.
@@ -194,7 +189,7 @@ struct drm_i915_cmd_table {
 #define CMD(op, opm, f, lm, fl, ...)                           \
        {                                                       \
                .flags = (fl) | ((f) ? CMD_DESC_FIXED : 0),     \
-               .cmd = { (op), ~0u << (opm) },                  \
+               .cmd = { (op & ~0u << (opm)), ~0u << (opm) },   \
                .length = { (lm) },                             \
                __VA_ARGS__                                     \
        }
@@ -209,14 +204,13 @@ struct drm_i915_cmd_table {
 #define R CMD_DESC_REJECT
 #define W CMD_DESC_REGISTER
 #define B CMD_DESC_BITMASK
-#define M CMD_DESC_MASTER
 
 /*            Command                          Mask   Fixed Len   Action
              ---------------------------------------------------------- */
-static const struct drm_i915_cmd_descriptor common_cmds[] = {
+static const struct drm_i915_cmd_descriptor gen7_common_cmds[] = {
        CMD(  MI_NOOP,                          SMI,    F,  1,      S  ),
        CMD(  MI_USER_INTERRUPT,                SMI,    F,  1,      R  ),
-       CMD(  MI_WAIT_FOR_EVENT,                SMI,    F,  1,      M  ),
+       CMD(  MI_WAIT_FOR_EVENT,                SMI,    F,  1,      R  ),
        CMD(  MI_ARB_CHECK,                     SMI,    F,  1,      S  ),
        CMD(  MI_REPORT_HEAD,                   SMI,    F,  1,      S  ),
        CMD(  MI_SUSPEND_FLUSH,                 SMI,    F,  1,      S  ),
@@ -246,7 +240,7 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = {
        CMD(  MI_BATCH_BUFFER_START,            SMI,   !F,  0xFF,   S  ),
 };
 
-static const struct drm_i915_cmd_descriptor render_cmds[] = {
+static const struct drm_i915_cmd_descriptor gen7_render_cmds[] = {
        CMD(  MI_FLUSH,                         SMI,    F,  1,      S  ),
        CMD(  MI_ARB_ON_OFF,                    SMI,    F,  1,      R  ),
        CMD(  MI_PREDICATE,                     SMI,    F,  1,      S  ),
@@ -313,7 +307,7 @@ static const struct drm_i915_cmd_descriptor hsw_render_cmds[] = {
        CMD(  MI_URB_ATOMIC_ALLOC,              SMI,    F,  1,      S  ),
        CMD(  MI_SET_APPID,                     SMI,    F,  1,      S  ),
        CMD(  MI_RS_CONTEXT,                    SMI,    F,  1,      S  ),
-       CMD(  MI_LOAD_SCAN_LINES_INCL,          SMI,   !F,  0x3F,   M  ),
+       CMD(  MI_LOAD_SCAN_LINES_INCL,          SMI,   !F,  0x3F,   R  ),
        CMD(  MI_LOAD_SCAN_LINES_EXCL,          SMI,   !F,  0x3F,   R  ),
        CMD(  MI_LOAD_REGISTER_REG,             SMI,   !F,  0xFF,   W,
              .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 1 }    ),
@@ -330,7 +324,7 @@ static const struct drm_i915_cmd_descriptor hsw_render_cmds[] = {
        CMD(  GFX_OP_3DSTATE_BINDING_TABLE_EDIT_PS,  S3D,   !F,  0x1FF,  S  ),
 };
 
-static const struct drm_i915_cmd_descriptor video_cmds[] = {
+static const struct drm_i915_cmd_descriptor gen7_video_cmds[] = {
        CMD(  MI_ARB_ON_OFF,                    SMI,    F,  1,      R  ),
        CMD(  MI_SET_APPID,                     SMI,    F,  1,      S  ),
        CMD(  MI_STORE_DWORD_IMM,               SMI,   !F,  0xFF,   B,
@@ -374,7 +368,7 @@ static const struct drm_i915_cmd_descriptor video_cmds[] = {
        CMD(  MFX_WAIT,                         SMFX,   F,  1,      S  ),
 };
 
-static const struct drm_i915_cmd_descriptor vecs_cmds[] = {
+static const struct drm_i915_cmd_descriptor gen7_vecs_cmds[] = {
        CMD(  MI_ARB_ON_OFF,                    SMI,    F,  1,      R  ),
        CMD(  MI_SET_APPID,                     SMI,    F,  1,      S  ),
        CMD(  MI_STORE_DWORD_IMM,               SMI,   !F,  0xFF,   B,
@@ -412,7 +406,7 @@ static const struct drm_i915_cmd_descriptor vecs_cmds[] = {
              }},                                                      ),
 };
 
-static const struct drm_i915_cmd_descriptor blt_cmds[] = {
+static const struct drm_i915_cmd_descriptor gen7_blt_cmds[] = {
        CMD(  MI_DISPLAY_FLIP,                  SMI,   !F,  0xFF,   R  ),
        CMD(  MI_STORE_DWORD_IMM,               SMI,   !F,  0x3FF,  B,
              .bits = {{
@@ -446,10 +440,64 @@ static const struct drm_i915_cmd_descriptor blt_cmds[] = {
 };
 
 static const struct drm_i915_cmd_descriptor hsw_blt_cmds[] = {
-       CMD(  MI_LOAD_SCAN_LINES_INCL,          SMI,   !F,  0x3F,   M  ),
+       CMD(  MI_LOAD_SCAN_LINES_INCL,          SMI,   !F,  0x3F,   R  ),
        CMD(  MI_LOAD_SCAN_LINES_EXCL,          SMI,   !F,  0x3F,   R  ),
 };
 
+/*
+ * For Gen9 we can still rely on the h/w to enforce cmd security, and only
+ * need to re-enforce the register access checks. We therefore only need to
+ * teach the cmdparser how to find the end of each command, and identify
+ * register accesses. The table doesn't need to reject any commands, and so
+ * the only commands listed here are:
+ *   1) Those that touch registers
+ *   2) Those that do not have the default 8-bit length
+ *
+ * Note that the default MI length mask chosen for this table is 0xFF, not
+ * the 0x3F used on older devices. This is because the vast majority of MI
+ * cmds on Gen9 use a standard 8-bit Length field.
+ * All the Gen9 blitter instructions use the standard 0xFF length mask, and
+ * none allow access to non-general registers, so in fact no BLT cmds are
+ * included in the table at all.
+ *
+ */
+static const struct drm_i915_cmd_descriptor gen9_blt_cmds[] = {
+       CMD(  MI_NOOP,                          SMI,    F,  1,      S  ),
+       CMD(  MI_USER_INTERRUPT,                SMI,    F,  1,      S  ),
+       CMD(  MI_WAIT_FOR_EVENT,                SMI,    F,  1,      S  ),
+       CMD(  MI_FLUSH,                         SMI,    F,  1,      S  ),
+       CMD(  MI_ARB_CHECK,                     SMI,    F,  1,      S  ),
+       CMD(  MI_REPORT_HEAD,                   SMI,    F,  1,      S  ),
+       CMD(  MI_ARB_ON_OFF,                    SMI,    F,  1,      S  ),
+       CMD(  MI_SUSPEND_FLUSH,                 SMI,    F,  1,      S  ),
+       CMD(  MI_LOAD_SCAN_LINES_INCL,          SMI,   !F,  0x3F,   S  ),
+       CMD(  MI_LOAD_SCAN_LINES_EXCL,          SMI,   !F,  0x3F,   S  ),
+       CMD(  MI_STORE_DWORD_IMM,               SMI,   !F,  0x3FF,  S  ),
+       CMD(  MI_LOAD_REGISTER_IMM(1),          SMI,   !F,  0xFF,   W,
+             .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 2 }    ),
+       CMD(  MI_UPDATE_GTT,                    SMI,   !F,  0x3FF,  S  ),
+       CMD(  MI_STORE_REGISTER_MEM_GEN8,       SMI,    F,  4,      W,
+             .reg = { .offset = 1, .mask = 0x007FFFFC }               ),
+       CMD(  MI_FLUSH_DW,                      SMI,   !F,  0x3F,   S  ),
+       CMD(  MI_LOAD_REGISTER_MEM_GEN8,        SMI,    F,  4,      W,
+             .reg = { .offset = 1, .mask = 0x007FFFFC }               ),
+       CMD(  MI_LOAD_REGISTER_REG,             SMI,    !F,  0xFF,  W,
+             .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 1 }    ),
+
+       /*
+        * We allow BB_START but apply further checks. We just sanitize the
+        * basic fields here.
+        */
+#define MI_BB_START_OPERAND_MASK   GENMASK(SMI-1, 0)
+#define MI_BB_START_OPERAND_EXPECT (MI_BATCH_PPGTT_HSW | 1)
+       CMD(  MI_BATCH_BUFFER_START_GEN8,       SMI,    !F,  0xFF,  B,
+             .bits = {{
+                       .offset = 0,
+                       .mask = MI_BB_START_OPERAND_MASK,
+                       .expected = MI_BB_START_OPERAND_EXPECT,
+             }},                                                      ),
+};
+
 static const struct drm_i915_cmd_descriptor noop_desc =
        CMD(MI_NOOP, SMI, F, 1, S);
 
@@ -463,40 +511,44 @@ static const struct drm_i915_cmd_descriptor noop_desc =
 #undef R
 #undef W
 #undef B
-#undef M
 
-static const struct drm_i915_cmd_table gen7_render_cmds[] = {
-       { common_cmds, ARRAY_SIZE(common_cmds) },
-       { render_cmds, ARRAY_SIZE(render_cmds) },
+static const struct drm_i915_cmd_table gen7_render_cmd_table[] = {
+       { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) },
+       { gen7_render_cmds, ARRAY_SIZE(gen7_render_cmds) },
 };
 
-static const struct drm_i915_cmd_table hsw_render_ring_cmds[] = {
-       { common_cmds, ARRAY_SIZE(common_cmds) },
-       { render_cmds, ARRAY_SIZE(render_cmds) },
+static const struct drm_i915_cmd_table hsw_render_ring_cmd_table[] = {
+       { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) },
+       { gen7_render_cmds, ARRAY_SIZE(gen7_render_cmds) },
        { hsw_render_cmds, ARRAY_SIZE(hsw_render_cmds) },
 };
 
-static const struct drm_i915_cmd_table gen7_video_cmds[] = {
-       { common_cmds, ARRAY_SIZE(common_cmds) },
-       { video_cmds, ARRAY_SIZE(video_cmds) },
+static const struct drm_i915_cmd_table gen7_video_cmd_table[] = {
+       { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) },
+       { gen7_video_cmds, ARRAY_SIZE(gen7_video_cmds) },
 };
 
-static const struct drm_i915_cmd_table hsw_vebox_cmds[] = {
-       { common_cmds, ARRAY_SIZE(common_cmds) },
-       { vecs_cmds, ARRAY_SIZE(vecs_cmds) },
+static const struct drm_i915_cmd_table hsw_vebox_cmd_table[] = {
+       { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) },
+       { gen7_vecs_cmds, ARRAY_SIZE(gen7_vecs_cmds) },
 };
 
-static const struct drm_i915_cmd_table gen7_blt_cmds[] = {
-       { common_cmds, ARRAY_SIZE(common_cmds) },
-       { blt_cmds, ARRAY_SIZE(blt_cmds) },
+static const struct drm_i915_cmd_table gen7_blt_cmd_table[] = {
+       { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) },
+       { gen7_blt_cmds, ARRAY_SIZE(gen7_blt_cmds) },
 };
 
-static const struct drm_i915_cmd_table hsw_blt_ring_cmds[] = {
-       { common_cmds, ARRAY_SIZE(common_cmds) },
-       { blt_cmds, ARRAY_SIZE(blt_cmds) },
+static const struct drm_i915_cmd_table hsw_blt_ring_cmd_table[] = {
+       { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) },
+       { gen7_blt_cmds, ARRAY_SIZE(gen7_blt_cmds) },
        { hsw_blt_cmds, ARRAY_SIZE(hsw_blt_cmds) },
 };
 
+static const struct drm_i915_cmd_table gen9_blt_cmd_table[] = {
+       { gen9_blt_cmds, ARRAY_SIZE(gen9_blt_cmds) },
+};
+
+
 /*
  * Register whitelists, sorted by increasing register offset.
  */
@@ -612,17 +664,27 @@ static const struct drm_i915_reg_descriptor gen7_blt_regs[] = {
        REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE),
 };
 
-static const struct drm_i915_reg_descriptor ivb_master_regs[] = {
-       REG32(FORCEWAKE_MT),
-       REG32(DERRMR),
-       REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_A)),
-       REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_B)),
-       REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_C)),
-};
-
-static const struct drm_i915_reg_descriptor hsw_master_regs[] = {
-       REG32(FORCEWAKE_MT),
-       REG32(DERRMR),
+static const struct drm_i915_reg_descriptor gen9_blt_regs[] = {
+       REG64_IDX(RING_TIMESTAMP, RENDER_RING_BASE),
+       REG64_IDX(RING_TIMESTAMP, BSD_RING_BASE),
+       REG32(BCS_SWCTRL),
+       REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE),
+       REG64_IDX(BCS_GPR, 0),
+       REG64_IDX(BCS_GPR, 1),
+       REG64_IDX(BCS_GPR, 2),
+       REG64_IDX(BCS_GPR, 3),
+       REG64_IDX(BCS_GPR, 4),
+       REG64_IDX(BCS_GPR, 5),
+       REG64_IDX(BCS_GPR, 6),
+       REG64_IDX(BCS_GPR, 7),
+       REG64_IDX(BCS_GPR, 8),
+       REG64_IDX(BCS_GPR, 9),
+       REG64_IDX(BCS_GPR, 10),
+       REG64_IDX(BCS_GPR, 11),
+       REG64_IDX(BCS_GPR, 12),
+       REG64_IDX(BCS_GPR, 13),
+       REG64_IDX(BCS_GPR, 14),
+       REG64_IDX(BCS_GPR, 15),
 };
 
 #undef REG64
@@ -631,28 +693,27 @@ static const struct drm_i915_reg_descriptor hsw_master_regs[] = {
 struct drm_i915_reg_table {
        const struct drm_i915_reg_descriptor *regs;
        int num_regs;
-       bool master;
 };
 
 static const struct drm_i915_reg_table ivb_render_reg_tables[] = {
-       { gen7_render_regs, ARRAY_SIZE(gen7_render_regs), false },
-       { ivb_master_regs, ARRAY_SIZE(ivb_master_regs), true },
+       { gen7_render_regs, ARRAY_SIZE(gen7_render_regs) },
 };
 
 static const struct drm_i915_reg_table ivb_blt_reg_tables[] = {
-       { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs), false },
-       { ivb_master_regs, ARRAY_SIZE(ivb_master_regs), true },
+       { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs) },
 };
 
 static const struct drm_i915_reg_table hsw_render_reg_tables[] = {
-       { gen7_render_regs, ARRAY_SIZE(gen7_render_regs), false },
-       { hsw_render_regs, ARRAY_SIZE(hsw_render_regs), false },
-       { hsw_master_regs, ARRAY_SIZE(hsw_master_regs), true },
+       { gen7_render_regs, ARRAY_SIZE(gen7_render_regs) },
+       { hsw_render_regs, ARRAY_SIZE(hsw_render_regs) },
 };
 
 static const struct drm_i915_reg_table hsw_blt_reg_tables[] = {
-       { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs), false },
-       { hsw_master_regs, ARRAY_SIZE(hsw_master_regs), true },
+       { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs) },
+};
+
+static const struct drm_i915_reg_table gen9_blt_reg_tables[] = {
+       { gen9_blt_regs, ARRAY_SIZE(gen9_blt_regs) },
 };
 
 static u32 gen7_render_get_cmd_length_mask(u32 cmd_header)
@@ -710,6 +771,17 @@ static u32 gen7_blt_get_cmd_length_mask(u32 cmd_header)
        return 0;
 }
 
+static u32 gen9_blt_get_cmd_length_mask(u32 cmd_header)
+{
+       u32 client = cmd_header >> INSTR_CLIENT_SHIFT;
+
+       if (client == INSTR_MI_CLIENT || client == INSTR_BC_CLIENT)
+               return 0xFF;
+
+       DRM_DEBUG_DRIVER("CMD: Abnormal blt cmd length! 0x%08X\n", cmd_header);
+       return 0;
+}
+
 static bool validate_cmds_sorted(const struct intel_engine_cs *engine,
                                 const struct drm_i915_cmd_table *cmd_tables,
                                 int cmd_table_count)
@@ -867,18 +939,19 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
        int cmd_table_count;
        int ret;
 
-       if (!IS_GEN(engine->i915, 7))
+       if (!IS_GEN(engine->i915, 7) && !(IS_GEN(engine->i915, 9) &&
+                                         engine->class == COPY_ENGINE_CLASS))
                return;
 
        switch (engine->class) {
        case RENDER_CLASS:
                if (IS_HASWELL(engine->i915)) {
-                       cmd_tables = hsw_render_ring_cmds;
+                       cmd_tables = hsw_render_ring_cmd_table;
                        cmd_table_count =
-                               ARRAY_SIZE(hsw_render_ring_cmds);
+                               ARRAY_SIZE(hsw_render_ring_cmd_table);
                } else {
-                       cmd_tables = gen7_render_cmds;
-                       cmd_table_count = ARRAY_SIZE(gen7_render_cmds);
+                       cmd_tables = gen7_render_cmd_table;
+                       cmd_table_count = ARRAY_SIZE(gen7_render_cmd_table);
                }
 
                if (IS_HASWELL(engine->i915)) {
@@ -888,36 +961,46 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
                        engine->reg_tables = ivb_render_reg_tables;
                        engine->reg_table_count = ARRAY_SIZE(ivb_render_reg_tables);
                }
-
                engine->get_cmd_length_mask = gen7_render_get_cmd_length_mask;
                break;
        case VIDEO_DECODE_CLASS:
-               cmd_tables = gen7_video_cmds;
-               cmd_table_count = ARRAY_SIZE(gen7_video_cmds);
+               cmd_tables = gen7_video_cmd_table;
+               cmd_table_count = ARRAY_SIZE(gen7_video_cmd_table);
                engine->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask;
                break;
        case COPY_ENGINE_CLASS:
-               if (IS_HASWELL(engine->i915)) {
-                       cmd_tables = hsw_blt_ring_cmds;
-                       cmd_table_count = ARRAY_SIZE(hsw_blt_ring_cmds);
+               engine->get_cmd_length_mask = gen7_blt_get_cmd_length_mask;
+               if (IS_GEN(engine->i915, 9)) {
+                       cmd_tables = gen9_blt_cmd_table;
+                       cmd_table_count = ARRAY_SIZE(gen9_blt_cmd_table);
+                       engine->get_cmd_length_mask =
+                               gen9_blt_get_cmd_length_mask;
+
+                       /* BCS Engine unsafe without parser */
+                       engine->flags |= I915_ENGINE_REQUIRES_CMD_PARSER;
+               } else if (IS_HASWELL(engine->i915)) {
+                       cmd_tables = hsw_blt_ring_cmd_table;
+                       cmd_table_count = ARRAY_SIZE(hsw_blt_ring_cmd_table);
                } else {
-                       cmd_tables = gen7_blt_cmds;
-                       cmd_table_count = ARRAY_SIZE(gen7_blt_cmds);
+                       cmd_tables = gen7_blt_cmd_table;
+                       cmd_table_count = ARRAY_SIZE(gen7_blt_cmd_table);
                }
 
-               if (IS_HASWELL(engine->i915)) {
+               if (IS_GEN(engine->i915, 9)) {
+                       engine->reg_tables = gen9_blt_reg_tables;
+                       engine->reg_table_count =
+                               ARRAY_SIZE(gen9_blt_reg_tables);
+               } else if (IS_HASWELL(engine->i915)) {
                        engine->reg_tables = hsw_blt_reg_tables;
                        engine->reg_table_count = ARRAY_SIZE(hsw_blt_reg_tables);
                } else {
                        engine->reg_tables = ivb_blt_reg_tables;
                        engine->reg_table_count = ARRAY_SIZE(ivb_blt_reg_tables);
                }
-
-               engine->get_cmd_length_mask = gen7_blt_get_cmd_length_mask;
                break;
        case VIDEO_ENHANCEMENT_CLASS:
-               cmd_tables = hsw_vebox_cmds;
-               cmd_table_count = ARRAY_SIZE(hsw_vebox_cmds);
+               cmd_tables = hsw_vebox_cmd_table;
+               cmd_table_count = ARRAY_SIZE(hsw_vebox_cmd_table);
                /* VECS can use the same length_mask function as VCS */
                engine->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask;
                break;
@@ -943,7 +1026,7 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
                return;
        }
 
-       engine->flags |= I915_ENGINE_NEEDS_CMD_PARSER;
+       engine->flags |= I915_ENGINE_USING_CMD_PARSER;
 }
 
 /**
@@ -955,7 +1038,7 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
  */
 void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine)
 {
-       if (!intel_engine_needs_cmd_parser(engine))
+       if (!intel_engine_using_cmd_parser(engine))
                return;
 
        fini_hash_table(engine);
@@ -1029,22 +1112,16 @@ __find_reg(const struct drm_i915_reg_descriptor *table, int count, u32 addr)
 }
 
 static const struct drm_i915_reg_descriptor *
-find_reg(const struct intel_engine_cs *engine, bool is_master, u32 addr)
+find_reg(const struct intel_engine_cs *engine, u32 addr)
 {
        const struct drm_i915_reg_table *table = engine->reg_tables;
+       const struct drm_i915_reg_descriptor *reg = NULL;
        int count = engine->reg_table_count;
 
-       for (; count > 0; ++table, --count) {
-               if (!table->master || is_master) {
-                       const struct drm_i915_reg_descriptor *reg;
+       for (; !reg && (count > 0); ++table, --count)
+               reg = __find_reg(table->regs, table->num_regs, addr);
 
-                       reg = __find_reg(table->regs, table->num_regs, addr);
-                       if (reg != NULL)
-                               return reg;
-               }
-       }
-
-       return NULL;
+       return reg;
 }
 
 /* Returns a vmap'd pointer to dst_obj, which the caller must unmap */
@@ -1128,8 +1205,7 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
 
 static bool check_cmd(const struct intel_engine_cs *engine,
                      const struct drm_i915_cmd_descriptor *desc,
-                     const u32 *cmd, u32 length,
-                     const bool is_master)
+                     const u32 *cmd, u32 length)
 {
        if (desc->flags & CMD_DESC_SKIP)
                return true;
@@ -1139,12 +1215,6 @@ static bool check_cmd(const struct intel_engine_cs *engine,
                return false;
        }
 
-       if ((desc->flags & CMD_DESC_MASTER) && !is_master) {
-               DRM_DEBUG_DRIVER("CMD: Rejected master-only command: 0x%08X\n",
-                                *cmd);
-               return false;
-       }
-
        if (desc->flags & CMD_DESC_REGISTER) {
                /*
                 * Get the distance between individual register offset
@@ -1158,7 +1228,7 @@ static bool check_cmd(const struct intel_engine_cs *engine,
                     offset += step) {
                        const u32 reg_addr = cmd[offset] & desc->reg.mask;
                        const struct drm_i915_reg_descriptor *reg =
-                               find_reg(engine, is_master, reg_addr);
+                               find_reg(engine, reg_addr);
 
                        if (!reg) {
                                DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (%s)\n",
@@ -1236,16 +1306,112 @@ static bool check_cmd(const struct intel_engine_cs *engine,
        return true;
 }
 
+static int check_bbstart(const struct i915_gem_context *ctx,
+                        u32 *cmd, u32 offset, u32 length,
+                        u32 batch_len,
+                        u64 batch_start,
+                        u64 shadow_batch_start)
+{
+       u64 jump_offset, jump_target;
+       u32 target_cmd_offset, target_cmd_index;
+
+       /* For igt compatibility on older platforms */
+       if (CMDPARSER_USES_GGTT(ctx->i915)) {
+               DRM_DEBUG("CMD: Rejecting BB_START for ggtt based submission\n");
+               return -EACCES;
+       }
+
+       if (length != 3) {
+               DRM_DEBUG("CMD: Recursive BB_START with bad length(%u)\n",
+                         length);
+               return -EINVAL;
+       }
+
+       jump_target = *(u64*)(cmd+1);
+       jump_offset = jump_target - batch_start;
+
+       /*
+        * Any underflow when computing jump_offset lands far outside the
+        * range of a u32, so the single >= test catches both too-large and
+        * too-small jump targets
+        */
+       if (jump_offset >= batch_len) {
+               DRM_DEBUG("CMD: BB_START to 0x%llx jumps out of BB\n",
+                         jump_target);
+               return -EINVAL;
+       }
+
+       /*
+        * This cannot overflow a u32 because we already checked jump_offset
+        * is within the BB, and the batch_len is a u32
+        */
+       target_cmd_offset = lower_32_bits(jump_offset);
+       target_cmd_index = target_cmd_offset / sizeof(u32);
+
+       *(u64*)(cmd + 1) = shadow_batch_start + target_cmd_offset;
+
+       if (target_cmd_index == offset)
+               return 0;
+
+       if (ctx->jump_whitelist_cmds <= target_cmd_index) {
+               DRM_DEBUG("CMD: Rejecting BB_START - truncated whitelist array\n");
+               return -EINVAL;
+       } else if (!test_bit(target_cmd_index, ctx->jump_whitelist)) {
+               DRM_DEBUG("CMD: BB_START to 0x%llx not a previously executed cmd\n",
+                         jump_target);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
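The single >= bound in check_bbstart() works because the subtraction is done in u64: a jump target below batch_start wraps around to a value far above any u32 batch length, so "before the batch" and "past the end" fail the same test. A minimal user-space sketch of that check follows; the types and sample addresses are illustrative only, not the driver's code.

#include <stdint.h>
#include <stdio.h>

/* Both "target before the batch" and "target past the end" fail the same
 * comparison: an underflowing u64 subtraction yields a value that can
 * never be smaller than a u32 batch length. */
static int jump_in_bounds(uint64_t batch_start, uint32_t batch_len,
			  uint64_t jump_target)
{
	uint64_t jump_offset = jump_target - batch_start;

	return jump_offset < batch_len;
}

int main(void)
{
	uint64_t start = 0x100000;

	printf("%d\n", jump_in_bounds(start, 0x1000, start + 0x800));  /* 1: inside    */
	printf("%d\n", jump_in_bounds(start, 0x1000, start + 0x1000)); /* 0: one past  */
	printf("%d\n", jump_in_bounds(start, 0x1000, start - 8));      /* 0: underflow */
	return 0;
}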
+static void init_whitelist(struct i915_gem_context *ctx, u32 batch_len)
+{
+       const u32 batch_cmds = DIV_ROUND_UP(batch_len, sizeof(u32));
+       const u32 exact_size = BITS_TO_LONGS(batch_cmds);
+       u32 next_size = BITS_TO_LONGS(roundup_pow_of_two(batch_cmds));
+       unsigned long *next_whitelist;
+
+       if (CMDPARSER_USES_GGTT(ctx->i915))
+               return;
+
+       if (batch_cmds <= ctx->jump_whitelist_cmds) {
+               bitmap_zero(ctx->jump_whitelist, batch_cmds);
+               return;
+       }
+
+again:
+       next_whitelist = kcalloc(next_size, sizeof(long), GFP_KERNEL);
+       if (next_whitelist) {
+               kfree(ctx->jump_whitelist);
+               ctx->jump_whitelist = next_whitelist;
+               ctx->jump_whitelist_cmds =
+                       next_size * BITS_PER_BYTE * sizeof(long);
+               return;
+       }
+
+       if (next_size > exact_size) {
+               next_size = exact_size;
+               goto again;
+       }
+
+       DRM_DEBUG("CMD: Failed to extend whitelist. BB_START may be disallowed\n");
+       bitmap_zero(ctx->jump_whitelist, ctx->jump_whitelist_cmds);
+
+       return;
+}
+
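init_whitelist() keeps one bit per u32 slot of the batch so that a later BB_START may only target offsets of commands that have already been scanned; the bitmap is sized in whole longs, grown to the next power of two when possible and shrunk to the exact size on allocation failure. The sketch below only shows the sizing arithmetic, with an illustrative batch length.

#include <stdint.h>
#include <stdio.h>

#define BITS_PER_LONG (8 * sizeof(long))
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
#define BITS_TO_LONGS(n) DIV_ROUND_UP(n, BITS_PER_LONG)

/* Roughly what the whitelist sizing computes: one bit per u32 slot in the
 * batch, rounded up to whole longs (the 4096-byte batch is illustrative). */
int main(void)
{
	uint32_t batch_len = 4096;                        /* bytes        */
	uint32_t batch_cmds = DIV_ROUND_UP(batch_len, 4); /* u32 slots    */
	size_t nlongs = BITS_TO_LONGS(batch_cmds);        /* bitmap longs */

	printf("%u commands -> %zu longs -> %zu whitelist bits\n",
	       batch_cmds, nlongs, nlongs * BITS_PER_LONG);
	return 0;
}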
 #define LENGTH_BIAS 2
 
 /**
  * i915_parse_cmds() - parse a submitted batch buffer for privilege violations
+ * @ctx: the context in which the batch is to execute
  * @engine: the engine on which the batch is to execute
  * @batch_obj: the batch buffer in question
- * @shadow_batch_obj: copy of the batch buffer in question
+ * @batch_start: Canonical base address of batch
  * @batch_start_offset: byte offset in the batch at which execution starts
  * @batch_len: length of the commands in batch_obj
- * @is_master: is the submitting process the drm master?
+ * @shadow_batch_obj: copy of the batch buffer in question
+ * @shadow_batch_start: Canonical base address of shadow_batch_obj
  *
  * Parses the specified batch buffer looking for privilege violations as
  * described in the overview.
@@ -1253,14 +1419,17 @@ static bool check_cmd(const struct intel_engine_cs *engine,
  * Return: non-zero if the parser finds violations or otherwise fails; -EACCES
  * if the batch appears legal but should use hardware parsing
  */
-int intel_engine_cmd_parser(struct intel_engine_cs *engine,
+
+int intel_engine_cmd_parser(struct i915_gem_context *ctx,
+                           struct intel_engine_cs *engine,
                            struct drm_i915_gem_object *batch_obj,
-                           struct drm_i915_gem_object *shadow_batch_obj,
+                           u64 batch_start,
                            u32 batch_start_offset,
                            u32 batch_len,
-                           bool is_master)
+                           struct drm_i915_gem_object *shadow_batch_obj,
+                           u64 shadow_batch_start)
 {
-       u32 *cmd, *batch_end;
+       u32 *cmd, *batch_end, offset = 0;
        struct drm_i915_cmd_descriptor default_desc = noop_desc;
        const struct drm_i915_cmd_descriptor *desc = &default_desc;
        bool needs_clflush_after = false;
@@ -1274,6 +1443,8 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
                return PTR_ERR(cmd);
        }
 
+       init_whitelist(ctx, batch_len);
+
        /*
         * We use the batch length as size because the shadow object is as
         * large or larger and copy_batch() will write MI_NOPs to the extra
@@ -1283,31 +1454,15 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
        do {
                u32 length;
 
-               if (*cmd == MI_BATCH_BUFFER_END) {
-                       if (needs_clflush_after) {
-                               void *ptr = page_mask_bits(shadow_batch_obj->mm.mapping);
-                               drm_clflush_virt_range(ptr,
-                                                      (void *)(cmd + 1) - ptr);
-                       }
+               if (*cmd == MI_BATCH_BUFFER_END)
                        break;
-               }
 
                desc = find_cmd(engine, *cmd, desc, &default_desc);
                if (!desc) {
                        DRM_DEBUG_DRIVER("CMD: Unrecognized command: 0x%08X\n",
                                         *cmd);
                        ret = -EINVAL;
-                       break;
-               }
-
-               /*
-                * If the batch buffer contains a chained batch, return an
-                * error that tells the caller to abort and dispatch the
-                * workload as a non-secure batch.
-                */
-               if (desc->cmd.value == MI_BATCH_BUFFER_START) {
-                       ret = -EACCES;
-                       break;
+                       goto err;
                }
 
                if (desc->flags & CMD_DESC_FIXED)
@@ -1321,22 +1476,43 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
                                         length,
                                         batch_end - cmd);
                        ret = -EINVAL;
-                       break;
+                       goto err;
                }
 
-               if (!check_cmd(engine, desc, cmd, length, is_master)) {
+               if (!check_cmd(engine, desc, cmd, length)) {
                        ret = -EACCES;
+                       goto err;
+               }
+
+               if (desc->cmd.value == MI_BATCH_BUFFER_START) {
+                       ret = check_bbstart(ctx, cmd, offset, length,
+                                           batch_len, batch_start,
+                                           shadow_batch_start);
+
+                       if (ret)
+                               goto err;
                        break;
                }
 
+               if (ctx->jump_whitelist_cmds > offset)
+                       set_bit(offset, ctx->jump_whitelist);
+
                cmd += length;
+               offset += length;
                if  (cmd >= batch_end) {
                        DRM_DEBUG_DRIVER("CMD: Got to the end of the buffer w/o a BBE cmd!\n");
                        ret = -EINVAL;
-                       break;
+                       goto err;
                }
        } while (1);
 
+       if (needs_clflush_after) {
+               void *ptr = page_mask_bits(shadow_batch_obj->mm.mapping);
+
+               drm_clflush_virt_range(ptr, (void *)(cmd + 1) - ptr);
+       }
+
+err:
        i915_gem_object_unpin_map(shadow_batch_obj);
        return ret;
 }
@@ -1357,7 +1533,7 @@ int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv)
 
        /* If the command parser is not enabled, report 0 - unsupported */
        for_each_uabi_engine(engine, dev_priv) {
-               if (intel_engine_needs_cmd_parser(engine)) {
+               if (intel_engine_using_cmd_parser(engine)) {
                        active = true;
                        break;
                }
@@ -1382,6 +1558,7 @@ int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv)
         *    the parser enabled.
         * 9. Don't whitelist or handle oacontrol specially, as ownership
         *    for oacontrol state is moving to i915-perf.
+        * 10. Support for Gen9 BCS Parsing
         */
-       return 9;
+       return 10;
 }
index bb6f86c..3d717e2 100644 (file)
@@ -364,9 +364,6 @@ static int i915_driver_modeset_probe(struct drm_device *dev)
        if (ret)
                goto cleanup_vga_client;
 
-       /* must happen before intel_power_domains_init_hw() on VLV/CHV */
-       intel_update_rawclk(dev_priv);
-
        intel_power_domains_init_hw(dev_priv, false);
 
        intel_csr_ucode_init(dev_priv);
@@ -1850,6 +1847,8 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation)
 
        i915_gem_suspend_late(dev_priv);
 
+       i915_rc6_ctx_wa_suspend(dev_priv);
+
        intel_uncore_suspend(&dev_priv->uncore);
 
        intel_power_domains_suspend(dev_priv,
@@ -2053,6 +2052,8 @@ static int i915_drm_resume_early(struct drm_device *dev)
 
        intel_power_domains_resume(dev_priv);
 
+       i915_rc6_ctx_wa_resume(dev_priv);
+
        intel_gt_sanitize(&dev_priv->gt, true);
 
        enable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
index 953e1d1..89b6112 100644 (file)
@@ -593,6 +593,8 @@ struct intel_rps {
 
 struct intel_rc6 {
        bool enabled;
+       bool ctx_corrupted;
+       intel_wakeref_t ctx_corrupted_wakeref;
        u64 prev_hw_residency[4];
        u64 cur_residency[4];
 };
@@ -2075,9 +2077,16 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
 #define VEBOX_MASK(dev_priv) \
        ENGINE_INSTANCES_MASK(dev_priv, VECS0, I915_MAX_VECS)
 
+/*
+ * The Gen7 cmdparser copies the scanned buffer to the ggtt for execution
+ * All later gens can run the final buffer from the ppgtt
+ */
+#define CMDPARSER_USES_GGTT(dev_priv) IS_GEN(dev_priv, 7)
+
 #define HAS_LLC(dev_priv)      (INTEL_INFO(dev_priv)->has_llc)
 #define HAS_SNOOP(dev_priv)    (INTEL_INFO(dev_priv)->has_snoop)
 #define HAS_EDRAM(dev_priv)    ((dev_priv)->edram_size_mb)
+#define HAS_SECURE_BATCHES(dev_priv) (INTEL_GEN(dev_priv) < 6)
 #define HAS_WT(dev_priv)       ((IS_HASWELL(dev_priv) || \
                                 IS_BROADWELL(dev_priv)) && HAS_EDRAM(dev_priv))
 
@@ -2110,10 +2119,12 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
 /* Early gen2 have a totally busted CS tlb and require pinned batches. */
 #define HAS_BROKEN_CS_TLB(dev_priv)    (IS_I830(dev_priv) || IS_I845G(dev_priv))
 
+#define NEEDS_RC6_CTX_CORRUPTION_WA(dev_priv)  \
+       (IS_BROADWELL(dev_priv) || IS_GEN(dev_priv, 9))
+
 /* WaRsDisableCoarsePowerGating:skl,cnl */
 #define NEEDS_WaRsDisableCoarsePowerGating(dev_priv) \
-       (IS_CANNONLAKE(dev_priv) || \
-        IS_SKL_GT3(dev_priv) || IS_SKL_GT4(dev_priv))
+       (IS_CANNONLAKE(dev_priv) || IS_GEN(dev_priv, 9))
 
 #define HAS_GMBUS_IRQ(dev_priv) (INTEL_GEN(dev_priv) >= 4)
 #define HAS_GMBUS_BURST_READ(dev_priv) (INTEL_GEN(dev_priv) >= 10 || \
@@ -2284,6 +2295,14 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj,
                           unsigned long flags);
 #define I915_GEM_OBJECT_UNBIND_ACTIVE BIT(0)
 
+struct i915_vma * __must_check
+i915_gem_object_pin(struct drm_i915_gem_object *obj,
+                   struct i915_address_space *vm,
+                   const struct i915_ggtt_view *view,
+                   u64 size,
+                   u64 alignment,
+                   u64 flags);
+
 void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv);
 
 static inline int __must_check
@@ -2393,12 +2412,14 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type);
 int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv);
 void intel_engine_init_cmd_parser(struct intel_engine_cs *engine);
 void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine);
-int intel_engine_cmd_parser(struct intel_engine_cs *engine,
+int intel_engine_cmd_parser(struct i915_gem_context *cxt,
+                           struct intel_engine_cs *engine,
                            struct drm_i915_gem_object *batch_obj,
-                           struct drm_i915_gem_object *shadow_batch_obj,
+                           u64 user_batch_start,
                            u32 batch_start_offset,
                            u32 batch_len,
-                           bool is_master);
+                           struct drm_i915_gem_object *shadow_batch_obj,
+                           u64 shadow_batch_start);
 
 /* intel_device_info.c */
 static inline struct intel_device_info *
index d0f94f2..98305d9 100644 (file)
@@ -964,6 +964,20 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 {
        struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
        struct i915_address_space *vm = &dev_priv->ggtt.vm;
+
+       return i915_gem_object_pin(obj, vm, view, size, alignment,
+                                  flags | PIN_GLOBAL);
+}
+
+struct i915_vma *
+i915_gem_object_pin(struct drm_i915_gem_object *obj,
+                   struct i915_address_space *vm,
+                   const struct i915_ggtt_view *view,
+                   u64 size,
+                   u64 alignment,
+                   u64 flags)
+{
+       struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
        struct i915_vma *vma;
        int ret;
 
@@ -1038,7 +1052,7 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
                        return ERR_PTR(ret);
        }
 
-       ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
+       ret = i915_vma_pin(vma, size, alignment, flags);
        if (ret)
                return ERR_PTR(ret);
 
index 5d91013..9f1517a 100644 (file)
@@ -62,7 +62,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
                value = !!(i915->caps.scheduler & I915_SCHEDULER_CAP_SEMAPHORES);
                break;
        case I915_PARAM_HAS_SECURE_BATCHES:
-               value = capable(CAP_SYS_ADMIN);
+               value = HAS_SECURE_BATCHES(i915) && capable(CAP_SYS_ADMIN);
                break;
        case I915_PARAM_CMD_PARSER_VERSION:
                value = i915_cmd_parser_get_version(i915);
index 2abd199..f8ee9ab 100644 (file)
@@ -471,6 +471,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define   ECOCHK_PPGTT_WT_HSW          (0x2 << 3)
 #define   ECOCHK_PPGTT_WB_HSW          (0x3 << 3)
 
+#define GEN8_RC6_CTX_INFO              _MMIO(0x8504)
+
 #define GAC_ECO_BITS                   _MMIO(0x14090)
 #define   ECOBITS_SNB_BIT              (1 << 13)
 #define   ECOBITS_PPGTT_CACHE64B       (3 << 8)
@@ -555,6 +557,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
  */
 #define BCS_SWCTRL _MMIO(0x22200)
 
+/* There are 16 GPR registers */
+#define BCS_GPR(n)     _MMIO(0x22600 + (n) * 8)
+#define BCS_GPR_UDW(n) _MMIO(0x22600 + (n) * 8 + 4)
+
 #define GPGPU_THREADS_DISPATCHED        _MMIO(0x2290)
 #define GPGPU_THREADS_DISPATCHED_UDW   _MMIO(0x2290 + 4)
 #define HS_INVOCATION_COUNT             _MMIO(0x2300)
@@ -7211,6 +7217,10 @@ enum {
 #define TGL_DMC_DEBUG_DC5_COUNT        _MMIO(0x101084)
 #define TGL_DMC_DEBUG_DC6_COUNT        _MMIO(0x101088)
 
+/* Display Internal Timeout Register */
+#define RM_TIMEOUT             _MMIO(0x42060)
+#define  MMIO_TIMEOUT_US(us)   ((us) << 0)
+
 /* interrupts */
 #define DE_MASTER_IRQ_CONTROL   (1 << 31)
 #define DE_SPRITEB_FLIP_DONE    (1 << 29)
index 75ee027..2efe1d1 100644 (file)
@@ -126,6 +126,14 @@ static void bxt_init_clock_gating(struct drm_i915_private *dev_priv)
         */
        I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) |
                   PWM1_GATING_DIS | PWM2_GATING_DIS);
+
+       /*
+        * Lower the display internal timeout.
+        * This is needed to avoid any hard hangs when DSI port PLL
+        * is off and an MMIO access is attempted by any privileged
+        * application, using batch buffers or any other means.
+        */
+       I915_WRITE(RM_TIMEOUT, MMIO_TIMEOUT_US(950));
 }
 
 static void glk_init_clock_gating(struct drm_i915_private *dev_priv)
@@ -8544,6 +8552,100 @@ static void intel_init_emon(struct drm_i915_private *dev_priv)
        dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK);
 }
 
+static bool i915_rc6_ctx_corrupted(struct drm_i915_private *dev_priv)
+{
+       return !I915_READ(GEN8_RC6_CTX_INFO);
+}
+
+static void i915_rc6_ctx_wa_init(struct drm_i915_private *i915)
+{
+       if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915))
+               return;
+
+       if (i915_rc6_ctx_corrupted(i915)) {
+               DRM_INFO("RC6 context corrupted, disabling runtime power management\n");
+               i915->gt_pm.rc6.ctx_corrupted = true;
+               i915->gt_pm.rc6.ctx_corrupted_wakeref =
+                       intel_runtime_pm_get(&i915->runtime_pm);
+       }
+}
+
+static void i915_rc6_ctx_wa_cleanup(struct drm_i915_private *i915)
+{
+       if (i915->gt_pm.rc6.ctx_corrupted) {
+               intel_runtime_pm_put(&i915->runtime_pm,
+                                    i915->gt_pm.rc6.ctx_corrupted_wakeref);
+               i915->gt_pm.rc6.ctx_corrupted = false;
+       }
+}
+
+/**
+ * i915_rc6_ctx_wa_suspend - system suspend sequence for the RC6 CTX WA
+ * @i915: i915 device
+ *
+ * Perform any steps needed to clean up the RC6 CTX WA before system suspend.
+ */
+void i915_rc6_ctx_wa_suspend(struct drm_i915_private *i915)
+{
+       if (i915->gt_pm.rc6.ctx_corrupted)
+               intel_runtime_pm_put(&i915->runtime_pm,
+                                    i915->gt_pm.rc6.ctx_corrupted_wakeref);
+}
+
+/**
+ * i915_rc6_ctx_wa_resume - system resume sequence for the RC6 CTX WA
+ * @i915: i915 device
+ *
+ * Perform any steps needed to re-init the RC6 CTX WA after system resume.
+ */
+void i915_rc6_ctx_wa_resume(struct drm_i915_private *i915)
+{
+       if (!i915->gt_pm.rc6.ctx_corrupted)
+               return;
+
+       if (i915_rc6_ctx_corrupted(i915)) {
+               i915->gt_pm.rc6.ctx_corrupted_wakeref =
+                       intel_runtime_pm_get(&i915->runtime_pm);
+               return;
+       }
+
+       DRM_INFO("RC6 context restored, re-enabling runtime power management\n");
+       i915->gt_pm.rc6.ctx_corrupted = false;
+}
+
+static void intel_disable_rc6(struct drm_i915_private *dev_priv);
+
+/**
+ * i915_rc6_ctx_wa_check - check for a new RC6 CTX corruption
+ * @i915: i915 device
+ *
+ * Check if an RC6 CTX corruption has happened since the last check and if so
+ * disable RC6 and runtime power management.
+ *
+ * Return false if no context corruption has happened since the last call of
+ * this function, true otherwise.
+*/
+bool i915_rc6_ctx_wa_check(struct drm_i915_private *i915)
+{
+       if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915))
+               return false;
+
+       if (i915->gt_pm.rc6.ctx_corrupted)
+               return false;
+
+       if (!i915_rc6_ctx_corrupted(i915))
+               return false;
+
+       DRM_NOTE("RC6 context corruption, disabling runtime power management\n");
+
+       intel_disable_rc6(i915);
+       i915->gt_pm.rc6.ctx_corrupted = true;
+       i915->gt_pm.rc6.ctx_corrupted_wakeref =
+               intel_runtime_pm_get_noresume(&i915->runtime_pm);
+
+       return true;
+}
+
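The check above is a one-shot latch: the first time corruption is seen it disables RC6, records the fact, and pins a runtime-PM wakeref exactly once; later calls return false so the wakeref is never taken twice. A toy model of that pattern is sketched below, with all names illustrative and the wakeref reduced to a counter; it is not the driver's code.

#include <stdbool.h>
#include <stdio.h>

struct rc6_state {
	bool enabled;
	bool ctx_corrupted;
	int wakeref_count;	/* stands in for the runtime-PM wakeref */
};

static bool hw_reports_corruption(void)
{
	return true;		/* pretend the saved-context register reads 0 */
}

static bool rc6_ctx_check(struct rc6_state *s)
{
	if (s->ctx_corrupted || !hw_reports_corruption())
		return false;	/* nothing new since the last call */

	s->enabled = false;	/* disable the power-saving feature */
	s->ctx_corrupted = true;
	s->wakeref_count++;	/* keep the device awake from now on */
	return true;
}

int main(void)
{
	struct rc6_state s = { .enabled = true };

	printf("first check:  %d\n", rc6_ctx_check(&s)); /* 1: latched     */
	printf("second check: %d\n", rc6_ctx_check(&s)); /* 0: already set */
	printf("wakerefs held: %d\n", s.wakeref_count);  /* exactly one    */
	return 0;
}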
 void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
 {
        struct intel_rps *rps = &dev_priv->gt_pm.rps;
@@ -8557,6 +8659,8 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
                pm_runtime_get(&dev_priv->drm.pdev->dev);
        }
 
+       i915_rc6_ctx_wa_init(dev_priv);
+
        /* Initialize RPS limits (for userspace) */
        if (IS_CHERRYVIEW(dev_priv))
                cherryview_init_gt_powersave(dev_priv);
@@ -8595,6 +8699,8 @@ void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
        if (IS_VALLEYVIEW(dev_priv))
                valleyview_cleanup_gt_powersave(dev_priv);
 
+       i915_rc6_ctx_wa_cleanup(dev_priv);
+
        if (!HAS_RC6(dev_priv))
                pm_runtime_put(&dev_priv->drm.pdev->dev);
 }
@@ -8623,7 +8729,7 @@ static inline void intel_disable_llc_pstate(struct drm_i915_private *i915)
        i915->gt_pm.llc_pstate.enabled = false;
 }
 
-static void intel_disable_rc6(struct drm_i915_private *dev_priv)
+static void __intel_disable_rc6(struct drm_i915_private *dev_priv)
 {
        lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
 
@@ -8642,6 +8748,15 @@ static void intel_disable_rc6(struct drm_i915_private *dev_priv)
        dev_priv->gt_pm.rc6.enabled = false;
 }
 
+static void intel_disable_rc6(struct drm_i915_private *dev_priv)
+{
+       struct intel_rps *rps = &dev_priv->gt_pm.rps;
+
+       mutex_lock(&rps->lock);
+       __intel_disable_rc6(dev_priv);
+       mutex_unlock(&rps->lock);
+}
+
 static void intel_disable_rps(struct drm_i915_private *dev_priv)
 {
        lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
@@ -8667,7 +8782,7 @@ void intel_disable_gt_powersave(struct drm_i915_private *dev_priv)
 {
        mutex_lock(&dev_priv->gt_pm.rps.lock);
 
-       intel_disable_rc6(dev_priv);
+       __intel_disable_rc6(dev_priv);
        intel_disable_rps(dev_priv);
        if (HAS_LLC(dev_priv))
                intel_disable_llc_pstate(dev_priv);
@@ -8694,6 +8809,9 @@ static void intel_enable_rc6(struct drm_i915_private *dev_priv)
        if (dev_priv->gt_pm.rc6.enabled)
                return;
 
+       if (dev_priv->gt_pm.rc6.ctx_corrupted)
+               return;
+
        if (IS_CHERRYVIEW(dev_priv))
                cherryview_enable_rc6(dev_priv);
        else if (IS_VALLEYVIEW(dev_priv))
index e3573e1..0f7390c 100644 (file)
@@ -36,6 +36,9 @@ void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv);
 void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv);
 void intel_enable_gt_powersave(struct drm_i915_private *dev_priv);
 void intel_disable_gt_powersave(struct drm_i915_private *dev_priv);
+bool i915_rc6_ctx_wa_check(struct drm_i915_private *i915);
+void i915_rc6_ctx_wa_suspend(struct drm_i915_private *i915);
+void i915_rc6_ctx_wa_resume(struct drm_i915_private *i915);
 void gen6_rps_busy(struct drm_i915_private *dev_priv);
 void gen6_rps_idle(struct drm_i915_private *dev_priv);
 void gen6_rps_boost(struct i915_request *rq);
index 04c721d..b89439e 100644 (file)
@@ -488,7 +488,7 @@ static void sun4i_tcon0_mode_set_rgb(struct sun4i_tcon *tcon,
 
        WARN_ON(!tcon->quirks->has_channel_0);
 
-       tcon->dclk_min_div = 6;
+       tcon->dclk_min_div = 1;
        tcon->dclk_max_div = 127;
        sun4i_tcon0_mode_set_common(tcon, mode);
 
index fa9d34a..f72803a 100644 (file)
@@ -626,6 +626,9 @@ static void intel_th_gth_switch(struct intel_th_device *thdev,
        if (!count)
                dev_dbg(&thdev->dev, "timeout waiting for CTS Trigger\n");
 
+       /* De-assert the trigger */
+       iowrite32(0, gth->base + REG_CTS_CTL);
+
        intel_th_gth_stop(gth, output, false);
        intel_th_gth_start(gth, output);
 }
index fc9f15f..6d240df 100644 (file)
@@ -164,7 +164,7 @@ struct msc {
 };
 
 static LIST_HEAD(msu_buffer_list);
-static struct mutex msu_buffer_mutex;
+static DEFINE_MUTEX(msu_buffer_mutex);
 
 /**
  * struct msu_buffer_entry - internal MSU buffer bookkeeping
@@ -327,7 +327,7 @@ static size_t msc_win_total_sz(struct msc_window *win)
                struct msc_block_desc *bdesc = sg_virt(sg);
 
                if (msc_block_wrapped(bdesc))
-                       return win->nr_blocks << PAGE_SHIFT;
+                       return (size_t)win->nr_blocks << PAGE_SHIFT;
 
                size += msc_total_sz(bdesc);
                if (msc_block_last_written(bdesc))
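The one-character cast above matters because nr_blocks is a 32-bit quantity: shifting it by PAGE_SHIFT is done in 32-bit arithmetic and wraps for windows of 4 GiB or more, whereas widening to size_t first keeps the full value on 64-bit kernels. A minimal sketch, assuming 4 KiB pages and an LP64 system:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12	/* assume 4 KiB pages for the illustration */

int main(void)
{
	unsigned int nr_blocks = 1u << 20;	/* a 4 GiB window */

	size_t bad  = nr_blocks << PAGE_SHIFT;		/* 32-bit shift, wraps to 0 */
	size_t good = (size_t)nr_blocks << PAGE_SHIFT;	/* widened before shifting  */

	printf("without cast: %zu\n", bad);	/* 0 on LP64            */
	printf("with cast:    %zu\n", good);	/* 4294967296           */
	return 0;
}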
@@ -1848,9 +1848,14 @@ mode_store(struct device *dev, struct device_attribute *attr, const char *buf,
                len = cp - buf;
 
        mode = kstrndup(buf, len, GFP_KERNEL);
+       if (!mode)
+               return -ENOMEM;
+
        i = match_string(msc_mode, ARRAY_SIZE(msc_mode), mode);
-       if (i >= 0)
+       if (i >= 0) {
+               kfree(mode);
                goto found;
+       }
 
        /* Buffer sinks only work with a usable IRQ */
        if (!msc->do_irq) {
index 91dfeba..03ca5b1 100644 (file)
@@ -199,6 +199,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = {
                PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x02a6),
                .driver_data = (kernel_ulong_t)&intel_th_2x,
        },
+       {
+               /* Comet Lake PCH */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x06a6),
+               .driver_data = (kernel_ulong_t)&intel_th_2x,
+       },
        {
                /* Ice Lake NNPI */
                PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x45c5),
@@ -209,6 +214,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = {
                PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xa0a6),
                .driver_data = (kernel_ulong_t)&intel_th_2x,
        },
+       {
+               /* Jasper Lake PCH */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x4da6),
+               .driver_data = (kernel_ulong_t)&intel_th_2x,
+       },
        { 0 },
 };
 
index 663f8a5..73aee59 100644 (file)
@@ -1399,7 +1399,7 @@ static int stm32_adc_dma_start(struct iio_dev *indio_dev)
        cookie = dmaengine_submit(desc);
        ret = dma_submit_error(cookie);
        if (ret) {
-               dmaengine_terminate_all(adc->dma_chan);
+               dmaengine_terminate_sync(adc->dma_chan);
                return ret;
        }
 
@@ -1477,7 +1477,7 @@ static void __stm32_adc_buffer_predisable(struct iio_dev *indio_dev)
                stm32_adc_conv_irq_disable(adc);
 
        if (adc->dma_chan)
-               dmaengine_terminate_all(adc->dma_chan);
+               dmaengine_terminate_sync(adc->dma_chan);
 
        if (stm32_adc_set_trig(indio_dev, NULL))
                dev_err(&indio_dev->dev, "Can't clear trigger\n");
index b99d738..8743b2f 100644 (file)
@@ -317,8 +317,11 @@ static int adis16480_set_freq(struct iio_dev *indio_dev, int val, int val2)
        struct adis16480 *st = iio_priv(indio_dev);
        unsigned int t, reg;
 
+       if (val < 0 || val2 < 0)
+               return -EINVAL;
+
        t =  val * 1000 + val2 / 1000;
-       if (t <= 0)
+       if (t == 0)
                return -EINVAL;
 
        /*
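The explicit signed checks are needed because t is unsigned: once a negative val is multiplied and stored there it wraps to a huge positive rate, and the old "t <= 0" test can only ever catch zero. A small sketch of the failure mode (values illustrative, not the driver's code):

#include <stdio.h>

int main(void)
{
	int val = -1, val2 = 0;
	unsigned int t = val * 1000 + val2 / 1000;

	printf("t = %u\n", t);			/* 4294966296, not -1000        */
	printf("t <= 0 ? %d\n", t <= 0);	/* 0: the old check misses it   */
	printf("reject early? %d\n", val < 0 || val2 < 0);	/* 1 */
	return 0;
}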
index b17f060..868281b 100644 (file)
@@ -114,54 +114,63 @@ static const struct inv_mpu6050_hw hw_info[] = {
                .name = "MPU6050",
                .reg = &reg_set_6050,
                .config = &chip_config_6050,
+               .fifo_size = 1024,
        },
        {
                .whoami = INV_MPU6500_WHOAMI_VALUE,
                .name = "MPU6500",
                .reg = &reg_set_6500,
                .config = &chip_config_6050,
+               .fifo_size = 512,
        },
        {
                .whoami = INV_MPU6515_WHOAMI_VALUE,
                .name = "MPU6515",
                .reg = &reg_set_6500,
                .config = &chip_config_6050,
+               .fifo_size = 512,
        },
        {
                .whoami = INV_MPU6000_WHOAMI_VALUE,
                .name = "MPU6000",
                .reg = &reg_set_6050,
                .config = &chip_config_6050,
+               .fifo_size = 1024,
        },
        {
                .whoami = INV_MPU9150_WHOAMI_VALUE,
                .name = "MPU9150",
                .reg = &reg_set_6050,
                .config = &chip_config_6050,
+               .fifo_size = 1024,
        },
        {
                .whoami = INV_MPU9250_WHOAMI_VALUE,
                .name = "MPU9250",
                .reg = &reg_set_6500,
                .config = &chip_config_6050,
+               .fifo_size = 512,
        },
        {
                .whoami = INV_MPU9255_WHOAMI_VALUE,
                .name = "MPU9255",
                .reg = &reg_set_6500,
                .config = &chip_config_6050,
+               .fifo_size = 512,
        },
        {
                .whoami = INV_ICM20608_WHOAMI_VALUE,
                .name = "ICM20608",
                .reg = &reg_set_6500,
                .config = &chip_config_6050,
+               .fifo_size = 512,
        },
        {
                .whoami = INV_ICM20602_WHOAMI_VALUE,
                .name = "ICM20602",
                .reg = &reg_set_icm20602,
                .config = &chip_config_6050,
+               .fifo_size = 1008,
        },
 };
 
index db1c690..5123567 100644 (file)
@@ -100,12 +100,14 @@ struct inv_mpu6050_chip_config {
  *  @name:      name of the chip.
  *  @reg:   register map of the chip.
  *  @config:    configuration of the chip.
+ *  @fifo_size:        size of the FIFO in bytes.
  */
 struct inv_mpu6050_hw {
        u8 whoami;
        u8 *name;
        const struct inv_mpu6050_reg_map *reg;
        const struct inv_mpu6050_chip_config *config;
+       size_t fifo_size;
 };
 
 /*
index 5f9a5de..72d8c57 100644 (file)
@@ -180,9 +180,6 @@ irqreturn_t inv_mpu6050_read_fifo(int irq, void *p)
                        "failed to ack interrupt\n");
                goto flush_fifo;
        }
-       /* handle fifo overflow by reseting fifo */
-       if (int_status & INV_MPU6050_BIT_FIFO_OVERFLOW_INT)
-               goto flush_fifo;
        if (!(int_status & INV_MPU6050_BIT_RAW_DATA_RDY_INT)) {
                dev_warn(regmap_get_device(st->map),
                        "spurious interrupt with status 0x%x\n", int_status);
@@ -211,6 +208,18 @@ irqreturn_t inv_mpu6050_read_fifo(int irq, void *p)
        if (result)
                goto end_session;
        fifo_count = get_unaligned_be16(&data[0]);
+
+       /*
+        * Handle fifo overflow by resetting fifo.
+        * Reset if there are only 3 free data sets remaining, to mitigate a
+        * possible delay between reading the fifo count and the fifo data.
+        */
+       nb = 3 * bytes_per_datum;
+       if (fifo_count >= st->hw->fifo_size - nb) {
+               dev_warn(regmap_get_device(st->map), "fifo overflow reset\n");
+               goto flush_fifo;
+       }
+
        /* compute and process all complete datum */
        nb = fifo_count / bytes_per_datum;
        inv_mpu6050_update_period(st, pf->timestamp, nb);
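The heuristic above treats the FIFO as effectively overflowed once fewer than three complete samples of headroom remain, because more data may land between reading the count and reading the data. A sketch of the threshold arithmetic, with an illustrative 512-byte FIFO and 12-byte samples:

#include <stdio.h>

int main(void)
{
	unsigned int fifo_size = 512;
	unsigned int bytes_per_datum = 12;
	unsigned int headroom = 3 * bytes_per_datum;
	unsigned int fifo_count = 480;

	if (fifo_count >= fifo_size - headroom)
		printf("reset FIFO (count %u, threshold %u)\n",
		       fifo_count, fifo_size - headroom);
	else
		printf("process %u samples\n", fifo_count / bytes_per_datum);
	return 0;
}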
index 8b50d56..01eb8cc 100644 (file)
@@ -110,7 +110,7 @@ static int srf04_read(struct srf04_data *data)
        udelay(data->cfg->trigger_pulse_us);
        gpiod_set_value(data->gpiod_trig, 0);
 
-       /* it cannot take more than 20 ms */
+       /* it should not take more than 20 ms until echo is rising */
        ret = wait_for_completion_killable_timeout(&data->rising, HZ/50);
        if (ret < 0) {
                mutex_unlock(&data->lock);
@@ -120,7 +120,8 @@ static int srf04_read(struct srf04_data *data)
                return -ETIMEDOUT;
        }
 
-       ret = wait_for_completion_killable_timeout(&data->falling, HZ/50);
+       /* it cannot take more than 50 ms until echo is falling */
+       ret = wait_for_completion_killable_timeout(&data->falling, HZ/20);
        if (ret < 0) {
                mutex_unlock(&data->lock);
                return ret;
@@ -135,19 +136,19 @@ static int srf04_read(struct srf04_data *data)
 
        dt_ns = ktime_to_ns(ktime_dt);
        /*
-        * measuring more than 3 meters is beyond the capabilities of
-        * the sensor
+        * measuring more than 6.45 meters is beyond the capabilities of
+        * the supported sensors
         * ==> filter out invalid results for not measuring echos of
         *     another us sensor
         *
         * formula:
-        *         distance       3 m
-        * time = ---------- = --------- = 9404389 ns
-        *          speed       319 m/s
+        *         distance     6.45 * 2 m
+        * time = ---------- = ------------ = 40438871 ns
+        *          speed         319 m/s
         *
         * using a minimum speed at -20 °C of 319 m/s
         */
-       if (dt_ns > 9404389)
+       if (dt_ns > 40438871)
                return -EIO;
 
        time_ns = dt_ns;
@@ -159,20 +160,20 @@ static int srf04_read(struct srf04_data *data)
         *   with Temp in °C
         *   and speed in m/s
         *
-        * use 343 m/s as ultrasonic speed at 20 °C here in absence of the
+        * use 343.5 m/s as ultrasonic speed at 20 °C here in absence of the
         * temperature
         *
         * therefore:
-        *             time     343
-        * distance = ------ * -----
-        *             10^6       2
+        *             time     343.5     time * 106
+        * distance = ------ * ------- = ------------
+        *             10^6         2         617176
         *   with time in ns
         *   and distance in mm (one way)
         *
-        * because we limit to 3 meters the multiplication with 343 just
+        * because we limit to 6.45 meters the multiplication with 106 just
         * fits into 32 bit
         */
-       distance_mm = time_ns * 343 / 2000000;
+       distance_mm = time_ns * 106 / 617176;
 
        return distance_mm;
 }
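A quick check of the comment's arithmetic: the echo time is capped at 40438871 ns (6.45 m out and back at a worst-case 319 m/s), and within that cap time_ns * 106 stays below 2^32, so the millimetre conversion can stay in 32-bit math. The sketch below only reproduces that arithmetic; the 5.83 ms sample echo is illustrative.

#include <stdint.h>
#include <stdio.h>

/* distance_mm = time_ns * 106 / 617176 is time * 343.5 m/s / 2,
 * expressed in ns and mm. */
int main(void)
{
	uint32_t max_ns = 40438871;

	printf("max product: %llu (fits in u32: %d)\n",
	       (unsigned long long)max_ns * 106,
	       (unsigned long long)max_ns * 106 <= UINT32_MAX);

	/* e.g. a 5.83 ms round-trip echo is roughly one metre each way */
	uint32_t time_ns = 5830000;
	printf("distance: %u mm\n", time_ns * 106 / 617176);
	return 0;
}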
index 71cb952..26b792b 100644 (file)
@@ -1489,7 +1489,6 @@ static int __init hfi1_mod_init(void)
                goto bail_dev;
        }
 
-       hfi1_compute_tid_rdma_flow_wt();
        /*
         * These must be called before the driver is registered with
         * the PCI subsystem.
index 61aa550..61362bd 100644 (file)
@@ -319,7 +319,9 @@ int pcie_speeds(struct hfi1_devdata *dd)
        /*
         * bus->max_bus_speed is set from the bridge's linkcap Max Link Speed
         */
-       if (parent && dd->pcidev->bus->max_bus_speed != PCIE_SPEED_8_0GT) {
+       if (parent &&
+           (dd->pcidev->bus->max_bus_speed == PCIE_SPEED_2_5GT ||
+            dd->pcidev->bus->max_bus_speed == PCIE_SPEED_5_0GT)) {
                dd_dev_info(dd, "Parent PCIe bridge does not support Gen3\n");
                dd->link_gen3_capable = 0;
        }
index 513a8aa..1a3c647 100644 (file)
@@ -2209,15 +2209,15 @@ int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
                if (qp->s_flags & RVT_S_WAIT_RNR)
                        goto bail_stop;
                rdi = ib_to_rvt(qp->ibqp.device);
-               if (qp->s_rnr_retry == 0 &&
-                   !((rdi->post_parms[wqe->wr.opcode].flags &
-                     RVT_OPERATION_IGN_RNR_CNT) &&
-                     qp->s_rnr_retry_cnt == 0)) {
-                       status = IB_WC_RNR_RETRY_EXC_ERR;
-                       goto class_b;
+               if (!(rdi->post_parms[wqe->wr.opcode].flags &
+                      RVT_OPERATION_IGN_RNR_CNT)) {
+                       if (qp->s_rnr_retry == 0) {
+                               status = IB_WC_RNR_RETRY_EXC_ERR;
+                               goto class_b;
+                       }
+                       if (qp->s_rnr_retry_cnt < 7 && qp->s_rnr_retry_cnt > 0)
+                               qp->s_rnr_retry--;
                }
-               if (qp->s_rnr_retry_cnt < 7 && qp->s_rnr_retry_cnt > 0)
-                       qp->s_rnr_retry--;
 
                /*
                 * The last valid PSN is the previous PSN. For TID RDMA WRITE
index f21fca3..e53f542 100644 (file)
@@ -107,8 +107,6 @@ static u32 mask_generation(u32 a)
  * C - Capcode
  */
 
-static u32 tid_rdma_flow_wt;
-
 static void tid_rdma_trigger_resume(struct work_struct *work);
 static void hfi1_kern_exp_rcv_free_flows(struct tid_rdma_request *req);
 static int hfi1_kern_exp_rcv_alloc_flows(struct tid_rdma_request *req,
@@ -136,6 +134,26 @@ static void update_r_next_psn_fecn(struct hfi1_packet *packet,
                                   struct tid_rdma_flow *flow,
                                   bool fecn);
 
+static void validate_r_tid_ack(struct hfi1_qp_priv *priv)
+{
+       if (priv->r_tid_ack == HFI1_QP_WQE_INVALID)
+               priv->r_tid_ack = priv->r_tid_tail;
+}
+
+static void tid_rdma_schedule_ack(struct rvt_qp *qp)
+{
+       struct hfi1_qp_priv *priv = qp->priv;
+
+       priv->s_flags |= RVT_S_ACK_PENDING;
+       hfi1_schedule_tid_send(qp);
+}
+
+static void tid_rdma_trigger_ack(struct rvt_qp *qp)
+{
+       validate_r_tid_ack(qp->priv);
+       tid_rdma_schedule_ack(qp);
+}
+
 static u64 tid_rdma_opfn_encode(struct tid_rdma_params *p)
 {
        return
@@ -3005,10 +3023,7 @@ nak_psn:
                qpriv->s_nak_state = IB_NAK_PSN_ERROR;
                /* We are NAK'ing the next expected PSN */
                qpriv->s_nak_psn = mask_psn(flow->flow_state.r_next_psn);
-               qpriv->s_flags |= RVT_S_ACK_PENDING;
-               if (qpriv->r_tid_ack == HFI1_QP_WQE_INVALID)
-                       qpriv->r_tid_ack = qpriv->r_tid_tail;
-               hfi1_schedule_tid_send(qp);
+               tid_rdma_trigger_ack(qp);
        }
        goto unlock;
 }
@@ -3371,18 +3386,17 @@ u32 hfi1_build_tid_rdma_write_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
        return sizeof(ohdr->u.tid_rdma.w_req) / sizeof(u32);
 }
 
-void hfi1_compute_tid_rdma_flow_wt(void)
+static u32 hfi1_compute_tid_rdma_flow_wt(struct rvt_qp *qp)
 {
        /*
         * Heuristic for computing the RNR timeout when waiting on the flow
         * queue. Rather than a computationally expensive exact estimate of when
         * a flow will be available, we assume that if a QP is at position N in
         * the flow queue it has to wait approximately (N + 1) * (number of
-        * segments between two sync points), assuming PMTU of 4K. The rationale
-        * for this is that flows are released and recycled at each sync point.
+        * segments between two sync points). The rationale for this is that
+        * flows are released and recycled at each sync point.
         */
-       tid_rdma_flow_wt = MAX_TID_FLOW_PSN * enum_to_mtu(OPA_MTU_4096) /
-               TID_RDMA_MAX_SEGMENT_SIZE;
+       return (MAX_TID_FLOW_PSN * qp->pmtu) >> TID_RDMA_SEGMENT_SHIFT;
 }
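The rework above scales the RNR wait with the QP's actual pMTU instead of always assuming 4 KiB. A worked illustration of the shift, assuming for the sake of the example MAX_TID_FLOW_PSN = 2048 and TID_RDMA_SEGMENT_SHIFT = 18 (256 KiB segments); with a 4096-byte pMTU the result matches the old fixed weight, and it shrinks for smaller pMTUs.

#include <stdio.h>

int main(void)
{
	unsigned int max_tid_flow_psn = 2048;	/* assumed value, for illustration */
	unsigned int segment_shift = 18;	/* 256 KiB segments */
	unsigned int pmtus[] = { 1024, 2048, 4096 };

	for (int i = 0; i < 3; i++)
		printf("pmtu %u -> flow weight %u\n", pmtus[i],
		       (max_tid_flow_psn * pmtus[i]) >> segment_shift);
	return 0;
}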
 
 static u32 position_in_queue(struct hfi1_qp_priv *qpriv,
@@ -3505,7 +3519,7 @@ static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx)
                if (qpriv->flow_state.index >= RXE_NUM_TID_FLOWS) {
                        ret = hfi1_kern_setup_hw_flow(qpriv->rcd, qp);
                        if (ret) {
-                               to_seg = tid_rdma_flow_wt *
+                               to_seg = hfi1_compute_tid_rdma_flow_wt(qp) *
                                        position_in_queue(qpriv,
                                                          &rcd->flow_queue);
                                break;
@@ -3526,7 +3540,7 @@ static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx)
                /*
                 * If overtaking req->acked_tail, send an RNR NAK. Because the
                 * QP is not queued in this case, and the issue can only be
-                * caused due a delay in scheduling the second leg which we
+                * caused by a delay in scheduling the second leg which we
                 * cannot estimate, we use a rather arbitrary RNR timeout of
                 * (MAX_FLOWS / 2) segments
                 */
@@ -3534,8 +3548,7 @@ static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx)
                                MAX_FLOWS)) {
                        ret = -EAGAIN;
                        to_seg = MAX_FLOWS >> 1;
-                       qpriv->s_flags |= RVT_S_ACK_PENDING;
-                       hfi1_schedule_tid_send(qp);
+                       tid_rdma_trigger_ack(qp);
                        break;
                }
 
@@ -4335,8 +4348,7 @@ void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet)
        trace_hfi1_tid_req_rcv_write_data(qp, 0, e->opcode, e->psn, e->lpsn,
                                          req);
        trace_hfi1_tid_write_rsp_rcv_data(qp);
-       if (priv->r_tid_ack == HFI1_QP_WQE_INVALID)
-               priv->r_tid_ack = priv->r_tid_tail;
+       validate_r_tid_ack(priv);
 
        if (opcode == TID_OP(WRITE_DATA_LAST)) {
                release_rdma_sge_mr(e);
@@ -4375,8 +4387,7 @@ void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet)
        }
 
 done:
-       priv->s_flags |= RVT_S_ACK_PENDING;
-       hfi1_schedule_tid_send(qp);
+       tid_rdma_schedule_ack(qp);
 exit:
        priv->r_next_psn_kdeth = flow->flow_state.r_next_psn;
        if (fecn)
@@ -4388,10 +4399,7 @@ send_nak:
        if (!priv->s_nak_state) {
                priv->s_nak_state = IB_NAK_PSN_ERROR;
                priv->s_nak_psn = flow->flow_state.r_next_psn;
-               priv->s_flags |= RVT_S_ACK_PENDING;
-               if (priv->r_tid_ack == HFI1_QP_WQE_INVALID)
-                       priv->r_tid_ack = priv->r_tid_tail;
-               hfi1_schedule_tid_send(qp);
+               tid_rdma_trigger_ack(qp);
        }
        goto done;
 }
@@ -4939,8 +4947,7 @@ void hfi1_rc_rcv_tid_rdma_resync(struct hfi1_packet *packet)
        qpriv->resync = true;
        /* RESYNC request always gets a TID RDMA ACK. */
        qpriv->s_nak_state = 0;
-       qpriv->s_flags |= RVT_S_ACK_PENDING;
-       hfi1_schedule_tid_send(qp);
+       tid_rdma_trigger_ack(qp);
 bail:
        if (fecn)
                qp->s_flags |= RVT_S_ECN;
index 1c53618..6e82df2 100644 (file)
@@ -17,6 +17,7 @@
 #define TID_RDMA_MIN_SEGMENT_SIZE       BIT(18)   /* 256 KiB (for now) */
 #define TID_RDMA_MAX_SEGMENT_SIZE       BIT(18)   /* 256 KiB (for now) */
 #define TID_RDMA_MAX_PAGES              (BIT(18) >> PAGE_SHIFT)
+#define TID_RDMA_SEGMENT_SHIFT         18
 
 /*
  * Bit definitions for priv->s_flags.
@@ -274,8 +275,6 @@ u32 hfi1_build_tid_rdma_write_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
                                  struct ib_other_headers *ohdr,
                                  u32 *bth1, u32 *bth2, u32 *len);
 
-void hfi1_compute_tid_rdma_flow_wt(void);
-
 void hfi1_rc_rcv_tid_rdma_write_req(struct hfi1_packet *packet);
 
 u32 hfi1_build_tid_rdma_write_resp(struct rvt_qp *qp, struct rvt_ack_entry *e,
index 8678327..3bb8f78 100644 (file)
@@ -59,7 +59,7 @@ enum {
 
 #define HNS_ROCE_HEM_CHUNK_LEN \
         ((256 - sizeof(struct list_head) - 2 * sizeof(int)) /   \
-        (sizeof(struct scatterlist)))
+        (sizeof(struct scatterlist) + sizeof(void *)))
 
 #define check_whether_bt_num_3(type, hop_num) \
        (type < HEM_TYPE_MTT && hop_num == 2)
index 9591457..43ea2c1 100644 (file)
@@ -376,7 +376,7 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
        srq->max = roundup_pow_of_two(srq_init_attr->attr.max_wr + 1);
        srq->max_gs = srq_init_attr->attr.max_sge;
 
-       srq_desc_size = max(16, 16 * srq->max_gs);
+       srq_desc_size = roundup_pow_of_two(max(16, 16 * srq->max_gs));
 
        srq->wqe_shift = ilog2(srq_desc_size);
 
index 1cb40c7..8229a90 100644 (file)
@@ -489,6 +489,15 @@ static void ml_ff_destroy(struct ff_device *ff)
 {
        struct ml_device *ml = ff->private;
 
+       /*
+        * Even though we stop all playing effects when tearing down
+        * an input device (via input_device_flush() that calls into
+        * input_ff_flush() that stops and erases all effects), we
+        * do not actually stop the timer, and therefore we should
+        * do it here.
+        */
+       del_timer_sync(&ml->timer);
+
        kfree(ml->private);
 }
 
index 56fae34..704558d 100644 (file)
@@ -177,6 +177,7 @@ static const char * const smbus_pnp_ids[] = {
        "LEN0096", /* X280 */
        "LEN0097", /* X280 -> ALPS trackpoint */
        "LEN009b", /* T580 */
+       "LEN0402", /* X1 Extreme 2nd Generation */
        "LEN200f", /* T450s */
        "LEN2054", /* E480 */
        "LEN2055", /* E580 */
index f28a715..bbf9ae9 100644 (file)
@@ -510,7 +510,6 @@ struct f11_data {
        struct rmi_2d_sensor_platform_data sensor_pdata;
        unsigned long *abs_mask;
        unsigned long *rel_mask;
-       unsigned long *result_bits;
 };
 
 enum f11_finger_state {
@@ -1057,7 +1056,7 @@ static int rmi_f11_initialize(struct rmi_function *fn)
        /*
        ** init instance data, fill in values and create any sysfs files
        */
-       f11 = devm_kzalloc(&fn->dev, sizeof(struct f11_data) + mask_size * 3,
+       f11 = devm_kzalloc(&fn->dev, sizeof(struct f11_data) + mask_size * 2,
                        GFP_KERNEL);
        if (!f11)
                return -ENOMEM;
@@ -1076,8 +1075,6 @@ static int rmi_f11_initialize(struct rmi_function *fn)
                        + sizeof(struct f11_data));
        f11->rel_mask = (unsigned long *)((char *)f11
                        + sizeof(struct f11_data) + mask_size);
-       f11->result_bits = (unsigned long *)((char *)f11
-                       + sizeof(struct f11_data) + mask_size * 2);
 
        set_bit(fn->irq_pos, f11->abs_mask);
        set_bit(fn->irq_pos + 1, f11->rel_mask);
@@ -1284,8 +1281,8 @@ static irqreturn_t rmi_f11_attention(int irq, void *ctx)
                        valid_bytes = f11->sensor.attn_size;
                memcpy(f11->sensor.data_pkt, drvdata->attn_data.data,
                        valid_bytes);
-               drvdata->attn_data.data += f11->sensor.attn_size;
-               drvdata->attn_data.size -= f11->sensor.attn_size;
+               drvdata->attn_data.data += valid_bytes;
+               drvdata->attn_data.size -= valid_bytes;
        } else {
                error = rmi_read_block(rmi_dev,
                                data_base_addr, f11->sensor.data_pkt,
index d20a5d6..7e97944 100644 (file)
@@ -55,6 +55,9 @@ struct f12_data {
 
        const struct rmi_register_desc_item *data15;
        u16 data15_offset;
+
+       unsigned long *abs_mask;
+       unsigned long *rel_mask;
 };
 
 static int rmi_f12_read_sensor_tuning(struct f12_data *f12)
@@ -209,8 +212,8 @@ static irqreturn_t rmi_f12_attention(int irq, void *ctx)
                        valid_bytes = sensor->attn_size;
                memcpy(sensor->data_pkt, drvdata->attn_data.data,
                        valid_bytes);
-               drvdata->attn_data.data += sensor->attn_size;
-               drvdata->attn_data.size -= sensor->attn_size;
+               drvdata->attn_data.data += valid_bytes;
+               drvdata->attn_data.size -= valid_bytes;
        } else {
                retval = rmi_read_block(rmi_dev, f12->data_addr,
                                        sensor->data_pkt, sensor->pkt_size);
@@ -291,9 +294,18 @@ static int rmi_f12_write_control_regs(struct rmi_function *fn)
 static int rmi_f12_config(struct rmi_function *fn)
 {
        struct rmi_driver *drv = fn->rmi_dev->driver;
+       struct f12_data *f12 = dev_get_drvdata(&fn->dev);
+       struct rmi_2d_sensor *sensor;
        int ret;
 
-       drv->set_irq_bits(fn->rmi_dev, fn->irq_mask);
+       sensor = &f12->sensor;
+
+       if (!sensor->report_abs)
+               drv->clear_irq_bits(fn->rmi_dev, f12->abs_mask);
+       else
+               drv->set_irq_bits(fn->rmi_dev, f12->abs_mask);
+
+       drv->clear_irq_bits(fn->rmi_dev, f12->rel_mask);
 
        ret = rmi_f12_write_control_regs(fn);
        if (ret)
@@ -315,9 +327,12 @@ static int rmi_f12_probe(struct rmi_function *fn)
        struct rmi_device_platform_data *pdata = rmi_get_platform_data(rmi_dev);
        struct rmi_driver_data *drvdata = dev_get_drvdata(&rmi_dev->dev);
        u16 data_offset = 0;
+       int mask_size;
 
        rmi_dbg(RMI_DEBUG_FN, &fn->dev, "%s\n", __func__);
 
+       mask_size = BITS_TO_LONGS(drvdata->irq_count) * sizeof(unsigned long);
+
        ret = rmi_read(fn->rmi_dev, query_addr, &buf);
        if (ret < 0) {
                dev_err(&fn->dev, "Failed to read general info register: %d\n",
@@ -332,10 +347,19 @@ static int rmi_f12_probe(struct rmi_function *fn)
                return -ENODEV;
        }
 
-       f12 = devm_kzalloc(&fn->dev, sizeof(struct f12_data), GFP_KERNEL);
+       f12 = devm_kzalloc(&fn->dev, sizeof(struct f12_data) + mask_size * 2,
+                       GFP_KERNEL);
        if (!f12)
                return -ENOMEM;
 
+       f12->abs_mask = (unsigned long *)((char *)f12
+                       + sizeof(struct f12_data));
+       f12->rel_mask = (unsigned long *)((char *)f12
+                       + sizeof(struct f12_data) + mask_size);
+
+       set_bit(fn->irq_pos, f12->abs_mask);
+       set_bit(fn->irq_pos + 1, f12->rel_mask);
+
        f12->has_dribble = !!(buf & BIT(3));
 
        if (fn->dev.of_node) {
index 710b025..897105b 100644 (file)
@@ -359,7 +359,7 @@ static const struct vb2_ops rmi_f54_queue_ops = {
 static const struct vb2_queue rmi_f54_queue = {
        .type = V4L2_BUF_TYPE_VIDEO_CAPTURE,
        .io_modes = VB2_MMAP | VB2_USERPTR | VB2_DMABUF | VB2_READ,
-       .buf_struct_size = sizeof(struct vb2_buffer),
+       .buf_struct_size = sizeof(struct vb2_v4l2_buffer),
        .ops = &rmi_f54_queue_ops,
        .mem_ops = &vb2_vmalloc_memops,
        .timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC,
@@ -601,7 +601,7 @@ static int rmi_f54_config(struct rmi_function *fn)
 {
        struct rmi_driver *drv = fn->rmi_dev->driver;
 
-       drv->set_irq_bits(fn->rmi_dev, fn->irq_mask);
+       drv->clear_irq_bits(fn->rmi_dev, fn->irq_mask);
 
        return 0;
 }
@@ -730,6 +730,7 @@ static void rmi_f54_remove(struct rmi_function *fn)
 
        video_unregister_device(&f54->vdev);
        v4l2_device_unregister(&f54->v4l2);
+       destroy_workqueue(f54->workqueue);
 }
 
 struct rmi_function_handler rmi_f54_handler = {
index 4b22d49..6bcffc9 100644 (file)
@@ -1990,11 +1990,6 @@ static int cyttsp4_mt_probe(struct cyttsp4 *cd)
 
        /* get sysinfo */
        md->si = &cd->sysinfo;
-       if (!md->si) {
-               dev_err(dev, "%s: Fail get sysinfo pointer from core p=%p\n",
-                       __func__, md->si);
-               goto error_get_sysinfo;
-       }
 
        rc = cyttsp4_setup_input_device(cd);
        if (rc)
@@ -2004,8 +1999,6 @@ static int cyttsp4_mt_probe(struct cyttsp4 *cd)
 
 error_init_input:
        input_free_device(md->input);
-error_get_sysinfo:
-       input_set_drvdata(md->input, NULL);
 error_alloc_failed:
        dev_err(dev, "%s failed.\n", __func__);
        return rc;
index 7b97122..c498796 100644 (file)
@@ -405,8 +405,12 @@ void icc_set_tag(struct icc_path *path, u32 tag)
        if (!path)
                return;
 
+       mutex_lock(&icc_lock);
+
        for (i = 0; i < path->num_nodes; i++)
                path->reqs[i].tag = tag;
+
+       mutex_unlock(&icc_lock);
 }
 EXPORT_SYMBOL_GPL(icc_set_tag);
 
index 910081d..b4966d8 100644 (file)
@@ -433,7 +433,8 @@ static int qnoc_probe(struct platform_device *pdev)
        if (!qp)
                return -ENOMEM;
 
-       data = devm_kcalloc(dev, num_nodes, sizeof(*node), GFP_KERNEL);
+       data = devm_kzalloc(dev, struct_size(data, nodes, num_nodes),
+                           GFP_KERNEL);
        if (!data)
                return -ENOMEM;
 
index 5795559..502a6c2 100644 (file)
@@ -790,7 +790,8 @@ static int qnoc_probe(struct platform_device *pdev)
        if (!qp)
                return -ENOMEM;
 
-       data = devm_kcalloc(&pdev->dev, num_nodes, sizeof(*node), GFP_KERNEL);
+       data = devm_kzalloc(&pdev->dev, struct_size(data, nodes, num_nodes),
+                           GFP_KERNEL);
        if (!data)
                return -ENOMEM;
 
index e7d1920..0ae986c 100644 (file)
@@ -358,7 +358,7 @@ static int sdhci_at91_probe(struct platform_device *pdev)
        pm_runtime_use_autosuspend(&pdev->dev);
 
        /* HS200 is broken at this moment */
-       host->quirks2 = SDHCI_QUIRK2_BROKEN_HS200;
+       host->quirks2 |= SDHCI_QUIRK2_BROKEN_HS200;
 
        ret = sdhci_add_host(host);
        if (ret)
index bb60322..0a9f42e 100644 (file)
@@ -617,6 +617,7 @@ err_free_chan:
        sl->tty = NULL;
        tty->disc_data = NULL;
        clear_bit(SLF_INUSE, &sl->flags);
+       free_netdev(sl->dev);
 
 err_exit:
        rtnl_unlock();
index 073cbd0..d838c17 100644 (file)
@@ -273,6 +273,19 @@ static int mv88e6352_ptp_enable_extts(struct mv88e6xxx_chip *chip,
        int pin;
        int err;
 
+       /* Reject requests with unsupported flags */
+       if (rq->extts.flags & ~(PTP_ENABLE_FEATURE |
+                               PTP_RISING_EDGE |
+                               PTP_FALLING_EDGE |
+                               PTP_STRICT_FLAGS))
+               return -EOPNOTSUPP;
+
+       /* Reject requests to enable time stamping on both edges. */
+       if ((rq->extts.flags & PTP_STRICT_FLAGS) &&
+           (rq->extts.flags & PTP_ENABLE_FEATURE) &&
+           (rq->extts.flags & PTP_EXTTS_EDGES) == PTP_EXTTS_EDGES)
+               return -EOPNOTSUPP;
+
        pin = ptp_find_pin(chip->ptp_clock, PTP_PF_EXTTS, rq->extts.index);
 
        if (pin < 0)
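The added validation encodes two rules: any flag bit the driver does not understand is refused outright, and with PTP_STRICT_FLAGS a request to timestamp both edges is refused because the hardware can only latch one. The igb hunk further down applies the same rules. The sketch below mirrors the relevant uapi flag bits locally (see include/uapi/linux/ptp_clock.h) purely to make the masks concrete; it is not the driver's code.

#include <stdio.h>

#define PTP_ENABLE_FEATURE (1 << 0)
#define PTP_RISING_EDGE    (1 << 1)
#define PTP_FALLING_EDGE   (1 << 2)
#define PTP_STRICT_FLAGS   (1 << 3)
#define PTP_EXTTS_EDGES    (PTP_RISING_EDGE | PTP_FALLING_EDGE)

static int extts_flags_ok(unsigned int flags)
{
	if (flags & ~(PTP_ENABLE_FEATURE | PTP_RISING_EDGE |
		      PTP_FALLING_EDGE | PTP_STRICT_FLAGS))
		return 0;			/* unsupported flag set   */

	if ((flags & PTP_STRICT_FLAGS) &&
	    (flags & PTP_ENABLE_FEATURE) &&
	    (flags & PTP_EXTTS_EDGES) == PTP_EXTTS_EDGES)
		return 0;			/* both edges requested   */

	return 1;
}

int main(void)
{
	printf("%d\n", extts_flags_ok(PTP_ENABLE_FEATURE | PTP_RISING_EDGE)); /* 1 */
	printf("%d\n", extts_flags_ok(PTP_ENABLE_FEATURE | PTP_STRICT_FLAGS |
				      PTP_EXTTS_EDGES));                      /* 0 */
	return 0;
}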
index 77f3511..ca3aa12 100644 (file)
@@ -6280,6 +6280,10 @@ static int tg3_ptp_enable(struct ptp_clock_info *ptp,
 
        switch (rq->type) {
        case PTP_CLK_REQ_PEROUT:
+               /* Reject requests with unsupported flags */
+               if (rq->perout.flags)
+                       return -EOPNOTSUPP;
+
                if (rq->perout.index != 0)
                        return -EINVAL;
 
index f1a0c4d..f37c9a0 100644 (file)
@@ -763,6 +763,7 @@ static int ep93xx_eth_remove(struct platform_device *pdev)
 {
        struct net_device *dev;
        struct ep93xx_priv *ep;
+       struct resource *mem;
 
        dev = platform_get_drvdata(pdev);
        if (dev == NULL)
@@ -778,8 +779,8 @@ static int ep93xx_eth_remove(struct platform_device *pdev)
                iounmap(ep->base_addr);
 
        if (ep->res != NULL) {
-               release_resource(ep->res);
-               kfree(ep->res);
+               mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+               release_mem_region(mem->start, resource_size(mem));
        }
 
        free_netdev(dev);
index e736ce2..a8f4c69 100644 (file)
@@ -2524,6 +2524,7 @@ static int gemini_ethernet_port_remove(struct platform_device *pdev)
        struct gemini_ethernet_port *port = platform_get_drvdata(pdev);
 
        gemini_port_remove(port);
+       free_netdev(port->netdev);
        return 0;
 }
 
index c26c0a7..11538e5 100644 (file)
@@ -2260,8 +2260,16 @@ err_set_cdan:
 err_service_reg:
        free_channel(priv, channel);
 err_alloc_ch:
-       if (err == -EPROBE_DEFER)
+       if (err == -EPROBE_DEFER) {
+               for (i = 0; i < priv->num_channels; i++) {
+                       channel = priv->channel[i];
+                       nctx = &channel->nctx;
+                       dpaa2_io_service_deregister(channel->dpio, nctx, dev);
+                       free_channel(priv, channel);
+               }
+               priv->num_channels = 0;
                return err;
+       }
 
        if (cpumask_empty(&priv->dpio_cpumask)) {
                dev_err(dev, "No cpu with an affine DPIO/DPCON\n");
index b104d3c..6e0212b 100644 (file)
@@ -70,11 +70,6 @@ static const struct hns3_stats hns3_rxq_stats[] = {
 #define HNS3_NIC_LB_TEST_TX_CNT_ERR    2
 #define HNS3_NIC_LB_TEST_RX_CNT_ERR    3
 
-struct hns3_link_mode_mapping {
-       u32 hns3_link_mode;
-       u32 ethtool_link_mode;
-};
-
 static int hns3_lp_setup(struct net_device *ndev, enum hnae3_loop loop, bool en)
 {
        struct hnae3_handle *h = hns3_get_handle(ndev);
index 49ad848..d6c3952 100644 (file)
@@ -124,7 +124,7 @@ static int hclge_ets_validate(struct hclge_dev *hdev, struct ieee_ets *ets,
        if (ret)
                return ret;
 
-       for (i = 0; i < HNAE3_MAX_TC; i++) {
+       for (i = 0; i < hdev->tc_max; i++) {
                switch (ets->tc_tsa[i]) {
                case IEEE_8021QAZ_TSA_STRICT:
                        if (hdev->tm_info.tc_info[i].tc_sch_mode !=
@@ -318,6 +318,7 @@ static int hclge_ieee_setpfc(struct hnae3_handle *h, struct ieee_pfc *pfc)
        struct net_device *netdev = h->kinfo.netdev;
        struct hclge_dev *hdev = vport->back;
        u8 i, j, pfc_map, *prio_tc;
+       int ret;
 
        if (!(hdev->dcbx_cap & DCB_CAP_DCBX_VER_IEEE) ||
            hdev->flag & HCLGE_FLAG_MQPRIO_ENABLE)
@@ -347,7 +348,21 @@ static int hclge_ieee_setpfc(struct hnae3_handle *h, struct ieee_pfc *pfc)
 
        hclge_tm_pfc_info_update(hdev);
 
-       return hclge_pause_setup_hw(hdev, false);
+       ret = hclge_pause_setup_hw(hdev, false);
+       if (ret)
+               return ret;
+
+       ret = hclge_notify_client(hdev, HNAE3_DOWN_CLIENT);
+       if (ret)
+               return ret;
+
+       ret = hclge_buffer_alloc(hdev);
+       if (ret) {
+               hclge_notify_client(hdev, HNAE3_UP_CLIENT);
+               return ret;
+       }
+
+       return hclge_notify_client(hdev, HNAE3_UP_CLIENT);
 }
 
 /* DCBX configuration */
index eb14c43..7c70386 100644 (file)
@@ -6366,11 +6366,23 @@ static int hclge_config_switch_param(struct hclge_dev *hdev, int vfid,
 
        func_id = hclge_get_port_number(HOST_PORT, 0, vfid, 0);
        req = (struct hclge_mac_vlan_switch_cmd *)desc.data;
+
+       /* read current config parameter */
        hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MAC_VLAN_SWITCH_PARAM,
-                                  false);
+                                  true);
        req->roce_sel = HCLGE_MAC_VLAN_NIC_SEL;
        req->func_id = cpu_to_le32(func_id);
-       req->switch_param = switch_param;
+
+       ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+       if (ret) {
+               dev_err(&hdev->pdev->dev,
+                       "read mac vlan switch parameter fail, ret = %d\n", ret);
+               return ret;
+       }
+
+       /* modify and write new config parameter */
+       hclge_cmd_reuse_desc(&desc, false);
+       req->switch_param = (req->switch_param & param_mask) | switch_param;
        req->param_mask = param_mask;
 
        ret = hclge_cmd_send(&hdev->hw, &desc, 1);
index fd3071f..c39e921 100644 (file)
@@ -521,6 +521,19 @@ static int igb_ptp_feature_enable_i210(struct ptp_clock_info *ptp,
 
        switch (rq->type) {
        case PTP_CLK_REQ_EXTTS:
+               /* Reject requests with unsupported flags */
+               if (rq->extts.flags & ~(PTP_ENABLE_FEATURE |
+                                       PTP_RISING_EDGE |
+                                       PTP_FALLING_EDGE |
+                                       PTP_STRICT_FLAGS))
+                       return -EOPNOTSUPP;
+
+               /* Reject requests failing to enable both edges. */
+               if ((rq->extts.flags & PTP_STRICT_FLAGS) &&
+                   (rq->extts.flags & PTP_ENABLE_FEATURE) &&
+                   (rq->extts.flags & PTP_EXTTS_EDGES) != PTP_EXTTS_EDGES)
+                       return -EOPNOTSUPP;
+
                if (on) {
                        pin = ptp_find_pin(igb->ptp_clock, PTP_PF_EXTTS,
                                           rq->extts.index);
@@ -551,6 +564,10 @@ static int igb_ptp_feature_enable_i210(struct ptp_clock_info *ptp,
                return 0;
 
        case PTP_CLK_REQ_PEROUT:
+               /* Reject requests with unsupported flags */
+               if (rq->perout.flags)
+                       return -EOPNOTSUPP;
+
                if (on) {
                        pin = ptp_find_pin(igb->ptp_clock, PTP_PF_PEROUT,
                                           rq->perout.index);
index 096a04a..9343bf3 100644 (file)
@@ -1,5 +1,5 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Marvell OcteonTx2 CGX driver
+/* SPDX-License-Identifier: GPL-2.0 */
+/*  Marvell OcteonTx2 CGX driver
  *
  * Copyright (C) 2018 Marvell International Ltd.
  *
index fb3ba49..473d975 100644 (file)
@@ -1,5 +1,5 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Marvell OcteonTx2 CGX driver
+/* SPDX-License-Identifier: GPL-2.0 */
+/*  Marvell OcteonTx2 CGX driver
  *
  * Copyright (C) 2018 Marvell International Ltd.
  *
index baec832..784207b 100644 (file)
@@ -1,5 +1,5 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Marvell OcteonTx2 RVU Admin Function driver
+/* SPDX-License-Identifier: GPL-2.0 */
+/*  Marvell OcteonTx2 RVU Admin Function driver
  *
  * Copyright (C) 2018 Marvell International Ltd.
  *
index f143d7b..a589748 100644 (file)
@@ -1,5 +1,5 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Marvell OcteonTx2 RVU Admin Function driver
+/* SPDX-License-Identifier: GPL-2.0 */
+/*  Marvell OcteonTx2 RVU Admin Function driver
  *
  * Copyright (C) 2018 Marvell International Ltd.
  *
index 168fb4e..3803af9 100644 (file)
@@ -1,5 +1,5 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Marvell OcteonTx2 RVU Admin Function driver
+/* SPDX-License-Identifier: GPL-2.0 */
+/*  Marvell OcteonTx2 RVU Admin Function driver
  *
  * Copyright (C) 2018 Marvell International Ltd.
  *
index 832810a..aa2727e 100644 (file)
@@ -1,5 +1,5 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Marvell OcteonTx2 RVU Admin Function driver
+/* SPDX-License-Identifier: GPL-2.0 */
+/*  Marvell OcteonTx2 RVU Admin Function driver
  *
  * Copyright (C) 2018 Marvell International Ltd.
  *
index b252d86..51c206f 100644 (file)
@@ -1,5 +1,5 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Marvell OcteonTx2 RVU Admin Function driver
+/* SPDX-License-Identifier: GPL-2.0 */
+/*  Marvell OcteonTx2 RVU Admin Function driver
  *
  * Copyright (C) 2018 Marvell International Ltd.
  *
index be758c1..7ca599b 100644 (file)
@@ -1,5 +1,5 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Marvell OcteonTx2 RVU Admin Function driver
+/* SPDX-License-Identifier: GPL-2.0 */
+/*  Marvell OcteonTx2 RVU Admin Function driver
  *
  * Copyright (C) 2018 Marvell International Ltd.
  *
index f6a260d..9d8942a 100644 (file)
@@ -1,5 +1,5 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Marvell OcteonTx2 RVU Admin Function driver
+/* SPDX-License-Identifier: GPL-2.0 */
+/*  Marvell OcteonTx2 RVU Admin Function driver
  *
  * Copyright (C) 2018 Marvell International Ltd.
  *
index 0059b29..43f9760 100644 (file)
@@ -236,6 +236,19 @@ static int mlx5_extts_configure(struct ptp_clock_info *ptp,
        if (!MLX5_PPS_CAP(mdev))
                return -EOPNOTSUPP;
 
+       /* Reject requests with unsupported flags */
+       if (rq->extts.flags & ~(PTP_ENABLE_FEATURE |
+                               PTP_RISING_EDGE |
+                               PTP_FALLING_EDGE |
+                               PTP_STRICT_FLAGS))
+               return -EOPNOTSUPP;
+
+       /* Reject requests to enable time stamping on both edges. */
+       if ((rq->extts.flags & PTP_STRICT_FLAGS) &&
+           (rq->extts.flags & PTP_ENABLE_FEATURE) &&
+           (rq->extts.flags & PTP_EXTTS_EDGES) == PTP_EXTTS_EDGES)
+               return -EOPNOTSUPP;
+
        if (rq->extts.index >= clock->ptp_info.n_pins)
                return -EINVAL;
 
@@ -290,6 +303,10 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp,
        if (!MLX5_PPS_CAP(mdev))
                return -EOPNOTSUPP;
 
+       /* Reject requests with unsupported flags */
+       if (rq->perout.flags)
+               return -EOPNOTSUPP;
+
        if (rq->perout.index >= clock->ptp_info.n_pins)
                return -EINVAL;
 
index e177b1a..afe5246 100644 (file)
@@ -492,6 +492,10 @@ static int lan743x_ptp_perout(struct lan743x_adapter *adapter, int on,
        unsigned int index = perout_request->index;
        struct lan743x_ptp_perout *perout = &ptp->perout[index];
 
+       /* Reject requests with unsupported flags */
+       if (perout_request->flags)
+               return -EOPNOTSUPP;
+
        if (on) {
                perout_pin = ptp_find_pin(ptp->ptp_clock, PTP_PF_PEROUT,
                                          perout_request->index);
index a9c89d5..9f88b5d 100644 (file)
@@ -955,6 +955,8 @@ enum RAVB_QUEUE {
 #define NUM_RX_QUEUE   2
 #define NUM_TX_QUEUE   2
 
+#define RX_BUF_SZ      (2048 - ETH_FCS_LEN + sizeof(__sum16))
+
 /* TX descriptors per packet */
 #define NUM_TX_DESC_GEN2       2
 #define NUM_TX_DESC_GEN3       1
@@ -1018,7 +1020,6 @@ struct ravb_private {
        u32 dirty_rx[NUM_RX_QUEUE];     /* Producer ring indices */
        u32 cur_tx[NUM_TX_QUEUE];
        u32 dirty_tx[NUM_TX_QUEUE];
-       u32 rx_buf_sz;                  /* Based on MTU+slack. */
        struct napi_struct napi[NUM_RX_QUEUE];
        struct work_struct work;
        /* MII transceiver section. */
index 5ea14b5..4b13a18 100644 (file)
@@ -230,7 +230,7 @@ static void ravb_ring_free(struct net_device *ndev, int q)
                                               le32_to_cpu(desc->dptr)))
                                dma_unmap_single(ndev->dev.parent,
                                                 le32_to_cpu(desc->dptr),
-                                                priv->rx_buf_sz,
+                                                RX_BUF_SZ,
                                                 DMA_FROM_DEVICE);
                }
                ring_size = sizeof(struct ravb_ex_rx_desc) *
@@ -293,9 +293,9 @@ static void ravb_ring_format(struct net_device *ndev, int q)
        for (i = 0; i < priv->num_rx_ring[q]; i++) {
                /* RX descriptor */
                rx_desc = &priv->rx_ring[q][i];
-               rx_desc->ds_cc = cpu_to_le16(priv->rx_buf_sz);
+               rx_desc->ds_cc = cpu_to_le16(RX_BUF_SZ);
                dma_addr = dma_map_single(ndev->dev.parent, priv->rx_skb[q][i]->data,
-                                         priv->rx_buf_sz,
+                                         RX_BUF_SZ,
                                          DMA_FROM_DEVICE);
                /* We just set the data size to 0 for a failed mapping which
                 * should prevent DMA from happening...
@@ -342,9 +342,6 @@ static int ravb_ring_init(struct net_device *ndev, int q)
        int ring_size;
        int i;
 
-       priv->rx_buf_sz = (ndev->mtu <= 1492 ? PKT_BUF_SZ : ndev->mtu) +
-               ETH_HLEN + VLAN_HLEN + sizeof(__sum16);
-
        /* Allocate RX and TX skb rings */
        priv->rx_skb[q] = kcalloc(priv->num_rx_ring[q],
                                  sizeof(*priv->rx_skb[q]), GFP_KERNEL);
@@ -354,7 +351,7 @@ static int ravb_ring_init(struct net_device *ndev, int q)
                goto error;
 
        for (i = 0; i < priv->num_rx_ring[q]; i++) {
-               skb = netdev_alloc_skb(ndev, priv->rx_buf_sz + RAVB_ALIGN - 1);
+               skb = netdev_alloc_skb(ndev, RX_BUF_SZ + RAVB_ALIGN - 1);
                if (!skb)
                        goto error;
                ravb_set_buffer_align(skb);
@@ -584,7 +581,7 @@ static bool ravb_rx(struct net_device *ndev, int *quota, int q)
                        skb = priv->rx_skb[q][entry];
                        priv->rx_skb[q][entry] = NULL;
                        dma_unmap_single(ndev->dev.parent, le32_to_cpu(desc->dptr),
-                                        priv->rx_buf_sz,
+                                        RX_BUF_SZ,
                                         DMA_FROM_DEVICE);
                        get_ts &= (q == RAVB_NC) ?
                                        RAVB_RXTSTAMP_TYPE_V2_L2_EVENT :
@@ -617,11 +614,11 @@ static bool ravb_rx(struct net_device *ndev, int *quota, int q)
        for (; priv->cur_rx[q] - priv->dirty_rx[q] > 0; priv->dirty_rx[q]++) {
                entry = priv->dirty_rx[q] % priv->num_rx_ring[q];
                desc = &priv->rx_ring[q][entry];
-               desc->ds_cc = cpu_to_le16(priv->rx_buf_sz);
+               desc->ds_cc = cpu_to_le16(RX_BUF_SZ);
 
                if (!priv->rx_skb[q][entry]) {
                        skb = netdev_alloc_skb(ndev,
-                                              priv->rx_buf_sz +
+                                              RX_BUF_SZ +
                                               RAVB_ALIGN - 1);
                        if (!skb)
                                break;  /* Better luck next round. */
@@ -1801,10 +1798,15 @@ static int ravb_do_ioctl(struct net_device *ndev, struct ifreq *req, int cmd)
 
 static int ravb_change_mtu(struct net_device *ndev, int new_mtu)
 {
-       if (netif_running(ndev))
-               return -EBUSY;
+       struct ravb_private *priv = netdev_priv(ndev);
 
        ndev->mtu = new_mtu;
+
+       if (netif_running(ndev)) {
+               synchronize_irq(priv->emac_irq);
+               ravb_emac_init(ndev);
+       }
+
        netdev_update_features(ndev);
 
        return 0;
index 9a42580..6984bd5 100644 (file)
@@ -182,6 +182,13 @@ static int ravb_ptp_extts(struct ptp_clock_info *ptp,
        struct net_device *ndev = priv->ndev;
        unsigned long flags;
 
+       /* Reject requests with unsupported flags */
+       if (req->flags & ~(PTP_ENABLE_FEATURE |
+                          PTP_RISING_EDGE |
+                          PTP_FALLING_EDGE |
+                          PTP_STRICT_FLAGS))
+               return -EOPNOTSUPP;
+
        if (req->index)
                return -EINVAL;
 
@@ -211,6 +218,10 @@ static int ravb_ptp_perout(struct ptp_clock_info *ptp,
        unsigned long flags;
        int error = 0;
 
+       /* Reject requests with unsupported flags */
+       if (req->flags)
+               return -EOPNOTSUPP;
+
        if (req->index)
                return -EINVAL;
 
index eefb06d..1c8d84e 100644 (file)
@@ -1227,7 +1227,7 @@ static int sun8i_dwmac_probe(struct platform_device *pdev)
 dwmac_mux:
        sun8i_dwmac_unset_syscon(gmac);
 dwmac_exit:
-       sun8i_dwmac_exit(pdev, plat_dat->bsp_priv);
+       stmmac_pltfr_remove(pdev);
 return ret;
 }
 
index 775db77..23fecf6 100644 (file)
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
 // Copyright (c) 2017 Synopsys, Inc. and/or its affiliates.
 // stmmac Support for 5.xx Ethernet QoS cores
 
index e908d80..3b6e559 100644 (file)
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
 /*
  * Copyright (c) 2018 Synopsys, Inc. and/or its affiliates.
  * stmmac XGMAC definitions.
index 509daee..aa5b917 100644 (file)
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
 // Copyright (c) 2018 Synopsys, Inc. and/or its affiliates.
 // stmmac HW Interface Callbacks
 
index df638b1..0989e2b 100644 (file)
@@ -140,6 +140,10 @@ static int stmmac_enable(struct ptp_clock_info *ptp,
 
        switch (rq->type) {
        case PTP_CLK_REQ_PEROUT:
+               /* Reject requests with unsupported flags */
+               if (rq->perout.flags)
+                       return -EOPNOTSUPP;
+
                cfg = &priv->pps[rq->perout.index];
 
                cfg->start.tv_sec = rq->perout.start.sec;
index 6580094..8f241b5 100644 (file)
@@ -469,6 +469,19 @@ static int ptp_dp83640_enable(struct ptp_clock_info *ptp,
 
        switch (rq->type) {
        case PTP_CLK_REQ_EXTTS:
+               /* Reject requests with unsupported flags */
+               if (rq->extts.flags & ~(PTP_ENABLE_FEATURE |
+                                       PTP_RISING_EDGE |
+                                       PTP_FALLING_EDGE |
+                                       PTP_STRICT_FLAGS))
+                       return -EOPNOTSUPP;
+
+               /* Reject requests to enable time stamping on both edges. */
+               if ((rq->extts.flags & PTP_STRICT_FLAGS) &&
+                   (rq->extts.flags & PTP_ENABLE_FEATURE) &&
+                   (rq->extts.flags & PTP_EXTTS_EDGES) == PTP_EXTTS_EDGES)
+                       return -EOPNOTSUPP;
+
                index = rq->extts.index;
                if (index >= N_EXT_TS)
                        return -EINVAL;
@@ -491,6 +504,9 @@ static int ptp_dp83640_enable(struct ptp_clock_info *ptp,
                return 0;
 
        case PTP_CLK_REQ_PEROUT:
+               /* Reject requests with unsupported flags */
+               if (rq->perout.flags)
+                       return -EOPNOTSUPP;
                if (rq->perout.index >= N_PER_OUT)
                        return -EINVAL;
                return periodic_output(clock, rq, on, rq->perout.index);
index 2e29ab8..3587656 100644 (file)
@@ -64,11 +64,12 @@ static int mdiobus_register_reset(struct mdio_device *mdiodev)
        if (mdiodev->dev.of_node)
                reset = devm_reset_control_get_exclusive(&mdiodev->dev,
                                                         "phy");
-       if (PTR_ERR(reset) == -ENOENT ||
-           PTR_ERR(reset) == -ENOTSUPP)
-               reset = NULL;
-       else if (IS_ERR(reset))
-               return PTR_ERR(reset);
+       if (IS_ERR(reset)) {
+               if (PTR_ERR(reset) == -ENOENT || PTR_ERR(reset) == -ENOSYS)
+                       reset = NULL;
+               else
+                       return PTR_ERR(reset);
+       }
 
        mdiodev->reset_ctrl = reset;
 
index cac64b9..4d479e3 100644 (file)
@@ -855,6 +855,7 @@ err_free_chan:
        sl->tty = NULL;
        tty->disc_data = NULL;
        clear_bit(SLF_INUSE, &sl->flags);
+       free_netdev(sl->dev);
 
 err_exit:
        rtnl_unlock();
index 011bd4c..af3994e 100644 (file)
@@ -196,7 +196,7 @@ static int ax88172a_bind(struct usbnet *dev, struct usb_interface *intf)
 
        /* Get the MAC address */
        ret = asix_read_cmd(dev, AX_CMD_READ_NODE_ID, 0, 0, ETH_ALEN, buf, 0);
-       if (ret < 0) {
+       if (ret < ETH_ALEN) {
                netdev_err(dev->net, "Failed to read MAC address: %d\n", ret);
                goto free;
        }
index a245597..c2c82e6 100644 (file)
@@ -579,7 +579,7 @@ static void cdc_ncm_set_dgram_size(struct usbnet *dev, int new_size)
        err = usbnet_read_cmd(dev, USB_CDC_GET_MAX_DATAGRAM_SIZE,
                              USB_TYPE_CLASS | USB_DIR_IN | USB_RECIP_INTERFACE,
                              0, iface_no, &max_datagram_size, sizeof(max_datagram_size));
-       if (err < sizeof(max_datagram_size)) {
+       if (err != sizeof(max_datagram_size)) {
                dev_dbg(&dev->intf->dev, "GET_MAX_DATAGRAM_SIZE failed\n");
                goto out;
        }
index 56d334b..4196c0e 100644 (file)
@@ -1371,6 +1371,8 @@ static const struct usb_device_id products[] = {
        {QMI_QUIRK_SET_DTR(0x2c7c, 0x0191, 4)}, /* Quectel EG91 */
        {QMI_FIXED_INTF(0x2c7c, 0x0296, 4)},    /* Quectel BG96 */
        {QMI_QUIRK_SET_DTR(0x2cb7, 0x0104, 4)}, /* Fibocom NL678 series */
+       {QMI_FIXED_INTF(0x0489, 0xe0b4, 0)},    /* Foxconn T77W968 LTE */
+       {QMI_FIXED_INTF(0x0489, 0xe0b5, 0)},    /* Foxconn T77W968 LTE with eSIM support */
 
        /* 4. Gobi 1000 devices */
        {QMI_GOBI1K_DEVICE(0x05c6, 0x9212)},    /* Acer Gobi Modem Device */
index 4c9c78d..8323fa7 100644 (file)
@@ -251,27 +251,23 @@ static int iwl_pcie_gen2_build_amsdu(struct iwl_trans *trans,
        struct ieee80211_hdr *hdr = (void *)skb->data;
        unsigned int snap_ip_tcp_hdrlen, ip_hdrlen, total_len, hdr_room;
        unsigned int mss = skb_shinfo(skb)->gso_size;
-       u16 length, iv_len, amsdu_pad;
+       u16 length, amsdu_pad;
        u8 *start_hdr;
        struct iwl_tso_hdr_page *hdr_page;
        struct page **page_ptr;
        struct tso_t tso;
 
-       /* if the packet is protected, then it must be CCMP or GCMP */
-       iv_len = ieee80211_has_protected(hdr->frame_control) ?
-               IEEE80211_CCMP_HDR_LEN : 0;
-
        trace_iwlwifi_dev_tx(trans->dev, skb, tfd, sizeof(*tfd),
                             &dev_cmd->hdr, start_len, 0);
 
        ip_hdrlen = skb_transport_header(skb) - skb_network_header(skb);
        snap_ip_tcp_hdrlen = 8 + ip_hdrlen + tcp_hdrlen(skb);
-       total_len = skb->len - snap_ip_tcp_hdrlen - hdr_len - iv_len;
+       total_len = skb->len - snap_ip_tcp_hdrlen - hdr_len;
        amsdu_pad = 0;
 
        /* total amount of header we may need for this A-MSDU */
        hdr_room = DIV_ROUND_UP(total_len, mss) *
-               (3 + snap_ip_tcp_hdrlen + sizeof(struct ethhdr)) + iv_len;
+               (3 + snap_ip_tcp_hdrlen + sizeof(struct ethhdr));
 
        /* Our device supports 9 segments at most, it will fit in 1 page */
        hdr_page = get_page_hdr(trans, hdr_room);
@@ -282,14 +278,12 @@ static int iwl_pcie_gen2_build_amsdu(struct iwl_trans *trans,
        start_hdr = hdr_page->pos;
        page_ptr = (void *)((u8 *)skb->cb + trans_pcie->page_offs);
        *page_ptr = hdr_page->page;
-       memcpy(hdr_page->pos, skb->data + hdr_len, iv_len);
-       hdr_page->pos += iv_len;
 
        /*
-        * Pull the ieee80211 header + IV to be able to use TSO core,
+        * Pull the ieee80211 header to be able to use TSO core,
         * we will restore it for the tx_status flow.
         */
-       skb_pull(skb, hdr_len + iv_len);
+       skb_pull(skb, hdr_len);
 
        /*
         * Remove the length of all the headers that we don't actually
@@ -364,8 +358,8 @@ static int iwl_pcie_gen2_build_amsdu(struct iwl_trans *trans,
                }
        }
 
-       /* re -add the WiFi header and IV */
-       skb_push(skb, hdr_len + iv_len);
+       /* re-add the WiFi header */
+       skb_push(skb, hdr_len);
 
        return 0;
 
index 307bd2a..4d1909a 100644 (file)
@@ -220,8 +220,10 @@ static irqreturn_t nxp_nci_i2c_irq_thread_fn(int irq, void *phy_id)
 
        if (r == -EREMOTEIO) {
                phy->hard_fault = r;
-               skb = NULL;
-       } else if (r < 0) {
+               if (info->mode == NXP_NCI_MODE_FW)
+                       nxp_nci_fw_recv_frame(phy->ndev, NULL);
+       }
+       if (r < 0) {
                nfc_err(&client->dev, "Read failed with error %d\n", r);
                goto exit_irq_handled;
        }
index c6251ea..2c419fa 100644 (file)
@@ -147,6 +147,7 @@ struct chv_pin_context {
  * @pctldesc: Pin controller description
  * @pctldev: Pointer to the pin controller device
  * @chip: GPIO chip in this pin controller
+ * @irqchip: IRQ chip in this pin controller
  * @regs: MMIO registers
  * @intr_lines: Stores mapping between 16 HW interrupt wires and GPIO
  *             offset (in GPIO number space)
@@ -162,6 +163,7 @@ struct chv_pinctrl {
        struct pinctrl_desc pctldesc;
        struct pinctrl_dev *pctldev;
        struct gpio_chip chip;
+       struct irq_chip irqchip;
        void __iomem *regs;
        unsigned intr_lines[16];
        const struct chv_community *community;
@@ -1466,16 +1468,6 @@ static int chv_gpio_irq_type(struct irq_data *d, unsigned int type)
        return 0;
 }
 
-static struct irq_chip chv_gpio_irqchip = {
-       .name = "chv-gpio",
-       .irq_startup = chv_gpio_irq_startup,
-       .irq_ack = chv_gpio_irq_ack,
-       .irq_mask = chv_gpio_irq_mask,
-       .irq_unmask = chv_gpio_irq_unmask,
-       .irq_set_type = chv_gpio_irq_type,
-       .flags = IRQCHIP_SKIP_SET_WAKE,
-};
-
 static void chv_gpio_irq_handler(struct irq_desc *desc)
 {
        struct gpio_chip *gc = irq_desc_get_handler_data(desc);
@@ -1559,7 +1551,7 @@ static void chv_init_irq_valid_mask(struct gpio_chip *chip,
                intsel >>= CHV_PADCTRL0_INTSEL_SHIFT;
 
                if (intsel >= community->nirqs)
-                       clear_bit(i, valid_mask);
+                       clear_bit(desc->number, valid_mask);
        }
 }
 
@@ -1625,7 +1617,15 @@ static int chv_gpio_probe(struct chv_pinctrl *pctrl, int irq)
                }
        }
 
-       ret = gpiochip_irqchip_add(chip, &chv_gpio_irqchip, 0,
+       pctrl->irqchip.name = "chv-gpio";
+       pctrl->irqchip.irq_startup = chv_gpio_irq_startup;
+       pctrl->irqchip.irq_ack = chv_gpio_irq_ack;
+       pctrl->irqchip.irq_mask = chv_gpio_irq_mask;
+       pctrl->irqchip.irq_unmask = chv_gpio_irq_unmask;
+       pctrl->irqchip.irq_set_type = chv_gpio_irq_type;
+       pctrl->irqchip.flags = IRQCHIP_SKIP_SET_WAKE;
+
+       ret = gpiochip_irqchip_add(chip, &pctrl->irqchip, 0,
                                   handle_bad_irq, IRQ_TYPE_NONE);
        if (ret) {
                dev_err(pctrl->dev, "failed to add IRQ chip\n");
@@ -1642,7 +1642,7 @@ static int chv_gpio_probe(struct chv_pinctrl *pctrl, int irq)
                }
        }
 
-       gpiochip_set_chained_irqchip(chip, &chv_gpio_irqchip, irq,
+       gpiochip_set_chained_irqchip(chip, &pctrl->irqchip, irq,
                                     chv_gpio_irq_handler);
        return 0;
 }
index bc01359..83981ad 100644 (file)
@@ -52,6 +52,7 @@
 #define PADCFG0_GPIROUTNMI             BIT(17)
 #define PADCFG0_PMODE_SHIFT            10
 #define PADCFG0_PMODE_MASK             GENMASK(13, 10)
+#define PADCFG0_PMODE_GPIO             0
 #define PADCFG0_GPIORXDIS              BIT(9)
 #define PADCFG0_GPIOTXDIS              BIT(8)
 #define PADCFG0_GPIORXSTATE            BIT(1)
@@ -332,7 +333,7 @@ static void intel_pin_dbg_show(struct pinctrl_dev *pctldev, struct seq_file *s,
        cfg1 = readl(intel_get_padcfg(pctrl, pin, PADCFG1));
 
        mode = (cfg0 & PADCFG0_PMODE_MASK) >> PADCFG0_PMODE_SHIFT;
-       if (!mode)
+       if (mode == PADCFG0_PMODE_GPIO)
                seq_puts(s, "GPIO ");
        else
                seq_printf(s, "mode %d ", mode);
@@ -458,6 +459,11 @@ static void __intel_gpio_set_direction(void __iomem *padcfg0, bool input)
        writel(value, padcfg0);
 }
 
+static int intel_gpio_get_gpio_mode(void __iomem *padcfg0)
+{
+       return (readl(padcfg0) & PADCFG0_PMODE_MASK) >> PADCFG0_PMODE_SHIFT;
+}
+
 static void intel_gpio_set_gpio_mode(void __iomem *padcfg0)
 {
        u32 value;
@@ -491,7 +497,20 @@ static int intel_gpio_request_enable(struct pinctrl_dev *pctldev,
        }
 
        padcfg0 = intel_get_padcfg(pctrl, pin, PADCFG0);
+
+       /*
+        * If pin is already configured in GPIO mode, we assume that
+        * firmware provides correct settings. In such case we avoid
+        * potential glitches on the pin. Otherwise, for the pin in
+        * alternative mode, consumer has to supply respective flags.
+        */
+       if (intel_gpio_get_gpio_mode(padcfg0) == PADCFG0_PMODE_GPIO) {
+               raw_spin_unlock_irqrestore(&pctrl->lock, flags);
+               return 0;
+       }
+
        intel_gpio_set_gpio_mode(padcfg0);
+
        /* Disable TX buffer and enable RX (this will be input) */
        __intel_gpio_set_direction(padcfg0, true);
 
index 5646600..ccdf0bb 100644 (file)
@@ -585,19 +585,6 @@ static int stmfx_pinctrl_gpio_function_enable(struct stmfx_pinctrl *pctl)
        return stmfx_function_enable(pctl->stmfx, func);
 }
 
-static int stmfx_pinctrl_gpio_init_valid_mask(struct gpio_chip *gc,
-                                             unsigned long *valid_mask,
-                                             unsigned int ngpios)
-{
-       struct stmfx_pinctrl *pctl = gpiochip_get_data(gc);
-       u32 n;
-
-       for_each_clear_bit(n, &pctl->gpio_valid_mask, ngpios)
-               clear_bit(n, valid_mask);
-
-       return 0;
-}
-
 static int stmfx_pinctrl_probe(struct platform_device *pdev)
 {
        struct stmfx *stmfx = dev_get_drvdata(pdev->dev.parent);
@@ -660,7 +647,6 @@ static int stmfx_pinctrl_probe(struct platform_device *pdev)
        pctl->gpio_chip.ngpio = pctl->pctl_desc.npins;
        pctl->gpio_chip.can_sleep = true;
        pctl->gpio_chip.of_node = np;
-       pctl->gpio_chip.init_valid_mask = stmfx_pinctrl_gpio_init_valid_mask;
 
        ret = devm_gpiochip_add_data(pctl->dev, &pctl->gpio_chip, pctl);
        if (ret) {
index 67d0199..9d72ab5 100644 (file)
@@ -149,11 +149,21 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
                        err = -EFAULT;
                        break;
                }
-               if (((req.extts.flags & ~PTP_EXTTS_VALID_FLAGS) ||
-                       req.extts.rsv[0] || req.extts.rsv[1]) &&
-                       cmd == PTP_EXTTS_REQUEST2) {
-                       err = -EINVAL;
-                       break;
+               if (cmd == PTP_EXTTS_REQUEST2) {
+                       /* Tell the drivers to check the flags carefully. */
+                       req.extts.flags |= PTP_STRICT_FLAGS;
+                       /* Make sure no reserved bit is set. */
+                       if ((req.extts.flags & ~PTP_EXTTS_VALID_FLAGS) ||
+                           req.extts.rsv[0] || req.extts.rsv[1]) {
+                               err = -EINVAL;
+                               break;
+                       }
+                       /* Ensure one of the rising/falling edge bits is set. */
+                       if ((req.extts.flags & PTP_ENABLE_FEATURE) &&
+                           (req.extts.flags & PTP_EXTTS_EDGES) == 0) {
+                               err = -EINVAL;
+                               break;
+                       }
                } else if (cmd == PTP_EXTTS_REQUEST) {
                        req.extts.flags &= PTP_EXTTS_V1_VALID_FLAGS;
                        req.extts.rsv[0] = 0;
index 213ff40..3c9a64c 100644 (file)
@@ -76,7 +76,6 @@ static const char *rcdev_name(struct reset_controller_dev *rcdev)
  * of_reset_simple_xlate - translate reset_spec to the reset line number
  * @rcdev: a pointer to the reset controller device
  * @reset_spec: reset line specifier as found in the device tree
- * @flags: a flags pointer to fill in (optional)
  *
  * This simple translation function should be used for reset controllers
  * with 1:1 mapping, where reset lines can be indexed by number without gaps.
@@ -748,6 +747,7 @@ static void reset_control_array_put(struct reset_control_array *resets)
        for (i = 0; i < resets->num_rstcs; i++)
                __reset_control_put_internal(resets->rstc[i]);
        mutex_unlock(&reset_list_mutex);
+       kfree(resets);
 }
 
 /**
@@ -825,9 +825,10 @@ int __device_reset(struct device *dev, bool optional)
 }
 EXPORT_SYMBOL_GPL(__device_reset);
 
-/**
+/*
  * APIs to manage an array of reset controls.
  */
+
 /**
  * of_reset_control_get_count - Count number of resets available with a device
  *
index 6afad68..2382409 100644 (file)
@@ -76,9 +76,11 @@ qla24xx_deallocate_vp_id(scsi_qla_host_t *vha)
         * ensures no active vp_list traversal while the vport is removed
         * from the queue)
         */
-       for (i = 0; i < 10 && atomic_read(&vha->vref_count); i++)
-               wait_event_timeout(vha->vref_waitq,
-                   atomic_read(&vha->vref_count), HZ);
+       for (i = 0; i < 10; i++) {
+               if (wait_event_timeout(vha->vref_waitq,
+                   !atomic_read(&vha->vref_count), HZ) > 0)
+                       break;
+       }
 
        spin_lock_irqsave(&ha->vport_slock, flags);
        if (atomic_read(&vha->vref_count)) {
index 337162a..726ad4c 100644 (file)
@@ -1119,9 +1119,11 @@ qla2x00_wait_for_sess_deletion(scsi_qla_host_t *vha)
 
        qla2x00_mark_all_devices_lost(vha, 0);
 
-       for (i = 0; i < 10; i++)
-               wait_event_timeout(vha->fcport_waitQ, test_fcport_count(vha),
-                   HZ);
+       for (i = 0; i < 10; i++) {
+               if (wait_event_timeout(vha->fcport_waitQ,
+                   test_fcport_count(vha), HZ) > 0)
+                       break;
+       }
 
        flush_workqueue(vha->hw->wq);
 }
index 5447738..91c007d 100644 (file)
@@ -1883,7 +1883,8 @@ int scsi_mq_setup_tags(struct Scsi_Host *shost)
 {
        unsigned int cmd_size, sgl_size;
 
-       sgl_size = scsi_mq_inline_sgl_size(shost);
+       sgl_size = max_t(unsigned int, sizeof(struct scatterlist),
+                               scsi_mq_inline_sgl_size(shost));
        cmd_size = sizeof(struct scsi_cmnd) + shost->hostt->cmd_size + sgl_size;
        if (scsi_host_get_prot(shost))
                cmd_size += sizeof(struct scsi_data_buffer) +
index de4019d..1efc69e 100644 (file)
@@ -263,25 +263,16 @@ void sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes,
        int result = cmd->result;
        struct request *rq = cmd->request;
 
-       switch (req_op(rq)) {
-       case REQ_OP_ZONE_RESET:
-       case REQ_OP_ZONE_RESET_ALL:
-
-               if (result &&
-                   sshdr->sense_key == ILLEGAL_REQUEST &&
-                   sshdr->asc == 0x24)
-                       /*
-                        * INVALID FIELD IN CDB error: reset of a conventional
-                        * zone was attempted. Nothing to worry about, so be
-                        * quiet about the error.
-                        */
-                       rq->rq_flags |= RQF_QUIET;
-               break;
-
-       case REQ_OP_WRITE:
-       case REQ_OP_WRITE_ZEROES:
-       case REQ_OP_WRITE_SAME:
-               break;
+       if (req_op(rq) == REQ_OP_ZONE_RESET &&
+           result &&
+           sshdr->sense_key == ILLEGAL_REQUEST &&
+           sshdr->asc == 0x24) {
+               /*
+                * INVALID FIELD IN CDB error: reset of a conventional
+                * zone was attempted. Nothing to worry about, so be
+                * quiet about the error.
+                */
+               rq->rq_flags |= RQF_QUIET;
        }
 }
 
index d9231bd..98b9d9a 100644 (file)
@@ -249,13 +249,13 @@ static struct genpd_power_state imx6_pm_domain_pu_state = {
 };
 
 static struct imx_pm_domain imx_gpc_domains[] = {
-       [GPC_PGC_DOMAIN_ARM] {
+       [GPC_PGC_DOMAIN_ARM] = {
                .base = {
                        .name = "ARM",
                        .flags = GENPD_FLAG_ALWAYS_ON,
                },
        },
-       [GPC_PGC_DOMAIN_PU] {
+       [GPC_PGC_DOMAIN_PU] = {
                .base = {
                        .name = "PU",
                        .power_off = imx6_pm_domain_power_off,
@@ -266,7 +266,7 @@ static struct imx_pm_domain imx_gpc_domains[] = {
                .reg_offs = 0x260,
                .cntr_pdn_bit = 0,
        },
-       [GPC_PGC_DOMAIN_DISPLAY] {
+       [GPC_PGC_DOMAIN_DISPLAY] = {
                .base = {
                        .name = "DISPLAY",
                        .power_off = imx6_pm_domain_power_off,
@@ -275,7 +275,7 @@ static struct imx_pm_domain imx_gpc_domains[] = {
                .reg_offs = 0x240,
                .cntr_pdn_bit = 4,
        },
-       [GPC_PGC_DOMAIN_PCI] {
+       [GPC_PGC_DOMAIN_PCI] = {
                .base = {
                        .name = "PCI",
                        .power_off = imx6_pm_domain_power_off,
index f518273..c8c80df 100644 (file)
@@ -5,6 +5,7 @@
 
 menuconfig SOUNDWIRE
        tristate "SoundWire support"
+       depends on ACPI || OF
        help
          SoundWire is a 2-Pin interface with data and clock line ratified
          by the MIPI Alliance. SoundWire is used for transporting data
index f1e38a2..13c54ea 100644 (file)
@@ -900,7 +900,7 @@ static int intel_register_dai(struct sdw_intel *sdw)
        /* Create PCM DAIs */
        stream = &cdns->pcm;
 
-       ret = intel_create_dai(cdns, dais, INTEL_PDI_IN, stream->num_in,
+       ret = intel_create_dai(cdns, dais, INTEL_PDI_IN, cdns->pcm.num_in,
                               off, stream->num_ch_in, true);
        if (ret)
                return ret;
@@ -931,7 +931,7 @@ static int intel_register_dai(struct sdw_intel *sdw)
        if (ret)
                return ret;
 
-       off += cdns->pdm.num_bd;
+       off += cdns->pdm.num_out;
        ret = intel_create_dai(cdns, dais, INTEL_PDI_BD, cdns->pdm.num_bd,
                               off, stream->num_ch_bd, false);
        if (ret)
index 48a63ca..6473fa6 100644 (file)
@@ -128,7 +128,8 @@ int sdw_of_find_slaves(struct sdw_bus *bus)
        struct device_node *node;
 
        for_each_child_of_node(bus->dev->of_node, node) {
-               int link_id, sdw_version, ret, len;
+               int link_id, ret, len;
+               unsigned int sdw_version;
                const char *compat = NULL;
                struct sdw_slave_id id;
                const __be32 *addr;
index 61cd09c..6795851 100644 (file)
@@ -80,7 +80,6 @@ static void icl_nhi_lc_mailbox_cmd(struct tb_nhi *nhi, enum icl_lc_mailbox_cmd c
 {
        u32 data;
 
-       pci_read_config_dword(nhi->pdev, VS_CAP_19, &data);
        data = (cmd << VS_CAP_19_CMD_SHIFT) & VS_CAP_19_CMD_MASK;
        pci_write_config_dword(nhi->pdev, VS_CAP_19, data | VS_CAP_19_VALID);
 }
index 410bf1b..5ea8db6 100644 (file)
@@ -896,12 +896,13 @@ int tb_dp_port_set_hops(struct tb_port *port, unsigned int video,
  */
 bool tb_dp_port_is_enabled(struct tb_port *port)
 {
-       u32 data;
+       u32 data[2];
 
-       if (tb_port_read(port, &data, TB_CFG_PORT, port->cap_adap, 1))
+       if (tb_port_read(port, data, TB_CFG_PORT, port->cap_adap,
+                        ARRAY_SIZE(data)))
                return false;
 
-       return !!(data & (TB_DP_VIDEO_EN | TB_DP_AUX_EN));
+       return !!(data[0] & (TB_DP_VIDEO_EN | TB_DP_AUX_EN));
 }
 
 /**
@@ -914,19 +915,21 @@ bool tb_dp_port_is_enabled(struct tb_port *port)
  */
 int tb_dp_port_enable(struct tb_port *port, bool enable)
 {
-       u32 data;
+       u32 data[2];
        int ret;
 
-       ret = tb_port_read(port, &data, TB_CFG_PORT, port->cap_adap, 1);
+       ret = tb_port_read(port, data, TB_CFG_PORT, port->cap_adap,
+                          ARRAY_SIZE(data));
        if (ret)
                return ret;
 
        if (enable)
-               data |= TB_DP_VIDEO_EN | TB_DP_AUX_EN;
+               data[0] |= TB_DP_VIDEO_EN | TB_DP_AUX_EN;
        else
-               data &= ~(TB_DP_VIDEO_EN | TB_DP_AUX_EN);
+               data[0] &= ~(TB_DP_VIDEO_EN | TB_DP_AUX_EN);
 
-       return tb_port_write(port, &data, TB_CFG_PORT, port->cap_adap, 1);
+       return tb_port_write(port, data, TB_CFG_PORT, port->cap_adap,
+                            ARRAY_SIZE(data));
 }
 
 /* switch utility functions */
@@ -1031,13 +1034,6 @@ static int tb_switch_set_authorized(struct tb_switch *sw, unsigned int val)
        if (sw->authorized)
                goto unlock;
 
-       /*
-        * Make sure there is no PCIe rescan ongoing when a new PCIe
-        * tunnel is created. Otherwise the PCIe rescan code might find
-        * the new tunnel too early.
-        */
-       pci_lock_rescan_remove();
-
        switch (val) {
        /* Approve switch */
        case 1:
@@ -1057,8 +1053,6 @@ static int tb_switch_set_authorized(struct tb_switch *sw, unsigned int val)
                break;
        }
 
-       pci_unlock_rescan_remove();
-
        if (!ret) {
                sw->authorized = val;
                /* Notify status change to the userspace */
index b0152fe..bc60e03 100644 (file)
@@ -288,3 +288,4 @@ module_platform_driver(bd70528_wdt);
 MODULE_AUTHOR("Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>");
 MODULE_DESCRIPTION("BD70528 watchdog driver");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:bd70528-wdt");
index 9393be5..808eeb4 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/interrupt.h>
 #include <linux/ioport.h>
 #include <linux/timer.h>
+#include <linux/compat.h>
 #include <linux/slab.h>
 #include <linux/mutex.h>
 #include <linux/io.h>
@@ -473,6 +474,11 @@ static long cpwd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
        return 0;
 }
 
+static long cpwd_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+       return cpwd_ioctl(file, cmd, (unsigned long)compat_ptr(arg));
+}
+
 static ssize_t cpwd_write(struct file *file, const char __user *buf,
                          size_t count, loff_t *ppos)
 {
@@ -497,7 +503,7 @@ static ssize_t cpwd_read(struct file *file, char __user *buffer,
 static const struct file_operations cpwd_fops = {
        .owner =                THIS_MODULE,
        .unlocked_ioctl =       cpwd_ioctl,
-       .compat_ioctl =         compat_ptr_ioctl,
+       .compat_ioctl =         cpwd_compat_ioctl,
        .open =                 cpwd_open,
        .write =                cpwd_write,
        .read =                 cpwd_read,
index 7ea5cf5..8ed89f0 100644 (file)
@@ -99,8 +99,14 @@ static int imx_sc_wdt_set_pretimeout(struct watchdog_device *wdog,
 {
        struct arm_smccc_res res;
 
+       /*
+        * SCU firmware calculates pretimeout based on current time
+        * stamp instead of watchdog timeout stamp, need to convert
+        * the pretimeout to SCU firmware's timeout value.
+        */
        arm_smccc_smc(IMX_SIP_TIMER, IMX_SIP_TIMER_SET_PRETIME_WDOG,
-                     pretimeout * 1000, 0, 0, 0, 0, 0, &res);
+                     (wdog->timeout - pretimeout) * 1000, 0, 0, 0,
+                     0, 0, &res);
        if (res.a0)
                return -EACCES;
 
index d17c1a6..5a9ca10 100644 (file)
@@ -89,8 +89,8 @@ static unsigned int meson_gxbb_wdt_get_timeleft(struct watchdog_device *wdt_dev)
 
        reg = readl(data->reg_base + GXBB_WDT_TCNT_REG);
 
-       return ((reg >> GXBB_WDT_TCNT_CNT_SHIFT) -
-               (reg & GXBB_WDT_TCNT_SETUP_MASK)) / 1000;
+       return ((reg & GXBB_WDT_TCNT_SETUP_MASK) -
+               (reg >> GXBB_WDT_TCNT_CNT_SHIFT)) / 1000;
 }
 
 static const struct watchdog_ops meson_gxbb_wdt_ops = {
index 2d36520..1213179 100644 (file)
@@ -163,9 +163,17 @@ static int pm8916_wdt_probe(struct platform_device *pdev)
 
        irq = platform_get_irq(pdev, 0);
        if (irq > 0) {
-               if (devm_request_irq(dev, irq, pm8916_wdt_isr, 0, "pm8916_wdt",
-                                    wdt))
-                       irq = 0;
+               err = devm_request_irq(dev, irq, pm8916_wdt_isr, 0,
+                                      "pm8916_wdt", wdt);
+               if (err)
+                       return err;
+
+               wdt->wdev.info = &pm8916_wdt_pt_ident;
+       } else {
+               if (irq == -EPROBE_DEFER)
+                       return -EPROBE_DEFER;
+
+               wdt->wdev.info = &pm8916_wdt_ident;
        }
 
        /* Configure watchdog to hard-reset mode */
@@ -177,7 +185,6 @@ static int pm8916_wdt_probe(struct platform_device *pdev)
                return err;
        }
 
-       wdt->wdev.info = (irq > 0) ? &pm8916_wdt_pt_ident : &pm8916_wdt_ident,
        wdt->wdev.ops = &pm8916_wdt_ops,
        wdt->wdev.parent = dev;
        wdt->wdev.min_timeout = PM8916_WDT_MIN_TIMEOUT;
index cc12772..497f979 100644 (file)
@@ -803,7 +803,12 @@ success:
                        continue;
 
                if (cookie->inodes[i]) {
-                       afs_vnode_commit_status(&fc, AFS_FS_I(cookie->inodes[i]),
+                       struct afs_vnode *iv = AFS_FS_I(cookie->inodes[i]);
+
+                       if (test_bit(AFS_VNODE_UNSET, &iv->flags))
+                               continue;
+
+                       afs_vnode_commit_status(&fc, iv,
                                                scb->cb_break, NULL, scb);
                        continue;
                }
index 01e0fb9..0d9a559 100644 (file)
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -2179,7 +2179,7 @@ SYSCALL_DEFINE5(io_getevents_time32, __u32, ctx_id,
 #ifdef CONFIG_COMPAT
 
 struct __compat_aio_sigset {
-       compat_sigset_t __user  *sigmask;
+       compat_uptr_t           sigmask;
        compat_size_t           sigsetsize;
 };
 
@@ -2193,7 +2193,7 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents,
                struct old_timespec32 __user *, timeout,
                const struct __compat_aio_sigset __user *, usig)
 {
-       struct __compat_aio_sigset ksig = { NULL, };
+       struct __compat_aio_sigset ksig = { 0, };
        struct timespec64 t;
        bool interrupted;
        int ret;
@@ -2204,7 +2204,7 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents,
        if (usig && copy_from_user(&ksig, usig, sizeof(ksig)))
                return -EFAULT;
 
-       ret = set_compat_user_sigmask(ksig.sigmask, ksig.sigsetsize);
+       ret = set_compat_user_sigmask(compat_ptr(ksig.sigmask), ksig.sigsetsize);
        if (ret)
                return ret;
 
@@ -2228,7 +2228,7 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents_time64,
                struct __kernel_timespec __user *, timeout,
                const struct __compat_aio_sigset __user *, usig)
 {
-       struct __compat_aio_sigset ksig = { NULL, };
+       struct __compat_aio_sigset ksig = { 0, };
        struct timespec64 t;
        bool interrupted;
        int ret;
@@ -2239,7 +2239,7 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents_time64,
        if (usig && copy_from_user(&ksig, usig, sizeof(ksig)))
                return -EFAULT;
 
-       ret = set_compat_user_sigmask(ksig.sigmask, ksig.sigsetsize);
+       ret = set_compat_user_sigmask(compat_ptr(ksig.sigmask), ksig.sigsetsize);
        if (ret)
                return ret;
 
index 2866fab..91f5787 100644 (file)
@@ -459,9 +459,10 @@ static struct dentry *autofs_expire_indirect(struct super_block *sb,
                 */
                how &= ~AUTOFS_EXP_LEAVES;
                found = should_expire(expired, mnt, timeout, how);
-               if (!found || found != expired)
-                       /* Something has changed, continue */
+               if (found != expired) { // something has changed, continue
+                       dput(found);
                        goto next;
+               }
 
                if (expired != dentry)
                        dput(dentry);
index c3f386b..0159100 100644 (file)
@@ -474,6 +474,7 @@ static noinline int compress_file_range(struct async_chunk *async_chunk)
        u64 start = async_chunk->start;
        u64 end = async_chunk->end;
        u64 actual_end;
+       u64 i_size;
        int ret = 0;
        struct page **pages = NULL;
        unsigned long nr_pages;
@@ -488,7 +489,19 @@ static noinline int compress_file_range(struct async_chunk *async_chunk)
        inode_should_defrag(BTRFS_I(inode), start, end, end - start + 1,
                        SZ_16K);
 
-       actual_end = min_t(u64, i_size_read(inode), end + 1);
+       /*
+        * We need to save i_size before now because it could change in between
+        * us evaluating the size and assigning it.  This is because we lock and
+        * unlock the page in truncate and fallocate, and then modify the i_size
+        * later on.
+        *
+        * The barriers are to emulate READ_ONCE, remove that once i_size_read
+        * does that for us.
+        */
+       barrier();
+       i_size = i_size_read(inode);
+       barrier();
+       actual_end = min_t(u64, i_size, end + 1);
 again:
        will_compress = 0;
        nr_pages = (end >> PAGE_SHIFT) - (start >> PAGE_SHIFT) + 1;
@@ -9731,6 +9744,18 @@ out_fail:
                        commit_transaction = true;
        }
        if (commit_transaction) {
+               /*
+                * We may have set commit_transaction when logging the new name
+                * in the destination root, in which case we left the source
+                * root context in the list of log contexts. So make sure we
+                * remove it to avoid invalid memory accesses, since the context
+                * was allocated in our stack frame.
+                */
+               if (sync_log_root) {
+                       mutex_lock(&root->log_mutex);
+                       list_del_init(&ctx_root.list);
+                       mutex_unlock(&root->log_mutex);
+               }
                ret = btrfs_commit_transaction(trans);
        } else {
                int ret2;
@@ -9744,6 +9769,9 @@ out_notrans:
        if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
                up_read(&fs_info->subvol_sem);
 
+       ASSERT(list_empty(&ctx_root.list));
+       ASSERT(list_empty(&ctx_dest.list));
+
        return ret;
 }
 
index 7c145a4..23272d9 100644 (file)
@@ -4195,9 +4195,6 @@ static noinline long btrfs_ioctl_start_sync(struct btrfs_root *root,
        u64 transid;
        int ret;
 
-       btrfs_warn(root->fs_info,
-       "START_SYNC ioctl is deprecated and will be removed in kernel 5.7");
-
        trans = btrfs_attach_transaction_barrier(root);
        if (IS_ERR(trans)) {
                if (PTR_ERR(trans) != -ENOENT)
@@ -4225,9 +4222,6 @@ static noinline long btrfs_ioctl_wait_sync(struct btrfs_fs_info *fs_info,
 {
        u64 transid;
 
-       btrfs_warn(fs_info,
-               "WAIT_SYNC ioctl is deprecated and will be removed in kernel 5.7");
-
        if (argp) {
                if (copy_from_user(&transid, argp, sizeof(transid)))
                        return -EFAULT;
index 98dc092..e8a4b0e 100644 (file)
@@ -893,6 +893,15 @@ static void wait_reserve_ticket(struct btrfs_fs_info *fs_info,
        while (ticket->bytes > 0 && ticket->error == 0) {
                ret = prepare_to_wait_event(&ticket->wait, &wait, TASK_KILLABLE);
                if (ret) {
+                       /*
+                        * Delete us from the list. After we unlock the space
+                        * info, we don't want the async reclaim job to reserve
+                        * space for this ticket. If that would happen, then the
+                        * ticket's task would not know that space was reserved
+                        * despite getting an error, resulting in a space leak
+                        * (bytes_may_use counter of our space_info).
+                        */
+                       list_del_init(&ticket->list);
                        ticket->error = -EINTR;
                        break;
                }
@@ -945,12 +954,24 @@ static int handle_reserve_ticket(struct btrfs_fs_info *fs_info,
        spin_lock(&space_info->lock);
        ret = ticket->error;
        if (ticket->bytes || ticket->error) {
+               /*
+                * Need to delete here for priority tickets. For regular tickets
+                * either the async reclaim job deletes the ticket from the list
+                * or we delete it ourselves at wait_reserve_ticket().
+                */
                list_del_init(&ticket->list);
                if (!ret)
                        ret = -ENOSPC;
        }
        spin_unlock(&space_info->lock);
        ASSERT(list_empty(&ticket->list));
+       /*
+        * Check that we can't have an error set if the reservation succeeded,
+        * as that would confuse tasks and lead them to error out without
+        * releasing reserved space (if an error happens the expectation is that
+        * space wasn't reserved at all).
+        */
+       ASSERT(!(ticket->bytes == 0 && ticket->error));
        return ret;
 }
 
index 43e488f..076d5b8 100644 (file)
@@ -686,9 +686,7 @@ static void dev_item_err(const struct extent_buffer *eb, int slot,
 static int check_dev_item(struct extent_buffer *leaf,
                          struct btrfs_key *key, int slot)
 {
-       struct btrfs_fs_info *fs_info = leaf->fs_info;
        struct btrfs_dev_item *ditem;
-       u64 max_devid = max(BTRFS_MAX_DEVS(fs_info), BTRFS_MAX_DEVS_SYS_CHUNK);
 
        if (key->objectid != BTRFS_DEV_ITEMS_OBJECTID) {
                dev_item_err(leaf, slot,
@@ -696,12 +694,6 @@ static int check_dev_item(struct extent_buffer *leaf,
                             key->objectid, BTRFS_DEV_ITEMS_OBJECTID);
                return -EUCLEAN;
        }
-       if (key->offset > max_devid) {
-               dev_item_err(leaf, slot,
-                            "invalid devid: has=%llu expect=[0, %llu]",
-                            key->offset, max_devid);
-               return -EUCLEAN;
-       }
        ditem = btrfs_item_ptr(leaf, slot, struct btrfs_dev_item);
        if (btrfs_device_id(leaf, ditem) != key->offset) {
                dev_item_err(leaf, slot,
index bdfe449..e04409f 100644 (file)
@@ -4967,6 +4967,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
        } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
                max_stripe_size = SZ_32M;
                max_chunk_size = 2 * max_stripe_size;
+               devs_max = min_t(int, devs_max, BTRFS_MAX_DEVS_SYS_CHUNK);
        } else {
                btrfs_err(info, "invalid chunk type 0x%llx requested",
                       type);
index bd77adb..8de6339 100644 (file)
@@ -753,6 +753,9 @@ static void ceph_aio_complete(struct inode *inode,
        if (!atomic_dec_and_test(&aio_req->pending_reqs))
                return;
 
+       if (aio_req->iocb->ki_flags & IOCB_DIRECT)
+               inode_dio_end(inode);
+
        ret = aio_req->error;
        if (!ret)
                ret = aio_req->total_len;
@@ -1091,6 +1094,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
                                              CEPH_CAP_FILE_RD);
 
                list_splice(&aio_req->osd_reqs, &osd_reqs);
+               inode_dio_begin(inode);
                while (!list_empty(&osd_reqs)) {
                        req = list_first_entry(&osd_reqs,
                                               struct ceph_osd_request,
@@ -1264,14 +1268,24 @@ again:
        dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n",
             inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, inode);
 
+       if (iocb->ki_flags & IOCB_DIRECT)
+               ceph_start_io_direct(inode);
+       else
+               ceph_start_io_read(inode);
+
        if (fi->fmode & CEPH_FILE_MODE_LAZY)
                want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
        else
                want = CEPH_CAP_FILE_CACHE;
        ret = ceph_get_caps(filp, CEPH_CAP_FILE_RD, want, -1,
                            &got, &pinned_page);
-       if (ret < 0)
+       if (ret < 0) {
+               if (iocb->ki_flags & IOCB_DIRECT)
+                       ceph_end_io_direct(inode);
+               else
+                       ceph_end_io_read(inode);
                return ret;
+       }
 
        if ((got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0 ||
            (iocb->ki_flags & IOCB_DIRECT) ||
@@ -1283,16 +1297,12 @@ again:
 
                if (ci->i_inline_version == CEPH_INLINE_NONE) {
                        if (!retry_op && (iocb->ki_flags & IOCB_DIRECT)) {
-                               ceph_start_io_direct(inode);
                                ret = ceph_direct_read_write(iocb, to,
                                                             NULL, NULL);
-                               ceph_end_io_direct(inode);
                                if (ret >= 0 && ret < len)
                                        retry_op = CHECK_EOF;
                        } else {
-                               ceph_start_io_read(inode);
                                ret = ceph_sync_read(iocb, to, &retry_op);
-                               ceph_end_io_read(inode);
                        }
                } else {
                        retry_op = READ_INLINE;
@@ -1303,11 +1313,10 @@ again:
                     inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len,
                     ceph_cap_string(got));
                ceph_add_rw_context(fi, &rw_ctx);
-               ceph_start_io_read(inode);
                ret = generic_file_read_iter(iocb, to);
-               ceph_end_io_read(inode);
                ceph_del_rw_context(fi, &rw_ctx);
        }
+
        dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n",
             inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret);
        if (pinned_page) {
@@ -1315,6 +1324,12 @@ again:
                pinned_page = NULL;
        }
        ceph_put_cap_refs(ci, got);
+
+       if (iocb->ki_flags & IOCB_DIRECT)
+               ceph_end_io_direct(inode);
+       else
+               ceph_end_io_read(inode);
+
        if (retry_op > HAVE_RETRIED && ret >= 0) {
                int statret;
                struct page *page = NULL;
index ea735d5..0abfde6 100644 (file)
@@ -838,6 +838,7 @@ struct create_durable_handle_reconnect_v2 {
        struct create_context ccontext;
        __u8   Name[8];
        struct durable_reconnect_context_v2 dcontext;
+       __u8   Pad[4];
 } __packed;
 
 /* See MS-SMB2 2.2.13.2.5 */
index dc5dbf6..cb61467 100644 (file)
@@ -101,7 +101,7 @@ static int create_link(struct config_item *parent_item,
        }
        target_sd->s_links++;
        spin_unlock(&configfs_dirent_lock);
-       ret = configfs_get_target_path(item, item, body);
+       ret = configfs_get_target_path(parent_item, item, body);
        if (!ret)
                ret = configfs_create_link(target_sd, parent_item->ci_dentry,
                                           dentry, body);
index 18426f4..e23752d 100644 (file)
@@ -128,13 +128,20 @@ static int ecryptfs_do_unlink(struct inode *dir, struct dentry *dentry,
                              struct inode *inode)
 {
        struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
-       struct inode *lower_dir_inode = ecryptfs_inode_to_lower(dir);
        struct dentry *lower_dir_dentry;
+       struct inode *lower_dir_inode;
        int rc;
 
-       dget(lower_dentry);
-       lower_dir_dentry = lock_parent(lower_dentry);
-       rc = vfs_unlink(lower_dir_inode, lower_dentry, NULL);
+       lower_dir_dentry = ecryptfs_dentry_to_lower(dentry->d_parent);
+       lower_dir_inode = d_inode(lower_dir_dentry);
+       inode_lock_nested(lower_dir_inode, I_MUTEX_PARENT);
+       dget(lower_dentry);     // don't even try to make the lower negative
+       if (lower_dentry->d_parent != lower_dir_dentry)
+               rc = -EINVAL;
+       else if (d_unhashed(lower_dentry))
+               rc = -EINVAL;
+       else
+               rc = vfs_unlink(lower_dir_inode, lower_dentry, NULL);
        if (rc) {
                printk(KERN_ERR "Error in vfs_unlink; rc = [%d]\n", rc);
                goto out_unlock;
@@ -142,10 +149,11 @@ static int ecryptfs_do_unlink(struct inode *dir, struct dentry *dentry,
        fsstack_copy_attr_times(dir, lower_dir_inode);
        set_nlink(inode, ecryptfs_inode_to_lower(inode)->i_nlink);
        inode->i_ctime = dir->i_ctime;
-       d_drop(dentry);
 out_unlock:
-       unlock_dir(lower_dir_dentry);
        dput(lower_dentry);
+       inode_unlock(lower_dir_inode);
+       if (!rc)
+               d_drop(dentry);
        return rc;
 }
 
@@ -311,9 +319,9 @@ static int ecryptfs_i_size_read(struct dentry *dentry, struct inode *inode)
 static struct dentry *ecryptfs_lookup_interpose(struct dentry *dentry,
                                     struct dentry *lower_dentry)
 {
-       struct inode *inode, *lower_inode = d_inode(lower_dentry);
+       struct path *path = ecryptfs_dentry_to_lower_path(dentry->d_parent);
+       struct inode *inode, *lower_inode;
        struct ecryptfs_dentry_info *dentry_info;
-       struct vfsmount *lower_mnt;
        int rc = 0;
 
        dentry_info = kmem_cache_alloc(ecryptfs_dentry_info_cache, GFP_KERNEL);
@@ -322,16 +330,23 @@ static struct dentry *ecryptfs_lookup_interpose(struct dentry *dentry,
                return ERR_PTR(-ENOMEM);
        }
 
-       lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(dentry->d_parent));
        fsstack_copy_attr_atime(d_inode(dentry->d_parent),
-                               d_inode(lower_dentry->d_parent));
+                               d_inode(path->dentry));
        BUG_ON(!d_count(lower_dentry));
 
        ecryptfs_set_dentry_private(dentry, dentry_info);
-       dentry_info->lower_path.mnt = lower_mnt;
+       dentry_info->lower_path.mnt = mntget(path->mnt);
        dentry_info->lower_path.dentry = lower_dentry;
 
-       if (d_really_is_negative(lower_dentry)) {
+       /*
+        * negative dentry can go positive under us here - its parent is not
+        * locked.  That's OK and that could happen just as we return from
+        * ecryptfs_lookup() anyway.  Just need to be careful and fetch
+        * ->d_inode only once - it's not stable here.
+        */
+       lower_inode = READ_ONCE(lower_dentry->d_inode);
+
+       if (!lower_inode) {
                /* We want to add because we couldn't find in lower */
                d_add(dentry, NULL);
                return NULL;
@@ -512,22 +527,30 @@ static int ecryptfs_rmdir(struct inode *dir, struct dentry *dentry)
 {
        struct dentry *lower_dentry;
        struct dentry *lower_dir_dentry;
+       struct inode *lower_dir_inode;
        int rc;
 
        lower_dentry = ecryptfs_dentry_to_lower(dentry);
-       dget(dentry);
-       lower_dir_dentry = lock_parent(lower_dentry);
-       dget(lower_dentry);
-       rc = vfs_rmdir(d_inode(lower_dir_dentry), lower_dentry);
-       dput(lower_dentry);
-       if (!rc && d_really_is_positive(dentry))
+       lower_dir_dentry = ecryptfs_dentry_to_lower(dentry->d_parent);
+       lower_dir_inode = d_inode(lower_dir_dentry);
+
+       inode_lock_nested(lower_dir_inode, I_MUTEX_PARENT);
+       dget(lower_dentry);     // don't even try to make the lower negative
+       if (lower_dentry->d_parent != lower_dir_dentry)
+               rc = -EINVAL;
+       else if (d_unhashed(lower_dentry))
+               rc = -EINVAL;
+       else
+               rc = vfs_rmdir(lower_dir_inode, lower_dentry);
+       if (!rc) {
                clear_nlink(d_inode(dentry));
-       fsstack_copy_attr_times(dir, d_inode(lower_dir_dentry));
-       set_nlink(dir, d_inode(lower_dir_dentry)->i_nlink);
-       unlock_dir(lower_dir_dentry);
+               fsstack_copy_attr_times(dir, lower_dir_inode);
+               set_nlink(dir, lower_dir_inode->i_nlink);
+       }
+       dput(lower_dentry);
+       inode_unlock(lower_dir_inode);
        if (!rc)
                d_drop(dentry);
-       dput(dentry);
        return rc;
 }
 
@@ -565,20 +588,22 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
        struct dentry *lower_new_dentry;
        struct dentry *lower_old_dir_dentry;
        struct dentry *lower_new_dir_dentry;
-       struct dentry *trap = NULL;
+       struct dentry *trap;
        struct inode *target_inode;
 
        if (flags)
                return -EINVAL;
 
+       lower_old_dir_dentry = ecryptfs_dentry_to_lower(old_dentry->d_parent);
+       lower_new_dir_dentry = ecryptfs_dentry_to_lower(new_dentry->d_parent);
+
        lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry);
        lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry);
-       dget(lower_old_dentry);
-       dget(lower_new_dentry);
-       lower_old_dir_dentry = dget_parent(lower_old_dentry);
-       lower_new_dir_dentry = dget_parent(lower_new_dentry);
+
        target_inode = d_inode(new_dentry);
+
        trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
+       dget(lower_new_dentry);
        rc = -EINVAL;
        if (lower_old_dentry->d_parent != lower_old_dir_dentry)
                goto out_lock;
@@ -606,11 +631,8 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
        if (new_dir != old_dir)
                fsstack_copy_attr_all(old_dir, d_inode(lower_old_dir_dentry));
 out_lock:
-       unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
-       dput(lower_new_dir_dentry);
-       dput(lower_old_dir_dentry);
        dput(lower_new_dentry);
-       dput(lower_old_dentry);
+       unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
        return rc;
 }
 
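The ecryptfs_lookup_interpose() change above hinges on fetching lower_dentry->d_inode exactly once: the lower parent is not locked, so a negative dentry can turn positive concurrently and the field is not stable. A minimal userspace sketch of that single-fetch idiom follows; the READ_ONCE() stand-in, the struct layout and fetch_inode_once() are illustrative assumptions, not code from this patch.

#include <stddef.h>

/* simplified stand-in for the kernel's READ_ONCE(): force a single load */
#define READ_ONCE(x) (*(volatile __typeof__(x) *)&(x))

struct inode;                                  /* opaque in this sketch */
struct dentry { struct inode *d_inode; };

/* hypothetical helper mirroring the single-fetch pattern used above */
static struct inode *fetch_inode_once(struct dentry *lower_dentry)
{
        /*
         * Read ->d_inode once and branch on the local copy; re-reading the
         * field could observe a different value on each access.
         */
        struct inode *lower_inode = READ_ONCE(lower_dentry->d_inode);

        if (!lower_inode)
                return NULL;            /* still negative when we looked */
        return lower_inode;             /* positive: keep using the snapshot */
}
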
index 09bc687..2dd55b1 100644 (file)
@@ -519,26 +519,33 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
                 * inode is actually connected to the parent.
                 */
                err = exportfs_get_name(mnt, target_dir, nbuf, result);
-               if (!err) {
-                       inode_lock(target_dir->d_inode);
-                       nresult = lookup_one_len(nbuf, target_dir,
-                                                strlen(nbuf));
-                       inode_unlock(target_dir->d_inode);
-                       if (!IS_ERR(nresult)) {
-                               if (nresult->d_inode) {
-                                       dput(result);
-                                       result = nresult;
-                               } else
-                                       dput(nresult);
-                       }
+               if (err) {
+                       dput(target_dir);
+                       goto err_result;
                }
 
+               inode_lock(target_dir->d_inode);
+               nresult = lookup_one_len(nbuf, target_dir, strlen(nbuf));
+               if (!IS_ERR(nresult)) {
+                       if (unlikely(nresult->d_inode != result->d_inode)) {
+                               dput(nresult);
+                               nresult = ERR_PTR(-ESTALE);
+                       }
+               }
+               inode_unlock(target_dir->d_inode);
                /*
                 * At this point we are done with the parent, but it's pinned
                 * by the child dentry anyway.
                 */
                dput(target_dir);
 
+               if (IS_ERR(nresult)) {
+                       err = PTR_ERR(nresult);
+                       goto err_result;
+               }
+               dput(result);
+               result = nresult;
+
                /*
                 * And finally make sure the dentry is actually acceptable
                 * to NFSD.
index f9a3899..2c819c3 100644 (file)
@@ -326,6 +326,7 @@ struct io_kiocb {
 #define REQ_F_TIMEOUT          1024    /* timeout request */
 #define REQ_F_ISREG            2048    /* regular file */
 #define REQ_F_MUST_PUNT                4096    /* must be punted even for NONBLOCK */
+#define REQ_F_TIMEOUT_NOSEQ    8192    /* no timeout sequence */
        u64                     user_data;
        u32                     result;
        u32                     sequence;
@@ -453,9 +454,13 @@ static struct io_kiocb *io_get_timeout_req(struct io_ring_ctx *ctx)
        struct io_kiocb *req;
 
        req = list_first_entry_or_null(&ctx->timeout_list, struct io_kiocb, list);
-       if (req && !__io_sequence_defer(ctx, req)) {
-               list_del_init(&req->list);
-               return req;
+       if (req) {
+               if (req->flags & REQ_F_TIMEOUT_NOSEQ)
+                       return NULL;
+               if (!__io_sequence_defer(ctx, req)) {
+                       list_del_init(&req->list);
+                       return req;
+               }
        }
 
        return NULL;
@@ -1225,7 +1230,7 @@ static int io_import_fixed(struct io_ring_ctx *ctx, int rw,
                }
        }
 
-       return 0;
+       return len;
 }
 
 static ssize_t io_import_iovec(struct io_ring_ctx *ctx, int rw,
@@ -1941,18 +1946,24 @@ static int io_timeout(struct io_kiocb *req, const struct io_uring_sqe *sqe)
        if (get_timespec64(&ts, u64_to_user_ptr(sqe->addr)))
                return -EFAULT;
 
+       req->flags |= REQ_F_TIMEOUT;
+
        /*
         * sqe->off holds how many events need to occur for this
-        * timeout event to be satisfied.
+        * timeout event to be satisfied. If it isn't set, then this is
+        * a pure timeout request, sequence isn't used.
         */
        count = READ_ONCE(sqe->off);
-       if (!count)
-               count = 1;
+       if (!count) {
+               req->flags |= REQ_F_TIMEOUT_NOSEQ;
+               spin_lock_irq(&ctx->completion_lock);
+               entry = ctx->timeout_list.prev;
+               goto add;
+       }
 
        req->sequence = ctx->cached_sq_head + count - 1;
        /* reuse it to store the count */
        req->submit.sequence = count;
-       req->flags |= REQ_F_TIMEOUT;
 
        /*
         * Insertion sort, ensuring the first entry in the list is always
@@ -1964,6 +1975,9 @@ static int io_timeout(struct io_kiocb *req, const struct io_uring_sqe *sqe)
                unsigned nxt_sq_head;
                long long tmp, tmp_nxt;
 
+               if (nxt->flags & REQ_F_TIMEOUT_NOSEQ)
+                       continue;
+
                /*
                 * Since cached_sq_head + count - 1 can overflow, use type long
                 * long to store it.
@@ -1990,6 +2004,7 @@ static int io_timeout(struct io_kiocb *req, const struct io_uring_sqe *sqe)
                nxt->sequence++;
        }
        req->sequence -= span;
+add:
        list_add(&req->list, entry);
        spin_unlock_irq(&ctx->completion_lock);
 
@@ -2283,6 +2298,7 @@ static bool io_op_needs_file(const struct io_uring_sqe *sqe)
        switch (op) {
        case IORING_OP_NOP:
        case IORING_OP_POLL_REMOVE:
+       case IORING_OP_TIMEOUT:
                return false;
        default:
                return true;
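
The io_uring timeout hunks above compare submission-queue sequence numbers that can wrap, which is why the insertion sort promotes them to long long before subtracting. A generic wrap-safe comparison of 32-bit sequence counters is sketched below; seq_before() is an illustrative helper, not the fs/io_uring.c code itself.

#include <stdbool.h>
#include <stdint.h>

/* Wrap-safe ordering: interpret the difference as signed, so a value just
 * past a wrap still compares as "later" than one just before it. */
static bool seq_before(uint32_t a, uint32_t b)
{
        return (int32_t)(a - b) < 0;
}

int main(void)
{
        /* 0xfffffffe wrapped forward to 2: still ordered correctly */
        return seq_before(0xfffffffeU, 2U) ? 0 : 1;
}
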
index fe0e9e1..2adfe7b 100644 (file)
@@ -2478,8 +2478,10 @@ static void mnt_warn_timestamp_expiry(struct path *mountpoint, struct vfsmount *
 
                time64_to_tm(sb->s_time_max, 0, &tm);
 
-               pr_warn("Mounted %s file system at %s supports timestamps until %04ld (0x%llx)\n",
-                       sb->s_type->name, mntpath,
+               pr_warn("%s filesystem being %s at %s supports timestamps until %04ld (0x%llx)\n",
+                       sb->s_type->name,
+                       is_mounted(mnt) ? "remounted" : "mounted",
+                       mntpath,
                        tm.tm_year+1900, (unsigned long long)sb->s_time_max);
 
                free_page((unsigned long)buf);
@@ -2764,14 +2766,11 @@ static int do_new_mount_fc(struct fs_context *fc, struct path *mountpoint,
        if (IS_ERR(mnt))
                return PTR_ERR(mnt);
 
-       error = do_add_mount(real_mount(mnt), mountpoint, mnt_flags);
-       if (error < 0) {
-               mntput(mnt);
-               return error;
-       }
-
        mnt_warn_timestamp_expiry(mountpoint, mnt);
 
+       error = do_add_mount(real_mount(mnt), mountpoint, mnt_flags);
+       if (error < 0)
+               mntput(mnt);
        return error;
 }
 
index e94b197..ce41032 100644 (file)
@@ -25,13 +25,6 @@ static __always_inline int __arch_get_clock_mode(struct timekeeper *tk)
 }
 #endif /* __arch_get_clock_mode */
 
-#ifndef __arch_use_vsyscall
-static __always_inline int __arch_use_vsyscall(struct vdso_data *vdata)
-{
-       return 1;
-}
-#endif /* __arch_use_vsyscall */
-
 #ifndef __arch_update_vsyscall
 static __always_inline void __arch_update_vsyscall(struct vdso_data *vdata,
                                                   struct timekeeper *tk)
index 8339071..e20a0cd 100644 (file)
@@ -65,5 +65,6 @@ extern void can_rx_unregister(struct net *net, struct net_device *dev,
                              void *data);
 
 extern int can_send(struct sk_buff *skb, int loop);
+void can_sock_destruct(struct sock *sk);
 
 #endif /* !_CAN_CORE_H */
index d0633eb..bc6c879 100644 (file)
@@ -59,6 +59,11 @@ extern ssize_t cpu_show_l1tf(struct device *dev,
                             struct device_attribute *attr, char *buf);
 extern ssize_t cpu_show_mds(struct device *dev,
                            struct device_attribute *attr, char *buf);
+extern ssize_t cpu_show_tsx_async_abort(struct device *dev,
+                                       struct device_attribute *attr,
+                                       char *buf);
+extern ssize_t cpu_show_itlb_multihit(struct device *dev,
+                                     struct device_attribute *attr, char *buf);
 
 extern __printf(4, 5)
 struct device *cpu_device_create(struct device *parent, void *drvdata,
@@ -213,28 +218,7 @@ static inline int cpuhp_smt_enable(void) { return 0; }
 static inline int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) { return 0; }
 #endif
 
-/*
- * These are used for a global "mitigations=" cmdline option for toggling
- * optional CPU mitigations.
- */
-enum cpu_mitigations {
-       CPU_MITIGATIONS_OFF,
-       CPU_MITIGATIONS_AUTO,
-       CPU_MITIGATIONS_AUTO_NOSMT,
-};
-
-extern enum cpu_mitigations cpu_mitigations;
-
-/* mitigations=off */
-static inline bool cpu_mitigations_off(void)
-{
-       return cpu_mitigations == CPU_MITIGATIONS_OFF;
-}
-
-/* mitigations=auto,nosmt */
-static inline bool cpu_mitigations_auto_nosmt(void)
-{
-       return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT;
-}
+extern bool cpu_mitigations_off(void);
+extern bool cpu_mitigations_auto_nosmt(void);
 
 #endif /* _LINUX_CPU_H_ */
index 719fc3e..d41c521 100644 (file)
@@ -966,6 +966,7 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
 
 bool kvm_is_reserved_pfn(kvm_pfn_t pfn);
+bool kvm_is_zone_device_pfn(kvm_pfn_t pfn);
 
 struct kvm_irq_ack_notifier {
        struct hlist_node link;
@@ -1382,4 +1383,10 @@ static inline int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
 }
 #endif /* CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE */
 
+typedef int (*kvm_vm_thread_fn_t)(struct kvm *kvm, uintptr_t data);
+
+int kvm_vm_create_worker_thread(struct kvm *kvm, kvm_vm_thread_fn_t thread_fn,
+                               uintptr_t data, const char *name,
+                               struct task_struct **thread_ptr);
+
 #endif
index 0ebb105..4c75dae 100644 (file)
@@ -119,6 +119,7 @@ extern struct memory_block *find_memory_block(struct mem_section *);
 typedef int (*walk_memory_blocks_func_t)(struct memory_block *, void *);
 extern int walk_memory_blocks(unsigned long start, unsigned long size,
                              void *arg, walk_memory_blocks_func_t func);
+extern int for_each_memory_block(void *arg, walk_memory_blocks_func_t func);
 #define CONFIG_MEM_BLOCK_SIZE  (PAGES_PER_SECTION<<PAGE_SHIFT)
 #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
 
index 9326d67..eaae6b4 100644 (file)
@@ -7,7 +7,7 @@
 struct reset_controller_dev;
 
 /**
- * struct reset_control_ops
+ * struct reset_control_ops - reset controller driver callbacks
  *
  * @reset: for self-deasserting resets, does all necessary
  *         things to reset the device
@@ -33,7 +33,7 @@ struct of_phandle_args;
  * @provider: name of the reset controller device controlling this reset line
  * @index: ID of the reset controller in the reset controller device
  * @dev_id: name of the device associated with this reset line
- * @con_id name of the reset line (can be NULL)
+ * @con_id: name of the reset line (can be NULL)
  */
 struct reset_control_lookup {
        struct list_head list;
index e7793fc..eb597e8 100644 (file)
@@ -143,7 +143,7 @@ static inline int device_reset_optional(struct device *dev)
  * If this function is called more than once for the same reset_control it will
  * return -EBUSY.
  *
- * See reset_control_get_shared for details on shared references to
+ * See reset_control_get_shared() for details on shared references to
  * reset-controls.
  *
  * Use of id names is optional.
index 2bc9960..cf97f63 100644 (file)
@@ -86,7 +86,7 @@ DECLARE_EVENT_CLASS(tcp_event_sk_skb,
                              sk->sk_v6_rcv_saddr, sk->sk_v6_daddr);
        ),
 
-       TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c state=%s\n",
+       TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c state=%s",
                  __entry->sport, __entry->dport, __entry->saddr, __entry->daddr,
                  __entry->saddr_v6, __entry->daddr_v6,
                  show_tcp_state_name(__entry->state))
index b558ea8..ae37fd4 100644 (file)
@@ -421,10 +421,11 @@ enum devlink_attr {
 
        DEVLINK_ATTR_RELOAD_FAILED,                     /* u8 0 or 1 */
 
+       DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS_NS,        /* u64 */
+
        DEVLINK_ATTR_NETNS_FD,                  /* u32 */
        DEVLINK_ATTR_NETNS_PID,                 /* u32 */
        DEVLINK_ATTR_NETNS_ID,                  /* u32 */
-
        /* add new attributes above here, update the policy in devlink.c */
 
        __DEVLINK_ATTR_MAX,
index 59e89a1..9dc9d00 100644 (file)
 #define PTP_ENABLE_FEATURE (1<<0)
 #define PTP_RISING_EDGE    (1<<1)
 #define PTP_FALLING_EDGE   (1<<2)
+#define PTP_STRICT_FLAGS   (1<<3)
+#define PTP_EXTTS_EDGES    (PTP_RISING_EDGE | PTP_FALLING_EDGE)
 
 /*
  * flag fields valid for the new PTP_EXTTS_REQUEST2 ioctl.
  */
 #define PTP_EXTTS_VALID_FLAGS  (PTP_ENABLE_FEATURE |   \
                                 PTP_RISING_EDGE |      \
-                                PTP_FALLING_EDGE)
+                                PTP_FALLING_EDGE |     \
+                                PTP_STRICT_FLAGS)
 
 /*
  * flag fields valid for the original PTP_EXTTS_REQUEST ioctl.
index 1f31c2f..4508d5e 100644 (file)
@@ -351,12 +351,12 @@ static int audit_get_nd(struct audit_watch *watch, struct path *parent)
        struct dentry *d = kern_path_locked(watch->path, parent);
        if (IS_ERR(d))
                return PTR_ERR(d);
-       inode_unlock(d_backing_inode(parent->dentry));
        if (d_is_positive(d)) {
                /* update watch filter fields */
                watch->dev = d->d_sb->s_dev;
                watch->ino = d_backing_inode(d)->i_ino;
        }
+       inode_unlock(d_backing_inode(parent->dentry));
        dput(d);
        return 0;
 }
index 080561b..ef4242e 100644 (file)
@@ -2119,11 +2119,12 @@ int cgroup_do_get_tree(struct fs_context *fc)
 
                nsdentry = kernfs_node_dentry(cgrp->kn, sb);
                dput(fc->root);
-               fc->root = nsdentry;
                if (IS_ERR(nsdentry)) {
-                       ret = PTR_ERR(nsdentry);
                        deactivate_locked_super(sb);
+                       ret = PTR_ERR(nsdentry);
+                       nsdentry = NULL;
                }
+               fc->root = nsdentry;
        }
 
        if (!ctx->kfc.new_sb_created)
index fc28e17..e2cad3e 100644 (file)
@@ -2373,7 +2373,18 @@ void __init boot_cpu_hotplug_init(void)
        this_cpu_write(cpuhp_state.state, CPUHP_ONLINE);
 }
 
-enum cpu_mitigations cpu_mitigations __ro_after_init = CPU_MITIGATIONS_AUTO;
+/*
+ * These are used for a global "mitigations=" cmdline option for toggling
+ * optional CPU mitigations.
+ */
+enum cpu_mitigations {
+       CPU_MITIGATIONS_OFF,
+       CPU_MITIGATIONS_AUTO,
+       CPU_MITIGATIONS_AUTO_NOSMT,
+};
+
+static enum cpu_mitigations cpu_mitigations __ro_after_init =
+       CPU_MITIGATIONS_AUTO;
 
 static int __init mitigations_parse_cmdline(char *arg)
 {
@@ -2390,3 +2401,17 @@ static int __init mitigations_parse_cmdline(char *arg)
        return 0;
 }
 early_param("mitigations", mitigations_parse_cmdline);
+
+/* mitigations=off */
+bool cpu_mitigations_off(void)
+{
+       return cpu_mitigations == CPU_MITIGATIONS_OFF;
+}
+EXPORT_SYMBOL_GPL(cpu_mitigations_off);
+
+/* mitigations=auto,nosmt */
+bool cpu_mitigations_auto_nosmt(void)
+{
+       return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT;
+}
+EXPORT_SYMBOL_GPL(cpu_mitigations_auto_nosmt);
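
With cpu_mitigations_off() and cpu_mitigations_auto_nosmt() now out-of-line and exported, modules and arch code can consult the global "mitigations=" setting directly. A kernel-side sketch of a typical caller is below; my_mitigation_select() and the MITIGATION_* enum are made-up names used only to illustrate the pattern.

#include <linux/cpu.h>

/* hypothetical per-vulnerability policy, not from the tree */
enum my_mitigation { MITIGATION_OFF, MITIGATION_FULL, MITIGATION_FULL_NOSMT };

static enum my_mitigation my_mitigation_select(void)
{
        if (cpu_mitigations_off())
                return MITIGATION_OFF;          /* mitigations=off */
        if (cpu_mitigations_auto_nosmt())
                return MITIGATION_FULL_NOSMT;   /* mitigations=auto,nosmt */
        return MITIGATION_FULL;                 /* default: mitigations=auto */
}
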
index aec8dba..00a0146 100644 (file)
@@ -1031,7 +1031,7 @@ perf_cgroup_set_timestamp(struct task_struct *task,
 {
 }
 
-void
+static inline void
 perf_cgroup_switch(struct task_struct *task, struct task_struct *next)
 {
 }
@@ -10535,6 +10535,15 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
                goto err_ns;
        }
 
+       /*
+        * Disallow uncore-cgroup events, they don't make sense as the cgroup will
+        * be different on other CPUs in the uncore mask.
+        */
+       if (pmu->task_ctx_nr == perf_invalid_context && cgroup_fd != -1) {
+               err = -EINVAL;
+               goto err_pmu;
+       }
+
        if (event->attr.aux_output &&
            !(pmu->capabilities & PERF_PMU_CAP_AUX_OUTPUT)) {
                err = -EOPNOTSUPP;
@@ -11323,8 +11332,11 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
        int err;
 
        /*
-        * Get the target context (task or percpu):
+        * Grouping is not supported for kernel events, neither is 'AUX',
+        * make sure the caller's intentions are adjusted.
         */
+       if (attr->aux_output)
+               return ERR_PTR(-EINVAL);
 
        event = perf_event_alloc(attr, cpu, task, NULL, NULL,
                                 overflow_handler, context, -1);
@@ -11336,6 +11348,9 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
        /* Mark owner so we could distinguish it from user events. */
        event->owner = TASK_TOMBSTONE;
 
+       /*
+        * Get the target context (task or percpu):
+        */
        ctx = find_get_context(event->pmu, task, event);
        if (IS_ERR(ctx)) {
                err = PTR_ERR(ctx);
@@ -11787,7 +11802,7 @@ inherit_event(struct perf_event *parent_event,
                                                   GFP_KERNEL);
                if (!child_ctx->task_ctx_data) {
                        free_event(child_event);
-                       return NULL;
+                       return ERR_PTR(-ENOMEM);
                }
        }
 
@@ -11890,7 +11905,7 @@ static int inherit_group(struct perf_event *parent_event,
                if (IS_ERR(child_ctr))
                        return PTR_ERR(child_ctr);
 
-               if (sub->aux_event == parent_event &&
+               if (sub->aux_event == parent_event && child_ctr &&
                    !perf_get_aux_event(child_ctr, leader))
                        return -EINVAL;
        }
index 132672b..dd822fd 100644 (file)
@@ -51,7 +51,7 @@ EXPORT_SYMBOL_GPL(irqchip_fwnode_ops);
  * @type:      Type of irqchip_fwnode. See linux/irqdomain.h
  * @name:      Optional user provided domain name
  * @id:                Optional user provided id if name != NULL
- * @data:      Optional user-provided data
+ * @pa:                Optional user-provided physical address
  *
  * Allocate a struct irqchip_fwid, and return a pointer to the embedded
  * fwnode_handle (or NULL on failure).
index dd05a37..0f2eb36 100644 (file)
@@ -1073,6 +1073,7 @@ uclamp_update_active(struct task_struct *p, enum uclamp_id clamp_id)
        task_rq_unlock(rq, p, &rf);
 }
 
+#ifdef CONFIG_UCLAMP_TASK_GROUP
 static inline void
 uclamp_update_active_tasks(struct cgroup_subsys_state *css,
                           unsigned int clamps)
@@ -1091,7 +1092,6 @@ uclamp_update_active_tasks(struct cgroup_subsys_state *css,
        css_task_iter_end(&it);
 }
 
-#ifdef CONFIG_UCLAMP_TASK_GROUP
 static void cpu_util_update_eff(struct cgroup_subsys_state *css);
 static void uclamp_update_root_tg(void)
 {
@@ -3929,13 +3929,22 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
        }
 
 restart:
+#ifdef CONFIG_SMP
        /*
-        * Ensure that we put DL/RT tasks before the pick loop, such that they
-        * can PULL higher prio tasks when we lower the RQ 'priority'.
+        * We must do the balancing pass before put_next_task(), such
+        * that when we release the rq->lock the task is in the same
+        * state as before we took rq->lock.
+        *
+        * We can terminate the balance pass as soon as we know there is
+        * a runnable task of @class priority or higher.
         */
-       prev->sched_class->put_prev_task(rq, prev, rf);
-       if (!rq->nr_running)
-               newidle_balance(rq, rf);
+       for_class_range(class, prev->sched_class, &idle_sched_class) {
+               if (class->balance(rq, prev, rf))
+                       break;
+       }
+#endif
+
+       put_prev_task(rq, prev);
 
        for_each_class(class) {
                p = class->pick_next_task(rq, NULL, NULL);
@@ -6201,7 +6210,7 @@ static struct task_struct *__pick_migrate_task(struct rq *rq)
        for_each_class(class) {
                next = class->pick_next_task(rq, NULL, NULL);
                if (next) {
-                       next->sched_class->put_prev_task(rq, next, NULL);
+                       next->sched_class->put_prev_task(rq, next);
                        return next;
                }
        }
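
The pick_next_task() rework above introduces a balance pass that walks the class list with for_class_range(), stopping at the first class that reports runnable work. The toy program below models that walk over a NULL-terminated, priority-ordered chain; the struct, names and data are illustrative, not the scheduler's real types.

#include <stdbool.h>
#include <stddef.h>

struct toy_class {
        const char *name;
        bool has_work;                          /* stand-in for ->balance() */
        const struct toy_class *next;           /* lower priority, NULL-terminated */
};

static const struct toy_class *first_with_work(const struct toy_class *from)
{
        const struct toy_class *c;

        /* mirrors: for_class_range(c, from, NULL) { if (c->balance(...)) break; } */
        for (c = from; c; c = c->next)
                if (c->has_work)
                        return c;
        return NULL;
}

int main(void)
{
        struct toy_class idle = { "idle", true,  NULL  };
        struct toy_class fair = { "fair", false, &idle };
        struct toy_class rt   = { "rt",   false, &fair };

        /* starting from "rt", the first class with work is "idle" */
        return first_with_work(&rt) == &idle ? 0 : 1;
}
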
index 2dc4872..a8a0803 100644 (file)
@@ -1691,6 +1691,22 @@ static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
        resched_curr(rq);
 }
 
+static int balance_dl(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
+{
+       if (!on_dl_rq(&p->dl) && need_pull_dl_task(rq, p)) {
+               /*
+                * This is OK, because current is on_cpu, which avoids it being
+                * picked for load-balance and preemption/IRQs are still
+                * disabled avoiding further scheduler activity on it and we've
+                * not yet started the picking loop.
+                */
+               rq_unpin_lock(rq, rf);
+               pull_dl_task(rq);
+               rq_repin_lock(rq, rf);
+       }
+
+       return sched_stop_runnable(rq) || sched_dl_runnable(rq);
+}
 #endif /* CONFIG_SMP */
 
 /*
@@ -1758,45 +1774,28 @@ static struct task_struct *
 pick_next_task_dl(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 {
        struct sched_dl_entity *dl_se;
+       struct dl_rq *dl_rq = &rq->dl;
        struct task_struct *p;
-       struct dl_rq *dl_rq;
 
        WARN_ON_ONCE(prev || rf);
 
-       dl_rq = &rq->dl;
-
-       if (unlikely(!dl_rq->dl_nr_running))
+       if (!sched_dl_runnable(rq))
                return NULL;
 
        dl_se = pick_next_dl_entity(rq, dl_rq);
        BUG_ON(!dl_se);
-
        p = dl_task_of(dl_se);
-
        set_next_task_dl(rq, p);
-
        return p;
 }
 
-static void put_prev_task_dl(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
+static void put_prev_task_dl(struct rq *rq, struct task_struct *p)
 {
        update_curr_dl(rq);
 
        update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 1);
        if (on_dl_rq(&p->dl) && p->nr_cpus_allowed > 1)
                enqueue_pushable_dl_task(rq, p);
-
-       if (rf && !on_dl_rq(&p->dl) && need_pull_dl_task(rq, p)) {
-               /*
-                * This is OK, because current is on_cpu, which avoids it being
-                * picked for load-balance and preemption/IRQs are still
-                * disabled avoiding further scheduler activity on it and we've
-                * not yet started the picking loop.
-                */
-               rq_unpin_lock(rq, rf);
-               pull_dl_task(rq);
-               rq_repin_lock(rq, rf);
-       }
 }
 
 /*
@@ -2442,6 +2441,7 @@ const struct sched_class dl_sched_class = {
        .set_next_task          = set_next_task_dl,
 
 #ifdef CONFIG_SMP
+       .balance                = balance_dl,
        .select_task_rq         = select_task_rq_dl,
        .migrate_task_rq        = migrate_task_rq_dl,
        .set_cpus_allowed       = set_cpus_allowed_dl,
index 682a754..22a2fed 100644 (file)
@@ -6570,6 +6570,15 @@ static void task_dead_fair(struct task_struct *p)
 {
        remove_entity_load_avg(&p->se);
 }
+
+static int
+balance_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
+{
+       if (rq->nr_running)
+               return 1;
+
+       return newidle_balance(rq, rf) != 0;
+}
 #endif /* CONFIG_SMP */
 
 static unsigned long wakeup_gran(struct sched_entity *se)
@@ -6746,7 +6755,7 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf
        int new_tasks;
 
 again:
-       if (!cfs_rq->nr_running)
+       if (!sched_fair_runnable(rq))
                goto idle;
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -6884,7 +6893,7 @@ idle:
 /*
  * Account for a descheduled task:
  */
-static void put_prev_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
+static void put_prev_task_fair(struct rq *rq, struct task_struct *prev)
 {
        struct sched_entity *se = &prev->se;
        struct cfs_rq *cfs_rq;
@@ -10414,11 +10423,11 @@ const struct sched_class fair_sched_class = {
        .check_preempt_curr     = check_preempt_wakeup,
 
        .pick_next_task         = pick_next_task_fair,
-
        .put_prev_task          = put_prev_task_fair,
        .set_next_task          = set_next_task_fair,
 
 #ifdef CONFIG_SMP
+       .balance                = balance_fair,
        .select_task_rq         = select_task_rq_fair,
        .migrate_task_rq        = migrate_task_rq_fair,
 
index 8dad5aa..f65ef1e 100644 (file)
@@ -365,6 +365,12 @@ select_task_rq_idle(struct task_struct *p, int cpu, int sd_flag, int flags)
 {
        return task_cpu(p); /* IDLE tasks are never migrated */
 }
+
+static int
+balance_idle(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
+{
+       return WARN_ON_ONCE(1);
+}
 #endif
 
 /*
@@ -375,7 +381,7 @@ static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int fl
        resched_curr(rq);
 }
 
-static void put_prev_task_idle(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
+static void put_prev_task_idle(struct rq *rq, struct task_struct *prev)
 {
 }
 
@@ -460,6 +466,7 @@ const struct sched_class idle_sched_class = {
        .set_next_task          = set_next_task_idle,
 
 #ifdef CONFIG_SMP
+       .balance                = balance_idle,
        .select_task_rq         = select_task_rq_idle,
        .set_cpus_allowed       = set_cpus_allowed_common,
 #endif
index ebaa4e6..9b8adc0 100644 (file)
@@ -1469,6 +1469,22 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
        resched_curr(rq);
 }
 
+static int balance_rt(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
+{
+       if (!on_rt_rq(&p->rt) && need_pull_rt_task(rq, p)) {
+               /*
+                * This is OK, because current is on_cpu, which avoids it being
+                * picked for load-balance and preemption/IRQs are still
+                * disabled avoiding further scheduler activity on it and we've
+                * not yet started the picking loop.
+                */
+               rq_unpin_lock(rq, rf);
+               pull_rt_task(rq);
+               rq_repin_lock(rq, rf);
+       }
+
+       return sched_stop_runnable(rq) || sched_dl_runnable(rq) || sched_rt_runnable(rq);
+}
 #endif /* CONFIG_SMP */
 
 /*
@@ -1552,21 +1568,18 @@ static struct task_struct *
 pick_next_task_rt(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 {
        struct task_struct *p;
-       struct rt_rq *rt_rq = &rq->rt;
 
        WARN_ON_ONCE(prev || rf);
 
-       if (!rt_rq->rt_queued)
+       if (!sched_rt_runnable(rq))
                return NULL;
 
        p = _pick_next_task_rt(rq);
-
        set_next_task_rt(rq, p);
-
        return p;
 }
 
-static void put_prev_task_rt(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
+static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
 {
        update_curr_rt(rq);
 
@@ -1578,18 +1591,6 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p, struct rq_fla
         */
        if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1)
                enqueue_pushable_task(rq, p);
-
-       if (rf && !on_rt_rq(&p->rt) && need_pull_rt_task(rq, p)) {
-               /*
-                * This is OK, because current is on_cpu, which avoids it being
-                * picked for load-balance and preemption/IRQs are still
-                * disabled avoiding further scheduler activity on it and we've
-                * not yet started the picking loop.
-                */
-               rq_unpin_lock(rq, rf);
-               pull_rt_task(rq);
-               rq_repin_lock(rq, rf);
-       }
 }
 
 #ifdef CONFIG_SMP
@@ -2366,8 +2367,8 @@ const struct sched_class rt_sched_class = {
        .set_next_task          = set_next_task_rt,
 
 #ifdef CONFIG_SMP
+       .balance                = balance_rt,
        .select_task_rq         = select_task_rq_rt,
-
        .set_cpus_allowed       = set_cpus_allowed_common,
        .rq_online              = rq_online_rt,
        .rq_offline             = rq_offline_rt,
index 0db2c1b..c8870c5 100644 (file)
@@ -1727,10 +1727,11 @@ struct sched_class {
        struct task_struct * (*pick_next_task)(struct rq *rq,
                                               struct task_struct *prev,
                                               struct rq_flags *rf);
-       void (*put_prev_task)(struct rq *rq, struct task_struct *p, struct rq_flags *rf);
+       void (*put_prev_task)(struct rq *rq, struct task_struct *p);
        void (*set_next_task)(struct rq *rq, struct task_struct *p);
 
 #ifdef CONFIG_SMP
+       int (*balance)(struct rq *rq, struct task_struct *prev, struct rq_flags *rf);
        int  (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags);
        void (*migrate_task_rq)(struct task_struct *p, int new_cpu);
 
@@ -1773,7 +1774,7 @@ struct sched_class {
 static inline void put_prev_task(struct rq *rq, struct task_struct *prev)
 {
        WARN_ON_ONCE(rq->curr != prev);
-       prev->sched_class->put_prev_task(rq, prev, NULL);
+       prev->sched_class->put_prev_task(rq, prev);
 }
 
 static inline void set_next_task(struct rq *rq, struct task_struct *next)
@@ -1787,8 +1788,12 @@ static inline void set_next_task(struct rq *rq, struct task_struct *next)
 #else
 #define sched_class_highest (&dl_sched_class)
 #endif
+
+#define for_class_range(class, _from, _to) \
+       for (class = (_from); class != (_to); class = class->next)
+
 #define for_each_class(class) \
-   for (class = sched_class_highest; class; class = class->next)
+       for_class_range(class, sched_class_highest, NULL)
 
 extern const struct sched_class stop_sched_class;
 extern const struct sched_class dl_sched_class;
@@ -1796,6 +1801,25 @@ extern const struct sched_class rt_sched_class;
 extern const struct sched_class fair_sched_class;
 extern const struct sched_class idle_sched_class;
 
+static inline bool sched_stop_runnable(struct rq *rq)
+{
+       return rq->stop && task_on_rq_queued(rq->stop);
+}
+
+static inline bool sched_dl_runnable(struct rq *rq)
+{
+       return rq->dl.dl_nr_running > 0;
+}
+
+static inline bool sched_rt_runnable(struct rq *rq)
+{
+       return rq->rt.rt_queued > 0;
+}
+
+static inline bool sched_fair_runnable(struct rq *rq)
+{
+       return rq->cfs.nr_running > 0;
+}
 
 #ifdef CONFIG_SMP
 
index 7e1cee4..c064073 100644 (file)
@@ -15,6 +15,12 @@ select_task_rq_stop(struct task_struct *p, int cpu, int sd_flag, int flags)
 {
        return task_cpu(p); /* stop tasks never migrate */
 }
+
+static int
+balance_stop(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
+{
+       return sched_stop_runnable(rq);
+}
 #endif /* CONFIG_SMP */
 
 static void
@@ -31,16 +37,13 @@ static void set_next_task_stop(struct rq *rq, struct task_struct *stop)
 static struct task_struct *
 pick_next_task_stop(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 {
-       struct task_struct *stop = rq->stop;
-
        WARN_ON_ONCE(prev || rf);
 
-       if (!stop || !task_on_rq_queued(stop))
+       if (!sched_stop_runnable(rq))
                return NULL;
 
-       set_next_task_stop(rq, stop);
-
-       return stop;
+       set_next_task_stop(rq, rq->stop);
+       return rq->stop;
 }
 
 static void
@@ -60,7 +63,7 @@ static void yield_task_stop(struct rq *rq)
        BUG(); /* the stop task should never yield, it's pointless. */
 }
 
-static void put_prev_task_stop(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
+static void put_prev_task_stop(struct rq *rq, struct task_struct *prev)
 {
        struct task_struct *curr = rq->curr;
        u64 delta_exec;
@@ -129,6 +132,7 @@ const struct sched_class stop_sched_class = {
        .set_next_task          = set_next_task_stop,
 
 #ifdef CONFIG_SMP
+       .balance                = balance_stop,
        .select_task_rq         = select_task_rq_stop,
        .set_cpus_allowed       = set_cpus_allowed_common,
 #endif
index c4da1ef..bcd46f5 100644 (file)
@@ -2205,8 +2205,8 @@ static void ptrace_stop(int exit_code, int why, int clear_code, kernel_siginfo_t
                 */
                preempt_disable();
                read_unlock(&tasklist_lock);
-               preempt_enable_no_resched();
                cgroup_enter_frozen();
+               preempt_enable_no_resched();
                freezable_schedule();
                cgroup_leave_frozen(true);
        } else {
index 6d1f68b..c9ea7eb 100644 (file)
@@ -141,7 +141,8 @@ unsigned int stack_trace_save_tsk(struct task_struct *tsk, unsigned long *store,
        struct stacktrace_cookie c = {
                .store  = store,
                .size   = size,
-               .skip   = skipnr + 1,
+               /* skip this function if they are tracing us */
+               .skip   = skipnr + !!(current == tsk),
        };
 
        if (!try_get_task_stack(tsk))
@@ -298,7 +299,8 @@ unsigned int stack_trace_save_tsk(struct task_struct *task,
        struct stack_trace trace = {
                .entries        = store,
                .max_entries    = size,
-               .skip           = skipnr + 1,
+               /* skip this function if they are tracing us */
+               .skip   = skipnr + !!(current == task),
        };
 
        save_stack_trace_tsk(task, &trace);
index 65eb796..069ca78 100644 (file)
@@ -771,7 +771,7 @@ int __do_adjtimex(struct __kernel_timex *txc, const struct timespec64 *ts,
        /* fill PPS status fields */
        pps_fill_timex(txc);
 
-       txc->time.tv_sec = (time_t)ts->tv_sec;
+       txc->time.tv_sec = ts->tv_sec;
        txc->time.tv_usec = ts->tv_nsec;
        if (!(time_status & STA_NANO))
                txc->time.tv_usec = ts->tv_nsec / NSEC_PER_USEC;
index 4bc37ac..5ee0f77 100644 (file)
@@ -110,8 +110,7 @@ void update_vsyscall(struct timekeeper *tk)
        nsec            = nsec + tk->wall_to_monotonic.tv_nsec;
        vdso_ts->sec    += __iter_div_u64_rem(nsec, NSEC_PER_SEC, &vdso_ts->nsec);
 
-       if (__arch_use_vsyscall(vdata))
-               update_vdso_data(vdata, tk);
+       update_vdso_data(vdata, tk);
 
        __arch_update_vsyscall(vdata, tk);
 
@@ -124,10 +123,8 @@ void update_vsyscall_tz(void)
 {
        struct vdso_data *vdata = __arch_get_k_vdso_data();
 
-       if (__arch_use_vsyscall(vdata)) {
-               vdata[CS_HRES_COARSE].tz_minuteswest = sys_tz.tz_minuteswest;
-               vdata[CS_HRES_COARSE].tz_dsttime = sys_tz.tz_dsttime;
-       }
+       vdata[CS_HRES_COARSE].tz_minuteswest = sys_tz.tz_minuteswest;
+       vdata[CS_HRES_COARSE].tz_dsttime = sys_tz.tz_dsttime;
 
        __arch_sync_vdso_data(vdata);
 }
index 183f92a..3321d04 100644 (file)
@@ -447,7 +447,6 @@ config ASSOCIATIVE_ARRAY
 config HAS_IOMEM
        bool
        depends on !NO_IOMEM
-       select GENERIC_IO
        default y
 
 config HAS_IOPORT_MAP
index 08c3c80..156f26f 100644 (file)
@@ -1146,6 +1146,7 @@ XZ_EXTERN enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s, uint8_t props)
 
                if (DEC_IS_DYNALLOC(s->dict.mode)) {
                        if (s->dict.allocated < s->dict.size) {
+                               s->dict.allocated = s->dict.size;
                                vfree(s->dict.buf);
                                s->dict.buf = vmalloc(s->dict.size);
                                if (s->dict.buf == NULL) {
index 8345bb6..0461df1 100644 (file)
@@ -67,28 +67,31 @@ void __dump_page(struct page *page, const char *reason)
         */
        mapcount = PageSlab(page) ? 0 : page_mapcount(page);
 
-       pr_warn("page:%px refcount:%d mapcount:%d mapping:%px index:%#lx",
-                 page, page_ref_count(page), mapcount,
-                 page->mapping, page_to_pgoff(page));
        if (PageCompound(page))
-               pr_cont(" compound_mapcount: %d", compound_mapcount(page));
-       pr_cont("\n");
-       if (PageAnon(page))
-               pr_warn("anon ");
-       else if (PageKsm(page))
-               pr_warn("ksm ");
+               pr_warn("page:%px refcount:%d mapcount:%d mapping:%px "
+                       "index:%#lx compound_mapcount: %d\n",
+                       page, page_ref_count(page), mapcount,
+                       page->mapping, page_to_pgoff(page),
+                       compound_mapcount(page));
+       else
+               pr_warn("page:%px refcount:%d mapcount:%d mapping:%px index:%#lx\n",
+                       page, page_ref_count(page), mapcount,
+                       page->mapping, page_to_pgoff(page));
+       if (PageKsm(page))
+               pr_warn("ksm flags: %#lx(%pGp)\n", page->flags, &page->flags);
+       else if (PageAnon(page))
+               pr_warn("anon flags: %#lx(%pGp)\n", page->flags, &page->flags);
        else if (mapping) {
-               pr_warn("%ps ", mapping->a_ops);
                if (mapping->host && mapping->host->i_dentry.first) {
                        struct dentry *dentry;
                        dentry = container_of(mapping->host->i_dentry.first, struct dentry, d_u.d_alias);
-                       pr_warn("name:\"%pd\" ", dentry);
-               }
+                       pr_warn("%ps name:\"%pd\"\n", mapping->a_ops, dentry);
+               } else
+                       pr_warn("%ps\n", mapping->a_ops);
+               pr_warn("flags: %#lx(%pGp)\n", page->flags, &page->flags);
        }
        BUILD_BUG_ON(ARRAY_SIZE(pageflag_names) != __NR_PAGEFLAGS + 1);
 
-       pr_warn("flags: %#lx(%pGp)\n", page->flags, &page->flags);
-
 hex_only:
        print_hex_dump(KERN_WARNING, "raw: ", DUMP_PREFIX_NONE, 32,
                        sizeof(unsigned long), page,
index f1930fa..2ac38bd 100644 (file)
@@ -196,7 +196,7 @@ int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
 again:
        rcu_read_lock();
        h_cg = hugetlb_cgroup_from_task(current);
-       if (!css_tryget_online(&h_cg->css)) {
+       if (!css_tryget(&h_cg->css)) {
                rcu_read_unlock();
                goto again;
        }
index f05d27b..a8a57be 100644 (file)
@@ -1602,17 +1602,6 @@ static void collapse_file(struct mm_struct *mm,
                                        result = SCAN_FAIL;
                                        goto xa_unlocked;
                                }
-                       } else if (!PageUptodate(page)) {
-                               xas_unlock_irq(&xas);
-                               wait_on_page_locked(page);
-                               if (!trylock_page(page)) {
-                                       result = SCAN_PAGE_LOCK;
-                                       goto xa_unlocked;
-                               }
-                               get_page(page);
-                       } else if (PageDirty(page)) {
-                               result = SCAN_FAIL;
-                               goto xa_locked;
                        } else if (trylock_page(page)) {
                                get_page(page);
                                xas_unlock_irq(&xas);
@@ -1627,7 +1616,12 @@ static void collapse_file(struct mm_struct *mm,
                 * without racing with truncate.
                 */
                VM_BUG_ON_PAGE(!PageLocked(page), page);
-               VM_BUG_ON_PAGE(!PageUptodate(page), page);
+
+               /* make sure the page is up to date */
+               if (unlikely(!PageUptodate(page))) {
+                       result = SCAN_FAIL;
+                       goto out_unlock;
+               }
 
                /*
                 * If file was truncated then extended, or hole-punched, before
@@ -1643,6 +1637,16 @@ static void collapse_file(struct mm_struct *mm,
                        goto out_unlock;
                }
 
+               if (!is_shmem && PageDirty(page)) {
+                       /*
+                        * khugepaged only works on read-only fd, so this
+                        * page is dirty because it hasn't been flushed
+                        * since first write.
+                        */
+                       result = SCAN_FAIL;
+                       goto out_unlock;
+               }
+
                if (isolate_lru_page(page)) {
                        result = SCAN_DEL_PAGE_LRU;
                        goto out_unlock;
index 2be9f3f..94c343b 100644 (file)
@@ -363,8 +363,12 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
                ClearPageReferenced(page);
                test_and_clear_page_young(page);
                if (pageout) {
-                       if (!isolate_lru_page(page))
-                               list_add(&page->lru, &page_list);
+                       if (!isolate_lru_page(page)) {
+                               if (PageUnevictable(page))
+                                       putback_lru_page(page);
+                               else
+                                       list_add(&page->lru, &page_list);
+                       }
                } else
                        deactivate_page(page);
 huge_unlock:
@@ -441,8 +445,12 @@ regular_page:
                ClearPageReferenced(page);
                test_and_clear_page_young(page);
                if (pageout) {
-                       if (!isolate_lru_page(page))
-                               list_add(&page->lru, &page_list);
+                       if (!isolate_lru_page(page)) {
+                               if (PageUnevictable(page))
+                                       putback_lru_page(page);
+                               else
+                                       list_add(&page->lru, &page_list);
+                       }
                } else
                        deactivate_page(page);
        }
index 37592dd..46ad252 100644 (file)
@@ -960,7 +960,7 @@ struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
                        if (unlikely(!memcg))
                                memcg = root_mem_cgroup;
                }
-       } while (!css_tryget_online(&memcg->css));
+       } while (!css_tryget(&memcg->css));
        rcu_read_unlock();
        return memcg;
 }
index 07e5c67..3b62a9f 100644 (file)
@@ -1646,6 +1646,18 @@ static int check_cpu_on_node(pg_data_t *pgdat)
        return 0;
 }
 
+static int check_no_memblock_for_node_cb(struct memory_block *mem, void *arg)
+{
+       int nid = *(int *)arg;
+
+       /*
+        * If a memory block belongs to multiple nodes, the stored nid is not
+        * reliable. However, such blocks are always online (e.g., cannot get
+        * offlined) and, therefore, are still spanned by the node.
+        */
+       return mem->nid == nid ? -EEXIST : 0;
+}
+
 /**
  * try_offline_node
  * @nid: the node ID
@@ -1658,25 +1670,24 @@ static int check_cpu_on_node(pg_data_t *pgdat)
 void try_offline_node(int nid)
 {
        pg_data_t *pgdat = NODE_DATA(nid);
-       unsigned long start_pfn = pgdat->node_start_pfn;
-       unsigned long end_pfn = start_pfn + pgdat->node_spanned_pages;
-       unsigned long pfn;
-
-       for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
-               unsigned long section_nr = pfn_to_section_nr(pfn);
-
-               if (!present_section_nr(section_nr))
-                       continue;
+       int rc;
 
-               if (pfn_to_nid(pfn) != nid)
-                       continue;
+       /*
+        * If the node still spans pages (especially ZONE_DEVICE), don't
+        * offline it. A node spans memory after move_pfn_range_to_zone(),
+        * e.g., after the memory block was onlined.
+        */
+       if (pgdat->node_spanned_pages)
+               return;
 
-               /*
-                * some memory sections of this node are not removed, and we
-                * can't offline node now.
-                */
+       /*
+        * Especially offline memory blocks might not be spanned by the
+        * node. They will get spanned by the node once they get onlined.
+        * However, they link to the node in sysfs and can get onlined later.
+        */
+       rc = for_each_memory_block(&nid, check_no_memblock_for_node_cb);
+       if (rc)
                return;
-       }
 
        if (check_cpu_on_node(pgdat))
                return;
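
try_offline_node() now relies on for_each_memory_block() propagating the first non-zero callback return, so -EEXIST doubles as "found a block for this node, stop walking". The sketch below shows that walker/callback contract in isolation; it is an illustration of the pattern, not the drivers/base/memory.c implementation.

#include <errno.h>
#include <stddef.h>

struct memory_block { int nid; };
typedef int (*walk_fn_t)(struct memory_block *, void *);

static int walk_blocks(struct memory_block *blocks, size_t n,
                       void *arg, walk_fn_t fn)
{
        size_t i;
        int ret;

        for (i = 0; i < n; i++) {
                ret = fn(&blocks[i], arg);
                if (ret)
                        return ret;     /* first non-zero return ends the walk */
        }
        return 0;
}

static int block_for_node(struct memory_block *mem, void *arg)
{
        return mem->nid == *(int *)arg ? -EEXIST : 0;
}

int main(void)
{
        struct memory_block blocks[] = { { 0 }, { 1 } };
        int nid = 1;

        /* non-zero result means the node still has memory blocks */
        return walk_blocks(blocks, 2, &nid, block_for_node) ? 0 : 1;
}
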
index 4ae967b..e08c941 100644 (file)
@@ -672,7 +672,9 @@ static const struct mm_walk_ops queue_pages_walk_ops = {
  * 1 - there is unmovable page, but MPOL_MF_MOVE* & MPOL_MF_STRICT were
  *     specified.
  * 0 - queue pages successfully or no misplaced page.
- * -EIO - there is misplaced page and only MPOL_MF_STRICT was specified.
+ * errno - i.e. misplaced pages with MPOL_MF_STRICT specified (-EIO) or
+ *         memory range specified by nodemask and maxnode points outside
+ *         your accessible address space (-EFAULT)
  */
 static int
 queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
@@ -1286,7 +1288,7 @@ static long do_mbind(unsigned long start, unsigned long len,
                          flags | MPOL_MF_INVERT, &pagelist);
 
        if (ret < 0) {
-               err = -EIO;
+               err = ret;
                goto up_out;
        }
 
@@ -1305,10 +1307,12 @@ static long do_mbind(unsigned long start, unsigned long len,
 
                if ((ret > 0) || (nr_failed && (flags & MPOL_MF_STRICT)))
                        err = -EIO;
-       } else
-               putback_movable_pages(&pagelist);
-
+       } else {
 up_out:
+               if (!list_empty(&pagelist))
+                       putback_movable_pages(&pagelist);
+       }
+
        up_write(&mm->mmap_sem);
 mpol_out:
        mpol_put(new);
index 24ee600..60a66a5 100644 (file)
@@ -73,6 +73,7 @@ static void swap_slot_free_notify(struct page *page)
 {
        struct swap_info_struct *sis;
        struct gendisk *disk;
+       swp_entry_t entry;
 
        /*
         * There is no guarantee that the page is in swap cache - the software
@@ -104,11 +105,10 @@ static void swap_slot_free_notify(struct page *page)
         * we again wish to reclaim it.
         */
        disk = sis->bdev->bd_disk;
-       if (disk->fops->swap_slot_free_notify) {
-               swp_entry_t entry;
+       entry.val = page_private(page);
+       if (disk->fops->swap_slot_free_notify && __swap_count(entry) == 1) {
                unsigned long offset;
 
-               entry.val = page_private(page);
                offset = swp_offset(entry);
 
                SetPageDirty(page);
index b25c807..e72e802 100644 (file)
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1433,12 +1433,15 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s,
        void *old_tail = *tail ? *tail : *head;
        int rsize;
 
-       if (slab_want_init_on_free(s)) {
-               void *p = NULL;
+       /* Head and tail of the reconstructed freelist */
+       *head = NULL;
+       *tail = NULL;
 
-               do {
-                       object = next;
-                       next = get_freepointer(s, object);
+       do {
+               object = next;
+               next = get_freepointer(s, object);
+
+               if (slab_want_init_on_free(s)) {
                        /*
                         * Clear the object and the metadata, but don't touch
                         * the redzone.
@@ -1448,29 +1451,8 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s,
                                                           : 0;
                        memset((char *)object + s->inuse, 0,
                               s->size - s->inuse - rsize);
-                       set_freepointer(s, object, p);
-                       p = object;
-               } while (object != old_tail);
-       }
-
-/*
- * Compiler cannot detect this function can be removed if slab_free_hook()
- * evaluates to nothing.  Thus, catch all relevant config debug options here.
- */
-#if defined(CONFIG_LOCKDEP)    ||              \
-       defined(CONFIG_DEBUG_KMEMLEAK) ||       \
-       defined(CONFIG_DEBUG_OBJECTS_FREE) ||   \
-       defined(CONFIG_KASAN)
 
-       next = *head;
-
-       /* Head and tail of the reconstructed freelist */
-       *head = NULL;
-       *tail = NULL;
-
-       do {
-               object = next;
-               next = get_freepointer(s, object);
+               }
                /* If object's reuse doesn't have to be delayed */
                if (!slab_free_hook(s, object)) {
                        /* Move object to the new freelist */
@@ -1485,9 +1467,6 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s,
                *tail = NULL;
 
        return *head != NULL;
-#else
-       return true;
-#endif
 }
 
 static void *setup_object(struct kmem_cache *s, struct page *page,
index 5518a7d..128d37a 100644 (file)
@@ -86,11 +86,12 @@ static atomic_t skbcounter = ATOMIC_INIT(0);
 
 /* af_can socket functions */
 
-static void can_sock_destruct(struct sock *sk)
+void can_sock_destruct(struct sock *sk)
 {
        skb_queue_purge(&sk->sk_receive_queue);
        skb_queue_purge(&sk->sk_error_queue);
 }
+EXPORT_SYMBOL(can_sock_destruct);
 
 static const struct can_proto *can_get_proto(int protocol)
 {
index def2f81..137054b 100644 (file)
@@ -51,6 +51,7 @@ static void j1939_can_recv(struct sk_buff *iskb, void *data)
        if (!skb)
                return;
 
+       j1939_priv_get(priv);
        can_skb_set_owner(skb, iskb->sk);
 
        /* get a pointer to the header of the skb
@@ -104,6 +105,7 @@ static void j1939_can_recv(struct sk_buff *iskb, void *data)
        j1939_simple_recv(priv, skb);
        j1939_sk_recv(priv, skb);
  done:
+       j1939_priv_put(priv);
        kfree_skb(skb);
 }
 
@@ -150,6 +152,10 @@ static void __j1939_priv_release(struct kref *kref)
 
        netdev_dbg(priv->ndev, "%s: 0x%p\n", __func__, priv);
 
+       WARN_ON_ONCE(!list_empty(&priv->active_session_list));
+       WARN_ON_ONCE(!list_empty(&priv->ecus));
+       WARN_ON_ONCE(!list_empty(&priv->j1939_socks));
+
        dev_put(ndev);
        kfree(priv);
 }
@@ -207,6 +213,9 @@ static inline struct j1939_priv *j1939_ndev_to_priv(struct net_device *ndev)
 {
        struct can_ml_priv *can_ml_priv = ndev->ml_priv;
 
+       if (!can_ml_priv)
+               return NULL;
+
        return can_ml_priv->j1939_priv;
 }
 
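The j1939_can_recv() change pins j1939_priv for the whole receive path with a get/put pair so the final j1939_priv_put() from socket teardown cannot free it mid-delivery. A userspace sketch of that hold-while-using pattern with C11 atomics follows; priv_get()/priv_put() and struct priv are illustrative, not the kernel's kref-based code.

#include <stdatomic.h>
#include <stdlib.h>

struct priv {
        atomic_int refs;
        /* ... protocol state ... */
};

static struct priv *priv_get(struct priv *p)
{
        atomic_fetch_add(&p->refs, 1);
        return p;
}

static void priv_put(struct priv *p)
{
        if (atomic_fetch_sub(&p->refs, 1) == 1)
                free(p);                /* last reference gone */
}

static void recv_frame(struct priv *p /* , frame */)
{
        priv_get(p);                    /* pin p for the whole receive path */
        /* ... deliver the frame ... */
        priv_put(p);                    /* may free if the socket went away meanwhile */
}

int main(void)
{
        struct priv *p = calloc(1, sizeof(*p));

        if (!p)
                return 1;
        atomic_init(&p->refs, 1);       /* creator's reference */
        recv_frame(p);                  /* delivery path takes its own reference */
        priv_put(p);                    /* drop creator's reference: frees p */
        return 0;
}
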
index 4d8ba70..de09b0a 100644 (file)
@@ -78,7 +78,6 @@ static void j1939_jsk_add(struct j1939_priv *priv, struct j1939_sock *jsk)
 {
        jsk->state |= J1939_SOCK_BOUND;
        j1939_priv_get(priv);
-       jsk->priv = priv;
 
        spin_lock_bh(&priv->j1939_socks_lock);
        list_add_tail(&jsk->list, &priv->j1939_socks);
@@ -91,7 +90,6 @@ static void j1939_jsk_del(struct j1939_priv *priv, struct j1939_sock *jsk)
        list_del_init(&jsk->list);
        spin_unlock_bh(&priv->j1939_socks_lock);
 
-       jsk->priv = NULL;
        j1939_priv_put(priv);
        jsk->state &= ~J1939_SOCK_BOUND;
 }
@@ -349,6 +347,34 @@ void j1939_sk_recv(struct j1939_priv *priv, struct sk_buff *skb)
        spin_unlock_bh(&priv->j1939_socks_lock);
 }
 
+static void j1939_sk_sock_destruct(struct sock *sk)
+{
+       struct j1939_sock *jsk = j1939_sk(sk);
+
+       /* This function will be called by the generic networking code when
+        * the socket is ultimately closed (sk->sk_destruct).
+        *
+        * The race between
+        * - processing a received CAN frame
+        *   (can_receive -> j1939_can_recv)
+        *   and accessing j1939_priv
+        * ... and ...
+        * - closing a socket
+        *   (j1939_can_rx_unregister -> can_rx_unregister)
+        *   and calling the final j1939_priv_put()
+        *
+        * is avoided by calling the final j1939_priv_put() from this
+        * RCU deferred cleanup call.
+        */
+       if (jsk->priv) {
+               j1939_priv_put(jsk->priv);
+               jsk->priv = NULL;
+       }
+
+       /* call generic CAN sock destruct */
+       can_sock_destruct(sk);
+}
+
 static int j1939_sk_init(struct sock *sk)
 {
        struct j1939_sock *jsk = j1939_sk(sk);
@@ -371,6 +397,7 @@ static int j1939_sk_init(struct sock *sk)
        atomic_set(&jsk->skb_pending, 0);
        spin_lock_init(&jsk->sk_session_queue_lock);
        INIT_LIST_HEAD(&jsk->sk_session_queue);
+       sk->sk_destruct = j1939_sk_sock_destruct;
 
        return 0;
 }
@@ -443,6 +470,12 @@ static int j1939_sk_bind(struct socket *sock, struct sockaddr *uaddr, int len)
                }
 
                jsk->ifindex = addr->can_ifindex;
+
+               /* the corresponding j1939_priv_put() is called via
+                * sk->sk_destruct, which points to j1939_sk_sock_destruct()
+                */
+               j1939_priv_get(priv);
+               jsk->priv = priv;
        }
 
        /* set default transmit pgn */
@@ -560,8 +593,8 @@ static int j1939_sk_release(struct socket *sock)
        if (!sk)
                return 0;
 
-       jsk = j1939_sk(sk);
        lock_sock(sk);
+       jsk = j1939_sk(sk);
 
        if (jsk->state & J1939_SOCK_BOUND) {
                struct j1939_priv *priv = jsk->priv;
@@ -1059,51 +1092,72 @@ static int j1939_sk_sendmsg(struct socket *sock, struct msghdr *msg,
 {
        struct sock *sk = sock->sk;
        struct j1939_sock *jsk = j1939_sk(sk);
-       struct j1939_priv *priv = jsk->priv;
+       struct j1939_priv *priv;
        int ifindex;
        int ret;
 
+       lock_sock(sock->sk);
        /* various socket state tests */
-       if (!(jsk->state & J1939_SOCK_BOUND))
-               return -EBADFD;
+       if (!(jsk->state & J1939_SOCK_BOUND)) {
+               ret = -EBADFD;
+               goto sendmsg_done;
+       }
 
+       priv = jsk->priv;
        ifindex = jsk->ifindex;
 
-       if (!jsk->addr.src_name && jsk->addr.sa == J1939_NO_ADDR)
+       if (!jsk->addr.src_name && jsk->addr.sa == J1939_NO_ADDR) {
                /* no source address assigned yet */
-               return -EBADFD;
+               ret = -EBADFD;
+               goto sendmsg_done;
+       }
 
        /* deal with provided destination address info */
        if (msg->msg_name) {
                struct sockaddr_can *addr = msg->msg_name;
 
-               if (msg->msg_namelen < J1939_MIN_NAMELEN)
-                       return -EINVAL;
+               if (msg->msg_namelen < J1939_MIN_NAMELEN) {
+                       ret = -EINVAL;
+                       goto sendmsg_done;
+               }
 
-               if (addr->can_family != AF_CAN)
-                       return -EINVAL;
+               if (addr->can_family != AF_CAN) {
+                       ret = -EINVAL;
+                       goto sendmsg_done;
+               }
 
-               if (addr->can_ifindex && addr->can_ifindex != ifindex)
-                       return -EBADFD;
+               if (addr->can_ifindex && addr->can_ifindex != ifindex) {
+                       ret = -EBADFD;
+                       goto sendmsg_done;
+               }
 
                if (j1939_pgn_is_valid(addr->can_addr.j1939.pgn) &&
-                   !j1939_pgn_is_clean_pdu(addr->can_addr.j1939.pgn))
-                       return -EINVAL;
+                   !j1939_pgn_is_clean_pdu(addr->can_addr.j1939.pgn)) {
+                       ret = -EINVAL;
+                       goto sendmsg_done;
+               }
 
                if (!addr->can_addr.j1939.name &&
                    addr->can_addr.j1939.addr == J1939_NO_ADDR &&
-                   !sock_flag(sk, SOCK_BROADCAST))
+                   !sock_flag(sk, SOCK_BROADCAST)) {
                        /* broadcast, but SO_BROADCAST not set */
-                       return -EACCES;
+                       ret = -EACCES;
+                       goto sendmsg_done;
+               }
        } else {
                if (!jsk->addr.dst_name && jsk->addr.da == J1939_NO_ADDR &&
-                   !sock_flag(sk, SOCK_BROADCAST))
+                   !sock_flag(sk, SOCK_BROADCAST)) {
                        /* broadcast, but SO_BROADCAST not set */
-                       return -EACCES;
+                       ret = -EACCES;
+                       goto sendmsg_done;
+               }
        }
 
        ret = j1939_sk_send_loop(priv, sk, msg, size);
 
+sendmsg_done:
+       release_sock(sock->sk);
+
        return ret;
 }
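
A minimal sketch of the locking pattern the sendmsg rework above adopts, assuming a
hypothetical protocol (lock_sock()/release_sock() are the real socket-lock helpers;
example_sendmsg(), some_precondition() and do_transmit() are illustrative): take the
socket lock once on entry, route every failure through a single label, and release the
lock exactly once on exit.

static int example_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
{
        struct sock *sk = sock->sk;
        int ret;

        lock_sock(sk);                          /* serialize against bind/release */

        if (!some_precondition(sk)) {           /* hypothetical state check */
                ret = -EBADFD;
                goto out_unlock;                /* never return with the lock held */
        }

        ret = do_transmit(sk, msg, size);       /* hypothetical transmit helper */

out_unlock:
        release_sock(sk);
        return ret;
}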
 
index e5f1a56..9f99af5 100644 (file)
@@ -255,6 +255,7 @@ static void __j1939_session_drop(struct j1939_session *session)
                return;
 
        j1939_sock_pending_del(session->sk);
+       sock_put(session->sk);
 }
 
 static void j1939_session_destroy(struct j1939_session *session)
@@ -266,6 +267,9 @@ static void j1939_session_destroy(struct j1939_session *session)
 
        netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session);
 
+       WARN_ON_ONCE(!list_empty(&session->sk_session_queue_entry));
+       WARN_ON_ONCE(!list_empty(&session->active_session_list_entry));
+
        skb_queue_purge(&session->skb_queue);
        __j1939_session_drop(session);
        j1939_priv_put(session->priv);
@@ -1042,12 +1046,13 @@ j1939_session_deactivate_activate_next(struct j1939_session *session)
                j1939_sk_queue_activate_next(session);
 }
 
-static void j1939_session_cancel(struct j1939_session *session,
+static void __j1939_session_cancel(struct j1939_session *session,
                                 enum j1939_xtp_abort err)
 {
        struct j1939_priv *priv = session->priv;
 
        WARN_ON_ONCE(!err);
+       lockdep_assert_held(&session->priv->active_session_list_lock);
 
        session->err = j1939_xtp_abort_to_errno(priv, err);
        /* do not send aborts on incoming broadcasts */
@@ -1062,6 +1067,20 @@ static void j1939_session_cancel(struct j1939_session *session,
                j1939_sk_send_loop_abort(session->sk, session->err);
 }
 
+static void j1939_session_cancel(struct j1939_session *session,
+                                enum j1939_xtp_abort err)
+{
+       j1939_session_list_lock(session->priv);
+
+       if (session->state >= J1939_SESSION_ACTIVE &&
+           session->state < J1939_SESSION_WAITING_ABORT) {
+               j1939_tp_set_rxtimeout(session, J1939_XTP_ABORT_TIMEOUT_MS);
+               __j1939_session_cancel(session, err);
+       }
+
+       j1939_session_list_unlock(session->priv);
+}
+
 static enum hrtimer_restart j1939_tp_txtimer(struct hrtimer *hrtimer)
 {
        struct j1939_session *session =
@@ -1108,8 +1127,6 @@ static enum hrtimer_restart j1939_tp_txtimer(struct hrtimer *hrtimer)
                netdev_alert(priv->ndev, "%s: 0x%p: tx aborted with unknown reason: %i\n",
                             __func__, session, ret);
                if (session->skcb.addr.type != J1939_SIMPLE) {
-                       j1939_tp_set_rxtimeout(session,
-                                              J1939_XTP_ABORT_TIMEOUT_MS);
                        j1939_session_cancel(session, J1939_XTP_ABORT_OTHER);
                } else {
                        session->err = ret;
@@ -1169,7 +1186,7 @@ static enum hrtimer_restart j1939_tp_rxtimer(struct hrtimer *hrtimer)
                        hrtimer_start(&session->rxtimer,
                                      ms_to_ktime(J1939_XTP_ABORT_TIMEOUT_MS),
                                      HRTIMER_MODE_REL_SOFT);
-                       j1939_session_cancel(session, J1939_XTP_ABORT_TIMEOUT);
+                       __j1939_session_cancel(session, J1939_XTP_ABORT_TIMEOUT);
                }
                j1939_session_list_unlock(session->priv);
        }
@@ -1375,7 +1392,6 @@ j1939_xtp_rx_cts_one(struct j1939_session *session, struct sk_buff *skb)
 
  out_session_cancel:
        j1939_session_timers_cancel(session);
-       j1939_tp_set_rxtimeout(session, J1939_XTP_ABORT_TIMEOUT_MS);
        j1939_session_cancel(session, err);
 }
 
@@ -1572,7 +1588,6 @@ static int j1939_xtp_rx_rts_session_active(struct j1939_session *session,
 
                /* RTS on active session */
                j1939_session_timers_cancel(session);
-               j1939_tp_set_rxtimeout(session, J1939_XTP_ABORT_TIMEOUT_MS);
                j1939_session_cancel(session, J1939_XTP_ABORT_BUSY);
        }
 
@@ -1583,7 +1598,6 @@ static int j1939_xtp_rx_rts_session_active(struct j1939_session *session,
                             session->last_cmd);
 
                j1939_session_timers_cancel(session);
-               j1939_tp_set_rxtimeout(session, J1939_XTP_ABORT_TIMEOUT_MS);
                j1939_session_cancel(session, J1939_XTP_ABORT_BUSY);
 
                return -EBUSY;
@@ -1785,7 +1799,6 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session,
 
  out_session_cancel:
        j1939_session_timers_cancel(session);
-       j1939_tp_set_rxtimeout(session, J1939_XTP_ABORT_TIMEOUT_MS);
        j1939_session_cancel(session, J1939_XTP_ABORT_FAULT);
        j1939_session_put(session);
 }
@@ -1866,6 +1879,7 @@ struct j1939_session *j1939_tp_send(struct j1939_priv *priv,
                return ERR_PTR(-ENOMEM);
 
        /* skb is refcounted in j1939_session_new() */
+       sock_hold(skb->sk);
        session->sk = skb->sk;
        session->transmission = true;
        session->pkt.total = (size + 6) / 7;
@@ -2028,7 +2042,11 @@ int j1939_cancel_active_session(struct j1939_priv *priv, struct sock *sk)
                                 &priv->active_session_list,
                                 active_session_list_entry) {
                if (!sk || sk == session->sk) {
-                       j1939_session_timers_cancel(session);
+                       if (hrtimer_try_to_cancel(&session->txtimer) == 1)
+                               j1939_session_put(session);
+                       if (hrtimer_try_to_cancel(&session->rxtimer) == 1)
+                               j1939_session_put(session);
+
                        session->err = ESHUTDOWN;
                        j1939_session_deactivate_locked(session);
                }
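
The hunk above leans on hrtimer_try_to_cancel()'s return convention: 1 means a queued
timer was removed before it could fire, 0 means it was not queued, and -1 means its
callback is currently running. A hedged sketch of the resulting reference rule, reusing
the session names from the diff (the helper itself is illustrative):

static void example_stop_txtimer(struct j1939_session *session)
{
        switch (hrtimer_try_to_cancel(&session->txtimer)) {
        case 1:         /* timer was queued and is now cancelled: drop the ref it held */
                j1939_session_put(session);
                break;
        case 0:         /* timer was not queued: nothing to release */
        case -1:        /* callback is running and will drop the reference itself */
                break;
        }
}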
index 1338f5f..4c63c9a 100644 (file)
@@ -2812,7 +2812,7 @@ static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
        struct net *dest_net = NULL;
        int err;
 
-       if (!devlink_reload_supported(devlink))
+       if (!devlink_reload_supported(devlink) || !devlink->reload_enabled)
                return -EOPNOTSUPP;
 
        err = devlink_resources_validate(devlink, NULL, info);
@@ -4747,6 +4747,7 @@ struct devlink_health_reporter {
        bool auto_recover;
        u8 health_state;
        u64 dump_ts;
+       u64 dump_real_ts;
        u64 error_count;
        u64 recovery_count;
        u64 last_recovery_ts;
@@ -4923,6 +4924,7 @@ static int devlink_health_do_dump(struct devlink_health_reporter *reporter,
                goto dump_err;
 
        reporter->dump_ts = jiffies;
+       reporter->dump_real_ts = ktime_get_real_ns();
 
        return 0;
 
@@ -5072,6 +5074,10 @@ devlink_nl_health_reporter_fill(struct sk_buff *msg,
                              jiffies_to_msecs(reporter->dump_ts),
                              DEVLINK_ATTR_PAD))
                goto reporter_nest_cancel;
+       if (reporter->dump_fmsg &&
+           nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS_NS,
+                             reporter->dump_real_ts, DEVLINK_ATTR_PAD))
+               goto reporter_nest_cancel;
 
        nla_nest_end(msg, reporter_attr);
        genlmsg_end(msg, hdr);
index 73632d2..2fb6c26 100644 (file)
@@ -105,7 +105,7 @@ static int dsa_8021q_restore_pvid(struct dsa_switch *ds, int port)
        slave = dsa_to_port(ds, port)->slave;
 
        err = br_vlan_get_pvid(slave, &pvid);
-       if (err < 0)
+       if (!pvid || err < 0)
                /* There is no pvid on the bridge for this port, which is
                 * perfectly valid. Nothing to restore, bye-bye!
                 */
index 440294b..6e68def 100644 (file)
@@ -2291,7 +2291,8 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb,
                        rcu_read_unlock();
                        return -ENODEV;
                }
-               skb2 = skb_clone(skb, GFP_ATOMIC);
+
+               skb2 = skb_realloc_headroom(skb, sizeof(struct iphdr));
                if (!skb2) {
                        read_unlock(&mrt_lock);
                        rcu_read_unlock();
index 9d4f75e..e705674 100644 (file)
@@ -81,6 +81,11 @@ static struct ipv6_sr_hdr *get_srh(struct sk_buff *skb)
        if (!pskb_may_pull(skb, srhoff + len))
                return NULL;
 
+       /* note that pskb_may_pull may change pointers in header;
+        * for this reason it is necessary to reload them when needed.
+        */
+       srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
+
        if (!seg6_validate_srh(srh, len))
                return NULL;
 
@@ -336,6 +341,8 @@ static int input_action_end_dx6(struct sk_buff *skb,
        if (!ipv6_addr_any(&slwt->nh6))
                nhaddr = &slwt->nh6;
 
+       skb_set_transport_header(skb, sizeof(struct ipv6hdr));
+
        seg6_lookup_nexthop(skb, nhaddr, 0);
 
        return dst_input(skb);
@@ -365,6 +372,8 @@ static int input_action_end_dx4(struct sk_buff *skb,
 
        skb_dst_drop(skb);
 
+       skb_set_transport_header(skb, sizeof(struct iphdr));
+
        err = ip_route_input(skb, nhaddr, iph->saddr, 0, skb->dev);
        if (err)
                goto drop;
@@ -385,6 +394,8 @@ static int input_action_end_dt6(struct sk_buff *skb,
        if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
                goto drop;
 
+       skb_set_transport_header(skb, sizeof(struct ipv6hdr));
+
        seg6_lookup_nexthop(skb, NULL, slwt->table);
 
        return dst_input(skb);
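
The reload in the first hunk above reflects a general rule: pskb_may_pull() may
reallocate the skb's linear header, so pointers derived from skb->data before the call
can be stale afterwards. A short sketch of the pattern, assuming the same SRH layout as
the diff (example_get_srh() is illustrative):

static struct ipv6_sr_hdr *example_get_srh(struct sk_buff *skb, int srhoff, int len)
{
        struct ipv6_sr_hdr *srh;

        if (!pskb_may_pull(skb, srhoff + len))
                return NULL;

        /* recompute after pskb_may_pull(): skb->data may have moved */
        srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);

        return srh;
}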
index 6b345c8..c71f432 100644 (file)
@@ -513,6 +513,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
        struct ib_qp_init_attr attr;
        struct ib_cq_init_attr cq_attr = {};
        struct rds_ib_device *rds_ibdev;
+       unsigned long max_wrs;
        int ret, fr_queue_space;
        struct dma_pool *pool;
 
@@ -533,10 +534,15 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
        /* add the conn now so that connection establishment has the dev */
        rds_ib_add_conn(rds_ibdev, conn);
 
-       if (rds_ibdev->max_wrs < ic->i_send_ring.w_nr + 1)
-               rds_ib_ring_resize(&ic->i_send_ring, rds_ibdev->max_wrs - 1);
-       if (rds_ibdev->max_wrs < ic->i_recv_ring.w_nr + 1)
-               rds_ib_ring_resize(&ic->i_recv_ring, rds_ibdev->max_wrs - 1);
+       max_wrs = rds_ibdev->max_wrs < rds_ib_sysctl_max_send_wr + 1 ?
+               rds_ibdev->max_wrs - 1 : rds_ib_sysctl_max_send_wr;
+       if (ic->i_send_ring.w_nr != max_wrs)
+               rds_ib_ring_resize(&ic->i_send_ring, max_wrs);
+
+       max_wrs = rds_ibdev->max_wrs < rds_ib_sysctl_max_recv_wr + 1 ?
+               rds_ibdev->max_wrs - 1 : rds_ib_sysctl_max_recv_wr;
+       if (ic->i_recv_ring.w_nr != max_wrs)
+               rds_ib_ring_resize(&ic->i_recv_ring, max_wrs);
 
        /* Protection domain and memory range */
        ic->i_pd = rds_ibdev->pd;
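
Each ternary above that computes max_wrs is equivalent to a min() clamp; the send-side
case, as a sketch (not part of the patch):

        /* cap the ring at the smaller of the sysctl limit and the device limit minus one */
        max_wrs = min_t(unsigned long, rds_ibdev->max_wrs - 1,
                        rds_ib_sysctl_max_send_wr);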
@@ -1176,8 +1182,9 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp)
        ic->i_flowctl = 0;
        atomic_set(&ic->i_credits, 0);
 
-       rds_ib_ring_init(&ic->i_send_ring, rds_ib_sysctl_max_send_wr);
-       rds_ib_ring_init(&ic->i_recv_ring, rds_ib_sysctl_max_recv_wr);
+       /* Re-init rings, but retain sizes. */
+       rds_ib_ring_init(&ic->i_send_ring, ic->i_send_ring.w_nr);
+       rds_ib_ring_init(&ic->i_recv_ring, ic->i_recv_ring.w_nr);
 
        if (ic->i_ibinc) {
                rds_inc_put(&ic->i_ibinc->ii_inc);
@@ -1224,8 +1231,8 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp)
         * rds_ib_conn_shutdown() waits for these to be emptied so they
         * must be initialized before it can be called.
         */
-       rds_ib_ring_init(&ic->i_send_ring, rds_ib_sysctl_max_send_wr);
-       rds_ib_ring_init(&ic->i_recv_ring, rds_ib_sysctl_max_recv_wr);
+       rds_ib_ring_init(&ic->i_send_ring, 0);
+       rds_ib_ring_init(&ic->i_recv_ring, 0);
 
        ic->conn = conn;
        conn->c_transport_data = ic;
index cde4dc0..b997072 100644 (file)
@@ -799,6 +799,7 @@ static void smc_connect_work(struct work_struct *work)
                        smc->sk.sk_err = EPIPE;
                else if (signal_pending(current))
                        smc->sk.sk_err = -sock_intr_errno(timeo);
+               sock_put(&smc->sk); /* passive closing */
                goto out;
        }
 
@@ -1736,7 +1737,7 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
        case TCP_FASTOPEN_KEY:
        case TCP_FASTOPEN_NO_COOKIE:
                /* option not supported by SMC */
-               if (sk->sk_state == SMC_INIT) {
+               if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) {
                        smc_switch_to_fallback(smc);
                        smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
                } else {
index fc01a13..7532a00 100644 (file)
@@ -34,8 +34,6 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
 #include "core.h"
 #include "name_table.h"
 #include "subscr.h"
index 775848a..631d83c 100644 (file)
 #include <net/genetlink.h>
 #include <net/netns/hash.h>
 
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 struct tipc_node;
 struct tipc_bearer;
 struct tipc_bc_base;
index 9b599ed..2c86a2f 100644 (file)
@@ -480,6 +480,9 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
                        else
                                XFRM_INC_STATS(net,
                                               LINUX_MIB_XFRMINSTATEINVALID);
+
+                       if (encap_type == -1)
+                               dev_put(skb->dev);
                        goto drop;
                }
 
index c6f3c4a..f342356 100644 (file)
@@ -495,6 +495,8 @@ static void ___xfrm_state_destroy(struct xfrm_state *x)
                x->type->destructor(x);
                xfrm_put_type(x->type);
        }
+       if (x->xfrag.page)
+               put_page(x->xfrag.page);
        xfrm_dev_state_free(x);
        security_xfrm_state_free(x);
        xfrm_state_free(x);
index 97a2c84..45e8aa3 100755 (executable)
@@ -4,13 +4,13 @@
 tmp_file=$(mktemp)
 trap "rm -f $tmp_file.o $tmp_file $tmp_file.bin" EXIT
 
-cat << "END" | "$CC" -c -x c - -o $tmp_file.o >/dev/null 2>&1
+cat << "END" | $CC -c -x c - -o $tmp_file.o >/dev/null 2>&1
 void *p = &p;
 END
-"$LD" $tmp_file.o -shared -Bsymbolic --pack-dyn-relocs=relr -o $tmp_file
+$LD $tmp_file.o -shared -Bsymbolic --pack-dyn-relocs=relr -o $tmp_file
 
 # Despite printing an error message, GNU nm still exits with exit code 0 if it
 # sees a relr section. So we need to check that nothing is printed to stderr.
-test -z "$("$NM" $tmp_file 2>&1 >/dev/null)"
+test -z "$($NM $tmp_file 2>&1 >/dev/null)"
 
-"$OBJCOPY" -O binary $tmp_file $tmp_file.bin
+$OBJCOPY -O binary $tmp_file $tmp_file.bin
index d80041e..2236b5e 100644 (file)
@@ -1782,11 +1782,14 @@ void snd_pcm_period_elapsed(struct snd_pcm_substream *substream)
        struct snd_pcm_runtime *runtime;
        unsigned long flags;
 
-       if (PCM_RUNTIME_CHECK(substream))
+       if (snd_BUG_ON(!substream))
                return;
-       runtime = substream->runtime;
 
        snd_pcm_stream_lock_irqsave(substream, flags);
+       if (PCM_RUNTIME_CHECK(substream))
+               goto _unlock;
+       runtime = substream->runtime;
+
        if (!snd_pcm_running(substream) ||
            snd_pcm_update_hw_ptr0(substream, 1) < 0)
                goto _end;
@@ -1797,6 +1800,7 @@ void snd_pcm_period_elapsed(struct snd_pcm_substream *substream)
 #endif
  _end:
        kill_fasync(&runtime->fasync, SIGIO, POLL_IN);
+ _unlock:
        snd_pcm_stream_unlock_irqrestore(substream, flags);
 }
 EXPORT_SYMBOL(snd_pcm_period_elapsed);
index cf53fbd..c524193 100644 (file)
@@ -2396,6 +2396,9 @@ static const struct pci_device_id azx_ids[] = {
        /* CometLake-H */
        { PCI_DEVICE(0x8086, 0x06C8),
          .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE},
+       /* CometLake-S */
+       { PCI_DEVICE(0x8086, 0xa3f0),
+         .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE},
        /* Icelake */
        { PCI_DEVICE(0x8086, 0x34c8),
          .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE},
index 3c72070..78bd2e3 100644 (file)
@@ -46,10 +46,12 @@ MODULE_PARM_DESC(static_hdmi_pcm, "Don't restrict PCM parameters per ELD info");
                                ((codec)->core.vendor_id == 0x80862800))
 #define is_cannonlake(codec) ((codec)->core.vendor_id == 0x8086280c)
 #define is_icelake(codec) ((codec)->core.vendor_id == 0x8086280f)
+#define is_tigerlake(codec) ((codec)->core.vendor_id == 0x80862812)
 #define is_haswell_plus(codec) (is_haswell(codec) || is_broadwell(codec) \
                                || is_skylake(codec) || is_broxton(codec) \
                                || is_kabylake(codec) || is_geminilake(codec) \
-                               || is_cannonlake(codec) || is_icelake(codec))
+                               || is_cannonlake(codec) || is_icelake(codec) \
+                               || is_tigerlake(codec))
 #define is_valleyview(codec) ((codec)->core.vendor_id == 0x80862882)
 #define is_cherryview(codec) ((codec)->core.vendor_id == 0x80862883)
 #define is_valleyview_plus(codec) (is_valleyview(codec) || is_cherryview(codec))
index a2ab8e8..4a9a2f6 100644 (file)
@@ -388,6 +388,9 @@ static void snd_complete_urb(struct urb *urb)
                }
 
                prepare_outbound_urb(ep, ctx);
+               /* can be stopped during prepare callback */
+               if (unlikely(!test_bit(EP_FLAG_RUNNING, &ep->flags)))
+                       goto exit_clear;
        } else {
                retire_inbound_urb(ep, ctx);
                /* can be stopped during retire callback */
index 3fd1d17..45eee5c 100644 (file)
@@ -1229,7 +1229,8 @@ static int get_min_max_with_quirks(struct usb_mixer_elem_info *cval,
                if (cval->min + cval->res < cval->max) {
                        int last_valid_res = cval->res;
                        int saved, test, check;
-                       get_cur_mix_raw(cval, minchn, &saved);
+                       if (get_cur_mix_raw(cval, minchn, &saved) < 0)
+                               goto no_res_check;
                        for (;;) {
                                test = saved;
                                if (test < cval->max)
@@ -1249,6 +1250,7 @@ static int get_min_max_with_quirks(struct usb_mixer_elem_info *cval,
                        snd_usb_set_cur_mix_value(cval, minchn, 0, saved);
                }
 
+no_res_check:
                cval->initialized = 1;
        }
 
index 0bbe120..349e1e5 100644 (file)
@@ -248,8 +248,8 @@ static int create_yamaha_midi_quirk(struct snd_usb_audio *chip,
                                        NULL, USB_MS_MIDI_OUT_JACK);
        if (!injd && !outjd)
                return -ENODEV;
-       if (!(injd && snd_usb_validate_midi_desc(injd)) ||
-           !(outjd && snd_usb_validate_midi_desc(outjd)))
+       if ((injd && !snd_usb_validate_midi_desc(injd)) ||
+           (outjd && !snd_usb_validate_midi_desc(outjd)))
                return -ENODEV;
        if (injd && (injd->bLength < 5 ||
                     (injd->bJackType != USB_MS_EMBEDDED &&
index a5e584b..389e865 100644 (file)
@@ -81,9 +81,9 @@ static bool validate_processing_unit(const void *p,
        switch (v->protocol) {
        case UAC_VERSION_1:
        default:
-               /* bNrChannels, wChannelConfig, iChannelNames, bControlSize */
-               len += 1 + 2 + 1 + 1;
-               if (d->bLength < len) /* bControlSize */
+               /* bNrChannels, wChannelConfig, iChannelNames */
+               len += 1 + 2 + 1;
+               if (d->bLength < len + 1) /* bControlSize */
                        return false;
                m = hdr[len];
                len += 1 + m + 1; /* bControlSize, bmControls, iProcessing */
index 679a1d7..7b6eaf5 100644 (file)
@@ -1625,7 +1625,7 @@ int hists__collapse_resort(struct hists *hists, struct ui_progress *prog)
        return 0;
 }
 
-static int hist_entry__sort(struct hist_entry *a, struct hist_entry *b)
+static int64_t hist_entry__sort(struct hist_entry *a, struct hist_entry *b)
 {
        struct hists *hists = a->hists;
        struct perf_hpp_fmt *fmt;
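
A hedged illustration of why the wider return type matters here (the values are made
up): the per-format comparators this function aggregates return 64-bit results, and
narrowing such a result to int can flip its sign and reorder entries incorrectly.

        int64_t cmp = (int64_t)1 << 31;         /* a positive 64-bit comparison result */
        int narrowed = (int)cmp;                /* overflows int; typically wraps to INT_MIN */
        /* cmp > 0 but narrowed < 0: the sort would see the opposite ordering */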
index 1596185..741f040 100644 (file)
@@ -539,10 +539,11 @@ static int perl_stop_script(void)
 
 static int perl_generate_script(struct tep_handle *pevent, const char *outfile)
 {
+       int i, not_first, count, nr_events;
+       struct tep_event **all_events;
        struct tep_event *event = NULL;
        struct tep_format_field *f;
        char fname[PATH_MAX];
-       int not_first, count;
        FILE *ofp;
 
        sprintf(fname, "%s.pl", outfile);
@@ -603,8 +604,11 @@ sub print_backtrace\n\
 }\n\n\
 ");
 
+       nr_events = tep_get_events_count(pevent);
+       all_events = tep_list_events(pevent, TEP_EVENT_SORT_ID);
 
-       while ((event = trace_find_next_event(pevent, event))) {
+       for (i = 0; all_events && i < nr_events; i++) {
+               event = all_events[i];
                fprintf(ofp, "sub %s::%s\n{\n", event->system, event->name);
                fprintf(ofp, "\tmy (");
 
index 5d341ef..93c03b3 100644 (file)
@@ -1687,10 +1687,11 @@ static int python_stop_script(void)
 
 static int python_generate_script(struct tep_handle *pevent, const char *outfile)
 {
+       int i, not_first, count, nr_events;
+       struct tep_event **all_events;
        struct tep_event *event = NULL;
        struct tep_format_field *f;
        char fname[PATH_MAX];
-       int not_first, count;
        FILE *ofp;
 
        sprintf(fname, "%s.py", outfile);
@@ -1735,7 +1736,11 @@ static int python_generate_script(struct tep_handle *pevent, const char *outfile
        fprintf(ofp, "def trace_end():\n");
        fprintf(ofp, "\tprint(\"in trace_end\")\n\n");
 
-       while ((event = trace_find_next_event(pevent, event))) {
+       nr_events = tep_get_events_count(pevent);
+       all_events = tep_list_events(pevent, TEP_EVENT_SORT_ID);
+
+       for (i = 0; all_events && i < nr_events; i++) {
+               event = all_events[i];
                fprintf(ofp, "def %s__%s(", event->system, event->name);
                fprintf(ofp, "event_name, ");
                fprintf(ofp, "context, ");
index 5d6bfc7..9634f0a 100644 (file)
@@ -173,37 +173,6 @@ int parse_event_file(struct tep_handle *pevent,
        return tep_parse_event(pevent, buf, size, sys);
 }
 
-struct tep_event *trace_find_next_event(struct tep_handle *pevent,
-                                       struct tep_event *event)
-{
-       static int idx;
-       int events_count;
-       struct tep_event *all_events;
-
-       all_events = tep_get_first_event(pevent);
-       events_count = tep_get_events_count(pevent);
-       if (!pevent || !all_events || events_count < 1)
-               return NULL;
-
-       if (!event) {
-               idx = 0;
-               return all_events;
-       }
-
-       if (idx < events_count && event == (all_events + idx)) {
-               idx++;
-               if (idx == events_count)
-                       return NULL;
-               return (all_events + idx);
-       }
-
-       for (idx = 1; idx < events_count; idx++) {
-               if (event == (all_events + (idx - 1)))
-                       return (all_events + idx);
-       }
-       return NULL;
-}
-
 struct flag {
        const char *name;
        unsigned long long value;
index 2e15838..72fdf2a 100644 (file)
@@ -47,8 +47,6 @@ void parse_saved_cmdline(struct tep_handle *pevent, char *file, unsigned int siz
 
 ssize_t trace_report(int fd, struct trace_event *tevent, bool repipe);
 
-struct tep_event *trace_find_next_event(struct tep_handle *pevent,
-                                       struct tep_event *event);
 unsigned long long read_size(struct tep_event *event, void *ptr, int size);
 unsigned long long eval_flag(const char *flag);
 
index ae6146e..4632f51 100755 (executable)
@@ -112,14 +112,16 @@ sanitization_single_dev_mcast_group_test()
        RET=0
 
        ip link add dev br0 type bridge mcast_snooping 0
+       ip link add name dummy1 up type dummy
 
        ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
                ttl 20 tos inherit local 198.51.100.1 dstport 4789 \
-               dev $swp2 group 239.0.0.1
+               dev dummy1 group 239.0.0.1
 
        sanitization_single_dev_test_fail
 
        ip link del dev vxlan0
+       ip link del dev dummy1
        ip link del dev br0
 
        log_test "vxlan device with a multicast group"
@@ -181,13 +183,15 @@ sanitization_single_dev_local_interface_test()
        RET=0
 
        ip link add dev br0 type bridge mcast_snooping 0
+       ip link add name dummy1 up type dummy
 
        ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
-               ttl 20 tos inherit local 198.51.100.1 dstport 4789 dev $swp2
+               ttl 20 tos inherit local 198.51.100.1 dstport 4789 dev dummy1
 
        sanitization_single_dev_test_fail
 
        ip link del dev vxlan0
+       ip link del dev dummy1
        ip link del dev br0
 
        log_test "vxlan device with local interface"
index 4911fc7..d1cf9f6 100644 (file)
@@ -55,7 +55,7 @@ static void test_dump_stack(void)
 #pragma GCC diagnostic pop
 }
 
-static pid_t gettid(void)
+static pid_t _gettid(void)
 {
        return syscall(SYS_gettid);
 }
@@ -72,7 +72,7 @@ test_assert(bool exp, const char *exp_str,
                fprintf(stderr, "==== Test Assertion Failure ====\n"
                        "  %s:%u: %s\n"
                        "  pid=%d tid=%d - %s\n",
-                       file, line, exp_str, getpid(), gettid(),
+                       file, line, exp_str, getpid(), _gettid(),
                        strerror(errno));
                test_dump_stack();
                if (fmt) {
index bd4a724..c0dd102 100644 (file)
@@ -44,6 +44,46 @@ static int clock_adjtime(clockid_t id, struct timex *tx)
 }
 #endif
 
+static void show_flag_test(int rq_index, unsigned int flags, int err)
+{
+       printf("PTP_EXTTS_REQUEST%c flags 0x%08x : (%d) %s\n",
+              rq_index ? '1' + rq_index : ' ',
+              flags, err, strerror(errno));
+       /* sigh, uClibc ... */
+       errno = 0;
+}
+
+static void do_flag_test(int fd, unsigned int index)
+{
+       struct ptp_extts_request extts_request;
+       unsigned long request[2] = {
+               PTP_EXTTS_REQUEST,
+               PTP_EXTTS_REQUEST2,
+       };
+       unsigned int enable_flags[5] = {
+               PTP_ENABLE_FEATURE,
+               PTP_ENABLE_FEATURE | PTP_RISING_EDGE,
+               PTP_ENABLE_FEATURE | PTP_FALLING_EDGE,
+               PTP_ENABLE_FEATURE | PTP_RISING_EDGE | PTP_FALLING_EDGE,
+               PTP_ENABLE_FEATURE | (PTP_EXTTS_VALID_FLAGS + 1),
+       };
+       int err, i, j;
+
+       memset(&extts_request, 0, sizeof(extts_request));
+       extts_request.index = index;
+
+       for (i = 0; i < 2; i++) {
+               for (j = 0; j < 5; j++) {
+                       extts_request.flags = enable_flags[j];
+                       err = ioctl(fd, request[i], &extts_request);
+                       show_flag_test(i, extts_request.flags, err);
+
+                       extts_request.flags = 0;
+                       err = ioctl(fd, request[i], &extts_request);
+               }
+       }
+}
+
 static clockid_t get_clockid(int fd)
 {
 #define CLOCKFD 3
@@ -96,7 +136,8 @@ static void usage(char *progname)
                " -s         set the ptp clock time from the system time\n"
                " -S         set the system time from the ptp clock time\n"
                " -t val     shift the ptp clock time by 'val' seconds\n"
-               " -T val     set the ptp clock time to 'val' seconds\n",
+               " -T val     set the ptp clock time to 'val' seconds\n"
+               " -z         test combinations of rising/falling external time stamp flags\n",
                progname);
 }
 
@@ -122,6 +163,7 @@ int main(int argc, char *argv[])
        int adjtime = 0;
        int capabilities = 0;
        int extts = 0;
+       int flagtest = 0;
        int gettime = 0;
        int index = 0;
        int list_pins = 0;
@@ -138,7 +180,7 @@ int main(int argc, char *argv[])
 
        progname = strrchr(argv[0], '/');
        progname = progname ? 1+progname : argv[0];
-       while (EOF != (c = getopt(argc, argv, "cd:e:f:ghi:k:lL:p:P:sSt:T:v"))) {
+       while (EOF != (c = getopt(argc, argv, "cd:e:f:ghi:k:lL:p:P:sSt:T:z"))) {
                switch (c) {
                case 'c':
                        capabilities = 1;
@@ -191,6 +233,9 @@ int main(int argc, char *argv[])
                        settime = 3;
                        seconds = atoi(optarg);
                        break;
+               case 'z':
+                       flagtest = 1;
+                       break;
                case 'h':
                        usage(progname);
                        return 0;
@@ -322,6 +367,10 @@ int main(int argc, char *argv[])
                }
        }
 
+       if (flagtest) {
+               do_flag_test(fd, index);
+       }
+
        if (list_pins) {
                int n_pins = 0;
                if (ioctl(fd, PTP_CLOCK_GETCAPS, &caps)) {
index d6f0696..13efc29 100644 (file)
@@ -50,6 +50,7 @@
 #include <linux/bsearch.h>
 #include <linux/io.h>
 #include <linux/lockdep.h>
+#include <linux/kthread.h>
 
 #include <asm/processor.h>
 #include <asm/ioctl.h>
@@ -121,9 +122,22 @@ static long kvm_vcpu_compat_ioctl(struct file *file, unsigned int ioctl,
                                  unsigned long arg);
 #define KVM_COMPAT(c)  .compat_ioctl   = (c)
 #else
+/*
+ * For architectures that don't implement a compat infrastructure,
+ * adopt a double line of defense:
+ * - Prevent a compat task from opening /dev/kvm
+ * - If the open has been done by a 64bit task, and the KVM fd
+ *   passed to a compat task, let the ioctls fail.
+ */
 static long kvm_no_compat_ioctl(struct file *file, unsigned int ioctl,
                                unsigned long arg) { return -EINVAL; }
-#define KVM_COMPAT(c)  .compat_ioctl   = kvm_no_compat_ioctl
+
+static int kvm_no_compat_open(struct inode *inode, struct file *file)
+{
+       return is_compat_task() ? -ENODEV : 0;
+}
+#define KVM_COMPAT(c)  .compat_ioctl   = kvm_no_compat_ioctl,  \
+                       .open           = kvm_no_compat_open
 #endif
 static int hardware_enable_all(void);
 static void hardware_disable_all(void);
@@ -149,10 +163,30 @@ __weak int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
        return 0;
 }
 
+bool kvm_is_zone_device_pfn(kvm_pfn_t pfn)
+{
+       /*
+        * The metadata used by is_zone_device_page() to determine whether or
+        * not a page is ZONE_DEVICE is guaranteed to be valid if and only if
+        * the device has been pinned, e.g. by get_user_pages().  WARN if the
+        * page_count() is zero to help detect bad usage of this helper.
+        */
+       if (!pfn_valid(pfn) || WARN_ON_ONCE(!page_count(pfn_to_page(pfn))))
+               return false;
+
+       return is_zone_device_page(pfn_to_page(pfn));
+}
+
 bool kvm_is_reserved_pfn(kvm_pfn_t pfn)
 {
+       /*
+        * ZONE_DEVICE pages currently set PG_reserved, but from a refcounting
+        * perspective they are "normal" pages, albeit with slightly different
+        * usage rules.
+        */
        if (pfn_valid(pfn))
-               return PageReserved(pfn_to_page(pfn));
+               return PageReserved(pfn_to_page(pfn)) &&
+                      !kvm_is_zone_device_pfn(pfn);
 
        return true;
 }
@@ -625,6 +659,23 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
        return 0;
 }
 
+/*
+ * Called after the VM is otherwise initialized, but just before adding it to
+ * the vm_list.
+ */
+int __weak kvm_arch_post_init_vm(struct kvm *kvm)
+{
+       return 0;
+}
+
+/*
+ * Called just after removing the VM from the vm_list, but before doing any
+ * other destruction.
+ */
+void __weak kvm_arch_pre_destroy_vm(struct kvm *kvm)
+{
+}
+
 static struct kvm *kvm_create_vm(unsigned long type)
 {
        struct kvm *kvm = kvm_arch_alloc_vm();
@@ -645,6 +696,12 @@ static struct kvm *kvm_create_vm(unsigned long type)
 
        BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX);
 
+       if (init_srcu_struct(&kvm->srcu))
+               goto out_err_no_srcu;
+       if (init_srcu_struct(&kvm->irq_srcu))
+               goto out_err_no_irq_srcu;
+
+       refcount_set(&kvm->users_count, 1);
        for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
                struct kvm_memslots *slots = kvm_alloc_memslots();
 
@@ -662,7 +719,6 @@ static struct kvm *kvm_create_vm(unsigned long type)
                        goto out_err_no_arch_destroy_vm;
        }
 
-       refcount_set(&kvm->users_count, 1);
        r = kvm_arch_init_vm(kvm, type);
        if (r)
                goto out_err_no_arch_destroy_vm;
@@ -675,12 +731,11 @@ static struct kvm *kvm_create_vm(unsigned long type)
        INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
 #endif
 
-       if (init_srcu_struct(&kvm->srcu))
-               goto out_err_no_srcu;
-       if (init_srcu_struct(&kvm->irq_srcu))
-               goto out_err_no_irq_srcu;
-
        r = kvm_init_mmu_notifier(kvm);
+       if (r)
+               goto out_err_no_mmu_notifier;
+
+       r = kvm_arch_post_init_vm(kvm);
        if (r)
                goto out_err;
 
@@ -693,19 +748,24 @@ static struct kvm *kvm_create_vm(unsigned long type)
        return kvm;
 
 out_err:
-       cleanup_srcu_struct(&kvm->irq_srcu);
-out_err_no_irq_srcu:
-       cleanup_srcu_struct(&kvm->srcu);
-out_err_no_srcu:
+#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
+       if (kvm->mmu_notifier.ops)
+               mmu_notifier_unregister(&kvm->mmu_notifier, current->mm);
+#endif
+out_err_no_mmu_notifier:
        hardware_disable_all();
 out_err_no_disable:
        kvm_arch_destroy_vm(kvm);
-       WARN_ON_ONCE(!refcount_dec_and_test(&kvm->users_count));
 out_err_no_arch_destroy_vm:
+       WARN_ON_ONCE(!refcount_dec_and_test(&kvm->users_count));
        for (i = 0; i < KVM_NR_BUSES; i++)
                kfree(kvm_get_bus(kvm, i));
        for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
                kvm_free_memslots(kvm, __kvm_memslots(kvm, i));
+       cleanup_srcu_struct(&kvm->irq_srcu);
+out_err_no_irq_srcu:
+       cleanup_srcu_struct(&kvm->srcu);
+out_err_no_srcu:
        kvm_arch_free_vm(kvm);
        mmdrop(current->mm);
        return ERR_PTR(r);
@@ -737,6 +797,8 @@ static void kvm_destroy_vm(struct kvm *kvm)
        mutex_lock(&kvm_lock);
        list_del(&kvm->vm_list);
        mutex_unlock(&kvm_lock);
+       kvm_arch_pre_destroy_vm(kvm);
+
        kvm_free_irq_routing(kvm);
        for (i = 0; i < KVM_NR_BUSES; i++) {
                struct kvm_io_bus *bus = kvm_get_bus(kvm, i);
@@ -1857,7 +1919,7 @@ EXPORT_SYMBOL_GPL(kvm_release_pfn_dirty);
 
 void kvm_set_pfn_dirty(kvm_pfn_t pfn)
 {
-       if (!kvm_is_reserved_pfn(pfn)) {
+       if (!kvm_is_reserved_pfn(pfn) && !kvm_is_zone_device_pfn(pfn)) {
                struct page *page = pfn_to_page(pfn);
 
                SetPageDirty(page);
@@ -1867,7 +1929,7 @@ EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty);
 
 void kvm_set_pfn_accessed(kvm_pfn_t pfn)
 {
-       if (!kvm_is_reserved_pfn(pfn))
+       if (!kvm_is_reserved_pfn(pfn) && !kvm_is_zone_device_pfn(pfn))
                mark_page_accessed(pfn_to_page(pfn));
 }
 EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed);
@@ -4371,3 +4433,86 @@ void kvm_exit(void)
        kvm_vfio_ops_exit();
 }
 EXPORT_SYMBOL_GPL(kvm_exit);
+
+struct kvm_vm_worker_thread_context {
+       struct kvm *kvm;
+       struct task_struct *parent;
+       struct completion init_done;
+       kvm_vm_thread_fn_t thread_fn;
+       uintptr_t data;
+       int err;
+};
+
+static int kvm_vm_worker_thread(void *context)
+{
+       /*
+        * The init_context is allocated on the stack of the parent thread, so
+        * we have to locally copy anything that is needed beyond initialization.
+        */
+       struct kvm_vm_worker_thread_context *init_context = context;
+       struct kvm *kvm = init_context->kvm;
+       kvm_vm_thread_fn_t thread_fn = init_context->thread_fn;
+       uintptr_t data = init_context->data;
+       int err;
+
+       err = kthread_park(current);
+       /* kthread_park(current) is never supposed to return an error */
+       WARN_ON(err != 0);
+       if (err)
+               goto init_complete;
+
+       err = cgroup_attach_task_all(init_context->parent, current);
+       if (err) {
+               kvm_err("%s: cgroup_attach_task_all failed with err %d\n",
+                       __func__, err);
+               goto init_complete;
+       }
+
+       set_user_nice(current, task_nice(init_context->parent));
+
+init_complete:
+       init_context->err = err;
+       complete(&init_context->init_done);
+       init_context = NULL;
+
+       if (err)
+               return err;
+
+       /* Wait to be woken up by the spawner before proceeding. */
+       kthread_parkme();
+
+       if (!kthread_should_stop())
+               err = thread_fn(kvm, data);
+
+       return err;
+}
+
+int kvm_vm_create_worker_thread(struct kvm *kvm, kvm_vm_thread_fn_t thread_fn,
+                               uintptr_t data, const char *name,
+                               struct task_struct **thread_ptr)
+{
+       struct kvm_vm_worker_thread_context init_context = {};
+       struct task_struct *thread;
+
+       *thread_ptr = NULL;
+       init_context.kvm = kvm;
+       init_context.parent = current;
+       init_context.thread_fn = thread_fn;
+       init_context.data = data;
+       init_completion(&init_context.init_done);
+
+       thread = kthread_run(kvm_vm_worker_thread, &init_context,
+                            "%s-%d", name, task_pid_nr(current));
+       if (IS_ERR(thread))
+               return PTR_ERR(thread);
+
+       /* kthread_run is never supposed to return NULL */
+       WARN_ON(thread == NULL);
+
+       wait_for_completion(&init_context.init_done);
+
+       if (!init_context.err)
+               *thread_ptr = thread;
+
+       return init_context.err;
+}
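
A hedged usage sketch of the new helper (the caller, thread function, and unpark call
below are illustrative, not part of this merge): kvm_vm_create_worker_thread() blocks on
init_done until the worker has attached itself to the caller's cgroups and parked, and
the caller later unparks it to start the real work.

static int example_thread_fn(struct kvm *kvm, uintptr_t data)
{
        /* runs in the worker once it is unparked; the return value is its exit code */
        return 0;
}

static int example_start_worker(struct kvm *kvm)
{
        struct task_struct *thread;
        int err;

        err = kvm_vm_create_worker_thread(kvm, example_thread_fn, 0,
                                          "example-worker", &thread);
        if (err)
                return err;

        kthread_unpark(thread);         /* let the parked worker run example_thread_fn */
        return 0;
}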