Merge tag 'trace-v4.15-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt...
author Linus Torvalds <torvalds@linux-foundation.org>
Wed, 27 Dec 2017 21:06:57 +0000 (13:06 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 27 Dec 2017 21:06:57 +0000 (13:06 -0800)
Pull tracing fixes from Steven Rostedt:
 "While doing tests on tracing over the network, I found that the
  packets were getting corrupted.

  In the process I found three bugs.

  One was the culprit, but the other two scared me. After deeper
  investigation, they turned out not to be as serious as I thought,
  because a signed value was being compared against an unsigned one,
  which prevented a negative number from doing actual harm.

  The two bigger bugs:

   - Mask the ring buffer data page length. There are data flags in the
     high bits of the length field. The length function did not clear
     them, so the length it returned could effectively be a negative
     number. (The value returned was unsigned, but it was assigned to a
     signed variable.) Luckily, this value was compared against
     PAGE_SIZE, which is unsigned, and that kept it from entering the
     path that could have caused damage.

   - Check the page usage before reusing the ring buffer reader page.
     TCP increments the page refcount when it hands the page off to the
     network. On free, the page is handed back to the ring buffer for
     reuse, but at that point it may still be in use by the TCP stack.

  Minor bugs:

   - Related to the first bug: there is no need to clear out the unused
     ring buffer data before sending it to user space, as the ring
     buffer code now does that itself.

   - Reset pointers after free on the error path. In some error paths,
     pointers were freed but not set to NULL, so they could be freed a
     second time (a double free)"
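
The first of the bigger fixes boils down to masking the flag bits out of
the length word before using it. A minimal sketch in C, with a
hypothetical flag layout and hypothetical names rather than the actual
ring buffer code:

	/* Hypothetical layout: the two high bits of the commit/length
	 * word carry status flags, not length bits. */
	#define DATA_FLAG_BITS	(3UL << 30)

	static unsigned long data_page_len(unsigned long commit)
	{
		/* Without this mask, a set flag bit inflates the
		 * "length", or turns it negative once it is assigned
		 * to a signed variable. */
		return commit & ~DATA_FLAG_BITS;
	}

The accident that limited the damage can be sketched the same way: the
unmasked value landed in a signed int, but the comparison against the
unsigned PAGE_SIZE promoted a negative length to a huge unsigned value,
so it was rejected before reaching the dangerous path:

	static int check_len(unsigned long commit)
	{
		int len = commit;	/* flag bits still set: may be negative */

		if (len > PAGE_SIZE)	/* len is promoted to unsigned long, so
					 * a negative len compares as huge and
					 * is rejected here */
			return -EINVAL;

		return len;
	}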

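The reader-page fix comes down to refusing to recycle a page whose
refcount shows another user. A simplified sketch, not the exact kernel
change (page_ref_count() is the stock page refcount helper):

	#include <linux/page_ref.h>

	static bool can_reuse_reader_page(struct page *page)
	{
		/* TCP takes its own reference when the page is handed
		 * off to the network; until that reference drops, the
		 * ring buffer must not hand the page out again. */
		return page_ref_count(page) == 1;
	}
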
* tag 'trace-v4.15-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace:
  tracing: Fix possible double free on failure of allocating trace buffer
  tracing: Fix crash when it fails to alloc ring buffer
  ring-buffer: Do no reuse reader page if still in use
  tracing: Remove extra zeroing out of the ring buffer page
  ring-buffer: Mask out the info bits when returning buffer page length
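
The double-free fixes follow the usual kernel pattern: NULL a pointer as
soon as it is freed, so that a shared error path can pass it to kfree()
again harmlessly, since kfree(NULL) is a no-op. A generic sketch with a
hypothetical structure, not the actual tracing code:

	#include <linux/slab.h>

	struct trace_bufs {		/* hypothetical */
		void *pages;
		void *meta;
	};

	static void free_trace_bufs(struct trace_bufs *tb)
	{
		kfree(tb->pages);
		tb->pages = NULL;	/* a second call is now a no-op */

		kfree(tb->meta);
		tb->meta = NULL;
	}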

321 files changed:
Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt
Documentation/devicetree/bindings/sound/da7218.txt
Documentation/devicetree/bindings/sound/da7219.txt
Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt
Documentation/x86/x86_64/mm.txt
Makefile
arch/arm/lib/csumpartialcopyuser.S
arch/arm64/kvm/hyp/debug-sr.c
arch/parisc/boot/compressed/misc.c
arch/parisc/include/asm/thread_info.h
arch/parisc/kernel/entry.S
arch/parisc/kernel/hpmc.S
arch/parisc/kernel/unwind.c
arch/parisc/lib/delay.c
arch/powerpc/include/asm/mmu_context.h
arch/powerpc/kernel/process.c
arch/powerpc/kvm/book3s_xive.c
arch/powerpc/net/bpf_jit_comp64.c
arch/powerpc/perf/core-book3s.c
arch/powerpc/perf/imc-pmu.c
arch/s390/net/bpf_jit_comp.c
arch/sparc/mm/fault_32.c
arch/sparc/mm/fault_64.c
arch/sparc/net/bpf_jit_comp_64.c
arch/um/include/asm/mmu_context.h
arch/um/kernel/trap.c
arch/unicore32/include/asm/mmu_context.h
arch/x86/Kconfig
arch/x86/entry/entry_32.S
arch/x86/entry/entry_64.S
arch/x86/entry/entry_64_compat.S
arch/x86/entry/vsyscall/vsyscall_64.c
arch/x86/include/asm/cpu_entry_area.h [new file with mode: 0644]
arch/x86/include/asm/cpufeature.h
arch/x86/include/asm/desc.h
arch/x86/include/asm/espfix.h
arch/x86/include/asm/fixmap.h
arch/x86/include/asm/hypervisor.h
arch/x86/include/asm/invpcid.h [new file with mode: 0644]
arch/x86/include/asm/irqflags.h
arch/x86/include/asm/kdebug.h
arch/x86/include/asm/mmu.h
arch/x86/include/asm/mmu_context.h
arch/x86/include/asm/paravirt.h
arch/x86/include/asm/pgtable_32_types.h
arch/x86/include/asm/pgtable_64_types.h
arch/x86/include/asm/processor.h
arch/x86/include/asm/stacktrace.h
arch/x86/include/asm/switch_to.h
arch/x86/include/asm/thread_info.h
arch/x86/include/asm/tlbflush.h
arch/x86/include/asm/traps.h
arch/x86/include/asm/unwind.h
arch/x86/kernel/asm-offsets.c
arch/x86/kernel/asm-offsets_32.c
arch/x86/kernel/asm-offsets_64.c
arch/x86/kernel/cpu/common.c
arch/x86/kernel/cpu/microcode/intel.c
arch/x86/kernel/doublefault.c
arch/x86/kernel/dumpstack.c
arch/x86/kernel/dumpstack_32.c
arch/x86/kernel/dumpstack_64.c
arch/x86/kernel/ioport.c
arch/x86/kernel/irq.c
arch/x86/kernel/irq_64.c
arch/x86/kernel/ldt.c
arch/x86/kernel/paravirt_patch_64.c
arch/x86/kernel/process.c
arch/x86/kernel/process_32.c
arch/x86/kernel/process_64.c
arch/x86/kernel/smpboot.c
arch/x86/kernel/traps.c
arch/x86/kernel/unwind_orc.c
arch/x86/kernel/vmlinux.lds.S
arch/x86/kvm/emulate.c
arch/x86/kvm/mmu.c
arch/x86/kvm/vmx.c
arch/x86/kvm/x86.c
arch/x86/lib/delay.c
arch/x86/mm/Makefile
arch/x86/mm/cpu_entry_area.c [new file with mode: 0644]
arch/x86/mm/dump_pagetables.c
arch/x86/mm/fault.c
arch/x86/mm/init_32.c
arch/x86/mm/kasan_init_64.c
arch/x86/mm/pgtable_32.c
arch/x86/mm/tlb.c
arch/x86/platform/uv/tlb_uv.c
arch/x86/power/cpu.c
arch/x86/xen/enlighten.c
arch/x86/xen/enlighten_pv.c
arch/x86/xen/mmu_pv.c
arch/x86/xen/setup.c
block/bio.c
block/blk-map.c
block/blk-throttle.c
block/bounce.c
block/kyber-iosched.c
crypto/af_alg.c
crypto/algif_aead.c
crypto/algif_skcipher.c
crypto/mcryptd.c
crypto/skcipher.c
drivers/acpi/apei/erst.c
drivers/acpi/cppc_acpi.c
drivers/acpi/nfit/core.c
drivers/block/null_blk.c
drivers/clk/clk.c
drivers/clk/sunxi/clk-sun9i-mmc.c
drivers/cpufreq/cpufreq_governor.c
drivers/cpufreq/imx6q-cpufreq.c
drivers/gpio/gpio-reg.c
drivers/gpio/gpiolib-acpi.c
drivers/gpio/gpiolib-devprop.c
drivers/gpio/gpiolib-of.c
drivers/gpio/gpiolib.h
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h
drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c
drivers/gpu/drm/amd/display/dc/core/dc_link.c
drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c
drivers/gpu/drm/drm_lease.c
drivers/gpu/drm/drm_plane.c
drivers/gpu/drm/drm_syncobj.c
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_sw_fence.c
drivers/gpu/drm/i915/intel_breadcrumbs.c
drivers/gpu/drm/i915/intel_ddi.c
drivers/gpu/drm/i915/intel_display.c
drivers/gpu/drm/i915/intel_lpe_audio.c
drivers/gpu/drm/nouveau/nouveau_bo.c
drivers/gpu/drm/nouveau/nouveau_drv.h
drivers/gpu/drm/nouveau/nouveau_fbcon.c
drivers/gpu/drm/nouveau/nouveau_mem.c
drivers/gpu/drm/nouveau/nouveau_ttm.c
drivers/gpu/drm/nouveau/nouveau_vmm.c
drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c
drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c
drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c
drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c
drivers/gpu/drm/sun4i/sun4i_tcon.c
drivers/gpu/drm/ttm/ttm_page_alloc.c
drivers/hwmon/hwmon.c
drivers/mfd/cros_ec_spi.c
drivers/mfd/twl4030-audio.c
drivers/mfd/twl6040.c
drivers/mtd/mtdcore.c
drivers/mtd/nand/brcmnand/brcmnand.c
drivers/mtd/nand/gpio.c
drivers/mtd/nand/gpmi-nand/gpmi-nand.c
drivers/net/ethernet/arc/emac.h
drivers/net/ethernet/arc/emac_main.c
drivers/net/ethernet/broadcom/tg3.c
drivers/net/ethernet/marvell/mvneta.c
drivers/net/ethernet/mediatek/mtk_eth_soc.c
drivers/net/ethernet/mellanox/mlx5/core/cmd.c
drivers/net/ethernet/mellanox/mlx5/core/en.h
drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/eq.c
drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c
drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
drivers/net/ethernet/mellanox/mlx5/core/health.c
drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
drivers/net/ethernet/mellanox/mlx5/core/main.c
drivers/net/ethernet/mellanox/mlx5/core/qp.c
drivers/net/ethernet/mellanox/mlx5/core/rl.c
drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
drivers/net/ethernet/mellanox/mlx5/core/vxlan.h
drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
drivers/net/ethernet/netronome/nfp/bpf/main.c
drivers/net/ethernet/netronome/nfp/bpf/main.h
drivers/net/ethernet/qualcomm/emac/emac.c
drivers/net/ethernet/stmicro/stmmac/common.h
drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
drivers/net/ethernet/stmicro/stmmac/enh_desc.c
drivers/net/ethernet/stmicro/stmmac/norm_desc.c
drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
drivers/net/phy/marvell.c
drivers/net/phy/mdio-xgene.c
drivers/net/vxlan.c
drivers/net/wireless/mac80211_hwsim.c
drivers/nvdimm/btt.c
drivers/nvdimm/btt.h
drivers/nvdimm/pfn_devs.c
drivers/nvme/host/core.c
drivers/nvme/host/fc.c
drivers/parisc/lba_pci.c
drivers/pci/pci-driver.c
drivers/pinctrl/intel/pinctrl-cherryview.c
drivers/s390/net/qeth_core_main.c
drivers/scsi/aacraid/aacraid.h
drivers/scsi/aacraid/linit.c
drivers/scsi/osd/osd_initiator.c
drivers/scsi/scsi_devinfo.c
drivers/scsi/scsi_scan.c
drivers/scsi/scsi_sysfs.c
drivers/scsi/scsi_transport_spi.c
drivers/spi/spi-armada-3700.c
drivers/spi/spi-atmel.c
drivers/spi/spi-rspi.c
drivers/spi/spi-sun4i.c
drivers/spi/spi-xilinx.c
drivers/target/target_core_pscsi.c
drivers/xen/balloon.c
fs/xfs/libxfs/xfs_alloc.c
fs/xfs/libxfs/xfs_attr.c
fs/xfs/libxfs/xfs_attr_leaf.c
fs/xfs/libxfs/xfs_attr_leaf.h
fs/xfs/libxfs/xfs_bmap.c
fs/xfs/libxfs/xfs_defer.c
fs/xfs/libxfs/xfs_defer.h
fs/xfs/libxfs/xfs_iext_tree.c
fs/xfs/libxfs/xfs_refcount.c
fs/xfs/libxfs/xfs_rmap.c
fs/xfs/libxfs/xfs_rmap.h
fs/xfs/xfs_extfree_item.c
fs/xfs/xfs_fsops.c
fs/xfs/xfs_icache.c
fs/xfs/xfs_icache.h
fs/xfs/xfs_inode.c
fs/xfs/xfs_inode.h
fs/xfs/xfs_reflink.c
fs/xfs/xfs_super.c
include/asm-generic/mm_hooks.h
include/asm-generic/pgtable.h
include/crypto/mcryptd.h
include/kvm/arm_arch_timer.h
include/linux/bio.h
include/linux/blk_types.h
include/linux/blkdev.h
include/linux/bpf_verifier.h
include/linux/ipv6.h
include/linux/mfd/rtsx_pci.h
include/linux/mlx5/driver.h
include/linux/mlx5/mlx5_ifc.h
include/linux/spi/spi.h
include/net/cfg80211.h
include/net/pkt_cls.h
include/trace/events/clk.h
include/trace/events/kvm.h
include/xen/balloon.h
init/main.c
kernel/bpf/verifier.c
kernel/fork.c
lib/test_bpf.c
mm/backing-dev.c
net/bridge/br_netlink.c
net/core/dev.c
net/core/net_namespace.c
net/core/skbuff.c
net/ipv4/fib_frontend.c
net/ipv4/fib_semantics.c
net/ipv4/ip_gre.c
net/ipv6/af_inet6.c
net/ipv6/ip6_gre.c
net/ipv6/ip6_output.c
net/ipv6/ip6_tunnel.c
net/ipv6/ipv6_sockglue.c
net/ipv6/route.c
net/openvswitch/flow.c
net/sched/cls_bpf.c
net/sctp/debug.c
net/sctp/ulpqueue.c
net/tipc/group.c
net/wireless/Makefile
net/wireless/certs/sforshee.hex [new file with mode: 0644]
net/wireless/certs/sforshee.x509 [deleted file]
net/wireless/nl80211.c
sound/core/rawmidi.c
sound/hda/hdac_i915.c
sound/pci/hda/patch_conexant.c
sound/pci/hda/patch_hdmi.c
sound/pci/hda/patch_realtek.c
sound/soc/amd/acp-pcm-dma.c
sound/soc/atmel/Kconfig
sound/soc/codecs/da7218.c
sound/soc/codecs/msm8916-wcd-analog.c
sound/soc/codecs/msm8916-wcd-digital.c
sound/soc/codecs/nau8825.c
sound/soc/codecs/rt5514-spi.c
sound/soc/codecs/rt5514.c
sound/soc/codecs/rt5645.c
sound/soc/codecs/rt5663.c
sound/soc/codecs/rt5663.h
sound/soc/codecs/tlv320aic31xx.h
sound/soc/codecs/twl4030.c
sound/soc/codecs/wm_adsp.c
sound/soc/fsl/fsl_asrc.h
sound/soc/fsl/fsl_ssi.c
sound/soc/intel/boards/kbl_rt5663_max98927.c
sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c
sound/soc/intel/skylake/skl-nhlt.c
sound/soc/intel/skylake/skl-topology.c
sound/soc/rockchip/rockchip_spdif.c
sound/soc/sh/rcar/adg.c
sound/soc/sh/rcar/core.c
sound/soc/sh/rcar/dma.c
sound/soc/sh/rcar/ssi.c
sound/soc/sh/rcar/ssiu.c
sound/usb/mixer.c
sound/usb/quirks.c
tools/arch/s390/include/uapi/asm/bpf_perf_event.h
tools/kvm/kvm_stat/kvm_stat
tools/kvm/kvm_stat/kvm_stat.txt
tools/testing/selftests/bpf/Makefile
tools/testing/selftests/bpf/test_progs.c
tools/testing/selftests/bpf/test_verifier.c
tools/testing/selftests/net/config
tools/testing/selftests/x86/ldt_gdt.c
virt/kvm/arm/arch_timer.c
virt/kvm/arm/arm.c
virt/kvm/arm/mmio.c
virt/kvm/arm/mmu.c

index 376fa2f..956bb04 100644 (file)
@@ -13,7 +13,6 @@ Required properties:
                  at25df321a
                  at25df641
                  at26df081a
-                 en25s64
                  mr25h128
                  mr25h256
                  mr25h10
@@ -33,7 +32,6 @@ Required properties:
                  s25fl008k
                  s25fl064k
                  sst25vf040b
-                 sst25wf040b
                  m25p40
                  m25p80
                  m25p16
index 5ca5a70..3ab9dfe 100644 (file)
@@ -73,7 +73,7 @@ Example:
                compatible = "dlg,da7218";
                reg = <0x1a>;
                interrupt-parent = <&gpio6>;
-               interrupts = <11 IRQ_TYPE_LEVEL_HIGH>;
+               interrupts = <11 IRQ_TYPE_LEVEL_LOW>;
                wakeup-source;
 
                VDD-supply = <&reg_audio>;
index cf61681..5b54d2d 100644 (file)
@@ -77,7 +77,7 @@ Example:
                reg = <0x1a>;
 
                interrupt-parent = <&gpio6>;
-               interrupts = <11 IRQ_TYPE_LEVEL_HIGH>;
+               interrupts = <11 IRQ_TYPE_LEVEL_LOW>;
 
                VDD-supply = <&reg_audio>;
                VDDMIC-supply = <&reg_audio>;
index 5bf1396..e3c48b2 100644 (file)
@@ -12,24 +12,30 @@ Required properties:
   - "fsl,imx53-ecspi" for SPI compatible with the one integrated on i.MX53 and later Soc
 - reg : Offset and length of the register set for the device
 - interrupts : Should contain CSPI/eCSPI interrupt
-- cs-gpios : Specifies the gpio pins to be used for chipselects.
 - clocks : Clock specifiers for both ipg and per clocks.
 - clock-names : Clock names should include both "ipg" and "per"
 See the clock consumer binding,
        Documentation/devicetree/bindings/clock/clock-bindings.txt
-- dmas: DMA specifiers for tx and rx dma. See the DMA client binding,
-               Documentation/devicetree/bindings/dma/dma.txt
-- dma-names: DMA request names should include "tx" and "rx" if present.
 
-Obsolete properties:
-- fsl,spi-num-chipselects : Contains the number of the chipselect
+Recommended properties:
+- cs-gpios : GPIOs to use as chip selects, see spi-bus.txt.  While the native chip
+select lines can be used, they appear to always generate a pulse between each
+word of a transfer.  Most use cases will require GPIO based chip selects to
+generate a valid transaction.
 
 Optional properties:
+- num-cs :  Number of total chip selects, see spi-bus.txt.
+- dmas: DMA specifiers for tx and rx dma. See the DMA client binding,
+Documentation/devicetree/bindings/dma/dma.txt.
+- dma-names: DMA request names, if present, should include "tx" and "rx".
 - fsl,spi-rdy-drctl: Integer, representing the value of DRCTL, the register
 controlling the SPI_READY handling. Note that to enable the DRCTL consideration,
 the SPI_READY mode-flag needs to be set too.
 Valid values are: 0 (disabled), 1 (edge-triggered burst) and 2 (level-triggered burst).
 
+Obsolete properties:
+- fsl,spi-num-chipselects : Contains the number of the chipselect
+
 Example:
 
 ecspi@70010000 {
index 3448e67..5110170 100644 (file)
@@ -1,6 +1,4 @@
 
-<previous description obsolete, deleted>
-
 Virtual memory map with 4 level page tables:
 
 0000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm
@@ -14,13 +12,15 @@ ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB)
 ... unused hole ...
 ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB)
 ... unused hole ...
+fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping
 ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
 ... unused hole ...
 ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
 ... unused hole ...
 ffffffff80000000 - ffffffff9fffffff (=512 MB)  kernel text mapping, from phys 0
-ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space (variable)
-ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls
+ffffffffa0000000 - [fixmap start]   (~1526 MB) module mapping space (variable)
+[fixmap start]   - ffffffffff5fffff kernel-internal fixmap range
+ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI
 ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
 
 Virtual memory map with 5 level page tables:
@@ -36,19 +36,22 @@ ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB)
 ... unused hole ...
 ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB)
 ... unused hole ...
+fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping
 ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
 ... unused hole ...
 ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
 ... unused hole ...
 ffffffff80000000 - ffffffff9fffffff (=512 MB)  kernel text mapping, from phys 0
-ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space
-ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls
+ffffffffa0000000 - [fixmap start]   (~1526 MB) module mapping space
+[fixmap start]   - ffffffffff5fffff kernel-internal fixmap range
+ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI
 ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
 
 Architecture defines a 64-bit virtual address. Implementations can support
 less. Currently supported are 48- and 57-bit virtual addresses. Bits 63
-through to the most-significant implemented bit are set to either all ones
-or all zero. This causes hole between user space and kernel addresses.
+through to the most-significant implemented bit are sign extended.
+This causes a hole between user space and kernel addresses if you interpret them
+as unsigned.
 
 The direct mapping covers all memory in the system up to the highest
 memory address (this means in some cases it can also include PCI memory
@@ -58,9 +61,6 @@ vmalloc space is lazily synchronized into the different PML4/PML5 pages of
 the processes using the page fault handler, with init_top_pgt as
 reference.
 
-Current X86-64 implementations support up to 46 bits of address space (64 TB),
-which is our current limit. This expands into MBZ space in the page tables.
-
 We map EFI runtime services in the 'efi_pgd' PGD in a 64Gb large virtual
 memory window (this size is arbitrary, it can be raised later if needed).
 The mappings are not part of any other kernel PGD and are only available
@@ -72,5 +72,3 @@ following fixmap section.
 Note that if CONFIG_RANDOMIZE_MEMORY is enabled, the direct mapping of all
 physical memory, vmalloc/ioremap space and virtual memory map are randomized.
 Their order is preserved but their base will be offset early at boot time.
-
--Andi Kleen, Jul 2004
index 7e02f95..ac8c441 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 4
 PATCHLEVEL = 15
 SUBLEVEL = 0
-EXTRAVERSION = -rc4
+EXTRAVERSION = -rc5
 NAME = Fearless Coyote
 
 # *DOCUMENTATION*
index 1712f13..b83fdc0 100644 (file)
                .pushsection .text.fixup,"ax"
                .align  4
 9001:          mov     r4, #-EFAULT
+#ifdef CONFIG_CPU_SW_DOMAIN_PAN
+               ldr     r5, [sp, #9*4]          @ *err_ptr
+#else
                ldr     r5, [sp, #8*4]          @ *err_ptr
+#endif
                str     r4, [r5]
                ldmia   sp, {r1, r2}            @ retrieve dst, len
                add     r2, r2, r1
index 321c9c0..f4363d4 100644 (file)
@@ -74,6 +74,9 @@ static void __hyp_text __debug_save_spe_nvhe(u64 *pmscr_el1)
 {
        u64 reg;
 
+       /* Clear pmscr in case of early return */
+       *pmscr_el1 = 0;
+
        /* SPE present on this CPU? */
        if (!cpuid_feature_extract_unsigned_field(read_sysreg(id_aa64dfr0_el1),
                                                  ID_AA64DFR0_PMSVER_SHIFT))
index 9345b44..f57118e 100644 (file)
@@ -123,8 +123,8 @@ int puts(const char *s)
        while ((nuline = strchr(s, '\n')) != NULL) {
                if (nuline != s)
                        pdc_iodc_print(s, nuline - s);
-                       pdc_iodc_print("\r\n", 2);
-                       s = nuline + 1;
+               pdc_iodc_print("\r\n", 2);
+               s = nuline + 1;
        }
        if (*s != '\0')
                pdc_iodc_print(s, strlen(s));
index c980a02..598c8d6 100644 (file)
@@ -35,7 +35,12 @@ struct thread_info {
 
 /* thread information allocation */
 
+#ifdef CONFIG_IRQSTACKS
+#define THREAD_SIZE_ORDER      2 /* PA-RISC requires at least 16k stack */
+#else
 #define THREAD_SIZE_ORDER      3 /* PA-RISC requires at least 32k stack */
+#endif
+
 /* Be sure to hunt all references to this down when you change the size of
  * the kernel stack */
 #define THREAD_SIZE             (PAGE_SIZE << THREAD_SIZE_ORDER)
index a4fd296..f3cecf5 100644 (file)
@@ -878,9 +878,6 @@ ENTRY_CFI(syscall_exit_rfi)
        STREG   %r19,PT_SR7(%r16)
 
 intr_return:
-       /* NOTE: Need to enable interrupts incase we schedule. */
-       ssm     PSW_SM_I, %r0
-
        /* check for reschedule */
        mfctl   %cr30,%r1
        LDREG   TI_FLAGS(%r1),%r19      /* sched.h: TIF_NEED_RESCHED */
@@ -907,6 +904,11 @@ intr_check_sig:
        LDREG   PT_IASQ1(%r16), %r20
        cmpib,COND(=),n 0,%r20,intr_restore /* backward */
 
+       /* NOTE: We need to enable interrupts if we have to deliver
+        * signals. We used to do this earlier but it caused kernel
+        * stack overflows. */
+       ssm     PSW_SM_I, %r0
+
        copy    %r0, %r25                       /* long in_syscall = 0 */
 #ifdef CONFIG_64BIT
        ldo     -16(%r30),%r29                  /* Reference param save area */
@@ -958,6 +960,10 @@ intr_do_resched:
        cmpib,COND(=)   0, %r20, intr_do_preempt
        nop
 
+       /* NOTE: We need to enable interrupts if we schedule.  We used
+        * to do this earlier but it caused kernel stack overflows. */
+       ssm     PSW_SM_I, %r0
+
 #ifdef CONFIG_64BIT
        ldo     -16(%r30),%r29          /* Reference param save area */
 #endif
index e3a8e5e..8d072c4 100644 (file)
@@ -305,6 +305,7 @@ ENDPROC_CFI(os_hpmc)
 
 
        __INITRODATA
+       .align 4
        .export os_hpmc_size
 os_hpmc_size:
        .word .os_hpmc_end-.os_hpmc
index 5a65798..143f90e 100644 (file)
@@ -15,7 +15,6 @@
 #include <linux/slab.h>
 #include <linux/kallsyms.h>
 #include <linux/sort.h>
-#include <linux/sched.h>
 
 #include <linux/uaccess.h>
 #include <asm/assembly.h>
index 7eab4bb..66e5065 100644 (file)
@@ -16,9 +16,7 @@
 #include <linux/preempt.h>
 #include <linux/init.h>
 
-#include <asm/processor.h>
 #include <asm/delay.h>
-
 #include <asm/special_insns.h>    /* for mfctl() */
 #include <asm/processor.h> /* for boot_cpu_data */
 
index 6177d43..e2a2b84 100644 (file)
@@ -160,9 +160,10 @@ static inline void enter_lazy_tlb(struct mm_struct *mm,
 #endif
 }
 
-static inline void arch_dup_mmap(struct mm_struct *oldmm,
-                                struct mm_struct *mm)
+static inline int arch_dup_mmap(struct mm_struct *oldmm,
+                               struct mm_struct *mm)
 {
+       return 0;
 }
 
 #ifndef CONFIG_PPC_BOOK3S_64
index 5acb5a1..72be0c3 100644 (file)
@@ -1403,7 +1403,7 @@ void show_regs(struct pt_regs * regs)
 
        printk("NIP:  "REG" LR: "REG" CTR: "REG"\n",
               regs->nip, regs->link, regs->ctr);
-       printk("REGS: %p TRAP: %04lx   %s  (%s)\n",
+       printk("REGS: %px TRAP: %04lx   %s  (%s)\n",
               regs, regs->trap, print_tainted(), init_utsname()->release);
        printk("MSR:  "REG" ", regs->msr);
        print_msr_bits(regs->msr);
index bf45784..0d750d2 100644 (file)
@@ -725,7 +725,8 @@ u64 kvmppc_xive_get_icp(struct kvm_vcpu *vcpu)
 
        /* Return the per-cpu state for state saving/migration */
        return (u64)xc->cppr << KVM_REG_PPC_ICP_CPPR_SHIFT |
-              (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT;
+              (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT |
+              (u64)0xff << KVM_REG_PPC_ICP_PPRI_SHIFT;
 }
 
 int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval)
@@ -1558,7 +1559,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)
 
        /*
         * Restore P and Q. If the interrupt was pending, we
-        * force both P and Q, which will trigger a resend.
+        * force Q and !P, which will trigger a resend.
         *
         * That means that a guest that had both an interrupt
         * pending (queued) and Q set will restore with only
@@ -1566,7 +1567,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)
         * is perfectly fine as coalescing interrupts that haven't
         * been presented yet is always allowed.
         */
-       if (val & KVM_XICS_PRESENTED || val & KVM_XICS_PENDING)
+       if (val & KVM_XICS_PRESENTED && !(val & KVM_XICS_PENDING))
                state->old_p = true;
        if (val & KVM_XICS_QUEUED || val & KVM_XICS_PENDING)
                state->old_q = true;
index 46d74e8..d183b48 100644 (file)
@@ -763,7 +763,8 @@ emit_clear:
                        func = (u8 *) __bpf_call_base + imm;
 
                        /* Save skb pointer if we need to re-cache skb data */
-                       if (bpf_helper_changes_pkt_data(func))
+                       if ((ctx->seen & SEEN_SKB) &&
+                           bpf_helper_changes_pkt_data(func))
                                PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx));
 
                        bpf_jit_emit_func_call(image, ctx, (u64)func);
@@ -772,7 +773,8 @@ emit_clear:
                        PPC_MR(b2p[BPF_REG_0], 3);
 
                        /* refresh skb cache */
-                       if (bpf_helper_changes_pkt_data(func)) {
+                       if ((ctx->seen & SEEN_SKB) &&
+                           bpf_helper_changes_pkt_data(func)) {
                                /* reload skb pointer to r3 */
                                PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx));
                                bpf_jit_emit_skb_loads(image, ctx);
index 1538129..fce5457 100644 (file)
@@ -410,8 +410,12 @@ static __u64 power_pmu_bhrb_to(u64 addr)
        int ret;
        __u64 target;
 
-       if (is_kernel_addr(addr))
-               return branch_target((unsigned int *)addr);
+       if (is_kernel_addr(addr)) {
+               if (probe_kernel_read(&instr, (void *)addr, sizeof(instr)))
+                       return 0;
+
+               return branch_target(&instr);
+       }
 
        /* Userspace: need copy instruction here then translate it */
        pagefault_disable();
index 0ead3cd..be4e7f8 100644 (file)
@@ -309,6 +309,19 @@ static int ppc_nest_imc_cpu_offline(unsigned int cpu)
        if (!cpumask_test_and_clear_cpu(cpu, &nest_imc_cpumask))
                return 0;
 
+       /*
+        * Check whether nest_imc is registered. We could end up here if the
+        * cpuhotplug callback registration fails. i.e, callback invokes the
+        * offline path for all successfully registered nodes. At this stage,
+        * nest_imc pmu will not be registered and we should return here.
+        *
+        * We return with a zero since this is not an offline failure. And
+        * cpuhp_setup_state() returns the actual failure reason to the caller,
+        * which in turn will call the cleanup routine.
+        */
+       if (!nest_pmus)
+               return 0;
+
        /*
         * Now that this cpu is one of the designated,
         * find a next cpu a) which is online and b) in same chip.
@@ -1171,6 +1184,7 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)
                if (nest_pmus == 1) {
                        cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE);
                        kfree(nest_imc_refc);
+                       kfree(per_nest_pmu_arr);
                }
 
                if (nest_pmus > 0)
@@ -1195,7 +1209,6 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)
                kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs);
        kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]);
        kfree(pmu_ptr);
-       kfree(per_nest_pmu_arr);
        return;
 }
 
@@ -1309,6 +1322,8 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id
                        ret = nest_pmu_cpumask_init();
                        if (ret) {
                                mutex_unlock(&nest_init_lock);
+                               kfree(nest_imc_refc);
+                               kfree(per_nest_pmu_arr);
                                goto err_free;
                        }
                }
index e81c168..9557d8b 100644 (file)
@@ -55,8 +55,7 @@ struct bpf_jit {
 #define SEEN_LITERAL   8       /* code uses literals */
 #define SEEN_FUNC      16      /* calls C functions */
 #define SEEN_TAIL_CALL 32      /* code uses tail calls */
-#define SEEN_SKB_CHANGE        64      /* code changes skb data */
-#define SEEN_REG_AX    128     /* code uses constant blinding */
+#define SEEN_REG_AX    64      /* code uses constant blinding */
 #define SEEN_STACK     (SEEN_FUNC | SEEN_MEM | SEEN_SKB)
 
 /*
@@ -448,12 +447,12 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth)
                        EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
                                      REG_15, 152);
        }
-       if (jit->seen & SEEN_SKB)
+       if (jit->seen & SEEN_SKB) {
                emit_load_skb_data_hlen(jit);
-       if (jit->seen & SEEN_SKB_CHANGE)
                /* stg %b1,ST_OFF_SKBP(%r0,%r15) */
                EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_1, REG_0, REG_15,
                              STK_OFF_SKBP);
+       }
 }
 
 /*
@@ -983,8 +982,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
                EMIT2(0x0d00, REG_14, REG_W1);
                /* lgr %b0,%r2: load return value into %b0 */
                EMIT4(0xb9040000, BPF_REG_0, REG_2);
-               if (bpf_helper_changes_pkt_data((void *)func)) {
-                       jit->seen |= SEEN_SKB_CHANGE;
+               if ((jit->seen & SEEN_SKB) &&
+                   bpf_helper_changes_pkt_data((void *)func)) {
                        /* lg %b1,ST_OFF_SKBP(%r15) */
                        EMIT6_DISP_LH(0xe3000000, 0x0004, BPF_REG_1, REG_0,
                                      REG_15, STK_OFF_SKBP);
index be3136f..a8103a8 100644 (file)
@@ -113,7 +113,7 @@ show_signal_msg(struct pt_regs *regs, int sig, int code,
        if (!printk_ratelimit())
                return;
 
-       printk("%s%s[%d]: segfault at %lx ip %p (rpc %p) sp %p error %x",
+       printk("%s%s[%d]: segfault at %lx ip %px (rpc %px) sp %px error %x",
               task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
               tsk->comm, task_pid_nr(tsk), address,
               (void *)regs->pc, (void *)regs->u_regs[UREG_I7],
index 815c03d..41363f4 100644 (file)
@@ -154,7 +154,7 @@ show_signal_msg(struct pt_regs *regs, int sig, int code,
        if (!printk_ratelimit())
                return;
 
-       printk("%s%s[%d]: segfault at %lx ip %p (rpc %p) sp %p error %x",
+       printk("%s%s[%d]: segfault at %lx ip %px (rpc %px) sp %px error %x",
               task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
               tsk->comm, task_pid_nr(tsk), address,
               (void *)regs->tpc, (void *)regs->u_regs[UREG_I7],
index 5765e7e..ff5f9cb 100644 (file)
@@ -1245,14 +1245,16 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
                u8 *func = ((u8 *)__bpf_call_base) + imm;
 
                ctx->saw_call = true;
+               if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func))
+                       emit_reg_move(bpf2sparc[BPF_REG_1], L7, ctx);
 
                emit_call((u32 *)func, ctx);
                emit_nop(ctx);
 
                emit_reg_move(O0, bpf2sparc[BPF_REG_0], ctx);
 
-               if (bpf_helper_changes_pkt_data(func) && ctx->saw_ld_abs_ind)
-                       load_skb_regs(ctx, bpf2sparc[BPF_REG_6]);
+               if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func))
+                       load_skb_regs(ctx, L7);
                break;
        }
 
index b668e35..fca34b2 100644 (file)
@@ -15,9 +15,10 @@ extern void uml_setup_stubs(struct mm_struct *mm);
 /*
  * Needed since we do not use the asm-generic/mm_hooks.h:
  */
-static inline void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
+static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
 {
        uml_setup_stubs(mm);
+       return 0;
 }
 extern void arch_exit_mmap(struct mm_struct *mm);
 static inline void arch_unmap(struct mm_struct *mm,
index 4e6fcb3..4286441 100644 (file)
@@ -150,7 +150,7 @@ static void show_segv_info(struct uml_pt_regs *regs)
        if (!printk_ratelimit())
                return;
 
-       printk("%s%s[%d]: segfault at %lx ip %p sp %p error %x",
+       printk("%s%s[%d]: segfault at %lx ip %px sp %px error %x",
                task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
                tsk->comm, task_pid_nr(tsk), FAULT_ADDRESS(*fi),
                (void *)UPT_IP(regs), (void *)UPT_SP(regs),
index 59b06b4..5c205a9 100644 (file)
@@ -81,9 +81,10 @@ do { \
        } \
 } while (0)
 
-static inline void arch_dup_mmap(struct mm_struct *oldmm,
-                                struct mm_struct *mm)
+static inline int arch_dup_mmap(struct mm_struct *oldmm,
+                               struct mm_struct *mm)
 {
+       return 0;
 }
 
 static inline void arch_unmap(struct mm_struct *mm,
index 8eed3f9..d4fc98c 100644 (file)
@@ -926,7 +926,8 @@ config MAXSMP
 config NR_CPUS
        int "Maximum number of CPUs" if SMP && !MAXSMP
        range 2 8 if SMP && X86_32 && !X86_BIGSMP
-       range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK
+       range 2 64 if SMP && X86_32 && X86_BIGSMP
+       range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK && X86_64
        range 2 8192 if SMP && !MAXSMP && CPUMASK_OFFSTACK && X86_64
        default "1" if !SMP
        default "8192" if MAXSMP
index 4838037..ace8f32 100644 (file)
@@ -941,9 +941,10 @@ ENTRY(debug)
        movl    %esp, %eax                      # pt_regs pointer
 
        /* Are we currently on the SYSENTER stack? */
-       PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
-       subl    %eax, %ecx      /* ecx = (end of SYSENTER_stack) - esp */
-       cmpl    $SIZEOF_SYSENTER_stack, %ecx
+       movl    PER_CPU_VAR(cpu_entry_area), %ecx
+       addl    $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
+       subl    %eax, %ecx      /* ecx = (end of entry_stack) - esp */
+       cmpl    $SIZEOF_entry_stack, %ecx
        jb      .Ldebug_from_sysenter_stack
 
        TRACE_IRQS_OFF
@@ -984,9 +985,10 @@ ENTRY(nmi)
        movl    %esp, %eax                      # pt_regs pointer
 
        /* Are we currently on the SYSENTER stack? */
-       PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
-       subl    %eax, %ecx      /* ecx = (end of SYSENTER_stack) - esp */
-       cmpl    $SIZEOF_SYSENTER_stack, %ecx
+       movl    PER_CPU_VAR(cpu_entry_area), %ecx
+       addl    $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
+       subl    %eax, %ecx      /* ecx = (end of entry_stack) - esp */
+       cmpl    $SIZEOF_entry_stack, %ecx
        jb      .Lnmi_from_sysenter_stack
 
        /* Not on SYSENTER stack. */
index f81d50d..3d19c83 100644 (file)
@@ -140,6 +140,64 @@ END(native_usergs_sysret64)
  * with them due to bugs in both AMD and Intel CPUs.
  */
 
+       .pushsection .entry_trampoline, "ax"
+
+/*
+ * The code in here gets remapped into cpu_entry_area's trampoline.  This means
+ * that the assembler and linker have the wrong idea as to where this code
+ * lives (and, in fact, it's mapped more than once, so it's not even at a
+ * fixed address).  So we can't reference any symbols outside the entry
+ * trampoline and expect it to work.
+ *
+ * Instead, we carefully abuse %rip-relative addressing.
+ * _entry_trampoline(%rip) refers to the start of the remapped entry
+ * trampoline.  We can thus find cpu_entry_area with this macro:
+ */
+
+#define CPU_ENTRY_AREA \
+       _entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip)
+
+/* The top word of the SYSENTER stack is hot and is usable as scratch space. */
+#define RSP_SCRATCH    CPU_ENTRY_AREA_entry_stack + \
+                       SIZEOF_entry_stack - 8 + CPU_ENTRY_AREA
+
+ENTRY(entry_SYSCALL_64_trampoline)
+       UNWIND_HINT_EMPTY
+       swapgs
+
+       /* Stash the user RSP. */
+       movq    %rsp, RSP_SCRATCH
+
+       /* Load the top of the task stack into RSP */
+       movq    CPU_ENTRY_AREA_tss + TSS_sp1 + CPU_ENTRY_AREA, %rsp
+
+       /* Start building the simulated IRET frame. */
+       pushq   $__USER_DS                      /* pt_regs->ss */
+       pushq   RSP_SCRATCH                     /* pt_regs->sp */
+       pushq   %r11                            /* pt_regs->flags */
+       pushq   $__USER_CS                      /* pt_regs->cs */
+       pushq   %rcx                            /* pt_regs->ip */
+
+       /*
+        * x86 lacks a near absolute jump, and we can't jump to the real
+        * entry text with a relative jump.  We could push the target
+        * address and then use retq, but this destroys the pipeline on
+        * many CPUs (wasting over 20 cycles on Sandy Bridge).  Instead,
+        * spill RDI and restore it in a second-stage trampoline.
+        */
+       pushq   %rdi
+       movq    $entry_SYSCALL_64_stage2, %rdi
+       jmp     *%rdi
+END(entry_SYSCALL_64_trampoline)
+
+       .popsection
+
+ENTRY(entry_SYSCALL_64_stage2)
+       UNWIND_HINT_EMPTY
+       popq    %rdi
+       jmp     entry_SYSCALL_64_after_hwframe
+END(entry_SYSCALL_64_stage2)
+
 ENTRY(entry_SYSCALL_64)
        UNWIND_HINT_EMPTY
        /*
@@ -330,8 +388,24 @@ syscall_return_via_sysret:
        popq    %rsi    /* skip rcx */
        popq    %rdx
        popq    %rsi
+
+       /*
+        * Now all regs are restored except RSP and RDI.
+        * Save old stack pointer and switch to trampoline stack.
+        */
+       movq    %rsp, %rdi
+       movq    PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
+
+       pushq   RSP-RDI(%rdi)   /* RSP */
+       pushq   (%rdi)          /* RDI */
+
+       /*
+        * We are on the trampoline stack.  All regs except RDI are live.
+        * We can do future final exit work right here.
+        */
+
        popq    %rdi
-       movq    RSP-ORIG_RAX(%rsp), %rsp
+       popq    %rsp
        USERGS_SYSRET64
 END(entry_SYSCALL_64)
 
@@ -466,12 +540,13 @@ END(irq_entries_start)
 
 .macro DEBUG_ENTRY_ASSERT_IRQS_OFF
 #ifdef CONFIG_DEBUG_ENTRY
-       pushfq
-       testl $X86_EFLAGS_IF, (%rsp)
+       pushq %rax
+       SAVE_FLAGS(CLBR_RAX)
+       testl $X86_EFLAGS_IF, %eax
        jz .Lokay_\@
        ud2
 .Lokay_\@:
-       addq $8, %rsp
+       popq %rax
 #endif
 .endm
 
@@ -563,6 +638,13 @@ END(irq_entries_start)
 /* 0(%rsp): ~(interrupt number) */
        .macro interrupt func
        cld
+
+       testb   $3, CS-ORIG_RAX(%rsp)
+       jz      1f
+       SWAPGS
+       call    switch_to_thread_stack
+1:
+
        ALLOC_PT_GPREGS_ON_STACK
        SAVE_C_REGS
        SAVE_EXTRA_REGS
@@ -572,12 +654,8 @@ END(irq_entries_start)
        jz      1f
 
        /*
-        * IRQ from user mode.  Switch to kernel gsbase and inform context
-        * tracking that we're in kernel mode.
-        */
-       SWAPGS
-
-       /*
+        * IRQ from user mode.
+        *
         * We need to tell lockdep that IRQs are off.  We can't do this until
         * we fix gsbase, and we should do it before enter_from_user_mode
         * (which can take locks).  Since TRACE_IRQS_OFF idempotent,
@@ -630,10 +708,41 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
        ud2
 1:
 #endif
-       SWAPGS
        POP_EXTRA_REGS
-       POP_C_REGS
-       addq    $8, %rsp        /* skip regs->orig_ax */
+       popq    %r11
+       popq    %r10
+       popq    %r9
+       popq    %r8
+       popq    %rax
+       popq    %rcx
+       popq    %rdx
+       popq    %rsi
+
+       /*
+        * The stack is now user RDI, orig_ax, RIP, CS, EFLAGS, RSP, SS.
+        * Save old stack pointer and switch to trampoline stack.
+        */
+       movq    %rsp, %rdi
+       movq    PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
+
+       /* Copy the IRET frame to the trampoline stack. */
+       pushq   6*8(%rdi)       /* SS */
+       pushq   5*8(%rdi)       /* RSP */
+       pushq   4*8(%rdi)       /* EFLAGS */
+       pushq   3*8(%rdi)       /* CS */
+       pushq   2*8(%rdi)       /* RIP */
+
+       /* Push user RDI on the trampoline stack. */
+       pushq   (%rdi)
+
+       /*
+        * We are on the trampoline stack.  All regs except RDI are live.
+        * We can do future final exit work right here.
+        */
+
+       /* Restore RDI. */
+       popq    %rdi
+       SWAPGS
        INTERRUPT_RETURN
 
 
@@ -829,7 +938,33 @@ apicinterrupt IRQ_WORK_VECTOR                      irq_work_interrupt              smp_irq_work_interrupt
 /*
  * Exception entry points.
  */
-#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss) + (TSS_ist + ((x) - 1) * 8)
+#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8)
+
+/*
+ * Switch to the thread stack.  This is called with the IRET frame and
+ * orig_ax on the stack.  (That is, RDI..R12 are not on the stack and
+ * space has not been allocated for them.)
+ */
+ENTRY(switch_to_thread_stack)
+       UNWIND_HINT_FUNC
+
+       pushq   %rdi
+       movq    %rsp, %rdi
+       movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+       UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI
+
+       pushq   7*8(%rdi)               /* regs->ss */
+       pushq   6*8(%rdi)               /* regs->rsp */
+       pushq   5*8(%rdi)               /* regs->eflags */
+       pushq   4*8(%rdi)               /* regs->cs */
+       pushq   3*8(%rdi)               /* regs->ip */
+       pushq   2*8(%rdi)               /* regs->orig_ax */
+       pushq   8(%rdi)                 /* return address */
+       UNWIND_HINT_FUNC
+
+       movq    (%rdi), %rdi
+       ret
+END(switch_to_thread_stack)
 
 .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
 ENTRY(\sym)
@@ -848,11 +983,12 @@ ENTRY(\sym)
 
        ALLOC_PT_GPREGS_ON_STACK
 
-       .if \paranoid
-       .if \paranoid == 1
+       .if \paranoid < 2
        testb   $3, CS(%rsp)                    /* If coming from userspace, switch stacks */
-       jnz     1f
+       jnz     .Lfrom_usermode_switch_stack_\@
        .endif
+
+       .if \paranoid
        call    paranoid_entry
        .else
        call    error_entry
@@ -894,20 +1030,15 @@ ENTRY(\sym)
        jmp     error_exit
        .endif
 
-       .if \paranoid == 1
+       .if \paranoid < 2
        /*
-        * Paranoid entry from userspace.  Switch stacks and treat it
+        * Entry from userspace.  Switch stacks and treat it
         * as a normal entry.  This means that paranoid handlers
         * run in real process context if user_mode(regs).
         */
-1:
+.Lfrom_usermode_switch_stack_\@:
        call    error_entry
 
-
-       movq    %rsp, %rdi                      /* pt_regs pointer */
-       call    sync_regs
-       movq    %rax, %rsp                      /* switch stack */
-
        movq    %rsp, %rdi                      /* pt_regs pointer */
 
        .if \has_error_code
@@ -1170,6 +1301,14 @@ ENTRY(error_entry)
        SWAPGS
 
 .Lerror_entry_from_usermode_after_swapgs:
+       /* Put us onto the real thread stack. */
+       popq    %r12                            /* save return addr in %12 */
+       movq    %rsp, %rdi                      /* arg0 = pt_regs pointer */
+       call    sync_regs
+       movq    %rax, %rsp                      /* switch stack */
+       ENCODE_FRAME_POINTER
+       pushq   %r12
+
        /*
         * We need to tell lockdep that IRQs are off.  We can't do this until
         * we fix gsbase, and we should do it before enter_from_user_mode
index 568e130..95ad40e 100644 (file)
@@ -48,7 +48,7 @@
  */
 ENTRY(entry_SYSENTER_compat)
        /* Interrupts are off on entry. */
-       SWAPGS_UNSAFE_STACK
+       SWAPGS
        movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 
        /*
@@ -306,8 +306,11 @@ ENTRY(entry_INT80_compat)
         */
        movl    %eax, %eax
 
-       /* Construct struct pt_regs on stack (iret frame is already on stack) */
        pushq   %rax                    /* pt_regs->orig_ax */
+
+       /* switch to thread stack expects orig_ax to be pushed */
+       call    switch_to_thread_stack
+
        pushq   %rdi                    /* pt_regs->di */
        pushq   %rsi                    /* pt_regs->si */
        pushq   %rdx                    /* pt_regs->dx */
index f279ba2..1faf40f 100644 (file)
@@ -37,6 +37,7 @@
 #include <asm/unistd.h>
 #include <asm/fixmap.h>
 #include <asm/traps.h>
+#include <asm/paravirt.h>
 
 #define CREATE_TRACE_POINTS
 #include "vsyscall_trace.h"
@@ -138,6 +139,10 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
 
        WARN_ON_ONCE(address != regs->ip);
 
+       /* This should be unreachable in NATIVE mode. */
+       if (WARN_ON(vsyscall_mode == NATIVE))
+               return false;
+
        if (vsyscall_mode == NONE) {
                warn_bad_vsyscall(KERN_INFO, regs,
                                  "vsyscall attempted with vsyscall=none");
@@ -329,16 +334,47 @@ int in_gate_area_no_mm(unsigned long addr)
        return vsyscall_mode != NONE && (addr & PAGE_MASK) == VSYSCALL_ADDR;
 }
 
+/*
+ * The VSYSCALL page is the only user-accessible page in the kernel address
+ * range.  Normally, the kernel page tables can have _PAGE_USER clear, but
+ * the tables covering VSYSCALL_ADDR need _PAGE_USER set if vsyscalls
+ * are enabled.
+ *
+ * Some day we may create a "minimal" vsyscall mode in which we emulate
+ * vsyscalls but leave the page not present.  If so, we skip calling
+ * this.
+ */
+static void __init set_vsyscall_pgtable_user_bits(void)
+{
+       pgd_t *pgd;
+       p4d_t *p4d;
+       pud_t *pud;
+       pmd_t *pmd;
+
+       pgd = pgd_offset_k(VSYSCALL_ADDR);
+       set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER));
+       p4d = p4d_offset(pgd, VSYSCALL_ADDR);
+#if CONFIG_PGTABLE_LEVELS >= 5
+       p4d->p4d |= _PAGE_USER;
+#endif
+       pud = pud_offset(p4d, VSYSCALL_ADDR);
+       set_pud(pud, __pud(pud_val(*pud) | _PAGE_USER));
+       pmd = pmd_offset(pud, VSYSCALL_ADDR);
+       set_pmd(pmd, __pmd(pmd_val(*pmd) | _PAGE_USER));
+}
+
 void __init map_vsyscall(void)
 {
        extern char __vsyscall_page;
        unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
 
-       if (vsyscall_mode != NONE)
+       if (vsyscall_mode != NONE) {
                __set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
                             vsyscall_mode == NATIVE
                             ? PAGE_KERNEL_VSYSCALL
                             : PAGE_KERNEL_VVAR);
+               set_vsyscall_pgtable_user_bits();
+       }
 
        BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
                     (unsigned long)VSYSCALL_ADDR);
diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h
new file mode 100644 (file)
index 0000000..2fbc69a
--- /dev/null
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifndef _ASM_X86_CPU_ENTRY_AREA_H
+#define _ASM_X86_CPU_ENTRY_AREA_H
+
+#include <linux/percpu-defs.h>
+#include <asm/processor.h>
+
+/*
+ * cpu_entry_area is a percpu region that contains things needed by the CPU
+ * and early entry/exit code.  Real types aren't used for all fields here
+ * to avoid circular header dependencies.
+ *
+ * Every field is a virtual alias of some other allocated backing store.
+ * There is no direct allocation of a struct cpu_entry_area.
+ */
+struct cpu_entry_area {
+       char gdt[PAGE_SIZE];
+
+       /*
+        * The GDT is just below entry_stack and thus serves (on x86_64) as
+        * a read-only guard page.
+        */
+       struct entry_stack_page entry_stack_page;
+
+       /*
+        * On x86_64, the TSS is mapped RO.  On x86_32, it's mapped RW because
+        * we need task switches to work, and task switches write to the TSS.
+        */
+       struct tss_struct tss;
+
+       char entry_trampoline[PAGE_SIZE];
+
+#ifdef CONFIG_X86_64
+       /*
+        * Exception stacks used for IST entries.
+        *
+        * In the future, this should have a separate slot for each stack
+        * with guard pages between them.
+        */
+       char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
+#endif
+};
+
+#define CPU_ENTRY_AREA_SIZE    (sizeof(struct cpu_entry_area))
+#define CPU_ENTRY_AREA_TOT_SIZE        (CPU_ENTRY_AREA_SIZE * NR_CPUS)
+
+DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
+
+extern void setup_cpu_entry_areas(void);
+extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags);
+
+#define        CPU_ENTRY_AREA_RO_IDT           CPU_ENTRY_AREA_BASE
+#define CPU_ENTRY_AREA_PER_CPU         (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE)
+
+#define CPU_ENTRY_AREA_RO_IDT_VADDR    ((void *)CPU_ENTRY_AREA_RO_IDT)
+
+#define CPU_ENTRY_AREA_MAP_SIZE                        \
+       (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_TOT_SIZE - CPU_ENTRY_AREA_BASE)
+
+extern struct cpu_entry_area *get_cpu_entry_area(int cpu);
+
+static inline struct entry_stack *cpu_entry_stack(int cpu)
+{
+       return &get_cpu_entry_area(cpu)->entry_stack_page.stack;
+}
+
+#endif
index bf6a762..ea9a7dd 100644 (file)
@@ -135,6 +135,8 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
        set_bit(bit, (unsigned long *)cpu_caps_set);    \
 } while (0)
 
+#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit)
+
 #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS)
 /*
  * Static testing of CPU features.  Used the same as boot_cpu_has().
index 4011cb0..ec8be07 100644 (file)
@@ -7,6 +7,7 @@
 #include <asm/mmu.h>
 #include <asm/fixmap.h>
 #include <asm/irq_vectors.h>
+#include <asm/cpu_entry_area.h>
 
 #include <linux/smp.h>
 #include <linux/percpu.h>
@@ -60,17 +61,10 @@ static inline struct desc_struct *get_current_gdt_rw(void)
        return this_cpu_ptr(&gdt_page)->gdt;
 }
 
-/* Get the fixmap index for a specific processor */
-static inline unsigned int get_cpu_gdt_ro_index(int cpu)
-{
-       return FIX_GDT_REMAP_BEGIN + cpu;
-}
-
 /* Provide the fixmap address of the remapped GDT */
 static inline struct desc_struct *get_cpu_gdt_ro(int cpu)
 {
-       unsigned int idx = get_cpu_gdt_ro_index(cpu);
-       return (struct desc_struct *)__fix_to_virt(idx);
+       return (struct desc_struct *)&get_cpu_entry_area(cpu)->gdt;
 }
 
 /* Provide the current read-only GDT */
@@ -185,7 +179,7 @@ static inline void set_tssldt_descriptor(void *d, unsigned long addr,
 #endif
 }
 
-static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr)
+static inline void __set_tss_desc(unsigned cpu, unsigned int entry, struct x86_hw_tss *addr)
 {
        struct desc_struct *d = get_cpu_gdt_rw(cpu);
        tss_desc tss;
index 0211029..6777480 100644 (file)
@@ -2,7 +2,7 @@
 #ifndef _ASM_X86_ESPFIX_H
 #define _ASM_X86_ESPFIX_H
 
-#ifdef CONFIG_X86_64
+#ifdef CONFIG_X86_ESPFIX64
 
 #include <asm/percpu.h>
 
@@ -11,7 +11,8 @@ DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_waddr);
 
 extern void init_espfix_bsp(void);
 extern void init_espfix_ap(int cpu);
-
-#endif /* CONFIG_X86_64 */
+#else
+static inline void init_espfix_ap(int cpu) { }
+#endif
 
 #endif /* _ASM_X86_ESPFIX_H */
index b0c505f..64c4a30 100644 (file)
@@ -44,7 +44,6 @@ extern unsigned long __FIXADDR_TOP;
                         PAGE_SIZE)
 #endif
 
-
 /*
  * Here we define all the compile-time 'special' virtual
  * addresses. The point is to have a constant address at
@@ -84,7 +83,6 @@ enum fixed_addresses {
        FIX_IO_APIC_BASE_0,
        FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1,
 #endif
-       FIX_RO_IDT,     /* Virtual mapping for read-only IDT */
 #ifdef CONFIG_X86_32
        FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
        FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
@@ -100,9 +98,6 @@ enum fixed_addresses {
 #ifdef CONFIG_X86_INTEL_MID
        FIX_LNW_VRTC,
 #endif
-       /* Fixmap entries to remap the GDTs, one per processor. */
-       FIX_GDT_REMAP_BEGIN,
-       FIX_GDT_REMAP_END = FIX_GDT_REMAP_BEGIN + NR_CPUS - 1,
 
 #ifdef CONFIG_ACPI_APEI_GHES
        /* Used for GHES mapping from assorted contexts */
@@ -143,7 +138,7 @@ enum fixed_addresses {
 extern void reserve_top_address(unsigned long reserve);
 
 #define FIXADDR_SIZE   (__end_of_permanent_fixed_addresses << PAGE_SHIFT)
-#define FIXADDR_START          (FIXADDR_TOP - FIXADDR_SIZE)
+#define FIXADDR_START  (FIXADDR_TOP - FIXADDR_SIZE)
 
 extern int fixmaps_set;
 
index 1b0a5ab..96aa6b9 100644 (file)
 #ifndef _ASM_X86_HYPERVISOR_H
 #define _ASM_X86_HYPERVISOR_H
 
-#ifdef CONFIG_HYPERVISOR_GUEST
-
-#include <asm/kvm_para.h>
-#include <asm/x86_init.h>
-#include <asm/xen/hypervisor.h>
-
-/*
- * x86 hypervisor information
- */
-
+/* x86 hypervisor types  */
 enum x86_hypervisor_type {
        X86_HYPER_NATIVE = 0,
        X86_HYPER_VMWARE,
@@ -39,6 +30,12 @@ enum x86_hypervisor_type {
        X86_HYPER_KVM,
 };
 
+#ifdef CONFIG_HYPERVISOR_GUEST
+
+#include <asm/kvm_para.h>
+#include <asm/x86_init.h>
+#include <asm/xen/hypervisor.h>
+
 struct hypervisor_x86 {
        /* Hypervisor name */
        const char      *name;
@@ -58,7 +55,15 @@ struct hypervisor_x86 {
 
 extern enum x86_hypervisor_type x86_hyper_type;
 extern void init_hypervisor_platform(void);
+static inline bool hypervisor_is_type(enum x86_hypervisor_type type)
+{
+       return x86_hyper_type == type;
+}
 #else
 static inline void init_hypervisor_platform(void) { }
+static inline bool hypervisor_is_type(enum x86_hypervisor_type type)
+{
+       return type == X86_HYPER_NATIVE;
+}
 #endif /* CONFIG_HYPERVISOR_GUEST */
 #endif /* _ASM_X86_HYPERVISOR_H */
diff --git a/arch/x86/include/asm/invpcid.h b/arch/x86/include/asm/invpcid.h
new file mode 100644 (file)
index 0000000..989cfa8
--- /dev/null
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_INVPCID
+#define _ASM_X86_INVPCID
+
+static inline void __invpcid(unsigned long pcid, unsigned long addr,
+                            unsigned long type)
+{
+       struct { u64 d[2]; } desc = { { pcid, addr } };
+
+       /*
+        * The memory clobber is because the whole point is to invalidate
+        * stale TLB entries and, especially if we're flushing global
+        * mappings, we don't want the compiler to reorder any subsequent
+        * memory accesses before the TLB flush.
+        *
+        * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and
+        * invpcid (%rcx), %rax in long mode.
+        */
+       asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
+                     : : "m" (desc), "a" (type), "c" (&desc) : "memory");
+}
+
+#define INVPCID_TYPE_INDIV_ADDR                0
+#define INVPCID_TYPE_SINGLE_CTXT       1
+#define INVPCID_TYPE_ALL_INCL_GLOBAL   2
+#define INVPCID_TYPE_ALL_NON_GLOBAL    3
+
+/* Flush all mappings for a given pcid and addr, not including globals. */
+static inline void invpcid_flush_one(unsigned long pcid,
+                                    unsigned long addr)
+{
+       __invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR);
+}
+
+/* Flush all mappings for a given PCID, not including globals. */
+static inline void invpcid_flush_single_context(unsigned long pcid)
+{
+       __invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT);
+}
+
+/* Flush all mappings, including globals, for all PCIDs. */
+static inline void invpcid_flush_all(void)
+{
+       __invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL);
+}
+
+/* Flush all mappings for all PCIDs except globals. */
+static inline void invpcid_flush_all_nonglobals(void)
+{
+       __invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL);
+}
+
+#endif /* _ASM_X86_INVPCID */
index c8ef23f..89f0895 100644 (file)
@@ -142,6 +142,9 @@ static inline notrace unsigned long arch_local_irq_save(void)
        swapgs;                                 \
        sysretl
 
+#ifdef CONFIG_DEBUG_ENTRY
+#define SAVE_FLAGS(x)          pushfq; popq %rax
+#endif
 #else
 #define INTERRUPT_RETURN               iret
 #define ENABLE_INTERRUPTS_SYSEXIT      sti; sysexit
index f86a8ca..395c963 100644 (file)
@@ -26,6 +26,7 @@ extern void die(const char *, struct pt_regs *,long);
 extern int __must_check __die(const char *, struct pt_regs *, long);
 extern void show_stack_regs(struct pt_regs *regs);
 extern void __show_regs(struct pt_regs *regs, int all);
+extern void show_iret_regs(struct pt_regs *regs);
 extern unsigned long oops_begin(void);
 extern void oops_end(unsigned long, struct pt_regs *, int signr);
 
index 9ea26f1..5ff3e8a 100644 (file)
@@ -3,6 +3,7 @@
 #define _ASM_X86_MMU_H
 
 #include <linux/spinlock.h>
+#include <linux/rwsem.h>
 #include <linux/mutex.h>
 #include <linux/atomic.h>
 
@@ -27,7 +28,8 @@ typedef struct {
        atomic64_t tlb_gen;
 
 #ifdef CONFIG_MODIFY_LDT_SYSCALL
-       struct ldt_struct *ldt;
+       struct rw_semaphore     ldt_usr_sem;
+       struct ldt_struct       *ldt;
 #endif
 
 #ifdef CONFIG_X86_64
index 6d16d15..5ede7ca 100644 (file)
@@ -57,11 +57,17 @@ struct ldt_struct {
 /*
  * Used for LDT copy/destruction.
  */
-int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm);
+static inline void init_new_context_ldt(struct mm_struct *mm)
+{
+       mm->context.ldt = NULL;
+       init_rwsem(&mm->context.ldt_usr_sem);
+}
+int ldt_dup_context(struct mm_struct *oldmm, struct mm_struct *mm);
 void destroy_context_ldt(struct mm_struct *mm);
 #else  /* CONFIG_MODIFY_LDT_SYSCALL */
-static inline int init_new_context_ldt(struct task_struct *tsk,
-                                      struct mm_struct *mm)
+static inline void init_new_context_ldt(struct mm_struct *mm) { }
+static inline int ldt_dup_context(struct mm_struct *oldmm,
+                                 struct mm_struct *mm)
 {
        return 0;
 }
@@ -132,18 +138,21 @@ void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
 static inline int init_new_context(struct task_struct *tsk,
                                   struct mm_struct *mm)
 {
+       mutex_init(&mm->context.lock);
+
        mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id);
        atomic64_set(&mm->context.tlb_gen, 0);
 
-       #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
+#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
        if (cpu_feature_enabled(X86_FEATURE_OSPKE)) {
                /* pkey 0 is the default and always allocated */
                mm->context.pkey_allocation_map = 0x1;
                /* -1 means unallocated or invalid */
                mm->context.execute_only_pkey = -1;
        }
-       #endif
-       return init_new_context_ldt(tsk, mm);
+#endif
+       init_new_context_ldt(mm);
+       return 0;
 }
 static inline void destroy_context(struct mm_struct *mm)
 {
@@ -176,10 +185,10 @@ do {                                              \
 } while (0)
 #endif
 
-static inline void arch_dup_mmap(struct mm_struct *oldmm,
-                                struct mm_struct *mm)
+static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
 {
        paravirt_arch_dup_mmap(oldmm, mm);
+       return ldt_dup_context(oldmm, mm);
 }
 
 static inline void arch_exit_mmap(struct mm_struct *mm)
@@ -281,33 +290,6 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
        return __pkru_allows_pkey(vma_pkey(vma), write);
 }
 
-/*
- * If PCID is on, ASID-aware code paths put the ASID+1 into the PCID
- * bits.  This serves two purposes.  It prevents a nasty situation in
- * which PCID-unaware code saves CR3, loads some other value (with PCID
- * == 0), and then restores CR3, thus corrupting the TLB for ASID 0 if
- * the saved ASID was nonzero.  It also means that any bugs involving
- * loading a PCID-enabled CR3 with CR4.PCIDE off will trigger
- * deterministically.
- */
-
-static inline unsigned long build_cr3(struct mm_struct *mm, u16 asid)
-{
-       if (static_cpu_has(X86_FEATURE_PCID)) {
-               VM_WARN_ON_ONCE(asid > 4094);
-               return __sme_pa(mm->pgd) | (asid + 1);
-       } else {
-               VM_WARN_ON_ONCE(asid != 0);
-               return __sme_pa(mm->pgd);
-       }
-}
-
-static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
-{
-       VM_WARN_ON_ONCE(asid > 4094);
-       return __sme_pa(mm->pgd) | (asid + 1) | CR3_NOFLUSH;
-}
-
 /*
  * This can be used from process context to figure out what the value of
  * CR3 is without needing to do a (slow) __read_cr3().
@@ -317,7 +299,7 @@ static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
  */
 static inline unsigned long __get_current_cr3_fast(void)
 {
-       unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm),
+       unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd,
                this_cpu_read(cpu_tlbstate.loaded_mm_asid));
 
        /* For now, be very restrictive about when this can be called. */
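
Since arch_dup_mmap() can now fail (ldt_dup_context() allocates memory for
the LDT copy), the generic fork path has to check its return value. A
simplified sketch of the caller side in dup_mmap(), not the literal
kernel/fork.c source:

	retval = arch_dup_mmap(oldmm, mm);
	if (retval)
		goto out;	/* unwind the fork and propagate the error */
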
index 283efca..892df37 100644 (file)
@@ -927,6 +927,15 @@ extern void default_banner(void);
        PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64),       \
                  CLBR_NONE,                                            \
                  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))
+
+#ifdef CONFIG_DEBUG_ENTRY
+#define SAVE_FLAGS(clobbers)                                        \
+       PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_save_fl), clobbers, \
+                 PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);        \
+                 call PARA_INDIRECT(pv_irq_ops+PV_IRQ_save_fl);    \
+                 PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
+#endif
+
 #endif /* CONFIG_X86_32 */
 
 #endif /* __ASSEMBLY__ */
index f2ca9b2..ce245b0 100644 (file)
@@ -38,13 +38,22 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */
 #define LAST_PKMAP 1024
 #endif
 
-#define PKMAP_BASE ((FIXADDR_START - PAGE_SIZE * (LAST_PKMAP + 1))     \
-                   & PMD_MASK)
+/*
+ * Define this here and validate with BUILD_BUG_ON() in pgtable_32.c
+ * to avoid include recursion hell
+ */
+#define CPU_ENTRY_AREA_PAGES   (NR_CPUS * 40)
+
+#define CPU_ENTRY_AREA_BASE                            \
+       ((FIXADDR_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) & PMD_MASK)
+
+#define PKMAP_BASE             \
+       ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK)
 
 #ifdef CONFIG_HIGHMEM
 # define VMALLOC_END   (PKMAP_BASE - 2 * PAGE_SIZE)
 #else
-# define VMALLOC_END   (FIXADDR_START - 2 * PAGE_SIZE)
+# define VMALLOC_END   (CPU_ENTRY_AREA_BASE - 2 * PAGE_SIZE)
 #endif
 
 #define MODULES_VADDR  VMALLOC_START
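
The BUILD_BUG_ON() validation the comment above refers to amounts to a
compile-time check that the 40-pages-per-CPU reservation is large enough to
hold one struct cpu_entry_area per CPU; something along these lines (a
sketch, not the literal pgtable_32.c check):

	BUILD_BUG_ON(sizeof(struct cpu_entry_area) > 40 * PAGE_SIZE);
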
index 6d5f45d..3d27831 100644 (file)
@@ -76,32 +76,41 @@ typedef struct { pteval_t pte; } pte_t;
 #define PGDIR_MASK     (~(PGDIR_SIZE - 1))
 
 /* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
-#define MAXMEM         _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
+#define MAXMEM                 _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
+
 #ifdef CONFIG_X86_5LEVEL
-#define VMALLOC_SIZE_TB _AC(16384, UL)
-#define __VMALLOC_BASE _AC(0xff92000000000000, UL)
-#define __VMEMMAP_BASE _AC(0xffd4000000000000, UL)
+# define VMALLOC_SIZE_TB       _AC(16384, UL)
+# define __VMALLOC_BASE                _AC(0xff92000000000000, UL)
+# define __VMEMMAP_BASE                _AC(0xffd4000000000000, UL)
 #else
-#define VMALLOC_SIZE_TB        _AC(32, UL)
-#define __VMALLOC_BASE _AC(0xffffc90000000000, UL)
-#define __VMEMMAP_BASE _AC(0xffffea0000000000, UL)
+# define VMALLOC_SIZE_TB       _AC(32, UL)
+# define __VMALLOC_BASE                _AC(0xffffc90000000000, UL)
+# define __VMEMMAP_BASE                _AC(0xffffea0000000000, UL)
 #endif
+
 #ifdef CONFIG_RANDOMIZE_MEMORY
-#define VMALLOC_START  vmalloc_base
-#define VMEMMAP_START  vmemmap_base
+# define VMALLOC_START         vmalloc_base
+# define VMEMMAP_START         vmemmap_base
 #else
-#define VMALLOC_START  __VMALLOC_BASE
-#define VMEMMAP_START  __VMEMMAP_BASE
+# define VMALLOC_START         __VMALLOC_BASE
+# define VMEMMAP_START         __VMEMMAP_BASE
 #endif /* CONFIG_RANDOMIZE_MEMORY */
-#define VMALLOC_END    (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
-#define MODULES_VADDR    (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
+
+#define VMALLOC_END            (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
+
+#define MODULES_VADDR          (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
 /* The module sections end with the start of the fixmap */
-#define MODULES_END   __fix_to_virt(__end_of_fixed_addresses + 1)
-#define MODULES_LEN   (MODULES_END - MODULES_VADDR)
-#define ESPFIX_PGD_ENTRY _AC(-2, UL)
-#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << P4D_SHIFT)
-#define EFI_VA_START    ( -4 * (_AC(1, UL) << 30))
-#define EFI_VA_END      (-68 * (_AC(1, UL) << 30))
+#define MODULES_END            __fix_to_virt(__end_of_fixed_addresses + 1)
+#define MODULES_LEN            (MODULES_END - MODULES_VADDR)
+
+#define ESPFIX_PGD_ENTRY       _AC(-2, UL)
+#define ESPFIX_BASE_ADDR       (ESPFIX_PGD_ENTRY << P4D_SHIFT)
+
+#define CPU_ENTRY_AREA_PGD     _AC(-3, UL)
+#define CPU_ENTRY_AREA_BASE    (CPU_ENTRY_AREA_PGD << P4D_SHIFT)
+
+#define EFI_VA_START           ( -4 * (_AC(1, UL) << 30))
+#define EFI_VA_END             (-68 * (_AC(1, UL) << 30))
 
 #define EARLY_DYNAMIC_PAGE_TABLES      64
 
index cc16fa8..cad8dab 100644 (file)
@@ -163,9 +163,9 @@ enum cpuid_regs_idx {
 extern struct cpuinfo_x86      boot_cpu_data;
 extern struct cpuinfo_x86      new_cpu_data;
 
-extern struct tss_struct       doublefault_tss;
-extern __u32                   cpu_caps_cleared[NCAPINTS];
-extern __u32                   cpu_caps_set[NCAPINTS];
+extern struct x86_hw_tss       doublefault_tss;
+extern __u32                   cpu_caps_cleared[NCAPINTS + NBUGINTS];
+extern __u32                   cpu_caps_set[NCAPINTS + NBUGINTS];
 
 #ifdef CONFIG_SMP
 DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
@@ -253,6 +253,11 @@ static inline void load_cr3(pgd_t *pgdir)
        write_cr3(__sme_pa(pgdir));
 }
 
+/*
+ * Note that while the legacy 'TSS' name comes from 'Task State Segment',
+ * on modern x86 CPUs the TSS also holds information important to 64-bit mode,
+ * unrelated to the task-switch mechanism:
+ */
 #ifdef CONFIG_X86_32
 /* This is the TSS defined by the hardware. */
 struct x86_hw_tss {
@@ -305,7 +310,13 @@ struct x86_hw_tss {
 struct x86_hw_tss {
        u32                     reserved1;
        u64                     sp0;
+
+       /*
+        * We store cpu_current_top_of_stack in sp1 so it's always accessible.
+        * Linux does not use ring 1, so sp1 is not otherwise needed.
+        */
        u64                     sp1;
+
        u64                     sp2;
        u64                     reserved2;
        u64                     ist[7];
@@ -323,12 +334,22 @@ struct x86_hw_tss {
 #define IO_BITMAP_BITS                 65536
 #define IO_BITMAP_BYTES                        (IO_BITMAP_BITS/8)
 #define IO_BITMAP_LONGS                        (IO_BITMAP_BYTES/sizeof(long))
-#define IO_BITMAP_OFFSET               offsetof(struct tss_struct, io_bitmap)
+#define IO_BITMAP_OFFSET               (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss))
 #define INVALID_IO_BITMAP_OFFSET       0x8000
 
+struct entry_stack {
+       unsigned long           words[64];
+};
+
+struct entry_stack_page {
+       struct entry_stack stack;
+} __aligned(PAGE_SIZE);
+
 struct tss_struct {
        /*
-        * The hardware state:
+        * The fixed hardware portion.  This must not cross a page boundary
+        * at risk of violating the SDM's advice and potentially triggering
+        * errata.
         */
        struct x86_hw_tss       x86_tss;
 
@@ -339,18 +360,9 @@ struct tss_struct {
         * be within the limit.
         */
        unsigned long           io_bitmap[IO_BITMAP_LONGS + 1];
+} __aligned(PAGE_SIZE);
 
-#ifdef CONFIG_X86_32
-       /*
-        * Space for the temporary SYSENTER stack.
-        */
-       unsigned long           SYSENTER_stack_canary;
-       unsigned long           SYSENTER_stack[64];
-#endif
-
-} ____cacheline_aligned;
-
-DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
+DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw);
 
 /*
  * sizeof(unsigned long) coming from an extra "long" at the end
@@ -364,6 +376,9 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
 
 #ifdef CONFIG_X86_32
 DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
+#else
+/* The RO copy can't be accessed with this_cpu_xyz(), so use the RW copy. */
+#define cpu_current_top_of_stack cpu_tss_rw.x86_tss.sp1
 #endif
 
 /*
@@ -523,7 +538,7 @@ static inline void native_set_iopl_mask(unsigned mask)
 static inline void
 native_load_sp0(unsigned long sp0)
 {
-       this_cpu_write(cpu_tss.x86_tss.sp0, sp0);
+       this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
 }
 
 static inline void native_swapgs(void)
@@ -535,12 +550,12 @@ static inline void native_swapgs(void)
 
 static inline unsigned long current_top_of_stack(void)
 {
-#ifdef CONFIG_X86_64
-       return this_cpu_read_stable(cpu_tss.x86_tss.sp0);
-#else
-       /* sp0 on x86_32 is special in and around vm86 mode. */
+       /*
+        *  We can't read directly from tss.sp0: sp0 on x86_32 is special in
+        *  and around vm86 mode and sp0 on x86_64 is special because of the
+        *  entry trampoline.
+        */
        return this_cpu_read_stable(cpu_current_top_of_stack);
-#endif
 }
 
 static inline bool on_thread_stack(void)
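
On 64-bit the alias makes tss.sp1 the single home of the "top of current
stack" value: C code reads it through the familiar per-cpu name, while entry
asm addresses the same slot via the offset generated in asm-offsets_64.c. A
sketch of the two views (the asm line paraphrases the entry-code usage):

	/* C side: */
	unsigned long top = this_cpu_read_stable(cpu_current_top_of_stack);

	/* asm side, through the thread_info.h alias later in this merge: */
	/*	movq PER_CPU_VAR(cpu_tss_rw + TSS_sp1), %rsp	*/
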
index 8da111b..f737068 100644 (file)
@@ -16,6 +16,7 @@ enum stack_type {
        STACK_TYPE_TASK,
        STACK_TYPE_IRQ,
        STACK_TYPE_SOFTIRQ,
+       STACK_TYPE_ENTRY,
        STACK_TYPE_EXCEPTION,
        STACK_TYPE_EXCEPTION_LAST = STACK_TYPE_EXCEPTION + N_EXCEPTION_STACKS-1,
 };
@@ -28,6 +29,8 @@ struct stack_info {
 bool in_task_stack(unsigned long *stack, struct task_struct *task,
                   struct stack_info *info);
 
+bool in_entry_stack(unsigned long *stack, struct stack_info *info);
+
 int get_stack_info(unsigned long *stack, struct task_struct *task,
                   struct stack_info *info, unsigned long *visit_mask);
 
index 8c6bd68..9b6df68 100644 (file)
@@ -79,10 +79,10 @@ do {                                                                        \
 static inline void refresh_sysenter_cs(struct thread_struct *thread)
 {
        /* Only happens when SEP is enabled, no need to test "SEP"arately: */
-       if (unlikely(this_cpu_read(cpu_tss.x86_tss.ss1) == thread->sysenter_cs))
+       if (unlikely(this_cpu_read(cpu_tss_rw.x86_tss.ss1) == thread->sysenter_cs))
                return;
 
-       this_cpu_write(cpu_tss.x86_tss.ss1, thread->sysenter_cs);
+       this_cpu_write(cpu_tss_rw.x86_tss.ss1, thread->sysenter_cs);
        wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
 }
 #endif
@@ -90,10 +90,12 @@ static inline void refresh_sysenter_cs(struct thread_struct *thread)
 /* This is used when switching tasks or entering/exiting vm86 mode. */
 static inline void update_sp0(struct task_struct *task)
 {
+       /* On x86_64, sp0 always points to the entry trampoline stack, which is constant: */
 #ifdef CONFIG_X86_32
        load_sp0(task->thread.sp0);
 #else
-       load_sp0(task_top_of_stack(task));
+       if (static_cpu_has(X86_FEATURE_XENPV))
+               load_sp0(task_top_of_stack(task));
 #endif
 }
 
index 70f4259..0022333 100644 (file)
@@ -207,7 +207,7 @@ static inline int arch_within_stack_frames(const void * const stack,
 #else /* !__ASSEMBLY__ */
 
 #ifdef CONFIG_X86_64
-# define cpu_current_top_of_stack (cpu_tss + TSS_sp0)
+# define cpu_current_top_of_stack (cpu_tss_rw + TSS_sp1)
 #endif
 
 #endif
index 877b5c1..e1884cf 100644 (file)
@@ -9,70 +9,66 @@
 #include <asm/cpufeature.h>
 #include <asm/special_insns.h>
 #include <asm/smp.h>
+#include <asm/invpcid.h>
 
-static inline void __invpcid(unsigned long pcid, unsigned long addr,
-                            unsigned long type)
+static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
 {
-       struct { u64 d[2]; } desc = { { pcid, addr } };
-
        /*
-        * The memory clobber is because the whole point is to invalidate
-        * stale TLB entries and, especially if we're flushing global
-        * mappings, we don't want the compiler to reorder any subsequent
-        * memory accesses before the TLB flush.
-        *
-        * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and
-        * invpcid (%rcx), %rax in long mode.
+        * Bump the generation count.  This also serves as a full barrier
+        * that synchronizes with switch_mm(): callers are required to order
+        * their read of mm_cpumask after their writes to the paging
+        * structures.
         */
-       asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
-                     : : "m" (desc), "a" (type), "c" (&desc) : "memory");
+       return atomic64_inc_return(&mm->context.tlb_gen);
 }
 
-#define INVPCID_TYPE_INDIV_ADDR                0
-#define INVPCID_TYPE_SINGLE_CTXT       1
-#define INVPCID_TYPE_ALL_INCL_GLOBAL   2
-#define INVPCID_TYPE_ALL_NON_GLOBAL    3
+/* There are 12 bits of space for ASIDs in CR3 */
+#define CR3_HW_ASID_BITS               12
+/*
+ * When enabled, PAGE_TABLE_ISOLATION consumes a single bit for
+ * user/kernel switches
+ */
+#define PTI_CONSUMED_ASID_BITS         0
 
-/* Flush all mappings for a given pcid and addr, not including globals. */
-static inline void invpcid_flush_one(unsigned long pcid,
-                                    unsigned long addr)
-{
-       __invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR);
-}
+#define CR3_AVAIL_ASID_BITS (CR3_HW_ASID_BITS - PTI_CONSUMED_ASID_BITS)
+/*
+ * ASIDs are zero-based: 0->MAX_ASID_AVAILABLE are valid.  -1 below to account
+ * for them being zero-based.  Another -1 is because ASID 0 is reserved for
+ * use by non-PCID-aware users.
+ */
+#define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_ASID_BITS) - 2)
 
-/* Flush all mappings for a given PCID, not including globals. */
-static inline void invpcid_flush_single_context(unsigned long pcid)
+static inline u16 kern_pcid(u16 asid)
 {
-       __invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT);
+       VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
+       /*
+        * If PCID is on, ASID-aware code paths put the ASID+1 into the
+        * PCID bits.  This serves two purposes.  It prevents a nasty
+        * situation in which PCID-unaware code saves CR3, loads some other
+        * value (with PCID == 0), and then restores CR3, thus corrupting
+        * the TLB for ASID 0 if the saved ASID was nonzero.  It also means
+        * that any bugs involving loading a PCID-enabled CR3 with
+        * CR4.PCIDE off will trigger deterministically.
+        */
+       return asid + 1;
 }
 
-/* Flush all mappings, including globals, for all PCIDs. */
-static inline void invpcid_flush_all(void)
+struct pgd_t;
+static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
 {
-       __invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL);
+       if (static_cpu_has(X86_FEATURE_PCID)) {
+               return __sme_pa(pgd) | kern_pcid(asid);
+       } else {
+               VM_WARN_ON_ONCE(asid != 0);
+               return __sme_pa(pgd);
+       }
 }
 
-/* Flush all mappings for all PCIDs except globals. */
-static inline void invpcid_flush_all_nonglobals(void)
+static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
 {
-       __invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL);
-}
-
-static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
-{
-       u64 new_tlb_gen;
-
-       /*
-        * Bump the generation count.  This also serves as a full barrier
-        * that synchronizes with switch_mm(): callers are required to order
-        * their read of mm_cpumask after their writes to the paging
-        * structures.
-        */
-       smp_mb__before_atomic();
-       new_tlb_gen = atomic64_inc_return(&mm->context.tlb_gen);
-       smp_mb__after_atomic();
-
-       return new_tlb_gen;
+       VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
+       VM_WARN_ON_ONCE(!this_cpu_has(X86_FEATURE_PCID));
+       return __sme_pa(pgd) | kern_pcid(asid) | CR3_NOFLUSH;
 }
 
 #ifdef CONFIG_PARAVIRT
@@ -237,6 +233,9 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask)
 
 extern void initialize_tlbstate_and_flush(void);
 
+/*
+ * flush the entire current user mapping
+ */
 static inline void __native_flush_tlb(void)
 {
        /*
@@ -249,20 +248,12 @@ static inline void __native_flush_tlb(void)
        preempt_enable();
 }
 
-static inline void __native_flush_tlb_global_irq_disabled(void)
-{
-       unsigned long cr4;
-
-       cr4 = this_cpu_read(cpu_tlbstate.cr4);
-       /* clear PGE */
-       native_write_cr4(cr4 & ~X86_CR4_PGE);
-       /* write old PGE again and flush TLBs */
-       native_write_cr4(cr4);
-}
-
+/*
+ * flush everything
+ */
 static inline void __native_flush_tlb_global(void)
 {
-       unsigned long flags;
+       unsigned long cr4, flags;
 
        if (static_cpu_has(X86_FEATURE_INVPCID)) {
                /*
@@ -280,22 +271,36 @@ static inline void __native_flush_tlb_global(void)
         */
        raw_local_irq_save(flags);
 
-       __native_flush_tlb_global_irq_disabled();
+       cr4 = this_cpu_read(cpu_tlbstate.cr4);
+       /* toggle PGE */
+       native_write_cr4(cr4 ^ X86_CR4_PGE);
+       /* write old PGE again and flush TLBs */
+       native_write_cr4(cr4);
 
        raw_local_irq_restore(flags);
 }
 
+/*
+ * flush one page in the user mapping
+ */
 static inline void __native_flush_tlb_single(unsigned long addr)
 {
        asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
 }
 
+/*
+ * flush everything
+ */
 static inline void __flush_tlb_all(void)
 {
-       if (boot_cpu_has(X86_FEATURE_PGE))
+       if (boot_cpu_has(X86_FEATURE_PGE)) {
                __flush_tlb_global();
-       else
+       } else {
+               /*
+                * !PGE -> !PCID (setup_pcid()), thus every flush is total.
+                */
                __flush_tlb();
+       }
 
        /*
         * Note: if we somehow had PCID but not PGE, then this wouldn't work --
@@ -306,6 +311,9 @@ static inline void __flush_tlb_all(void)
         */
 }
 
+/*
+ * flush one page in the kernel mapping
+ */
 static inline void __flush_tlb_one(unsigned long addr)
 {
        count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
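
Putting kern_pcid() and build_cr3() together: a CR3 value is the page-table
root with the ASID encoded (off by one) in the low bits, plus the no-flush
bit when the TLB contents should be preserved. A worked sketch with made-up
numbers and the SME mask ignored:

	/* pgd at physical 0x1234000, ASID 5, X86_FEATURE_PCID set: */
	build_cr3(pgd, 5);		/* 0x1234000 | kern_pcid(5) == 0x1234006 */
	build_cr3_noflush(pgd, 5);	/* 0x1234006 | CR3_NOFLUSH (bit 63)     */
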
index 1fadd31..31051f3 100644 (file)
@@ -75,7 +75,6 @@ dotraplinkage void do_segment_not_present(struct pt_regs *, long);
 dotraplinkage void do_stack_segment(struct pt_regs *, long);
 #ifdef CONFIG_X86_64
 dotraplinkage void do_double_fault(struct pt_regs *, long);
-asmlinkage struct pt_regs *sync_regs(struct pt_regs *);
 #endif
 dotraplinkage void do_general_protection(struct pt_regs *, long);
 dotraplinkage void do_page_fault(struct pt_regs *, unsigned long);
index e9cc6fe..c1688c2 100644 (file)
@@ -7,6 +7,9 @@
 #include <asm/ptrace.h>
 #include <asm/stacktrace.h>
 
+#define IRET_FRAME_OFFSET (offsetof(struct pt_regs, ip))
+#define IRET_FRAME_SIZE   (sizeof(struct pt_regs) - IRET_FRAME_OFFSET)
+
 struct unwind_state {
        struct stack_info stack_info;
        unsigned long stack_mask;
@@ -52,6 +55,10 @@ void unwind_start(struct unwind_state *state, struct task_struct *task,
 }
 
 #if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER)
+/*
+ * WARNING: The entire pt_regs may not be safe to dereference.  In some cases,
+ * only the iret frame registers are accessible.  Use with caution!
+ */
 static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
 {
        if (unwind_done(state))
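
The two constants carve the hardware iret frame out of struct pt_regs: ip,
cs, flags, sp and ss are the only fields the CPU pushes itself, so they are
the only part guaranteed to be present when an entry-code frame is examined.
On x86_64 that works out to:

	IRET_FRAME_SIZE == 5 * sizeof(long)	/* 40 bytes, matching the
						   5*8-byte memmove()s in the
						   traps.c changes below */
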
index 8ea7827..676b7cf 100644 (file)
@@ -93,4 +93,10 @@ void common(void) {
 
        BLANK();
        DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
+
+       /* Layout info for cpu_entry_area */
+       OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss);
+       OFFSET(CPU_ENTRY_AREA_entry_trampoline, cpu_entry_area, entry_trampoline);
+       OFFSET(CPU_ENTRY_AREA_entry_stack, cpu_entry_area, entry_stack_page);
+       DEFINE(SIZEOF_entry_stack, sizeof(struct entry_stack));
 }
index dedf428..fa1261e 100644 (file)
@@ -47,13 +47,8 @@ void foo(void)
        BLANK();
 
        /* Offset from the sysenter stack to tss.sp0 */
-       DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) -
-              offsetofend(struct tss_struct, SYSENTER_stack));
-
-       /* Offset from cpu_tss to SYSENTER_stack */
-       OFFSET(CPU_TSS_SYSENTER_stack, tss_struct, SYSENTER_stack);
-       /* Size of SYSENTER_stack */
-       DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack));
+       DEFINE(TSS_sysenter_sp0, offsetof(struct cpu_entry_area, tss.x86_tss.sp0) -
+              offsetofend(struct cpu_entry_area, entry_stack_page.stack));
 
 #ifdef CONFIG_CC_STACKPROTECTOR
        BLANK();
index 630212f..bf51e51 100644 (file)
@@ -23,6 +23,9 @@ int main(void)
 #ifdef CONFIG_PARAVIRT
        OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64);
        OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
+#ifdef CONFIG_DEBUG_ENTRY
+       OFFSET(PV_IRQ_save_fl, pv_irq_ops, save_fl);
+#endif
        BLANK();
 #endif
 
@@ -63,6 +66,7 @@ int main(void)
 
        OFFSET(TSS_ist, tss_struct, x86_tss.ist);
        OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
+       OFFSET(TSS_sp1, tss_struct, x86_tss.sp1);
        BLANK();
 
 #ifdef CONFIG_CC_STACKPROTECTOR
index fa998ca..c9757f0 100644 (file)
@@ -476,8 +476,8 @@ static const char *table_lookup_model(struct cpuinfo_x86 *c)
        return NULL;            /* Not found */
 }
 
-__u32 cpu_caps_cleared[NCAPINTS];
-__u32 cpu_caps_set[NCAPINTS];
+__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
+__u32 cpu_caps_set[NCAPINTS + NBUGINTS];
 
 void load_percpu_segment(int cpu)
 {
@@ -490,28 +490,23 @@ void load_percpu_segment(int cpu)
        load_stack_canary_segment();
 }
 
-/* Setup the fixmap mapping only once per-processor */
-static inline void setup_fixmap_gdt(int cpu)
-{
-#ifdef CONFIG_X86_64
-       /* On 64-bit systems, we use a read-only fixmap GDT. */
-       pgprot_t prot = PAGE_KERNEL_RO;
-#else
-       /*
-        * On native 32-bit systems, the GDT cannot be read-only because
-        * our double fault handler uses a task gate, and entering through
-        * a task gate needs to change an available TSS to busy.  If the GDT
-        * is read-only, that will triple fault.
-        *
-        * On Xen PV, the GDT must be read-only because the hypervisor requires
-        * it.
-        */
-       pgprot_t prot = boot_cpu_has(X86_FEATURE_XENPV) ?
-               PAGE_KERNEL_RO : PAGE_KERNEL;
+#ifdef CONFIG_X86_32
+/* The 32-bit entry code needs to find cpu_entry_area. */
+DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
 #endif
 
-       __set_fixmap(get_cpu_gdt_ro_index(cpu), get_cpu_gdt_paddr(cpu), prot);
-}
+#ifdef CONFIG_X86_64
+/*
+ * Special IST stacks which the CPU switches to when it calls
+ * an IST-marked descriptor entry. Up to 7 stacks (hardware
+ * limit), all of them are 4K, except the debug stack which
+ * is 8K.
+ */
+static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
+         [0 ... N_EXCEPTION_STACKS - 1]        = EXCEPTION_STKSZ,
+         [DEBUG_STACK - 1]                     = DEBUG_STKSZ
+};
+#endif
 
 /* Load the original GDT from the per-cpu structure */
 void load_direct_gdt(int cpu)
@@ -747,7 +742,7 @@ static void apply_forced_caps(struct cpuinfo_x86 *c)
 {
        int i;
 
-       for (i = 0; i < NCAPINTS; i++) {
+       for (i = 0; i < NCAPINTS + NBUGINTS; i++) {
                c->x86_capability[i] &= ~cpu_caps_cleared[i];
                c->x86_capability[i] |= cpu_caps_set[i];
        }
@@ -1250,7 +1245,7 @@ void enable_sep_cpu(void)
                return;
 
        cpu = get_cpu();
-       tss = &per_cpu(cpu_tss, cpu);
+       tss = &per_cpu(cpu_tss_rw, cpu);
 
        /*
         * We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field --
@@ -1259,11 +1254,7 @@ void enable_sep_cpu(void)
 
        tss->x86_tss.ss1 = __KERNEL_CS;
        wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0);
-
-       wrmsr(MSR_IA32_SYSENTER_ESP,
-             (unsigned long)tss + offsetofend(struct tss_struct, SYSENTER_stack),
-             0);
-
+       wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1), 0);
        wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0);
 
        put_cpu();
@@ -1357,25 +1348,19 @@ DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
 DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
 EXPORT_PER_CPU_SYMBOL(__preempt_count);
 
-/*
- * Special IST stacks which the CPU switches to when it calls
- * an IST-marked descriptor entry. Up to 7 stacks (hardware
- * limit), all of them are 4K, except the debug stack which
- * is 8K.
- */
-static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
-         [0 ... N_EXCEPTION_STACKS - 1]        = EXCEPTION_STKSZ,
-         [DEBUG_STACK - 1]                     = DEBUG_STKSZ
-};
-
-static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
-       [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
-
 /* May not be marked __init: used by software suspend */
 void syscall_init(void)
 {
+       extern char _entry_trampoline[];
+       extern char entry_SYSCALL_64_trampoline[];
+
+       int cpu = smp_processor_id();
+       unsigned long SYSCALL64_entry_trampoline =
+               (unsigned long)get_cpu_entry_area(cpu)->entry_trampoline +
+               (entry_SYSCALL_64_trampoline - _entry_trampoline);
+
        wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
-       wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
+       wrmsrl(MSR_LSTAR, SYSCALL64_entry_trampoline);
 
 #ifdef CONFIG_IA32_EMULATION
        wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat);
@@ -1386,7 +1371,7 @@ void syscall_init(void)
         * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
         */
        wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
-       wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
+       wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1));
        wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
 #else
        wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret);
@@ -1530,7 +1515,7 @@ void cpu_init(void)
        if (cpu)
                load_ucode_ap();
 
-       t = &per_cpu(cpu_tss, cpu);
+       t = &per_cpu(cpu_tss_rw, cpu);
        oist = &per_cpu(orig_ist, cpu);
 
 #ifdef CONFIG_NUMA
@@ -1569,7 +1554,7 @@ void cpu_init(void)
         * set up and load the per-CPU TSS
         */
        if (!oist->ist[0]) {
-               char *estacks = per_cpu(exception_stacks, cpu);
+               char *estacks = get_cpu_entry_area(cpu)->exception_stacks;
 
                for (v = 0; v < N_EXCEPTION_STACKS; v++) {
                        estacks += exception_stack_sizes[v];
@@ -1580,7 +1565,7 @@ void cpu_init(void)
                }
        }
 
-       t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
+       t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
 
        /*
         * <= is required because the CPU will access up to
@@ -1596,11 +1581,12 @@ void cpu_init(void)
        enter_lazy_tlb(&init_mm, me);
 
        /*
-        * Initialize the TSS.  Don't bother initializing sp0, as the initial
-        * task never enters user mode.
+        * Initialize the TSS.  sp0 points to the entry trampoline stack
+        * regardless of what task is running.
         */
-       set_tss_desc(cpu, t);
+       set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
        load_TR_desc();
+       load_sp0((unsigned long)(cpu_entry_stack(cpu) + 1));
 
        load_mm_ldt(&init_mm);
 
@@ -1612,7 +1598,6 @@ void cpu_init(void)
        if (is_uv_system())
                uv_cpu_init();
 
-       setup_fixmap_gdt(cpu);
        load_fixmap_gdt(cpu);
 }
 
@@ -1622,7 +1607,7 @@ void cpu_init(void)
 {
        int cpu = smp_processor_id();
        struct task_struct *curr = current;
-       struct tss_struct *t = &per_cpu(cpu_tss, cpu);
+       struct tss_struct *t = &per_cpu(cpu_tss_rw, cpu);
 
        wait_for_master_cpu(cpu);
 
@@ -1657,12 +1642,12 @@ void cpu_init(void)
         * Initialize the TSS.  Don't bother initializing sp0, as the initial
         * task never enters user mode.
         */
-       set_tss_desc(cpu, t);
+       set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
        load_TR_desc();
 
        load_mm_ldt(&init_mm);
 
-       t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
+       t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
 
 #ifdef CONFIG_DOUBLEFAULT
        /* Set up doublefault TSS pointer in the GDT */
@@ -1674,7 +1659,6 @@ void cpu_init(void)
 
        fpu__init_cpu();
 
-       setup_fixmap_gdt(cpu);
        load_fixmap_gdt(cpu);
 }
 #endif
index 7dbcb7a..8ccdca6 100644 (file)
@@ -565,15 +565,6 @@ static void print_ucode(struct ucode_cpu_info *uci)
 }
 #else
 
-/*
- * Flush global tlb. We only do this in x86_64 where paging has been enabled
- * already and PGE should be enabled as well.
- */
-static inline void flush_tlb_early(void)
-{
-       __native_flush_tlb_global_irq_disabled();
-}
-
 static inline void print_ucode(struct ucode_cpu_info *uci)
 {
        struct microcode_intel *mc;
@@ -602,10 +593,6 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)
        if (rev != mc->hdr.rev)
                return -1;
 
-#ifdef CONFIG_X86_64
-       /* Flush global tlb. This is precaution. */
-       flush_tlb_early();
-#endif
        uci->cpu_sig.rev = rev;
 
        if (early)
index 0e662c5..0b8cedb 100644 (file)
@@ -50,25 +50,23 @@ static void doublefault_fn(void)
                cpu_relax();
 }
 
-struct tss_struct doublefault_tss __cacheline_aligned = {
-       .x86_tss = {
-               .sp0            = STACK_START,
-               .ss0            = __KERNEL_DS,
-               .ldt            = 0,
-               .io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
-
-               .ip             = (unsigned long) doublefault_fn,
-               /* 0x2 bit is always set */
-               .flags          = X86_EFLAGS_SF | 0x2,
-               .sp             = STACK_START,
-               .es             = __USER_DS,
-               .cs             = __KERNEL_CS,
-               .ss             = __KERNEL_DS,
-               .ds             = __USER_DS,
-               .fs             = __KERNEL_PERCPU,
-
-               .__cr3          = __pa_nodebug(swapper_pg_dir),
-       }
+struct x86_hw_tss doublefault_tss __cacheline_aligned = {
+       .sp0            = STACK_START,
+       .ss0            = __KERNEL_DS,
+       .ldt            = 0,
+       .io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
+
+       .ip             = (unsigned long) doublefault_fn,
+       /* 0x2 bit is always set */
+       .flags          = X86_EFLAGS_SF | 0x2,
+       .sp             = STACK_START,
+       .es             = __USER_DS,
+       .cs             = __KERNEL_CS,
+       .ss             = __KERNEL_DS,
+       .ds             = __USER_DS,
+       .fs             = __KERNEL_PERCPU,
+
+       .__cr3          = __pa_nodebug(swapper_pg_dir),
 };
 
 /* dummy for do_double_fault() call */
index f13b4c0..36b17e0 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/nmi.h>
 #include <linux/sysfs.h>
 
+#include <asm/cpu_entry_area.h>
 #include <asm/stacktrace.h>
 #include <asm/unwind.h>
 
@@ -43,6 +44,24 @@ bool in_task_stack(unsigned long *stack, struct task_struct *task,
        return true;
 }
 
+bool in_entry_stack(unsigned long *stack, struct stack_info *info)
+{
+       struct entry_stack *ss = cpu_entry_stack(smp_processor_id());
+
+       void *begin = ss;
+       void *end = ss + 1;
+
+       if ((void *)stack < begin || (void *)stack >= end)
+               return false;
+
+       info->type      = STACK_TYPE_ENTRY;
+       info->begin     = begin;
+       info->end       = end;
+       info->next_sp   = NULL;
+
+       return true;
+}
+
 static void printk_stack_address(unsigned long address, int reliable,
                                 char *log_lvl)
 {
@@ -50,6 +69,28 @@ static void printk_stack_address(unsigned long address, int reliable,
        printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address);
 }
 
+void show_iret_regs(struct pt_regs *regs)
+{
+       printk(KERN_DEFAULT "RIP: %04x:%pS\n", (int)regs->cs, (void *)regs->ip);
+       printk(KERN_DEFAULT "RSP: %04x:%016lx EFLAGS: %08lx", (int)regs->ss,
+               regs->sp, regs->flags);
+}
+
+static void show_regs_safe(struct stack_info *info, struct pt_regs *regs)
+{
+       if (on_stack(info, regs, sizeof(*regs)))
+               __show_regs(regs, 0);
+       else if (on_stack(info, (void *)regs + IRET_FRAME_OFFSET,
+                         IRET_FRAME_SIZE)) {
+               /*
+                * When an interrupt or exception occurs in entry code, the
+                * full pt_regs might not have been saved yet.  In that case
+                * just print the iret frame.
+                */
+               show_iret_regs(regs);
+       }
+}
+
 void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
                        unsigned long *stack, char *log_lvl)
 {
@@ -71,31 +112,35 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
         * - task stack
         * - interrupt stack
         * - HW exception stacks (double fault, nmi, debug, mce)
+        * - entry stack
         *
-        * x86-32 can have up to three stacks:
+        * x86-32 can have up to four stacks:
         * - task stack
         * - softirq stack
         * - hardirq stack
+        * - entry stack
         */
        for (regs = NULL; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
                const char *stack_name;
 
-               /*
-                * If we overflowed the task stack into a guard page, jump back
-                * to the bottom of the usable stack.
-                */
-               if (task_stack_page(task) - (void *)stack < PAGE_SIZE)
-                       stack = task_stack_page(task);
-
-               if (get_stack_info(stack, task, &stack_info, &visit_mask))
-                       break;
+               if (get_stack_info(stack, task, &stack_info, &visit_mask)) {
+                       /*
+                        * We weren't on a valid stack.  It's possible that
+                        * we overflowed a valid stack into a guard page.
+                        * See if the next page up is valid so that we can
+                        * generate some kind of backtrace if this happens.
+                        */
+                       stack = (unsigned long *)PAGE_ALIGN((unsigned long)stack);
+                       if (get_stack_info(stack, task, &stack_info, &visit_mask))
+                               break;
+               }
 
                stack_name = stack_type_name(stack_info.type);
                if (stack_name)
                        printk("%s <%s>\n", log_lvl, stack_name);
 
-               if (regs && on_stack(&stack_info, regs, sizeof(*regs)))
-                       __show_regs(regs, 0);
+               if (regs)
+                       show_regs_safe(&stack_info, regs);
 
                /*
                 * Scan the stack, printing any text addresses we find.  At the
@@ -119,7 +164,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
 
                        /*
                         * Don't print regs->ip again if it was already printed
-                        * by __show_regs() below.
+                        * by show_regs_safe() below.
                         */
                        if (regs && stack == &regs->ip)
                                goto next;
@@ -155,8 +200,8 @@ next:
 
                        /* if the frame has entry regs, print them */
                        regs = unwind_get_entry_regs(&state);
-                       if (regs && on_stack(&stack_info, regs, sizeof(*regs)))
-                               __show_regs(regs, 0);
+                       if (regs)
+                               show_regs_safe(&stack_info, regs);
                }
 
                if (stack_name)
index daefae8..04170f6 100644 (file)
@@ -26,6 +26,9 @@ const char *stack_type_name(enum stack_type type)
        if (type == STACK_TYPE_SOFTIRQ)
                return "SOFTIRQ";
 
+       if (type == STACK_TYPE_ENTRY)
+               return "ENTRY_TRAMPOLINE";
+
        return NULL;
 }
 
@@ -93,6 +96,9 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
        if (task != current)
                goto unknown;
 
+       if (in_entry_stack(stack, info))
+               goto recursion_check;
+
        if (in_hardirq_stack(stack, info))
                goto recursion_check;
 
index 88ce2ff..563e28d 100644 (file)
@@ -37,6 +37,15 @@ const char *stack_type_name(enum stack_type type)
        if (type == STACK_TYPE_IRQ)
                return "IRQ";
 
+       if (type == STACK_TYPE_ENTRY) {
+               /*
+                * On 64-bit, we have a generic entry stack that we
+                * use for all the kernel entry points, including
+                * SYSENTER.
+                */
+               return "ENTRY_TRAMPOLINE";
+       }
+
        if (type >= STACK_TYPE_EXCEPTION && type <= STACK_TYPE_EXCEPTION_LAST)
                return exception_stack_names[type - STACK_TYPE_EXCEPTION];
 
@@ -115,6 +124,9 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
        if (in_irq_stack(stack, info))
                goto recursion_check;
 
+       if (in_entry_stack(stack, info))
+               goto recursion_check;
+
        goto unknown;
 
 recursion_check:
index 3feb648..2f72330 100644 (file)
@@ -67,7 +67,7 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
         * because the ->io_bitmap_max value must match the bitmap
         * contents:
         */
-       tss = &per_cpu(cpu_tss, get_cpu());
+       tss = &per_cpu(cpu_tss_rw, get_cpu());
 
        if (turn_on)
                bitmap_clear(t->io_bitmap_ptr, from, num);
index 49cfd9f..68e1867 100644 (file)
@@ -219,18 +219,6 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
        /* high bit used in ret_from_ code  */
        unsigned vector = ~regs->orig_ax;
 
-       /*
-        * NB: Unlike exception entries, IRQ entries do not reliably
-        * handle context tracking in the low-level entry code.  This is
-        * because syscall entries execute briefly with IRQs on before
-        * updating context tracking state, so we can take an IRQ from
-        * kernel mode with CONTEXT_USER.  The low-level entry code only
-        * updates the context if we came from user mode, so we won't
-        * switch to CONTEXT_KERNEL.  We'll fix that once the syscall
-        * code is cleaned up enough that we can cleanly defer enabling
-        * IRQs.
-        */
-
        entering_irq();
 
        /* entering_irq() tells RCU that we're not quiescent.  Check it. */
index 020efbf..d86e344 100644 (file)
@@ -57,10 +57,10 @@ static inline void stack_overflow_check(struct pt_regs *regs)
        if (regs->sp >= estack_top && regs->sp <= estack_bottom)
                return;
 
-       WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx)\n",
+       WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx,ip:%pF)\n",
                current->comm, curbase, regs->sp,
                irq_stack_top, irq_stack_bottom,
-               estack_top, estack_bottom);
+               estack_top, estack_bottom, (void *)regs->ip);
 
        if (sysctl_panic_on_stackoverflow)
                panic("low stack detected by irq handler - check messages\n");
index 1c1eae9..a6b5d62 100644 (file)
@@ -5,6 +5,11 @@
  * Copyright (C) 2002 Andi Kleen
  *
  * This handles calls from both 32bit and 64bit mode.
+ *
+ * Lock order:
+ *     context.ldt_usr_sem
+ *       mmap_sem
+ *         context.lock
  */
 
 #include <linux/errno.h>
@@ -42,7 +47,7 @@ static void refresh_ldt_segments(void)
 #endif
 }
 
-/* context.lock is held for us, so we don't need any locking. */
+/* context.lock is held by the task which issued the smp function call */
 static void flush_ldt(void *__mm)
 {
        struct mm_struct *mm = __mm;
@@ -99,15 +104,17 @@ static void finalize_ldt_struct(struct ldt_struct *ldt)
        paravirt_alloc_ldt(ldt->entries, ldt->nr_entries);
 }
 
-/* context.lock is held */
-static void install_ldt(struct mm_struct *current_mm,
-                       struct ldt_struct *ldt)
+static void install_ldt(struct mm_struct *mm, struct ldt_struct *ldt)
 {
+       mutex_lock(&mm->context.lock);
+
        /* Synchronizes with READ_ONCE in load_mm_ldt. */
-       smp_store_release(&current_mm->context.ldt, ldt);
+       smp_store_release(&mm->context.ldt, ldt);
 
-       /* Activate the LDT for all CPUs using current_mm. */
-       on_each_cpu_mask(mm_cpumask(current_mm), flush_ldt, current_mm, true);
+       /* Activate the LDT for all CPUs using current's mm. */
+       on_each_cpu_mask(mm_cpumask(mm), flush_ldt, mm, true);
+
+       mutex_unlock(&mm->context.lock);
 }
 
 static void free_ldt_struct(struct ldt_struct *ldt)
@@ -124,27 +131,20 @@ static void free_ldt_struct(struct ldt_struct *ldt)
 }
 
 /*
- * we do not have to muck with descriptors here, that is
- * done in switch_mm() as needed.
+ * Called on fork from arch_dup_mmap(). Just copy the current LDT state;
+ * the new task is not running yet, so nothing can be installed.
  */
-int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm)
+int ldt_dup_context(struct mm_struct *old_mm, struct mm_struct *mm)
 {
        struct ldt_struct *new_ldt;
-       struct mm_struct *old_mm;
        int retval = 0;
 
-       mutex_init(&mm->context.lock);
-       old_mm = current->mm;
-       if (!old_mm) {
-               mm->context.ldt = NULL;
+       if (!old_mm)
                return 0;
-       }
 
        mutex_lock(&old_mm->context.lock);
-       if (!old_mm->context.ldt) {
-               mm->context.ldt = NULL;
+       if (!old_mm->context.ldt)
                goto out_unlock;
-       }
 
        new_ldt = alloc_ldt_struct(old_mm->context.ldt->nr_entries);
        if (!new_ldt) {
@@ -180,7 +180,7 @@ static int read_ldt(void __user *ptr, unsigned long bytecount)
        unsigned long entries_size;
        int retval;
 
-       mutex_lock(&mm->context.lock);
+       down_read(&mm->context.ldt_usr_sem);
 
        if (!mm->context.ldt) {
                retval = 0;
@@ -209,7 +209,7 @@ static int read_ldt(void __user *ptr, unsigned long bytecount)
        retval = bytecount;
 
 out_unlock:
-       mutex_unlock(&mm->context.lock);
+       up_read(&mm->context.ldt_usr_sem);
        return retval;
 }
 
@@ -269,7 +269,8 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
                        ldt.avl = 0;
        }
 
-       mutex_lock(&mm->context.lock);
+       if (down_write_killable(&mm->context.ldt_usr_sem))
+               return -EINTR;
 
        old_ldt       = mm->context.ldt;
        old_nr_entries = old_ldt ? old_ldt->nr_entries : 0;
@@ -291,7 +292,7 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
        error = 0;
 
 out_unlock:
-       mutex_unlock(&mm->context.lock);
+       up_write(&mm->context.ldt_usr_sem);
 out:
        return error;
 }
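
write_ldt() now nests the locks in exactly the order the new header comment
documents: ldt_usr_sem is taken first, and install_ldt() acquires
context.lock inside it. Condensed from the hunks above:

	if (down_write_killable(&mm->context.ldt_usr_sem))
		return -EINTR;
	...
	install_ldt(mm, new_ldt);	/* takes mutex_lock(&mm->context.lock) */
	...
	up_write(&mm->context.ldt_usr_sem);
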
index ac0be82..9edadab 100644 (file)
@@ -10,7 +10,6 @@ DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax");
 DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax");
 DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax");
 DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3");
-DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)");
 DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd");
 
 DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq");
@@ -60,7 +59,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
                PATCH_SITE(pv_mmu_ops, read_cr2);
                PATCH_SITE(pv_mmu_ops, read_cr3);
                PATCH_SITE(pv_mmu_ops, write_cr3);
-               PATCH_SITE(pv_mmu_ops, flush_tlb_single);
                PATCH_SITE(pv_cpu_ops, wbinvd);
 #if defined(CONFIG_PARAVIRT_SPINLOCKS)
                case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
index bb988a2..aed9d94 100644 (file)
@@ -47,7 +47,7 @@
  * section. Since TSS's are completely CPU-local, we want them
  * on exact cacheline boundaries, to eliminate cacheline ping-pong.
  */
-__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
+__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss_rw) = {
        .x86_tss = {
                /*
                 * .sp0 is only used when entering ring 0 from a lower
@@ -56,6 +56,16 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
                 * Poison it.
                 */
                .sp0 = (1UL << (BITS_PER_LONG-1)) + 1,
+
+#ifdef CONFIG_X86_64
+               /*
+                * .sp1 is cpu_current_top_of_stack.  The init task never
+                * runs user code, but cpu_current_top_of_stack should still
+                * be well defined before the first context switch.
+                */
+               .sp1 = TOP_OF_INIT_STACK,
+#endif
+
 #ifdef CONFIG_X86_32
                .ss0 = __KERNEL_DS,
                .ss1 = __KERNEL_CS,
@@ -71,11 +81,8 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
          */
        .io_bitmap              = { [0 ... IO_BITMAP_LONGS] = ~0 },
 #endif
-#ifdef CONFIG_X86_32
-       .SYSENTER_stack_canary  = STACK_END_MAGIC,
-#endif
 };
-EXPORT_PER_CPU_SYMBOL(cpu_tss);
+EXPORT_PER_CPU_SYMBOL(cpu_tss_rw);
 
 DEFINE_PER_CPU(bool, __tss_limit_invalid);
 EXPORT_PER_CPU_SYMBOL_GPL(__tss_limit_invalid);
@@ -104,7 +111,7 @@ void exit_thread(struct task_struct *tsk)
        struct fpu *fpu = &t->fpu;
 
        if (bp) {
-               struct tss_struct *tss = &per_cpu(cpu_tss, get_cpu());
+               struct tss_struct *tss = &per_cpu(cpu_tss_rw, get_cpu());
 
                t->io_bitmap_ptr = NULL;
                clear_thread_flag(TIF_IO_BITMAP);
index 45bf0c5..5224c60 100644 (file)
@@ -234,7 +234,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
        struct fpu *prev_fpu = &prev->fpu;
        struct fpu *next_fpu = &next->fpu;
        int cpu = smp_processor_id();
-       struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
+       struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu);
 
        /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
 
index eeeb34f..c754662 100644 (file)
@@ -69,9 +69,8 @@ void __show_regs(struct pt_regs *regs, int all)
        unsigned int fsindex, gsindex;
        unsigned int ds, cs, es;
 
-       printk(KERN_DEFAULT "RIP: %04lx:%pS\n", regs->cs, (void *)regs->ip);
-       printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx", regs->ss,
-               regs->sp, regs->flags);
+       show_iret_regs(regs);
+
        if (regs->orig_ax != -1)
                pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax);
        else
@@ -88,6 +87,9 @@ void __show_regs(struct pt_regs *regs, int all)
        printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n",
               regs->r13, regs->r14, regs->r15);
 
+       if (!all)
+               return;
+
        asm("movl %%ds,%0" : "=r" (ds));
        asm("movl %%cs,%0" : "=r" (cs));
        asm("movl %%es,%0" : "=r" (es));
@@ -98,9 +100,6 @@ void __show_regs(struct pt_regs *regs, int all)
        rdmsrl(MSR_GS_BASE, gs);
        rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
 
-       if (!all)
-               return;
-
        cr0 = read_cr0();
        cr2 = read_cr2();
        cr3 = __read_cr3();
@@ -400,7 +399,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
        struct fpu *prev_fpu = &prev->fpu;
        struct fpu *next_fpu = &next->fpu;
        int cpu = smp_processor_id();
-       struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
+       struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu);
 
        WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
                     this_cpu_read(irq_count) != -1);
@@ -462,6 +461,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
         * Switch the PDA and FPU contexts.
         */
        this_cpu_write(current_task, next_p);
+       this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p));
 
        /* Reload sp0. */
        update_sp0(next_p);
index 35cb209..c5970ef 100644 (file)
@@ -932,12 +932,8 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
        initial_code = (unsigned long)start_secondary;
        initial_stack  = idle->thread.sp;
 
-       /*
-        * Enable the espfix hack for this CPU
-       */
-#ifdef CONFIG_X86_ESPFIX64
+       /* Enable the espfix hack for this CPU */
        init_espfix_ap(cpu);
-#endif
 
        /* So we see what's up */
        announce_cpu(cpu, apicid);
index 989514c..f69dbd4 100644 (file)
@@ -51,6 +51,7 @@
 #include <asm/traps.h>
 #include <asm/desc.h>
 #include <asm/fpu/internal.h>
+#include <asm/cpu_entry_area.h>
 #include <asm/mce.h>
 #include <asm/fixmap.h>
 #include <asm/mach_traps.h>
@@ -348,9 +349,15 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
 
        /*
         * If IRET takes a non-IST fault on the espfix64 stack, then we
-        * end up promoting it to a doublefault.  In that case, modify
-        * the stack to make it look like we just entered the #GP
-        * handler from user space, similar to bad_iret.
+        * end up promoting it to a doublefault.  In that case, take
+        * advantage of the fact that we're not using the normal (TSS.sp0)
+        * stack right now.  We can write a fake #GP(0) frame at TSS.sp0
+        * and then modify our own IRET frame so that, when we return,
+        * we land directly at the #GP(0) vector with the stack already
+        * set up according to its expectations.
+        *
+        * The net result is that our #GP handler will think that we
+        * entered from usermode with the bad user context.
         *
         * No need for ist_enter here because we don't use RCU.
         */
@@ -358,13 +365,26 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
                regs->cs == __KERNEL_CS &&
                regs->ip == (unsigned long)native_irq_return_iret)
        {
-               struct pt_regs *normal_regs = task_pt_regs(current);
+               struct pt_regs *gpregs = (struct pt_regs *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
 
-               /* Fake a #GP(0) from userspace. */
-               memmove(&normal_regs->ip, (void *)regs->sp, 5*8);
-               normal_regs->orig_ax = 0;  /* Missing (lost) #GP error code */
+               /*
+                * regs->sp points to the failing IRET frame on the
+                * ESPFIX64 stack.  Copy it to the entry stack.  This fills
+                * in gpregs->ss through gpregs->ip.
+                */
+               memmove(&gpregs->ip, (void *)regs->sp, 5*8);
+               gpregs->orig_ax = 0;  /* Missing (lost) #GP error code */
+
+               /*
+                * Adjust our frame so that we return straight to the #GP
+                * vector with the expected RSP value.  This is safe because
+                * we won't enable interrupts or schedule before we invoke
+                * general_protection, so nothing will clobber the stack
+                * frame we just set up.
+                */
                regs->ip = (unsigned long)general_protection;
-               regs->sp = (unsigned long)&normal_regs->orig_ax;
+               regs->sp = (unsigned long)&gpregs->orig_ax;
 
                return;
        }
@@ -389,7 +409,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
         *
         *   Processors update CR2 whenever a page fault is detected. If a
         *   second page fault occurs while an earlier page fault is being
-        *   delivered, the faulting linear address of the second fault will
+        *   delivered, the faulting linear address of the second fault will
         *   overwrite the contents of CR2 (replacing the previous
         *   address). These updates to CR2 occur even if the page fault
         *   results in a double fault or occurs during the delivery of a
@@ -605,14 +625,15 @@ NOKPROBE_SYMBOL(do_int3);
 
 #ifdef CONFIG_X86_64
 /*
- * Help handler running on IST stack to switch off the IST stack if the
- * interrupted code was in user mode. The actual stack switch is done in
- * entry_64.S
+ * Help handler running on a per-cpu (IST or entry trampoline) stack
+ * to switch to the normal thread stack if the interrupted code was in
+ * user mode. The actual stack switch is done in entry_64.S
  */
 asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs)
 {
-       struct pt_regs *regs = task_pt_regs(current);
-       *regs = *eregs;
+       struct pt_regs *regs = (struct pt_regs *)this_cpu_read(cpu_current_top_of_stack) - 1;
+       if (regs != eregs)
+               *regs = *eregs;
        return regs;
 }
 NOKPROBE_SYMBOL(sync_regs);
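
sync_regs() now derives its pt_regs slot from cpu_current_top_of_stack rather than task_pt_regs(), but the arithmetic is the same either way: subtract one struct from the top of a stack. A small sketch of that pointer arithmetic, with an illustrative struct size standing in for pt_regs:

    #include <assert.h>
    #include <stdint.h>

    struct regs { unsigned long r[21]; };   /* size stands in for pt_regs */

    int main(void)
    {
            static unsigned char stack[8192];
            uintptr_t top = (uintptr_t)(stack + sizeof(stack));     /* like sp0 */

            /* "- 1" on a struct pointer steps back one whole struct. */
            struct regs *slot = (struct regs *)top - 1;

            assert((uintptr_t)(slot + 1) == top);
            assert((uintptr_t)slot == top - sizeof(struct regs));
            return 0;
    }
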
@@ -628,13 +649,13 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
        /*
         * This is called from entry_64.S early in handling a fault
         * caused by a bad iret to user mode.  To handle the fault
-        * correctly, we want move our stack frame to task_pt_regs
-        * and we want to pretend that the exception came from the
-        * iret target.
+        * correctly, we want to move our stack frame to where it would
+        * be had we entered directly on the entry stack (rather than
+        * just below the IRET frame) and we want to pretend that the
+        * exception came from the IRET target.
         */
        struct bad_iret_stack *new_stack =
-               container_of(task_pt_regs(current),
-                            struct bad_iret_stack, regs);
+               (struct bad_iret_stack *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
 
        /* Copy the IRET target to the new stack. */
        memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8);
@@ -795,14 +816,6 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
        debug_stack_usage_dec();
 
 exit:
-#if defined(CONFIG_X86_32)
-       /*
-        * This is the most likely code path that involves non-trivial use
-        * of the SYSENTER stack.  Check that we haven't overrun it.
-        */
-       WARN(this_cpu_read(cpu_tss.SYSENTER_stack_canary) != STACK_END_MAGIC,
-            "Overran or corrupted SYSENTER stack\n");
-#endif
        ist_exit(regs);
 }
 NOKPROBE_SYMBOL(do_debug);
@@ -929,6 +942,9 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
 
 void __init trap_init(void)
 {
+       /* Init cpu_entry_area before IST entries are set up */
+       setup_cpu_entry_areas();
+
        idt_setup_traps();
 
        /*
@@ -936,8 +952,9 @@ void __init trap_init(void)
         * "sidt" instruction will not leak the location of the kernel, and
         * to defend the IDT against arbitrary memory write vulnerabilities.
         * It will be reloaded in cpu_init() */
-       __set_fixmap(FIX_RO_IDT, __pa_symbol(idt_table), PAGE_KERNEL_RO);
-       idt_descr.address = fix_to_virt(FIX_RO_IDT);
+       cea_set_pte(CPU_ENTRY_AREA_RO_IDT_VADDR, __pa_symbol(idt_table),
+                   PAGE_KERNEL_RO);
+       idt_descr.address = CPU_ENTRY_AREA_RO_IDT;
 
        /*
         * Should be a barrier for any external CPU state:
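
The cea_set_pte() call aliases the ordinary, writable idt_table at a second, read-only virtual address, so "sidt" only ever reveals the alias and stray writes through it fault. A hedged userspace analogue of one backing page with two protections, using memfd (glibc 2.27+ assumed; the kernel of course manipulates PTEs directly rather than calling mmap):

    #define _GNU_SOURCE
    #include <stdio.h>
    #include <string.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int main(void)
    {
            int fd = memfd_create("idt", 0);        /* the one backing page */
            ftruncate(fd, 4096);

            char *rw = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
            char *ro = mmap(NULL, 4096, PROT_READ, MAP_SHARED, fd, 0);

            strcpy(rw, "gate descriptors");        /* writes go through the RW view */
            printf("ro alias sees: %s\n", ro);     /* readers use the RO view */
            /* ro[0] = 'X'; would SIGSEGV, like a write through the RO IDT VA */
            return 0;
    }
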
index a3f973b..be86a86 100644 (file)
@@ -253,22 +253,15 @@ unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
        return NULL;
 }
 
-static bool stack_access_ok(struct unwind_state *state, unsigned long addr,
+static bool stack_access_ok(struct unwind_state *state, unsigned long _addr,
                            size_t len)
 {
        struct stack_info *info = &state->stack_info;
+       void *addr = (void *)_addr;
 
-       /*
-        * If the address isn't on the current stack, switch to the next one.
-        *
-        * We may have to traverse multiple stacks to deal with the possibility
-        * that info->next_sp could point to an empty stack and the address
-        * could be on a subsequent stack.
-        */
-       while (!on_stack(info, (void *)addr, len))
-               if (get_stack_info(info->next_sp, state->task, info,
-                                  &state->stack_mask))
-                       return false;
+       if (!on_stack(info, addr, len) &&
+           (get_stack_info(addr, state->task, info, &state->stack_mask)))
+               return false;
 
        return true;
 }
@@ -283,42 +276,32 @@ static bool deref_stack_reg(struct unwind_state *state, unsigned long addr,
        return true;
 }
 
-#define REGS_SIZE (sizeof(struct pt_regs))
-#define SP_OFFSET (offsetof(struct pt_regs, sp))
-#define IRET_REGS_SIZE (REGS_SIZE - offsetof(struct pt_regs, ip))
-#define IRET_SP_OFFSET (SP_OFFSET - offsetof(struct pt_regs, ip))
-
 static bool deref_stack_regs(struct unwind_state *state, unsigned long addr,
-                            unsigned long *ip, unsigned long *sp, bool full)
+                            unsigned long *ip, unsigned long *sp)
 {
-       size_t regs_size = full ? REGS_SIZE : IRET_REGS_SIZE;
-       size_t sp_offset = full ? SP_OFFSET : IRET_SP_OFFSET;
-       struct pt_regs *regs = (struct pt_regs *)(addr + regs_size - REGS_SIZE);
-
-       if (IS_ENABLED(CONFIG_X86_64)) {
-               if (!stack_access_ok(state, addr, regs_size))
-                       return false;
+       struct pt_regs *regs = (struct pt_regs *)addr;
 
-               *ip = regs->ip;
-               *sp = regs->sp;
+       /* x86-32 support will be more complicated due to the &regs->sp hack */
+       BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_32));
 
-               return true;
-       }
-
-       if (!stack_access_ok(state, addr, sp_offset))
+       if (!stack_access_ok(state, addr, sizeof(struct pt_regs)))
                return false;
 
        *ip = regs->ip;
+       *sp = regs->sp;
+       return true;
+}
 
-       if (user_mode(regs)) {
-               if (!stack_access_ok(state, addr + sp_offset,
-                                    REGS_SIZE - SP_OFFSET))
-                       return false;
+static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr,
+                                 unsigned long *ip, unsigned long *sp)
+{
+       struct pt_regs *regs = (void *)addr - IRET_FRAME_OFFSET;
 
-               *sp = regs->sp;
-       } else
-               *sp = (unsigned long)&regs->sp;
+       if (!stack_access_ok(state, addr, IRET_FRAME_SIZE))
+               return false;
 
+       *ip = regs->ip;
+       *sp = regs->sp;
        return true;
 }
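
deref_stack_iret_regs() steps back from the five iret words to the enclosing pt_regs using IRET_FRAME_OFFSET, which in the kernel is derived from offsetof(struct pt_regs, ip). The same back-pointer arithmetic in isolation, with a purely illustrative layout:

    #include <assert.h>
    #include <stddef.h>

    struct fake_regs {                      /* illustrative layout only */
            unsigned long bx, cx, dx, si, di;
            unsigned long ip, cs, flags, sp, ss;
    };

    #define IRET_FRAME_OFFSET offsetof(struct fake_regs, ip)

    int main(void)
    {
            struct fake_regs regs;
            char *iret_words = (char *)&regs.ip;    /* what the ORC data points at */

            /* Step back to the enclosing struct, as the unwinder does. */
            struct fake_regs *full =
                    (struct fake_regs *)(iret_words - IRET_FRAME_OFFSET);
            assert(full == &regs);
            return 0;
    }
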
 
@@ -327,7 +310,6 @@ bool unwind_next_frame(struct unwind_state *state)
        unsigned long ip_p, sp, orig_ip, prev_sp = state->sp;
        enum stack_type prev_type = state->stack_info.type;
        struct orc_entry *orc;
-       struct pt_regs *ptregs;
        bool indirect = false;
 
        if (unwind_done(state))
@@ -435,7 +417,7 @@ bool unwind_next_frame(struct unwind_state *state)
                break;
 
        case ORC_TYPE_REGS:
-               if (!deref_stack_regs(state, sp, &state->ip, &state->sp, true)) {
+               if (!deref_stack_regs(state, sp, &state->ip, &state->sp)) {
                        orc_warn("can't dereference registers at %p for ip %pB\n",
                                 (void *)sp, (void *)orig_ip);
                        goto done;
@@ -447,20 +429,14 @@ bool unwind_next_frame(struct unwind_state *state)
                break;
 
        case ORC_TYPE_REGS_IRET:
-               if (!deref_stack_regs(state, sp, &state->ip, &state->sp, false)) {
+               if (!deref_stack_iret_regs(state, sp, &state->ip, &state->sp)) {
                        orc_warn("can't dereference iret registers at %p for ip %pB\n",
                                 (void *)sp, (void *)orig_ip);
                        goto done;
                }
 
-               ptregs = container_of((void *)sp, struct pt_regs, ip);
-               if ((unsigned long)ptregs >= prev_sp &&
-                   on_stack(&state->stack_info, ptregs, REGS_SIZE)) {
-                       state->regs = ptregs;
-                       state->full_regs = false;
-               } else
-                       state->regs = NULL;
-
+               state->regs = (void *)sp - IRET_FRAME_OFFSET;
+               state->full_regs = false;
                state->signal = true;
                break;
 
@@ -553,8 +529,18 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
        }
 
        if (get_stack_info((unsigned long *)state->sp, state->task,
-                          &state->stack_info, &state->stack_mask))
-               return;
+                          &state->stack_info, &state->stack_mask)) {
+               /*
+                * We weren't on a valid stack.  It's possible that
+                * we overflowed a valid stack into a guard page.
+                * See if the next page up is valid so that we can
+                * generate some kind of backtrace if this happens.
+                */
+               void *next_page = (void *)PAGE_ALIGN((unsigned long)state->sp);
+               if (get_stack_info(next_page, state->task, &state->stack_info,
+                                  &state->stack_mask))
+                       return;
+       }
 
        /*
         * The caller can provide the address of the first frame directly
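
The overflow fallback rounds the bad stack pointer up to the next page boundary and retries get_stack_info(), on the theory that an overrun sp sits in the guard page just below a valid stack. For reference, the rounding PAGE_ALIGN performs:

    #include <assert.h>

    #define PAGE_SIZE      4096UL
    #define PAGE_ALIGN(x)  (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

    int main(void)
    {
            /* An sp inside a guard page rounds up to the page above it. */
            assert(PAGE_ALIGN(0x7000UL - 8) == 0x7000);
            assert(PAGE_ALIGN(0x7000UL) == 0x7000);   /* aligned stays put */
            return 0;
    }
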
index a4009fb..d2a8b5a 100644 (file)
@@ -107,6 +107,15 @@ SECTIONS
                SOFTIRQENTRY_TEXT
                *(.fixup)
                *(.gnu.warning)
+
+#ifdef CONFIG_X86_64
+               . = ALIGN(PAGE_SIZE);
+               _entry_trampoline = .;
+               *(.entry_trampoline)
+               . = ALIGN(PAGE_SIZE);
+               ASSERT(. - _entry_trampoline == PAGE_SIZE, "entry trampoline is too big");
+#endif
+
                /* End of text section */
                _etext = .;
        } :text = 0x9090
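
The linker ASSERT pins the trampoline text to exactly one page, so a single cea_set_pte() can alias it into every CPU's entry area. The same invariant expressed as a compile-time check in C, with an illustrative page size:

    #define PAGE_SIZE 4096

    static char entry_trampoline[PAGE_SIZE]
            __attribute__((aligned(PAGE_SIZE)));

    _Static_assert(sizeof(entry_trampoline) == PAGE_SIZE,
                   "entry trampoline is too big");

    int main(void) { return 0; }
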
index abe74f7..b514b2b 100644 (file)
@@ -2390,9 +2390,21 @@ static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
 }
 
 static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
-                                    u64 cr0, u64 cr4)
+                                   u64 cr0, u64 cr3, u64 cr4)
 {
        int bad;
+       u64 pcid;
+
+       /* In order to later set CR4.PCIDE, CR3[11:0] must be zero.  */
+       pcid = 0;
+       if (cr4 & X86_CR4_PCIDE) {
+               pcid = cr3 & 0xfff;
+               cr3 &= ~0xfff;
+       }
+
+       bad = ctxt->ops->set_cr(ctxt, 3, cr3);
+       if (bad)
+               return X86EMUL_UNHANDLEABLE;
 
        /*
         * First enable PAE, long mode needs it before CR0.PG = 1 is set.
@@ -2411,6 +2423,12 @@ static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
                bad = ctxt->ops->set_cr(ctxt, 4, cr4);
                if (bad)
                        return X86EMUL_UNHANDLEABLE;
+               if (pcid) {
+                       bad = ctxt->ops->set_cr(ctxt, 3, cr3 | pcid);
+                       if (bad)
+                               return X86EMUL_UNHANDLEABLE;
+               }
+
        }
 
        return X86EMUL_CONTINUE;
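
Once CR4.PCIDE is set, CR3 bits 11:0 carry the PCID, but PCIDE itself may only be enabled while those bits are zero; hence the strip, load, enable, restore sequence above. The bit manipulation in isolation:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
            uint64_t cr3 = 0x123456000ULL | 0x5;    /* table base | PCID 5 */
            uint64_t pcid;

            pcid = cr3 & 0xfff;                     /* save bits 11:0 */
            cr3 &= ~0xfffULL;                       /* must be 0 before PCIDE=1 */
            assert(pcid == 5 && cr3 == 0x123456000ULL);

            /* ...CR4.PCIDE would be enabled here, then the PCID restored: */
            cr3 |= pcid;
            assert((cr3 & 0xfff) == 5);
            return 0;
    }
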
@@ -2421,11 +2439,11 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
        struct desc_struct desc;
        struct desc_ptr dt;
        u16 selector;
-       u32 val, cr0, cr4;
+       u32 val, cr0, cr3, cr4;
        int i;
 
        cr0 =                      GET_SMSTATE(u32, smbase, 0x7ffc);
-       ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u32, smbase, 0x7ff8));
+       cr3 =                      GET_SMSTATE(u32, smbase, 0x7ff8);
        ctxt->eflags =             GET_SMSTATE(u32, smbase, 0x7ff4) | X86_EFLAGS_FIXED;
        ctxt->_eip =               GET_SMSTATE(u32, smbase, 0x7ff0);
 
@@ -2467,14 +2485,14 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
 
        ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7ef8));
 
-       return rsm_enter_protected_mode(ctxt, cr0, cr4);
+       return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
 }
 
 static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
 {
        struct desc_struct desc;
        struct desc_ptr dt;
-       u64 val, cr0, cr4;
+       u64 val, cr0, cr3, cr4;
        u32 base3;
        u16 selector;
        int i, r;
@@ -2491,7 +2509,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
        ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
 
        cr0 =                       GET_SMSTATE(u64, smbase, 0x7f58);
-       ctxt->ops->set_cr(ctxt, 3,  GET_SMSTATE(u64, smbase, 0x7f50));
+       cr3 =                       GET_SMSTATE(u64, smbase, 0x7f50);
        cr4 =                       GET_SMSTATE(u64, smbase, 0x7f48);
        ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7f00));
        val =                       GET_SMSTATE(u64, smbase, 0x7ed0);
@@ -2519,7 +2537,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
        dt.address =                GET_SMSTATE(u64, smbase, 0x7e68);
        ctxt->ops->set_gdt(ctxt, &dt);
 
-       r = rsm_enter_protected_mode(ctxt, cr0, cr4);
+       r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
        if (r != X86EMUL_CONTINUE)
                return r;
 
index e5e66e5..c4deb1f 100644 (file)
@@ -3395,7 +3395,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
                spin_lock(&vcpu->kvm->mmu_lock);
                if (make_mmu_pages_available(vcpu) < 0) {
                        spin_unlock(&vcpu->kvm->mmu_lock);
-                       return 1;
+                       return -ENOSPC;
                }
                sp = kvm_mmu_get_page(vcpu, 0, 0,
                                vcpu->arch.mmu.shadow_root_level, 1, ACC_ALL);
@@ -3410,7 +3410,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
                        spin_lock(&vcpu->kvm->mmu_lock);
                        if (make_mmu_pages_available(vcpu) < 0) {
                                spin_unlock(&vcpu->kvm->mmu_lock);
-                               return 1;
+                               return -ENOSPC;
                        }
                        sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT),
                                        i << 30, PT32_ROOT_LEVEL, 1, ACC_ALL);
@@ -3450,7 +3450,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
                spin_lock(&vcpu->kvm->mmu_lock);
                if (make_mmu_pages_available(vcpu) < 0) {
                        spin_unlock(&vcpu->kvm->mmu_lock);
-                       return 1;
+                       return -ENOSPC;
                }
                sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
                                vcpu->arch.mmu.shadow_root_level, 0, ACC_ALL);
@@ -3487,7 +3487,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
                spin_lock(&vcpu->kvm->mmu_lock);
                if (make_mmu_pages_available(vcpu) < 0) {
                        spin_unlock(&vcpu->kvm->mmu_lock);
-                       return 1;
+                       return -ENOSPC;
                }
                sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL,
                                      0, ACC_ALL);
index 8eba631..023afa0 100644 (file)
@@ -2302,7 +2302,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
                 * processors.  See 22.2.4.
                 */
                vmcs_writel(HOST_TR_BASE,
-                           (unsigned long)this_cpu_ptr(&cpu_tss));
+                           (unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss);
                vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt);   /* 22.2.4 */
 
                /*
index faf843c..1cec2c6 100644 (file)
@@ -4384,7 +4384,7 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
                                         addr, n, v))
                    && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v))
                        break;
-               trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v);
+               trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, v);
                handled += n;
                addr += n;
                len -= n;
@@ -4643,7 +4643,7 @@ static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
 {
        if (vcpu->mmio_read_completed) {
                trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
-                              vcpu->mmio_fragments[0].gpa, *(u64 *)val);
+                              vcpu->mmio_fragments[0].gpa, val);
                vcpu->mmio_read_completed = 0;
                return 1;
        }
@@ -4665,14 +4665,14 @@ static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
 
 static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val)
 {
-       trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val);
+       trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, val);
        return vcpu_mmio_write(vcpu, gpa, bytes, val);
 }
 
 static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
                          void *val, int bytes)
 {
-       trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
+       trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, NULL);
        return X86EMUL_IO_NEEDED;
 }
 
@@ -7264,13 +7264,12 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
 
 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
-       struct fpu *fpu = &current->thread.fpu;
        int r;
 
-       fpu__initialize(fpu);
-
        kvm_sigset_activate(vcpu);
 
+       kvm_load_guest_fpu(vcpu);
+
        if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
                if (kvm_run->immediate_exit) {
                        r = -EINTR;
@@ -7296,14 +7295,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
                }
        }
 
-       kvm_load_guest_fpu(vcpu);
-
        if (unlikely(vcpu->arch.complete_userspace_io)) {
                int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io;
                vcpu->arch.complete_userspace_io = NULL;
                r = cui(vcpu);
                if (r <= 0)
-                       goto out_fpu;
+                       goto out;
        } else
                WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);
 
@@ -7312,9 +7309,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        else
                r = vcpu_run(vcpu);
 
-out_fpu:
-       kvm_put_guest_fpu(vcpu);
 out:
+       kvm_put_guest_fpu(vcpu);
        post_kvm_run_save(vcpu);
        kvm_sigset_deactivate(vcpu);
 
@@ -7384,7 +7380,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 #endif
 
        kvm_rip_write(vcpu, regs->rip);
-       kvm_set_rflags(vcpu, regs->rflags);
+       kvm_set_rflags(vcpu, regs->rflags | X86_EFLAGS_FIXED);
 
        vcpu->arch.exception.pending = false;
 
@@ -7498,6 +7494,29 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
 }
 EXPORT_SYMBOL_GPL(kvm_task_switch);
 
+int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+       if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG)) {
+               /*
+                * When EFER.LME and CR0.PG are set, the processor is in
+                * 64-bit mode (though maybe in a 32-bit code segment).
+                * CR4.PAE and EFER.LMA must be set.
+                */
+               if (!(sregs->cr4 & X86_CR4_PAE)
+                   || !(sregs->efer & EFER_LMA))
+                       return -EINVAL;
+       } else {
+               /*
+                * Not in 64-bit mode: EFER.LMA is clear and the code
+                * segment cannot be 64-bit.
+                */
+               if (sregs->efer & EFER_LMA || sregs->cs.l)
+                       return -EINVAL;
+       }
+
+       return 0;
+}
+
 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
                                  struct kvm_sregs *sregs)
 {
@@ -7510,6 +7529,9 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
                        (sregs->cr4 & X86_CR4_OSXSAVE))
                return -EINVAL;
 
+       if (kvm_valid_sregs(vcpu, sregs))
+               return -EINVAL;
+
        apic_base_msr.data = sregs->apic_base;
        apic_base_msr.host_initiated = true;
        if (kvm_set_apic_base(vcpu, &apic_base_msr))
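
kvm_valid_sregs() encodes the architectural consistency rule: long mode (EFER.LME with CR0.PG) requires CR4.PAE and EFER.LMA, and outside long mode EFER.LMA must be clear and CS.L must not be set. The same truth table as a standalone check, using the architectural bit positions:

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    #define EFER_LME (1u << 8)
    #define EFER_LMA (1u << 10)
    #define CR0_PG   (1u << 31)
    #define CR4_PAE  (1u << 5)

    static bool sregs_valid(uint32_t efer, uint32_t cr0, uint32_t cr4, bool cs_l)
    {
            if ((efer & EFER_LME) && (cr0 & CR0_PG))
                    return (cr4 & CR4_PAE) && (efer & EFER_LMA);
            return !(efer & EFER_LMA) && !cs_l;
    }

    int main(void)
    {
            assert(sregs_valid(EFER_LME | EFER_LMA, CR0_PG, CR4_PAE, true));
            assert(!sregs_valid(EFER_LME, CR0_PG, CR4_PAE, false)); /* LMA clear */
            assert(!sregs_valid(EFER_LMA, 0, 0, false));            /* stray LMA */
            return 0;
    }
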
index 553f8fd..4846eff 100644 (file)
@@ -107,10 +107,10 @@ static void delay_mwaitx(unsigned long __loops)
                delay = min_t(u64, MWAITX_MAX_LOOPS, loops);
 
                /*
-                * Use cpu_tss as a cacheline-aligned, seldomly
+                * Use cpu_tss_rw as a cacheline-aligned, seldom
                 * accessed per-cpu variable as the monitor target.
                 */
-               __monitorx(raw_cpu_ptr(&cpu_tss), 0, 0);
+               __monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0);
 
                /*
                 * AMD, like Intel, supports the EAX hint and EAX=0xf
index 8e13b8c..52195ee 100644 (file)
@@ -10,7 +10,7 @@ CFLAGS_REMOVE_mem_encrypt.o   = -pg
 endif
 
 obj-y  :=  init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
-           pat.o pgtable.o physaddr.o setup_nx.o tlb.o
+           pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o
 
 # Make sure __phys_addr has no stackprotector
 nostackp := $(call cc-option, -fno-stack-protector)
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
new file mode 100644 (file)
index 0000000..fe814fd
--- /dev/null
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/spinlock.h>
+#include <linux/percpu.h>
+
+#include <asm/cpu_entry_area.h>
+#include <asm/pgtable.h>
+#include <asm/fixmap.h>
+#include <asm/desc.h>
+
+static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage);
+
+#ifdef CONFIG_X86_64
+static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
+       [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
+#endif
+
+struct cpu_entry_area *get_cpu_entry_area(int cpu)
+{
+       unsigned long va = CPU_ENTRY_AREA_PER_CPU + cpu * CPU_ENTRY_AREA_SIZE;
+       BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0);
+
+       return (struct cpu_entry_area *) va;
+}
+EXPORT_SYMBOL(get_cpu_entry_area);
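
Note that get_cpu_entry_area() is pure address arithmetic: each CPU's area sits at a fixed stride above a common base, so the lookup touches no memory. In isolation, with base and size values that are illustrative rather than the kernel's:

    #include <assert.h>

    #define CEA_BASE  0xfffffe0000001000UL   /* illustrative base */
    #define CEA_SIZE  (40UL * 4096)          /* illustrative per-cpu size */

    static unsigned long cea_va(int cpu)
    {
            return CEA_BASE + cpu * CEA_SIZE;   /* same shape as above */
    }

    int main(void)
    {
            assert(cea_va(1) - cea_va(0) == CEA_SIZE);
            assert(cea_va(3) == CEA_BASE + 3 * CEA_SIZE);
            return 0;
    }
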
+
+void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags)
+{
+       unsigned long va = (unsigned long) cea_vaddr;
+
+       set_pte_vaddr(va, pfn_pte(pa >> PAGE_SHIFT, flags));
+}
+
+static void __init
+cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot)
+{
+       for ( ; pages; pages--, cea_vaddr += PAGE_SIZE, ptr += PAGE_SIZE)
+               cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot);
+}
+
+/* Set up the fixmap mappings only once per processor */
+static void __init setup_cpu_entry_area(int cpu)
+{
+#ifdef CONFIG_X86_64
+       extern char _entry_trampoline[];
+
+       /* On 64-bit systems, we use a read-only fixmap GDT and TSS. */
+       pgprot_t gdt_prot = PAGE_KERNEL_RO;
+       pgprot_t tss_prot = PAGE_KERNEL_RO;
+#else
+       /*
+        * On native 32-bit systems, the GDT cannot be read-only because
+        * our double fault handler uses a task gate, and entering through
+        * a task gate needs to change an available TSS to busy.  If the
+        * GDT is read-only, that will triple fault.  The TSS cannot be
+        * read-only because the CPU writes to it on task switches.
+        *
+        * On Xen PV, the GDT must be read-only because the hypervisor
+        * requires it.
+        */
+       pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ?
+               PAGE_KERNEL_RO : PAGE_KERNEL;
+       pgprot_t tss_prot = PAGE_KERNEL;
+#endif
+
+       cea_set_pte(&get_cpu_entry_area(cpu)->gdt, get_cpu_gdt_paddr(cpu),
+                   gdt_prot);
+
+       cea_map_percpu_pages(&get_cpu_entry_area(cpu)->entry_stack_page,
+                            per_cpu_ptr(&entry_stack_storage, cpu), 1,
+                            PAGE_KERNEL);
+
+       /*
+        * The Intel SDM says (Volume 3, 7.2.1):
+        *
+        *  Avoid placing a page boundary in the part of the TSS that the
+        *  processor reads during a task switch (the first 104 bytes). The
+        *  processor may not correctly perform address translations if a
+        *  boundary occurs in this area. During a task switch, the processor
+        *  reads and writes into the first 104 bytes of each TSS (using
+        *  contiguous physical addresses beginning with the physical address
+        *  of the first byte of the TSS). So, after TSS access begins, if
+        *  part of the 104 bytes is not physically contiguous, the processor
+        *  will access incorrect information without generating a page-fault
+        *  exception.
+        *
+        * There are also a lot of errata involving the TSS spanning a page
+        * boundary.  Assert that we're not doing that.
+        */
+       BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
+                     offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
+       BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
+       cea_map_percpu_pages(&get_cpu_entry_area(cpu)->tss,
+                            &per_cpu(cpu_tss_rw, cpu),
+                            sizeof(struct tss_struct) / PAGE_SIZE, tss_prot);
+
+#ifdef CONFIG_X86_32
+       per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
+#endif
+
+#ifdef CONFIG_X86_64
+       BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
+       BUILD_BUG_ON(sizeof(exception_stacks) !=
+                    sizeof(((struct cpu_entry_area *)0)->exception_stacks));
+       cea_map_percpu_pages(&get_cpu_entry_area(cpu)->exception_stacks,
+                            &per_cpu(exception_stacks, cpu),
+                            sizeof(exception_stacks) / PAGE_SIZE, PAGE_KERNEL);
+
+       cea_set_pte(&get_cpu_entry_area(cpu)->entry_trampoline,
+                    __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
+#endif
+}
+
+static __init void setup_cpu_entry_area_ptes(void)
+{
+#ifdef CONFIG_X86_32
+       unsigned long start, end;
+
+       BUILD_BUG_ON(CPU_ENTRY_AREA_PAGES * PAGE_SIZE < CPU_ENTRY_AREA_MAP_SIZE);
+       BUG_ON(CPU_ENTRY_AREA_BASE & ~PMD_MASK);
+
+       start = CPU_ENTRY_AREA_BASE;
+       end = start + CPU_ENTRY_AREA_MAP_SIZE;
+
+       /* Careful here: start + PMD_SIZE might wrap around */
+       for (; start < end && start >= CPU_ENTRY_AREA_BASE; start += PMD_SIZE)
+               populate_extra_pte(start);
+#endif
+}
+
+void __init setup_cpu_entry_areas(void)
+{
+       unsigned int cpu;
+
+       setup_cpu_entry_area_ptes();
+
+       for_each_possible_cpu(cpu)
+               setup_cpu_entry_area(cpu);
+}
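
The BUILD_BUG_ON pair guarding the TSS mapping relies on an XOR trick: if the offsets of the first and one-past-last byte of x86_tss differ in any bit covered by PAGE_MASK, the hardware-read region would straddle a page. The same predicate as a compile-time check, with an illustrative layout:

    #include <stddef.h>

    #define PAGE_MASK (~4095UL)

    struct tss_like {
            char pad[64];
            char x86_tss[104];      /* the region the CPU reads on task switch */
    };

    /* Equal high bits (above the page offset) means both ends share a page. */
    _Static_assert(((offsetof(struct tss_like, x86_tss) ^
                     (offsetof(struct tss_like, x86_tss) + 104)) & PAGE_MASK) == 0,
                   "TSS spans a page boundary");

    int main(void) { return 0; }
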
index 5e3ac6f..43dedbf 100644 (file)
@@ -44,10 +44,12 @@ struct addr_marker {
        unsigned long max_lines;
 };
 
-/* indices for address_markers; keep sync'd w/ address_markers below */
+/* Address space marker hints */
+
+#ifdef CONFIG_X86_64
+
 enum address_markers_idx {
        USER_SPACE_NR = 0,
-#ifdef CONFIG_X86_64
        KERNEL_SPACE_NR,
        LOW_KERNEL_NR,
        VMALLOC_START_NR,
@@ -56,56 +58,74 @@ enum address_markers_idx {
        KASAN_SHADOW_START_NR,
        KASAN_SHADOW_END_NR,
 #endif
-# ifdef CONFIG_X86_ESPFIX64
+       CPU_ENTRY_AREA_NR,
+#ifdef CONFIG_X86_ESPFIX64
        ESPFIX_START_NR,
-# endif
+#endif
+#ifdef CONFIG_EFI
+       EFI_END_NR,
+#endif
        HIGH_KERNEL_NR,
        MODULES_VADDR_NR,
        MODULES_END_NR,
-#else
+       FIXADDR_START_NR,
+       END_OF_SPACE_NR,
+};
+
+static struct addr_marker address_markers[] = {
+       [USER_SPACE_NR]         = { 0,                  "User Space" },
+       [KERNEL_SPACE_NR]       = { (1UL << 63),        "Kernel Space" },
+       [LOW_KERNEL_NR]         = { 0UL,                "Low Kernel Mapping" },
+       [VMALLOC_START_NR]      = { 0UL,                "vmalloc() Area" },
+       [VMEMMAP_START_NR]      = { 0UL,                "Vmemmap" },
+#ifdef CONFIG_KASAN
+       [KASAN_SHADOW_START_NR] = { KASAN_SHADOW_START, "KASAN shadow" },
+       [KASAN_SHADOW_END_NR]   = { KASAN_SHADOW_END,   "KASAN shadow end" },
+#endif
+       [CPU_ENTRY_AREA_NR]     = { CPU_ENTRY_AREA_BASE, "CPU entry area" },
+#ifdef CONFIG_X86_ESPFIX64
+       [ESPFIX_START_NR]       = { ESPFIX_BASE_ADDR,   "ESPfix Area", 16 },
+#endif
+#ifdef CONFIG_EFI
+       [EFI_END_NR]            = { EFI_VA_END,         "EFI Runtime Services" },
+#endif
+       [HIGH_KERNEL_NR]        = { __START_KERNEL_map, "High Kernel Mapping" },
+       [MODULES_VADDR_NR]      = { MODULES_VADDR,      "Modules" },
+       [MODULES_END_NR]        = { MODULES_END,        "End Modules" },
+       [FIXADDR_START_NR]      = { FIXADDR_START,      "Fixmap Area" },
+       [END_OF_SPACE_NR]       = { -1,                 NULL }
+};
+
+#else /* CONFIG_X86_64 */
+
+enum address_markers_idx {
+       USER_SPACE_NR = 0,
        KERNEL_SPACE_NR,
        VMALLOC_START_NR,
        VMALLOC_END_NR,
-# ifdef CONFIG_HIGHMEM
+#ifdef CONFIG_HIGHMEM
        PKMAP_BASE_NR,
-# endif
-       FIXADDR_START_NR,
 #endif
+       CPU_ENTRY_AREA_NR,
+       FIXADDR_START_NR,
+       END_OF_SPACE_NR,
 };
 
-/* Address space markers hints */
 static struct addr_marker address_markers[] = {
-       { 0, "User Space" },
-#ifdef CONFIG_X86_64
-       { 0x8000000000000000UL, "Kernel Space" },
-       { 0/* PAGE_OFFSET */,   "Low Kernel Mapping" },
-       { 0/* VMALLOC_START */, "vmalloc() Area" },
-       { 0/* VMEMMAP_START */, "Vmemmap" },
-#ifdef CONFIG_KASAN
-       { KASAN_SHADOW_START,   "KASAN shadow" },
-       { KASAN_SHADOW_END,     "KASAN shadow end" },
+       [USER_SPACE_NR]         = { 0,                  "User Space" },
+       [KERNEL_SPACE_NR]       = { PAGE_OFFSET,        "Kernel Mapping" },
+       [VMALLOC_START_NR]      = { 0UL,                "vmalloc() Area" },
+       [VMALLOC_END_NR]        = { 0UL,                "vmalloc() End" },
+#ifdef CONFIG_HIGHMEM
+       [PKMAP_BASE_NR]         = { 0UL,                "Persistent kmap() Area" },
 #endif
-# ifdef CONFIG_X86_ESPFIX64
-       { ESPFIX_BASE_ADDR,     "ESPfix Area", 16 },
-# endif
-# ifdef CONFIG_EFI
-       { EFI_VA_END,           "EFI Runtime Services" },
-# endif
-       { __START_KERNEL_map,   "High Kernel Mapping" },
-       { MODULES_VADDR,        "Modules" },
-       { MODULES_END,          "End Modules" },
-#else
-       { PAGE_OFFSET,          "Kernel Mapping" },
-       { 0/* VMALLOC_START */, "vmalloc() Area" },
-       { 0/*VMALLOC_END*/,     "vmalloc() End" },
-# ifdef CONFIG_HIGHMEM
-       { 0/*PKMAP_BASE*/,      "Persistent kmap() Area" },
-# endif
-       { 0/*FIXADDR_START*/,   "Fixmap Area" },
-#endif
-       { -1, NULL }            /* End of list */
+       [CPU_ENTRY_AREA_NR]     = { 0UL,                "CPU entry area" },
+       [FIXADDR_START_NR]      = { 0UL,                "Fixmap area" },
+       [END_OF_SPACE_NR]       = { -1,                 NULL }
 };
 
+#endif /* !CONFIG_X86_64 */
+
 /* Multipliers for offsets within the PTEs */
 #define PTE_LEVEL_MULT (PAGE_SIZE)
 #define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT)
@@ -140,7 +160,7 @@ static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg)
        static const char * const level_name[] =
                { "cr3", "pgd", "p4d", "pud", "pmd", "pte" };
 
-       if (!pgprot_val(prot)) {
+       if (!(pr & _PAGE_PRESENT)) {
                /* Not present */
                pt_dump_cont_printf(m, dmsg, "                              ");
        } else {
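
The old !pgprot_val() test misreported PROT_NONE mappings, which clear _PAGE_PRESENT while keeping other protection bits set, so the entry value is nonzero yet not present. Keying on the present bit handles both cases, e.g.:

    #include <assert.h>
    #include <stdbool.h>

    #define _PAGE_PRESENT  (1UL << 0)
    #define _PAGE_PROTNONE (1UL << 8)   /* x86 reuses the global bit */

    static bool entry_present(unsigned long pr)
    {
            return pr & _PAGE_PRESENT;  /* the new test */
    }

    int main(void)
    {
            /* PROT_NONE: nonzero value, present bit clear. */
            assert(!entry_present(_PAGE_PROTNONE)); /* !pgprot_val() missed this */
            assert(entry_present(_PAGE_PRESENT | _PAGE_PROTNONE));
            return 0;
    }
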
@@ -525,8 +545,8 @@ static int __init pt_dump_init(void)
        address_markers[PKMAP_BASE_NR].start_address = PKMAP_BASE;
 # endif
        address_markers[FIXADDR_START_NR].start_address = FIXADDR_START;
+       address_markers[CPU_ENTRY_AREA_NR].start_address = CPU_ENTRY_AREA_BASE;
 #endif
-
        return 0;
 }
 __initcall(pt_dump_init);
index febf698..06fe3d5 100644 (file)
@@ -860,7 +860,7 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code,
        if (!printk_ratelimit())
                return;
 
-       printk("%s%s[%d]: segfault at %lx ip %p sp %p error %lx",
+       printk("%s%s[%d]: segfault at %lx ip %px sp %px error %lx",
                task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
                tsk->comm, task_pid_nr(tsk), address,
                (void *)regs->ip, (void *)regs->sp, error_code);
index 8a64a6f..135c9a7 100644 (file)
@@ -50,6 +50,7 @@
 #include <asm/setup.h>
 #include <asm/set_memory.h>
 #include <asm/page_types.h>
+#include <asm/cpu_entry_area.h>
 #include <asm/init.h>
 
 #include "mm_internal.h"
@@ -766,6 +767,7 @@ void __init mem_init(void)
        mem_init_print_info(NULL);
        printk(KERN_INFO "virtual kernel memory layout:\n"
                "    fixmap  : 0x%08lx - 0x%08lx   (%4ld kB)\n"
+               "  cpu_entry : 0x%08lx - 0x%08lx   (%4ld kB)\n"
 #ifdef CONFIG_HIGHMEM
                "    pkmap   : 0x%08lx - 0x%08lx   (%4ld kB)\n"
 #endif
@@ -777,6 +779,10 @@ void __init mem_init(void)
                FIXADDR_START, FIXADDR_TOP,
                (FIXADDR_TOP - FIXADDR_START) >> 10,
 
+               CPU_ENTRY_AREA_BASE,
+               CPU_ENTRY_AREA_BASE + CPU_ENTRY_AREA_MAP_SIZE,
+               CPU_ENTRY_AREA_MAP_SIZE >> 10,
+
 #ifdef CONFIG_HIGHMEM
                PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
                (LAST_PKMAP*PAGE_SIZE) >> 10,
index 99dfed6..47388f0 100644 (file)
@@ -15,6 +15,7 @@
 #include <asm/tlbflush.h>
 #include <asm/sections.h>
 #include <asm/pgtable.h>
+#include <asm/cpu_entry_area.h>
 
 extern struct range pfn_mapped[E820_MAX_ENTRIES];
 
@@ -277,6 +278,7 @@ void __init kasan_early_init(void)
 void __init kasan_init(void)
 {
        int i;
+       void *shadow_cpu_entry_begin, *shadow_cpu_entry_end;
 
 #ifdef CONFIG_KASAN_INLINE
        register_die_notifier(&kasan_die_notifier);
@@ -321,16 +323,33 @@ void __init kasan_init(void)
                map_range(&pfn_mapped[i]);
        }
 
+       shadow_cpu_entry_begin = (void *)CPU_ENTRY_AREA_BASE;
+       shadow_cpu_entry_begin = kasan_mem_to_shadow(shadow_cpu_entry_begin);
+       shadow_cpu_entry_begin = (void *)round_down((unsigned long)shadow_cpu_entry_begin,
+                                               PAGE_SIZE);
+
+       shadow_cpu_entry_end = (void *)(CPU_ENTRY_AREA_BASE +
+                                       CPU_ENTRY_AREA_MAP_SIZE);
+       shadow_cpu_entry_end = kasan_mem_to_shadow(shadow_cpu_entry_end);
+       shadow_cpu_entry_end = (void *)round_up((unsigned long)shadow_cpu_entry_end,
+                                       PAGE_SIZE);
+
        kasan_populate_zero_shadow(
                kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM),
-               kasan_mem_to_shadow((void *)__START_KERNEL_map));
+               shadow_cpu_entry_begin);
+
+       kasan_populate_shadow((unsigned long)shadow_cpu_entry_begin,
+                             (unsigned long)shadow_cpu_entry_end, 0);
+
+       kasan_populate_zero_shadow(shadow_cpu_entry_end,
+                               kasan_mem_to_shadow((void *)__START_KERNEL_map));
 
        kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext),
                              (unsigned long)kasan_mem_to_shadow(_end),
                              early_pfn_to_nid(__pa(_stext)));
 
        kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END),
-                       (void *)KASAN_SHADOW_END);
+                               (void *)KASAN_SHADOW_END);
 
        load_cr3(init_top_pgt);
        __flush_tlb_all();
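
kasan_mem_to_shadow() compresses 8 bytes of address space into 1 shadow byte, so the shadow bounds of the cpu entry area generally land mid-page and must be rounded outward (begin down, end up) before populating. The rounding helpers behave as follows:

    #include <assert.h>

    #define PAGE_SIZE 4096UL
    #define round_down(x, y) ((x) & ~((y) - 1))
    #define round_up(x, y)   ((((x) - 1) | ((y) - 1)) + 1)

    int main(void)
    {
            assert(round_down(0x1234UL, PAGE_SIZE) == 0x1000);
            assert(round_up(0x1234UL, PAGE_SIZE) == 0x2000);
            assert(round_up(0x2000UL, PAGE_SIZE) == 0x2000);  /* aligned stays put */
            return 0;
    }
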
index 6b9bf02..c3c5274 100644 (file)
@@ -10,6 +10,7 @@
 #include <linux/pagemap.h>
 #include <linux/spinlock.h>
 
+#include <asm/cpu_entry_area.h>
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
 #include <asm/fixmap.h>
index 3118392..0a1be3a 100644 (file)
@@ -128,7 +128,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
         * isn't free.
         */
 #ifdef CONFIG_DEBUG_VM
-       if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev, prev_asid))) {
+       if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid))) {
                /*
                 * If we were to BUG here, we'd be very likely to kill
                 * the system so hard that we don't see the call trace.
@@ -195,7 +195,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
                if (need_flush) {
                        this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
                        this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
-                       write_cr3(build_cr3(next, new_asid));
+                       write_cr3(build_cr3(next->pgd, new_asid));
 
                        /*
                         * NB: This gets called via leave_mm() in the idle path
@@ -208,7 +208,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
                        trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
                } else {
                        /* The new ASID is already up to date. */
-                       write_cr3(build_cr3_noflush(next, new_asid));
+                       write_cr3(build_cr3_noflush(next->pgd, new_asid));
 
                        /* See above wrt _rcuidle. */
                        trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
@@ -288,7 +288,7 @@ void initialize_tlbstate_and_flush(void)
                !(cr4_read_shadow() & X86_CR4_PCIDE));
 
        /* Force ASID 0 and force a TLB flush. */
-       write_cr3(build_cr3(mm, 0));
+       write_cr3(build_cr3(mm->pgd, 0));
 
        /* Reinitialize tlbstate. */
        this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
@@ -551,7 +551,7 @@ static void do_kernel_range_flush(void *info)
 
        /* flush range by one by one 'invlpg' */
        for (addr = f->start; addr < f->end; addr += PAGE_SIZE)
-               __flush_tlb_single(addr);
+               __flush_tlb_one(addr);
 }
 
 void flush_tlb_kernel_range(unsigned long start, unsigned long end)
index f44c0bc..8538a67 100644 (file)
@@ -299,7 +299,7 @@ static void bau_process_message(struct msg_desc *mdp, struct bau_control *bcp,
                local_flush_tlb();
                stat->d_alltlb++;
        } else {
-               __flush_tlb_one(msg->address);
+               __flush_tlb_single(msg->address);
                stat->d_onetlb++;
        }
        stat->d_requestee++;
index 36a28ed..a7d9669 100644 (file)
@@ -152,17 +152,19 @@ static void do_fpu_end(void)
 static void fix_processor_context(void)
 {
        int cpu = smp_processor_id();
-       struct tss_struct *t = &per_cpu(cpu_tss, cpu);
 #ifdef CONFIG_X86_64
        struct desc_struct *desc = get_cpu_gdt_rw(cpu);
        tss_desc tss;
 #endif
-       set_tss_desc(cpu, t);   /*
-                                * This just modifies memory; should not be
-                                * necessary. But... This is necessary, because
-                                * 386 hardware has concept of busy TSS or some
-                                * similar stupidity.
-                                */
+
+       /*
+        * We need to reload TR, which requires that we change the
+        * GDT entry to indicate "available" first.
+        *
+        * XXX: This could probably all be replaced by a call to
+        * force_reload_TR().
+        */
+       set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
 
 #ifdef CONFIG_X86_64
        memcpy(&tss, &desc[GDT_ENTRY_TSS], sizeof(tss_desc));
index d669e9d..c9081c6 100644 (file)
@@ -1,8 +1,12 @@
+#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
+#include <linux/bootmem.h>
+#endif
 #include <linux/cpu.h>
 #include <linux/kexec.h>
 
 #include <xen/features.h>
 #include <xen/page.h>
+#include <xen/interface/memory.h>
 
 #include <asm/xen/hypercall.h>
 #include <asm/xen/hypervisor.h>
@@ -331,3 +335,80 @@ void xen_arch_unregister_cpu(int num)
 }
 EXPORT_SYMBOL(xen_arch_unregister_cpu);
 #endif
+
+#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
+void __init arch_xen_balloon_init(struct resource *hostmem_resource)
+{
+       struct xen_memory_map memmap;
+       int rc;
+       unsigned int i, last_guest_ram;
+       phys_addr_t max_addr = PFN_PHYS(max_pfn);
+       struct e820_table *xen_e820_table;
+       const struct e820_entry *entry;
+       struct resource *res;
+
+       if (!xen_initial_domain())
+               return;
+
+       xen_e820_table = kmalloc(sizeof(*xen_e820_table), GFP_KERNEL);
+       if (!xen_e820_table)
+               return;
+
+       memmap.nr_entries = ARRAY_SIZE(xen_e820_table->entries);
+       set_xen_guest_handle(memmap.buffer, xen_e820_table->entries);
+       rc = HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap);
+       if (rc) {
+               pr_warn("%s: Can't read host e820 (%d)\n", __func__, rc);
+               goto out;
+       }
+
+       last_guest_ram = 0;
+       for (i = 0; i < memmap.nr_entries; i++) {
+               if (xen_e820_table->entries[i].addr >= max_addr)
+                       break;
+               if (xen_e820_table->entries[i].type == E820_TYPE_RAM)
+                       last_guest_ram = i;
+       }
+
+       entry = &xen_e820_table->entries[last_guest_ram];
+       if (max_addr >= entry->addr + entry->size)
+               goto out; /* No unallocated host RAM. */
+
+       hostmem_resource->start = max_addr;
+       hostmem_resource->end = entry->addr + entry->size;
+
+       /*
+        * Mark non-RAM regions between the end of dom0 RAM and end of host RAM
+        * as unavailable. The rest of that region can be used for hotplug-based
+        * ballooning.
+        */
+       for (; i < memmap.nr_entries; i++) {
+               entry = &xen_e820_table->entries[i];
+
+               if (entry->type == E820_TYPE_RAM)
+                       continue;
+
+               if (entry->addr >= hostmem_resource->end)
+                       break;
+
+               res = kzalloc(sizeof(*res), GFP_KERNEL);
+               if (!res)
+                       goto out;
+
+               res->name = "Unavailable host RAM";
+               res->start = entry->addr;
+               res->end = (entry->addr + entry->size < hostmem_resource->end) ?
+                           entry->addr + entry->size : hostmem_resource->end;
+               rc = insert_resource(hostmem_resource, res);
+               if (rc) {
+                       pr_warn("%s: Can't insert [%llx - %llx) (%d)\n",
+                               __func__, res->start, res->end, rc);
+                       kfree(res);
+                       goto out;
+               }
+       }
+
+ out:
+       kfree(xen_e820_table);
+}
+#endif /* CONFIG_XEN_BALLOON_MEMORY_HOTPLUG */
index f2414c6..c047f42 100644 (file)
@@ -88,6 +88,8 @@
 #include "multicalls.h"
 #include "pmu.h"
 
+#include "../kernel/cpu/cpu.h" /* get_cpu_cap() */
+
 void *xen_initial_gdt;
 
 static int xen_cpu_up_prepare_pv(unsigned int cpu);
@@ -826,7 +828,7 @@ static void xen_load_sp0(unsigned long sp0)
        mcs = xen_mc_entry(0);
        MULTI_stack_switch(mcs.mc, __KERNEL_DS, sp0);
        xen_mc_issue(PARAVIRT_LAZY_CPU);
-       this_cpu_write(cpu_tss.x86_tss.sp0, sp0);
+       this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
 }
 
 void xen_set_iopl_mask(unsigned mask)
@@ -1258,6 +1260,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
        __userpte_alloc_gfp &= ~__GFP_HIGHMEM;
 
        /* Work out if we support NX */
+       get_cpu_cap(&boot_cpu_data);
        x86_configure_nx();
 
        /* Get mfn list */
index fc048ec..4d62c07 100644 (file)
@@ -1902,6 +1902,18 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
        /* Graft it onto L4[511][510] */
        copy_page(level2_kernel_pgt, l2);
 
+       /*
+        * Zap execute permission from the ident map. Due to the sharing of
+        * L1 entries we need to do this in the L2.
+        */
+       if (__supported_pte_mask & _PAGE_NX) {
+               for (i = 0; i < PTRS_PER_PMD; ++i) {
+                       if (pmd_none(level2_ident_pgt[i]))
+                               continue;
+                       level2_ident_pgt[i] = pmd_set_flags(level2_ident_pgt[i], _PAGE_NX);
+               }
+       }
+
        /* Copy the initial P->M table mappings if necessary. */
        i = pgd_index(xen_start_info->mfn_list);
        if (i && i < pgd_index(__START_KERNEL_map))
@@ -2261,7 +2273,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
 
        switch (idx) {
        case FIX_BTMAP_END ... FIX_BTMAP_BEGIN:
-       case FIX_RO_IDT:
 #ifdef CONFIG_X86_32
        case FIX_WP_TEST:
 # ifdef CONFIG_HIGHMEM
@@ -2272,7 +2283,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
 #endif
        case FIX_TEXT_POKE0:
        case FIX_TEXT_POKE1:
-       case FIX_GDT_REMAP_BEGIN ... FIX_GDT_REMAP_END:
                /* All local page mappings */
                pte = pfn_pte(phys, prot);
                break;
index c114ca7..6e0d208 100644 (file)
@@ -808,7 +808,6 @@ char * __init xen_memory_setup(void)
        addr = xen_e820_table.entries[0].addr;
        size = xen_e820_table.entries[0].size;
        while (i < xen_e820_table.nr_entries) {
-               bool discard = false;
 
                chunk_size = size;
                type = xen_e820_table.entries[i].type;
@@ -824,11 +823,10 @@ char * __init xen_memory_setup(void)
                                xen_add_extra_mem(pfn_s, n_pfns);
                                xen_max_p2m_pfn = pfn_s + n_pfns;
                        } else
-                               discard = true;
+                               type = E820_TYPE_UNUSABLE;
                }
 
-               if (!discard)
-                       xen_align_and_add_e820_region(addr, chunk_size, type);
+               xen_align_and_add_e820_region(addr, chunk_size, type);
 
                addr += chunk_size;
                size -= chunk_size;
index 8bfdea5..9ef6cf3 100644 (file)
@@ -599,6 +599,8 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
        bio->bi_disk = bio_src->bi_disk;
        bio->bi_partno = bio_src->bi_partno;
        bio_set_flag(bio, BIO_CLONED);
+       if (bio_flagged(bio_src, BIO_THROTTLED))
+               bio_set_flag(bio, BIO_THROTTLED);
        bio->bi_opf = bio_src->bi_opf;
        bio->bi_write_hint = bio_src->bi_write_hint;
        bio->bi_iter = bio_src->bi_iter;
index b21f8e8..d3a9471 100644 (file)
 #include "blk.h"
 
 /*
- * Append a bio to a passthrough request.  Only works can be merged into
- * the request based on the driver constraints.
+ * Append a bio to a passthrough request.  Only works if the bio can be merged
+ * into the request based on the driver constraints.
  */
-int blk_rq_append_bio(struct request *rq, struct bio *bio)
+int blk_rq_append_bio(struct request *rq, struct bio **bio)
 {
-       blk_queue_bounce(rq->q, &bio);
+       struct bio *orig_bio = *bio;
+
+       blk_queue_bounce(rq->q, bio);
 
        if (!rq->bio) {
-               blk_rq_bio_prep(rq->q, rq, bio);
+               blk_rq_bio_prep(rq->q, rq, *bio);
        } else {
-               if (!ll_back_merge_fn(rq->q, rq, bio))
+               if (!ll_back_merge_fn(rq->q, rq, *bio)) {
+                       if (orig_bio != *bio) {
+                               bio_put(*bio);
+                               *bio = orig_bio;
+                       }
                        return -EINVAL;
+               }
 
-               rq->biotail->bi_next = bio;
-               rq->biotail = bio;
-               rq->__data_len += bio->bi_iter.bi_size;
+               rq->biotail->bi_next = *bio;
+               rq->biotail = *bio;
+               rq->__data_len += (*bio)->bi_iter.bi_size;
        }
 
        return 0;
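
Passing struct bio ** lets blk_queue_bounce() hand back a bounce clone while blk_rq_append_bio() can still restore the caller's original pointer, and drop the clone, when the merge fails. The ownership dance in miniature, with stand-in names rather than the real block-layer API:

    #include <assert.h>
    #include <stdlib.h>

    struct bio { int id; };

    static int append_bio(struct bio **bio, int merge_ok)
    {
            struct bio *orig_bio = *bio;

            /* Stand-in for blk_queue_bounce() swapping in a clone. */
            *bio = malloc(sizeof(**bio));
            (*bio)->id = orig_bio->id + 100;

            if (!merge_ok) {
                    if (*bio != orig_bio) {
                            free(*bio);             /* bio_put() the clone */
                            *bio = orig_bio;        /* caller keeps its own bio */
                    }
                    return -1;
            }
            return 0;
    }

    int main(void)
    {
            struct bio b = { 1 };
            struct bio *p = &b;

            assert(append_bio(&p, 0) == -1 && p == &b);     /* restored on failure */
            assert(append_bio(&p, 1) == 0 && p->id == 101); /* clone kept on success */
            free(p);
            return 0;
    }
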
@@ -73,14 +80,12 @@ static int __blk_rq_map_user_iov(struct request *rq,
         * We link the bounce buffer in and could have to traverse it
         * later so we have to get a ref to prevent it from being freed
         */
-       ret = blk_rq_append_bio(rq, bio);
-       bio_get(bio);
+       ret = blk_rq_append_bio(rq, &bio);
        if (ret) {
-               bio_endio(bio);
                __blk_rq_unmap_user(orig_bio);
-               bio_put(bio);
                return ret;
        }
+       bio_get(bio);
 
        return 0;
 }
@@ -213,7 +218,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
        int reading = rq_data_dir(rq) == READ;
        unsigned long addr = (unsigned long) kbuf;
        int do_copy = 0;
-       struct bio *bio;
+       struct bio *bio, *orig_bio;
        int ret;
 
        if (len > (queue_max_hw_sectors(q) << 9))
@@ -236,10 +241,11 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
        if (do_copy)
                rq->rq_flags |= RQF_COPY_USER;
 
-       ret = blk_rq_append_bio(rq, bio);
+       orig_bio = bio;
+       ret = blk_rq_append_bio(rq, &bio);
        if (unlikely(ret)) {
                /* request is too big */
-               bio_put(bio);
+               bio_put(orig_bio);
                return ret;
        }
 
index 825bc29..d19f416 100644 (file)
@@ -2226,13 +2226,7 @@ again:
 out_unlock:
        spin_unlock_irq(q->queue_lock);
 out:
-       /*
-        * As multiple blk-throtls may stack in the same issue path, we
-        * don't want bios to leave with the flag set.  Clear the flag if
-        * being issued.
-        */
-       if (!throttled)
-               bio_clear_flag(bio, BIO_THROTTLED);
+       bio_set_flag(bio, BIO_THROTTLED);
 
 #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
        if (throttled || !td->track_bio_latency)
index fceb1a9..1d05c42 100644 (file)
@@ -200,6 +200,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
        unsigned i = 0;
        bool bounce = false;
        int sectors = 0;
+       bool passthrough = bio_is_passthrough(*bio_orig);
 
        bio_for_each_segment(from, *bio_orig, iter) {
                if (i++ < BIO_MAX_PAGES)
@@ -210,13 +211,14 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
        if (!bounce)
                return;
 
-       if (sectors < bio_sectors(*bio_orig)) {
+       if (!passthrough && sectors < bio_sectors(*bio_orig)) {
                bio = bio_split(*bio_orig, sectors, GFP_NOIO, bounce_bio_split);
                bio_chain(bio, *bio_orig);
                generic_make_request(*bio_orig);
                *bio_orig = bio;
        }
-       bio = bio_clone_bioset(*bio_orig, GFP_NOIO, bounce_bio_set);
+       bio = bio_clone_bioset(*bio_orig, GFP_NOIO, passthrough ? NULL :
+                       bounce_bio_set);
 
        bio_for_each_segment_all(to, bio, i) {
                struct page *page = to->bv_page;
index b4df317..f95c607 100644 (file)
@@ -100,9 +100,13 @@ struct kyber_hctx_data {
        unsigned int cur_domain;
        unsigned int batching;
        wait_queue_entry_t domain_wait[KYBER_NUM_DOMAINS];
+       struct sbq_wait_state *domain_ws[KYBER_NUM_DOMAINS];
        atomic_t wait_index[KYBER_NUM_DOMAINS];
 };
 
+static int kyber_domain_wake(wait_queue_entry_t *wait, unsigned mode, int flags,
+                            void *key);
+
 static int rq_sched_domain(const struct request *rq)
 {
        unsigned int op = rq->cmd_flags;
@@ -385,6 +389,9 @@ static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
 
        for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
                INIT_LIST_HEAD(&khd->rqs[i]);
+               init_waitqueue_func_entry(&khd->domain_wait[i],
+                                         kyber_domain_wake);
+               khd->domain_wait[i].private = hctx;
                INIT_LIST_HEAD(&khd->domain_wait[i].entry);
                atomic_set(&khd->wait_index[i], 0);
        }
@@ -524,35 +531,39 @@ static int kyber_get_domain_token(struct kyber_queue_data *kqd,
        int nr;
 
        nr = __sbitmap_queue_get(domain_tokens);
-       if (nr >= 0)
-               return nr;
 
        /*
         * If we failed to get a domain token, make sure the hardware queue is
         * run when one becomes available. Note that this is serialized on
         * khd->lock, but we still need to be careful about the waker.
         */
-       if (list_empty_careful(&wait->entry)) {
-               init_waitqueue_func_entry(wait, kyber_domain_wake);
-               wait->private = hctx;
+       if (nr < 0 && list_empty_careful(&wait->entry)) {
                ws = sbq_wait_ptr(domain_tokens,
                                  &khd->wait_index[sched_domain]);
+               khd->domain_ws[sched_domain] = ws;
                add_wait_queue(&ws->wait, wait);
 
                /*
                 * Try again in case a token was freed before we got on the wait
-                * queue. The waker may have already removed the entry from the
-                * wait queue, but list_del_init() is okay with that.
+                * queue.
                 */
                nr = __sbitmap_queue_get(domain_tokens);
-               if (nr >= 0) {
-                       unsigned long flags;
+       }
 
-                       spin_lock_irqsave(&ws->wait.lock, flags);
-                       list_del_init(&wait->entry);
-                       spin_unlock_irqrestore(&ws->wait.lock, flags);
-               }
+       /*
+        * If we got a token while we were on the wait queue, remove ourselves
+        * from the wait queue to ensure that all wake ups make forward
+        * progress. It's possible that the waker already deleted the entry
+        * between the !list_empty_careful() check and us grabbing the lock, but
+        * list_del_init() is okay with that.
+        */
+       if (nr >= 0 && !list_empty_careful(&wait->entry)) {
+               ws = khd->domain_ws[sched_domain];
+               spin_lock_irq(&ws->wait.lock);
+               list_del_init(&wait->entry);
+               spin_unlock_irq(&ws->wait.lock);
        }
+
        return nr;
 }
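
The reworked token path queues itself, retries once, and then removes itself from the wait queue whenever the retry succeeded, so a wakeup is never absorbed by a waiter that no longer needs it. The shape of that protocol, as a sequential sketch only (the real code serializes on khd->lock and ws->wait.lock):

    #include <assert.h>
    #include <stdbool.h>

    static int tokens;          /* free domain tokens */
    static bool queued;         /* are we on the wait queue? */

    static int try_get(void)
    {
            return tokens > 0 ? tokens-- : -1;
    }

    static int get_domain_token(void)
    {
            int nr = try_get();

            if (nr < 0 && !queued) {
                    queued = true;          /* add_wait_queue() */
                    nr = try_get();         /* a token may have been freed meanwhile */
            }
            if (nr >= 0 && queued)
                    queued = false;         /* list_del_init(): pass wakeups along */
            return nr;
    }

    int main(void)
    {
            tokens = 0;
            assert(get_domain_token() < 0 && queued);   /* parked, waiting */

            queued = false;
            tokens = 1;
            assert(get_domain_token() == 1 && !queued); /* got one, dequeued */
            return 0;
    }
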
 
index 415a54c..444a387 100644 (file)
@@ -1138,12 +1138,6 @@ int af_alg_get_rsgl(struct sock *sk, struct msghdr *msg, int flags,
                if (!af_alg_readable(sk))
                        break;
 
-               if (!ctx->used) {
-                       err = af_alg_wait_for_data(sk, flags);
-                       if (err)
-                               return err;
-               }
-
                seglen = min_t(size_t, (maxsize - len),
                               msg_data_left(msg));
 
index 48b34e9..ddcc45f 100644 (file)
@@ -111,6 +111,12 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
        size_t usedpages = 0;           /* [in]  RX bufs to be used from user */
        size_t processed = 0;           /* [in]  TX bufs to be consumed */
 
+       if (!ctx->used) {
+               err = af_alg_wait_for_data(sk, flags);
+               if (err)
+                       return err;
+       }
+
        /*
         * Data length provided by caller via sendmsg/sendpage that has not
         * yet been processed.
@@ -285,6 +291,10 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
                /* AIO operation */
                sock_hold(sk);
                areq->iocb = msg->msg_iocb;
+
+               /* Remember output size that will be generated. */
+               areq->outlen = outlen;
+
                aead_request_set_callback(&areq->cra_u.aead_req,
                                          CRYPTO_TFM_REQ_MAY_BACKLOG,
                                          af_alg_async_cb, areq);
@@ -292,12 +302,8 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
                                 crypto_aead_decrypt(&areq->cra_u.aead_req);
 
                /* AIO operation in progress */
-               if (err == -EINPROGRESS || err == -EBUSY) {
-                       /* Remember output size that will be generated. */
-                       areq->outlen = outlen;
-
+               if (err == -EINPROGRESS || err == -EBUSY)
                        return -EIOCBQUEUED;
-               }
 
                sock_put(sk);
        } else {
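
Hoisting the areq->outlen assignment ahead of crypto_aead_decrypt() matters because the async completion callback can run before the -EINPROGRESS return value is even examined, so any field the callback reads must be committed before kickoff. The ordering hazard in miniature, with synchronous stand-ins rather than the crypto API:

    #include <assert.h>

    struct areq_like { int outlen; int seen; };

    /* Stand-in completion callback: consumes state set by the submitter. */
    static void async_cb(struct areq_like *a)
    {
            a->seen = a->outlen;
    }

    static int submit(struct areq_like *a, int outlen)
    {
            a->outlen = outlen;     /* committed BEFORE the operation starts */
            async_cb(a);            /* completion may fire immediately */
            return -115;            /* -EINPROGRESS */
    }

    int main(void)
    {
            struct areq_like a = { 0, 0 };

            assert(submit(&a, 42) == -115);
            assert(a.seen == 42);   /* callback saw a valid outlen */
            return 0;
    }
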
index 30cff82..baef9bf 100644 (file)
@@ -72,6 +72,12 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg,
        int err = 0;
        size_t len = 0;
 
+       if (!ctx->used) {
+               err = af_alg_wait_for_data(sk, flags);
+               if (err)
+                       return err;
+       }
+
        /* Allocate cipher request for current operation. */
        areq = af_alg_alloc_areq(sk, sizeof(struct af_alg_async_req) +
                                     crypto_skcipher_reqsize(tfm));
@@ -119,6 +125,10 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg,
                /* AIO operation */
                sock_hold(sk);
                areq->iocb = msg->msg_iocb;
+
+               /* Remember output size that will be generated. */
+               areq->outlen = len;
+
                skcipher_request_set_callback(&areq->cra_u.skcipher_req,
                                              CRYPTO_TFM_REQ_MAY_SLEEP,
                                              af_alg_async_cb, areq);
@@ -127,12 +137,8 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg,
                        crypto_skcipher_decrypt(&areq->cra_u.skcipher_req);
 
                /* AIO operation in progress */
-               if (err == -EINPROGRESS || err == -EBUSY) {
-                       /* Remember output size that will be generated. */
-                       areq->outlen = len;
-
+               if (err == -EINPROGRESS || err == -EBUSY)
                        return -EIOCBQUEUED;
-               }
 
                sock_put(sk);
        } else {
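
The aead and skcipher hunks above apply the same two-part pattern: the empty-queue wait moves from the shared af_alg_get_rsgl() loop to the top of each recvmsg implementation, and areq->outlen is recorded before crypto_aead_decrypt()/crypto_skcipher_decrypt() is called rather than only on the -EINPROGRESS/-EBUSY path. The second part matters because the completion callback can run before the submitter regains control. A minimal userspace sketch of that ordering rule (async_req, start_async_op and on_complete are hypothetical names; the "async" operation completes synchronously here to make the hazard obvious):

#include <stddef.h>
#include <stdio.h>

/* State the completion callback will read. */
struct async_req {
	size_t outlen;
	void (*complete)(struct async_req *req);
};

static void on_complete(struct async_req *req)
{
	/* With the old ordering this could observe a stale outlen. */
	printf("completed, outlen=%zu\n", req->outlen);
}

/*
 * Stand-in for crypto_aead_decrypt()/crypto_skcipher_decrypt(): the
 * callback may fire at any point after submission, possibly before
 * this function returns to the caller.
 */
static int start_async_op(struct async_req *req)
{
	req->complete(req);
	return -115;	/* -EINPROGRESS */
}

int main(void)
{
	struct async_req req = { .complete = on_complete };

	req.outlen = 4096;		/* publish first ... */
	(void)start_async_op(&req);	/* ... then submit */
	return 0;
}
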
index 4e64726..eca04d3 100644 (file)
@@ -81,6 +81,7 @@ static int mcryptd_init_queue(struct mcryptd_queue *queue,
                pr_debug("cpu_queue #%d %p\n", cpu, queue->cpu_queue);
                crypto_init_queue(&cpu_queue->queue, max_cpu_qlen);
                INIT_WORK(&cpu_queue->work, mcryptd_queue_worker);
+               spin_lock_init(&cpu_queue->q_lock);
        }
        return 0;
 }
@@ -104,15 +105,16 @@ static int mcryptd_enqueue_request(struct mcryptd_queue *queue,
        int cpu, err;
        struct mcryptd_cpu_queue *cpu_queue;
 
-       cpu = get_cpu();
-       cpu_queue = this_cpu_ptr(queue->cpu_queue);
-       rctx->tag.cpu = cpu;
+       cpu_queue = raw_cpu_ptr(queue->cpu_queue);
+       spin_lock(&cpu_queue->q_lock);
+       cpu = smp_processor_id();
+       rctx->tag.cpu = smp_processor_id();
 
        err = crypto_enqueue_request(&cpu_queue->queue, request);
        pr_debug("enqueue request: cpu %d cpu_queue %p request %p\n",
                 cpu, cpu_queue, request);
+       spin_unlock(&cpu_queue->q_lock);
        queue_work_on(cpu, kcrypto_wq, &cpu_queue->work);
-       put_cpu();
 
        return err;
 }
@@ -161,16 +163,11 @@ static void mcryptd_queue_worker(struct work_struct *work)
        cpu_queue = container_of(work, struct mcryptd_cpu_queue, work);
        i = 0;
        while (i < MCRYPTD_BATCH || single_task_running()) {
-               /*
-                * preempt_disable/enable is used to prevent
-                * being preempted by mcryptd_enqueue_request()
-                */
-               local_bh_disable();
-               preempt_disable();
+
+               spin_lock_bh(&cpu_queue->q_lock);
                backlog = crypto_get_backlog(&cpu_queue->queue);
                req = crypto_dequeue_request(&cpu_queue->queue);
-               preempt_enable();
-               local_bh_enable();
+               spin_unlock_bh(&cpu_queue->q_lock);
 
                if (!req) {
                        mcryptd_opportunistic_flush();
@@ -185,7 +182,7 @@ static void mcryptd_queue_worker(struct work_struct *work)
                ++i;
        }
        if (cpu_queue->queue.qlen)
-               queue_work(kcrypto_wq, &cpu_queue->work);
+               queue_work_on(smp_processor_id(), kcrypto_wq, &cpu_queue->work);
 }
 
 void mcryptd_flusher(struct work_struct *__work)
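
Before this change the enqueue path relied on get_cpu()/preempt_disable() for mutual exclusion, which only serializes code on the local CPU; the worker can end up running on a different CPU than the one the request was queued from, so the queue needs a real lock. A reduced, runnable model of the resulting producer/consumer discipline (cpu_queue, enqueue and dequeue are illustrative stand-ins, with a pthread spinlock in place of the kernel's spin_lock_bh):

#include <pthread.h>
#include <stdio.h>

#define QLEN 64

/* One queue, protected by q_lock as in the hunk above. */
struct cpu_queue {
	pthread_spinlock_t q_lock;
	int ring[QLEN];
	unsigned int head, tail;
};

static void enqueue(struct cpu_queue *q, int req)
{
	pthread_spin_lock(&q->q_lock);
	q->ring[q->tail++ % QLEN] = req;
	pthread_spin_unlock(&q->q_lock);
}

static int dequeue(struct cpu_queue *q)
{
	int req = -1;

	pthread_spin_lock(&q->q_lock);
	if (q->head != q->tail)
		req = q->ring[q->head++ % QLEN];
	pthread_spin_unlock(&q->q_lock);
	return req;
}

int main(void)
{
	struct cpu_queue q = { .head = 0, .tail = 0 };

	pthread_spin_init(&q.q_lock, PTHREAD_PROCESS_PRIVATE);
	enqueue(&q, 42);
	printf("dequeued %d\n", dequeue(&q));
	pthread_spin_destroy(&q.q_lock);
	return 0;
}
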
index 778e0ff..11af5fd 100644 (file)
@@ -449,6 +449,8 @@ static int skcipher_walk_skcipher(struct skcipher_walk *walk,
 
        walk->total = req->cryptlen;
        walk->nbytes = 0;
+       walk->iv = req->iv;
+       walk->oiv = req->iv;
 
        if (unlikely(!walk->total))
                return 0;
@@ -456,9 +458,6 @@ static int skcipher_walk_skcipher(struct skcipher_walk *walk,
        scatterwalk_start(&walk->in, req->src);
        scatterwalk_start(&walk->out, req->dst);
 
-       walk->iv = req->iv;
-       walk->oiv = req->iv;
-
        walk->flags &= ~SKCIPHER_WALK_SLEEP;
        walk->flags |= req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
                       SKCIPHER_WALK_SLEEP : 0;
@@ -510,6 +509,8 @@ static int skcipher_walk_aead_common(struct skcipher_walk *walk,
        int err;
 
        walk->nbytes = 0;
+       walk->iv = req->iv;
+       walk->oiv = req->iv;
 
        if (unlikely(!walk->total))
                return 0;
@@ -525,9 +526,6 @@ static int skcipher_walk_aead_common(struct skcipher_walk *walk,
        scatterwalk_done(&walk->in, 0, walk->total);
        scatterwalk_done(&walk->out, 0, walk->total);
 
-       walk->iv = req->iv;
-       walk->oiv = req->iv;
-
        if (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP)
                walk->flags |= SKCIPHER_WALK_SLEEP;
        else
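
Both skcipher_walk hunks hoist the walk->iv/walk->oiv assignments above the early return taken for zero-length requests, so later walk handling never dereferences an uninitialized IV pointer. The rule being enforced: initialize every field that cleanup or follow-up code may read before the first early exit. A tiny self-contained illustration (struct walk, start and finish are hypothetical names):

#include <stdio.h>
#include <string.h>

struct walk {
	const char *iv;		/* read by finish() even for empty walks */
	size_t total;
};

static void finish(struct walk *w)
{
	/* Would dereference garbage if iv were set after an early return. */
	printf("iv byte 0: %d\n", w->iv[0]);
}

static int start(struct walk *w, const char *iv, size_t total)
{
	w->iv = iv;		/* set before any early exit */
	w->total = total;
	if (w->total == 0)
		return 0;	/* empty request: iv is still valid */
	return 1;
}

int main(void)
{
	struct walk w;
	char iv[16];

	memset(iv, 0, sizeof(iv));
	start(&w, iv, 0);
	finish(&w);
	return 0;
}
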
index 6742f6c..9bff853 100644 (file)
@@ -1007,7 +1007,7 @@ skip:
        /* The record may be cleared by others, try read next record */
        if (len == -ENOENT)
                goto skip;
-       else if (len < sizeof(*rcd)) {
+       else if (len < 0 || len < sizeof(*rcd)) {
                rc = -EIO;
                goto out;
        }
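
The added len < 0 test matters because sizeof(*rcd) has type size_t: in len < sizeof(*rcd) a negative ssize_t is converted to a huge unsigned value, so error returns other than -ENOENT would sail past the minimum-length check. A self-contained demonstration of the promotion:

#include <stdio.h>
#include <sys/types.h>

struct rcd { char data[64]; };

int main(void)
{
	ssize_t len = -5;	/* e.g. an -EIO style error return */

	/* Broken: len is promoted to size_t, and (size_t)-5 is huge. */
	if (len < sizeof(struct rcd))
		printf("broken check: would reject\n");
	else
		printf("broken check: error value passes as valid!\n");

	/* Fixed: test the sign first, in signed arithmetic. */
	if (len < 0 || (size_t)len < sizeof(struct rcd))
		printf("fixed check: rejects\n");
	return 0;
}
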
index 30e84cc..06ea474 100644 (file)
@@ -1171,7 +1171,7 @@ int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls)
        struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpu);
        struct cpc_register_resource *desired_reg;
        int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu);
-       struct cppc_pcc_data *pcc_ss_data = pcc_data[pcc_ss_id];
+       struct cppc_pcc_data *pcc_ss_data;
        int ret = 0;
 
        if (!cpc_desc || pcc_ss_id < 0) {
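
The cppc fix moves the pcc_data[pcc_ss_id] lookup out of the declaration, because the initializer indexed the array before the pcc_ss_id < 0 check just below could run. The same hazard in miniature (lookup and buckets are illustrative names):

#include <stdio.h>

#define NBUCKETS 8
static int *buckets[NBUCKETS];

static int lookup(int id)
{
	/* Broken form: int *p = buckets[id]; here would index out of
	 * bounds for id < 0 before the check below executes. */
	int *p;

	if (id < 0 || id >= NBUCKETS)
		return -1;

	p = buckets[id];	/* index only after validating id */
	return p ? *p : 0;
}

int main(void)
{
	static int v = 7;

	buckets[3] = &v;
	printf("%d %d\n", lookup(3), lookup(-1));
	return 0;
}
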
index ff2580e..abeb4df 100644 (file)
@@ -1670,6 +1670,11 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
                                dev_name(&adev_dimm->dev));
                return -ENXIO;
        }
+       /*
+        * Record nfit_mem for the notification path to track back to
+        * the nfit sysfs attributes for this dimm device object.
+        */
+       dev_set_drvdata(&adev_dimm->dev, nfit_mem);
 
        /*
         * Until standardization materializes we need to consider 4
@@ -1752,9 +1757,11 @@ static void shutdown_dimm_notify(void *data)
                        sysfs_put(nfit_mem->flags_attr);
                        nfit_mem->flags_attr = NULL;
                }
-               if (adev_dimm)
+               if (adev_dimm) {
                        acpi_remove_notify_handler(adev_dimm->handle,
                                        ACPI_DEVICE_NOTIFY, acpi_nvdimm_notify);
+                       dev_set_drvdata(&adev_dimm->dev, NULL);
+               }
        }
        mutex_unlock(&acpi_desc->init_mutex);
 }
index ccb9975..ad0477a 100644 (file)
@@ -35,13 +35,13 @@ static inline u64 mb_per_tick(int mbps)
 struct nullb_cmd {
        struct list_head list;
        struct llist_node ll_list;
-       call_single_data_t csd;
+       struct __call_single_data csd;
        struct request *rq;
        struct bio *bio;
        unsigned int tag;
+       blk_status_t error;
        struct nullb_queue *nq;
        struct hrtimer timer;
-       blk_status_t error;
 };
 
 struct nullb_queue {
index 647d056..8a1860a 100644 (file)
@@ -1564,6 +1564,9 @@ static void clk_change_rate(struct clk_core *core)
                best_parent_rate = core->parent->rate;
        }
 
+       if (clk_pm_runtime_get(core))
+               return;
+
        if (core->flags & CLK_SET_RATE_UNGATE) {
                unsigned long flags;
 
@@ -1634,6 +1637,8 @@ static void clk_change_rate(struct clk_core *core)
        /* handle the new child who might not be in core->children yet */
        if (core->new_child)
                clk_change_rate(core->new_child);
+
+       clk_pm_runtime_put(core);
 }
 
 static int clk_core_set_rate_nolock(struct clk_core *core,
index a1a6342..f00d875 100644 (file)
@@ -16,6 +16,7 @@
 
 #include <linux/clk.h>
 #include <linux/clk-provider.h>
+#include <linux/delay.h>
 #include <linux/init.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
@@ -83,9 +84,20 @@ static int sun9i_mmc_reset_deassert(struct reset_controller_dev *rcdev,
        return 0;
 }
 
+static int sun9i_mmc_reset_reset(struct reset_controller_dev *rcdev,
+                                unsigned long id)
+{
+       sun9i_mmc_reset_assert(rcdev, id);
+       udelay(10);
+       sun9i_mmc_reset_deassert(rcdev, id);
+
+       return 0;
+}
+
 static const struct reset_control_ops sun9i_mmc_reset_ops = {
        .assert         = sun9i_mmc_reset_assert,
        .deassert       = sun9i_mmc_reset_deassert,
+       .reset          = sun9i_mmc_reset_reset,
 };
 
 static int sun9i_a80_mmc_config_clk_probe(struct platform_device *pdev)
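
With a .reset callback wired up as above, reset framework consumers can use the one-shot helper instead of open-coding assert/delay/deassert. A hedged kernel-style sketch, not from the patch: hypothetical_probe_reset is an invented name, while devm_reset_control_get() and reset_control_reset() are the stock framework entry points, and reset_control_reset() fails on controllers lacking a .reset op, which is exactly what this patch provides.

#include <linux/device.h>
#include <linux/err.h>
#include <linux/reset.h>

static int hypothetical_probe_reset(struct device *dev)
{
	struct reset_control *rst = devm_reset_control_get(dev, NULL);

	if (IS_ERR(rst))
		return PTR_ERR(rst);

	/* Invokes the controller's .reset op: assert, 10us, deassert. */
	return reset_control_reset(rst);
}
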
index 58d4f4e..ca38229 100644 (file)
@@ -22,6 +22,8 @@
 
 #include "cpufreq_governor.h"
 
+#define CPUFREQ_DBS_MIN_SAMPLING_INTERVAL      (2 * TICK_NSEC / NSEC_PER_USEC)
+
 static DEFINE_PER_CPU(struct cpu_dbs_info, cpu_dbs);
 
 static DEFINE_MUTEX(gov_dbs_data_mutex);
@@ -47,11 +49,15 @@ ssize_t store_sampling_rate(struct gov_attr_set *attr_set, const char *buf,
 {
        struct dbs_data *dbs_data = to_dbs_data(attr_set);
        struct policy_dbs_info *policy_dbs;
+       unsigned int sampling_interval;
        int ret;
-       ret = sscanf(buf, "%u", &dbs_data->sampling_rate);
-       if (ret != 1)
+
+       ret = sscanf(buf, "%u", &sampling_interval);
+       if (ret != 1 || sampling_interval < CPUFREQ_DBS_MIN_SAMPLING_INTERVAL)
                return -EINVAL;
 
+       dbs_data->sampling_rate = sampling_interval;
+
        /*
         * We are operating under dbs_data->mutex and so the list and its
         * entries can't be freed concurrently.
@@ -430,7 +436,14 @@ int cpufreq_dbs_governor_init(struct cpufreq_policy *policy)
        if (ret)
                goto free_policy_dbs_info;
 
-       dbs_data->sampling_rate = cpufreq_policy_transition_delay_us(policy);
+       /*
+        * The sampling interval should not be less than the transition latency
+        * of the CPU and it also cannot be too small for dbs_update() to work
+        * correctly.
+        */
+       dbs_data->sampling_rate = max_t(unsigned int,
+                                       CPUFREQ_DBS_MIN_SAMPLING_INTERVAL,
+                                       cpufreq_policy_transition_delay_us(policy));
 
        if (!have_governor_per_policy())
                gov->gdbs_data = dbs_data;
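
Both hunks funnel the governor's sampling rate through the same floor: the sysfs store parses into a local, validates against CPUFREQ_DBS_MIN_SAMPLING_INTERVAL before publishing, and the init path clamps with max_t(). The parse-validate-publish idiom, runnable (store_rate is an illustrative stand-in; 2000 us matches the 2-tick floor at HZ=1000):

#include <stdio.h>

#define MIN_SAMPLING_INTERVAL_US 2000

/* Parse-and-validate as in store_sampling_rate() above. */
static int store_rate(const char *buf, unsigned int *rate)
{
	unsigned int v;

	if (sscanf(buf, "%u", &v) != 1 || v < MIN_SAMPLING_INTERVAL_US)
		return -1;	/* -EINVAL */
	*rate = v;		/* publish only after validation */
	return 0;
}

int main(void)
{
	unsigned int rate = 0;

	printf("\"500\"  -> %d\n", store_rate("500", &rate));	/* rejected */
	printf("\"5000\" -> %d (rate=%u)\n",
	       store_rate("5000", &rate), rate);		/* accepted */
	return 0;
}
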
index 628fe89..d9b2c2d 100644 (file)
@@ -226,17 +226,18 @@ static void imx6q_opp_check_speed_grading(struct device *dev)
        val >>= OCOTP_CFG3_SPEED_SHIFT;
        val &= 0x3;
 
-       if ((val != OCOTP_CFG3_SPEED_1P2GHZ) &&
-            of_machine_is_compatible("fsl,imx6q"))
-               if (dev_pm_opp_disable(dev, 1200000000))
-                       dev_warn(dev, "failed to disable 1.2GHz OPP\n");
        if (val < OCOTP_CFG3_SPEED_996MHZ)
                if (dev_pm_opp_disable(dev, 996000000))
                        dev_warn(dev, "failed to disable 996MHz OPP\n");
-       if (of_machine_is_compatible("fsl,imx6q")) {
+
+       if (of_machine_is_compatible("fsl,imx6q") ||
+           of_machine_is_compatible("fsl,imx6qp")) {
                if (val != OCOTP_CFG3_SPEED_852MHZ)
                        if (dev_pm_opp_disable(dev, 852000000))
                                dev_warn(dev, "failed to disable 852MHz OPP\n");
+               if (val != OCOTP_CFG3_SPEED_1P2GHZ)
+                       if (dev_pm_opp_disable(dev, 1200000000))
+                               dev_warn(dev, "failed to disable 1.2GHz OPP\n");
        }
        iounmap(base);
 put_node:
index 23e771d..e85903e 100644 (file)
@@ -103,8 +103,8 @@ static int gpio_reg_to_irq(struct gpio_chip *gc, unsigned offset)
        struct gpio_reg *r = to_gpio_reg(gc);
        int irq = r->irqs[offset];
 
-       if (irq >= 0 && r->irq.domain)
-               irq = irq_find_mapping(r->irq.domain, irq);
+       if (irq >= 0 && r->irqdomain)
+               irq = irq_find_mapping(r->irqdomain, irq);
 
        return irq;
 }
index eb4528c..d6f3d9e 100644 (file)
@@ -1074,7 +1074,7 @@ void acpi_gpiochip_add(struct gpio_chip *chip)
        }
 
        if (!chip->names)
-               devprop_gpiochip_set_names(chip);
+               devprop_gpiochip_set_names(chip, dev_fwnode(chip->parent));
 
        acpi_gpiochip_request_regions(acpi_gpio);
        acpi_gpiochip_scan_gpios(acpi_gpio);
index 27f383b..f748aa3 100644 (file)
 /**
  * devprop_gpiochip_set_names - Set GPIO line names using device properties
  * @chip: GPIO chip whose lines should be named, if possible
+ * @fwnode: Property Node containing the gpio-line-names property
  *
  * Looks for device property "gpio-line-names" and if it exists assigns
  * GPIO line names for the chip. The memory allocated for the assigned
  * names belong to the underlying firmware node and should not be released
  * by the caller.
  */
-void devprop_gpiochip_set_names(struct gpio_chip *chip)
+void devprop_gpiochip_set_names(struct gpio_chip *chip,
+                               const struct fwnode_handle *fwnode)
 {
        struct gpio_device *gdev = chip->gpiodev;
        const char **names;
        int ret, i;
 
-       if (!chip->parent) {
-               dev_warn(&gdev->dev, "GPIO chip parent is NULL\n");
-               return;
-       }
-
-       ret = device_property_read_string_array(chip->parent, "gpio-line-names",
+       ret = fwnode_property_read_string_array(fwnode, "gpio-line-names",
                                                NULL, 0);
        if (ret < 0)
                return;
 
        if (ret != gdev->ngpio) {
-               dev_warn(chip->parent,
+               dev_warn(&gdev->dev,
                         "names %d do not match number of GPIOs %d\n", ret,
                         gdev->ngpio);
                return;
@@ -52,10 +49,10 @@ void devprop_gpiochip_set_names(struct gpio_chip *chip)
        if (!names)
                return;
 
-       ret = device_property_read_string_array(chip->parent, "gpio-line-names",
+       ret = fwnode_property_read_string_array(fwnode, "gpio-line-names",
                                                names, gdev->ngpio);
        if (ret < 0) {
-               dev_warn(chip->parent, "failed to read GPIO line names\n");
+               dev_warn(&gdev->dev, "failed to read GPIO line names\n");
                kfree(names);
                return;
        }
index e0d59e6..72a0695 100644 (file)
@@ -493,7 +493,8 @@ int of_gpiochip_add(struct gpio_chip *chip)
 
        /* If the chip defines names itself, these take precedence */
        if (!chip->names)
-               devprop_gpiochip_set_names(chip);
+               devprop_gpiochip_set_names(chip,
+                                          of_fwnode_handle(chip->of_node));
 
        of_node_get(chip->of_node);
 
index af48322..6c44d16 100644 (file)
@@ -228,7 +228,8 @@ static inline int gpio_chip_hwgpio(const struct gpio_desc *desc)
        return desc - &desc->gdev->descs[0];
 }
 
-void devprop_gpiochip_set_names(struct gpio_chip *chip);
+void devprop_gpiochip_set_names(struct gpio_chip *chip,
+                               const struct fwnode_handle *fwnode);
 
 /* With descriptor prefix */
 
index da43813..5aeb5f8 100644 (file)
@@ -2467,7 +2467,7 @@ static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
                                  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
                                  PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
                                  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
-                                 PACKET3_MAP_QUEUES_ALLOC_FORMAT(1) | /* alloc format: all_on_one_pipe */
+                                 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
                                  PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
                                  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
                amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
index f71fe6d..bb5fa89 100644 (file)
@@ -2336,7 +2336,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
                       const struct dm_connector_state *dm_state)
 {
        struct drm_display_mode *preferred_mode = NULL;
-       const struct drm_connector *drm_connector;
+       struct drm_connector *drm_connector;
        struct dc_stream_state *stream = NULL;
        struct drm_display_mode mode = *drm_mode;
        bool native_mode_found = false;
@@ -2355,11 +2355,13 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 
        if (!aconnector->dc_sink) {
                /*
-                * Exclude MST from creating fake_sink
-                * TODO: need to enable MST into fake_sink feature
+                * Create a dc_sink when necessary for MST;
+                * don't apply fake_sink to MST.

                 */
-               if (aconnector->mst_port)
-                       goto stream_create_fail;
+               if (aconnector->mst_port) {
+                       dm_dp_mst_dc_sink_create(drm_connector);
+                       goto mst_dc_sink_create_done;
+               }
 
                if (create_fake_sink(aconnector))
                        goto stream_create_fail;
@@ -2410,6 +2412,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 stream_create_fail:
 dm_state_null:
 drm_connector_null:
+mst_dc_sink_create_done:
        return stream;
 }
 
index 117521c..0230250 100644 (file)
@@ -189,6 +189,8 @@ struct amdgpu_dm_connector {
        struct mutex hpd_lock;
 
        bool fake_enable;
+
+       bool mst_connected;
 };
 
 #define to_amdgpu_dm_connector(x) container_of(x, struct amdgpu_dm_connector, base)
index f8efb98..638c2c2 100644 (file)
@@ -185,6 +185,42 @@ static int dm_connector_update_modes(struct drm_connector *connector,
        return ret;
 }
 
+void dm_dp_mst_dc_sink_create(struct drm_connector *connector)
+{
+       struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
+       struct edid *edid;
+       struct dc_sink *dc_sink;
+       struct dc_sink_init_data init_params = {
+                       .link = aconnector->dc_link,
+                       .sink_signal = SIGNAL_TYPE_DISPLAY_PORT_MST };
+
+       edid = drm_dp_mst_get_edid(connector, &aconnector->mst_port->mst_mgr, aconnector->port);
+
+       if (!edid) {
+               drm_mode_connector_update_edid_property(
+                       &aconnector->base,
+                       NULL);
+               return;
+       }
+
+       aconnector->edid = edid;
+
+       dc_sink = dc_link_add_remote_sink(
+               aconnector->dc_link,
+               (uint8_t *)aconnector->edid,
+               (aconnector->edid->extensions + 1) * EDID_LENGTH,
+               &init_params);
+
+       dc_sink->priv = aconnector;
+       aconnector->dc_sink = dc_sink;
+
+       amdgpu_dm_add_sink_to_freesync_module(
+                       connector, aconnector->edid);
+
+       drm_mode_connector_update_edid_property(
+                                       &aconnector->base, aconnector->edid);
+}
+
 static int dm_dp_mst_get_modes(struct drm_connector *connector)
 {
        struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
@@ -311,6 +347,7 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
                        drm_mode_connector_set_path_property(connector, pathprop);
 
                        drm_connector_list_iter_end(&conn_iter);
+                       aconnector->mst_connected = true;
                        return &aconnector->base;
                }
        }
@@ -363,6 +400,8 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
         */
        amdgpu_dm_connector_funcs_reset(connector);
 
+       aconnector->mst_connected = true;
+
        DRM_INFO("DM_MST: added connector: %p [id: %d] [master: %p]\n",
                        aconnector, connector->base.id, aconnector->mst_port);
 
@@ -394,6 +433,8 @@ static void dm_dp_destroy_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
        drm_mode_connector_update_edid_property(
                        &aconnector->base,
                        NULL);
+
+       aconnector->mst_connected = false;
 }
 
 static void dm_dp_mst_hotplug(struct drm_dp_mst_topology_mgr *mgr)
@@ -404,10 +445,18 @@ static void dm_dp_mst_hotplug(struct drm_dp_mst_topology_mgr *mgr)
        drm_kms_helper_hotplug_event(dev);
 }
 
+static void dm_dp_mst_link_status_reset(struct drm_connector *connector)
+{
+       mutex_lock(&connector->dev->mode_config.mutex);
+       drm_mode_connector_set_link_status_property(connector, DRM_MODE_LINK_STATUS_BAD);
+       mutex_unlock(&connector->dev->mode_config.mutex);
+}
+
 static void dm_dp_mst_register_connector(struct drm_connector *connector)
 {
        struct drm_device *dev = connector->dev;
        struct amdgpu_device *adev = dev->dev_private;
+       struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
 
        if (adev->mode_info.rfbdev)
                drm_fb_helper_add_one_connector(&adev->mode_info.rfbdev->helper, connector);
@@ -416,6 +465,8 @@ static void dm_dp_mst_register_connector(struct drm_connector *connector)
 
        drm_connector_register(connector);
 
+       if (aconnector->mst_connected)
+               dm_dp_mst_link_status_reset(connector);
 }
 
 static const struct drm_dp_mst_topology_cbs dm_mst_cbs = {
index 2da851b..8cf51da 100644 (file)
@@ -31,5 +31,6 @@ struct amdgpu_dm_connector;
 
 void amdgpu_dm_initialize_dp_connector(struct amdgpu_display_manager *dm,
                                       struct amdgpu_dm_connector *aconnector);
+void dm_dp_mst_dc_sink_create(struct drm_connector *connector);
 
 #endif
index 3dce35e..b142629 100644 (file)
@@ -900,6 +900,15 @@ bool dcn_validate_bandwidth(
                        v->override_vta_ps[input_idx] = pipe->plane_res.scl_data.taps.v_taps;
                        v->override_hta_pschroma[input_idx] = pipe->plane_res.scl_data.taps.h_taps_c;
                        v->override_vta_pschroma[input_idx] = pipe->plane_res.scl_data.taps.v_taps_c;
+                       /*
+                        * Spreadsheet doesn't handle taps_c being one properly,
+                        * need to force Chroma to always be scaled to pass
+                        * bandwidth validation.
+                        */
+                       if (v->override_hta_pschroma[input_idx] == 1)
+                               v->override_hta_pschroma[input_idx] = 2;
+                       if (v->override_vta_pschroma[input_idx] == 1)
+                               v->override_vta_pschroma[input_idx] = 2;
                        v->source_scan[input_idx] = (pipe->plane_state->rotation % 2) ? dcn_bw_vert : dcn_bw_hor;
                }
                if (v->is_line_buffer_bpp_fixed == dcn_bw_yes)
index e27ed4a..42a111b 100644 (file)
@@ -1801,7 +1801,7 @@ static void disable_link(struct dc_link *link, enum signal_type signal)
                link->link_enc->funcs->disable_output(link->link_enc, signal, link);
 }
 
-bool dp_active_dongle_validate_timing(
+static bool dp_active_dongle_validate_timing(
                const struct dc_crtc_timing *timing,
                const struct dc_dongle_caps *dongle_caps)
 {
@@ -1833,6 +1833,8 @@ bool dp_active_dongle_validate_timing(
        /* Check Color Depth and Pixel Clock */
        if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR420)
                required_pix_clk /= 2;
+       else if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR422)
+               required_pix_clk = required_pix_clk * 2 / 3;
 
        switch (timing->display_color_depth) {
        case COLOR_DEPTH_666:
index 07ff8d2..d844fad 100644 (file)
@@ -2866,16 +2866,19 @@ static void dce110_apply_ctx_for_surface(
                int num_planes,
                struct dc_state *context)
 {
-       int i, be_idx;
+       int i;
 
        if (num_planes == 0)
                return;
 
-       be_idx = -1;
        for (i = 0; i < dc->res_pool->pipe_count; i++) {
-               if (stream == context->res_ctx.pipe_ctx[i].stream) {
-                       be_idx = context->res_ctx.pipe_ctx[i].stream_res.tg->inst;
-                       break;
+               struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
+               struct pipe_ctx *old_pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i];
+
+               if (stream == pipe_ctx->stream) {
+                       if (!pipe_ctx->top_pipe &&
+                               (pipe_ctx->plane_state || old_pipe_ctx->plane_state))
+                               dc->hwss.pipe_control_lock(dc, pipe_ctx, true);
                }
        }
 
@@ -2895,9 +2898,22 @@ static void dce110_apply_ctx_for_surface(
                                        context->stream_count);
 
                dce110_program_front_end_for_pipe(dc, pipe_ctx);
+
+               dc->hwss.update_plane_addr(dc, pipe_ctx);
+
                program_surface_visibility(dc, pipe_ctx);
 
        }
+
+       for (i = 0; i < dc->res_pool->pipe_count; i++) {
+               struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
+               struct pipe_ctx *old_pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i];
+
+               if ((stream == pipe_ctx->stream) &&
+                       (!pipe_ctx->top_pipe) &&
+                       (pipe_ctx->plane_state || old_pipe_ctx->plane_state))
+                       dc->hwss.pipe_control_lock(dc, pipe_ctx, false);
+       }
 }
 
 static void dce110_power_down_fe(struct dc *dc, int fe_idx)
index 74e7c82..a9d55d0 100644 (file)
@@ -159,11 +159,10 @@ bool dpp_get_optimal_number_of_taps(
                        scl_data->taps.h_taps = 1;
                if (IDENTITY_RATIO(scl_data->ratios.vert))
                        scl_data->taps.v_taps = 1;
-               /*
-                * Spreadsheet doesn't handle taps_c is one properly,
-                * need to force Chroma to always be scaled to pass
-                * bandwidth validation.
-                */
+               if (IDENTITY_RATIO(scl_data->ratios.horz_c))
+                       scl_data->taps.h_taps_c = 1;
+               if (IDENTITY_RATIO(scl_data->ratios.vert_c))
+                       scl_data->taps.v_taps_c = 1;
        }
 
        return true;
index 59849f0..1402c0e 100644 (file)
@@ -220,17 +220,6 @@ static struct drm_master *drm_lease_create(struct drm_master *lessor, struct idr
 
        mutex_lock(&dev->mode_config.idr_mutex);
 
-       /* Insert the new lessee into the tree */
-       id = idr_alloc(&(drm_lease_owner(lessor)->lessee_idr), lessee, 1, 0, GFP_KERNEL);
-       if (id < 0) {
-               error = id;
-               goto out_lessee;
-       }
-
-       lessee->lessee_id = id;
-       lessee->lessor = drm_master_get(lessor);
-       list_add_tail(&lessee->lessee_list, &lessor->lessees);
-
        idr_for_each_entry(leases, entry, object) {
                error = 0;
                if (!idr_find(&dev->mode_config.crtc_idr, object))
@@ -246,6 +235,17 @@ static struct drm_master *drm_lease_create(struct drm_master *lessor, struct idr
                }
        }
 
+       /* Insert the new lessee into the tree */
+       id = idr_alloc(&(drm_lease_owner(lessor)->lessee_idr), lessee, 1, 0, GFP_KERNEL);
+       if (id < 0) {
+               error = id;
+               goto out_lessee;
+       }
+
+       lessee->lessee_id = id;
+       lessee->lessor = drm_master_get(lessor);
+       list_add_tail(&lessee->lessee_list, &lessor->lessees);
+
        /* Move the leases over */
        lessee->leases = *leases;
        DRM_DEBUG_LEASE("new lessee %d %p, lessor %d %p\n", lessee->lessee_id, lessee, lessor->lessee_id, lessor);
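
The reorder means a lessee only becomes visible (IDR id allocated, linked into the lessor's list) after every requested object has been validated, so the error path no longer has to tear down a half-created lease. The validate-then-commit shape, runnable (registry and create are stand-ins for the lessee IDR and drm_lease_create):

#include <stdio.h>

#define NSLOTS 16
static int registry[NSLOTS];	/* stand-in for the lessee IDR */

static int create(const int *objects, int n)
{
	int i, id;

	/* 1) Validate everything first ... */
	for (i = 0; i < n; i++)
		if (objects[i] < 0)
			return -22;	/* -EINVAL, nothing to unwind */

	/* 2) ... then publish: claim a slot and commit. */
	for (id = 1; id < NSLOTS; id++) {
		if (!registry[id]) {
			registry[id] = 1;
			return id;
		}
	}
	return -28;	/* -ENOSPC */
}

int main(void)
{
	int good[] = { 1, 2 }, bad[] = { 1, -1 };

	printf("%d %d\n", create(good, 2), create(bad, 2));
	return 0;
}
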
index 37a93cd..2c90519 100644 (file)
@@ -558,11 +558,10 @@ int drm_plane_check_pixel_format(const struct drm_plane *plane, u32 format)
 }
 
 /*
- * setplane_internal - setplane handler for internal callers
+ * __setplane_internal - setplane handler for internal callers
  *
- * Note that we assume an extra reference has already been taken on fb.  If the
- * update fails, this reference will be dropped before return; if it succeeds,
- * the previous framebuffer (if any) will be unreferenced instead.
+ * This function will take a reference on the new fb for the plane
+ * on success.
  *
  * src_{x,y,w,h} are provided in 16.16 fixed point format
  */
@@ -630,14 +629,12 @@ static int __setplane_internal(struct drm_plane *plane,
        if (!ret) {
                plane->crtc = crtc;
                plane->fb = fb;
-               fb = NULL;
+               drm_framebuffer_get(plane->fb);
        } else {
                plane->old_fb = NULL;
        }
 
 out:
-       if (fb)
-               drm_framebuffer_put(fb);
        if (plane->old_fb)
                drm_framebuffer_put(plane->old_fb);
        plane->old_fb = NULL;
@@ -685,6 +682,7 @@ int drm_mode_setplane(struct drm_device *dev, void *data,
        struct drm_plane *plane;
        struct drm_crtc *crtc = NULL;
        struct drm_framebuffer *fb = NULL;
+       int ret;
 
        if (!drm_core_check_feature(dev, DRIVER_MODESET))
                return -EINVAL;
@@ -717,15 +715,16 @@ int drm_mode_setplane(struct drm_device *dev, void *data,
                }
        }
 
-       /*
-        * setplane_internal will take care of deref'ing either the old or new
-        * framebuffer depending on success.
-        */
-       return setplane_internal(plane, crtc, fb,
-                                plane_req->crtc_x, plane_req->crtc_y,
-                                plane_req->crtc_w, plane_req->crtc_h,
-                                plane_req->src_x, plane_req->src_y,
-                                plane_req->src_w, plane_req->src_h);
+       ret = setplane_internal(plane, crtc, fb,
+                               plane_req->crtc_x, plane_req->crtc_y,
+                               plane_req->crtc_w, plane_req->crtc_h,
+                               plane_req->src_x, plane_req->src_y,
+                               plane_req->src_w, plane_req->src_h);
+
+       if (fb)
+               drm_framebuffer_put(fb);
+
+       return ret;
 }
 
 static int drm_mode_cursor_universal(struct drm_crtc *crtc,
@@ -788,13 +787,12 @@ static int drm_mode_cursor_universal(struct drm_crtc *crtc,
                src_h = fb->height << 16;
        }
 
-       /*
-        * setplane_internal will take care of deref'ing either the old or new
-        * framebuffer depending on success.
-        */
        ret = __setplane_internal(crtc->cursor, crtc, fb,
-                               crtc_x, crtc_y, crtc_w, crtc_h,
-                               0, 0, src_w, src_h, ctx);
+                                 crtc_x, crtc_y, crtc_w, crtc_h,
+                                 0, 0, src_w, src_h, ctx);
+
+       if (fb)
+               drm_framebuffer_put(fb);
 
        /* Update successful; save new cursor position, if necessary */
        if (ret == 0 && req->flags & DRM_MODE_CURSOR_MOVE) {
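
The setplane rework simplifies framebuffer reference ownership: instead of the function consuming the caller's reference on some paths and not others, it now takes its own reference when the plane latches the fb, and callers unconditionally put theirs. Symmetric get/put with a single owner per reference is much harder to leak. A reduced runnable model (struct fb, fb_get/fb_put and setplane are stand-ins for drm_framebuffer_get/put and __setplane_internal):

#include <stdio.h>

struct fb { int refcount; };

static void fb_get(struct fb *fb) { fb->refcount++; }
static void fb_put(struct fb *fb) { fb->refcount--; }

static struct fb *current_fb;

/* Takes its own reference on success; never consumes the caller's. */
static int setplane(struct fb *fb, int fail)
{
	if (fail)
		return -1;
	if (current_fb)
		fb_put(current_fb);
	current_fb = fb;
	fb_get(fb);
	return 0;
}

int main(void)
{
	struct fb fb = { .refcount = 1 };	/* caller's lookup reference */

	(void)setplane(&fb, 0);
	fb_put(&fb);	/* caller always drops its own reference */
	printf("refcount=%d (plane's reference remains)\n", fb.refcount);
	return 0;
}
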
index f776fc1..cb4d09c 100644 (file)
@@ -369,40 +369,26 @@ static const struct file_operations drm_syncobj_file_fops = {
        .release = drm_syncobj_file_release,
 };
 
-static int drm_syncobj_alloc_file(struct drm_syncobj *syncobj)
-{
-       struct file *file = anon_inode_getfile("syncobj_file",
-                                              &drm_syncobj_file_fops,
-                                              syncobj, 0);
-       if (IS_ERR(file))
-               return PTR_ERR(file);
-
-       drm_syncobj_get(syncobj);
-       if (cmpxchg(&syncobj->file, NULL, file)) {
-               /* lost the race */
-               fput(file);
-       }
-
-       return 0;
-}
-
 int drm_syncobj_get_fd(struct drm_syncobj *syncobj, int *p_fd)
 {
-       int ret;
+       struct file *file;
        int fd;
 
        fd = get_unused_fd_flags(O_CLOEXEC);
        if (fd < 0)
                return fd;
 
-       if (!syncobj->file) {
-               ret = drm_syncobj_alloc_file(syncobj);
-               if (ret) {
-                       put_unused_fd(fd);
-                       return ret;
-               }
+       file = anon_inode_getfile("syncobj_file",
+                                 &drm_syncobj_file_fops,
+                                 syncobj, 0);
+       if (IS_ERR(file)) {
+               put_unused_fd(fd);
+               return PTR_ERR(file);
        }
-       fd_install(fd, syncobj->file);
+
+       drm_syncobj_get(syncobj);
+       fd_install(fd, file);
+
        *p_fd = fd;
        return 0;
 }
@@ -422,31 +408,24 @@ static int drm_syncobj_handle_to_fd(struct drm_file *file_private,
        return ret;
 }
 
-static struct drm_syncobj *drm_syncobj_fdget(int fd)
-{
-       struct file *file = fget(fd);
-
-       if (!file)
-               return NULL;
-       if (file->f_op != &drm_syncobj_file_fops)
-               goto err;
-
-       return file->private_data;
-err:
-       fput(file);
-       return NULL;
-};
-
 static int drm_syncobj_fd_to_handle(struct drm_file *file_private,
                                    int fd, u32 *handle)
 {
-       struct drm_syncobj *syncobj = drm_syncobj_fdget(fd);
+       struct drm_syncobj *syncobj;
+       struct file *file;
        int ret;
 
-       if (!syncobj)
+       file = fget(fd);
+       if (!file)
                return -EINVAL;
 
+       if (file->f_op != &drm_syncobj_file_fops) {
+               fput(file);
+               return -EINVAL;
+       }
+
        /* take a reference to put in the idr */
+       syncobj = file->private_data;
        drm_syncobj_get(syncobj);
 
        idr_preload(GFP_KERNEL);
@@ -455,12 +434,14 @@ static int drm_syncobj_fd_to_handle(struct drm_file *file_private,
        spin_unlock(&file_private->syncobj_table_lock);
        idr_preload_end();
 
-       if (ret < 0) {
-               fput(syncobj->file);
-               return ret;
-       }
-       *handle = ret;
-       return 0;
+       if (ret > 0) {
+               *handle = ret;
+               ret = 0;
+       } else
+               drm_syncobj_put(syncobj);
+
+       fput(file);
+       return ret;
 }
 
 static int drm_syncobj_import_sync_file_fence(struct drm_file *file_private,
index ad4050f..18de656 100644 (file)
@@ -330,17 +330,10 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
         * must wait for all rendering to complete to the object (as unbinding
         * must anyway), and retire the requests.
         */
-       ret = i915_gem_object_wait(obj,
-                                  I915_WAIT_INTERRUPTIBLE |
-                                  I915_WAIT_LOCKED |
-                                  I915_WAIT_ALL,
-                                  MAX_SCHEDULE_TIMEOUT,
-                                  NULL);
+       ret = i915_gem_object_set_to_cpu_domain(obj, false);
        if (ret)
                return ret;
 
-       i915_gem_retire_requests(to_i915(obj->base.dev));
-
        while ((vma = list_first_entry_or_null(&obj->vma_list,
                                               struct i915_vma,
                                               obj_link))) {
index e8ca67a..ac236b8 100644 (file)
@@ -367,6 +367,7 @@ struct i915_sw_dma_fence_cb {
        struct dma_fence *dma;
        struct timer_list timer;
        struct irq_work work;
+       struct rcu_head rcu;
 };
 
 static void timer_i915_sw_fence_wake(struct timer_list *t)
@@ -406,7 +407,7 @@ static void irq_i915_sw_fence_work(struct irq_work *wrk)
        del_timer_sync(&cb->timer);
        dma_fence_put(cb->dma);
 
-       kfree(cb);
+       kfree_rcu(cb, rcu);
 }
 
 int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
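
Adding the rcu_head and switching kfree() to kfree_rcu() defers the free by an RCU grace period, so a concurrent RCU reader that still holds a pointer to the callback structure cannot end up reading freed memory. The idiom as it appears in kernel code (a sketch, not standalone; struct cb and release_cb are illustrative names):

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct cb {
	int data;
	struct rcu_head rcu;	/* storage for the deferred free */
};

static void release_cb(struct cb *cb)
{
	/*
	 * A plain kfree() here could free memory an RCU reader is
	 * still traversing; kfree_rcu() waits out the grace period.
	 */
	kfree_rcu(cb, rcu);
}
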
index 5f8b9f1..bcbc7ab 100644 (file)
@@ -186,7 +186,7 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
        struct intel_wait *wait, *n, *first;
 
        if (!b->irq_armed)
-               return;
+               goto wakeup_signaler;
 
        /* We only disarm the irq when we are idle (all requests completed),
         * so if the bottom-half remains asleep, it missed the request
@@ -208,6 +208,14 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
        b->waiters = RB_ROOT;
 
        spin_unlock_irq(&b->rb_lock);
+
+       /*
+        * The signaling thread may be asleep holding a reference to a request,
+        * that had its signaling cancelled prior to being preempted. We need
+        * to kick the signaler, just in case, to release any such reference.
+        */
+wakeup_signaler:
+       wake_up_process(b->signaler);
 }
 
 static bool use_fake_irq(const struct intel_breadcrumbs *b)
@@ -651,23 +659,15 @@ static int intel_breadcrumbs_signaler(void *arg)
                }
 
                if (unlikely(do_schedule)) {
-                       DEFINE_WAIT(exec);
-
                        if (kthread_should_park())
                                kthread_parkme();
 
-                       if (kthread_should_stop()) {
-                               GEM_BUG_ON(request);
+                       if (unlikely(kthread_should_stop())) {
+                               i915_gem_request_put(request);
                                break;
                        }
 
-                       if (request)
-                               add_wait_queue(&request->execute, &exec);
-
                        schedule();
-
-                       if (request)
-                               remove_wait_queue(&request->execute, &exec);
                }
                i915_gem_request_put(request);
        } while (1);
index e0843bb..58a3755 100644 (file)
@@ -2128,6 +2128,8 @@ static void intel_ddi_clk_select(struct intel_encoder *encoder,
        if (WARN_ON(!pll))
                return;
 
+       mutex_lock(&dev_priv->dpll_lock);
+
        if (IS_CANNONLAKE(dev_priv)) {
                /* Configure DPCLKA_CFGCR0 to map the DPLL to the DDI. */
                val = I915_READ(DPCLKA_CFGCR0);
@@ -2157,6 +2159,8 @@ static void intel_ddi_clk_select(struct intel_encoder *encoder,
        } else if (INTEL_INFO(dev_priv)->gen < 9) {
                I915_WRITE(PORT_CLK_SEL(port), hsw_pll_to_ddi_pll_sel(pll));
        }
+
+       mutex_unlock(&dev_priv->dpll_lock);
 }
 
 static void intel_ddi_clk_disable(struct intel_encoder *encoder)
index e8ccf89..ff93970 100644 (file)
@@ -9944,11 +9944,10 @@ found:
        }
 
        ret = intel_modeset_setup_plane_state(state, crtc, mode, fb, 0, 0);
+       drm_framebuffer_put(fb);
        if (ret)
                goto fail;
 
-       drm_framebuffer_put(fb);
-
        ret = drm_atomic_set_mode_for_crtc(&crtc_state->base, mode);
        if (ret)
                goto fail;
index 3bf6528..5809b29 100644 (file)
@@ -193,7 +193,7 @@ static bool lpe_audio_detect(struct drm_i915_private *dev_priv)
                };
 
                if (!pci_dev_present(atom_hdaudio_ids)) {
-                       DRM_INFO("%s\n", "HDaudio controller not detected, using LPE audio instead\n");
+                       DRM_INFO("HDaudio controller not detected, using LPE audio instead\n");
                        lpe_present = true;
                }
        }
index 2615912..435ff86 100644 (file)
@@ -224,7 +224,7 @@ nouveau_bo_new(struct nouveau_cli *cli, u64 size, int align,
                /* Determine if we can get a cache-coherent map, forcing
                 * uncached mapping if we can't.
                 */
-               if (mmu->type[drm->ttm.type_host].type & NVIF_MEM_UNCACHED)
+               if (!nouveau_drm_use_coherent_gpu_mapping(drm))
                        nvbo->force_coherent = true;
        }
 
@@ -262,7 +262,8 @@ nouveau_bo_new(struct nouveau_cli *cli, u64 size, int align,
                if (cli->device.info.family > NV_DEVICE_INFO_V0_CURIE &&
                    (flags & TTM_PL_FLAG_VRAM) && !vmm->page[i].vram)
                        continue;
-               if ((flags & TTM_PL_FLAG_TT  ) && !vmm->page[i].host)
+               if ((flags & TTM_PL_FLAG_TT) &&
+                   (!vmm->page[i].host || vmm->page[i].shift > PAGE_SHIFT))
                        continue;
 
                /* Select this page size if it's the first that supports
index 3331e82..96f6bd8 100644 (file)
@@ -157,8 +157,8 @@ struct nouveau_drm {
                struct nvif_object copy;
                int mtrr;
                int type_vram;
-               int type_host;
-               int type_ncoh;
+               int type_host[2];
+               int type_ncoh[2];
        } ttm;
 
        /* GEM interface support */
@@ -217,6 +217,13 @@ nouveau_drm(struct drm_device *dev)
        return dev->dev_private;
 }
 
+static inline bool
+nouveau_drm_use_coherent_gpu_mapping(struct nouveau_drm *drm)
+{
+       struct nvif_mmu *mmu = &drm->client.mmu;
+       return !(mmu->type[drm->ttm.type_host[0]].type & NVIF_MEM_UNCACHED);
+}
+
 int nouveau_pmops_suspend(struct device *);
 int nouveau_pmops_resume(struct device *);
 bool nouveau_pmops_runtime(void);
index c533d8e..be7357b 100644 (file)
@@ -429,7 +429,7 @@ nouveau_fbcon_destroy(struct drm_device *dev, struct nouveau_fbdev *fbcon)
        drm_fb_helper_unregister_fbi(&fbcon->helper);
        drm_fb_helper_fini(&fbcon->helper);
 
-       if (nouveau_fb->nvbo) {
+       if (nouveau_fb && nouveau_fb->nvbo) {
                nouveau_vma_del(&nouveau_fb->vma);
                nouveau_bo_unmap(nouveau_fb->nvbo);
                nouveau_bo_unpin(nouveau_fb->nvbo);
index 589a962..c002f89 100644 (file)
@@ -103,10 +103,10 @@ nouveau_mem_host(struct ttm_mem_reg *reg, struct ttm_dma_tt *tt)
        u8 type;
        int ret;
 
-       if (mmu->type[drm->ttm.type_host].type & NVIF_MEM_UNCACHED)
-               type = drm->ttm.type_ncoh;
+       if (!nouveau_drm_use_coherent_gpu_mapping(drm))
+               type = drm->ttm.type_ncoh[!!mem->kind];
        else
-               type = drm->ttm.type_host;
+               type = drm->ttm.type_host[0];
 
        if (mem->kind && !(mmu->type[type].type & NVIF_MEM_KIND))
                mem->comp = mem->kind = 0;
index 08b974b..dff51a0 100644 (file)
@@ -235,27 +235,46 @@ nouveau_ttm_global_release(struct nouveau_drm *drm)
        drm->ttm.mem_global_ref.release = NULL;
 }
 
-int
-nouveau_ttm_init(struct nouveau_drm *drm)
+static int
+nouveau_ttm_init_host(struct nouveau_drm *drm, u8 kind)
 {
-       struct nvkm_device *device = nvxx_device(&drm->client.device);
-       struct nvkm_pci *pci = device->pci;
        struct nvif_mmu *mmu = &drm->client.mmu;
-       struct drm_device *dev = drm->dev;
-       int typei, ret;
+       int typei;
 
        typei = nvif_mmu_type(mmu, NVIF_MEM_HOST | NVIF_MEM_MAPPABLE |
-                                                  NVIF_MEM_COHERENT);
+                                           kind | NVIF_MEM_COHERENT);
        if (typei < 0)
                return -ENOSYS;
 
-       drm->ttm.type_host = typei;
+       drm->ttm.type_host[!!kind] = typei;
 
-       typei = nvif_mmu_type(mmu, NVIF_MEM_HOST | NVIF_MEM_MAPPABLE);
+       typei = nvif_mmu_type(mmu, NVIF_MEM_HOST | NVIF_MEM_MAPPABLE | kind);
        if (typei < 0)
                return -ENOSYS;
 
-       drm->ttm.type_ncoh = typei;
+       drm->ttm.type_ncoh[!!kind] = typei;
+       return 0;
+}
+
+int
+nouveau_ttm_init(struct nouveau_drm *drm)
+{
+       struct nvkm_device *device = nvxx_device(&drm->client.device);
+       struct nvkm_pci *pci = device->pci;
+       struct nvif_mmu *mmu = &drm->client.mmu;
+       struct drm_device *dev = drm->dev;
+       int typei, ret;
+
+       ret = nouveau_ttm_init_host(drm, 0);
+       if (ret)
+               return ret;
+
+       if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA &&
+           drm->client.device.info.chipset != 0x50) {
+               ret = nouveau_ttm_init_host(drm, NVIF_MEM_KIND);
+               if (ret)
+                       return ret;
+       }
 
        if (drm->client.device.info.platform != NV_DEVICE_INFO_V0_SOC &&
            drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
index 9e2628d..f5371d9 100644 (file)
@@ -67,8 +67,8 @@ nouveau_vma_del(struct nouveau_vma **pvma)
                        nvif_vmm_put(&vma->vmm->vmm, &tmp);
                }
                list_del(&vma->head);
-               *pvma = NULL;
                kfree(*pvma);
+               *pvma = NULL;
        }
 }
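
The nouveau_vma_del fix is a pure ordering bug: with *pvma = NULL before kfree(*pvma), the kfree frees NULL and the real allocation leaks while the caller's pointer is wiped. Free first, then clear. Demonstrated standalone (vma_del is a stand-in):

#include <stdio.h>
#include <stdlib.h>

static void vma_del(int **pvma)
{
	/* Broken order leaks:
	 *     *pvma = NULL;
	 *     free(*pvma);    -- frees NULL, original block leaks
	 */
	free(*pvma);	/* free the object ... */
	*pvma = NULL;	/* ... then clear the caller's pointer */
}

int main(void)
{
	int *vma = malloc(sizeof(*vma));

	vma_del(&vma);
	printf("vma=%p\n", (void *)vma);	/* NULL after deletion */
	return 0;
}
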
 
index e146436..00eeaaf 100644 (file)
@@ -2369,7 +2369,7 @@ nv13b_chipset = {
        .imem = gk20a_instmem_new,
        .ltc = gp100_ltc_new,
        .mc = gp10b_mc_new,
-       .mmu = gf100_mmu_new,
+       .mmu = gp10b_mmu_new,
        .secboot = gp10b_secboot_new,
        .pmu = gm20b_pmu_new,
        .timer = gk20a_timer_new,
index 972370e..7c7efa4 100644 (file)
@@ -36,6 +36,7 @@ nvbios_dp_table(struct nvkm_bios *bios, u8 *ver, u8 *hdr, u8 *cnt, u8 *len)
                        if (data) {
                                *ver = nvbios_rd08(bios, data + 0x00);
                                switch (*ver) {
+                               case 0x20:
                                case 0x21:
                                case 0x30:
                                case 0x40:
@@ -63,6 +64,7 @@ nvbios_dpout_entry(struct nvkm_bios *bios, u8 idx,
        if (data && idx < *cnt) {
                u16 outp = nvbios_rd16(bios, data + *hdr + idx * *len);
                switch (*ver * !!outp) {
+               case 0x20:
                case 0x21:
                case 0x30:
                        *hdr = nvbios_rd08(bios, data + 0x04);
@@ -96,12 +98,16 @@ nvbios_dpout_parse(struct nvkm_bios *bios, u8 idx,
                info->type = nvbios_rd16(bios, data + 0x00);
                info->mask = nvbios_rd16(bios, data + 0x02);
                switch (*ver) {
+               case 0x20:
+                       info->mask |= 0x00c0; /* match any link */
+                       /* fall-through */
                case 0x21:
                case 0x30:
                        info->flags     = nvbios_rd08(bios, data + 0x05);
                        info->script[0] = nvbios_rd16(bios, data + 0x06);
                        info->script[1] = nvbios_rd16(bios, data + 0x08);
-                       info->lnkcmp    = nvbios_rd16(bios, data + 0x0a);
+                       if (*len >= 0x0c)
+                               info->lnkcmp    = nvbios_rd16(bios, data + 0x0a);
                        if (*len >= 0x0f) {
                                info->script[2] = nvbios_rd16(bios, data + 0x0c);
                                info->script[3] = nvbios_rd16(bios, data + 0x0e);
@@ -170,6 +176,7 @@ nvbios_dpcfg_parse(struct nvkm_bios *bios, u16 outp, u8 idx,
        memset(info, 0x00, sizeof(*info));
        if (data) {
                switch (*ver) {
+               case 0x20:
                case 0x21:
                        info->dc    = nvbios_rd08(bios, data + 0x02);
                        info->pe    = nvbios_rd08(bios, data + 0x03);
index 1ba7289..db48a1d 100644 (file)
@@ -249,7 +249,7 @@ nv50_instobj_acquire(struct nvkm_memory *memory)
                        iobj->base.memory.ptrs = &nv50_instobj_fast;
                else
                        iobj->base.memory.ptrs = &nv50_instobj_slow;
-               refcount_inc(&iobj->maps);
+               refcount_set(&iobj->maps, 1);
        }
 
        mutex_unlock(&imem->subdev.mutex);
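
refcount_inc() treats an increment from zero as a use-after-free: it warns instead of producing a valid count. Since this acquire path legitimately re-creates the mapping after maps has dropped to zero, the counter has to be re-armed with refcount_set(..., 1). The distinction in sketch form (first_map/extra_map are illustrative names):

#include <linux/refcount.h>

static refcount_t maps;

static void first_map(void)
{
	refcount_set(&maps, 1);	/* (re)arming from zero is a set, not inc */
}

static void extra_map(void)
{
	refcount_inc(&maps);	/* valid only while the count is nonzero */
}
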
index b1b1f36..deb96de 100644 (file)
@@ -136,6 +136,13 @@ nvkm_pci_init(struct nvkm_subdev *subdev)
                return ret;
 
        pci->irq = pdev->irq;
+
+       /* Ensure MSI interrupts are armed, for the case where there are
+        * already interrupts pending (for whatever reason) at load time.
+        */
+       if (pci->msi)
+               pci->func->msi_rearm(pci);
+
        return ret;
 }
 
index dda904e..500b6fb 100644 (file)
@@ -175,11 +175,31 @@ static void sun4i_hdmi_mode_set(struct drm_encoder *encoder,
        writel(val, hdmi->base + SUN4I_HDMI_VID_TIMING_POL_REG);
 }
 
+static enum drm_mode_status sun4i_hdmi_mode_valid(struct drm_encoder *encoder,
+                                       const struct drm_display_mode *mode)
+{
+       struct sun4i_hdmi *hdmi = drm_encoder_to_sun4i_hdmi(encoder);
+       unsigned long rate = mode->clock * 1000;
+       unsigned long diff = rate / 200; /* +-0.5% allowed by HDMI spec */
+       long rounded_rate;
+
+       /* 165 MHz is the typical max pixelclock frequency for HDMI <= 1.2 */
+       if (rate > 165000000)
+               return MODE_CLOCK_HIGH;
+       rounded_rate = clk_round_rate(hdmi->tmds_clk, rate);
+       if (rounded_rate > 0 &&
+           max_t(unsigned long, rounded_rate, rate) -
+           min_t(unsigned long, rounded_rate, rate) < diff)
+               return MODE_OK;
+       return MODE_NOCLOCK;
+}
+
 static const struct drm_encoder_helper_funcs sun4i_hdmi_helper_funcs = {
        .atomic_check   = sun4i_hdmi_atomic_check,
        .disable        = sun4i_hdmi_disable,
        .enable         = sun4i_hdmi_enable,
        .mode_set       = sun4i_hdmi_mode_set,
+       .mode_valid     = sun4i_hdmi_mode_valid,
 };
 
 static const struct drm_encoder_funcs sun4i_hdmi_funcs = {
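
mode_valid accepts a mode only when the rate the TMDS clock can actually produce is within the +-0.5% tolerance HDMI allows, computed as rate / 200. The arithmetic, runnable, with an assumed PLL that quantizes to 10 MHz steps (round_rate and mode_ok are stand-ins for clk_round_rate() and the driver callback):

#include <stdio.h>

/* Stand-in for clk_round_rate(): assume the PLL quantizes to 10 MHz. */
static long round_rate(unsigned long rate)
{
	return (long)(rate / 10000000 * 10000000);
}

static int mode_ok(unsigned long rate)
{
	unsigned long diff = rate / 200;	/* +-0.5% allowed by HDMI */
	long rounded = round_rate(rate);
	unsigned long hi, lo;

	if (rate > 165000000)	/* typical max pixel clock, HDMI <= 1.2 */
		return 0;
	if (rounded <= 0)
		return 0;
	hi = rate > (unsigned long)rounded ? rate : (unsigned long)rounded;
	lo = rate < (unsigned long)rounded ? rate : (unsigned long)rounded;
	return hi - lo < diff;
}

int main(void)
{
	printf("150.0 MHz -> %s\n", mode_ok(150000000) ? "OK" : "no clock");
	printf("148.5 MHz -> %s\n", mode_ok(148500000) ? "OK" : "no clock");
	return 0;
}
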
index e122f5b..f4284b5 100644 (file)
@@ -724,12 +724,12 @@ static int sun4i_tcon_bind(struct device *dev, struct device *master,
        if (IS_ERR(tcon->crtc)) {
                dev_err(dev, "Couldn't create our CRTC\n");
                ret = PTR_ERR(tcon->crtc);
-               goto err_free_clocks;
+               goto err_free_dotclock;
        }
 
        ret = sun4i_rgb_init(drm, tcon);
        if (ret < 0)
-               goto err_free_clocks;
+               goto err_free_dotclock;
 
        if (tcon->quirks->needs_de_be_mux) {
                /*
index 44343a2..b5ba644 100644 (file)
@@ -455,6 +455,7 @@ ttm_pool_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
                freed += (nr_free_pool - shrink_pages) << pool->order;
                if (freed >= sc->nr_to_scan)
                        break;
+               shrink_pages <<= pool->order;
        }
        mutex_unlock(&lock);
        return freed;
@@ -543,7 +544,7 @@ static int ttm_alloc_new_pages(struct list_head *pages, gfp_t gfp_flags,
        int r = 0;
        unsigned i, j, cpages;
        unsigned npages = 1 << order;
-       unsigned max_cpages = min(count, (unsigned)NUM_PAGES_TO_ALLOC);
+       unsigned max_cpages = min(count << order, (unsigned)NUM_PAGES_TO_ALLOC);
 
        /* allocate array for page caching change */
        caching_array = kmalloc(max_cpages*sizeof(struct page *), GFP_KERNEL);
index c9790e2..af51230 100644 (file)
@@ -143,6 +143,7 @@ static int hwmon_thermal_add_sensor(struct device *dev,
                                    struct hwmon_device *hwdev, int index)
 {
        struct hwmon_thermal_data *tdata;
+       struct thermal_zone_device *tzd;
 
        tdata = devm_kzalloc(dev, sizeof(*tdata), GFP_KERNEL);
        if (!tdata)
@@ -151,8 +152,14 @@ static int hwmon_thermal_add_sensor(struct device *dev,
        tdata->hwdev = hwdev;
        tdata->index = index;
 
-       devm_thermal_zone_of_sensor_register(&hwdev->dev, index, tdata,
-                                            &hwmon_thermal_ops);
+       tzd = devm_thermal_zone_of_sensor_register(&hwdev->dev, index, tdata,
+                                                  &hwmon_thermal_ops);
+       /*
+        * If CONFIG_THERMAL_OF is disabled, this returns -ENODEV,
+        * so ignore that error but forward any other error.
+        */
+       if (IS_ERR(tzd) && (PTR_ERR(tzd) != -ENODEV))
+               return PTR_ERR(tzd);
 
        return 0;
 }
@@ -621,14 +628,20 @@ __hwmon_device_register(struct device *dev, const char *name, void *drvdata,
                                if (!chip->ops->is_visible(drvdata, hwmon_temp,
                                                           hwmon_temp_input, j))
                                        continue;
-                               if (info[i]->config[j] & HWMON_T_INPUT)
-                                       hwmon_thermal_add_sensor(dev, hwdev, j);
+                               if (info[i]->config[j] & HWMON_T_INPUT) {
+                                       err = hwmon_thermal_add_sensor(dev,
+                                                               hwdev, j);
+                                       if (err)
+                                               goto free_device;
+                               }
                        }
                }
        }
 
        return hdev;
 
+free_device:
+       device_unregister(hdev);
 free_hwmon:
        kfree(hwdev);
 ida_remove:
index c971407..59c82cd 100644 (file)
@@ -377,6 +377,7 @@ static int cros_ec_pkt_xfer_spi(struct cros_ec_device *ec_dev,
        u8 *ptr;
        u8 *rx_buf;
        u8 sum;
+       u8 rx_byte;
        int ret = 0, final_ret;
 
        len = cros_ec_prepare_tx(ec_dev, ec_msg);
@@ -421,25 +422,22 @@ static int cros_ec_pkt_xfer_spi(struct cros_ec_device *ec_dev,
        if (!ret) {
                /* Verify that EC can process command */
                for (i = 0; i < len; i++) {
-                       switch (rx_buf[i]) {
-                       case EC_SPI_PAST_END:
-                       case EC_SPI_RX_BAD_DATA:
-                       case EC_SPI_NOT_READY:
-                               ret = -EAGAIN;
-                               ec_msg->result = EC_RES_IN_PROGRESS;
-                       default:
+                       rx_byte = rx_buf[i];
+                       if (rx_byte == EC_SPI_PAST_END  ||
+                           rx_byte == EC_SPI_RX_BAD_DATA ||
+                           rx_byte == EC_SPI_NOT_READY) {
+                               ret = -EREMOTEIO;
                                break;
                        }
-                       if (ret)
-                               break;
                }
-               if (!ret)
-                       ret = cros_ec_spi_receive_packet(ec_dev,
-                                       ec_msg->insize + sizeof(*response));
-       } else {
-               dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret);
        }
 
+       if (!ret)
+               ret = cros_ec_spi_receive_packet(ec_dev,
+                               ec_msg->insize + sizeof(*response));
+       else
+               dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret);
+
        final_ret = terminate_request(ec_dev);
 
        spi_bus_unlock(ec_spi->spi->master);
@@ -508,6 +506,7 @@ static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev,
        int i, len;
        u8 *ptr;
        u8 *rx_buf;
+       u8 rx_byte;
        int sum;
        int ret = 0, final_ret;
 
@@ -544,25 +543,22 @@ static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev,
        if (!ret) {
                /* Verify that EC can process command */
                for (i = 0; i < len; i++) {
-                       switch (rx_buf[i]) {
-                       case EC_SPI_PAST_END:
-                       case EC_SPI_RX_BAD_DATA:
-                       case EC_SPI_NOT_READY:
-                               ret = -EAGAIN;
-                               ec_msg->result = EC_RES_IN_PROGRESS;
-                       default:
+                       rx_byte = rx_buf[i];
+                       if (rx_byte == EC_SPI_PAST_END  ||
+                           rx_byte == EC_SPI_RX_BAD_DATA ||
+                           rx_byte == EC_SPI_NOT_READY) {
+                               ret = -EREMOTEIO;
                                break;
                        }
-                       if (ret)
-                               break;
                }
-               if (!ret)
-                       ret = cros_ec_spi_receive_response(ec_dev,
-                                       ec_msg->insize + EC_MSG_TX_PROTO_BYTES);
-       } else {
-               dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret);
        }
 
+       if (!ret)
+               ret = cros_ec_spi_receive_response(ec_dev,
+                               ec_msg->insize + EC_MSG_TX_PROTO_BYTES);
+       else
+               dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret);
+
        final_ret = terminate_request(ec_dev);
 
        spi_bus_unlock(ec_spi->spi->master);
@@ -667,6 +663,7 @@ static int cros_ec_spi_probe(struct spi_device *spi)
                           sizeof(struct ec_response_get_protocol_info);
        ec_dev->dout_size = sizeof(struct ec_host_request);
 
+       ec_spi->last_transfer_ns = ktime_get_ns();
 
        err = cros_ec_register(ec_dev);
        if (err) {
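
Each cros_ec hunk makes two changes: the status-byte scan drops a switch whose error cases fell through into default: (legal here, but a well-known source of missing-break bugs) in favor of one explicit condition, and the error code becomes -EREMOTEIO so callers stop retrying a transfer the EC has already rejected (-EAGAIN had flagged it as merely in progress). The resulting scan, runnable (the EC_SPI_* values below are illustrative placeholders, not the real protocol constants):

#include <stdio.h>

#define EC_SPI_PAST_END    0xfe	/* illustrative values only */
#define EC_SPI_RX_BAD_DATA 0xfd
#define EC_SPI_NOT_READY   0xfc

/* Scan for the status bytes that mean the EC refused the command. */
static int check_rx(const unsigned char *buf, int len)
{
	int i;

	for (i = 0; i < len; i++) {
		unsigned char b = buf[i];

		if (b == EC_SPI_PAST_END || b == EC_SPI_RX_BAD_DATA ||
		    b == EC_SPI_NOT_READY)
			return -121;	/* -EREMOTEIO: don't retry */
	}
	return 0;
}

int main(void)
{
	unsigned char good[] = { 0x00, 0x01 };
	unsigned char bad[]  = { 0x00, EC_SPI_NOT_READY };

	printf("%d %d\n", check_rx(good, 2), check_rx(bad, 2));
	return 0;
}
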
index da16bf4..dc94ffc 100644 (file)
@@ -159,13 +159,18 @@ unsigned int twl4030_audio_get_mclk(void)
 EXPORT_SYMBOL_GPL(twl4030_audio_get_mclk);
 
 static bool twl4030_audio_has_codec(struct twl4030_audio_data *pdata,
-                             struct device_node *node)
+                             struct device_node *parent)
 {
+       struct device_node *node;
+
        if (pdata && pdata->codec)
                return true;
 
-       if (of_find_node_by_name(node, "codec"))
+       node = of_get_child_by_name(parent, "codec");
+       if (node) {
+               of_node_put(node);
                return true;
+       }
 
        return false;
 }
index d66502d..dd19f17 100644 (file)
@@ -97,12 +97,16 @@ static struct reg_sequence twl6040_patch[] = {
 };
 
 
-static bool twl6040_has_vibra(struct device_node *node)
+static bool twl6040_has_vibra(struct device_node *parent)
 {
-#ifdef CONFIG_OF
-       if (of_find_node_by_name(node, "vibra"))
+       struct device_node *node;
+
+       node = of_get_child_by_name(parent, "vibra");
+       if (node) {
+               of_node_put(node);
                return true;
-#endif
+       }
+
        return false;
 }
 
index f80e911..73b6055 100644 (file)
@@ -1114,7 +1114,7 @@ static int mtd_check_oob_ops(struct mtd_info *mtd, loff_t offs,
        if (!ops->oobbuf)
                ops->ooblen = 0;
 
-       if (offs < 0 || offs + ops->len >= mtd->size)
+       if (offs < 0 || offs + ops->len > mtd->size)
                return -EINVAL;
 
        if (ops->ooblen) {
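
The relaxed bound fixes an off-by-one: a read that ends exactly at the end of the device is legal. With hypothetical numbers to make it concrete:

    /* mtd->size = 0x100000 (1 MiB), offs = 0x100000 - 256, ops->len = 256:
     *   offs + ops->len == mtd->size  ->  valid, touches the final byte
     *   offs + ops->len >  mtd->size  ->  -EINVAL, runs past the device
     * The old ">=" wrongly rejected the first case.
     */
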
index e0eb51d..dd56a67 100644 (file)
@@ -1763,7 +1763,7 @@ try_dmaread:
                        err = brcmstb_nand_verify_erased_page(mtd, chip, buf,
                                                              addr);
                        /* erased page bitflips corrected */
-                       if (err > 0)
+                       if (err >= 0)
                                return err;
                }
 
index 484f7fb..a8bde66 100644 (file)
@@ -253,9 +253,9 @@ static int gpio_nand_probe(struct platform_device *pdev)
                goto out_ce;
        }
 
-       gpiomtd->nwp = devm_gpiod_get(dev, "ale", GPIOD_OUT_LOW);
-       if (IS_ERR(gpiomtd->nwp)) {
-               ret = PTR_ERR(gpiomtd->nwp);
+       gpiomtd->ale = devm_gpiod_get(dev, "ale", GPIOD_OUT_LOW);
+       if (IS_ERR(gpiomtd->ale)) {
+               ret = PTR_ERR(gpiomtd->ale);
                goto out_ce;
        }
 
index 50f8d4a..d4d824e 100644 (file)
@@ -1067,9 +1067,6 @@ static int gpmi_ecc_read_page(struct mtd_info *mtd, struct nand_chip *chip,
                return ret;
        }
 
-       /* handle the block mark swapping */
-       block_mark_swapping(this, payload_virt, auxiliary_virt);
-
        /* Loop over status bytes, accumulating ECC status. */
        status = auxiliary_virt + nfc_geo->auxiliary_status_offset;
 
@@ -1158,6 +1155,9 @@ static int gpmi_ecc_read_page(struct mtd_info *mtd, struct nand_chip *chip,
                max_bitflips = max_t(unsigned int, max_bitflips, *status);
        }
 
+       /* handle the block mark swapping */
+       block_mark_swapping(this, buf, auxiliary_virt);
+
        if (oob_required) {
                /*
                 * It's time to deliver the OOB bytes. See gpmi_ecc_read_oob()
index 3c63b16..d9efbc8 100644 (file)
@@ -159,6 +159,8 @@ struct arc_emac_priv {
        unsigned int link;
        unsigned int duplex;
        unsigned int speed;
+
+       unsigned int rx_missed_errors;
 };
 
 /**
index 3241af1..bd277b0 100644 (file)
@@ -26,6 +26,8 @@
 
 #include "emac.h"
 
+static void arc_emac_restart(struct net_device *ndev);
+
 /**
  * arc_emac_tx_avail - Return the number of available slots in the tx ring.
  * @priv: Pointer to ARC EMAC private data structure.
@@ -210,39 +212,48 @@ static int arc_emac_rx(struct net_device *ndev, int budget)
                        continue;
                }
 
-               pktlen = info & LEN_MASK;
-               stats->rx_packets++;
-               stats->rx_bytes += pktlen;
-               skb = rx_buff->skb;
-               skb_put(skb, pktlen);
-               skb->dev = ndev;
-               skb->protocol = eth_type_trans(skb, ndev);
-
-               dma_unmap_single(&ndev->dev, dma_unmap_addr(rx_buff, addr),
-                                dma_unmap_len(rx_buff, len), DMA_FROM_DEVICE);
-
-               /* Prepare the BD for next cycle */
-               rx_buff->skb = netdev_alloc_skb_ip_align(ndev,
-                                                        EMAC_BUFFER_SIZE);
-               if (unlikely(!rx_buff->skb)) {
+               /* Prepare the BD for the next cycle. Call netif_receive_skb()
+                * only if a new skb was allocated and mapped, to avoid holes
+                * in the RX FIFO.
+                */
+               skb = netdev_alloc_skb_ip_align(ndev, EMAC_BUFFER_SIZE);
+               if (unlikely(!skb)) {
+                       if (net_ratelimit())
+                               netdev_err(ndev, "cannot allocate skb\n");
+                       /* Return ownership to EMAC */
+                       rxbd->info = cpu_to_le32(FOR_EMAC | EMAC_BUFFER_SIZE);
                        stats->rx_errors++;
-                       /* Because receive_skb is below, increment rx_dropped */
                        stats->rx_dropped++;
                        continue;
                }
 
-               /* receive_skb only if new skb was allocated to avoid holes */
-               netif_receive_skb(skb);
-
-               addr = dma_map_single(&ndev->dev, (void *)rx_buff->skb->data,
+               addr = dma_map_single(&ndev->dev, (void *)skb->data,
                                      EMAC_BUFFER_SIZE, DMA_FROM_DEVICE);
                if (dma_mapping_error(&ndev->dev, addr)) {
                        if (net_ratelimit())
-                               netdev_err(ndev, "cannot dma map\n");
-                       dev_kfree_skb(rx_buff->skb);
+                               netdev_err(ndev, "cannot map dma buffer\n");
+                       dev_kfree_skb(skb);
+                       /* Return ownership to EMAC */
+                       rxbd->info = cpu_to_le32(FOR_EMAC | EMAC_BUFFER_SIZE);
                        stats->rx_errors++;
+                       stats->rx_dropped++;
                        continue;
                }
+
+               /* unmap the previously mapped skb */
+               dma_unmap_single(&ndev->dev, dma_unmap_addr(rx_buff, addr),
+                                dma_unmap_len(rx_buff, len), DMA_FROM_DEVICE);
+
+               pktlen = info & LEN_MASK;
+               stats->rx_packets++;
+               stats->rx_bytes += pktlen;
+               skb_put(rx_buff->skb, pktlen);
+               rx_buff->skb->dev = ndev;
+               rx_buff->skb->protocol = eth_type_trans(rx_buff->skb, ndev);
+
+               netif_receive_skb(rx_buff->skb);
+
+               rx_buff->skb = skb;
                dma_unmap_addr_set(rx_buff, addr, addr);
                dma_unmap_len_set(rx_buff, len, EMAC_BUFFER_SIZE);
 
@@ -258,6 +269,53 @@ static int arc_emac_rx(struct net_device *ndev, int budget)
        return work_done;
 }
 
+/**
+ * arc_emac_rx_miss_handle - handle R_MISS register
+ * @ndev:      Pointer to the net_device structure.
+ */
+static void arc_emac_rx_miss_handle(struct net_device *ndev)
+{
+       struct arc_emac_priv *priv = netdev_priv(ndev);
+       struct net_device_stats *stats = &ndev->stats;
+       unsigned int miss;
+
+       miss = arc_reg_get(priv, R_MISS);
+       if (miss) {
+               stats->rx_errors += miss;
+               stats->rx_missed_errors += miss;
+               priv->rx_missed_errors += miss;
+       }
+}
+
+/**
+ * arc_emac_rx_stall_check - check RX stall
+ * @ndev:      Pointer to the net_device structure.
+ * @budget:    How many BDs requested to process on 1 call.
+ * @work_done: How many BDs processed
+ *
+ * Under certain conditions the EMAC stops receiving incoming packets and
+ * continuously increments the R_MISS register instead of saving data into
+ * the provided buffer. This function detects that condition and restarts
+ * the EMAC.
+ */
+static void arc_emac_rx_stall_check(struct net_device *ndev,
+                                   int budget, unsigned int work_done)
+{
+       struct arc_emac_priv *priv = netdev_priv(ndev);
+       struct arc_emac_bd *rxbd;
+
+       if (work_done)
+               priv->rx_missed_errors = 0;
+
+       if (priv->rx_missed_errors && budget) {
+               rxbd = &priv->rxbd[priv->last_rx_bd];
+               if (le32_to_cpu(rxbd->info) & FOR_EMAC) {
+                       arc_emac_restart(ndev);
+                       priv->rx_missed_errors = 0;
+               }
+       }
+}
+
 /**
  * arc_emac_poll - NAPI poll handler.
  * @napi:      Pointer to napi_struct structure.
@@ -272,6 +330,7 @@ static int arc_emac_poll(struct napi_struct *napi, int budget)
        unsigned int work_done;
 
        arc_emac_tx_clean(ndev);
+       arc_emac_rx_miss_handle(ndev);
 
        work_done = arc_emac_rx(ndev, budget);
        if (work_done < budget) {
@@ -279,6 +338,8 @@ static int arc_emac_poll(struct napi_struct *napi, int budget)
                arc_reg_or(priv, R_ENABLE, RXINT_MASK | TXINT_MASK);
        }
 
+       arc_emac_rx_stall_check(ndev, budget, work_done);
+
        return work_done;
 }
 
@@ -320,6 +381,8 @@ static irqreturn_t arc_emac_intr(int irq, void *dev_instance)
                if (status & MSER_MASK) {
                        stats->rx_missed_errors += 0x100;
                        stats->rx_errors += 0x100;
+                       priv->rx_missed_errors += 0x100;
+                       napi_schedule(&priv->napi);
                }
 
                if (status & RXCR_MASK) {
@@ -732,6 +795,63 @@ static int arc_emac_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 }
 
 
+/**
+ * arc_emac_restart - Restart EMAC
+ * @ndev:      Pointer to net_device structure.
+ *
+ * This function does a hardware reset of the EMAC in order to restore
+ * reception of network packets.
+ */
+static void arc_emac_restart(struct net_device *ndev)
+{
+       struct arc_emac_priv *priv = netdev_priv(ndev);
+       struct net_device_stats *stats = &ndev->stats;
+       int i;
+
+       if (net_ratelimit())
+               netdev_warn(ndev, "restarting stalled EMAC\n");
+
+       netif_stop_queue(ndev);
+
+       /* Disable interrupts */
+       arc_reg_clr(priv, R_ENABLE, RXINT_MASK | TXINT_MASK | ERR_MASK);
+
+       /* Disable EMAC */
+       arc_reg_clr(priv, R_CTRL, EN_MASK);
+
+       /* Return the sk_buff to system */
+       arc_free_tx_queue(ndev);
+
+       /* Clean Tx BD's */
+       priv->txbd_curr = 0;
+       priv->txbd_dirty = 0;
+       memset(priv->txbd, 0, TX_RING_SZ);
+
+       for (i = 0; i < RX_BD_NUM; i++) {
+               struct arc_emac_bd *rxbd = &priv->rxbd[i];
+               unsigned int info = le32_to_cpu(rxbd->info);
+
+               if (!(info & FOR_EMAC)) {
+                       stats->rx_errors++;
+                       stats->rx_dropped++;
+               }
+               /* Return ownership to EMAC */
+               rxbd->info = cpu_to_le32(FOR_EMAC | EMAC_BUFFER_SIZE);
+       }
+       priv->last_rx_bd = 0;
+
+       /* Make sure info is visible to EMAC before enable */
+       wmb();
+
+       /* Enable interrupts */
+       arc_reg_set(priv, R_ENABLE, RXINT_MASK | TXINT_MASK | ERR_MASK);
+
+       /* Enable EMAC */
+       arc_reg_or(priv, R_CTRL, EN_MASK);
+
+       netif_start_queue(ndev);
+}
+
 static const struct net_device_ops arc_emac_netdev_ops = {
        .ndo_open               = arc_emac_open,
        .ndo_stop               = arc_emac_stop,
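
Putting the new pieces together: the interrupt handler and arc_emac_rx_miss_handle() accumulate R_MISS counts in priv->rx_missed_errors, and arc_emac_rx_stall_check() reads the pattern "misses keep growing, nothing received, yet the current BD is still owned by the EMAC" as a wedged receiver that only a reset will cure. The decision, compressed into a sketch:

    /* Sketch: the stall heuristic behind arc_emac_rx_stall_check() */
    if (work_done) {
            priv->rx_missed_errors = 0;     /* still making progress */
    } else if (priv->rx_missed_errors && budget) {
            rxbd = &priv->rxbd[priv->last_rx_bd];
            if (le32_to_cpu(rxbd->info) & FOR_EMAC) {
                    /* HW owns the BD but keeps dropping: restart it */
                    arc_emac_restart(ndev);
                    priv->rx_missed_errors = 0;
            }
    }
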
index de51c21..d09c5a9 100644 (file)
@@ -14225,7 +14225,9 @@ static int tg3_change_mtu(struct net_device *dev, int new_mtu)
        /* Reset PHY, otherwise the read DMA engine will be in a mode that
         * breaks all requests to 256 bytes.
         */
-       if (tg3_asic_rev(tp) == ASIC_REV_57766)
+       if (tg3_asic_rev(tp) == ASIC_REV_57766 ||
+           tg3_asic_rev(tp) == ASIC_REV_5717 ||
+           tg3_asic_rev(tp) == ASIC_REV_5719)
                reset_phy = true;
 
        err = tg3_restart_hw(tp, reset_phy);
index bc93b69..a539263 100644 (file)
@@ -1214,6 +1214,10 @@ static void mvneta_port_disable(struct mvneta_port *pp)
        val &= ~MVNETA_GMAC0_PORT_ENABLE;
        mvreg_write(pp, MVNETA_GMAC_CTRL_0, val);
 
+       pp->link = 0;
+       pp->duplex = -1;
+       pp->speed = 0;
+
        udelay(200);
 }
 
@@ -1958,9 +1962,9 @@ static int mvneta_rx_swbm(struct mvneta_port *pp, int rx_todo,
 
                if (!mvneta_rxq_desc_is_first_last(rx_status) ||
                    (rx_status & MVNETA_RXD_ERR_SUMMARY)) {
+                       mvneta_rx_error(pp, rx_desc);
 err_drop_frame:
                        dev->stats.rx_errors++;
-                       mvneta_rx_error(pp, rx_desc);
                        /* leave the descriptor untouched */
                        continue;
                }
@@ -3011,7 +3015,7 @@ static void mvneta_cleanup_rxqs(struct mvneta_port *pp)
 {
        int queue;
 
-       for (queue = 0; queue < txq_number; queue++)
+       for (queue = 0; queue < rxq_number; queue++)
                mvneta_rxq_deinit(pp, &pp->rxqs[queue]);
 }
 
index 54adfd9..fc67e35 100644 (file)
@@ -1961,11 +1961,12 @@ static int mtk_hw_init(struct mtk_eth *eth)
        /* set GE2 TUNE */
        regmap_write(eth->pctl, GPIO_BIAS_CTRL, 0x0);
 
-       /* GE1, Force 1000M/FD, FC ON */
-       mtk_w32(eth, MAC_MCR_FIXED_LINK, MTK_MAC_MCR(0));
-
-       /* GE2, Force 1000M/FD, FC ON */
-       mtk_w32(eth, MAC_MCR_FIXED_LINK, MTK_MAC_MCR(1));
+       /* Set link-down as the default for each GMAC. Each GMAC's own MCR is
+        * set up with the appropriate value when mtk_phy_link_adjust() is
+        * invoked.
+        */
+       for (i = 0; i < MTK_MAC_COUNT; i++)
+               mtk_w32(eth, 0, MTK_MAC_MCR(i));
 
        /* Indicates CDM to parse the MTK special tag from CPU
         * which also is working out for untag packets.
index 1fffdeb..e9a1fbc 100644 (file)
@@ -362,7 +362,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
        case MLX5_CMD_OP_QUERY_VPORT_COUNTER:
        case MLX5_CMD_OP_ALLOC_Q_COUNTER:
        case MLX5_CMD_OP_QUERY_Q_COUNTER:
-       case MLX5_CMD_OP_SET_RATE_LIMIT:
+       case MLX5_CMD_OP_SET_PP_RATE_LIMIT:
        case MLX5_CMD_OP_QUERY_RATE_LIMIT:
        case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
        case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:
@@ -505,7 +505,7 @@ const char *mlx5_command_str(int command)
        MLX5_COMMAND_STR_CASE(ALLOC_Q_COUNTER);
        MLX5_COMMAND_STR_CASE(DEALLOC_Q_COUNTER);
        MLX5_COMMAND_STR_CASE(QUERY_Q_COUNTER);
-       MLX5_COMMAND_STR_CASE(SET_RATE_LIMIT);
+       MLX5_COMMAND_STR_CASE(SET_PP_RATE_LIMIT);
        MLX5_COMMAND_STR_CASE(QUERY_RATE_LIMIT);
        MLX5_COMMAND_STR_CASE(CREATE_SCHEDULING_ELEMENT);
        MLX5_COMMAND_STR_CASE(DESTROY_SCHEDULING_ELEMENT);
index c0872b3..543060c 100644 (file)
@@ -82,6 +82,9 @@
        max_t(u32, MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev), req)
 #define MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev)       MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, 6)
 #define MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, 8)
+#define MLX5E_MPWQE_STRIDE_SZ(mdev, cqe_cmprs) \
+       (cqe_cmprs ? MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) : \
+       MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev))
 
 #define MLX5_MPWRQ_LOG_WQE_SZ                  18
 #define MLX5_MPWRQ_WQE_PAGE_ORDER  (MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT > 0 ? \
@@ -590,6 +593,7 @@ struct mlx5e_channel {
        struct mlx5_core_dev      *mdev;
        struct hwtstamp_config    *tstamp;
        int                        ix;
+       int                        cpu;
 };
 
 struct mlx5e_channels {
@@ -935,8 +939,9 @@ void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params,
                                 u8 cq_period_mode);
 void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params,
                                 u8 cq_period_mode);
-void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev,
-                             struct mlx5e_params *params, u8 rq_type);
+void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
+                              struct mlx5e_params *params,
+                              u8 rq_type);
 
 static inline bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev)
 {
index c6d90b6..9bcf38f 100644 (file)
@@ -274,6 +274,7 @@ int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets)
 static int mlx5e_dbcnl_validate_ets(struct net_device *netdev,
                                    struct ieee_ets *ets)
 {
+       bool have_ets_tc = false;
        int bw_sum = 0;
        int i;
 
@@ -288,11 +289,14 @@ static int mlx5e_dbcnl_validate_ets(struct net_device *netdev,
        }
 
        /* Validate Bandwidth Sum */
-       for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
-               if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS)
+       for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+               if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) {
+                       have_ets_tc = true;
                        bw_sum += ets->tc_tx_bw[i];
+               }
+       }
 
-       if (bw_sum != 0 && bw_sum != 100) {
+       if (have_ets_tc && bw_sum != 100) {
                netdev_err(netdev,
                           "Failed to validate ETS: BW sum is illegal\n");
                return -EINVAL;
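
The new have_ets_tc flag closes a gap in the old check: "bw_sum != 0 && bw_sum != 100" quietly accepted a configuration in which every ETS TC advertised 0% bandwidth, because the sum was 0. The rule is now: if any TC uses the ETS algorithm, the advertised bandwidths must sum to exactly 100. A standalone sketch of the rule, as a hypothetical helper:

    /* Sketch: validate ETS bandwidth allocation across traffic classes */
    static bool ets_bw_valid(const u8 *tsa, const u8 *tx_bw, int ntc)
    {
            bool have_ets_tc = false;
            int bw_sum = 0;
            int i;

            for (i = 0; i < ntc; i++) {
                    if (tsa[i] == IEEE_8021QAZ_TSA_ETS) {
                            have_ets_tc = true;
                            bw_sum += tx_bw[i];
                    }
            }

            return !have_ets_tc || bw_sum == 100;
    }
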
index 23425f0..8f05efa 100644 (file)
@@ -1523,8 +1523,10 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val
        new_channels.params = priv->channels.params;
        MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS, new_val);
 
-       mlx5e_set_rq_type_params(priv->mdev, &new_channels.params,
-                                new_channels.params.rq_wq_type);
+       new_channels.params.mpwqe_log_stride_sz =
+               MLX5E_MPWQE_STRIDE_SZ(priv->mdev, new_val);
+       new_channels.params.mpwqe_log_num_strides =
+               MLX5_MPWRQ_LOG_WQE_SZ - new_channels.params.mpwqe_log_stride_sz;
 
        if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
                priv->channels.params = new_channels.params;
@@ -1536,6 +1538,10 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val
                return err;
 
        mlx5e_switch_priv_channels(priv, &new_channels, NULL);
+       mlx5e_dbg(DRV, priv, "MLX5E: RxCqeCmprss was turned %s\n",
+                 MLX5E_GET_PFLAG(&priv->channels.params,
+                                 MLX5E_PFLAG_RX_CQE_COMPRESS) ? "ON" : "OFF");
+
        return 0;
 }
 
index d2b057a..d9d8227 100644 (file)
@@ -71,11 +71,6 @@ struct mlx5e_channel_param {
        struct mlx5e_cq_param      icosq_cq;
 };
 
-static int mlx5e_get_node(struct mlx5e_priv *priv, int ix)
-{
-       return pci_irq_get_node(priv->mdev->pdev, MLX5_EQ_VEC_COMP_BASE + ix);
-}
-
 static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
 {
        return MLX5_CAP_GEN(mdev, striding_rq) &&
@@ -83,8 +78,8 @@ static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
                MLX5_CAP_ETH(mdev, reg_umr_sq);
 }
 
-void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev,
-                             struct mlx5e_params *params, u8 rq_type)
+void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
+                              struct mlx5e_params *params, u8 rq_type)
 {
        params->rq_wq_type = rq_type;
        params->lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
@@ -93,10 +88,8 @@ void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev,
                params->log_rq_size = is_kdump_kernel() ?
                        MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW :
                        MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW;
-               params->mpwqe_log_stride_sz =
-                       MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS) ?
-                       MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) :
-                       MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev);
+               params->mpwqe_log_stride_sz = MLX5E_MPWQE_STRIDE_SZ(mdev,
+                       MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
                params->mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ -
                        params->mpwqe_log_stride_sz;
                break;
@@ -120,13 +113,14 @@ void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev,
                       MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
 }
 
-static void mlx5e_set_rq_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
+static void mlx5e_set_rq_params(struct mlx5_core_dev *mdev,
+                               struct mlx5e_params *params)
 {
        u8 rq_type = mlx5e_check_fragmented_striding_rq_cap(mdev) &&
                    !params->xdp_prog && !MLX5_IPSEC_DEV(mdev) ?
                    MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ :
                    MLX5_WQ_TYPE_LINKED_LIST;
-       mlx5e_set_rq_type_params(mdev, params, rq_type);
+       mlx5e_init_rq_type_params(mdev, params, rq_type);
 }
 
 static void mlx5e_update_carrier(struct mlx5e_priv *priv)
@@ -444,17 +438,16 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq,
        int wq_sz = mlx5_wq_ll_get_size(&rq->wq);
        int mtt_sz = mlx5e_get_wqe_mtt_sz();
        int mtt_alloc = mtt_sz + MLX5_UMR_ALIGN - 1;
-       int node = mlx5e_get_node(c->priv, c->ix);
        int i;
 
        rq->mpwqe.info = kzalloc_node(wq_sz * sizeof(*rq->mpwqe.info),
-                                       GFP_KERNEL, node);
+                                     GFP_KERNEL, cpu_to_node(c->cpu));
        if (!rq->mpwqe.info)
                goto err_out;
 
        /* We allocate more than mtt_sz as we will align the pointer */
-       rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz,
-                                       GFP_KERNEL, node);
+       rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz, GFP_KERNEL,
+                                       cpu_to_node(c->cpu));
        if (unlikely(!rq->mpwqe.mtt_no_align))
                goto err_free_wqe_info;
 
@@ -562,7 +555,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
        int err;
        int i;
 
-       rqp->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
+       rqp->wq.db_numa_node = cpu_to_node(c->cpu);
 
        err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->wq,
                                &rq->wq_ctrl);
@@ -629,8 +622,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
        default: /* MLX5_WQ_TYPE_LINKED_LIST */
                rq->wqe.frag_info =
                        kzalloc_node(wq_sz * sizeof(*rq->wqe.frag_info),
-                                    GFP_KERNEL,
-                                    mlx5e_get_node(c->priv, c->ix));
+                                    GFP_KERNEL, cpu_to_node(c->cpu));
                if (!rq->wqe.frag_info) {
                        err = -ENOMEM;
                        goto err_rq_wq_destroy;
@@ -1000,13 +992,13 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c,
        sq->uar_map   = mdev->mlx5e_res.bfreg.map;
        sq->min_inline_mode = params->tx_min_inline_mode;
 
-       param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
+       param->wq.db_numa_node = cpu_to_node(c->cpu);
        err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
        if (err)
                return err;
        sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
 
-       err = mlx5e_alloc_xdpsq_db(sq, mlx5e_get_node(c->priv, c->ix));
+       err = mlx5e_alloc_xdpsq_db(sq, cpu_to_node(c->cpu));
        if (err)
                goto err_sq_wq_destroy;
 
@@ -1053,13 +1045,13 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c,
        sq->channel   = c;
        sq->uar_map   = mdev->mlx5e_res.bfreg.map;
 
-       param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
+       param->wq.db_numa_node = cpu_to_node(c->cpu);
        err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
        if (err)
                return err;
        sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
 
-       err = mlx5e_alloc_icosq_db(sq, mlx5e_get_node(c->priv, c->ix));
+       err = mlx5e_alloc_icosq_db(sq, cpu_to_node(c->cpu));
        if (err)
                goto err_sq_wq_destroy;
 
@@ -1126,13 +1118,13 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
        if (MLX5_IPSEC_DEV(c->priv->mdev))
                set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
 
-       param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
+       param->wq.db_numa_node = cpu_to_node(c->cpu);
        err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
        if (err)
                return err;
        sq->wq.db    = &sq->wq.db[MLX5_SND_DBR];
 
-       err = mlx5e_alloc_txqsq_db(sq, mlx5e_get_node(c->priv, c->ix));
+       err = mlx5e_alloc_txqsq_db(sq, cpu_to_node(c->cpu));
        if (err)
                goto err_sq_wq_destroy;
 
@@ -1504,8 +1496,8 @@ static int mlx5e_alloc_cq(struct mlx5e_channel *c,
        struct mlx5_core_dev *mdev = c->priv->mdev;
        int err;
 
-       param->wq.buf_numa_node = mlx5e_get_node(c->priv, c->ix);
-       param->wq.db_numa_node  = mlx5e_get_node(c->priv, c->ix);
+       param->wq.buf_numa_node = cpu_to_node(c->cpu);
+       param->wq.db_numa_node  = cpu_to_node(c->cpu);
        param->eq_ix   = c->ix;
 
        err = mlx5e_alloc_cq_common(mdev, param, cq);
@@ -1604,6 +1596,11 @@ static void mlx5e_close_cq(struct mlx5e_cq *cq)
        mlx5e_free_cq(cq);
 }
 
+static int mlx5e_get_cpu(struct mlx5e_priv *priv, int ix)
+{
+       return cpumask_first(priv->mdev->priv.irq_info[ix].mask);
+}
+
 static int mlx5e_open_tx_cqs(struct mlx5e_channel *c,
                             struct mlx5e_params *params,
                             struct mlx5e_channel_param *cparam)
@@ -1752,12 +1749,13 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
 {
        struct mlx5e_cq_moder icocq_moder = {0, 0};
        struct net_device *netdev = priv->netdev;
+       int cpu = mlx5e_get_cpu(priv, ix);
        struct mlx5e_channel *c;
        unsigned int irq;
        int err;
        int eqn;
 
-       c = kzalloc_node(sizeof(*c), GFP_KERNEL, mlx5e_get_node(priv, ix));
+       c = kzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu));
        if (!c)
                return -ENOMEM;
 
@@ -1765,6 +1763,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
        c->mdev     = priv->mdev;
        c->tstamp   = &priv->tstamp;
        c->ix       = ix;
+       c->cpu      = cpu;
        c->pdev     = &priv->mdev->pdev->dev;
        c->netdev   = priv->netdev;
        c->mkey_be  = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key);
@@ -1853,8 +1852,7 @@ static void mlx5e_activate_channel(struct mlx5e_channel *c)
        for (tc = 0; tc < c->num_tc; tc++)
                mlx5e_activate_txqsq(&c->sq[tc]);
        mlx5e_activate_rq(&c->rq);
-       netif_set_xps_queue(c->netdev,
-               mlx5_get_vector_affinity(c->priv->mdev, c->ix), c->ix);
+       netif_set_xps_queue(c->netdev, get_cpu_mask(c->cpu), c->ix);
 }
 
 static void mlx5e_deactivate_channel(struct mlx5e_channel *c)
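
The recurring substitution in this file, mlx5e_get_node(priv, ix) replaced by cpu_to_node(c->cpu), hangs off one new fact: each channel now records, at open time, the first CPU of the driver-managed IRQ affinity mask, and every queue allocation for that channel uses the CPU's NUMA node. In sketch form (mask layout per the driver's irq_info, names as in the diff):

    /* Sketch: derive the channel's CPU once, then allocate near it */
    int cpu = cpumask_first(priv->mdev->priv.irq_info[ix].mask);

    c = kzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu));
    c->cpu = cpu;                   /* reused for XPS and queue allocations */
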
@@ -3679,6 +3677,7 @@ static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv,
                                                     struct sk_buff *skb,
                                                     netdev_features_t features)
 {
+       unsigned int offset = 0;
        struct udphdr *udph;
        u8 proto;
        u16 port;
@@ -3688,7 +3687,7 @@ static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv,
                proto = ip_hdr(skb)->protocol;
                break;
        case htons(ETH_P_IPV6):
-               proto = ipv6_hdr(skb)->nexthdr;
+               proto = ipv6_find_hdr(skb, &offset, -1, NULL, NULL);
                break;
        default:
                goto out;
index 6077186..e7e7cef 100644 (file)
@@ -466,7 +466,7 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
                        break;
                case MLX5_EVENT_TYPE_CQ_ERROR:
                        cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
-                       mlx5_core_warn(dev, "CQ error on CQN 0x%x, syndrom 0x%x\n",
+                       mlx5_core_warn(dev, "CQ error on CQN 0x%x, syndrome 0x%x\n",
                                       cqn, eqe->data.cq_err.syndrome);
                        mlx5_cq_event(dev, cqn, eqe->type);
                        break;
@@ -775,7 +775,7 @@ err1:
        return err;
 }
 
-int mlx5_stop_eqs(struct mlx5_core_dev *dev)
+void mlx5_stop_eqs(struct mlx5_core_dev *dev)
 {
        struct mlx5_eq_table *table = &dev->priv.eq_table;
        int err;
@@ -784,22 +784,26 @@ int mlx5_stop_eqs(struct mlx5_core_dev *dev)
        if (MLX5_CAP_GEN(dev, pg)) {
                err = mlx5_destroy_unmap_eq(dev, &table->pfault_eq);
                if (err)
-                       return err;
+                       mlx5_core_err(dev, "failed to destroy page fault eq, err(%d)\n",
+                                     err);
        }
 #endif
 
        err = mlx5_destroy_unmap_eq(dev, &table->pages_eq);
        if (err)
-               return err;
+               mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n",
+                             err);
 
-       mlx5_destroy_unmap_eq(dev, &table->async_eq);
+       err = mlx5_destroy_unmap_eq(dev, &table->async_eq);
+       if (err)
+               mlx5_core_err(dev, "failed to destroy async eq, err(%d)\n",
+                             err);
        mlx5_cmd_use_polling(dev);
 
        err = mlx5_destroy_unmap_eq(dev, &table->cmd_eq);
        if (err)
-               mlx5_cmd_use_events(dev);
-
-       return err;
+               mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n",
+                             err);
 }
 
 int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
index 3c11d6e..1496296 100644 (file)
@@ -66,6 +66,9 @@ static int mlx5_fpga_mem_read_i2c(struct mlx5_fpga_device *fdev, size_t size,
        u8 actual_size;
        int err;
 
+       if (!size)
+               return -EINVAL;
+
        if (!fdev->mdev)
                return -ENOTCONN;
 
@@ -95,6 +98,9 @@ static int mlx5_fpga_mem_write_i2c(struct mlx5_fpga_device *fdev, size_t size,
        u8 actual_size;
        int err;
 
+       if (!size)
+               return -EINVAL;
+
        if (!fdev->mdev)
                return -ENOTCONN;
 
index c70fd66..dfaad9e 100644 (file)
@@ -174,6 +174,8 @@ static void del_hw_fte(struct fs_node *node);
 static void del_sw_flow_table(struct fs_node *node);
 static void del_sw_flow_group(struct fs_node *node);
 static void del_sw_fte(struct fs_node *node);
+static void del_sw_prio(struct fs_node *node);
+static void del_sw_ns(struct fs_node *node);
 /* Deleting a rule (destination) is a special case that
  * requires locking the FTE for the entire deletion process.
  */
@@ -408,6 +410,16 @@ static inline struct mlx5_core_dev *get_dev(struct fs_node *node)
        return NULL;
 }
 
+static void del_sw_ns(struct fs_node *node)
+{
+       kfree(node);
+}
+
+static void del_sw_prio(struct fs_node *node)
+{
+       kfree(node);
+}
+
 static void del_hw_flow_table(struct fs_node *node)
 {
        struct mlx5_flow_table *ft;
@@ -2064,7 +2076,7 @@ static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns,
                return ERR_PTR(-ENOMEM);
 
        fs_prio->node.type = FS_TYPE_PRIO;
-       tree_init_node(&fs_prio->node, NULL, NULL);
+       tree_init_node(&fs_prio->node, NULL, del_sw_prio);
        tree_add_node(&fs_prio->node, &ns->node);
        fs_prio->num_levels = num_levels;
        fs_prio->prio = prio;
@@ -2090,7 +2102,7 @@ static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio)
                return ERR_PTR(-ENOMEM);
 
        fs_init_namespace(ns);
-       tree_init_node(&ns->node, NULL, NULL);
+       tree_init_node(&ns->node, NULL, del_sw_ns);
        tree_add_node(&ns->node, &prio->node);
        list_add_tail(&ns->node.list, &prio->node.children);
 
index 1a0e797..21d29f7 100644 (file)
@@ -241,7 +241,7 @@ static void print_health_info(struct mlx5_core_dev *dev)
        u32 fw;
        int i;
 
-       /* If the syndrom is 0, the device is OK and no need to print buffer */
+       /* If the syndrome is 0, the device is OK and no need to print buffer */
        if (!ioread8(&h->synd))
                return;
 
index d2a66dc..8812d72 100644 (file)
@@ -57,7 +57,7 @@ static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev,
                                   struct mlx5e_params *params)
 {
        /* Override RQ params as IPoIB supports only LINKED LIST RQ for now */
-       mlx5e_set_rq_type_params(mdev, params, MLX5_WQ_TYPE_LINKED_LIST);
+       mlx5e_init_rq_type_params(mdev, params, MLX5_WQ_TYPE_LINKED_LIST);
 
        /* RQ size in ipoib by default is 512 */
        params->log_rq_size = is_kdump_kernel() ?
index 5f32344..8a89c7e 100644 (file)
@@ -317,9 +317,6 @@ static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev)
 {
        struct mlx5_priv *priv = &dev->priv;
        struct mlx5_eq_table *table = &priv->eq_table;
-       struct irq_affinity irqdesc = {
-               .pre_vectors = MLX5_EQ_VEC_COMP_BASE,
-       };
        int num_eqs = 1 << MLX5_CAP_GEN(dev, log_max_eq);
        int nvec;
 
@@ -333,10 +330,9 @@ static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev)
        if (!priv->irq_info)
                goto err_free_msix;
 
-       nvec = pci_alloc_irq_vectors_affinity(dev->pdev,
+       nvec = pci_alloc_irq_vectors(dev->pdev,
                        MLX5_EQ_VEC_COMP_BASE + 1, nvec,
-                       PCI_IRQ_MSIX | PCI_IRQ_AFFINITY,
-                       &irqdesc);
+                       PCI_IRQ_MSIX);
        if (nvec < 0)
                return nvec;
 
@@ -622,6 +618,63 @@ u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev)
        return (u64)timer_l | (u64)timer_h1 << 32;
 }
 
+static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i)
+{
+       struct mlx5_priv *priv  = &mdev->priv;
+       int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i);
+
+       if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) {
+               mlx5_core_warn(mdev, "zalloc_cpumask_var failed");
+               return -ENOMEM;
+       }
+
+       cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node),
+                       priv->irq_info[i].mask);
+
+       if (IS_ENABLED(CONFIG_SMP) &&
+           irq_set_affinity_hint(irq, priv->irq_info[i].mask))
+               mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq);
+
+       return 0;
+}
+
+static void mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i)
+{
+       struct mlx5_priv *priv  = &mdev->priv;
+       int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i);
+
+       irq_set_affinity_hint(irq, NULL);
+       free_cpumask_var(priv->irq_info[i].mask);
+}
+
+static int mlx5_irq_set_affinity_hints(struct mlx5_core_dev *mdev)
+{
+       int err;
+       int i;
+
+       for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) {
+               err = mlx5_irq_set_affinity_hint(mdev, i);
+               if (err)
+                       goto err_out;
+       }
+
+       return 0;
+
+err_out:
+       for (i--; i >= 0; i--)
+               mlx5_irq_clear_affinity_hint(mdev, i);
+
+       return err;
+}
+
+static void mlx5_irq_clear_affinity_hints(struct mlx5_core_dev *mdev)
+{
+       int i;
+
+       for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++)
+               mlx5_irq_clear_affinity_hint(mdev, i);
+}
+
 int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
                    unsigned int *irqn)
 {
@@ -1097,6 +1150,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
                goto err_stop_eqs;
        }
 
+       err = mlx5_irq_set_affinity_hints(dev);
+       if (err) {
+               dev_err(&pdev->dev, "Failed to alloc affinity hint cpumask\n");
+               goto err_affinity_hints;
+       }
+
        err = mlx5_init_fs(dev);
        if (err) {
                dev_err(&pdev->dev, "Failed to init flow steering\n");
@@ -1154,6 +1213,9 @@ err_sriov:
        mlx5_cleanup_fs(dev);
 
 err_fs:
+       mlx5_irq_clear_affinity_hints(dev);
+
+err_affinity_hints:
        free_comp_eqs(dev);
 
 err_stop_eqs:
@@ -1222,6 +1284,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
 
        mlx5_sriov_detach(dev);
        mlx5_cleanup_fs(dev);
+       mlx5_irq_clear_affinity_hints(dev);
        free_comp_eqs(dev);
        mlx5_stop_eqs(dev);
        mlx5_put_uars_page(dev, priv->uar);
index db9e665..889130e 100644 (file)
@@ -213,8 +213,8 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev,
 err_cmd:
        memset(din, 0, sizeof(din));
        memset(dout, 0, sizeof(dout));
-       MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
-       MLX5_SET(destroy_qp_in, in, qpn, qp->qpn);
+       MLX5_SET(destroy_qp_in, din, opcode, MLX5_CMD_OP_DESTROY_QP);
+       MLX5_SET(destroy_qp_in, din, qpn, qp->qpn);
        mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout));
        return err;
 }
index e651e4c..d3c33e9 100644 (file)
@@ -125,16 +125,16 @@ static struct mlx5_rl_entry *find_rl_entry(struct mlx5_rl_table *table,
        return ret_entry;
 }
 
-static int mlx5_set_rate_limit_cmd(struct mlx5_core_dev *dev,
+static int mlx5_set_pp_rate_limit_cmd(struct mlx5_core_dev *dev,
                                   u32 rate, u16 index)
 {
-       u32 in[MLX5_ST_SZ_DW(set_rate_limit_in)]   = {0};
-       u32 out[MLX5_ST_SZ_DW(set_rate_limit_out)] = {0};
+       u32 in[MLX5_ST_SZ_DW(set_pp_rate_limit_in)]   = {0};
+       u32 out[MLX5_ST_SZ_DW(set_pp_rate_limit_out)] = {0};
 
-       MLX5_SET(set_rate_limit_in, in, opcode,
-                MLX5_CMD_OP_SET_RATE_LIMIT);
-       MLX5_SET(set_rate_limit_in, in, rate_limit_index, index);
-       MLX5_SET(set_rate_limit_in, in, rate_limit, rate);
+       MLX5_SET(set_pp_rate_limit_in, in, opcode,
+                MLX5_CMD_OP_SET_PP_RATE_LIMIT);
+       MLX5_SET(set_pp_rate_limit_in, in, rate_limit_index, index);
+       MLX5_SET(set_pp_rate_limit_in, in, rate_limit, rate);
        return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
@@ -173,7 +173,7 @@ int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u32 rate, u16 *index)
                entry->refcount++;
        } else {
                /* new rate limit */
-               err = mlx5_set_rate_limit_cmd(dev, rate, entry->index);
+               err = mlx5_set_pp_rate_limit_cmd(dev, rate, entry->index);
                if (err) {
                        mlx5_core_err(dev, "Failed configuring rate: %u (%d)\n",
                                      rate, err);
@@ -209,7 +209,7 @@ void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, u32 rate)
        entry->refcount--;
        if (!entry->refcount) {
                /* need to remove rate */
-               mlx5_set_rate_limit_cmd(dev, 0, entry->index);
+               mlx5_set_pp_rate_limit_cmd(dev, 0, entry->index);
                entry->rate = 0;
        }
 
@@ -262,8 +262,8 @@ void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev)
        /* Clear all configured rates */
        for (i = 0; i < table->max_size; i++)
                if (table->rl_entry[i].rate)
-                       mlx5_set_rate_limit_cmd(dev, 0,
-                                               table->rl_entry[i].index);
+                       mlx5_set_pp_rate_limit_cmd(dev, 0,
+                                                  table->rl_entry[i].index);
 
        kfree(dev->priv.rl_table.rl_entry);
 }
index 07a9ba6..2f74953 100644 (file)
@@ -71,9 +71,9 @@ struct mlx5e_vxlan *mlx5e_vxlan_lookup_port(struct mlx5e_priv *priv, u16 port)
        struct mlx5e_vxlan_db *vxlan_db = &priv->vxlan;
        struct mlx5e_vxlan *vxlan;
 
-       spin_lock(&vxlan_db->lock);
+       spin_lock_bh(&vxlan_db->lock);
        vxlan = radix_tree_lookup(&vxlan_db->tree, port);
-       spin_unlock(&vxlan_db->lock);
+       spin_unlock_bh(&vxlan_db->lock);
 
        return vxlan;
 }
@@ -88,8 +88,12 @@ static void mlx5e_vxlan_add_port(struct work_struct *work)
        struct mlx5e_vxlan *vxlan;
        int err;
 
-       if (mlx5e_vxlan_lookup_port(priv, port))
+       mutex_lock(&priv->state_lock);
+       vxlan = mlx5e_vxlan_lookup_port(priv, port);
+       if (vxlan) {
+               atomic_inc(&vxlan->refcount);
                goto free_work;
+       }
 
        if (mlx5e_vxlan_core_add_port_cmd(priv->mdev, port))
                goto free_work;
@@ -99,10 +103,11 @@ static void mlx5e_vxlan_add_port(struct work_struct *work)
                goto err_delete_port;
 
        vxlan->udp_port = port;
+       atomic_set(&vxlan->refcount, 1);
 
-       spin_lock_irq(&vxlan_db->lock);
+       spin_lock_bh(&vxlan_db->lock);
        err = radix_tree_insert(&vxlan_db->tree, vxlan->udp_port, vxlan);
-       spin_unlock_irq(&vxlan_db->lock);
+       spin_unlock_bh(&vxlan_db->lock);
        if (err)
                goto err_free;
 
@@ -113,35 +118,39 @@ err_free:
 err_delete_port:
        mlx5e_vxlan_core_del_port_cmd(priv->mdev, port);
 free_work:
+       mutex_unlock(&priv->state_lock);
        kfree(vxlan_work);
 }
 
-static void __mlx5e_vxlan_core_del_port(struct mlx5e_priv *priv, u16 port)
+static void mlx5e_vxlan_del_port(struct work_struct *work)
 {
+       struct mlx5e_vxlan_work *vxlan_work =
+               container_of(work, struct mlx5e_vxlan_work, work);
+       struct mlx5e_priv *priv         = vxlan_work->priv;
        struct mlx5e_vxlan_db *vxlan_db = &priv->vxlan;
+       u16 port = vxlan_work->port;
        struct mlx5e_vxlan *vxlan;
+       bool remove = false;
 
-       spin_lock_irq(&vxlan_db->lock);
-       vxlan = radix_tree_delete(&vxlan_db->tree, port);
-       spin_unlock_irq(&vxlan_db->lock);
-
+       mutex_lock(&priv->state_lock);
+       spin_lock_bh(&vxlan_db->lock);
+       vxlan = radix_tree_lookup(&vxlan_db->tree, port);
        if (!vxlan)
-               return;
-
-       mlx5e_vxlan_core_del_port_cmd(priv->mdev, vxlan->udp_port);
-
-       kfree(vxlan);
-}
+               goto out_unlock;
 
-static void mlx5e_vxlan_del_port(struct work_struct *work)
-{
-       struct mlx5e_vxlan_work *vxlan_work =
-               container_of(work, struct mlx5e_vxlan_work, work);
-       struct mlx5e_priv *priv = vxlan_work->priv;
-       u16 port = vxlan_work->port;
+       if (atomic_dec_and_test(&vxlan->refcount)) {
+               radix_tree_delete(&vxlan_db->tree, port);
+               remove = true;
+       }
 
-       __mlx5e_vxlan_core_del_port(priv, port);
+out_unlock:
+       spin_unlock_bh(&vxlan_db->lock);
 
+       if (remove) {
+               mlx5e_vxlan_core_del_port_cmd(priv->mdev, port);
+               kfree(vxlan);
+       }
+       mutex_unlock(&priv->state_lock);
        kfree(vxlan_work);
 }
 
@@ -171,12 +180,11 @@ void mlx5e_vxlan_cleanup(struct mlx5e_priv *priv)
        struct mlx5e_vxlan *vxlan;
        unsigned int port = 0;
 
-       spin_lock_irq(&vxlan_db->lock);
+       /* Lockless: we are the only radix-tree consumer and the wq is disabled */
        while (radix_tree_gang_lookup(&vxlan_db->tree, (void **)&vxlan, port, 1)) {
                port = vxlan->udp_port;
-               spin_unlock_irq(&vxlan_db->lock);
-               __mlx5e_vxlan_core_del_port(priv, (u16)port);
-               spin_lock_irq(&vxlan_db->lock);
+               radix_tree_delete(&vxlan_db->tree, port);
+               mlx5e_vxlan_core_del_port_cmd(priv->mdev, port);
+               kfree(vxlan);
        }
-       spin_unlock_irq(&vxlan_db->lock);
 }
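
The vxlan rework replaces delete-on-any-request with reference counting: adds for an already-offloaded UDP port just bump vxlan->refcount, and the hardware offload is torn down only when the last user goes away. state_lock serializes the add/del workers, while the spinlock (now the _bh flavour, since lookups can run in softirq context) protects the radix tree itself. A sketch of the delete side:

    /* Sketch: refcounted removal of a vxlan port offload */
    bool remove = false;

    mutex_lock(&priv->state_lock);
    spin_lock_bh(&vxlan_db->lock);
    vxlan = radix_tree_lookup(&vxlan_db->tree, port);
    if (vxlan && atomic_dec_and_test(&vxlan->refcount)) {
            radix_tree_delete(&vxlan_db->tree, port);
            remove = true;
    }
    spin_unlock_bh(&vxlan_db->lock);

    if (remove) {                           /* last user is gone */
            mlx5e_vxlan_core_del_port_cmd(priv->mdev, port);
            kfree(vxlan);
    }
    mutex_unlock(&priv->state_lock);
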
index 5def12c..5ef6ae7 100644 (file)
@@ -36,6 +36,7 @@
 #include "en.h"
 
 struct mlx5e_vxlan {
+       atomic_t refcount;
        u16 udp_port;
 };
 
index 72ef4f8..be657b8 100644 (file)
@@ -2436,25 +2436,16 @@ static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
        rhashtable_destroy(&mlxsw_sp->router->neigh_ht);
 }
 
-static int mlxsw_sp_neigh_rif_flush(struct mlxsw_sp *mlxsw_sp,
-                                   const struct mlxsw_sp_rif *rif)
-{
-       char rauht_pl[MLXSW_REG_RAUHT_LEN];
-
-       mlxsw_reg_rauht_pack(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_DELETE_ALL,
-                            rif->rif_index, rif->addr);
-       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
-}
-
 static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
                                         struct mlxsw_sp_rif *rif)
 {
        struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;
 
-       mlxsw_sp_neigh_rif_flush(mlxsw_sp, rif);
        list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
-                                rif_list_node)
+                                rif_list_node) {
+               mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false);
                mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
+       }
 }
 
 enum mlxsw_sp_nexthop_type {
index e379b78..13190aa 100644 (file)
@@ -82,10 +82,33 @@ static const char *nfp_bpf_extra_cap(struct nfp_app *app, struct nfp_net *nn)
        return nfp_net_ebpf_capable(nn) ? "BPF" : "";
 }
 
+static int
+nfp_bpf_vnic_alloc(struct nfp_app *app, struct nfp_net *nn, unsigned int id)
+{
+       int err;
+
+       nn->app_priv = kzalloc(sizeof(struct nfp_bpf_vnic), GFP_KERNEL);
+       if (!nn->app_priv)
+               return -ENOMEM;
+
+       err = nfp_app_nic_vnic_alloc(app, nn, id);
+       if (err)
+               goto err_free_priv;
+
+       return 0;
+err_free_priv:
+       kfree(nn->app_priv);
+       return err;
+}
+
 static void nfp_bpf_vnic_free(struct nfp_app *app, struct nfp_net *nn)
 {
+       struct nfp_bpf_vnic *bv = nn->app_priv;
+
        if (nn->dp.bpf_offload_xdp)
                nfp_bpf_xdp_offload(app, nn, NULL);
+       WARN_ON(bv->tc_prog);
+       kfree(bv);
 }
 
 static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type,
@@ -93,6 +116,9 @@ static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type,
 {
        struct tc_cls_bpf_offload *cls_bpf = type_data;
        struct nfp_net *nn = cb_priv;
+       struct bpf_prog *oldprog;
+       struct nfp_bpf_vnic *bv;
+       int err;
 
        if (type != TC_SETUP_CLSBPF ||
            !tc_can_offload(nn->dp.netdev) ||
@@ -100,8 +126,6 @@ static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type,
            cls_bpf->common.protocol != htons(ETH_P_ALL) ||
            cls_bpf->common.chain_index)
                return -EOPNOTSUPP;
-       if (nn->dp.bpf_offload_xdp)
-               return -EBUSY;
 
        /* Only support TC direct action */
        if (!cls_bpf->exts_integrated ||
@@ -110,16 +134,25 @@ static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type,
                return -EOPNOTSUPP;
        }
 
-       switch (cls_bpf->command) {
-       case TC_CLSBPF_REPLACE:
-               return nfp_net_bpf_offload(nn, cls_bpf->prog, true);
-       case TC_CLSBPF_ADD:
-               return nfp_net_bpf_offload(nn, cls_bpf->prog, false);
-       case TC_CLSBPF_DESTROY:
-               return nfp_net_bpf_offload(nn, NULL, true);
-       default:
+       if (cls_bpf->command != TC_CLSBPF_OFFLOAD)
                return -EOPNOTSUPP;
+
+       bv = nn->app_priv;
+       oldprog = cls_bpf->oldprog;
+
+       /* Don't remove if oldprog doesn't match driver's state */
+       if (bv->tc_prog != oldprog) {
+               oldprog = NULL;
+               if (!cls_bpf->prog)
+                       return 0;
        }
+
+       err = nfp_net_bpf_offload(nn, cls_bpf->prog, oldprog);
+       if (err)
+               return err;
+
+       bv->tc_prog = cls_bpf->prog;
+       return 0;
 }
 
 static int nfp_bpf_setup_tc_block(struct net_device *netdev,
@@ -167,7 +200,7 @@ const struct nfp_app_type app_bpf = {
 
        .extra_cap      = nfp_bpf_extra_cap,
 
-       .vnic_alloc     = nfp_app_nic_vnic_alloc,
+       .vnic_alloc     = nfp_bpf_vnic_alloc,
        .vnic_free      = nfp_bpf_vnic_free,
 
        .setup_tc       = nfp_bpf_setup_tc,
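
With the consolidated TC_CLSBPF_OFFLOAD command, add, replace and destroy all arrive as one request carrying both prog and oldprog, so the driver must reconcile oldprog against its own bookkeeping before touching the hardware. The core of that reconciliation, as a sketch:

    /* Sketch: only treat oldprog as ours if it matches driver state */
    oldprog = cls_bpf->oldprog;
    if (bv->tc_prog != oldprog) {
            oldprog = NULL;                 /* not loaded by us: nothing to replace */
            if (!cls_bpf->prog)
                    return 0;               /* and nothing to add either */
    }

    err = nfp_net_bpf_offload(nn, cls_bpf->prog, oldprog);
    if (!err)
            bv->tc_prog = cls_bpf->prog;    /* remember what is loaded */
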
index 082a15f..57b6043 100644 (file)
@@ -172,6 +172,14 @@ struct nfp_prog {
        struct list_head insns;
 };
 
+/**
+ * struct nfp_bpf_vnic - per-vNIC BPF priv structure
+ * @tc_prog:   currently loaded cls_bpf program
+ */
+struct nfp_bpf_vnic {
+       struct bpf_prog *tc_prog;
+};
+
 int nfp_bpf_jit(struct nfp_prog *prog);
 
 extern const struct bpf_ext_analyzer_ops nfp_bpf_analyzer_ops;
index 70c92b6..38c924b 100644 (file)
@@ -253,18 +253,18 @@ static int emac_open(struct net_device *netdev)
                return ret;
        }
 
-       ret = emac_mac_up(adpt);
+       ret = adpt->phy.open(adpt);
        if (ret) {
                emac_mac_rx_tx_rings_free_all(adpt);
                free_irq(irq->irq, irq);
                return ret;
        }
 
-       ret = adpt->phy.open(adpt);
+       ret = emac_mac_up(adpt);
        if (ret) {
-               emac_mac_down(adpt);
                emac_mac_rx_tx_rings_free_all(adpt);
                free_irq(irq->irq, irq);
+               adpt->phy.close(adpt);
                return ret;
        }
 
index e1e5ac0..ce2ea2d 100644 (file)
@@ -409,7 +409,7 @@ struct stmmac_desc_ops {
        /* get timestamp value */
         u64(*get_timestamp) (void *desc, u32 ats);
        /* get rx timestamp status */
-       int (*get_rx_timestamp_status) (void *desc, u32 ats);
+       int (*get_rx_timestamp_status)(void *desc, void *next_desc, u32 ats);
        /* Display ring */
        void (*display_ring)(void *head, unsigned int size, bool rx);
        /* set MSS via context descriptor */
index 4b286e2..7e089bf 100644 (file)
@@ -258,7 +258,8 @@ static int dwmac4_rx_check_timestamp(void *desc)
        return ret;
 }
 
-static int dwmac4_wrback_get_rx_timestamp_status(void *desc, u32 ats)
+static int dwmac4_wrback_get_rx_timestamp_status(void *desc, void *next_desc,
+                                                u32 ats)
 {
        struct dma_desc *p = (struct dma_desc *)desc;
        int ret = -EINVAL;
@@ -270,7 +271,7 @@ static int dwmac4_wrback_get_rx_timestamp_status(void *desc, u32 ats)
 
                        /* Check if timestamp is OK from context descriptor */
                        do {
-                               ret = dwmac4_rx_check_timestamp(desc);
+                               ret = dwmac4_rx_check_timestamp(next_desc);
                                if (ret < 0)
                                        goto exit;
                                i++;
index 7546b36..2a828a3 100644 (file)
@@ -400,7 +400,8 @@ static u64 enh_desc_get_timestamp(void *desc, u32 ats)
        return ns;
 }
 
-static int enh_desc_get_rx_timestamp_status(void *desc, u32 ats)
+static int enh_desc_get_rx_timestamp_status(void *desc, void *next_desc,
+                                           u32 ats)
 {
        if (ats) {
                struct dma_extended_desc *p = (struct dma_extended_desc *)desc;
index f817f8f..db4cee5 100644 (file)
@@ -265,7 +265,7 @@ static u64 ndesc_get_timestamp(void *desc, u32 ats)
        return ns;
 }
 
-static int ndesc_get_rx_timestamp_status(void *desc, u32 ats)
+static int ndesc_get_rx_timestamp_status(void *desc, void *next_desc, u32 ats)
 {
        struct dma_desc *p = (struct dma_desc *)desc;
 
index 721b616..08c19eb 100644 (file)
@@ -34,6 +34,7 @@ static u32 stmmac_config_sub_second_increment(void __iomem *ioaddr,
 {
        u32 value = readl(ioaddr + PTP_TCR);
        unsigned long data;
+       u32 reg_value;
 
        /* For GMAC3.x and 4.x versions, convert the ptp_clock to nanoseconds
         *      formula = (1/ptp_clock) * 1000000000
@@ -50,10 +51,11 @@ static u32 stmmac_config_sub_second_increment(void __iomem *ioaddr,
 
        data &= PTP_SSIR_SSINC_MASK;
 
+       reg_value = data;
        if (gmac4)
-               data = data << GMAC4_PTP_SSIR_SSINC_SHIFT;
+               reg_value <<= GMAC4_PTP_SSIR_SSINC_SHIFT;
 
-       writel(data, ioaddr + PTP_SSIR);
+       writel(reg_value, ioaddr + PTP_SSIR);
 
        return data;
 }
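
The temporary reg_value exists because this function has two consumers for one number: the PTP_SSIR register wants the sub-second increment shifted into position on GMAC4, while the caller needs the raw increment for further period calculations. Shifting data in place, as before, corrupted the return value. The shape of the fix, restated as a sketch:

    /* Sketch: shift only the register copy, return the raw increment */
    reg_value = data;
    if (gmac4)
            reg_value <<= GMAC4_PTP_SSIR_SSINC_SHIFT;

    writel(reg_value, ioaddr + PTP_SSIR);

    return data;                            /* unshifted, for the caller */
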
index d725053..337d53d 100644 (file)
@@ -482,7 +482,7 @@ static void stmmac_get_rx_hwtstamp(struct stmmac_priv *priv, struct dma_desc *p,
                desc = np;
 
        /* Check if timestamp is available */
-       if (priv->hw->desc->get_rx_timestamp_status(desc, priv->adv_ts)) {
+       if (priv->hw->desc->get_rx_timestamp_status(p, np, priv->adv_ts)) {
                ns = priv->hw->desc->get_timestamp(desc, priv->adv_ts);
                netdev_dbg(priv->dev, "get valid RX hw timestamp %llu\n", ns);
                shhwtstamp = skb_hwtstamps(skb);
index b5a8f75..82104ed 100644 (file)
@@ -879,6 +879,8 @@ static int m88e1510_config_init(struct phy_device *phydev)
 
        /* SGMII-to-Copper mode initialization */
        if (phydev->interface == PHY_INTERFACE_MODE_SGMII) {
+               u32 pause;
+
                /* Select page 18 */
                err = marvell_set_page(phydev, 18);
                if (err < 0)
@@ -902,6 +904,16 @@ static int m88e1510_config_init(struct phy_device *phydev)
                err = marvell_set_page(phydev, MII_MARVELL_COPPER_PAGE);
                if (err < 0)
                        return err;
+
+               /* There appears to be a bug in the 88e1512 when used in
+                * SGMII to copper mode, where the AN advertisement register
+                * clears the pause bits each time a negotiation occurs.
+                * This means we can never be truly sure what was advertised,
+                * so disable Pause support.
+                */
+               pause = SUPPORTED_Pause | SUPPORTED_Asym_Pause;
+               phydev->supported &= ~pause;
+               phydev->advertising &= ~pause;
        }
 
        return m88e1121_config_init(phydev);
@@ -2073,7 +2085,7 @@ static struct phy_driver marvell_drivers[] = {
                .flags = PHY_HAS_INTERRUPT,
                .probe = marvell_probe,
                .config_init = &m88e1145_config_init,
-               .config_aneg = &marvell_config_aneg,
+               .config_aneg = &m88e1101_config_aneg,
                .read_status = &genphy_read_status,
                .ack_interrupt = &marvell_ack_interrupt,
                .config_intr = &marvell_config_intr,
index bfd3090..07c6048 100644 (file)
@@ -194,8 +194,11 @@ static int xgene_mdio_reset(struct xgene_mdio_pdata *pdata)
        }
 
        ret = xgene_enet_ecc_init(pdata);
-       if (ret)
+       if (ret) {
+               if (pdata->dev->of_node)
+                       clk_disable_unprepare(pdata->clk);
                return ret;
+       }
        xgene_gmac_reset(pdata);
 
        return 0;
@@ -388,8 +391,10 @@ static int xgene_mdio_probe(struct platform_device *pdev)
                return ret;
 
        mdio_bus = mdiobus_alloc();
-       if (!mdio_bus)
-               return -ENOMEM;
+       if (!mdio_bus) {
+               ret = -ENOMEM;
+               goto out_clk;
+       }
 
        mdio_bus->name = "APM X-Gene MDIO bus";
 
@@ -418,7 +423,7 @@ static int xgene_mdio_probe(struct platform_device *pdev)
                mdio_bus->phy_mask = ~0;
                ret = mdiobus_register(mdio_bus);
                if (ret)
-                       goto out;
+                       goto out_mdiobus;
 
                acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_HANDLE(dev), 1,
                                    acpi_register_phy, NULL, mdio_bus, NULL);
@@ -426,16 +431,20 @@ static int xgene_mdio_probe(struct platform_device *pdev)
        }
 
        if (ret)
-               goto out;
+               goto out_mdiobus;
 
        pdata->mdio_bus = mdio_bus;
        xgene_mdio_status = true;
 
        return 0;
 
-out:
+out_mdiobus:
        mdiobus_free(mdio_bus);
 
+out_clk:
+       if (dev->of_node)
+               clk_disable_unprepare(pdata->clk);
+
        return ret;
 }
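
Both xgene hunks plug the same leak: once the clock has been prepared and enabled earlier in probe, every subsequent failure path must disable it again, which the split goto targets now guarantee. The resulting ladder, sketched:

    /* Sketch: error ladder that unwinds the clock taken earlier in probe */
    mdio_bus = mdiobus_alloc();
    if (!mdio_bus) {
            ret = -ENOMEM;
            goto out_clk;
    }

    ret = mdiobus_register(mdio_bus);
    if (ret)
            goto out_mdiobus;

    return 0;

    out_mdiobus:
            mdiobus_free(mdio_bus);
    out_clk:
            if (dev->of_node)
                    clk_disable_unprepare(pdata->clk);
            return ret;
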
 
index 19b9cc5..31f4b79 100644 (file)
@@ -2155,6 +2155,13 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
                }
 
                ndst = &rt->dst;
+               if (skb_dst(skb)) {
+                       int mtu = dst_mtu(ndst) - VXLAN_HEADROOM;
+
+                       skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL,
+                                                      skb, mtu);
+               }
+
                tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
                ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
                err = vxlan_build_skb(skb, ndst, sizeof(struct iphdr),
@@ -2190,6 +2197,13 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
                                goto out_unlock;
                }
 
+               if (skb_dst(skb)) {
+                       int mtu = dst_mtu(ndst) - VXLAN6_HEADROOM;
+
+                       skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL,
+                                                      skb, mtu);
+               }
+
                tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
                ttl = ttl ? : ip6_dst_hoplimit(ndst);
                skb_scrub_packet(skb, xnet);
@@ -3103,6 +3117,11 @@ static void vxlan_config_apply(struct net_device *dev,
 
                max_mtu = lowerdev->mtu - (use_ipv6 ? VXLAN6_HEADROOM :
                                           VXLAN_HEADROOM);
+               if (max_mtu < ETH_MIN_MTU)
+                       max_mtu = ETH_MIN_MTU;
+
+               if (!changelink && !conf->mtu)
+                       dev->mtu = max_mtu;
        }
 
        if (dev->mtu > max_mtu)
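
Two related MTU fixes here. The transmit-path hunks report the encap-reduced MTU inward, so the inner route learns the real path MTU instead of having oversized packets silently dropped; the config hunk clamps and defaults the device MTU from the underlay. Concretely, with a 1500-byte underlay and IPv4 encapsulation (VXLAN_HEADROOM = 20 IP + 8 UDP + 8 VXLAN + 14 Ethernet = 50 bytes) the usable inner MTU is 1450; for IPv6 (VXLAN6_HEADROOM = 70) it is 1430. In sketch form:

    /* Sketch: derive and apply the inner MTU from the underlay device */
    max_mtu = lowerdev->mtu - (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM);
    if (max_mtu < ETH_MIN_MTU)
            max_mtu = ETH_MIN_MTU;          /* never below the legal minimum */

    if (!changelink && !conf->mtu)
            dev->mtu = max_mtu;             /* default to the largest usable MTU */
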
index 10b075a..e8189c0 100644 (file)
@@ -684,6 +684,7 @@ static void hwsim_send_nullfunc(struct mac80211_hwsim_data *data, u8 *mac,
        hdr = skb_put(skb, sizeof(*hdr) - ETH_ALEN);
        hdr->frame_control = cpu_to_le16(IEEE80211_FTYPE_DATA |
                                         IEEE80211_STYPE_NULLFUNC |
+                                        IEEE80211_FCTL_TODS |
                                         (ps ? IEEE80211_FCTL_PM : 0));
        hdr->duration_id = cpu_to_le16(0);
        memcpy(hdr->addr1, vp->bssid, ETH_ALEN);
@@ -3215,7 +3216,7 @@ static int hwsim_get_radio_nl(struct sk_buff *msg, struct genl_info *info)
                if (!net_eq(wiphy_net(data->hw->wiphy), genl_info_net(info)))
                        continue;
 
-               skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+               skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
                if (!skb) {
                        res = -ENOMEM;
                        goto out_err;
index e949e33..c586bcd 100644 (file)
@@ -211,12 +211,12 @@ static int btt_map_read(struct arena_info *arena, u32 lba, u32 *mapping,
        return ret;
 }
 
-static int btt_log_read_pair(struct arena_info *arena, u32 lane,
-                       struct log_entry *ent)
+static int btt_log_group_read(struct arena_info *arena, u32 lane,
+                       struct log_group *log)
 {
        return arena_read_bytes(arena,
-                       arena->logoff + (2 * lane * LOG_ENT_SIZE), ent,
-                       2 * LOG_ENT_SIZE, 0);
+                       arena->logoff + (lane * LOG_GRP_SIZE), log,
+                       LOG_GRP_SIZE, 0);
 }
 
 static struct dentry *debugfs_root;
@@ -256,6 +256,8 @@ static void arena_debugfs_init(struct arena_info *a, struct dentry *parent,
        debugfs_create_x64("logoff", S_IRUGO, d, &a->logoff);
        debugfs_create_x64("info2off", S_IRUGO, d, &a->info2off);
        debugfs_create_x32("flags", S_IRUGO, d, &a->flags);
+       debugfs_create_u32("log_index_0", S_IRUGO, d, &a->log_index[0]);
+       debugfs_create_u32("log_index_1", S_IRUGO, d, &a->log_index[1]);
 }
 
 static void btt_debugfs_init(struct btt *btt)
@@ -274,6 +276,11 @@ static void btt_debugfs_init(struct btt *btt)
        }
 }
 
+static u32 log_seq(struct log_group *log, int log_idx)
+{
+       return le32_to_cpu(log->ent[log_idx].seq);
+}
+
 /*
  * This function accepts two log entries, and uses the
  * sequence number to find the 'older' entry.
@@ -283,8 +290,10 @@ static void btt_debugfs_init(struct btt *btt)
  *
  * TODO The logic feels a bit kludge-y. make it better..
  */
-static int btt_log_get_old(struct log_entry *ent)
+static int btt_log_get_old(struct arena_info *a, struct log_group *log)
 {
+       int idx0 = a->log_index[0];
+       int idx1 = a->log_index[1];
        int old;
 
        /*
@@ -292,23 +301,23 @@ static int btt_log_get_old(struct log_entry *ent)
         * the next time, the following logic works out to put this
         * (next) entry into [1]
         */
-       if (ent[0].seq == 0) {
-               ent[0].seq = cpu_to_le32(1);
+       if (log_seq(log, idx0) == 0) {
+               log->ent[idx0].seq = cpu_to_le32(1);
                return 0;
        }
 
-       if (ent[0].seq == ent[1].seq)
+       if (log_seq(log, idx0) == log_seq(log, idx1))
                return -EINVAL;
-       if (le32_to_cpu(ent[0].seq) + le32_to_cpu(ent[1].seq) > 5)
+       if (log_seq(log, idx0) + log_seq(log, idx1) > 5)
                return -EINVAL;
 
-       if (le32_to_cpu(ent[0].seq) < le32_to_cpu(ent[1].seq)) {
-               if (le32_to_cpu(ent[1].seq) - le32_to_cpu(ent[0].seq) == 1)
+       if (log_seq(log, idx0) < log_seq(log, idx1)) {
+               if ((log_seq(log, idx1) - log_seq(log, idx0)) == 1)
                        old = 0;
                else
                        old = 1;
        } else {
-               if (le32_to_cpu(ent[0].seq) - le32_to_cpu(ent[1].seq) == 1)
+               if ((log_seq(log, idx0) - log_seq(log, idx1)) == 1)
                        old = 1;
                else
                        old = 0;
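
The sequence numbers behind this logic cycle 1 -> 2 -> 3 -> 1, with 0 reserved for a never-written lane, so "older" means one step behind in the cycle and a pair like (1, 3) wraps. A small userspace model of the ordering rule, with the same corruption checks (equal values, or a pair summing above 5):

    #include <stdio.h>
    #include <stdint.h>

    /*
     * Returns the index (0 or 1) of the older entry, or -1 if the pair
     * cannot have come from a well-formed log.
     */
    static int old_of(uint32_t s0, uint32_t s1)
    {
            if (s0 == 0)
                    return 0;               /* fresh lane: treat [0] as old */
            if (s0 == s1 || s0 + s1 > 5)
                    return -1;              /* corrupt pair */
            if (s0 < s1)
                    return (s1 - s0 == 1) ? 0 : 1;  /* (1, 3): 3 is older */
            return (s0 - s1 == 1) ? 1 : 0;
    }

    int main(void)
    {
            printf("%d %d %d\n", old_of(1, 2), old_of(1, 3), old_of(3, 1));
            /* prints: 0 1 0 */
            return 0;
    }
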
@@ -328,17 +337,18 @@ static int btt_log_read(struct arena_info *arena, u32 lane,
 {
        int ret;
        int old_ent, ret_ent;
-       struct log_entry log[2];
+       struct log_group log;
 
-       ret = btt_log_read_pair(arena, lane, log);
+       ret = btt_log_group_read(arena, lane, &log);
        if (ret)
                return -EIO;
 
-       old_ent = btt_log_get_old(log);
+       old_ent = btt_log_get_old(arena, &log);
        if (old_ent < 0 || old_ent > 1) {
                dev_err(to_dev(arena),
                                "log corruption (%d): lane %d seq [%d, %d]\n",
-                       old_ent, lane, log[0].seq, log[1].seq);
+                               old_ent, lane, log.ent[arena->log_index[0]].seq,
+                               log.ent[arena->log_index[1]].seq);
                /* TODO set error state? */
                return -EIO;
        }
@@ -346,7 +356,7 @@ static int btt_log_read(struct arena_info *arena, u32 lane,
        ret_ent = (old_flag ? old_ent : (1 - old_ent));
 
        if (ent != NULL)
-               memcpy(ent, &log[ret_ent], LOG_ENT_SIZE);
+               memcpy(ent, &log.ent[arena->log_index[ret_ent]], LOG_ENT_SIZE);
 
        return ret_ent;
 }
@@ -360,17 +370,13 @@ static int __btt_log_write(struct arena_info *arena, u32 lane,
                        u32 sub, struct log_entry *ent, unsigned long flags)
 {
        int ret;
-       /*
-        * Ignore the padding in log_entry for calculating log_half.
-        * The entry is 'committed' when we write the sequence number,
-        * and we want to ensure that that is the last thing written.
-        * We don't bother writing the padding as that would be extra
-        * media wear and write amplification
-        */
-       unsigned int log_half = (LOG_ENT_SIZE - 2 * sizeof(u64)) / 2;
-       u64 ns_off = arena->logoff + (((2 * lane) + sub) * LOG_ENT_SIZE);
+       u32 group_slot = arena->log_index[sub];
+       unsigned int log_half = LOG_ENT_SIZE / 2;
        void *src = ent;
+       u64 ns_off;
 
+       ns_off = arena->logoff + (lane * LOG_GRP_SIZE) +
+               (group_slot * LOG_ENT_SIZE);
        /* split the 16B write into atomic, durable halves */
        ret = arena_write_bytes(arena, ns_off, src, log_half, flags);
        if (ret)
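
The new addressing is plain arithmetic: the entry for a given lane and slot sits at logoff + lane * LOG_GRP_SIZE + slot * LOG_ENT_SIZE, and each 16-byte entry is then committed as two 8-byte halves with the sequence number written last. A quick model, assuming the sizes implied by the structures (16-byte entries, 64-byte groups of four):

    #include <stdio.h>
    #include <stdint.h>

    #define LOG_ENT_SIZE 16u        /* sizeof(struct log_entry), assumed */
    #define LOG_GRP_SIZE 64u        /* sizeof(struct log_group), 4 entries */

    static uint64_t log_ent_off(uint64_t logoff, uint32_t lane, uint32_t slot)
    {
            return logoff + (uint64_t)lane * LOG_GRP_SIZE +
                   slot * LOG_ENT_SIZE;
    }

    int main(void)
    {
            /* lane 3, slot 2, log area starting at byte 4096 */
            printf("%llu\n", (unsigned long long)log_ent_off(4096, 3, 2));
            /* prints: 4320 */
            return 0;
    }
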
@@ -453,7 +459,7 @@ static int btt_log_init(struct arena_info *arena)
 {
        size_t logsize = arena->info2off - arena->logoff;
        size_t chunk_size = SZ_4K, offset = 0;
-       struct log_entry log;
+       struct log_entry ent;
        void *zerobuf;
        int ret;
        u32 i;
@@ -485,11 +491,11 @@ static int btt_log_init(struct arena_info *arena)
        }
 
        for (i = 0; i < arena->nfree; i++) {
-               log.lba = cpu_to_le32(i);
-               log.old_map = cpu_to_le32(arena->external_nlba + i);
-               log.new_map = cpu_to_le32(arena->external_nlba + i);
-               log.seq = cpu_to_le32(LOG_SEQ_INIT);
-               ret = __btt_log_write(arena, i, 0, &log, 0);
+               ent.lba = cpu_to_le32(i);
+               ent.old_map = cpu_to_le32(arena->external_nlba + i);
+               ent.new_map = cpu_to_le32(arena->external_nlba + i);
+               ent.seq = cpu_to_le32(LOG_SEQ_INIT);
+               ret = __btt_log_write(arena, i, 0, &ent, 0);
                if (ret)
                        goto free;
        }
@@ -594,6 +600,123 @@ static int btt_freelist_init(struct arena_info *arena)
        return 0;
 }
 
+static bool ent_is_padding(struct log_entry *ent)
+{
+       return (ent->lba == 0) && (ent->old_map == 0) && (ent->new_map == 0)
+               && (ent->seq == 0);
+}
+
+/*
+ * Detecting valid log indices: We read a log group (see the comments in btt.h
+ * for a description of a 'log_group' and its 'slots'), and iterate over its
+ * four slots. We expect that a padding slot will be all-zeroes, and use this
+ * to detect a padding slot vs. an actual entry.
+ *
+ * If a log_group is in the initial state, i.e. hasn't been used since the
+ * creation of this BTT layout, it will have three of the four slots with
+ * zeroes. We skip over these log_groups for the detection of log_index. If
+ * all log_groups are in the initial state (i.e. the BTT has never been
+ * written to), it is safe to assume the 'new format' of log entries in slots
+ * (0, 1).
+ */
+static int log_set_indices(struct arena_info *arena)
+{
+       bool idx_set = false, initial_state = true;
+       int ret, log_index[2] = {-1, -1};
+       u32 i, j, next_idx = 0;
+       struct log_group log;
+       u32 pad_count = 0;
+
+       for (i = 0; i < arena->nfree; i++) {
+               ret = btt_log_group_read(arena, i, &log);
+               if (ret < 0)
+                       return ret;
+
+               for (j = 0; j < 4; j++) {
+                       if (!idx_set) {
+                               if (ent_is_padding(&log.ent[j])) {
+                                       pad_count++;
+                                       continue;
+                               } else {
+                                       /* Skip if index has been recorded */
+                                       if ((next_idx == 1) &&
+                                               (j == log_index[0]))
+                                               continue;
+                                       /* valid entry, record index */
+                                       log_index[next_idx] = j;
+                                       next_idx++;
+                               }
+                               if (next_idx == 2) {
+                                       /* two valid entries found */
+                                       idx_set = true;
+                               } else if (next_idx > 2) {
+                                       /* too many valid indices */
+                                       return -ENXIO;
+                               }
+                       } else {
+                               /*
+                                * once the indices have been set, just verify
+                                * that all subsequent log groups are either in
+                                * their initial state or follow the same
+                                * indices.
+                                */
+                               if (j == log_index[0]) {
+                                       /* entry must be 'valid' */
+                                       if (ent_is_padding(&log.ent[j]))
+                                               return -ENXIO;
+                               } else if (j == log_index[1]) {
+                                       /*
+                                        * log_index[1] can be padding if the
+                                        * lane never got used and it is still
+                                        * in the initial state (three 'padding'
+                                        * entries).
+                                        */
+                               } else {
+                                       /* entry must be invalid (padding) */
+                                       if (!ent_is_padding(&log.ent[j]))
+                                               return -ENXIO;
+                               }
+                       }
+               }
+               /*
+                * If any of the log_groups has more than one valid,
+                * non-padding entry, then we are no longer in the
+                * initial_state.
+                */
+               if (pad_count < 3)
+                       initial_state = false;
+               pad_count = 0;
+       }
+
+       if (!initial_state && !idx_set)
+               return -ENXIO;
+
+       /*
+        * If all the entries in the log were in the initial state,
+        * assume new padding scheme
+        */
+       if (initial_state)
+               log_index[1] = 1;
+
+       /*
+        * Only allow the known permutations of log/padding indices,
+        * i.e. (0, 1) and (0, 2).
+        */
+       if (log_index[0] != 0 ||
+           (log_index[1] != 1 && log_index[1] != 2)) {
+               dev_err(to_dev(arena), "Found an unknown padding scheme\n");
+               return -ENXIO;
+       }
+
+       arena->log_index[0] = log_index[0];
+       arena->log_index[1] = log_index[1];
+       dev_dbg(to_dev(arena), "log_index_0 = %d\n", log_index[0]);
+       dev_dbg(to_dev(arena), "log_index_1 = %d\n", log_index[1]);
+       return 0;
+}
+
 static int btt_rtt_init(struct arena_info *arena)
 {
        arena->rtt = kcalloc(arena->nfree, sizeof(u32), GFP_KERNEL);
@@ -650,8 +773,7 @@ static struct arena_info *alloc_arena(struct btt *btt, size_t size,
        available -= 2 * BTT_PG_SIZE;
 
        /* The log takes a fixed amount of space based on nfree */
-       logsize = roundup(2 * arena->nfree * sizeof(struct log_entry),
-                               BTT_PG_SIZE);
+       logsize = roundup(arena->nfree * LOG_GRP_SIZE, BTT_PG_SIZE);
        available -= logsize;
 
        /* Calculate optimal split between map and data area */
@@ -668,6 +790,10 @@ static struct arena_info *alloc_arena(struct btt *btt, size_t size,
        arena->mapoff = arena->dataoff + datasize;
        arena->logoff = arena->mapoff + mapsize;
        arena->info2off = arena->logoff + logsize;
+
+       /* Default log indices are (0,1) */
+       arena->log_index[0] = 0;
+       arena->log_index[1] = 1;
        return arena;
 }
 
@@ -758,6 +884,13 @@ static int discover_arenas(struct btt *btt)
                arena->external_lba_start = cur_nlba;
                parse_arena_meta(arena, super, cur_off);
 
+               ret = log_set_indices(arena);
+               if (ret) {
+                       dev_err(to_dev(arena),
+                               "Unable to deduce log/padding indices\n");
+                       goto out;
+               }
+
                mutex_init(&arena->err_lock);
                ret = btt_freelist_init(arena);
                if (ret)
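
The detection pass wired in above reduces to classifying each four-slot group: a padding slot is all zeroes, the positions of the non-padding slots give the candidate indices, and only the (0, 1) and (0, 2) permutations are accepted. A compilable model of the per-group classification step (the helper names are made up for the sketch):

    #include <stdio.h>
    #include <string.h>

    /* Returns 1 if a 16-byte slot is all zeroes (padding), else 0. */
    static int is_padding(const unsigned char slot[16])
    {
            static const unsigned char zero[16];
            return memcmp(slot, zero, sizeof(zero)) == 0;
    }

    /*
     * Classify one 4-slot group: record the positions of the non-padding
     * slots in idx[] and return how many were found.
     */
    static int group_indices(const unsigned char grp[4][16], int idx[4])
    {
            int i, n = 0;

            for (i = 0; i < 4; i++)
                    if (!is_padding(grp[i]))
                            idx[n++] = i;
            return n;
    }

    int main(void)
    {
            unsigned char grp[4][16] = { { 0 } };
            int idx[4] = { -1, -1, -1, -1 };

            grp[0][0] = 1;          /* old layout: entries in slots 0 and 2 */
            grp[2][0] = 1;
            printf("n=%d idx=(%d,%d)\n", group_indices(grp, idx),
                   idx[0], idx[1]);         /* n=2 idx=(0,2) */
            return 0;
    }
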
index 578c205..db3cb6d 100644 (file)
@@ -27,6 +27,7 @@
 #define MAP_ERR_MASK (1 << MAP_ERR_SHIFT)
 #define MAP_LBA_MASK (~((1 << MAP_TRIM_SHIFT) | (1 << MAP_ERR_SHIFT)))
 #define MAP_ENT_NORMAL 0xC0000000
+#define LOG_GRP_SIZE sizeof(struct log_group)
 #define LOG_ENT_SIZE sizeof(struct log_entry)
 #define ARENA_MIN_SIZE (1UL << 24)     /* 16 MB */
 #define ARENA_MAX_SIZE (1ULL << 39)    /* 512 GB */
@@ -50,12 +51,52 @@ enum btt_init_state {
        INIT_READY
 };
 
+/*
+ * A log group represents one log 'lane', and consists of four log entries.
+ * Two of the four entries are valid entries, and the remaining two are
+ * padding. Due to an old bug in the padding location, we need to perform a
+ * test to determine the padding scheme being used, and use that scheme
+ * thereafter.
+ *
+ * In kernels prior to 4.15, a 'log group' would have actual log entries at
+ * indices (0, 2) and padding at indices (1, 3), whereas the correct/updated
+ * format has log entries at indices (0, 1) and padding at indices (2, 3).
+ *
+ * Old (pre 4.15) format:
+ * +-----------------+-----------------+
+ * |      ent[0]     |      ent[1]     |
+ * |       16B       |       16B       |
+ * | lba/old/new/seq |       pad       |
+ * +-----------------------------------+
+ * |      ent[2]     |      ent[3]     |
+ * |       16B       |       16B       |
+ * | lba/old/new/seq |       pad       |
+ * +-----------------+-----------------+
+ *
+ * New format:
+ * +-----------------+-----------------+
+ * |      ent[0]     |      ent[1]     |
+ * |       16B       |       16B       |
+ * | lba/old/new/seq | lba/old/new/seq |
+ * +-----------------------------------+
+ * |      ent[2]     |      ent[3]     |
+ * |       16B       |       16B       |
+ * |       pad       |       pad       |
+ * +-----------------+-----------------+
+ *
+ * We detect during start-up which format is in use, and set
+ * arena->log_index[0] and arena->log_index[1] accordingly.
+ */
+
 struct log_entry {
        __le32 lba;
        __le32 old_map;
        __le32 new_map;
        __le32 seq;
-       __le64 padding[2];
+};
+
+struct log_group {
+       struct log_entry ent[4];
 };
 
 struct btt_sb {
@@ -125,6 +166,8 @@ struct aligned_lock {
  * @list:              List head for list of arenas
  * @debugfs_dir:       Debugfs dentry
  * @flags:             Arena flags - may signify error states.
+ * @err_lock:          Mutex for synchronizing error clearing.
+ * @log_index:         Indices of the valid log entries in a log_group
  *
  * arena_info is a per-arena handle. Once an arena is narrowed down for an
  * IO, this struct is passed around for the duration of the IO.
@@ -157,6 +200,7 @@ struct arena_info {
        /* Arena flags */
        u32 flags;
        struct mutex err_lock;
+       int log_index[2];
 };
 
 /**
@@ -176,6 +220,7 @@ struct arena_info {
  * @init_lock:         Mutex used for the BTT initialization
  * @init_state:                Flag describing the initialization state for the BTT
  * @num_arenas:                Number of arenas in the BTT instance
+ * @phys_bb:           Pointer to the namespace's badblocks structure
  */
 struct btt {
        struct gendisk *btt_disk;
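
The offset arithmetic in btt.c only holds if dropping the padding[] member really shrinks an entry to 16 bytes and a four-entry group to 64. A standalone check of those size assumptions, with __le32 modeled as a plain uint32_t for a userspace build:

    #include <stdint.h>

    typedef uint32_t le32;          /* stand-in for __le32 in userspace */

    struct log_entry {
            le32 lba;
            le32 old_map;
            le32 new_map;
            le32 seq;
    };

    struct log_group {
            struct log_entry ent[4];
    };

    _Static_assert(sizeof(struct log_entry) == 16, "entry must be 16 bytes");
    _Static_assert(sizeof(struct log_group) == 64, "group must be 64 bytes");

    int main(void) { return 0; }
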
index 65cc171..2adada1 100644 (file)
@@ -364,9 +364,9 @@ struct device *nd_pfn_create(struct nd_region *nd_region)
 int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
 {
        u64 checksum, offset;
-       unsigned long align;
        enum nd_pfn_mode mode;
        struct nd_namespace_io *nsio;
+       unsigned long align, start_pad;
        struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
        struct nd_namespace_common *ndns = nd_pfn->ndns;
        const u8 *parent_uuid = nd_dev_to_uuid(&ndns->dev);
@@ -410,6 +410,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
 
        align = le32_to_cpu(pfn_sb->align);
        offset = le64_to_cpu(pfn_sb->dataoff);
+       start_pad = le32_to_cpu(pfn_sb->start_pad);
        if (align == 0)
                align = 1UL << ilog2(offset);
        mode = le32_to_cpu(pfn_sb->mode);
@@ -468,7 +469,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
                return -EBUSY;
        }
 
-       if ((align && !IS_ALIGNED(offset, align))
+       if ((align && !IS_ALIGNED(nsio->res.start + offset + start_pad, align))
                        || !IS_ALIGNED(offset, PAGE_SIZE)) {
                dev_err(&nd_pfn->dev,
                                "bad offset: %#llx dax disabled align: %#lx\n",
@@ -582,6 +583,12 @@ static struct vmem_altmap *__nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
        return altmap;
 }
 
+static u64 phys_pmem_align_down(struct nd_pfn *nd_pfn, u64 phys)
+{
+       return min_t(u64, PHYS_SECTION_ALIGN_DOWN(phys),
+                       ALIGN_DOWN(phys, nd_pfn->align));
+}
+
 static int nd_pfn_init(struct nd_pfn *nd_pfn)
 {
        u32 dax_label_reserve = is_nd_dax(&nd_pfn->dev) ? SZ_128K : 0;
@@ -637,13 +644,16 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
        start = nsio->res.start;
        size = PHYS_SECTION_ALIGN_UP(start + size) - start;
        if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM,
-                               IORES_DESC_NONE) == REGION_MIXED) {
+                               IORES_DESC_NONE) == REGION_MIXED
+                       || !IS_ALIGNED(start + resource_size(&nsio->res),
+                               nd_pfn->align)) {
                size = resource_size(&nsio->res);
-               end_trunc = start + size - PHYS_SECTION_ALIGN_DOWN(start + size);
+               end_trunc = start + size - phys_pmem_align_down(nd_pfn,
+                               start + size);
        }
 
        if (start_pad + end_trunc)
-               dev_info(&nd_pfn->dev, "%s section collision, truncate %d bytes\n",
+               dev_info(&nd_pfn->dev, "%s alignment collision, truncate %d bytes\n",
                                dev_name(&ndns->dev), start_pad + end_trunc);
 
        /*
index f837d66..1e46e60 100644 (file)
@@ -1287,7 +1287,7 @@ static void nvme_config_discard(struct nvme_ctrl *ctrl,
        BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) <
                        NVME_DSM_MAX_RANGES);
 
-       queue->limits.discard_alignment = size;
+       queue->limits.discard_alignment = 0;
        queue->limits.discard_granularity = size;
 
        blk_queue_max_discard_sectors(queue, UINT_MAX);
@@ -1705,7 +1705,8 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
                blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors);
                blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX));
        }
-       if (ctrl->quirks & NVME_QUIRK_STRIPE_SIZE)
+       if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) &&
+           is_power_of_2(ctrl->max_hw_sectors))
                blk_queue_chunk_sectors(q, ctrl->max_hw_sectors);
        blk_queue_virt_boundary(q, ctrl->page_size - 1);
        if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
@@ -2869,7 +2870,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 
        blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
        nvme_set_queue_limits(ctrl, ns->queue);
-       nvme_setup_streams_ns(ctrl, ns);
 
        id = nvme_identify_ns(ctrl, nsid);
        if (!id)
@@ -2880,6 +2880,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 
        if (nvme_init_ns_head(ns, nsid, id, &new))
                goto out_free_id;
+       nvme_setup_streams_ns(ctrl, ns);
        
 #ifdef CONFIG_NVME_MULTIPATH
        /*
@@ -2965,8 +2966,6 @@ static void nvme_ns_remove(struct nvme_ns *ns)
                return;
 
        if (ns->disk && ns->disk->flags & GENHD_FL_UP) {
-               if (blk_get_integrity(ns->disk))
-                       blk_integrity_unregister(ns->disk);
                nvme_mpath_remove_disk_links(ns);
                sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
                                        &nvme_ns_id_attr_group);
@@ -2974,6 +2973,8 @@ static void nvme_ns_remove(struct nvme_ns *ns)
                        nvme_nvm_unregister_sysfs(ns);
                del_gendisk(ns->disk);
                blk_cleanup_queue(ns->queue);
+               if (blk_get_integrity(ns->disk))
+                       blk_integrity_unregister(ns->disk);
        }
 
        mutex_lock(&ns->ctrl->subsys->lock);
index 0a8af4d..794e66e 100644 (file)
@@ -3221,7 +3221,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 
                /* initiate nvme ctrl ref counting teardown */
                nvme_uninit_ctrl(&ctrl->ctrl);
-               nvme_put_ctrl(&ctrl->ctrl);
 
                /* Remove core ctrl ref. */
                nvme_put_ctrl(&ctrl->ctrl);
index a25fed5..41b740a 100644 (file)
@@ -1692,3 +1692,36 @@ void lba_set_iregs(struct parisc_device *lba, u32 ibase, u32 imask)
        iounmap(base_addr);
 }
 
+
+/*
+ * The design of the Diva management card in rp34x0 machines (rp3410, rp3440)
+ * seems rushed, so that many built-in components simply don't work.
+ * The following quirks disable the serial AUX port and the built-in ATI RV100
+ * Radeon 7000 graphics card, neither of which has any external connector
+ * and thus both are useless. Worse, the AUX port occupies ttyS0, making
+ * these the only PARISC machines on which ttyS0 cannot be used as the
+ * boot console.
+ */
+static void quirk_diva_ati_card(struct pci_dev *dev)
+{
+       if (dev->subsystem_vendor != PCI_VENDOR_ID_HP ||
+           dev->subsystem_device != 0x1292)
+               return;
+
+       dev_info(&dev->dev, "Hiding Diva built-in ATI card");
+       dev->device = 0;
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_RADEON_QY,
+       quirk_diva_ati_card);
+
+static void quirk_diva_aux_disable(struct pci_dev *dev)
+{
+       if (dev->subsystem_vendor != PCI_VENDOR_ID_HP ||
+           dev->subsystem_device != 0x1291)
+               return;
+
+       dev_info(&dev->dev, "Hiding Diva built-in AUX serial device");
+       dev->device = 0;
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_DIVA_AUX,
+       quirk_diva_aux_disable);
index 945099d..14fd865 100644 (file)
@@ -1012,7 +1012,12 @@ static int pci_pm_thaw_noirq(struct device *dev)
        if (pci_has_legacy_pm_support(pci_dev))
                return pci_legacy_resume_early(dev);
 
-       pci_update_current_state(pci_dev, PCI_D0);
+       /*
+        * pci_restore_state() requires the device to be in D0 (because of MSI
+        * restoration among other things), so force it into D0 in case the
+        * driver's "freeze" callbacks put it into a low-power state directly.
+        */
+       pci_set_power_state(pci_dev, PCI_D0);
        pci_restore_state(pci_dev);
 
        if (drv && drv->pm && drv->pm->thaw_noirq)
index bdedb63..4471fd9 100644 (file)
@@ -1620,6 +1620,22 @@ static int chv_gpio_probe(struct chv_pinctrl *pctrl, int irq)
                        clear_bit(i, chip->irq.valid_mask);
        }
 
+       /*
+        * The same set of machines in chv_no_valid_mask[] has incorrectly
+        * configured GPIOs that generate spurious interrupts, so we use
+        * the same list to apply another quirk for them.
+        *
+        * See also https://bugzilla.kernel.org/show_bug.cgi?id=197953.
+        */
+       if (!need_valid_mask) {
+               /*
+                * Mask all interrupts the community is able to generate
+                * but leave the ones that can only generate GPEs unmasked.
+                */
+               chv_writel(GENMASK(31, pctrl->community->nirqs),
+                          pctrl->regs + CHV_INTMASK);
+       }
+
        /* Clear all interrupts */
        chv_writel(0xffff, pctrl->regs + CHV_INTSTAT);
 
index 6c81520..3614df6 100644 (file)
@@ -5386,6 +5386,13 @@ out:
 }
 EXPORT_SYMBOL_GPL(qeth_poll);
 
+static int qeth_setassparms_inspect_rc(struct qeth_ipa_cmd *cmd)
+{
+       if (!cmd->hdr.return_code)
+               cmd->hdr.return_code = cmd->data.setassparms.hdr.return_code;
+       return cmd->hdr.return_code;
+}
+
 int qeth_setassparms_cb(struct qeth_card *card,
                        struct qeth_reply *reply, unsigned long data)
 {
@@ -6242,7 +6249,7 @@ static int qeth_ipa_checksum_run_cmd_cb(struct qeth_card *card,
                                (struct qeth_checksum_cmd *)reply->param;
 
        QETH_CARD_TEXT(card, 4, "chkdoccb");
-       if (cmd->hdr.return_code)
+       if (qeth_setassparms_inspect_rc(cmd))
                return 0;
 
        memset(chksum_cb, 0, sizeof(*chksum_cb));
index 6e3d819..d522654 100644 (file)
@@ -1725,6 +1725,7 @@ struct aac_dev
 #define FIB_CONTEXT_FLAG_NATIVE_HBA            (0x00000010)
 #define FIB_CONTEXT_FLAG_NATIVE_HBA_TMF        (0x00000020)
 #define FIB_CONTEXT_FLAG_SCSI_CMD      (0x00000040)
+#define FIB_CONTEXT_FLAG_EH_RESET      (0x00000080)
 
 /*
  *     Define the command values
index bdf127a..d55332d 100644 (file)
@@ -1037,7 +1037,7 @@ static int aac_eh_bus_reset(struct scsi_cmnd* cmd)
                        info = &aac->hba_map[bus][cid];
                        if (bus >= AAC_MAX_BUSES || cid >= AAC_MAX_TARGETS ||
                            info->devtype != AAC_DEVTYPE_NATIVE_RAW) {
-                               fib->flags |= FIB_CONTEXT_FLAG_TIMED_OUT;
+                               fib->flags |= FIB_CONTEXT_FLAG_EH_RESET;
                                cmd->SCp.phase = AAC_OWNER_ERROR_HANDLER;
                        }
                }
index a4f28b7..e188771 100644 (file)
@@ -1576,7 +1576,9 @@ static struct request *_make_request(struct request_queue *q, bool has_write,
                return req;
 
        for_each_bio(bio) {
-               ret = blk_rq_append_bio(req, bio);
+               struct bio *bounce_bio = bio;
+
+               ret = blk_rq_append_bio(req, &bounce_bio);
                if (ret)
                        return ERR_PTR(ret);
        }
index 449ef5a..dfb8da8 100644 (file)
@@ -374,10 +374,8 @@ int scsi_dev_info_list_add_keyed(int compatible, char *vendor, char *model,
                            model, compatible);
 
        if (strflags)
-               devinfo->flags = simple_strtoul(strflags, NULL, 0);
-       else
-               devinfo->flags = flags;
-
+               flags = (__force blist_flags_t)simple_strtoul(strflags, NULL, 0);
+       devinfo->flags = flags;
        devinfo->compatible = compatible;
 
        if (compatible)
index be5e919..0880d97 100644 (file)
@@ -770,7 +770,7 @@ static int scsi_probe_lun(struct scsi_device *sdev, unsigned char *inq_result,
  *     SCSI_SCAN_LUN_PRESENT: a new scsi_device was allocated and initialized
  **/
 static int scsi_add_lun(struct scsi_device *sdev, unsigned char *inq_result,
-               int *bflags, int async)
+               blist_flags_t *bflags, int async)
 {
        int ret;
 
@@ -1049,14 +1049,15 @@ static unsigned char *scsi_inq_str(unsigned char *buf, unsigned char *inq,
  *   - SCSI_SCAN_LUN_PRESENT: a new scsi_device was allocated and initialized
  **/
 static int scsi_probe_and_add_lun(struct scsi_target *starget,
-                                 u64 lun, int *bflagsp,
+                                 u64 lun, blist_flags_t *bflagsp,
                                  struct scsi_device **sdevp,
                                  enum scsi_scan_mode rescan,
                                  void *hostdata)
 {
        struct scsi_device *sdev;
        unsigned char *result;
-       int bflags, res = SCSI_SCAN_NO_RESPONSE, result_len = 256;
+       blist_flags_t bflags;
+       int res = SCSI_SCAN_NO_RESPONSE, result_len = 256;
        struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
 
        /*
@@ -1201,7 +1202,7 @@ static int scsi_probe_and_add_lun(struct scsi_target *starget,
  *     Modifies sdevscan->lun.
  **/
 static void scsi_sequential_lun_scan(struct scsi_target *starget,
-                                    int bflags, int scsi_level,
+                                    blist_flags_t bflags, int scsi_level,
                                     enum scsi_scan_mode rescan)
 {
        uint max_dev_lun;
@@ -1292,7 +1293,7 @@ static void scsi_sequential_lun_scan(struct scsi_target *starget,
  *     0: scan completed (or no memory, so further scanning is futile)
  *     1: could not scan with REPORT LUN
  **/
-static int scsi_report_lun_scan(struct scsi_target *starget, int bflags,
+static int scsi_report_lun_scan(struct scsi_target *starget, blist_flags_t bflags,
                                enum scsi_scan_mode rescan)
 {
        unsigned char scsi_cmd[MAX_COMMAND_SIZE];
@@ -1538,7 +1539,7 @@ static void __scsi_scan_target(struct device *parent, unsigned int channel,
                unsigned int id, u64 lun, enum scsi_scan_mode rescan)
 {
        struct Scsi_Host *shost = dev_to_shost(parent);
-       int bflags = 0;
+       blist_flags_t bflags = 0;
        int res;
        struct scsi_target *starget;
 
index 50e7d7e..a9996c1 100644 (file)
@@ -967,7 +967,8 @@ sdev_show_wwid(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR(wwid, S_IRUGO, sdev_show_wwid, NULL);
 
-#define BLIST_FLAG_NAME(name) [ilog2(BLIST_##name)] = #name
+#define BLIST_FLAG_NAME(name)                                  \
+       [ilog2((__force unsigned int)BLIST_##name)] = #name
 static const char *const sdev_bflags_name[] = {
 #include "scsi_devinfo_tbl.c"
 };
@@ -984,7 +985,7 @@ sdev_show_blacklist(struct device *dev, struct device_attribute *attr,
        for (i = 0; i < sizeof(sdev->sdev_bflags) * BITS_PER_BYTE; i++) {
                const char *name = NULL;
 
-               if (!(sdev->sdev_bflags & BIT(i)))
+               if (!(sdev->sdev_bflags & (__force blist_flags_t)BIT(i)))
                        continue;
                if (i < ARRAY_SIZE(sdev_bflags_name) && sdev_bflags_name[i])
                        name = sdev_bflags_name[i];
index d0219e3..10ebb21 100644 (file)
 
 /* Our blacklist flags */
 enum {
-       SPI_BLIST_NOIUS = 0x1,
+       SPI_BLIST_NOIUS = (__force blist_flags_t)0x1,
 };
 
 /* blacklist table, modelled on scsi_devinfo.c */
 static struct {
        char *vendor;
        char *model;
-       unsigned flags;
+       blist_flags_t flags;
 } spi_static_device_list[] __initdata = {
        {"HP", "Ultrium 3-SCSI", SPI_BLIST_NOIUS },
        {"IBM", "ULTRIUM-TD3", SPI_BLIST_NOIUS },
@@ -221,9 +221,11 @@ static int spi_device_configure(struct transport_container *tc,
 {
        struct scsi_device *sdev = to_scsi_device(dev);
        struct scsi_target *starget = sdev->sdev_target;
-       unsigned bflags = scsi_get_device_flags_keyed(sdev, &sdev->inquiry[8],
-                                                     &sdev->inquiry[16],
-                                                     SCSI_DEVINFO_SPI);
+       blist_flags_t bflags;
+
+       bflags = scsi_get_device_flags_keyed(sdev, &sdev->inquiry[8],
+                                            &sdev->inquiry[16],
+                                            SCSI_DEVINFO_SPI);
 
        /* Populate the target capability fields with the values
         * gleaned from the device inquiry */
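
The __force casts threaded through these SCSI hunks are the usual companion of a sparse __bitwise typedef: blacklist flags get a distinct type so mixing them with plain integers becomes a sparse warning rather than a silent conversion. A reduced sketch of the pattern; outside a sparse run the annotations compile to nothing, and the two flag names here are arbitrary examples rather than the real list:

    #include <stdio.h>

    /* Outside a sparse (__CHECKER__) run, the annotations are no-ops. */
    #ifdef __CHECKER__
    #define __bitwise __attribute__((bitwise))
    #define __force   __attribute__((force))
    #else
    #define __bitwise
    #define __force
    #endif

    typedef unsigned int __bitwise blist_flags_t;

    #define BLIST_EXAMPLE_A ((__force blist_flags_t)(1 << 0))
    #define BLIST_EXAMPLE_B ((__force blist_flags_t)(1 << 1))

    int main(void)
    {
            blist_flags_t f = BLIST_EXAMPLE_A | BLIST_EXAMPLE_B;

            /* Going back to a plain integer takes an explicit __force. */
            printf("%#x\n", (__force unsigned int)f);
            return 0;
    }
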
index 77fe55c..d653453 100644 (file)
@@ -79,6 +79,7 @@
 #define A3700_SPI_BYTE_LEN             BIT(5)
 #define A3700_SPI_CLK_PRESCALE         BIT(0)
 #define A3700_SPI_CLK_PRESCALE_MASK    (0x1f)
+#define A3700_SPI_CLK_EVEN_OFFS                (0x10)
 
 #define A3700_SPI_WFIFO_THRS_BIT       28
 #define A3700_SPI_RFIFO_THRS_BIT       24
@@ -220,6 +221,13 @@ static void a3700_spi_clock_set(struct a3700_spi *a3700_spi,
 
        prescale = DIV_ROUND_UP(clk_get_rate(a3700_spi->clk), speed_hz);
 
+       /*
+        * For prescaler values over 15, the divider can only be set in
+        * steps of 2. Starting from A3700_SPI_CLK_EVEN_OFFS, the encoding
+        * covers values from 0 up to 30; we only use the 16 to 30 range.
+        */
+       if (prescale > 15)
+               prescale = A3700_SPI_CLK_EVEN_OFFS + DIV_ROUND_UP(prescale, 2);
+
        val = spireg_read(a3700_spi, A3700_SPI_IF_CFG_REG);
        val = val & ~A3700_SPI_CLK_PRESCALE_MASK;
 
index f95da36..6694709 100644 (file)
@@ -1661,12 +1661,12 @@ static int atmel_spi_remove(struct platform_device *pdev)
        pm_runtime_get_sync(&pdev->dev);
 
        /* reset the hardware and block queue progress */
-       spin_lock_irq(&as->lock);
        if (as->use_dma) {
                atmel_spi_stop_dma(master);
                atmel_spi_release_dma(master);
        }
 
+       spin_lock_irq(&as->lock);
        spi_writel(as, CR, SPI_BIT(SWRST));
        spi_writel(as, CR, SPI_BIT(SWRST)); /* AT91SAM9263 Rev B workaround */
        spi_readl(as, SR);
index 2ce8757..0835a8d 100644 (file)
@@ -377,8 +377,8 @@ static int qspi_set_config_register(struct rspi_data *rspi, int access_size)
        /* Sets SPCMD */
        rspi_write16(rspi, rspi->spcmd, RSPI_SPCMD0);
 
-       /* Enables SPI function in master mode */
-       rspi_write8(rspi, SPCR_SPE | SPCR_MSTR, RSPI_SPCR);
+       /* Sets RSPI mode */
+       rspi_write8(rspi, SPCR_MSTR, RSPI_SPCR);
 
        return 0;
 }
index c5cd635..4141003 100644 (file)
@@ -525,7 +525,7 @@ err_free_master:
 
 static int sun4i_spi_remove(struct platform_device *pdev)
 {
-       pm_runtime_disable(&pdev->dev);
+       pm_runtime_force_suspend(&pdev->dev);
 
        return 0;
 }
index bc7100b..e0b9fe1 100644 (file)
@@ -271,6 +271,7 @@ static int xilinx_spi_txrx_bufs(struct spi_device *spi, struct spi_transfer *t)
        while (remaining_words) {
                int n_words, tx_words, rx_words;
                u32 sr;
+               int stalled;
 
                n_words = min(remaining_words, xspi->buffer_size);
 
@@ -299,7 +300,17 @@ static int xilinx_spi_txrx_bufs(struct spi_device *spi, struct spi_transfer *t)
 
                /* Read out all the data from the Rx FIFO */
                rx_words = n_words;
+               stalled = 10;
                while (rx_words) {
+                       if (rx_words == n_words && !(stalled--) &&
+                           !(sr & XSPI_SR_TX_EMPTY_MASK) &&
+                           (sr & XSPI_SR_RX_EMPTY_MASK)) {
+                               dev_err(&spi->dev,
+                                       "Detected stall. Check C_SPI_MODE and C_SPI_MEMORY\n");
+                               xspi_init_hw(xspi);
+                               return -EIO;
+                       }
+
                        if ((sr & XSPI_SR_TX_EMPTY_MASK) && (rx_words > 1)) {
                                xilinx_spi_rx(xspi);
                                rx_words--;
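
The added guard is a bounded-retry idiom: if a fixed number of polls pass with the receive side making no progress while the transmitter still has data queued, give up and reset instead of spinning forever. A generic, compilable sketch with the hardware access replaced by a hypothetical poll stub:

    #include <stdio.h>

    /* Hypothetical poll: returns the number of words drained this pass. */
    static int poll_rx(void)
    {
            return 0;               /* simulate a wedged receive FIFO */
    }

    static int drain_fifo(int words)
    {
            int stalled = 10;       /* bounded retries, as in the patch */

            while (words) {
                    int got = poll_rx();

                    if (!got && !(stalled--)) {
                            fprintf(stderr, "detected stall, resetting\n");
                            return -1;  /* caller reinitializes the core */
                    }
                    words -= got;
            }
            return 0;
    }

    int main(void)
    {
            return drain_fifo(8) ? 1 : 0;
    }
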
index 7c69b4a..0d99b24 100644 (file)
@@ -920,7 +920,7 @@ pscsi_map_sg(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
                                        " %d i: %d bio: %p, allocating another"
                                        " bio\n", bio->bi_vcnt, i, bio);
 
-                               rc = blk_rq_append_bio(req, bio);
+                               rc = blk_rq_append_bio(req, &bio);
                                if (rc) {
                                        pr_err("pSCSI: failed to append bio\n");
                                        goto fail;
@@ -938,7 +938,7 @@ pscsi_map_sg(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
        }
 
        if (bio) {
-               rc = blk_rq_append_bio(req, bio);
+               rc = blk_rq_append_bio(req, &bio);
                if (rc) {
                        pr_err("pSCSI: failed to append bio\n");
                        goto fail;
index f77e499..065f0b6 100644 (file)
@@ -257,10 +257,25 @@ static void release_memory_resource(struct resource *resource)
        kfree(resource);
 }
 
+/*
+ * Host memory not allocated to dom0. We can use this range for hotplug-based
+ * ballooning.
+ *
+ * It's a type-less resource. Setting IORESOURCE_MEM will make resource
+ * management algorithms (arch_remove_reservations()) look into guest e820,
+ * which we don't want.
+ */
+static struct resource hostmem_resource = {
+       .name   = "Host RAM",
+};
+
+void __attribute__((weak)) __init arch_xen_balloon_init(struct resource *res)
+{}
+
 static struct resource *additional_memory_resource(phys_addr_t size)
 {
-       struct resource *res;
-       int ret;
+       struct resource *res, *res_hostmem;
+       int ret = -ENOMEM;
 
        res = kzalloc(sizeof(*res), GFP_KERNEL);
        if (!res)
@@ -269,13 +284,42 @@ static struct resource *additional_memory_resource(phys_addr_t size)
        res->name = "System RAM";
        res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
 
-       ret = allocate_resource(&iomem_resource, res,
-                               size, 0, -1,
-                               PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL);
-       if (ret < 0) {
-               pr_err("Cannot allocate new System RAM resource\n");
-               kfree(res);
-               return NULL;
+       res_hostmem = kzalloc(sizeof(*res), GFP_KERNEL);
+       if (res_hostmem) {
+               /* Try to grab a range from hostmem */
+               res_hostmem->name = "Host memory";
+               ret = allocate_resource(&hostmem_resource, res_hostmem,
+                                       size, 0, -1,
+                                       PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL);
+       }
+
+       if (!ret) {
+               /*
+                * Insert this resource into iomem. Because hostmem_resource
+                * tracks the portion of the guest e820 marked as UNUSABLE,
+                * no one else should try to use it.
+                */
+               res->start = res_hostmem->start;
+               res->end = res_hostmem->end;
+               ret = insert_resource(&iomem_resource, res);
+               if (ret < 0) {
+                       pr_err("Can't insert iomem_resource [%llx - %llx]\n",
+                               res->start, res->end);
+                       release_memory_resource(res_hostmem);
+                       res_hostmem = NULL;
+                       res->start = res->end = 0;
+               }
+       }
+
+       if (ret) {
+               ret = allocate_resource(&iomem_resource, res,
+                                       size, 0, -1,
+                                       PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL);
+               if (ret < 0) {
+                       pr_err("Cannot allocate new System RAM resource\n");
+                       kfree(res);
+                       return NULL;
+               }
        }
 
 #ifdef CONFIG_SPARSEMEM
@@ -287,6 +331,7 @@ static struct resource *additional_memory_resource(phys_addr_t size)
                        pr_err("New System RAM resource outside addressable RAM (%lu > %lu)\n",
                               pfn, limit);
                        release_memory_resource(res);
+                       release_memory_resource(res_hostmem);
                        return NULL;
                }
        }
@@ -765,6 +810,8 @@ static int __init balloon_init(void)
        set_online_page_callback(&xen_online_page);
        register_memory_notifier(&xen_memory_nb);
        register_sysctl_table(xen_root);
+
+       arch_xen_balloon_init(&hostmem_resource);
 #endif
 
 #ifdef CONFIG_XEN_PV
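
The allocation strategy introduced here is try-the-preferred-pool-then-fall-back: carve the range out of hostmem_resource when possible, and only on failure take the old path through iomem_resource. A toy model of that control flow (the pools are just labeled stand-ins, not the kernel API):

    #include <stdio.h>

    /* Toy allocator: 0 on success, -1 when the pool is exhausted. */
    static int alloc_from(const char *pool, int available)
    {
            if (!available)
                    return -1;
            printf("allocated from %s\n", pool);
            return 0;
    }

    static int balloon_alloc(int hostmem_available)
    {
            /* Preferred: the unused host-memory window tracked separately. */
            if (alloc_from("hostmem", hostmem_available) == 0)
                    return 0;

            /* Fallback: the general iomem resource tree, as before. */
            return alloc_from("iomem", 1);
    }

    int main(void)
    {
            balloon_alloc(0);       /* hostmem exhausted, falls back */
            balloon_alloc(1);       /* takes the preferred path */
            return 0;
    }
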
index 0da8001..83ed771 100644 (file)
@@ -702,7 +702,7 @@ xfs_alloc_ag_vextent(
        ASSERT(args->agbno % args->alignment == 0);
 
        /* if not file data, insert new block into the reverse map btree */
-       if (args->oinfo.oi_owner != XFS_RMAP_OWN_UNKNOWN) {
+       if (!xfs_rmap_should_skip_owner_update(&args->oinfo)) {
                error = xfs_rmap_alloc(args->tp, args->agbp, args->agno,
                                       args->agbno, args->len, &args->oinfo);
                if (error)
@@ -1682,7 +1682,7 @@ xfs_free_ag_extent(
        bno_cur = cnt_cur = NULL;
        mp = tp->t_mountp;
 
-       if (oinfo->oi_owner != XFS_RMAP_OWN_UNKNOWN) {
+       if (!xfs_rmap_should_skip_owner_update(oinfo)) {
                error = xfs_rmap_free(tp, agbp, agno, bno, len, oinfo);
                if (error)
                        goto error0;
index 6249c92..a76914d 100644 (file)
@@ -212,6 +212,7 @@ xfs_attr_set(
        int                     flags)
 {
        struct xfs_mount        *mp = dp->i_mount;
+       struct xfs_buf          *leaf_bp = NULL;
        struct xfs_da_args      args;
        struct xfs_defer_ops    dfops;
        struct xfs_trans_res    tres;
@@ -327,9 +328,16 @@ xfs_attr_set(
                 * GROT: another possible req'mt for a double-split btree op.
                 */
                xfs_defer_init(args.dfops, args.firstblock);
-               error = xfs_attr_shortform_to_leaf(&args);
+               error = xfs_attr_shortform_to_leaf(&args, &leaf_bp);
                if (error)
                        goto out_defer_cancel;
+               /*
+                * Prevent the leaf buffer from being unlocked so that a
+                * concurrent AIL push cannot grab the half-baked leaf
+                * buffer and run into problems with the write verifier.
+                */
+               xfs_trans_bhold(args.trans, leaf_bp);
+               xfs_defer_bjoin(args.dfops, leaf_bp);
                xfs_defer_ijoin(args.dfops, dp);
                error = xfs_defer_finish(&args.trans, args.dfops);
                if (error)
@@ -337,13 +345,14 @@ xfs_attr_set(
 
                /*
                 * Commit the leaf transformation.  We'll need another (linked)
-                * transaction to add the new attribute to the leaf.
+                * transaction to add the new attribute to the leaf, which
+                * means that we have to hold & join the leaf buffer here too.
                 */
-
                error = xfs_trans_roll_inode(&args.trans, dp);
                if (error)
                        goto out;
-
+               xfs_trans_bjoin(args.trans, leaf_bp);
+               leaf_bp = NULL;
        }
 
        if (xfs_bmap_one_block(dp, XFS_ATTR_FORK))
@@ -374,8 +383,9 @@ xfs_attr_set(
 
 out_defer_cancel:
        xfs_defer_cancel(&dfops);
-       args.trans = NULL;
 out:
+       if (leaf_bp)
+               xfs_trans_brelse(args.trans, leaf_bp);
        if (args.trans)
                xfs_trans_cancel(args.trans);
        xfs_iunlock(dp, XFS_ILOCK_EXCL);
index 53cc8b9..601eaa3 100644 (file)
@@ -735,10 +735,13 @@ xfs_attr_shortform_getvalue(xfs_da_args_t *args)
 }
 
 /*
- * Convert from using the shortform to the leaf.
+ * Convert from using the shortform to the leaf.  On success, return the
+ * buffer so that we can keep it locked until we're totally done with it.
  */
 int
-xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
+xfs_attr_shortform_to_leaf(
+       struct xfs_da_args      *args,
+       struct xfs_buf          **leaf_bp)
 {
        xfs_inode_t *dp;
        xfs_attr_shortform_t *sf;
@@ -818,7 +821,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
                sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
        }
        error = 0;
-
+       *leaf_bp = bp;
 out:
        kmem_free(tmpbuffer);
        return error;
index f7dda0c..894124e 100644 (file)
@@ -48,7 +48,8 @@ void  xfs_attr_shortform_create(struct xfs_da_args *args);
 void   xfs_attr_shortform_add(struct xfs_da_args *args, int forkoff);
 int    xfs_attr_shortform_lookup(struct xfs_da_args *args);
 int    xfs_attr_shortform_getvalue(struct xfs_da_args *args);
-int    xfs_attr_shortform_to_leaf(struct xfs_da_args *args);
+int    xfs_attr_shortform_to_leaf(struct xfs_da_args *args,
+                       struct xfs_buf **leaf_bp);
 int    xfs_attr_shortform_remove(struct xfs_da_args *args);
 int    xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp);
 int    xfs_attr_shortform_bytesfit(struct xfs_inode *dp, int bytes);
index 1210f68..1bddbba 100644 (file)
@@ -5136,7 +5136,7 @@ __xfs_bunmapi(
         * blowing out the transaction with a mix of EFIs and reflink
         * adjustments.
         */
-       if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK)
+       if (tp && xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK)
                max_len = min(len, xfs_refcount_max_unmap(tp->t_log_res));
        else
                max_len = len;
index 072ebfe..087fea0 100644 (file)
@@ -249,6 +249,10 @@ xfs_defer_trans_roll(
        for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++)
                xfs_trans_log_inode(*tp, dop->dop_inodes[i], XFS_ILOG_CORE);
 
+       /* Hold the (previously bjoin'd) buffer locked across the roll. */
+       for (i = 0; i < XFS_DEFER_OPS_NR_BUFS && dop->dop_bufs[i]; i++)
+               xfs_trans_dirty_buf(*tp, dop->dop_bufs[i]);
+
        trace_xfs_defer_trans_roll((*tp)->t_mountp, dop);
 
        /* Roll the transaction. */
@@ -264,6 +268,12 @@ xfs_defer_trans_roll(
        for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++)
                xfs_trans_ijoin(*tp, dop->dop_inodes[i], 0);
 
+       /* Rejoin the buffers and dirty them so the log moves forward. */
+       for (i = 0; i < XFS_DEFER_OPS_NR_BUFS && dop->dop_bufs[i]; i++) {
+               xfs_trans_bjoin(*tp, dop->dop_bufs[i]);
+               xfs_trans_bhold(*tp, dop->dop_bufs[i]);
+       }
+
        return error;
 }
 
@@ -295,6 +305,31 @@ xfs_defer_ijoin(
                }
        }
 
+       ASSERT(0);
+       return -EFSCORRUPTED;
+}
+
+/*
+ * Add this buffer to the deferred op.  Each joined buffer is relogged
+ * each time we roll the transaction.
+ */
+int
+xfs_defer_bjoin(
+       struct xfs_defer_ops            *dop,
+       struct xfs_buf                  *bp)
+{
+       int                             i;
+
+       for (i = 0; i < XFS_DEFER_OPS_NR_BUFS; i++) {
+               if (dop->dop_bufs[i] == bp)
+                       return 0;
+               else if (dop->dop_bufs[i] == NULL) {
+                       dop->dop_bufs[i] = bp;
+                       return 0;
+               }
+       }
+
+       ASSERT(0);
        return -EFSCORRUPTED;
 }
 
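
xfs_defer_bjoin() is an idempotent registration into a fixed two-slot array: re-joining the same buffer is a no-op, a free slot is claimed, and a full table is treated as corruption. A userspace model of exactly that behavior:

    #include <stdio.h>

    #define NR_BUFS 2

    /*
     * Model of xfs_defer_bjoin(): register a pointer in a fixed slot
     * array, idempotently; fail when both slots are already taken.
     */
    static int bjoin(void *slots[NR_BUFS], void *bp)
    {
            int i;

            for (i = 0; i < NR_BUFS; i++) {
                    if (slots[i] == bp)
                            return 0;       /* already joined */
                    if (slots[i] == NULL) {
                            slots[i] = bp;
                            return 0;
                    }
            }
            return -1;                      /* -EFSCORRUPTED in the kernel */
    }

    int main(void)
    {
            void *slots[NR_BUFS] = { 0 };
            int a, b, c;

            a = bjoin(slots, (void *)0x1);
            b = bjoin(slots, (void *)0x1);  /* idempotent */
            c = bjoin(slots, (void *)0x2);
            printf("%d %d %d -> slots full: %d\n", a, b, c,
                   bjoin(slots, (void *)0x3));
            return 0;
    }
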
@@ -493,9 +528,7 @@ xfs_defer_init(
        struct xfs_defer_ops            *dop,
        xfs_fsblock_t                   *fbp)
 {
-       dop->dop_committed = false;
-       dop->dop_low = false;
-       memset(&dop->dop_inodes, 0, sizeof(dop->dop_inodes));
+       memset(dop, 0, sizeof(struct xfs_defer_ops));
        *fbp = NULLFSBLOCK;
        INIT_LIST_HEAD(&dop->dop_intake);
        INIT_LIST_HEAD(&dop->dop_pending);
index d4f046d..045beac 100644 (file)
@@ -59,6 +59,7 @@ enum xfs_defer_ops_type {
 };
 
 #define XFS_DEFER_OPS_NR_INODES        2       /* join up to two inodes */
+#define XFS_DEFER_OPS_NR_BUFS  2       /* join up to two buffers */
 
 struct xfs_defer_ops {
        bool                    dop_committed;  /* did any trans commit? */
@@ -66,8 +67,9 @@ struct xfs_defer_ops {
        struct list_head        dop_intake;     /* unlogged pending work */
        struct list_head        dop_pending;    /* logged pending work */
 
-       /* relog these inodes with each roll */
+       /* relog these with each roll */
        struct xfs_inode        *dop_inodes[XFS_DEFER_OPS_NR_INODES];
+       struct xfs_buf          *dop_bufs[XFS_DEFER_OPS_NR_BUFS];
 };
 
 void xfs_defer_add(struct xfs_defer_ops *dop, enum xfs_defer_ops_type type,
@@ -77,6 +79,7 @@ void xfs_defer_cancel(struct xfs_defer_ops *dop);
 void xfs_defer_init(struct xfs_defer_ops *dop, xfs_fsblock_t *fbp);
 bool xfs_defer_has_unfinished_work(struct xfs_defer_ops *dop);
 int xfs_defer_ijoin(struct xfs_defer_ops *dop, struct xfs_inode *ip);
+int xfs_defer_bjoin(struct xfs_defer_ops *dop, struct xfs_buf *bp);
 
 /* Description of a deferred type. */
 struct xfs_defer_op_type {
index 89bf16b..b0f3179 100644 (file)
@@ -632,8 +632,6 @@ xfs_iext_insert(
        struct xfs_iext_leaf    *new = NULL;
        int                     nr_entries, i;
 
-       trace_xfs_iext_insert(ip, cur, state, _RET_IP_);
-
        if (ifp->if_height == 0)
                xfs_iext_alloc_root(ifp, cur);
        else if (ifp->if_height == 1)
@@ -661,6 +659,8 @@ xfs_iext_insert(
        xfs_iext_set(cur_rec(cur), irec);
        ifp->if_bytes += sizeof(struct xfs_iext_rec);
 
+       trace_xfs_iext_insert(ip, cur, state, _RET_IP_);
+
        if (new)
                xfs_iext_insert_node(ifp, xfs_iext_leaf_key(new, 0), new, 2);
 }
index 585b35d..c40d267 100644 (file)
@@ -1488,27 +1488,12 @@ __xfs_refcount_cow_alloc(
        xfs_extlen_t            aglen,
        struct xfs_defer_ops    *dfops)
 {
-       int                     error;
-
        trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_private.a.agno,
                        agbno, aglen);
 
        /* Add refcount btree reservation */
-       error = xfs_refcount_adjust_cow(rcur, agbno, aglen,
+       return xfs_refcount_adjust_cow(rcur, agbno, aglen,
                        XFS_REFCOUNT_ADJUST_COW_ALLOC, dfops);
-       if (error)
-               return error;
-
-       /* Add rmap entry */
-       if (xfs_sb_version_hasrmapbt(&rcur->bc_mp->m_sb)) {
-               error = xfs_rmap_alloc_extent(rcur->bc_mp, dfops,
-                               rcur->bc_private.a.agno,
-                               agbno, aglen, XFS_RMAP_OWN_COW);
-               if (error)
-                       return error;
-       }
-
-       return error;
 }
 
 /*
@@ -1521,27 +1506,12 @@ __xfs_refcount_cow_free(
        xfs_extlen_t            aglen,
        struct xfs_defer_ops    *dfops)
 {
-       int                     error;
-
        trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_private.a.agno,
                        agbno, aglen);
 
        /* Remove refcount btree reservation */
-       error = xfs_refcount_adjust_cow(rcur, agbno, aglen,
+       return xfs_refcount_adjust_cow(rcur, agbno, aglen,
                        XFS_REFCOUNT_ADJUST_COW_FREE, dfops);
-       if (error)
-               return error;
-
-       /* Remove rmap entry */
-       if (xfs_sb_version_hasrmapbt(&rcur->bc_mp->m_sb)) {
-               error = xfs_rmap_free_extent(rcur->bc_mp, dfops,
-                               rcur->bc_private.a.agno,
-                               agbno, aglen, XFS_RMAP_OWN_COW);
-               if (error)
-                       return error;
-       }
-
-       return error;
 }
 
 /* Record a CoW staging extent in the refcount btree. */
@@ -1552,11 +1522,19 @@ xfs_refcount_alloc_cow_extent(
        xfs_fsblock_t                   fsb,
        xfs_extlen_t                    len)
 {
+       int                             error;
+
        if (!xfs_sb_version_hasreflink(&mp->m_sb))
                return 0;
 
-       return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_ALLOC_COW,
+       error = __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_ALLOC_COW,
                        fsb, len);
+       if (error)
+               return error;
+
+       /* Add rmap entry */
+       return xfs_rmap_alloc_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb),
+                       XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
 }
 
 /* Forget a CoW staging event in the refcount btree. */
@@ -1567,9 +1545,17 @@ xfs_refcount_free_cow_extent(
        xfs_fsblock_t                   fsb,
        xfs_extlen_t                    len)
 {
+       int                             error;
+
        if (!xfs_sb_version_hasreflink(&mp->m_sb))
                return 0;
 
+       /* Remove rmap entry */
+       error = xfs_rmap_free_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb),
+                       XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
+       if (error)
+               return error;
+
        return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_FREE_COW,
                        fsb, len);
 }
index dd019ce..50db920 100644 (file)
@@ -367,6 +367,51 @@ xfs_rmap_lookup_le_range(
        return error;
 }
 
+/*
+ * Perform all the relevant owner checks for a removal op.  If we're doing an
+ * unknown-owner removal then we have no owner information to check.
+ */
+static int
+xfs_rmap_free_check_owner(
+       struct xfs_mount        *mp,
+       uint64_t                ltoff,
+       struct xfs_rmap_irec    *rec,
+       xfs_fsblock_t           bno,
+       xfs_filblks_t           len,
+       uint64_t                owner,
+       uint64_t                offset,
+       unsigned int            flags)
+{
+       int                     error = 0;
+
+       if (owner == XFS_RMAP_OWN_UNKNOWN)
+               return 0;
+
+       /* Make sure the unwritten flag matches. */
+       XFS_WANT_CORRUPTED_GOTO(mp, (flags & XFS_RMAP_UNWRITTEN) ==
+                       (rec->rm_flags & XFS_RMAP_UNWRITTEN), out);
+
+       /* Make sure the owner matches what we expect to find in the tree. */
+       XFS_WANT_CORRUPTED_GOTO(mp, owner == rec->rm_owner, out);
+
+       /* Check the offset, if necessary. */
+       if (XFS_RMAP_NON_INODE_OWNER(owner))
+               goto out;
+
+       if (flags & XFS_RMAP_BMBT_BLOCK) {
+               XFS_WANT_CORRUPTED_GOTO(mp, rec->rm_flags & XFS_RMAP_BMBT_BLOCK,
+                               out);
+       } else {
+               XFS_WANT_CORRUPTED_GOTO(mp, rec->rm_offset <= offset, out);
+               XFS_WANT_CORRUPTED_GOTO(mp,
+                               ltoff + rec->rm_blockcount >= offset + len,
+                               out);
+       }
+
+out:
+       return error;
+}
+
 /*
  * Find the extent in the rmap btree and remove it.
  *
@@ -444,33 +489,40 @@ xfs_rmap_unmap(
                goto out_done;
        }
 
-       /* Make sure the unwritten flag matches. */
-       XFS_WANT_CORRUPTED_GOTO(mp, (flags & XFS_RMAP_UNWRITTEN) ==
-                       (ltrec.rm_flags & XFS_RMAP_UNWRITTEN), out_error);
+       /*
+        * If we're doing an unknown-owner removal for EFI recovery, we expect
+        * to find the full range in the rmapbt or nothing at all.  If we
+        * don't find any rmaps overlapping either end of the range, we're
+        * done.  Hopefully this means that the EFI creator already queued
+        * (and finished) a RUI to remove the rmap.
+        */
+       if (owner == XFS_RMAP_OWN_UNKNOWN &&
+           ltrec.rm_startblock + ltrec.rm_blockcount <= bno) {
+               struct xfs_rmap_irec    rtrec;
+
+               error = xfs_btree_increment(cur, 0, &i);
+               if (error)
+                       goto out_error;
+               if (i == 0)
+                       goto out_done;
+               error = xfs_rmap_get_rec(cur, &rtrec, &i);
+               if (error)
+                       goto out_error;
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+               if (rtrec.rm_startblock >= bno + len)
+                       goto out_done;
+       }
 
        /* Make sure the extent we found covers the entire freeing range. */
        XFS_WANT_CORRUPTED_GOTO(mp, ltrec.rm_startblock <= bno &&
-               ltrec.rm_startblock + ltrec.rm_blockcount >=
-               bno + len, out_error);
+                       ltrec.rm_startblock + ltrec.rm_blockcount >=
+                       bno + len, out_error);
 
-       /* Make sure the owner matches what we expect to find in the tree. */
-       XFS_WANT_CORRUPTED_GOTO(mp, owner == ltrec.rm_owner ||
-                                   XFS_RMAP_NON_INODE_OWNER(owner), out_error);
-
-       /* Check the offset, if necessary. */
-       if (!XFS_RMAP_NON_INODE_OWNER(owner)) {
-               if (flags & XFS_RMAP_BMBT_BLOCK) {
-                       XFS_WANT_CORRUPTED_GOTO(mp,
-                                       ltrec.rm_flags & XFS_RMAP_BMBT_BLOCK,
-                                       out_error);
-               } else {
-                       XFS_WANT_CORRUPTED_GOTO(mp,
-                                       ltrec.rm_offset <= offset, out_error);
-                       XFS_WANT_CORRUPTED_GOTO(mp,
-                                       ltoff + ltrec.rm_blockcount >= offset + len,
-                                       out_error);
-               }
-       }
+       /* Check owner information. */
+       error = xfs_rmap_free_check_owner(mp, ltoff, &ltrec, bno, len, owner,
+                       offset, flags);
+       if (error)
+               goto out_error;
 
        if (ltrec.rm_startblock == bno && ltrec.rm_blockcount == len) {
                /* exact match, simply remove the record from rmap tree */
@@ -664,6 +716,7 @@ xfs_rmap_map(
                flags |= XFS_RMAP_UNWRITTEN;
        trace_xfs_rmap_map(mp, cur->bc_private.a.agno, bno, len,
                        unwritten, oinfo);
+       ASSERT(!xfs_rmap_should_skip_owner_update(oinfo));
 
        /*
         * For the initial lookup, look for an exact match or the left-adjacent
index 466ede6..0fcd5b1 100644 (file)
@@ -61,7 +61,21 @@ static inline void
 xfs_rmap_skip_owner_update(
        struct xfs_owner_info   *oi)
 {
-       oi->oi_owner = XFS_RMAP_OWN_UNKNOWN;
+       xfs_rmap_ag_owner(oi, XFS_RMAP_OWN_NULL);
+}
+
+static inline bool
+xfs_rmap_should_skip_owner_update(
+       struct xfs_owner_info   *oi)
+{
+       return oi->oi_owner == XFS_RMAP_OWN_NULL;
+}
+
+static inline void
+xfs_rmap_any_owner_update(
+       struct xfs_owner_info   *oi)
+{
+       xfs_rmap_ag_owner(oi, XFS_RMAP_OWN_UNKNOWN);
 }
 
 /* Reverse mapping functions. */
index 44f8c54..64da906 100644 (file)
@@ -538,7 +538,7 @@ xfs_efi_recover(
                return error;
        efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents);
 
-       xfs_rmap_skip_owner_update(&oinfo);
+       xfs_rmap_any_owner_update(&oinfo);
        for (i = 0; i < efip->efi_format.efi_nextents; i++) {
                extp = &efip->efi_format.efi_extents[i];
                error = xfs_trans_free_extent(tp, efdp, extp->ext_start,
index 8f22fc5..60a2e12 100644 (file)
@@ -571,6 +571,11 @@ xfs_growfs_data_private(
                 * this doesn't actually exist in the rmap btree.
                 */
                xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_NULL);
+               error = xfs_rmap_free(tp, bp, agno,
+                               be32_to_cpu(agf->agf_length) - new,
+                               new, &oinfo);
+               if (error)
+                       goto error0;
                error = xfs_free_extent(tp,
                                XFS_AGB_TO_FSB(mp, agno,
                                        be32_to_cpu(agf->agf_length) - new),
index 43005fb..3861d61 100644 (file)
@@ -870,7 +870,7 @@ xfs_eofblocks_worker(
  * based on the 'speculative_cow_prealloc_lifetime' tunable (5m by default).
  * (We'll just piggyback on the post-EOF prealloc space workqueue.)
  */
-STATIC void
+void
 xfs_queue_cowblocks(
        struct xfs_mount *mp)
 {
@@ -1536,8 +1536,23 @@ xfs_inode_free_quota_eofblocks(
        return __xfs_inode_free_quota_eofblocks(ip, xfs_icache_free_eofblocks);
 }
 
+static inline unsigned long
+xfs_iflag_for_tag(
+       int             tag)
+{
+       switch (tag) {
+       case XFS_ICI_EOFBLOCKS_TAG:
+               return XFS_IEOFBLOCKS;
+       case XFS_ICI_COWBLOCKS_TAG:
+               return XFS_ICOWBLOCKS;
+       default:
+               ASSERT(0);
+               return 0;
+       }
+}
+
 static void
-__xfs_inode_set_eofblocks_tag(
+__xfs_inode_set_blocks_tag(
        xfs_inode_t     *ip,
        void            (*execute)(struct xfs_mount *mp),
        void            (*set_tp)(struct xfs_mount *mp, xfs_agnumber_t agno,
@@ -1552,10 +1567,10 @@ __xfs_inode_set_eofblocks_tag(
         * Don't bother locking the AG and looking up in the radix trees
         * if we already know that we have the tag set.
         */
-       if (ip->i_flags & XFS_IEOFBLOCKS)
+       if (ip->i_flags & xfs_iflag_for_tag(tag))
                return;
        spin_lock(&ip->i_flags_lock);
-       ip->i_flags |= XFS_IEOFBLOCKS;
+       ip->i_flags |= xfs_iflag_for_tag(tag);
        spin_unlock(&ip->i_flags_lock);
 
        pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
@@ -1587,13 +1602,13 @@ xfs_inode_set_eofblocks_tag(
        xfs_inode_t     *ip)
 {
        trace_xfs_inode_set_eofblocks_tag(ip);
-       return __xfs_inode_set_eofblocks_tag(ip, xfs_queue_eofblocks,
+       return __xfs_inode_set_blocks_tag(ip, xfs_queue_eofblocks,
                        trace_xfs_perag_set_eofblocks,
                        XFS_ICI_EOFBLOCKS_TAG);
 }
 
 static void
-__xfs_inode_clear_eofblocks_tag(
+__xfs_inode_clear_blocks_tag(
        xfs_inode_t     *ip,
        void            (*clear_tp)(struct xfs_mount *mp, xfs_agnumber_t agno,
                                    int error, unsigned long caller_ip),
@@ -1603,7 +1618,7 @@ __xfs_inode_clear_eofblocks_tag(
        struct xfs_perag *pag;
 
        spin_lock(&ip->i_flags_lock);
-       ip->i_flags &= ~XFS_IEOFBLOCKS;
+       ip->i_flags &= ~xfs_iflag_for_tag(tag);
        spin_unlock(&ip->i_flags_lock);
 
        pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
@@ -1630,7 +1645,7 @@ xfs_inode_clear_eofblocks_tag(
        xfs_inode_t     *ip)
 {
        trace_xfs_inode_clear_eofblocks_tag(ip);
-       return __xfs_inode_clear_eofblocks_tag(ip,
+       return __xfs_inode_clear_blocks_tag(ip,
                        trace_xfs_perag_clear_eofblocks, XFS_ICI_EOFBLOCKS_TAG);
 }
 
@@ -1724,7 +1739,7 @@ xfs_inode_set_cowblocks_tag(
        xfs_inode_t     *ip)
 {
        trace_xfs_inode_set_cowblocks_tag(ip);
-       return __xfs_inode_set_eofblocks_tag(ip, xfs_queue_cowblocks,
+       return __xfs_inode_set_blocks_tag(ip, xfs_queue_cowblocks,
                        trace_xfs_perag_set_cowblocks,
                        XFS_ICI_COWBLOCKS_TAG);
 }
@@ -1734,6 +1749,6 @@ xfs_inode_clear_cowblocks_tag(
        xfs_inode_t     *ip)
 {
        trace_xfs_inode_clear_cowblocks_tag(ip);
-       return __xfs_inode_clear_eofblocks_tag(ip,
+       return __xfs_inode_clear_blocks_tag(ip,
                        trace_xfs_perag_clear_cowblocks, XFS_ICI_COWBLOCKS_TAG);
 }
index bff4d85..d4a7758 100644 (file)
@@ -81,6 +81,7 @@ void xfs_inode_clear_cowblocks_tag(struct xfs_inode *ip);
 int xfs_icache_free_cowblocks(struct xfs_mount *, struct xfs_eofblocks *);
 int xfs_inode_free_quota_cowblocks(struct xfs_inode *ip);
 void xfs_cowblocks_worker(struct work_struct *);
+void xfs_queue_cowblocks(struct xfs_mount *);
 
 int xfs_inode_ag_iterator(struct xfs_mount *mp,
        int (*execute)(struct xfs_inode *ip, int flags, void *args),
index b41952a..6f95bdb 100644 (file)
@@ -1487,6 +1487,24 @@ xfs_link(
        return error;
 }
 
+/* Clear the reflink flag and the cowblocks tag if possible. */
+static void
+xfs_itruncate_clear_reflink_flags(
+       struct xfs_inode        *ip)
+{
+       struct xfs_ifork        *dfork;
+       struct xfs_ifork        *cfork;
+
+       if (!xfs_is_reflink_inode(ip))
+               return;
+       dfork = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+       cfork = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+       if (dfork->if_bytes == 0 && cfork->if_bytes == 0)
+               ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
+       if (cfork->if_bytes == 0)
+               xfs_inode_clear_cowblocks_tag(ip);
+}
+
 /*
  * Free up the underlying blocks past new_size.  The new size must be smaller
  * than the current size.  This routine can be used both for the attribute and
@@ -1583,15 +1601,7 @@ xfs_itruncate_extents(
        if (error)
                goto out;
 
-       /*
-        * Clear the reflink flag if there are no data fork blocks and
-        * there are no extents staged in the cow fork.
-        */
-       if (xfs_is_reflink_inode(ip) && ip->i_cnextents == 0) {
-               if (ip->i_d.di_nblocks == 0)
-                       ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
-               xfs_inode_clear_cowblocks_tag(ip);
-       }
+       xfs_itruncate_clear_reflink_flags(ip);
 
        /*
         * Always re-log the inode so that our permanent transaction can keep
index b2136af..d383e39 100644 (file)
@@ -232,6 +232,7 @@ static inline bool xfs_is_reflink_inode(struct xfs_inode *ip)
  * log recovery to replay a bmap operation on the inode.
  */
 #define XFS_IRECOVERY          (1 << 11)
+#define XFS_ICOWBLOCKS         (1 << 12) /* has the cowblocks tag set */
 
 /*
  * Per-lifetime flags need to be reset when re-using a reclaimable inode during
index cf7c8f8..47aea2e 100644 (file)
@@ -454,6 +454,8 @@ retry:
        if (error)
                goto out_bmap_cancel;
 
+       xfs_inode_set_cowblocks_tag(ip);
+
        /* Finish up. */
        error = xfs_defer_finish(&tp, &dfops);
        if (error)
@@ -490,8 +492,9 @@ xfs_reflink_find_cow_mapping(
        struct xfs_iext_cursor          icur;
 
        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED));
-       ASSERT(xfs_is_reflink_inode(ip));
 
+       if (!xfs_is_reflink_inode(ip))
+               return false;
        offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
        if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &icur, &got))
                return false;
@@ -610,6 +613,9 @@ xfs_reflink_cancel_cow_blocks(
 
                        /* Remove the mapping from the CoW fork. */
                        xfs_bmap_del_extent_cow(ip, &icur, &got, &del);
+               } else {
+                       /* Didn't do anything, push cursor back. */
+                       xfs_iext_prev(ifp, &icur);
                }
 next_extent:
                if (!xfs_iext_get_extent(ifp, &icur, &got))
@@ -725,7 +731,7 @@ xfs_reflink_end_cow(
                        (unsigned int)(end_fsb - offset_fsb),
                        XFS_DATA_FORK);
        error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write,
-                       resblks, 0, 0, &tp);
+                       resblks, 0, XFS_TRANS_RESERVE, &tp);
        if (error)
                goto out;
 
@@ -1291,6 +1297,17 @@ xfs_reflink_remap_range(
 
        trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
 
+       /*
+        * Clear out post-eof preallocations because we don't have page cache
+        * backing the delayed allocations and they'll never get freed on
+        * their own.
+        */
+       if (xfs_can_free_eofblocks(dest, true)) {
+               ret = xfs_free_eofblocks(dest);
+               if (ret)
+                       goto out_unlock;
+       }
+
        /* Set flags and remap blocks. */
        ret = xfs_reflink_set_inode_flag(src, dest);
        if (ret)
index 5122d30..1dacccc 100644 (file)
@@ -1360,6 +1360,7 @@ xfs_fs_remount(
                        xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
                        return error;
                }
+               xfs_queue_cowblocks(mp);
 
                /* Create the per-AG metadata reservation pool. */
                error = xfs_fs_reserve_ag_blocks(mp);
@@ -1369,6 +1370,14 @@ xfs_fs_remount(
 
        /* rw -> ro */
        if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & SB_RDONLY)) {
+               /* Get rid of any leftover CoW reservations... */
+               cancel_delayed_work_sync(&mp->m_cowblocks_work);
+               error = xfs_icache_free_cowblocks(mp, NULL);
+               if (error) {
+                       xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+                       return error;
+               }
+
                /* Free the per-AG metadata reservation pool. */
                error = xfs_fs_unreserve_ag_blocks(mp);
                if (error) {
index ea189d8..8ac4e68 100644 (file)
@@ -7,9 +7,10 @@
 #ifndef _ASM_GENERIC_MM_HOOKS_H
 #define _ASM_GENERIC_MM_HOOKS_H
 
-static inline void arch_dup_mmap(struct mm_struct *oldmm,
-                                struct mm_struct *mm)
+static inline int arch_dup_mmap(struct mm_struct *oldmm,
+                               struct mm_struct *mm)
 {
+       return 0;
 }
 
 static inline void arch_exit_mmap(struct mm_struct *mm)
index b234d54..868e685 100644 (file)
@@ -1025,6 +1025,11 @@ static inline int pmd_clear_huge(pmd_t *pmd)
 struct file;
 int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
                        unsigned long size, pgprot_t *vma_prot);
+
+#ifndef CONFIG_X86_ESPFIX64
+static inline void init_espfix_bsp(void) { }
+#endif
+
 #endif /* !__ASSEMBLY__ */
 
 #ifndef io_remap_pfn_range
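
The empty init_espfix_bsp() added above follows a common kernel pattern: when a feature is configured out, supply a no-op static inline under the same name so common code (here mm_init(), as the init/main.c hunk below shows) can call it without an #ifdef. A generic sketch of the pattern, with CONFIG_FOO and foo_init() as placeholder names:

        /* feature available: real implementation lives elsewhere */
        #ifdef CONFIG_FOO
        void foo_init(void);
        #else
        /* feature compiled out: the call compiles away, call sites stay clean */
        static inline void foo_init(void) { }
        #endif
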
index cceafa0..b67404f 100644 (file)
@@ -27,6 +27,7 @@ static inline struct mcryptd_ahash *__mcryptd_ahash_cast(
 
 struct mcryptd_cpu_queue {
        struct crypto_queue queue;
+       spinlock_t q_lock;
        struct work_struct work;
 };
 
index 6e45608..9da6ce2 100644 (file)
@@ -62,7 +62,7 @@ struct arch_timer_cpu {
        bool                    enabled;
 };
 
-int kvm_timer_hyp_init(void);
+int kvm_timer_hyp_init(bool);
 int kvm_timer_enable(struct kvm_vcpu *vcpu);
 int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu);
 void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu);
index 82f0c8f..23d29b3 100644 (file)
@@ -492,6 +492,8 @@ extern unsigned int bvec_nr_vecs(unsigned short idx);
 
 #define bio_set_dev(bio, bdev)                         \
 do {                                           \
+       if ((bio)->bi_disk != (bdev)->bd_disk)  \
+               bio_clear_flag(bio, BIO_THROTTLED);\
        (bio)->bi_disk = (bdev)->bd_disk;       \
        (bio)->bi_partno = (bdev)->bd_partno;   \
 } while (0)
index a1e628e..9e7d8bd 100644 (file)
@@ -50,8 +50,6 @@ struct blk_issue_stat {
 struct bio {
        struct bio              *bi_next;       /* request queue link */
        struct gendisk          *bi_disk;
-       u8                      bi_partno;
-       blk_status_t            bi_status;
        unsigned int            bi_opf;         /* bottom bits req flags,
                                                 * top bits REQ_OP. Use
                                                 * accessors.
@@ -59,8 +57,8 @@ struct bio {
        unsigned short          bi_flags;       /* status, etc and bvec pool number */
        unsigned short          bi_ioprio;
        unsigned short          bi_write_hint;
-
-       struct bvec_iter        bi_iter;
+       blk_status_t            bi_status;
+       u8                      bi_partno;
 
        /* Number of segments in this BIO after
         * physical address coalescing is performed.
@@ -74,8 +72,9 @@ struct bio {
        unsigned int            bi_seg_front_size;
        unsigned int            bi_seg_back_size;
 
-       atomic_t                __bi_remaining;
+       struct bvec_iter        bi_iter;
 
+       atomic_t                __bi_remaining;
        bio_end_io_t            *bi_end_io;
 
        void                    *bi_private;
index 8089ca1..0ce8a37 100644 (file)
@@ -135,7 +135,7 @@ typedef __u32 __bitwise req_flags_t;
 struct request {
        struct list_head queuelist;
        union {
-               call_single_data_t csd;
+               struct __call_single_data csd;
                u64 fifo_time;
        };
 
@@ -241,14 +241,24 @@ struct request {
        struct request *next_rq;
 };
 
+static inline bool blk_op_is_scsi(unsigned int op)
+{
+       return op == REQ_OP_SCSI_IN || op == REQ_OP_SCSI_OUT;
+}
+
+static inline bool blk_op_is_private(unsigned int op)
+{
+       return op == REQ_OP_DRV_IN || op == REQ_OP_DRV_OUT;
+}
+
 static inline bool blk_rq_is_scsi(struct request *rq)
 {
-       return req_op(rq) == REQ_OP_SCSI_IN || req_op(rq) == REQ_OP_SCSI_OUT;
+       return blk_op_is_scsi(req_op(rq));
 }
 
 static inline bool blk_rq_is_private(struct request *rq)
 {
-       return req_op(rq) == REQ_OP_DRV_IN || req_op(rq) == REQ_OP_DRV_OUT;
+       return blk_op_is_private(req_op(rq));
 }
 
 static inline bool blk_rq_is_passthrough(struct request *rq)
@@ -256,6 +266,13 @@ static inline bool blk_rq_is_passthrough(struct request *rq)
        return blk_rq_is_scsi(rq) || blk_rq_is_private(rq);
 }
 
+static inline bool bio_is_passthrough(struct bio *bio)
+{
+       unsigned op = bio_op(bio);
+
+       return blk_op_is_scsi(op) || blk_op_is_private(op);
+}
+
 static inline unsigned short req_get_ioprio(struct request *req)
 {
        return req->ioprio;
@@ -948,7 +965,7 @@ extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
 extern void blk_rq_unprep_clone(struct request *rq);
 extern blk_status_t blk_insert_cloned_request(struct request_queue *q,
                                     struct request *rq);
-extern int blk_rq_append_bio(struct request *rq, struct bio *bio);
+extern int blk_rq_append_bio(struct request *rq, struct bio **bio);
 extern void blk_delay_queue(struct request_queue *, unsigned long);
 extern void blk_queue_split(struct request_queue *, struct bio **);
 extern void blk_recount_segments(struct request_queue *, struct bio *);
index c561b98..1632bb1 100644 (file)
  * In practice this is far bigger than any realistic pointer offset; this limit
  * ensures that umax_value + (int)off + (int)size cannot overflow a u64.
  */
-#define BPF_MAX_VAR_OFF        (1ULL << 31)
+#define BPF_MAX_VAR_OFF        (1 << 29)
 /* Maximum variable size permitted for ARG_CONST_SIZE[_OR_ZERO].  This ensures
  * that converting umax_value to int cannot overflow.
  */
-#define BPF_MAX_VAR_SIZ        INT_MAX
+#define BPF_MAX_VAR_SIZ        (1 << 29)
 
 /* Liveness marks, used for registers and spilled-regs (in stack slots).
  * Read marks propagate upwards until they find a write mark; they record that
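
A quick sanity check that the tightened limits above do what the comments claim, assuming offsets and sizes are each clamped to 1 << 29 as the new definitions enforce: the worst case of umax_value + (int)off + (int)size is 3 * 2^29 = 0x60000000, which is below 2^31, so the sum fits in a signed 32-bit int and is nowhere near overflowing a u64, and converting a clamped umax_value to int likewise cannot overflow.
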
index cb18c62..8415bf1 100644 (file)
@@ -273,7 +273,8 @@ struct ipv6_pinfo {
                                                 * 100: prefer care-of address
                                                 */
                                dontfrag:1,
-                               autoflowlabel:1;
+                               autoflowlabel:1,
+                               autoflowlabel_set:1;
        __u8                    min_hopcount;
        __u8                    tclass;
        __be32                  rcv_flowinfo;
index a2a1318..c3d3f04 100644 (file)
@@ -915,10 +915,10 @@ enum PDEV_STAT  {PDEV_STAT_IDLE, PDEV_STAT_RUN};
 #define LTR_L1SS_PWR_GATE_CHECK_CARD_EN        BIT(6)
 
 enum dev_aspm_mode {
-       DEV_ASPM_DISABLE = 0,
        DEV_ASPM_DYNAMIC,
        DEV_ASPM_BACKDOOR,
        DEV_ASPM_STATIC,
+       DEV_ASPM_DISABLE,
 };
 
 /*
index a886b51..57b109c 100644 (file)
@@ -556,6 +556,7 @@ struct mlx5_core_sriov {
 };
 
 struct mlx5_irq_info {
+       cpumask_var_t mask;
        char name[MLX5_MAX_IRQ_NAME];
 };
 
@@ -1048,7 +1049,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
                       enum mlx5_eq_type type);
 int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
 int mlx5_start_eqs(struct mlx5_core_dev *dev);
-int mlx5_stop_eqs(struct mlx5_core_dev *dev);
+void mlx5_stop_eqs(struct mlx5_core_dev *dev);
 int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
                    unsigned int *irqn);
 int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn);
index 38a7577..d44ec5f 100644 (file)
@@ -147,7 +147,7 @@ enum {
        MLX5_CMD_OP_ALLOC_Q_COUNTER               = 0x771,
        MLX5_CMD_OP_DEALLOC_Q_COUNTER             = 0x772,
        MLX5_CMD_OP_QUERY_Q_COUNTER               = 0x773,
-       MLX5_CMD_OP_SET_RATE_LIMIT                = 0x780,
+       MLX5_CMD_OP_SET_PP_RATE_LIMIT             = 0x780,
        MLX5_CMD_OP_QUERY_RATE_LIMIT              = 0x781,
        MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT      = 0x782,
        MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT     = 0x783,
@@ -7239,7 +7239,7 @@ struct mlx5_ifc_add_vxlan_udp_dport_in_bits {
        u8         vxlan_udp_port[0x10];
 };
 
-struct mlx5_ifc_set_rate_limit_out_bits {
+struct mlx5_ifc_set_pp_rate_limit_out_bits {
        u8         status[0x8];
        u8         reserved_at_8[0x18];
 
@@ -7248,7 +7248,7 @@ struct mlx5_ifc_set_rate_limit_out_bits {
        u8         reserved_at_40[0x40];
 };
 
-struct mlx5_ifc_set_rate_limit_in_bits {
+struct mlx5_ifc_set_pp_rate_limit_in_bits {
        u8         opcode[0x10];
        u8         reserved_at_10[0x10];
 
@@ -7261,6 +7261,8 @@ struct mlx5_ifc_set_rate_limit_in_bits {
        u8         reserved_at_60[0x20];
 
        u8         rate_limit[0x20];
+
+       u8         reserved_at_a0[0x160];
 };
 
 struct mlx5_ifc_access_register_out_bits {
index 7b2170b..bc6bb32 100644 (file)
@@ -126,7 +126,7 @@ void spi_statistics_add_transfer_stats(struct spi_statistics *stats,
  *     for that name.  This appears in the sysfs "modalias" attribute
  *     for driver coldplugging, and in uevents used for hotplugging
  * @cs_gpio: gpio number of the chipselect line (optional, -ENOENT when
- *     when not using a GPIO line)
+ *     not using a GPIO line)
  *
  * @statistics: statistics for the spi_device
  *
index 8b8118a..cb4d92b 100644 (file)
@@ -3226,7 +3226,6 @@ struct cfg80211_ops {
  * @WIPHY_FLAG_IBSS_RSN: The device supports IBSS RSN.
  * @WIPHY_FLAG_MESH_AUTH: The device supports mesh authentication by routing
  *     auth frames to userspace. See @NL80211_MESH_SETUP_USERSPACE_AUTH.
- * @WIPHY_FLAG_SUPPORTS_SCHED_SCAN: The device supports scheduled scans.
  * @WIPHY_FLAG_SUPPORTS_FW_ROAM: The device supports roaming feature in the
  *     firmware.
  * @WIPHY_FLAG_AP_UAPSD: The device supports uapsd on AP.
index 0105445..8e08b6d 100644 (file)
@@ -694,9 +694,7 @@ struct tc_cls_matchall_offload {
 };
 
 enum tc_clsbpf_command {
-       TC_CLSBPF_ADD,
-       TC_CLSBPF_REPLACE,
-       TC_CLSBPF_DESTROY,
+       TC_CLSBPF_OFFLOAD,
        TC_CLSBPF_STATS,
 };
 
@@ -705,6 +703,7 @@ struct tc_cls_bpf_offload {
        enum tc_clsbpf_command command;
        struct tcf_exts *exts;
        struct bpf_prog *prog;
+       struct bpf_prog *oldprog;
        const char *name;
        bool exts_integrated;
        u32 gen_flags;
index 7586072..2cd4493 100644 (file)
@@ -134,12 +134,12 @@ DECLARE_EVENT_CLASS(clk_parent,
 
        TP_STRUCT__entry(
                __string(        name,           core->name                )
-               __string(        pname,          parent->name              )
+               __string(        pname, parent ? parent->name : "none"     )
        ),
 
        TP_fast_assign(
                __assign_str(name, core->name);
-               __assign_str(pname, parent->name);
+               __assign_str(pname, parent ? parent->name : "none");
        ),
 
        TP_printk("%s %s", __get_str(name), __get_str(pname))
index e4b0b8e..2c735a3 100644 (file)
@@ -211,7 +211,7 @@ TRACE_EVENT(kvm_ack_irq,
        { KVM_TRACE_MMIO_WRITE, "write" }
 
 TRACE_EVENT(kvm_mmio,
-       TP_PROTO(int type, int len, u64 gpa, u64 val),
+       TP_PROTO(int type, int len, u64 gpa, void *val),
        TP_ARGS(type, len, gpa, val),
 
        TP_STRUCT__entry(
@@ -225,7 +225,10 @@ TRACE_EVENT(kvm_mmio,
                __entry->type           = type;
                __entry->len            = len;
                __entry->gpa            = gpa;
-               __entry->val            = val;
+               __entry->val            = 0;
+               if (val)
+                       memcpy(&__entry->val, val,
+                              min_t(u32, sizeof(__entry->val), len));
        ),
 
        TP_printk("mmio %s len %u gpa 0x%llx val 0x%llx",
index 4914b93..61f410f 100644 (file)
@@ -44,3 +44,8 @@ static inline void xen_balloon_init(void)
 {
 }
 #endif
+
+#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
+struct resource;
+void arch_xen_balloon_init(struct resource *hostmem_resource);
+#endif
index e96e3a1..7b606fc 100644 (file)
@@ -504,6 +504,8 @@ static void __init mm_init(void)
        pgtable_init();
        vmalloc_init();
        ioremap_huge_init();
+       /* Should be run before the first non-init thread is created */
+       init_espfix_bsp();
 }
 
 asmlinkage __visible void __init start_kernel(void)
@@ -678,10 +680,6 @@ asmlinkage __visible void __init start_kernel(void)
 #ifdef CONFIG_X86
        if (efi_enabled(EFI_RUNTIME_SERVICES))
                efi_enter_virtual_mode();
-#endif
-#ifdef CONFIG_X86_ESPFIX64
-       /* Should be run before the first non-init thread is created */
-       init_espfix_bsp();
 #endif
        thread_stack_cache_init();
        cred_init();
index d459357..04b2487 100644 (file)
@@ -1059,6 +1059,11 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
                break;
        case PTR_TO_STACK:
                pointer_desc = "stack ";
+               /* The stack spill tracking logic in check_stack_write()
+                * and check_stack_read() relies on stack accesses being
+                * aligned.
+                */
+               strict = true;
                break;
        default:
                break;
@@ -1067,6 +1072,29 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
                                           strict);
 }
 
+/* truncate register to smaller size (in bytes)
+ * must be called with size < BPF_REG_SIZE
+ */
+static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
+{
+       u64 mask;
+
+       /* clear high bits in bit representation */
+       reg->var_off = tnum_cast(reg->var_off, size);
+
+       /* fix arithmetic bounds */
+       mask = ((u64)1 << (size * 8)) - 1;
+       if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
+               reg->umin_value &= mask;
+               reg->umax_value &= mask;
+       } else {
+               reg->umin_value = 0;
+               reg->umax_value = mask;
+       }
+       reg->smin_value = reg->umin_value;
+       reg->smax_value = reg->umax_value;
+}
+
 /* check whether memory at (regno + off) is accessible for t = (read | write)
  * if t==write, value_regno is a register which value is stored into memory
  * if t==read, value_regno is a register which will receive the value from memory
@@ -1200,9 +1228,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
        if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
            regs[value_regno].type == SCALAR_VALUE) {
                /* b/h/w load zero-extends, mark upper bits as known 0 */
-               regs[value_regno].var_off =
-                       tnum_cast(regs[value_regno].var_off, size);
-               __update_reg_bounds(&regs[value_regno]);
+               coerce_reg_to_size(&regs[value_regno], size);
        }
        return err;
 }
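
A worked example of the coerce_reg_to_size() bounds logic added above, for size = 4 (mask = 0xffffffff): a register known to lie in [0x1000000f0, 0x100000100] has identical high bits at both ends (umin & ~mask == umax & ~mask == 0x100000000), so the truncated bounds tighten to [0xf0, 0x100]; for [0xf0, 0x100000010] the high bits differ, the low 32 bits could be anything, and the bounds widen to the full [0, 0xffffffff]. Copying the signed bounds from the unsigned ones is then safe, because every truncated value is below 2^32 and hence nonnegative as an s64.
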
@@ -1282,6 +1308,7 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
                tnum_strn(tn_buf, sizeof(tn_buf), regs[regno].var_off);
                verbose(env, "invalid variable stack read R%d var_off=%s\n",
                        regno, tn_buf);
+               return -EACCES;
        }
        off = regs[regno].off + regs[regno].var_off.value;
        if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
@@ -1674,7 +1701,13 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
                return -EINVAL;
        }
 
+       /* With LD_ABS/IND some JITs save/restore skb from r1. */
        changes_data = bpf_helper_changes_pkt_data(fn->func);
+       if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
+               verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n",
+                       func_id_name(func_id), func_id);
+               return -EINVAL;
+       }
 
        memset(&meta, 0, sizeof(meta));
        meta.pkt_access = fn->pkt_access;
@@ -1766,14 +1799,6 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
        return 0;
 }
 
-static void coerce_reg_to_32(struct bpf_reg_state *reg)
-{
-       /* clear high 32 bits */
-       reg->var_off = tnum_cast(reg->var_off, 4);
-       /* Update bounds */
-       __update_reg_bounds(reg);
-}
-
 static bool signed_add_overflows(s64 a, s64 b)
 {
        /* Do the add in u64, where overflow is well-defined */
@@ -1794,6 +1819,41 @@ static bool signed_sub_overflows(s64 a, s64 b)
        return res > a;
 }
 
+static bool check_reg_sane_offset(struct bpf_verifier_env *env,
+                                 const struct bpf_reg_state *reg,
+                                 enum bpf_reg_type type)
+{
+       bool known = tnum_is_const(reg->var_off);
+       s64 val = reg->var_off.value;
+       s64 smin = reg->smin_value;
+
+       if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
+               verbose(env, "math between %s pointer and %lld is not allowed\n",
+                       reg_type_str[type], val);
+               return false;
+       }
+
+       if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
+               verbose(env, "%s pointer offset %d is not allowed\n",
+                       reg_type_str[type], reg->off);
+               return false;
+       }
+
+       if (smin == S64_MIN) {
+               verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
+                       reg_type_str[type]);
+               return false;
+       }
+
+       if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
+               verbose(env, "value %lld makes %s pointer be out of bounds\n",
+                       smin, reg_type_str[type]);
+               return false;
+       }
+
+       return true;
+}
+
 /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
  * Caller should also handle BPF_MOV case separately.
  * If we return -EACCES, caller may want to try again treating pointer as a
@@ -1830,29 +1890,25 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
 
        if (BPF_CLASS(insn->code) != BPF_ALU64) {
                /* 32-bit ALU ops on pointers produce (meaningless) scalars */
-               if (!env->allow_ptr_leaks)
-                       verbose(env,
-                               "R%d 32-bit pointer arithmetic prohibited\n",
-                               dst);
+               verbose(env,
+                       "R%d 32-bit pointer arithmetic prohibited\n",
+                       dst);
                return -EACCES;
        }
 
        if (ptr_reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
-               if (!env->allow_ptr_leaks)
-                       verbose(env, "R%d pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL prohibited, null-check it first\n",
-                               dst);
+               verbose(env, "R%d pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL prohibited, null-check it first\n",
+                       dst);
                return -EACCES;
        }
        if (ptr_reg->type == CONST_PTR_TO_MAP) {
-               if (!env->allow_ptr_leaks)
-                       verbose(env, "R%d pointer arithmetic on CONST_PTR_TO_MAP prohibited\n",
-                               dst);
+               verbose(env, "R%d pointer arithmetic on CONST_PTR_TO_MAP prohibited\n",
+                       dst);
                return -EACCES;
        }
        if (ptr_reg->type == PTR_TO_PACKET_END) {
-               if (!env->allow_ptr_leaks)
-                       verbose(env, "R%d pointer arithmetic on PTR_TO_PACKET_END prohibited\n",
-                               dst);
+               verbose(env, "R%d pointer arithmetic on PTR_TO_PACKET_END prohibited\n",
+                       dst);
                return -EACCES;
        }
 
@@ -1862,6 +1918,10 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
        dst_reg->type = ptr_reg->type;
        dst_reg->id = ptr_reg->id;
 
+       if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) ||
+           !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
+               return -EINVAL;
+
        switch (opcode) {
        case BPF_ADD:
                /* We can take a fixed offset as long as it doesn't overflow
@@ -1915,9 +1975,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
        case BPF_SUB:
                if (dst_reg == off_reg) {
                        /* scalar -= pointer.  Creates an unknown scalar */
-                       if (!env->allow_ptr_leaks)
-                               verbose(env, "R%d tried to subtract pointer from scalar\n",
-                                       dst);
+                       verbose(env, "R%d tried to subtract pointer from scalar\n",
+                               dst);
                        return -EACCES;
                }
                /* We don't allow subtraction from FP, because (according to
@@ -1925,9 +1984,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
                 * be able to deal with it.
                 */
                if (ptr_reg->type == PTR_TO_STACK) {
-                       if (!env->allow_ptr_leaks)
-                               verbose(env, "R%d subtraction from stack pointer prohibited\n",
-                                       dst);
+                       verbose(env, "R%d subtraction from stack pointer prohibited\n",
+                               dst);
                        return -EACCES;
                }
                if (known && (ptr_reg->off - smin_val ==
@@ -1976,28 +2034,30 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
        case BPF_AND:
        case BPF_OR:
        case BPF_XOR:
-               /* bitwise ops on pointers are troublesome, prohibit for now.
-                * (However, in principle we could allow some cases, e.g.
-                * ptr &= ~3 which would reduce min_value by 3.)
-                */
-               if (!env->allow_ptr_leaks)
-                       verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
-                               dst, bpf_alu_string[opcode >> 4]);
+               /* bitwise ops on pointers are troublesome, prohibit. */
+               verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
+                       dst, bpf_alu_string[opcode >> 4]);
                return -EACCES;
        default:
                /* other operators (e.g. MUL,LSH) produce non-pointer results */
-               if (!env->allow_ptr_leaks)
-                       verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
-                               dst, bpf_alu_string[opcode >> 4]);
+               verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
+                       dst, bpf_alu_string[opcode >> 4]);
                return -EACCES;
        }
 
+       if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
+               return -EINVAL;
+
        __update_reg_bounds(dst_reg);
        __reg_deduce_bounds(dst_reg);
        __reg_bound_offset(dst_reg);
        return 0;
 }
 
+/* WARNING: This function does calculations on 64-bit values, but the actual
+ * execution may occur on 32-bit values. Therefore, things like bitshifts
+ * need extra checks in the 32-bit case.
+ */
 static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
                                      struct bpf_insn *insn,
                                      struct bpf_reg_state *dst_reg,
@@ -2008,12 +2068,8 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
        bool src_known, dst_known;
        s64 smin_val, smax_val;
        u64 umin_val, umax_val;
+       u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
 
-       if (BPF_CLASS(insn->code) != BPF_ALU64) {
-               /* 32-bit ALU ops are (32,32)->64 */
-               coerce_reg_to_32(dst_reg);
-               coerce_reg_to_32(&src_reg);
-       }
        smin_val = src_reg.smin_value;
        smax_val = src_reg.smax_value;
        umin_val = src_reg.umin_value;
@@ -2021,6 +2077,12 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
        src_known = tnum_is_const(src_reg.var_off);
        dst_known = tnum_is_const(dst_reg->var_off);
 
+       if (!src_known &&
+           opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
+               __mark_reg_unknown(dst_reg);
+               return 0;
+       }
+
        switch (opcode) {
        case BPF_ADD:
                if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
@@ -2149,9 +2211,9 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
                __update_reg_bounds(dst_reg);
                break;
        case BPF_LSH:
-               if (umax_val > 63) {
-                       /* Shifts greater than 63 are undefined.  This includes
-                        * shifts by a negative number.
+               if (umax_val >= insn_bitness) {
+                       /* Shifts greater than 31 or 63 are undefined.
+                        * This includes shifts by a negative number.
                         */
                        mark_reg_unknown(env, regs, insn->dst_reg);
                        break;
@@ -2177,27 +2239,29 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
                __update_reg_bounds(dst_reg);
                break;
        case BPF_RSH:
-               if (umax_val > 63) {
-                       /* Shifts greater than 63 are undefined.  This includes
-                        * shifts by a negative number.
+               if (umax_val >= insn_bitness) {
+                       /* Shifts greater than 31 or 63 are undefined.
+                        * This includes shifts by a negative number.
                         */
                        mark_reg_unknown(env, regs, insn->dst_reg);
                        break;
                }
-               /* BPF_RSH is an unsigned shift, so make the appropriate casts */
-               if (dst_reg->smin_value < 0) {
-                       if (umin_val) {
-                               /* Sign bit will be cleared */
-                               dst_reg->smin_value = 0;
-                       } else {
-                               /* Lost sign bit information */
-                               dst_reg->smin_value = S64_MIN;
-                               dst_reg->smax_value = S64_MAX;
-                       }
-               } else {
-                       dst_reg->smin_value =
-                               (u64)(dst_reg->smin_value) >> umax_val;
-               }
+               /* BPF_RSH is an unsigned shift.  If the value in dst_reg might
+                * be negative, then either:
+                * 1) src_reg might be zero, so the sign bit of the result is
+                *    unknown, so we lose our signed bounds
+                * 2) it's known negative, thus the unsigned bounds capture the
+                *    signed bounds
+                * 3) the signed bounds cross zero, so they tell us nothing
+                *    about the result
+                * If the value in dst_reg is known nonnegative, then again the
+                * unsigned bounds capture the signed bounds.
+                * Thus, in all cases it suffices to blow away our signed bounds
+                * and rely on inferring new ones from the unsigned bounds and
+                * var_off of the result.
+                */
+               dst_reg->smin_value = S64_MIN;
+               dst_reg->smax_value = S64_MAX;
                if (src_known)
                        dst_reg->var_off = tnum_rshift(dst_reg->var_off,
                                                       umin_val);
@@ -2213,6 +2277,12 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
                break;
        }
 
+       if (BPF_CLASS(insn->code) != BPF_ALU64) {
+               /* 32-bit ALU ops are (32,32)->32 */
+               coerce_reg_to_size(dst_reg, 4);
+               coerce_reg_to_size(&src_reg, 4);
+       }
+
        __reg_deduce_bounds(dst_reg);
        __reg_bound_offset(dst_reg);
        return 0;
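
Moving the coercion to the end of the function reflects that eBPF 32-bit ALU ops are (32,32)->32 — operands and the result truncate to 32 bits — rather than the (32,32)->64 behavior the removed code assumed. A tiny user-space illustration of the difference (plain C, not verifier code):

        #include <stdint.h>
        #include <stdio.h>

        int main(void)
        {
                uint64_t a = 0xffffffffULL, b = 1;
                uint32_t r32 = (uint32_t)a + (uint32_t)b; /* BPF_ALU add: wraps to 0 */
                uint64_t r64 = a + b;                     /* BPF_ALU64 add: 0x100000000 */

                printf("alu32=%u alu64=0x%llx\n", r32, (unsigned long long)r64);
                return 0;
        }
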
@@ -2227,7 +2297,6 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
        struct bpf_reg_state *regs = cur_regs(env), *dst_reg, *src_reg;
        struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
        u8 opcode = BPF_OP(insn->code);
-       int rc;
 
        dst_reg = &regs[insn->dst_reg];
        src_reg = NULL;
@@ -2238,43 +2307,29 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
                if (src_reg->type != SCALAR_VALUE) {
                        if (dst_reg->type != SCALAR_VALUE) {
                                /* Combining two pointers by any ALU op yields
-                                * an arbitrary scalar.
+                                * an arbitrary scalar. Disallow all math except
+                                * pointer subtraction.
                                 */
-                               if (!env->allow_ptr_leaks) {
-                                       verbose(env, "R%d pointer %s pointer prohibited\n",
-                                               insn->dst_reg,
-                                               bpf_alu_string[opcode >> 4]);
-                                       return -EACCES;
+                               if (opcode == BPF_SUB) {
+                                       mark_reg_unknown(env, regs, insn->dst_reg);
+                                       return 0;
                                }
-                               mark_reg_unknown(env, regs, insn->dst_reg);
-                               return 0;
+                               verbose(env, "R%d pointer %s pointer prohibited\n",
+                                       insn->dst_reg,
+                                       bpf_alu_string[opcode >> 4]);
+                               return -EACCES;
                        } else {
                                /* scalar += pointer
                                 * This is legal, but we have to reverse our
                                 * src/dest handling in computing the range
                                 */
-                               rc = adjust_ptr_min_max_vals(env, insn,
-                                                            src_reg, dst_reg);
-                               if (rc == -EACCES && env->allow_ptr_leaks) {
-                                       /* scalar += unknown scalar */
-                                       __mark_reg_unknown(&off_reg);
-                                       return adjust_scalar_min_max_vals(
-                                                       env, insn,
-                                                       dst_reg, off_reg);
-                               }
-                               return rc;
+                               return adjust_ptr_min_max_vals(env, insn,
+                                                              src_reg, dst_reg);
                        }
                } else if (ptr_reg) {
                        /* pointer += scalar */
-                       rc = adjust_ptr_min_max_vals(env, insn,
-                                                    dst_reg, src_reg);
-                       if (rc == -EACCES && env->allow_ptr_leaks) {
-                               /* unknown scalar += scalar */
-                               __mark_reg_unknown(dst_reg);
-                               return adjust_scalar_min_max_vals(
-                                               env, insn, dst_reg, *src_reg);
-                       }
-                       return rc;
+                       return adjust_ptr_min_max_vals(env, insn,
+                                                      dst_reg, src_reg);
                }
        } else {
                /* Pretend the src is a reg with a known value, since we only
@@ -2283,17 +2338,9 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
                off_reg.type = SCALAR_VALUE;
                __mark_reg_known(&off_reg, insn->imm);
                src_reg = &off_reg;
-               if (ptr_reg) { /* pointer += K */
-                       rc = adjust_ptr_min_max_vals(env, insn,
-                                                    ptr_reg, src_reg);
-                       if (rc == -EACCES && env->allow_ptr_leaks) {
-                               /* unknown scalar += K */
-                               __mark_reg_unknown(dst_reg);
-                               return adjust_scalar_min_max_vals(
-                                               env, insn, dst_reg, off_reg);
-                       }
-                       return rc;
-               }
+               if (ptr_reg) /* pointer += K */
+                       return adjust_ptr_min_max_vals(env, insn,
+                                                      ptr_reg, src_reg);
        }
 
        /* Got here implies adding two SCALAR_VALUEs */
@@ -2390,17 +2437,20 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
                                        return -EACCES;
                                }
                                mark_reg_unknown(env, regs, insn->dst_reg);
-                               /* high 32 bits are known zero. */
-                               regs[insn->dst_reg].var_off = tnum_cast(
-                                               regs[insn->dst_reg].var_off, 4);
-                               __update_reg_bounds(&regs[insn->dst_reg]);
+                               coerce_reg_to_size(&regs[insn->dst_reg], 4);
                        }
                } else {
                        /* case: R = imm
                         * remember the value we stored into this reg
                         */
                        regs[insn->dst_reg].type = SCALAR_VALUE;
-                       __mark_reg_known(regs + insn->dst_reg, insn->imm);
+                       if (BPF_CLASS(insn->code) == BPF_ALU64) {
+                               __mark_reg_known(regs + insn->dst_reg,
+                                                insn->imm);
+                       } else {
+                               __mark_reg_known(regs + insn->dst_reg,
+                                                (u32)insn->imm);
+                       }
                }
 
        } else if (opcode > BPF_END) {
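
The ALU64/ALU32 split above matters because a MOV immediate behaves differently per instruction class: BPF_ALU64 sign-extends the signed 32-bit immediate, while BPF_ALU zero-extends it. A small sketch of the two conversions in plain C:

        #include <stdint.h>

        int main(void)
        {
                int imm = -1;                 /* insn->imm is a signed 32-bit field */
                uint64_t r64 = (int64_t)imm;  /* ALU64 mov: 0xffffffffffffffff */
                uint64_t r32 = (uint32_t)imm; /* ALU32 mov: 0x00000000ffffffff */

                return r64 == r32;            /* 0: the results differ */
        }
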
@@ -3431,15 +3481,14 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
                        return range_within(rold, rcur) &&
                               tnum_in(rold->var_off, rcur->var_off);
                } else {
-                       /* if we knew anything about the old value, we're not
-                        * equal, because we can't know anything about the
-                        * scalar value of the pointer in the new value.
+                       /* We're trying to use a pointer in place of a scalar.
+                        * Even if the scalar was unbounded, this could lead to
+                        * pointer leaks because scalars are allowed to leak
+                        * while pointers are not. We could make this safe in
+                        * special cases if root is calling us, but it's
+                        * probably not worth the hassle.
                         */
-                       return rold->umin_value == 0 &&
-                              rold->umax_value == U64_MAX &&
-                              rold->smin_value == S64_MIN &&
-                              rold->smax_value == S64_MAX &&
-                              tnum_is_unknown(rold->var_off);
+                       return false;
                }
        case PTR_TO_MAP_VALUE:
                /* If the new min/max/var_off satisfy the old ones and
index 432eadf..2295fc6 100644 (file)
@@ -721,8 +721,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
                        goto out;
        }
        /* a new mm has just been created */
-       arch_dup_mmap(oldmm, mm);
-       retval = 0;
+       retval = arch_dup_mmap(oldmm, mm);
 out:
        up_write(&mm->mmap_sem);
        flush_tlb_mm(oldmm);
index aa8812a..9e97480 100644 (file)
@@ -435,6 +435,41 @@ loop:
        return 0;
 }
 
+static int bpf_fill_ld_abs_vlan_push_pop2(struct bpf_test *self)
+{
+       struct bpf_insn *insn;
+
+       insn = kmalloc_array(16, sizeof(*insn), GFP_KERNEL);
+       if (!insn)
+               return -ENOMEM;
+
+       /* Due to func address being non-const, we need to
+        * assemble this here.
+        */
+       insn[0] = BPF_MOV64_REG(R6, R1);
+       insn[1] = BPF_LD_ABS(BPF_B, 0);
+       insn[2] = BPF_LD_ABS(BPF_H, 0);
+       insn[3] = BPF_LD_ABS(BPF_W, 0);
+       insn[4] = BPF_MOV64_REG(R7, R6);
+       insn[5] = BPF_MOV64_IMM(R6, 0);
+       insn[6] = BPF_MOV64_REG(R1, R7);
+       insn[7] = BPF_MOV64_IMM(R2, 1);
+       insn[8] = BPF_MOV64_IMM(R3, 2);
+       insn[9] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                              bpf_skb_vlan_push_proto.func - __bpf_call_base);
+       insn[10] = BPF_MOV64_REG(R6, R7);
+       insn[11] = BPF_LD_ABS(BPF_B, 0);
+       insn[12] = BPF_LD_ABS(BPF_H, 0);
+       insn[13] = BPF_LD_ABS(BPF_W, 0);
+       insn[14] = BPF_MOV64_IMM(R0, 42);
+       insn[15] = BPF_EXIT_INSN();
+
+       self->u.ptr.insns = insn;
+       self->u.ptr.len = 16;
+
+       return 0;
+}
+
 static int bpf_fill_jump_around_ld_abs(struct bpf_test *self)
 {
        unsigned int len = BPF_MAXINSNS;
@@ -6066,6 +6101,14 @@ static struct bpf_test tests[] = {
                {},
                { {0x1, 0x42 } },
        },
+       {
+               "LD_ABS with helper changing skb data",
+               { },
+               INTERNAL,
+               { 0x34 },
+               { { ETH_HLEN, 42 } },
+               .fill_helper = bpf_fill_ld_abs_vlan_push_pop2,
+       },
 };
 
 static struct net_device dev;
index 84b2dc7..b5f940c 100644 (file)
@@ -882,13 +882,10 @@ int bdi_register_va(struct backing_dev_info *bdi, const char *fmt, va_list args)
        if (IS_ERR(dev))
                return PTR_ERR(dev);
 
-       if (bdi_debug_register(bdi, dev_name(dev))) {
-               device_destroy(bdi_class, dev->devt);
-               return -ENOMEM;
-       }
        cgwb_bdi_register(bdi);
        bdi->dev = dev;
 
+       bdi_debug_register(bdi, dev_name(dev));
        set_bit(WB_registered, &bdi->wb.state);
 
        spin_lock_bh(&bdi_lock);
index d0ef0a8..015f465 100644 (file)
@@ -1262,19 +1262,20 @@ static int br_dev_newlink(struct net *src_net, struct net_device *dev,
        struct net_bridge *br = netdev_priv(dev);
        int err;
 
+       err = register_netdevice(dev);
+       if (err)
+               return err;
+
        if (tb[IFLA_ADDRESS]) {
                spin_lock_bh(&br->lock);
                br_stp_change_bridge_id(br, nla_data(tb[IFLA_ADDRESS]));
                spin_unlock_bh(&br->lock);
        }
 
-       err = register_netdevice(dev);
-       if (err)
-               return err;
-
        err = br_changelink(dev, tb, data, extack);
        if (err)
-               unregister_netdevice(dev);
+               br_dev_delete(dev, NULL);
+
        return err;
 }
 
index f47e96b..01ee854 100644 (file)
@@ -3904,7 +3904,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
                                     hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0,
                                     troom > 0 ? troom + 128 : 0, GFP_ATOMIC))
                        goto do_drop;
-               if (troom > 0 && __skb_linearize(skb))
+               if (skb_linearize(skb))
                        goto do_drop;
        }
 
index b797832..60a71be 100644 (file)
@@ -267,7 +267,7 @@ struct net *get_net_ns_by_id(struct net *net, int id)
        spin_lock_bh(&net->nsid_lock);
        peer = idr_find(&net->netns_ids, id);
        if (peer)
-               get_net(peer);
+               peer = maybe_get_net(peer);
        spin_unlock_bh(&net->nsid_lock);
        rcu_read_unlock();
 
index a592ca0..a3cb0be 100644 (file)
@@ -1178,7 +1178,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
        u32 d_off;
 
        if (!num_frags)
-               return 0;
+               goto release;
 
        if (skb_shared(skb) || skb_unclone(skb, gfp_mask))
                return -EINVAL;
@@ -1238,6 +1238,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
        __skb_fill_page_desc(skb, new_frags - 1, head, 0, d_off);
        skb_shinfo(skb)->nr_frags = new_frags;
 
+release:
        skb_zcopy_clear(skb, false);
        return 0;
 }
@@ -3654,8 +3655,6 @@ normal:
 
                skb_shinfo(nskb)->tx_flags |= skb_shinfo(head_skb)->tx_flags &
                                              SKBTX_SHARED_FRAG;
-               if (skb_zerocopy_clone(nskb, head_skb, GFP_ATOMIC))
-                       goto err;
 
                while (pos < offset + len) {
                        if (i >= nfrags) {
@@ -3681,6 +3680,8 @@ normal:
 
                        if (unlikely(skb_orphan_frags(frag_skb, GFP_ATOMIC)))
                                goto err;
+                       if (skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
+                               goto err;
 
                        *nskb_frag = *frag;
                        __skb_frag_ref(nskb_frag);
index f52d27a..08259d0 100644 (file)
@@ -1298,14 +1298,19 @@ err_table_hash_alloc:
 
 static void ip_fib_net_exit(struct net *net)
 {
-       unsigned int i;
+       int i;
 
        rtnl_lock();
 #ifdef CONFIG_IP_MULTIPLE_TABLES
        RCU_INIT_POINTER(net->ipv4.fib_main, NULL);
        RCU_INIT_POINTER(net->ipv4.fib_default, NULL);
 #endif
-       for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
+       /* Destroy the tables in reverse order to guarantee that the
+        * local table, ID 255, is destroyed before the main table, ID
+        * 254. This is necessary as the local table may contain
+        * references to data contained in the main table.
+        */
+       for (i = FIB_TABLE_HASHSZ - 1; i >= 0; i--) {
                struct hlist_head *head = &net->ipv4.fib_table_hash[i];
                struct hlist_node *tmp;
                struct fib_table *tb;
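
Note that the index also changed from unsigned int to int: a countdown loop guarded by i >= 0 never terminates with an unsigned counter, since the test is always true and the index wraps past zero. A minimal demonstration (HASHSZ is a stand-in for FIB_TABLE_HASHSZ):

        #include <stdio.h>

        #define HASHSZ 256

        int main(void)
        {
                int i, visited = 0;

                /* with "unsigned int i" this loop would never exit */
                for (i = HASHSZ - 1; i >= 0; i--)
                        visited++;
                printf("%d\n", visited); /* 256 */
                return 0;
        }
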
index f04d944..c586597 100644 (file)
@@ -698,7 +698,7 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
 
        nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
                int type = nla_type(nla);
-               u32 val;
+               u32 fi_val, val;
 
                if (!type)
                        continue;
@@ -715,7 +715,11 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
                        val = nla_get_u32(nla);
                }
 
-               if (fi->fib_metrics->metrics[type - 1] != val)
+               fi_val = fi->fib_metrics->metrics[type - 1];
+               if (type == RTAX_FEATURES)
+                       fi_val &= ~DST_FEATURE_ECN_CA;
+
+               if (fi_val != val)
                        return false;
        }
 
index 9c17356..45ffd3d 100644 (file)
@@ -1310,6 +1310,7 @@ static const struct net_device_ops erspan_netdev_ops = {
 static void ipgre_tap_setup(struct net_device *dev)
 {
        ether_setup(dev);
+       dev->max_mtu = 0;
        dev->netdev_ops = &gre_tap_netdev_ops;
        dev->priv_flags &= ~IFF_TX_SKB_SHARING;
        dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
index c26f712..c9441ca 100644 (file)
@@ -210,7 +210,6 @@ lookup_protocol:
        np->mcast_hops  = IPV6_DEFAULT_MCASTHOPS;
        np->mc_loop     = 1;
        np->pmtudisc    = IPV6_PMTUDISC_WANT;
-       np->autoflowlabel = ip6_default_np_autolabel(net);
        np->repflow     = net->ipv6.sysctl.flowlabel_reflect;
        sk->sk_ipv6only = net->ipv6.sysctl.bindv6only;
 
index 4cfd8e0..416c891 100644 (file)
@@ -1308,6 +1308,7 @@ static void ip6gre_tap_setup(struct net_device *dev)
 
        ether_setup(dev);
 
+       dev->max_mtu = 0;
        dev->netdev_ops = &ip6gre_tap_netdev_ops;
        dev->needs_free_netdev = true;
        dev->priv_destructor = ip6gre_dev_free;
index 5110a41..f7dd51c 100644 (file)
@@ -166,6 +166,14 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
                            !(IP6CB(skb)->flags & IP6SKB_REROUTED));
 }
 
+static bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
+{
+       if (!np->autoflowlabel_set)
+               return ip6_default_np_autolabel(net);
+       else
+               return np->autoflowlabel;
+}
+
 /*
  * xmit an sk_buff (used by TCP, SCTP and DCCP)
  * Note : socket lock is not held for SYNACK packets, but might be modified
@@ -230,7 +238,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
                hlimit = ip6_dst_hoplimit(dst);
 
        ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
-                                                    np->autoflowlabel, fl6));
+                               ip6_autoflowlabel(net, np), fl6));
 
        hdr->payload_len = htons(seg_len);
        hdr->nexthdr = proto;
@@ -1626,7 +1634,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
 
        ip6_flow_hdr(hdr, v6_cork->tclass,
                     ip6_make_flowlabel(net, skb, fl6->flowlabel,
-                                       np->autoflowlabel, fl6));
+                                       ip6_autoflowlabel(net, np), fl6));
        hdr->hop_limit = v6_cork->hop_limit;
        hdr->nexthdr = proto;
        hdr->saddr = fl6->saddr;
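The ip6_autoflowlabel() helper replaces latching the per-net default into the socket at creation time (the initialization removed in the inet6_create() hunk above): the per-socket value now takes effect only once IPV6_AUTOFLOWLABEL has actually been set, which the np->autoflowlabel_set flag added in the ipv6_setsockopt() hunk below records, so later sysctl changes still reach sockets that never configured the option. A standalone sketch of the explicit-override pattern (hypothetical types):

    #include <stdbool.h>
    #include <stdio.h>

    struct sock_opts {
            bool autoflowlabel;      /* meaningful only if ..._set */
            bool autoflowlabel_set;  /* has setsockopt() touched it? */
    };

    static bool effective_autoflowlabel(bool net_default,
                                        const struct sock_opts *so)
    {
            return so->autoflowlabel_set ? so->autoflowlabel : net_default;
    }

    int main(void)
    {
            struct sock_opts untouched = { false, false };
            struct sock_opts opted_out = { false, true };

            /* An untouched socket follows the (possibly updated) default. */
            printf("%d %d\n",
                   effective_autoflowlabel(true, &untouched),   /* 1 */
                   effective_autoflowlabel(true, &opted_out));  /* 0 */
            return 0;
    }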
index db84f52..931c38f 100644 (file)
@@ -1123,8 +1123,13 @@ route_lookup:
                max_headroom += 8;
                mtu -= 8;
        }
-       if (mtu < IPV6_MIN_MTU)
-               mtu = IPV6_MIN_MTU;
+       if (skb->protocol == htons(ETH_P_IPV6)) {
+               if (mtu < IPV6_MIN_MTU)
+                       mtu = IPV6_MIN_MTU;
+       } else if (mtu < 576) {
+               mtu = 576;
+       }
+
        if (skb_dst(skb) && !t->parms.collect_md)
                skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
        if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) {
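The MTU floor in the tunnel transmit path now depends on the inner protocol: IPv6 payloads keep the IPV6_MIN_MTU (1280) minimum, while anything else (typically IPv4 carried over the v6 tunnel) gets the classic 576-byte floor instead of being inflated to 1280. The clamp as a pure function (IPV6_MIN_MTU as above; the 576 literal given a hypothetical name):

    #include <stdio.h>

    #define IPV6_MIN_MTU 1280
    #define IPV4_MIN_MTU 576   /* hypothetical name for the literal above */

    static int clamp_tunnel_mtu(int mtu, int payload_is_ipv6)
    {
            if (payload_is_ipv6) {
                    if (mtu < IPV6_MIN_MTU)
                            mtu = IPV6_MIN_MTU;
            } else if (mtu < IPV4_MIN_MTU) {
                    mtu = IPV4_MIN_MTU;
            }
            return mtu;
    }

    int main(void)
    {
            printf("%d %d\n",
                   clamp_tunnel_mtu(600, 1),   /* 1280: IPv6 floor applies */
                   clamp_tunnel_mtu(600, 0));  /* 600: above the 576 floor */
            return 0;
    }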
index b9404fe..2d4680e 100644 (file)
@@ -886,6 +886,7 @@ pref_skip_coa:
                break;
        case IPV6_AUTOFLOWLABEL:
                np->autoflowlabel = valbool;
+               np->autoflowlabel_set = 1;
                retv = 0;
                break;
        case IPV6_RECVFRAGSIZE:
index 7a8d150..0458b76 100644 (file)
@@ -2336,6 +2336,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
        }
 
        rt->dst.flags |= DST_HOST;
+       rt->dst.input = ip6_input;
        rt->dst.output  = ip6_output;
        rt->rt6i_gateway  = fl6->daddr;
        rt->rt6i_dst.addr = fl6->daddr;
@@ -4297,19 +4298,13 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
                if (!ipv6_addr_any(&fl6.saddr))
                        flags |= RT6_LOOKUP_F_HAS_SADDR;
 
-               if (!fibmatch)
-                       dst = ip6_route_input_lookup(net, dev, &fl6, flags);
-               else
-                       dst = ip6_route_lookup(net, &fl6, 0);
+               dst = ip6_route_input_lookup(net, dev, &fl6, flags);
 
                rcu_read_unlock();
        } else {
                fl6.flowi6_oif = oif;
 
-               if (!fibmatch)
-                       dst = ip6_route_output(net, NULL, &fl6);
-               else
-                       dst = ip6_route_lookup(net, &fl6, 0);
+               dst = ip6_route_output(net, NULL, &fl6);
        }
 
 
@@ -4326,6 +4321,15 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
                goto errout;
        }
 
+       if (fibmatch && rt->dst.from) {
+               struct rt6_info *ort = container_of(rt->dst.from,
+                                                   struct rt6_info, dst);
+
+               dst_hold(&ort->dst);
+               ip6_rt_put(rt);
+               rt = ort;
+       }
+
        skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
        if (!skb) {
                ip6_rt_put(rt);
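For RTM_F_FIB_MATCH, inet6_rtm_getroute() now performs the ordinary route lookup and then walks rt->dst.from back to the FIB-level entry with container_of(), taking a reference on the parent before dropping the clone, instead of issuing a second, differently-flagged lookup. The container_of() idiom in a standalone sketch (generic structures, not the rt6_info layout):

    #include <stddef.h>
    #include <stdio.h>

    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    struct dst { int refcnt; };
    struct route { int id; struct dst dst; };

    int main(void)
    {
            struct route r = { .id = 42 };
            struct dst *d = &r.dst;

            /* Recover the enclosing route from its embedded member. */
            struct route *back = container_of(d, struct route, dst);
            printf("route id %d\n", back->id); /* 42 */
            return 0;
    }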
index dbe2379..f039064 100644 (file)
@@ -579,6 +579,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
                        return -EINVAL;
 
                skb_reset_network_header(skb);
+               key->eth.type = skb->protocol;
        } else {
                eth = eth_hdr(skb);
                ether_addr_copy(key->eth.src, eth->h_source);
@@ -592,15 +593,23 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
                if (unlikely(parse_vlan(skb, key)))
                        return -ENOMEM;
 
-               skb->protocol = parse_ethertype(skb);
-               if (unlikely(skb->protocol == htons(0)))
+               key->eth.type = parse_ethertype(skb);
+               if (unlikely(key->eth.type == htons(0)))
                        return -ENOMEM;
 
+               /* Multiple tagged packets need to retain TPID to satisfy
+                * skb_vlan_pop(), which will later shift the ethertype into
+                * skb->protocol.
+                */
+               if (key->eth.cvlan.tci & htons(VLAN_TAG_PRESENT))
+                       skb->protocol = key->eth.cvlan.tpid;
+               else
+                       skb->protocol = key->eth.type;
+
                skb_reset_network_header(skb);
                __skb_push(skb, skb->data - skb_mac_header(skb));
        }
        skb_reset_mac_len(skb);
-       key->eth.type = skb->protocol;
 
        /* Network layer. */
        if (key->eth.type == htons(ETH_P_IP)) {
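key_extract() used to overwrite skb->protocol with the innermost ethertype, which broke the later skb_vlan_pop() on double-tagged frames: that helper expects skb->protocol to still hold the TPID while a tag remains to be popped. The fix keeps the real ethertype in key->eth.type and chooses skb->protocol accordingly. The selection logic as a standalone sketch (illustrative constant value, not the kernel's definition):

    #include <stdint.h>
    #include <stdio.h>

    #define VLAN_TAG_PRESENT 0x1000u  /* illustrative value only */

    struct vlan { uint16_t tpid, tci; };

    static uint16_t wire_protocol(const struct vlan *inner, uint16_t ethertype)
    {
            /* Keep the TPID visible while an inner tag remains. */
            return (inner->tci & VLAN_TAG_PRESENT) ? inner->tpid : ethertype;
    }

    int main(void)
    {
            struct vlan tagged = { 0x8100, VLAN_TAG_PRESENT };
            struct vlan bare = { 0, 0 };

            printf("%#x %#x\n",
                   wire_protocol(&tagged, 0x0800),  /* 0x8100: TPID kept */
                   wire_protocol(&bare, 0x0800));   /* 0x800: ethertype */
            return 0;
    }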
index 6fe798c..8d78e7f 100644 (file)
@@ -42,7 +42,6 @@ struct cls_bpf_prog {
        struct list_head link;
        struct tcf_result res;
        bool exts_integrated;
-       bool offloaded;
        u32 gen_flags;
        struct tcf_exts exts;
        u32 handle;
@@ -148,33 +147,37 @@ static bool cls_bpf_is_ebpf(const struct cls_bpf_prog *prog)
 }
 
 static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
-                              enum tc_clsbpf_command cmd)
+                              struct cls_bpf_prog *oldprog)
 {
-       bool addorrep = cmd == TC_CLSBPF_ADD || cmd == TC_CLSBPF_REPLACE;
        struct tcf_block *block = tp->chain->block;
-       bool skip_sw = tc_skip_sw(prog->gen_flags);
        struct tc_cls_bpf_offload cls_bpf = {};
+       struct cls_bpf_prog *obj;
+       bool skip_sw;
        int err;
 
+       skip_sw = prog && tc_skip_sw(prog->gen_flags);
+       obj = prog ?: oldprog;
+
        tc_cls_common_offload_init(&cls_bpf.common, tp);
-       cls_bpf.command = cmd;
-       cls_bpf.exts = &prog->exts;
-       cls_bpf.prog = prog->filter;
-       cls_bpf.name = prog->bpf_name;
-       cls_bpf.exts_integrated = prog->exts_integrated;
-       cls_bpf.gen_flags = prog->gen_flags;
+       cls_bpf.command = TC_CLSBPF_OFFLOAD;
+       cls_bpf.exts = &obj->exts;
+       cls_bpf.prog = prog ? prog->filter : NULL;
+       cls_bpf.oldprog = oldprog ? oldprog->filter : NULL;
+       cls_bpf.name = obj->bpf_name;
+       cls_bpf.exts_integrated = obj->exts_integrated;
+       cls_bpf.gen_flags = obj->gen_flags;
 
        err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, skip_sw);
-       if (addorrep) {
+       if (prog) {
                if (err < 0) {
-                       cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY);
+                       cls_bpf_offload_cmd(tp, oldprog, prog);
                        return err;
                } else if (err > 0) {
                        prog->gen_flags |= TCA_CLS_FLAGS_IN_HW;
                }
        }
 
-       if (addorrep && skip_sw && !(prog->gen_flags & TCA_CLS_FLAGS_IN_HW))
+       if (prog && skip_sw && !(prog->gen_flags & TCA_CLS_FLAGS_IN_HW))
                return -EINVAL;
 
        return 0;
@@ -183,38 +186,17 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
 static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
                           struct cls_bpf_prog *oldprog)
 {
-       struct cls_bpf_prog *obj = prog;
-       enum tc_clsbpf_command cmd;
-       bool skip_sw;
-       int ret;
-
-       skip_sw = tc_skip_sw(prog->gen_flags) ||
-               (oldprog && tc_skip_sw(oldprog->gen_flags));
-
-       if (oldprog && oldprog->offloaded) {
-               if (!tc_skip_hw(prog->gen_flags)) {
-                       cmd = TC_CLSBPF_REPLACE;
-               } else if (!tc_skip_sw(prog->gen_flags)) {
-                       obj = oldprog;
-                       cmd = TC_CLSBPF_DESTROY;
-               } else {
-                       return -EINVAL;
-               }
-       } else {
-               if (tc_skip_hw(prog->gen_flags))
-                       return skip_sw ? -EINVAL : 0;
-               cmd = TC_CLSBPF_ADD;
-       }
-
-       ret = cls_bpf_offload_cmd(tp, obj, cmd);
-       if (ret)
-               return ret;
+       if (prog && oldprog && prog->gen_flags != oldprog->gen_flags)
+               return -EINVAL;
 
-       obj->offloaded = true;
-       if (oldprog)
-               oldprog->offloaded = false;
+       if (prog && tc_skip_hw(prog->gen_flags))
+               prog = NULL;
+       if (oldprog && tc_skip_hw(oldprog->gen_flags))
+               oldprog = NULL;
+       if (!prog && !oldprog)
+               return 0;
 
-       return 0;
+       return cls_bpf_offload_cmd(tp, prog, oldprog);
 }
 
 static void cls_bpf_stop_offload(struct tcf_proto *tp,
@@ -222,25 +204,26 @@ static void cls_bpf_stop_offload(struct tcf_proto *tp,
 {
        int err;
 
-       if (!prog->offloaded)
-               return;
-
-       err = cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY);
-       if (err) {
+       err = cls_bpf_offload_cmd(tp, NULL, prog);
+       if (err)
                pr_err("Stopping hardware offload failed: %d\n", err);
-               return;
-       }
-
-       prog->offloaded = false;
 }
 
 static void cls_bpf_offload_update_stats(struct tcf_proto *tp,
                                         struct cls_bpf_prog *prog)
 {
-       if (!prog->offloaded)
-               return;
+       struct tcf_block *block = tp->chain->block;
+       struct tc_cls_bpf_offload cls_bpf = {};
+
+       tc_cls_common_offload_init(&cls_bpf.common, tp);
+       cls_bpf.command = TC_CLSBPF_STATS;
+       cls_bpf.exts = &prog->exts;
+       cls_bpf.prog = prog->filter;
+       cls_bpf.name = prog->bpf_name;
+       cls_bpf.exts_integrated = prog->exts_integrated;
+       cls_bpf.gen_flags = prog->gen_flags;
 
-       cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_STATS);
+       tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, false);
 }
 
 static int cls_bpf_init(struct tcf_proto *tp)
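The rewrite collapses the old ADD/REPLACE/DESTROY command triple (and the offloaded bookkeeping flag) into a single TC_CLSBPF_OFFLOAD call parameterized by the (prog, oldprog) pair; a program marked skip_hw is simply treated as absent, so replace-with-nothing naturally degenerates into a destroy. The normalization step as a standalone sketch (bare structs standing in for cls_bpf_prog):

    #include <stdbool.h>
    #include <stdio.h>

    struct prog { bool skip_hw; };

    /* NULL out anything hardware never sees; both NULL means no call. */
    static int normalize(struct prog **prog, struct prog **oldprog)
    {
            if (*prog && (*prog)->skip_hw)
                    *prog = NULL;
            if (*oldprog && (*oldprog)->skip_hw)
                    *oldprog = NULL;
            return *prog || *oldprog;  /* 1: issue the offload call */
    }

    int main(void)
    {
            struct prog sw = { true }, hw = { false };
            struct prog *p = &sw, *op = &hw;

            /* New prog is software-only, old one was offloaded: the
             * single call becomes a destroy of the old program. */
            printf("call=%d prog=%p\n", normalize(&p, &op), (void *)p);
            return 0;
    }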
index 3f619fd..291c97b 100644 (file)
@@ -78,6 +78,9 @@ const char *sctp_cname(const union sctp_subtype cid)
        case SCTP_CID_AUTH:
                return "AUTH";
 
+       case SCTP_CID_RECONF:
+               return "RECONF";
+
        default:
                break;
        }
index a71be33..e36ec5d 100644 (file)
@@ -1084,29 +1084,21 @@ void sctp_ulpq_partial_delivery(struct sctp_ulpq *ulpq,
 void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
                      gfp_t gfp)
 {
-       struct sctp_association *asoc;
-       __u16 needed, freed;
-
-       asoc = ulpq->asoc;
+       struct sctp_association *asoc = ulpq->asoc;
+       __u32 freed = 0;
+       __u16 needed;
 
-       if (chunk) {
-               needed = ntohs(chunk->chunk_hdr->length);
-               needed -= sizeof(struct sctp_data_chunk);
-       } else
-               needed = SCTP_DEFAULT_MAXWINDOW;
-
-       freed = 0;
+       needed = ntohs(chunk->chunk_hdr->length) -
+                sizeof(struct sctp_data_chunk);
 
        if (skb_queue_empty(&asoc->base.sk->sk_receive_queue)) {
                freed = sctp_ulpq_renege_order(ulpq, needed);
-               if (freed < needed) {
+               if (freed < needed)
                        freed += sctp_ulpq_renege_frags(ulpq, needed - freed);
-               }
        }
        /* If able to free enough room, accept this chunk. */
-       if (chunk && (freed >= needed)) {
-               int retval;
-               retval = sctp_ulpq_tail_data(ulpq, chunk, gfp);
+       if (freed >= needed) {
+               int retval = sctp_ulpq_tail_data(ulpq, chunk, gfp);
                /*
                 * Enter partial delivery if chunk has not been
                 * delivered; otherwise, drain the reassembly queue.
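Besides dropping the chunk == NULL branch (the new code assumes renege is always given a chunk), the hunk widens freed from __u16 to __u32: the two renege passes can each reclaim a large amount of queued data, and their 16-bit sum could wrap and spuriously fail the freed >= needed test. The wrap, demonstrated standalone:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint16_t pass1 = 40000, pass2 = 40000, needed = 50000;
            uint16_t freed16 = pass1 + pass2;            /* 80000 wraps to 14464 */
            uint32_t freed32 = (uint32_t)pass1 + pass2;  /* stays 80000 */

            printf("u16: %u >= %u? %d\n", (unsigned)freed16, (unsigned)needed,
                   freed16 >= needed);  /* 0: accept path wrongly skipped */
            printf("u32: %u >= %u? %d\n", (unsigned)freed32, (unsigned)needed,
                   freed32 >= needed);  /* 1 */
            return 0;
    }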
index 95fec2c..7ebbdeb 100644 (file)
@@ -351,8 +351,7 @@ void tipc_group_update_member(struct tipc_member *m, int len)
        if (m->window >= ADV_IDLE)
                return;
 
-       if (!list_empty(&m->congested))
-               return;
+       list_del_init(&m->congested);
 
        /* Sort member into congested members' list */
        list_for_each_entry_safe(_m, tmp, &grp->congested, congested) {
@@ -648,6 +647,7 @@ static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m,
        } else if (mtyp == GRP_REMIT_MSG) {
                msg_set_grp_remitted(hdr, m->window);
        }
+       msg_set_dest_droppable(hdr, true);
        __skb_queue_tail(xmitq, skb);
 }
 
@@ -689,15 +689,16 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
                        msg_set_grp_bc_seqno(ehdr, m->bc_syncpt);
                        __skb_queue_tail(inputq, m->event_msg);
                }
-               if (m->window < ADV_IDLE)
-                       tipc_group_update_member(m, 0);
-               else
-                       list_del_init(&m->congested);
+               list_del_init(&m->congested);
+               tipc_group_update_member(m, 0);
                return;
        case GRP_LEAVE_MSG:
                if (!m)
                        return;
                m->bc_syncpt = msg_grp_bc_syncpt(hdr);
+               list_del_init(&m->list);
+               list_del_init(&m->congested);
+               *usr_wakeup = true;
 
                /* Wait until WITHDRAW event is received */
                if (m->state != MBR_LEAVING) {
@@ -709,8 +710,6 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
                ehdr = buf_msg(m->event_msg);
                msg_set_grp_bc_seqno(ehdr, m->bc_syncpt);
                __skb_queue_tail(inputq, m->event_msg);
-               *usr_wakeup = true;
-               list_del_init(&m->congested);
                return;
        case GRP_ADV_MSG:
                if (!m)
@@ -862,6 +861,7 @@ void tipc_group_member_evt(struct tipc_group *grp,
                                msg_set_grp_bc_seqno(hdr, m->bc_rcv_nxt);
                        __skb_queue_tail(inputq, skb);
                }
+               list_del_init(&m->list);
                list_del_init(&m->congested);
        }
        *sk_rcvbuf = tipc_group_rcvbuf_limit(grp);
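tipc_group_update_member() previously bailed out if the member was already on the congested list, leaving it sorted by a stale advertised window; it now unconditionally unlinks and re-sorts. That only works because list_del_init() is idempotent: it leaves the node self-linked, so unlinking a node that is not on any list is a harmless no-op, which also lets the LEAVE and member-event paths above unlink without checking first. A compact sketch of the property (minimal circular list, same shape as include/linux/list.h):

    #include <stdio.h>

    struct node { struct node *prev, *next; };

    static void init(struct node *n) { n->prev = n->next = n; }

    static void del_init(struct node *n)
    {
            n->prev->next = n->next;  /* unlink from whatever list... */
            n->next->prev = n->prev;
            init(n);  /* ...and self-link so a repeat is a no-op */
    }

    int main(void)
    {
            struct node head, a;

            init(&head);
            init(&a);
            /* insert a after head */
            a.next = head.next;
            a.prev = &head;
            head.next->prev = &a;
            head.next = &a;

            del_init(&a);
            del_init(&a);  /* safe: a is self-linked, head untouched */
            printf("list empty: %d\n", head.next == &head); /* 1 */
            return 0;
    }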
index d7d6cb0..1d84f91 100644 (file)
@@ -23,27 +23,14 @@ ifneq ($(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR),)
 cfg80211-y += extra-certs.o
 endif
 
-$(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.x509)
+$(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.hex)
        @$(kecho) "  GEN     $@"
-       @(set -e; \
-         allf=""; \
-         for f in $^ ; do \
-             # similar to hexdump -v -e '1/1 "0x%.2x," "\n"' \
-             thisf=$$(od -An -v -tx1 < $$f | \
-                          sed -e 's/ /\n/g' | \
-                          sed -e 's/^[0-9a-f]\+$$/\0/;t;d' | \
-                          sed -e 's/^/0x/;s/$$/,/'); \
-             # file should not be empty - maybe command substitution failed? \
-             test ! -z "$$thisf";\
-             allf=$$allf$$thisf;\
-         done; \
-         ( \
-             echo '#include "reg.h"'; \
-             echo 'const u8 shipped_regdb_certs[] = {'; \
-             echo "$$allf"; \
-             echo '};'; \
-             echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);'; \
-         ) >> $@)
+       @(echo '#include "reg.h"'; \
+         echo 'const u8 shipped_regdb_certs[] = {'; \
+         cat $^ ; \
+         echo '};'; \
+         echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);'; \
+        ) > $@
 
 $(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%) \
                      $(wildcard $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%)/*.x509)
@@ -66,4 +53,6 @@ $(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%) \
              echo "$$allf"; \
              echo '};'; \
              echo 'unsigned int extra_regdb_certs_len = sizeof(extra_regdb_certs);'; \
-         ) >> $@)
+         ) > $@)
+
+clean-files += shipped-certs.c extra-certs.c
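The fragile od/sed pipeline is gone: certificates are now committed as pre-rendered .hex fragments (one follows below) and the recipe simply concatenates them between the array braces. Note also the switch from >> to > so a stale target is overwritten rather than appended to, and the new clean-files entry so the generated sources are removed on make clean. Going by the recipe above, the generated shipped-certs.c should look roughly like this (abbreviated):

    #include "reg.h"
    const u8 shipped_regdb_certs[] = {
    /* Seth Forshee's regdb certificate */
    0x30, 0x82, 0x02, 0xa4, 0x30, 0x82, 0x01, 0x8c,
    /* ...remaining bytes concatenated from certs/*.hex... */
    };
    unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);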
diff --git a/net/wireless/certs/sforshee.hex b/net/wireless/certs/sforshee.hex
new file mode 100644 (file)
index 0000000..14ea666
--- /dev/null
@@ -0,0 +1,86 @@
+/* Seth Forshee's regdb certificate */
+0x30, 0x82, 0x02, 0xa4, 0x30, 0x82, 0x01, 0x8c,
+0x02, 0x09, 0x00, 0xb2, 0x8d, 0xdf, 0x47, 0xae,
+0xf9, 0xce, 0xa7, 0x30, 0x0d, 0x06, 0x09, 0x2a,
+0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, 0x0b,
+0x05, 0x00, 0x30, 0x13, 0x31, 0x11, 0x30, 0x0f,
+0x06, 0x03, 0x55, 0x04, 0x03, 0x0c, 0x08, 0x73,
+0x66, 0x6f, 0x72, 0x73, 0x68, 0x65, 0x65, 0x30,
+0x20, 0x17, 0x0d, 0x31, 0x37, 0x31, 0x30, 0x30,
+0x36, 0x31, 0x39, 0x34, 0x30, 0x33, 0x35, 0x5a,
+0x18, 0x0f, 0x32, 0x31, 0x31, 0x37, 0x30, 0x39,
+0x31, 0x32, 0x31, 0x39, 0x34, 0x30, 0x33, 0x35,
+0x5a, 0x30, 0x13, 0x31, 0x11, 0x30, 0x0f, 0x06,
+0x03, 0x55, 0x04, 0x03, 0x0c, 0x08, 0x73, 0x66,
+0x6f, 0x72, 0x73, 0x68, 0x65, 0x65, 0x30, 0x82,
+0x01, 0x22, 0x30, 0x0d, 0x06, 0x09, 0x2a, 0x86,
+0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, 0x01, 0x05,
+0x00, 0x03, 0x82, 0x01, 0x0f, 0x00, 0x30, 0x82,
+0x01, 0x0a, 0x02, 0x82, 0x01, 0x01, 0x00, 0xb5,
+0x40, 0xe3, 0x9c, 0x28, 0x84, 0x39, 0x03, 0xf2,
+0x39, 0xd7, 0x66, 0x2c, 0x41, 0x38, 0x15, 0xac,
+0x7e, 0xa5, 0x83, 0x71, 0x25, 0x7e, 0x90, 0x7c,
+0x68, 0xdd, 0x6f, 0x3f, 0xd9, 0xd7, 0x59, 0x38,
+0x9f, 0x7c, 0x6a, 0x52, 0xc2, 0x03, 0x2a, 0x2d,
+0x7e, 0x66, 0xf4, 0x1e, 0xb3, 0x12, 0x70, 0x20,
+0x5b, 0xd4, 0x97, 0x32, 0x3d, 0x71, 0x8b, 0x3b,
+0x1b, 0x08, 0x17, 0x14, 0x6b, 0x61, 0xc4, 0x57,
+0x8b, 0x96, 0x16, 0x1c, 0xfd, 0x24, 0xd5, 0x0b,
+0x09, 0xf9, 0x68, 0x11, 0x84, 0xfb, 0xca, 0x51,
+0x0c, 0xd1, 0x45, 0x19, 0xda, 0x10, 0x44, 0x8a,
+0xd9, 0xfe, 0x76, 0xa9, 0xfd, 0x60, 0x2d, 0x18,
+0x0b, 0x28, 0x95, 0xb2, 0x2d, 0xea, 0x88, 0x98,
+0xb8, 0xd1, 0x56, 0x21, 0xf0, 0x53, 0x1f, 0xf1,
+0x02, 0x6f, 0xe9, 0x46, 0x9b, 0x93, 0x5f, 0x28,
+0x90, 0x0f, 0xac, 0x36, 0xfa, 0x68, 0x23, 0x71,
+0x57, 0x56, 0xf6, 0xcc, 0xd3, 0xdf, 0x7d, 0x2a,
+0xd9, 0x1b, 0x73, 0x45, 0xeb, 0xba, 0x27, 0x85,
+0xef, 0x7a, 0x7f, 0xa5, 0xcb, 0x80, 0xc7, 0x30,
+0x36, 0xd2, 0x53, 0xee, 0xec, 0xac, 0x1e, 0xe7,
+0x31, 0xf1, 0x36, 0xa2, 0x9c, 0x63, 0xc6, 0x65,
+0x5b, 0x7f, 0x25, 0x75, 0x68, 0xa1, 0xea, 0xd3,
+0x7e, 0x00, 0x5c, 0x9a, 0x5e, 0xd8, 0x20, 0x18,
+0x32, 0x77, 0x07, 0x29, 0x12, 0x66, 0x1e, 0x36,
+0x73, 0xe7, 0x97, 0x04, 0x41, 0x37, 0xb1, 0xb1,
+0x72, 0x2b, 0xf4, 0xa1, 0x29, 0x20, 0x7c, 0x96,
+0x79, 0x0b, 0x2b, 0xd0, 0xd8, 0xde, 0xc8, 0x6c,
+0x3f, 0x93, 0xfb, 0xc5, 0xee, 0x78, 0x52, 0x11,
+0x15, 0x1b, 0x7a, 0xf6, 0xe2, 0x68, 0x99, 0xe7,
+0xfb, 0x46, 0x16, 0x84, 0xe3, 0xc7, 0xa1, 0xe6,
+0xe0, 0xd2, 0x46, 0xd5, 0xe1, 0xc4, 0x5f, 0xa0,
+0x66, 0xf4, 0xda, 0xc4, 0xff, 0x95, 0x1d, 0x02,
+0x03, 0x01, 0x00, 0x01, 0x30, 0x0d, 0x06, 0x09,
+0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01,
+0x0b, 0x05, 0x00, 0x03, 0x82, 0x01, 0x01, 0x00,
+0x87, 0x03, 0xda, 0xf2, 0x82, 0xc2, 0xdd, 0xaf,
+0x7c, 0x44, 0x2f, 0x86, 0xd3, 0x5f, 0x4c, 0x93,
+0x48, 0xb9, 0xfe, 0x07, 0x17, 0xbb, 0x21, 0xf7,
+0x25, 0x23, 0x4e, 0xaa, 0x22, 0x0c, 0x16, 0xb9,
+0x73, 0xae, 0x9d, 0x46, 0x7c, 0x75, 0xd9, 0xc3,
+0x49, 0x57, 0x47, 0xbf, 0x33, 0xb7, 0x97, 0xec,
+0xf5, 0x40, 0x75, 0xc0, 0x46, 0x22, 0xf0, 0xa0,
+0x5d, 0x9c, 0x79, 0x13, 0xa1, 0xff, 0xb8, 0xa3,
+0x2f, 0x7b, 0x8e, 0x06, 0x3f, 0xc8, 0xb6, 0xe4,
+0x6a, 0x28, 0xf2, 0x34, 0x5c, 0x23, 0x3f, 0x32,
+0xc0, 0xe6, 0xad, 0x0f, 0xac, 0xcf, 0x55, 0x74,
+0x47, 0x73, 0xd3, 0x01, 0x85, 0xb7, 0x0b, 0x22,
+0x56, 0x24, 0x7d, 0x9f, 0x09, 0xa9, 0x0e, 0x86,
+0x9e, 0x37, 0x5b, 0x9c, 0x6d, 0x02, 0xd9, 0x8c,
+0xc8, 0x50, 0x6a, 0xe2, 0x59, 0xf3, 0x16, 0x06,
+0xea, 0xb2, 0x42, 0xb5, 0x58, 0xfe, 0xba, 0xd1,
+0x81, 0x57, 0x1a, 0xef, 0xb2, 0x38, 0x88, 0x58,
+0xf6, 0xaa, 0xc4, 0x2e, 0x8b, 0x5a, 0x27, 0xe4,
+0xa5, 0xe8, 0xa4, 0xca, 0x67, 0x5c, 0xac, 0x72,
+0x67, 0xc3, 0x6f, 0x13, 0xc3, 0x2d, 0x35, 0x79,
+0xd7, 0x8a, 0xe7, 0xf5, 0xd4, 0x21, 0x30, 0x4a,
+0xd5, 0xf6, 0xa3, 0xd9, 0x79, 0x56, 0xf2, 0x0f,
+0x10, 0xf7, 0x7d, 0xd0, 0x51, 0x93, 0x2f, 0x47,
+0xf8, 0x7d, 0x4b, 0x0a, 0x84, 0x55, 0x12, 0x0a,
+0x7d, 0x4e, 0x3b, 0x1f, 0x2b, 0x2f, 0xfc, 0x28,
+0xb3, 0x69, 0x34, 0xe1, 0x80, 0x80, 0xbb, 0xe2,
+0xaf, 0xb9, 0xd6, 0x30, 0xf1, 0x1d, 0x54, 0x87,
+0x23, 0x99, 0x9f, 0x51, 0x03, 0x4c, 0x45, 0x7d,
+0x02, 0x65, 0x73, 0xab, 0xfd, 0xcf, 0x94, 0xcc,
+0x0d, 0x3a, 0x60, 0xfd, 0x3c, 0x14, 0x2f, 0x16,
+0x33, 0xa9, 0x21, 0x1f, 0xcb, 0x50, 0xb1, 0x8f,
+0x03, 0xee, 0xa0, 0x66, 0xa9, 0x16, 0x79, 0x14,
diff --git a/net/wireless/certs/sforshee.x509 b/net/wireless/certs/sforshee.x509
deleted file mode 100644 (file)
index c6f8f9d..0000000
Binary files a/net/wireless/certs/sforshee.x509 and /dev/null differ
index b1ac23c..213d0c4 100644 (file)
@@ -2610,7 +2610,7 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
        case NL80211_IFTYPE_AP:
                if (wdev->ssid_len &&
                    nla_put(msg, NL80211_ATTR_SSID, wdev->ssid_len, wdev->ssid))
-                       goto nla_put_failure;
+                       goto nla_put_failure_locked;
                break;
        case NL80211_IFTYPE_STATION:
        case NL80211_IFTYPE_P2P_CLIENT:
@@ -2623,7 +2623,7 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
                if (!ssid_ie)
                        break;
                if (nla_put(msg, NL80211_ATTR_SSID, ssid_ie[1], ssid_ie + 2))
-                       goto nla_put_failure;
+                       goto nla_put_failure_locked;
                break;
                }
        default:
@@ -2635,6 +2635,8 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
        genlmsg_end(msg, hdr);
        return 0;
 
+ nla_put_failure_locked:
+       wdev_unlock(wdev);
  nla_put_failure:
        genlmsg_cancel(msg, hdr);
        return -EMSGSIZE;
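Both SSID emitters run with the wdev lock held (hence the wdev_unlock() in the new label), so jumping to the old nla_put_failure label leaked the lock; the new nla_put_failure_locked label releases it and then falls through to the common cleanup. The multi-label unwind pattern, standalone (a pthread mutex standing in for wdev_lock):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

    static int build_msg(int fail_step)
    {
            pthread_mutex_lock(&lock);
            if (fail_step == 1)
                    goto fail_locked;  /* must not leak the lock */
            pthread_mutex_unlock(&lock);

            if (fail_step == 2)
                    goto fail;         /* lock already released */
            return 0;

    fail_locked:
            pthread_mutex_unlock(&lock);
    fail:
            fprintf(stderr, "failed at step %d, lock released\n", fail_step);
            return -1;
    }

    int main(void)
    {
            return build_msg(1) == -1 ? 0 : 1;
    }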
index b3b353d..f055ca1 100644 (file)
@@ -579,15 +579,14 @@ static int snd_rawmidi_info_user(struct snd_rawmidi_substream *substream,
        return 0;
 }
 
-int snd_rawmidi_info_select(struct snd_card *card, struct snd_rawmidi_info *info)
+static int __snd_rawmidi_info_select(struct snd_card *card,
+                                    struct snd_rawmidi_info *info)
 {
        struct snd_rawmidi *rmidi;
        struct snd_rawmidi_str *pstr;
        struct snd_rawmidi_substream *substream;
 
-       mutex_lock(&register_mutex);
        rmidi = snd_rawmidi_search(card, info->device);
-       mutex_unlock(&register_mutex);
        if (!rmidi)
                return -ENXIO;
        if (info->stream < 0 || info->stream > 1)
@@ -603,6 +602,16 @@ int snd_rawmidi_info_select(struct snd_card *card, struct snd_rawmidi_info *info
        }
        return -ENXIO;
 }
+
+int snd_rawmidi_info_select(struct snd_card *card, struct snd_rawmidi_info *info)
+{
+       int ret;
+
+       mutex_lock(&register_mutex);
+       ret = __snd_rawmidi_info_select(card, info);
+       mutex_unlock(&register_mutex);
+       return ret;
+}
 EXPORT_SYMBOL(snd_rawmidi_info_select);
 
 static int snd_rawmidi_info_select_user(struct snd_card *card,
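Holding register_mutex only around snd_rawmidi_search() left a window in which the rawmidi device could be unregistered between lookup and use. Splitting the body into a locked helper and a wrapper that holds the mutex across the whole operation is the standard cure. The lookup-and-use-under-one-lock shape, standalone (pthread mutex, hypothetical device table):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t registry_lock = PTHREAD_MUTEX_INITIALIZER;
    static int registry[4] = { 0, 7, 0, 0 };  /* hypothetical devices */

    /* Call only with registry_lock held. */
    static int __info_select(int dev)
    {
            if (dev < 0 || dev >= 4 || !registry[dev])
                    return -1;     /* -ENXIO in the original */
            return registry[dev];  /* still valid: lock is held */
    }

    static int info_select(int dev)
    {
            int ret;

            pthread_mutex_lock(&registry_lock);
            ret = __info_select(dev);  /* lookup and use, one section */
            pthread_mutex_unlock(&registry_lock);
            return ret;
    }

    int main(void)
    {
            printf("%d %d\n", info_select(1), info_select(2)); /* 7 -1 */
            return 0;
    }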
index 038a180..cbe818e 100644 (file)
@@ -325,7 +325,7 @@ static int hdac_component_master_match(struct device *dev, void *data)
  */
 int snd_hdac_i915_register_notifier(const struct i915_audio_component_audio_ops *aops)
 {
-       if (WARN_ON(!hdac_acomp))
+       if (!hdac_acomp)
                return -ENODEV;
 
        hdac_acomp->audio_ops = aops;
index a81aacf..37e1cf8 100644 (file)
@@ -271,6 +271,8 @@ enum {
        CXT_FIXUP_HP_SPECTRE,
        CXT_FIXUP_HP_GATE_MIC,
        CXT_FIXUP_MUTE_LED_GPIO,
+       CXT_FIXUP_HEADSET_MIC,
+       CXT_FIXUP_HP_MIC_NO_PRESENCE,
 };
 
 /* for hda_fixup_thinkpad_acpi() */
@@ -350,6 +352,18 @@ static void cxt_fixup_headphone_mic(struct hda_codec *codec,
        }
 }
 
+static void cxt_fixup_headset_mic(struct hda_codec *codec,
+                                   const struct hda_fixup *fix, int action)
+{
+       struct conexant_spec *spec = codec->spec;
+
+       switch (action) {
+       case HDA_FIXUP_ACT_PRE_PROBE:
+               spec->parse_flags |= HDA_PINCFG_HEADSET_MIC;
+               break;
+       }
+}
+
 /* OLPC XO 1.5 fixup */
 
 /* OLPC XO-1.5 supports DC input mode (e.g. for use with analog sensors)
@@ -880,6 +894,19 @@ static const struct hda_fixup cxt_fixups[] = {
                .type = HDA_FIXUP_FUNC,
                .v.func = cxt_fixup_mute_led_gpio,
        },
+       [CXT_FIXUP_HEADSET_MIC] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = cxt_fixup_headset_mic,
+       },
+       [CXT_FIXUP_HP_MIC_NO_PRESENCE] = {
+               .type = HDA_FIXUP_PINS,
+               .v.pins = (const struct hda_pintbl[]) {
+                       { 0x1a, 0x02a1113c },
+                       { }
+               },
+               .chained = true,
+               .chain_id = CXT_FIXUP_HEADSET_MIC,
+       },
 };
 
 static const struct snd_pci_quirk cxt5045_fixups[] = {
@@ -934,6 +961,8 @@ static const struct snd_pci_quirk cxt5066_fixups[] = {
        SND_PCI_QUIRK(0x103c, 0x8115, "HP Z1 Gen3", CXT_FIXUP_HP_GATE_MIC),
        SND_PCI_QUIRK(0x103c, 0x814f, "HP ZBook 15u G3", CXT_FIXUP_MUTE_LED_GPIO),
        SND_PCI_QUIRK(0x103c, 0x822e, "HP ProBook 440 G4", CXT_FIXUP_MUTE_LED_GPIO),
+       SND_PCI_QUIRK(0x103c, 0x8299, "HP 800 G3 SFF", CXT_FIXUP_HP_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x103c, 0x829a, "HP 800 G3 DM", CXT_FIXUP_HP_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1043, 0x138d, "Asus", CXT_FIXUP_HEADPHONE_MIC_PIN),
        SND_PCI_QUIRK(0x152d, 0x0833, "OLPC XO-1.5", CXT_FIXUP_OLPC_XO),
        SND_PCI_QUIRK(0x17aa, 0x20f2, "Lenovo T400", CXT_PINCFG_LENOVO_TP410),
index c19c81d..b4f1b6e 100644 (file)
@@ -55,10 +55,11 @@ MODULE_PARM_DESC(static_hdmi_pcm, "Don't restrict PCM parameters per ELD info");
 #define is_kabylake(codec) ((codec)->core.vendor_id == 0x8086280b)
 #define is_geminilake(codec) (((codec)->core.vendor_id == 0x8086280d) || \
                                ((codec)->core.vendor_id == 0x80862800))
+#define is_cannonlake(codec) ((codec)->core.vendor_id == 0x8086280c)
 #define is_haswell_plus(codec) (is_haswell(codec) || is_broadwell(codec) \
                                || is_skylake(codec) || is_broxton(codec) \
-                               || is_kabylake(codec)) || is_geminilake(codec)
-
+                               || is_kabylake(codec)) || is_geminilake(codec) \
+                               || is_cannonlake(codec)
 #define is_valleyview(codec) ((codec)->core.vendor_id == 0x80862882)
 #define is_cherryview(codec) ((codec)->core.vendor_id == 0x80862883)
 #define is_valleyview_plus(codec) (is_valleyview(codec) || is_cherryview(codec))
@@ -3841,6 +3842,7 @@ HDA_CODEC_ENTRY(0x80862808, "Broadwell HDMI",     patch_i915_hsw_hdmi),
 HDA_CODEC_ENTRY(0x80862809, "Skylake HDMI",    patch_i915_hsw_hdmi),
 HDA_CODEC_ENTRY(0x8086280a, "Broxton HDMI",    patch_i915_hsw_hdmi),
 HDA_CODEC_ENTRY(0x8086280b, "Kabylake HDMI",   patch_i915_hsw_hdmi),
+HDA_CODEC_ENTRY(0x8086280c, "Cannonlake HDMI", patch_i915_glk_hdmi),
 HDA_CODEC_ENTRY(0x8086280d, "Geminilake HDMI", patch_i915_glk_hdmi),
 HDA_CODEC_ENTRY(0x80862800, "Geminilake HDMI", patch_i915_glk_hdmi),
 HDA_CODEC_ENTRY(0x80862880, "CedarTrail HDMI", patch_generic_hdmi),
index 4b21f71..8fd2d9c 100644 (file)
@@ -324,8 +324,12 @@ static void alc_fill_eapd_coef(struct hda_codec *codec)
        case 0x10ec0292:
                alc_update_coef_idx(codec, 0x4, 1<<15, 0);
                break;
-       case 0x10ec0215:
        case 0x10ec0225:
+       case 0x10ec0295:
+       case 0x10ec0299:
+               alc_update_coef_idx(codec, 0x67, 0xf000, 0x3000);
+               /* fallthrough */
+       case 0x10ec0215:
        case 0x10ec0233:
        case 0x10ec0236:
        case 0x10ec0255:
@@ -336,10 +340,8 @@ static void alc_fill_eapd_coef(struct hda_codec *codec)
        case 0x10ec0286:
        case 0x10ec0288:
        case 0x10ec0285:
-       case 0x10ec0295:
        case 0x10ec0298:
        case 0x10ec0289:
-       case 0x10ec0299:
                alc_update_coef_idx(codec, 0x10, 1<<9, 0);
                break;
        case 0x10ec0275:
@@ -5185,6 +5187,22 @@ static void alc233_alc662_fixup_lenovo_dual_codecs(struct hda_codec *codec,
        }
 }
 
+/* Forcibly assign NID 0x03 to HP/LO while NID 0x02 to SPK for EQ */
+static void alc274_fixup_bind_dacs(struct hda_codec *codec,
+                                   const struct hda_fixup *fix, int action)
+{
+       struct alc_spec *spec = codec->spec;
+       static hda_nid_t preferred_pairs[] = {
+               0x21, 0x03, 0x1b, 0x03, 0x16, 0x02,
+               0
+       };
+
+       if (action != HDA_FIXUP_ACT_PRE_PROBE)
+               return;
+
+       spec->gen.preferred_dacs = preferred_pairs;
+}
+
 /* for hda_fixup_thinkpad_acpi() */
 #include "thinkpad_helper.c"
 
@@ -5302,6 +5320,8 @@ enum {
        ALC233_FIXUP_LENOVO_MULTI_CODECS,
        ALC294_FIXUP_LENOVO_MIC_LOCATION,
        ALC700_FIXUP_INTEL_REFERENCE,
+       ALC274_FIXUP_DELL_BIND_DACS,
+       ALC274_FIXUP_DELL_AIO_LINEOUT_VERB,
 };
 
 static const struct hda_fixup alc269_fixups[] = {
@@ -6112,6 +6132,21 @@ static const struct hda_fixup alc269_fixups[] = {
                        {}
                }
        },
+       [ALC274_FIXUP_DELL_BIND_DACS] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = alc274_fixup_bind_dacs,
+               .chained = true,
+               .chain_id = ALC269_FIXUP_DELL1_MIC_NO_PRESENCE
+       },
+       [ALC274_FIXUP_DELL_AIO_LINEOUT_VERB] = {
+               .type = HDA_FIXUP_PINS,
+               .v.pins = (const struct hda_pintbl[]) {
+                       { 0x1b, 0x0401102f },
+                       { }
+               },
+               .chained = true,
+               .chain_id = ALC274_FIXUP_DELL_BIND_DACS
+       },
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -6295,6 +6330,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
        SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
        SND_PCI_QUIRK(0x17aa, 0x310c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION),
+       SND_PCI_QUIRK(0x17aa, 0x313c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION),
        SND_PCI_QUIRK(0x17aa, 0x3112, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
        SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI),
        SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC),
@@ -6552,6 +6588,11 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
        SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
                {0x1b, 0x01011020},
                {0x21, 0x02211010}),
+       SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
+               {0x12, 0x90a60130},
+               {0x14, 0x90170110},
+               {0x1b, 0x01011020},
+               {0x21, 0x0221101f}),
        SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
                {0x12, 0x90a60160},
                {0x14, 0x90170120},
@@ -6578,7 +6619,7 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
                {0x14, 0x90170110},
                {0x1b, 0x90a70130},
                {0x21, 0x03211020}),
-       SND_HDA_PIN_QUIRK(0x10ec0274, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE,
+       SND_HDA_PIN_QUIRK(0x10ec0274, 0x1028, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB,
                {0x12, 0xb7a60130},
                {0x13, 0xb8a61140},
                {0x16, 0x90170110},
index 9f521a5..b5e41df 100644 (file)
@@ -1051,6 +1051,11 @@ static int acp_audio_probe(struct platform_device *pdev)
        struct resource *res;
        const u32 *pdata = pdev->dev.platform_data;
 
+       if (!pdata) {
+               dev_err(&pdev->dev, "Missing platform data\n");
+               return -ENODEV;
+       }
+
        audio_drv_data = devm_kzalloc(&pdev->dev, sizeof(struct audio_drv_data),
                                        GFP_KERNEL);
        if (audio_drv_data == NULL)
@@ -1058,6 +1063,8 @@ static int acp_audio_probe(struct platform_device *pdev)
 
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        audio_drv_data->acp_mmio = devm_ioremap_resource(&pdev->dev, res);
+       if (IS_ERR(audio_drv_data->acp_mmio))
+               return PTR_ERR(audio_drv_data->acp_mmio);
 
        /* The following members get populated in device 'open'
         * function. Till then interrupts are disabled in 'acp_init'
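Two probe-path holes closed here: a missing platform_data pointer could be dereferenced later, and devm_ioremap_resource() was never checked. Note that devm_ioremap_resource() does not return NULL on failure; it returns an error code encoded in the pointer value, so the result must be screened with IS_ERR()/PTR_ERR() before use. A user-space sketch of that encoding (simplified versions of the kernel macros; relies on the usual trick that the top page of the address space is never a valid pointer):

    #include <stdio.h>

    #define MAX_ERRNO 4095

    /* Simplified ERR_PTR()/IS_ERR()/PTR_ERR() */
    static void *ERR_PTR(long error) { return (void *)error; }
    static long PTR_ERR(const void *ptr) { return (long)ptr; }
    static int IS_ERR(const void *ptr)
    {
            return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
    }

    static void *fake_ioremap(int fail)
    {
            return fail ? ERR_PTR(-12 /* ENOMEM */) : (void *)0x1000;
    }

    int main(void)
    {
            void *mmio = fake_ioremap(1);

            if (IS_ERR(mmio)) {
                    printf("probe fails: %ld\n", PTR_ERR(mmio)); /* -12 */
                    return 1;
            }
            return 0;
    }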
index 4a56f3d..dcee145 100644 (file)
@@ -64,7 +64,7 @@ config SND_AT91_SOC_SAM9X5_WM8731
 config SND_ATMEL_SOC_CLASSD
        tristate "Atmel ASoC driver for boards using CLASSD"
        depends on ARCH_AT91 || COMPILE_TEST
-       select SND_ATMEL_SOC_DMA
+       select SND_SOC_GENERIC_DMAENGINE_PCM
        select REGMAP_MMIO
        help
          Say Y if you want to add support for Atmel ASoC driver for boards using
index b2d42ec..56564ce 100644 (file)
@@ -2520,7 +2520,7 @@ static struct da7218_pdata *da7218_of_to_pdata(struct snd_soc_codec *codec)
        }
 
        if (da7218->dev_id == DA7218_DEV_ID) {
-               hpldet_np = of_find_node_by_name(np, "da7218_hpldet");
+               hpldet_np = of_get_child_by_name(np, "da7218_hpldet");
                if (!hpldet_np)
                        return pdata;
 
index 5f3c42c..066ea2f 100644 (file)
 #define MSM8916_WCD_ANALOG_RATES (SNDRV_PCM_RATE_8000 | SNDRV_PCM_RATE_16000 |\
                        SNDRV_PCM_RATE_32000 | SNDRV_PCM_RATE_48000)
 #define MSM8916_WCD_ANALOG_FORMATS (SNDRV_PCM_FMTBIT_S16_LE |\
-                                   SNDRV_PCM_FMTBIT_S24_LE)
+                                   SNDRV_PCM_FMTBIT_S32_LE)
 
 static int btn_mask = SND_JACK_BTN_0 | SND_JACK_BTN_1 |
               SND_JACK_BTN_2 | SND_JACK_BTN_3 | SND_JACK_BTN_4;
index a10a724..13354d6 100644 (file)
                                   SNDRV_PCM_RATE_32000 | \
                                   SNDRV_PCM_RATE_48000)
 #define MSM8916_WCD_DIGITAL_FORMATS (SNDRV_PCM_FMTBIT_S16_LE |\
-                                    SNDRV_PCM_FMTBIT_S24_LE)
+                                    SNDRV_PCM_FMTBIT_S32_LE)
 
 struct msm8916_wcd_digital_priv {
        struct clk *ahbclk, *mclk;
@@ -645,7 +645,7 @@ static int msm8916_wcd_digital_hw_params(struct snd_pcm_substream *substream,
                                    RX_I2S_CTL_RX_I2S_MODE_MASK,
                                    RX_I2S_CTL_RX_I2S_MODE_16);
                break;
-       case SNDRV_PCM_FORMAT_S24_LE:
+       case SNDRV_PCM_FORMAT_S32_LE:
                snd_soc_update_bits(dai->codec, LPASS_CDC_CLK_TX_I2S_CTL,
                                    TX_I2S_CTL_TX_I2S_MODE_MASK,
                                    TX_I2S_CTL_TX_I2S_MODE_32);
index 714ce17..e853a6d 100644 (file)
@@ -905,6 +905,7 @@ static int nau8825_adc_event(struct snd_soc_dapm_widget *w,
 
        switch (event) {
        case SND_SOC_DAPM_POST_PMU:
+               msleep(125);
                regmap_update_bits(nau8825->regmap, NAU8825_REG_ENA_CTRL,
                        NAU8825_ENABLE_ADC, NAU8825_ENABLE_ADC);
                break;
index 2df91db..64bf26c 100644 (file)
@@ -289,6 +289,8 @@ static int rt5514_spi_pcm_probe(struct snd_soc_platform *platform)
                        dev_err(&rt5514_spi->dev,
                                "%s Failed to reguest IRQ: %d\n", __func__,
                                ret);
+               else
+                       device_init_wakeup(rt5514_dsp->dev, true);
        }
 
        return 0;
@@ -456,8 +458,6 @@ static int rt5514_spi_probe(struct spi_device *spi)
                return ret;
        }
 
-       device_init_wakeup(&spi->dev, true);
-
        return 0;
 }
 
@@ -482,10 +482,13 @@ static int __maybe_unused rt5514_resume(struct device *dev)
        if (device_may_wakeup(dev))
                disable_irq_wake(irq);
 
-       if (rt5514_dsp->substream) {
-               rt5514_spi_burst_read(RT5514_IRQ_CTRL, (u8 *)&buf, sizeof(buf));
-               if (buf[0] & RT5514_IRQ_STATUS_BIT)
-                       rt5514_schedule_copy(rt5514_dsp);
+       if (rt5514_dsp) {
+               if (rt5514_dsp->substream) {
+                       rt5514_spi_burst_read(RT5514_IRQ_CTRL, (u8 *)&buf,
+                               sizeof(buf));
+                       if (buf[0] & RT5514_IRQ_STATUS_BIT)
+                               rt5514_schedule_copy(rt5514_dsp);
+               }
        }
 
        return 0;
index 2a5b5d7..2dd6e9f 100644 (file)
@@ -496,7 +496,7 @@ static const struct snd_soc_dapm_widget rt5514_dapm_widgets[] = {
        SND_SOC_DAPM_PGA("DMIC1", SND_SOC_NOPM, 0, 0, NULL, 0),
        SND_SOC_DAPM_PGA("DMIC2", SND_SOC_NOPM, 0, 0, NULL, 0),
 
-       SND_SOC_DAPM_SUPPLY("DMIC CLK", SND_SOC_NOPM, 0, 0,
+       SND_SOC_DAPM_SUPPLY_S("DMIC CLK", 1, SND_SOC_NOPM, 0, 0,
                rt5514_set_dmic_clk, SND_SOC_DAPM_PRE_PMU),
 
        SND_SOC_DAPM_SUPPLY("ADC CLK", RT5514_CLK_CTRL1,
index f020d2d..edc152c 100644 (file)
@@ -3823,6 +3823,8 @@ static int rt5645_i2c_probe(struct i2c_client *i2c,
        regmap_read(regmap, RT5645_VENDOR_ID, &val);
        rt5645->v_id = val & 0xff;
 
+       regmap_write(rt5645->regmap, RT5645_AD_DA_MIXER, 0x8080);
+
        ret = regmap_register_patch(rt5645->regmap, init_list,
                                    ARRAY_SIZE(init_list));
        if (ret != 0)
index b036c9d..d329bf7 100644 (file)
@@ -1560,6 +1560,10 @@ static int rt5663_jack_detect(struct snd_soc_codec *codec, int jack_insert)
                        RT5663_IRQ_POW_SAV_MASK, RT5663_IRQ_POW_SAV_EN);
                snd_soc_update_bits(codec, RT5663_IRQ_1,
                        RT5663_EN_IRQ_JD1_MASK, RT5663_EN_IRQ_JD1_EN);
+               snd_soc_update_bits(codec, RT5663_EM_JACK_TYPE_1,
+                       RT5663_EM_JD_MASK, RT5663_EM_JD_RST);
+               snd_soc_update_bits(codec, RT5663_EM_JACK_TYPE_1,
+                       RT5663_EM_JD_MASK, RT5663_EM_JD_NOR);
 
                while (true) {
                        regmap_read(rt5663->regmap, RT5663_INT_ST_2, &val);
index c5a9b69..03adc80 100644 (file)
 #define RT5663_POL_EXT_JD_SHIFT                        10
 #define RT5663_POL_EXT_JD_EN                   (0x1 << 10)
 #define RT5663_POL_EXT_JD_DIS                  (0x0 << 10)
+#define RT5663_EM_JD_MASK                      (0x1 << 7)
+#define RT5663_EM_JD_SHIFT                     7
+#define RT5663_EM_JD_NOR                       (0x1 << 7)
+#define RT5663_EM_JD_RST                       (0x0 << 7)
 
 /* DACREF LDO Control (0x0112)*/
 #define RT5663_PWR_LDO_DACREFL_MASK            (0x1 << 9)
index 730fb20..1ff3edb 100644 (file)
@@ -116,7 +116,7 @@ struct aic31xx_pdata {
 /* INT2 interrupt control */
 #define AIC31XX_INT2CTRL       AIC31XX_REG(0, 49)
 /* GPIO1 control */
-#define AIC31XX_GPIO1          AIC31XX_REG(0, 50)
+#define AIC31XX_GPIO1          AIC31XX_REG(0, 51)
 
 #define AIC31XX_DACPRB         AIC31XX_REG(0, 60)
 /* ADC Instruction Set Register */
index c482b2e..cfe72b9 100644 (file)
@@ -232,7 +232,7 @@ static struct twl4030_codec_data *twl4030_get_pdata(struct snd_soc_codec *codec)
        struct twl4030_codec_data *pdata = dev_get_platdata(codec->dev);
        struct device_node *twl4030_codec_node = NULL;
 
-       twl4030_codec_node = of_find_node_by_name(codec->dev->parent->of_node,
+       twl4030_codec_node = of_get_child_by_name(codec->dev->parent->of_node,
                                                  "codec");
 
        if (!pdata && twl4030_codec_node) {
@@ -241,9 +241,11 @@ static struct twl4030_codec_data *twl4030_get_pdata(struct snd_soc_codec *codec)
                                     GFP_KERNEL);
                if (!pdata) {
                        dev_err(codec->dev, "Can not allocate memory\n");
+                       of_node_put(twl4030_codec_node);
                        return NULL;
                }
                twl4030_setup_pdata_of(pdata, twl4030_codec_node);
+               of_node_put(twl4030_codec_node);
        }
 
        return pdata;
index 65c059b..66e32f5 100644 (file)
@@ -1733,7 +1733,7 @@ static int wm_adsp_load(struct wm_adsp *dsp)
                 le64_to_cpu(footer->timestamp));
 
        while (pos < firmware->size &&
-              pos - firmware->size > sizeof(*region)) {
+              sizeof(*region) < firmware->size - pos) {
                region = (void *)&(firmware->data[pos]);
                region_name = "Unknown";
                reg = 0;
@@ -1782,8 +1782,8 @@ static int wm_adsp_load(struct wm_adsp *dsp)
                         regions, le32_to_cpu(region->len), offset,
                         region_name);
 
-               if ((pos + le32_to_cpu(region->len) + sizeof(*region)) >
-                   firmware->size) {
+               if (le32_to_cpu(region->len) >
+                   firmware->size - pos - sizeof(*region)) {
                        adsp_err(dsp,
                                 "%s.%d: %s region len %d bytes exceeds file length %zu\n",
                                 file, regions, region_name,
@@ -2253,7 +2253,7 @@ static int wm_adsp_load_coeff(struct wm_adsp *dsp)
 
        blocks = 0;
        while (pos < firmware->size &&
-              pos - firmware->size > sizeof(*blk)) {
+              sizeof(*blk) < firmware->size - pos) {
                blk = (void *)(&firmware->data[pos]);
 
                type = le16_to_cpu(blk->type);
@@ -2327,8 +2327,8 @@ static int wm_adsp_load_coeff(struct wm_adsp *dsp)
                }
 
                if (reg) {
-                       if ((pos + le32_to_cpu(blk->len) + sizeof(*blk)) >
-                           firmware->size) {
+                       if (le32_to_cpu(blk->len) >
+                           firmware->size - pos - sizeof(*blk)) {
                                adsp_err(dsp,
                                         "%s.%d: %s region len %d bytes exceeds file length %zu\n",
                                         file, blocks, region_name,
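Both wm_adsp loops had the same pair of bugs: the loop guard "pos - firmware->size > sizeof(*region)" was written backwards (pos is smaller than size, so the unsigned subtraction wraps), and "pos + len + sizeof(*region) > size" can overflow for a huge, attacker-controlled length field. Rewriting both as subtractions from the remaining length keeps every intermediate value in range. A standalone demonstration of why the additive form is unsafe:

    #include <stdint.h>
    #include <stdio.h>

    static int bad_check(size_t pos, size_t len, size_t hdr, size_t size)
    {
            return pos + len + hdr > size;  /* sum can wrap around */
    }

    static int good_check(size_t pos, size_t len, size_t hdr, size_t size)
    {
            /* caller ensures pos + hdr <= size: no underflow possible */
            return len > size - pos - hdr;
    }

    int main(void)
    {
            size_t pos = 64, hdr = 8, size = 4096;
            size_t len = SIZE_MAX - 32;  /* hostile length field */

            printf("bad rejects?  %d\n", bad_check(pos, len, hdr, size));  /* 0 */
            printf("good rejects? %d\n", good_check(pos, len, hdr, size)); /* 1 */
            return 0;
    }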
index 0f163ab..52c27a3 100644 (file)
 #define ASRFSTi_OUTPUT_FIFO_SHIFT      12
 #define ASRFSTi_OUTPUT_FIFO_MASK       (((1 << ASRFSTi_OUTPUT_FIFO_WIDTH) - 1) << ASRFSTi_OUTPUT_FIFO_SHIFT)
 #define ASRFSTi_IAEi_SHIFT             11
-#define ASRFSTi_IAEi_MASK              (1 << ASRFSTi_OAFi_SHIFT)
-#define ASRFSTi_IAEi                   (1 << ASRFSTi_OAFi_SHIFT)
+#define ASRFSTi_IAEi_MASK              (1 << ASRFSTi_IAEi_SHIFT)
+#define ASRFSTi_IAEi                   (1 << ASRFSTi_IAEi_SHIFT)
 #define ASRFSTi_INPUT_FIFO_WIDTH       7
 #define ASRFSTi_INPUT_FIFO_SHIFT       0
 #define ASRFSTi_INPUT_FIFO_MASK                ((1 << ASRFSTi_INPUT_FIFO_WIDTH) - 1)
index f2f51e0..424bafa 100644 (file)
@@ -38,6 +38,7 @@
 #include <linux/ctype.h>
 #include <linux/device.h>
 #include <linux/delay.h>
+#include <linux/mutex.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/of.h>
@@ -265,6 +266,8 @@ struct fsl_ssi_private {
 
        u32 fifo_watermark;
        u32 dma_maxburst;
+
+       struct mutex ac97_reg_lock;
 };
 
 /*
@@ -1260,11 +1263,13 @@ static void fsl_ssi_ac97_write(struct snd_ac97 *ac97, unsigned short reg,
        if (reg > 0x7f)
                return;
 
+       mutex_lock(&fsl_ac97_data->ac97_reg_lock);
+
        ret = clk_prepare_enable(fsl_ac97_data->clk);
        if (ret) {
                pr_err("ac97 write clk_prepare_enable failed: %d\n",
                        ret);
-               return;
+               goto ret_unlock;
        }
 
        lreg = reg <<  12;
@@ -1278,6 +1283,9 @@ static void fsl_ssi_ac97_write(struct snd_ac97 *ac97, unsigned short reg,
        udelay(100);
 
        clk_disable_unprepare(fsl_ac97_data->clk);
+
+ret_unlock:
+       mutex_unlock(&fsl_ac97_data->ac97_reg_lock);
 }
 
 static unsigned short fsl_ssi_ac97_read(struct snd_ac97 *ac97,
@@ -1285,16 +1293,18 @@ static unsigned short fsl_ssi_ac97_read(struct snd_ac97 *ac97,
 {
        struct regmap *regs = fsl_ac97_data->regs;
 
-       unsigned short val = -1;
+       unsigned short val = 0;
        u32 reg_val;
        unsigned int lreg;
        int ret;
 
+       mutex_lock(&fsl_ac97_data->ac97_reg_lock);
+
        ret = clk_prepare_enable(fsl_ac97_data->clk);
        if (ret) {
                pr_err("ac97 read clk_prepare_enable failed: %d\n",
                        ret);
-               return -1;
+               goto ret_unlock;
        }
 
        lreg = (reg & 0x7f) <<  12;
@@ -1309,6 +1319,8 @@ static unsigned short fsl_ssi_ac97_read(struct snd_ac97 *ac97,
 
        clk_disable_unprepare(fsl_ac97_data->clk);
 
+ret_unlock:
+       mutex_unlock(&fsl_ac97_data->ac97_reg_lock);
        return val;
 }
 
@@ -1458,12 +1470,6 @@ static int fsl_ssi_probe(struct platform_device *pdev)
                                sizeof(fsl_ssi_ac97_dai));
 
                fsl_ac97_data = ssi_private;
-
-               ret = snd_soc_set_ac97_ops_of_reset(&fsl_ssi_ac97_ops, pdev);
-               if (ret) {
-                       dev_err(&pdev->dev, "could not set AC'97 ops\n");
-                       return ret;
-               }
        } else {
                /* Initialize this copy of the CPU DAI driver structure */
                memcpy(&ssi_private->cpu_dai_drv, &fsl_ssi_dai_template,
@@ -1574,6 +1580,15 @@ static int fsl_ssi_probe(struct platform_device *pdev)
                        return ret;
        }
 
+       if (fsl_ssi_is_ac97(ssi_private)) {
+               mutex_init(&ssi_private->ac97_reg_lock);
+               ret = snd_soc_set_ac97_ops_of_reset(&fsl_ssi_ac97_ops, pdev);
+               if (ret) {
+                       dev_err(&pdev->dev, "could not set AC'97 ops\n");
+                       goto error_ac97_ops;
+               }
+       }
+
        ret = devm_snd_soc_register_component(&pdev->dev, &fsl_ssi_component,
                                              &ssi_private->cpu_dai_drv, 1);
        if (ret) {
@@ -1657,6 +1672,13 @@ error_sound_card:
        fsl_ssi_debugfs_remove(&ssi_private->dbg_stats);
 
 error_asoc_register:
+       if (fsl_ssi_is_ac97(ssi_private))
+               snd_soc_set_ac97_ops(NULL);
+
+error_ac97_ops:
+       if (fsl_ssi_is_ac97(ssi_private))
+               mutex_destroy(&ssi_private->ac97_reg_lock);
+
        if (ssi_private->soc->imx)
                fsl_ssi_imx_clean(pdev, ssi_private);
 
@@ -1675,8 +1697,10 @@ static int fsl_ssi_remove(struct platform_device *pdev)
        if (ssi_private->soc->imx)
                fsl_ssi_imx_clean(pdev, ssi_private);
 
-       if (fsl_ssi_is_ac97(ssi_private))
+       if (fsl_ssi_is_ac97(ssi_private)) {
                snd_soc_set_ac97_ops(NULL);
+               mutex_destroy(&ssi_private->ac97_reg_lock);
+       }
 
        return 0;
 }
index 6f9a8bc..6dcad0a 100644 (file)
@@ -101,7 +101,7 @@ static const struct snd_soc_dapm_route kabylake_map[] = {
        { "ssp0 Tx", NULL, "spk_out" },
 
        { "AIF Playback", NULL, "ssp1 Tx" },
-       { "ssp1 Tx", NULL, "hs_out" },
+       { "ssp1 Tx", NULL, "codec1_out" },
 
        { "hs_in", NULL, "ssp1 Rx" },
        { "ssp1 Rx", NULL, "AIF Capture" },
index 6072164..271ae3c 100644 (file)
@@ -109,7 +109,7 @@ static const struct snd_soc_dapm_route kabylake_map[] = {
        { "ssp0 Tx", NULL, "spk_out" },
 
        { "AIF Playback", NULL, "ssp1 Tx" },
-       { "ssp1 Tx", NULL, "hs_out" },
+       { "ssp1 Tx", NULL, "codec1_out" },
 
        { "hs_in", NULL, "ssp1 Rx" },
        { "ssp1 Rx", NULL, "AIF Capture" },
index d14c50a..3eaac41 100644 (file)
@@ -119,11 +119,16 @@ static bool skl_check_ep_match(struct device *dev, struct nhlt_endpoint *epnt,
 
        if ((epnt->virtual_bus_id == instance_id) &&
                        (epnt->linktype == link_type) &&
-                       (epnt->direction == dirn) &&
-                       (epnt->device_type == dev_type))
-               return true;
-       else
-               return false;
+                       (epnt->direction == dirn)) {
+               /* do not check dev_type for DMIC link type */
+               if (epnt->linktype == NHLT_LINK_DMIC)
+                       return true;
+
+               if (epnt->device_type == dev_type)
+                       return true;
+       }
+
+       return false;
 }
 
 struct nhlt_specific_cfg
index a072bcf..81923da 100644 (file)
@@ -2908,7 +2908,7 @@ static int skl_tplg_control_load(struct snd_soc_component *cmpnt,
                break;
 
        default:
-               dev_warn(bus->dev, "Control load not supported %d:%d:%d\n",
+               dev_dbg(bus->dev, "Control load not supported %d:%d:%d\n",
                        hdr->ops.get, hdr->ops.put, hdr->ops.info);
                break;
        }
index ee5055d..a89fe9b 100644 (file)
@@ -322,26 +322,30 @@ static int rk_spdif_probe(struct platform_device *pdev)
        spdif->mclk = devm_clk_get(&pdev->dev, "mclk");
        if (IS_ERR(spdif->mclk)) {
                dev_err(&pdev->dev, "Can't retrieve rk_spdif master clock\n");
-               return PTR_ERR(spdif->mclk);
+               ret = PTR_ERR(spdif->mclk);
+               goto err_disable_hclk;
        }
 
        ret = clk_prepare_enable(spdif->mclk);
        if (ret) {
                dev_err(spdif->dev, "clock enable failed %d\n", ret);
-               return ret;
+               goto err_disable_clocks;
        }
 
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        regs = devm_ioremap_resource(&pdev->dev, res);
-       if (IS_ERR(regs))
-               return PTR_ERR(regs);
+       if (IS_ERR(regs)) {
+               ret = PTR_ERR(regs);
+               goto err_disable_clocks;
+       }
 
        spdif->regmap = devm_regmap_init_mmio_clk(&pdev->dev, "hclk", regs,
                                                  &rk_spdif_regmap_config);
        if (IS_ERR(spdif->regmap)) {
                dev_err(&pdev->dev,
                        "Failed to initialise managed register map\n");
-               return PTR_ERR(spdif->regmap);
+               ret = PTR_ERR(spdif->regmap);
+               goto err_disable_clocks;
        }
 
        spdif->playback_dma_data.addr = res->start + SPDIF_SMPDR;
@@ -373,6 +377,10 @@ static int rk_spdif_probe(struct platform_device *pdev)
 
 err_pm_runtime:
        pm_runtime_disable(&pdev->dev);
+err_disable_clocks:
+       clk_disable_unprepare(spdif->mclk);
+err_disable_hclk:
+       clk_disable_unprepare(spdif->hclk);
 
        return ret;
 }
index 8ddb087..4672688 100644 (file)
@@ -222,7 +222,7 @@ int rsnd_adg_set_cmd_timsel_gen2(struct rsnd_mod *cmd_mod,
                                   NULL, &val, NULL);
 
        val  = val      << shift;
-       mask = 0xffff   << shift;
+       mask = 0x0f1f   << shift;
 
        rsnd_mod_bset(adg_mod, CMDOUT_TIMSEL, mask, val);
 
@@ -250,7 +250,7 @@ int rsnd_adg_set_src_timesel_gen2(struct rsnd_mod *src_mod,
 
        in   = in       << shift;
        out  = out      << shift;
-       mask = 0xffff   << shift;
+       mask = 0x0f1f   << shift;
 
        switch (id / 2) {
        case 0:
@@ -380,7 +380,7 @@ int rsnd_adg_ssi_clk_try_start(struct rsnd_mod *ssi_mod, unsigned int rate)
                        ckr = 0x80000000;
        }
 
-       rsnd_mod_bset(adg_mod, BRGCKR, 0x80FF0000, adg->ckr | ckr);
+       rsnd_mod_bset(adg_mod, BRGCKR, 0x80770000, adg->ckr | ckr);
        rsnd_mod_write(adg_mod, BRRA,  adg->rbga);
        rsnd_mod_write(adg_mod, BRRB,  adg->rbgb);
 
index c70eb20..f12a88a 100644 (file)
@@ -1332,8 +1332,8 @@ static int rsnd_pcm_new(struct snd_soc_pcm_runtime *rtd)
 
        return snd_pcm_lib_preallocate_pages_for_all(
                rtd->pcm,
-               SNDRV_DMA_TYPE_CONTINUOUS,
-               snd_dma_continuous_data(GFP_KERNEL),
+               SNDRV_DMA_TYPE_DEV,
+               rtd->card->snd_card->dev,
                PREALLOC_BUFFER, PREALLOC_BUFFER_MAX);
 }
 
index fd557ab..4d750bd 100644 (file)
 struct rsnd_dmaen {
        struct dma_chan         *chan;
        dma_cookie_t            cookie;
-       dma_addr_t              dma_buf;
        unsigned int            dma_len;
-       unsigned int            dma_period;
-       unsigned int            dma_cnt;
 };
 
 struct rsnd_dmapp {
@@ -71,38 +68,10 @@ static struct rsnd_mod mem = {
 /*
  *             Audio DMAC
  */
-#define rsnd_dmaen_sync(dmaen, io, i)  __rsnd_dmaen_sync(dmaen, io, i, 1)
-#define rsnd_dmaen_unsync(dmaen, io, i)        __rsnd_dmaen_sync(dmaen, io, i, 0)
-static void __rsnd_dmaen_sync(struct rsnd_dmaen *dmaen, struct rsnd_dai_stream *io,
-                             int i, int sync)
-{
-       struct device *dev = dmaen->chan->device->dev;
-       enum dma_data_direction dir;
-       int is_play = rsnd_io_is_play(io);
-       dma_addr_t buf;
-       int len, max;
-       size_t period;
-
-       len     = dmaen->dma_len;
-       period  = dmaen->dma_period;
-       max     = len / period;
-       i       = i % max;
-       buf     = dmaen->dma_buf + (period * i);
-
-       dir = is_play ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
-
-       if (sync)
-               dma_sync_single_for_device(dev, buf, period, dir);
-       else
-               dma_sync_single_for_cpu(dev, buf, period, dir);
-}
-
 static void __rsnd_dmaen_complete(struct rsnd_mod *mod,
                                  struct rsnd_dai_stream *io)
 {
        struct rsnd_priv *priv = rsnd_mod_to_priv(mod);
-       struct rsnd_dma *dma = rsnd_mod_to_dma(mod);
-       struct rsnd_dmaen *dmaen = rsnd_dma_to_dmaen(dma);
        bool elapsed = false;
        unsigned long flags;
 
@@ -115,22 +84,9 @@ static void __rsnd_dmaen_complete(struct rsnd_mod *mod,
         */
        spin_lock_irqsave(&priv->lock, flags);
 
-       if (rsnd_io_is_working(io)) {
-               rsnd_dmaen_unsync(dmaen, io, dmaen->dma_cnt);
-
-               /*
-                * Next period is already started.
-                * Let's sync Next Next period
-                * see
-                *      rsnd_dmaen_start()
-                */
-               rsnd_dmaen_sync(dmaen, io, dmaen->dma_cnt + 2);
-
+       if (rsnd_io_is_working(io))
                elapsed = true;
 
-               dmaen->dma_cnt++;
-       }
-
        spin_unlock_irqrestore(&priv->lock, flags);
 
        if (elapsed)
@@ -165,14 +121,8 @@ static int rsnd_dmaen_stop(struct rsnd_mod *mod,
        struct rsnd_dma *dma = rsnd_mod_to_dma(mod);
        struct rsnd_dmaen *dmaen = rsnd_dma_to_dmaen(dma);
 
-       if (dmaen->chan) {
-               int is_play = rsnd_io_is_play(io);
-
+       if (dmaen->chan)
                dmaengine_terminate_all(dmaen->chan);
-               dma_unmap_single(dmaen->chan->device->dev,
-                                dmaen->dma_buf, dmaen->dma_len,
-                                is_play ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
-       }
 
        return 0;
 }
@@ -237,11 +187,7 @@ static int rsnd_dmaen_start(struct rsnd_mod *mod,
        struct device *dev = rsnd_priv_to_dev(priv);
        struct dma_async_tx_descriptor *desc;
        struct dma_slave_config cfg = {};
-       dma_addr_t buf;
-       size_t len;
-       size_t period;
        int is_play = rsnd_io_is_play(io);
-       int i;
        int ret;
 
        cfg.direction   = is_play ? DMA_MEM_TO_DEV : DMA_DEV_TO_MEM;
@@ -258,19 +204,10 @@ static int rsnd_dmaen_start(struct rsnd_mod *mod,
        if (ret < 0)
                return ret;
 
-       len     = snd_pcm_lib_buffer_bytes(substream);
-       period  = snd_pcm_lib_period_bytes(substream);
-       buf     = dma_map_single(dmaen->chan->device->dev,
-                                substream->runtime->dma_area,
-                                len,
-                                is_play ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
-       if (dma_mapping_error(dmaen->chan->device->dev, buf)) {
-               dev_err(dev, "dma map failed\n");
-               return -EIO;
-       }
-
        desc = dmaengine_prep_dma_cyclic(dmaen->chan,
-                                        buf, len, period,
+                                        substream->runtime->dma_addr,
+                                        snd_pcm_lib_buffer_bytes(substream),
+                                        snd_pcm_lib_period_bytes(substream),
                                         is_play ? DMA_MEM_TO_DEV : DMA_DEV_TO_MEM,
                                         DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 
@@ -282,18 +219,7 @@ static int rsnd_dmaen_start(struct rsnd_mod *mod,
        desc->callback          = rsnd_dmaen_complete;
        desc->callback_param    = rsnd_mod_get(dma);
 
-       dmaen->dma_buf          = buf;
-       dmaen->dma_len          = len;
-       dmaen->dma_period       = period;
-       dmaen->dma_cnt          = 0;
-
-       /*
-        * synchronize this and next period
-        * see
-        *      __rsnd_dmaen_complete()
-        */
-       for (i = 0; i < 2; i++)
-               rsnd_dmaen_sync(dmaen, io, i);
+       dmaen->dma_len          = snd_pcm_lib_buffer_bytes(substream);
 
        dmaen->cookie = dmaengine_submit(desc);
        if (dmaen->cookie < 0) {
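The rewritten start path hands dmaengine the buffer that the ALSA PCM core
already allocated and mapped for the device (runtime->dma_addr), which is
what makes the dma_map_single()/dma_sync_single_*() bookkeeping removed
above unnecessary. A minimal sketch of the resulting pattern; error
handling is trimmed and the function name is illustrative, not the
driver's:

    #include <linux/dmaengine.h>
    #include <sound/pcm.h>

    /*
     * Sketch: cyclic DMA on the PCM buffer the ALSA core manages.
     * runtime->dma_addr is already device-valid, so the driver never
     * maps the buffer itself and needs no per-period sync calls.
     */
    static int start_cyclic_dma(struct dma_chan *chan,
                                struct snd_pcm_substream *substream,
                                bool is_play)
    {
            struct dma_async_tx_descriptor *desc;

            desc = dmaengine_prep_dma_cyclic(chan,
                            substream->runtime->dma_addr,
                            snd_pcm_lib_buffer_bytes(substream),
                            snd_pcm_lib_period_bytes(substream),
                            is_play ? DMA_MEM_TO_DEV : DMA_DEV_TO_MEM,
                            DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
            if (!desc)
                    return -EIO;

            if (dma_submit_error(dmaengine_submit(desc)))
                    return -EIO;

            dma_async_issue_pending(chan);  /* actually kick the transfer */
            return 0;
    }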
index fece1e5..cbf3bf3 100644 (file)
@@ -446,25 +446,29 @@ static bool rsnd_ssi_pointer_update(struct rsnd_mod *mod,
                                    int byte)
 {
        struct rsnd_ssi *ssi = rsnd_mod_to_ssi(mod);
+       bool ret = false;
+       int byte_pos;
 
-       ssi->byte_pos += byte;
+       byte_pos = ssi->byte_pos + byte;
 
-       if (ssi->byte_pos >= ssi->next_period_byte) {
+       if (byte_pos >= ssi->next_period_byte) {
                struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io);
 
                ssi->period_pos++;
                ssi->next_period_byte += ssi->byte_per_period;
 
                if (ssi->period_pos >= runtime->periods) {
-                       ssi->byte_pos = 0;
+                       byte_pos = 0;
                        ssi->period_pos = 0;
                        ssi->next_period_byte = ssi->byte_per_period;
                }
 
-               return true;
+               ret = true;
        }
 
-       return false;
+       WRITE_ONCE(ssi->byte_pos, byte_pos);
+
+       return ret;
 }
 
 /*
@@ -838,7 +842,7 @@ static int rsnd_ssi_pointer(struct rsnd_mod *mod,
        struct rsnd_ssi *ssi = rsnd_mod_to_ssi(mod);
        struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io);
 
-       *pointer = bytes_to_frames(runtime, ssi->byte_pos);
+       *pointer = bytes_to_frames(runtime, READ_ONCE(ssi->byte_pos));
 
        return 0;
 }
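The pointer update above is restructured so the wrap-around arithmetic
happens in a local variable and only the final value is published with
WRITE_ONCE(), paired with READ_ONCE() in the .pointer callback. That way a
concurrent reader can never observe the intermediate "incremented but not
yet wrapped" position. The pattern in isolation (struct and function names
here are illustrative, not the driver's):

    #include <linux/compiler.h>

    /* Illustrative state; the driver's real struct is rsnd_ssi. */
    struct pos_state {
            int byte_pos;           /* published position */
            int buffer_bytes;       /* wrap point */
    };

    /* Writer (interrupt path): compute locally, publish with one store. */
    static void pos_advance(struct pos_state *s, int bytes)
    {
            int pos = s->byte_pos + bytes;  /* single writer: plain read ok */

            if (pos >= s->buffer_bytes)
                    pos = 0;

            WRITE_ONCE(s->byte_pos, pos);
    }

    /* Reader (.pointer callback, possibly concurrent with the writer). */
    static int pos_read(struct pos_state *s)
    {
            return READ_ONCE(s->byte_pos);  /* never a torn/partial value */
    }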
index 4d94875..6ff8a36 100644 (file)
@@ -125,6 +125,7 @@ static int rsnd_ssiu_init_gen2(struct rsnd_mod *mod,
 {
        int hdmi = rsnd_ssi_hdmi_port(io);
        int ret;
+       u32 mode = 0;
 
        ret = rsnd_ssiu_init(mod, io, priv);
        if (ret < 0)
@@ -136,9 +137,11 @@ static int rsnd_ssiu_init_gen2(struct rsnd_mod *mod,
                 * see
                 *      rsnd_ssi_config_init()
                 */
-               rsnd_mod_write(mod, SSI_MODE, 0x1);
+               mode = 0x1;
        }
 
+       rsnd_mod_write(mod, SSI_MODE, mode);
+
        if (rsnd_ssi_use_busif(io)) {
                rsnd_mod_write(mod, SSI_BUSIF_ADINR,
                               rsnd_get_adinr_bit(mod, io) |
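Hoisting rsnd_mod_write() out of the conditional in the SSI_MODE hunk above
changes the semantics from "set the register only when HDMI is in use" to
"always program the register with the value computed for this stream", so
a 0x1 left behind by an earlier HDMI session is cleared on the next init.
The shape of the fix as a sketch built on the driver's own accessors (the
hdmi parameter stands in for the hunk's condition):

    /*
     * Sketch: compute the whole register value first, then write it
     * unconditionally, so stale bits from a previous configuration are
     * overwritten instead of surviving into the next session.
     */
    static void ssi_mode_init(struct rsnd_mod *mod, bool hdmi)
    {
            u32 mode = 0;           /* default: plain SSI mode */

            if (hdmi)
                    mode = 0x1;     /* see rsnd_ssi_config_init() */

            rsnd_mod_write(mod, SSI_MODE, mode);
    }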
index 7c9e361..2b4ceda 100644 (file)
@@ -2173,20 +2173,25 @@ static int parse_audio_selector_unit(struct mixer_build *state, int unitid,
        kctl->private_value = (unsigned long)namelist;
        kctl->private_free = usb_mixer_selector_elem_free;
 
-       nameid = uac_selector_unit_iSelector(desc);
+       /* check the static mapping table at first */
        len = check_mapped_name(map, kctl->id.name, sizeof(kctl->id.name));
-       if (len)
-               ;
-       else if (nameid)
-               len = snd_usb_copy_string_desc(state, nameid, kctl->id.name,
-                                        sizeof(kctl->id.name));
-       else
-               len = get_term_name(state, &state->oterm,
-                                   kctl->id.name, sizeof(kctl->id.name), 0);
-
        if (!len) {
-               strlcpy(kctl->id.name, "USB", sizeof(kctl->id.name));
+               /* no mapping ? */
+               /* if iSelector is given, use it */
+               nameid = uac_selector_unit_iSelector(desc);
+               if (nameid)
+                       len = snd_usb_copy_string_desc(state, nameid,
+                                                      kctl->id.name,
+                                                      sizeof(kctl->id.name));
+               /* ... or pick up the terminal name at next */
+               if (!len)
+                       len = get_term_name(state, &state->oterm,
+                                   kctl->id.name, sizeof(kctl->id.name), 0);
+               /* ... or use the fixed string "USB" as the last resort */
+               if (!len)
+                       strlcpy(kctl->id.name, "USB", sizeof(kctl->id.name));
 
+               /* and add the proper suffix */
                if (desc->bDescriptorSubtype == UAC2_CLOCK_SELECTOR)
                        append_ctl_name(kctl, " Clock Source");
                else if ((state->oterm.type & 0xff00) == 0x0100)
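The restructured selector naming above replaces an if/else cascade
(including an empty "if (len) ;" arm) with an explicit fallback chain:
static mapping table first, then the iSelector string descriptor, then the
output terminal name, then the fixed string "USB". The shape of that
chain, with hypothetical helper names standing in for the real lookups:

    #include <linux/string.h>

    /* Hypothetical lookup helpers; each returns the name length or 0. */
    int name_from_mapping_table(char *buf, size_t size);
    int name_from_string_descriptor(char *buf, size_t size);
    int name_from_output_terminal(char *buf, size_t size);

    /*
     * Sketch of a priority-ordered lookup: each source is consulted
     * only if everything before it produced nothing (len == 0).
     */
    static void pick_ctl_name(char *name, size_t size)
    {
            int len = name_from_mapping_table(name, size);  /* 1st: map */

            if (!len)
                    len = name_from_string_descriptor(name, size);
            if (!len)
                    len = name_from_output_terminal(name, size);
            if (!len)
                    strlcpy(name, "USB", size);             /* last resort */
    }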
index 77eecaa..a66ef57 100644 (file)
@@ -1166,10 +1166,11 @@ static bool is_marantz_denon_dac(unsigned int id)
 /* TEAC UD-501/UD-503/NT-503 USB DACs need a vendor cmd to switch
  * between PCM/DOP and native DSD mode
  */
-static bool is_teac_50X_dac(unsigned int id)
+static bool is_teac_dsd_dac(unsigned int id)
 {
        switch (id) {
        case USB_ID(0x0644, 0x8043): /* TEAC UD-501/UD-503/NT-503 */
+       case USB_ID(0x0644, 0x8044): /* Esoteric D-05X */
                return true;
        }
        return false;
@@ -1202,7 +1203,7 @@ int snd_usb_select_mode_quirk(struct snd_usb_substream *subs,
                        break;
                }
                mdelay(20);
-       } else if (is_teac_50X_dac(subs->stream->chip->usb_id)) {
+       } else if (is_teac_dsd_dac(subs->stream->chip->usb_id)) {
                /* Vendor mode switch cmd is required. */
                switch (fmt->altsetting) {
                case 3: /* DSD mode (DSD_U32) requested */
@@ -1392,7 +1393,7 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip,
        }
 
        /* TEAC devices with USB DAC functionality */
-       if (is_teac_50X_dac(chip->usb_id)) {
+       if (is_teac_dsd_dac(chip->usb_id)) {
                if (fp->altsetting == 3)
                        return SNDRV_PCM_FMTBIT_DSD_U32_BE;
        }
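The rename from is_teac_50X_dac() to is_teac_dsd_dac() keeps the helper's
name honest now that a non-50X device (the Esoteric D-05X, a TEAC brand)
joins the list: it names a capability, not a model family. The helper can
take a single integer because sound/usb packs both USB IDs into one key;
the macros below mirror sound/usb/usbaudio.h:

    #include <linux/types.h>

    /* sound/usb packs the two 16-bit USB IDs into one 32-bit key, so a
     * plain switch on the packed value matches whole devices:
     */
    #define USB_ID(vendor, product) (((vendor) << 16) | (product))
    #define USB_ID_VENDOR(id)       ((id) >> 16)
    #define USB_ID_PRODUCT(id)      ((u16)(id))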
index cefe7c7..0a8e37a 100644 (file)
@@ -2,7 +2,7 @@
 #ifndef _UAPI__ASM_BPF_PERF_EVENT_H__
 #define _UAPI__ASM_BPF_PERF_EVENT_H__
 
-#include <asm/ptrace.h>
+#include "ptrace.h"
 
 typedef user_pt_regs bpf_user_pt_regs_t;
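The quoted include matters after "make headers_install": bpf_perf_event.h
and the architecture's ptrace.h are exported side by side, and the quoted
form resolves against the header's own directory, so the pair cannot be
split by whatever asm/ptrace.h (for instance a libc-provided one) happens
to come first on the user's include path. Roughly:

    /*
     * After headers_install, both files sit in the same directory:
     *
     *     usr/include/asm/bpf_perf_event.h
     *     usr/include/asm/ptrace.h        (defines user_pt_regs)
     *
     * #include "ptrace.h"      always picks the sibling file above;
     * #include <asm/ptrace.h>  picks the first asm/ptrace.h on the
     *                          include path, which userspace headers
     *                          may have shadowed.
     */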
 
index 217cf6f..a5684d0 100755 (executable)
@@ -478,7 +478,7 @@ class Provider(object):
     @staticmethod
     def is_field_wanted(fields_filter, field):
         """Indicate whether field is valid according to fields_filter."""
-        if not fields_filter or fields_filter == "help":
+        if not fields_filter:
             return True
         return re.match(fields_filter, field) is not None
 
@@ -549,8 +549,8 @@ class TracepointProvider(Provider):
 
     def update_fields(self, fields_filter):
         """Refresh fields, applying fields_filter"""
-        self._fields = [field for field in self.get_available_fields()
-                        if self.is_field_wanted(fields_filter, field)]
+        self.fields = [field for field in self.get_available_fields()
+                       if self.is_field_wanted(fields_filter, field)]
 
     @staticmethod
     def get_online_cpus():
@@ -950,7 +950,8 @@ class Tui(object):
             curses.nocbreak()
             curses.endwin()
 
-    def get_all_gnames(self):
+    @staticmethod
+    def get_all_gnames():
         """Returns a list of (pid, gname) tuples of all running guests"""
         res = []
         try:
@@ -963,7 +964,7 @@ class Tui(object):
             # perform a sanity check before calling the more expensive
             # function to possibly extract the guest name
             if ' -name ' in line[1]:
-                res.append((line[0], self.get_gname_from_pid(line[0])))
+                res.append((line[0], Tui.get_gname_from_pid(line[0])))
         child.stdout.close()
 
         return res
@@ -984,7 +985,8 @@ class Tui(object):
         except Exception:
             self.screen.addstr(row + 1, 2, 'Not available')
 
-    def get_pid_from_gname(self, gname):
+    @staticmethod
+    def get_pid_from_gname(gname):
         """Fuzzy function to convert guest name to QEMU process pid.
 
         Returns a list of potential pids, can be empty if no match found.
@@ -992,7 +994,7 @@ class Tui(object):
 
         """
         pids = []
-        for line in self.get_all_gnames():
+        for line in Tui.get_all_gnames():
             if gname == line[1]:
                 pids.append(int(line[0]))
 
@@ -1090,15 +1092,16 @@ class Tui(object):
             # sort by totals
             return (0, -stats[x][0])
         total = 0.
-        for val in stats.values():
-            total += val[0]
+        for key in stats.keys():
+            if key.find('(') == -1:
+                total += stats[key][0]
         if self._sorting == SORT_DEFAULT:
             sortkey = sortCurAvg
         else:
             sortkey = sortTotal
+        tavg = 0
         for key in sorted(stats.keys(), key=sortkey):
-
-            if row >= self.screen.getmaxyx()[0]:
+            if row >= self.screen.getmaxyx()[0] - 1:
                 break
             values = stats[key]
             if not values[0] and not values[1]:
@@ -1110,9 +1113,15 @@ class Tui(object):
                 self.screen.addstr(row, 1, '%-40s %10d%7.1f %8s' %
                                    (key, values[0], values[0] * 100 / total,
                                     cur))
+                if cur != '' and key.find('(') == -1:
+                    tavg += cur
             row += 1
         if row == 3:
             self.screen.addstr(4, 1, 'No matching events reported yet')
+        else:
+            self.screen.addstr(row, 1, '%-40s %10d        %8s' %
+                               ('Total', total, tavg if tavg else ''),
+                               curses.A_BOLD)
         self.screen.refresh()
 
     def show_msg(self, text):
@@ -1358,7 +1367,7 @@ class Tui(object):
                 if char == 'x':
                     self.update_drilldown()
                     # prevents display of current values on next refresh
-                    self.stats.get()
+                    self.stats.get(self._display_guests)
             except KeyboardInterrupt:
                 break
             except curses.error:
@@ -1451,16 +1460,13 @@ Press any other key to refresh statistics immediately.
         try:
             pids = Tui.get_pid_from_gname(val)
         except:
-            raise optparse.OptionValueError('Error while searching for guest '
-                                            '"{}", use "-p" to specify a pid '
-                                            'instead'.format(val))
+            sys.exit('Error while searching for guest "{}". Use "-p" to '
+                     'specify a pid instead?'.format(val))
         if len(pids) == 0:
-            raise optparse.OptionValueError('No guest by the name "{}" '
-                                            'found'.format(val))
+            sys.exit('Error: No guest by the name "{}" found'.format(val))
         if len(pids) > 1:
-            raise optparse.OptionValueError('Multiple processes found (pids: '
-                                            '{}) - use "-p" to specify a pid '
-                                            'instead'.format(" ".join(pids)))
+            sys.exit('Error: Multiple processes found (pids: {}). Use "-p" '
+                     'to specify the desired pid'.format(" ".join(pids)))
         parser.values.pid = pids[0]
 
     optparser = optparse.OptionParser(description=description_text,
@@ -1518,7 +1524,16 @@ Press any other key to refresh statistics immediately.
                          help='restrict statistics to guest by name',
                          callback=cb_guest_to_pid,
                          )
-    (options, _) = optparser.parse_args(sys.argv)
+    options, unkn = optparser.parse_args(sys.argv)
+    if len(unkn) != 1:
+        sys.exit('Error: Extra argument(s): ' + ' '.join(unkn[1:]))
+    try:
+        # verify that we were passed a valid regex up front
+        re.compile(options.fields)
+    except re.error:
+        sys.exit('Error: "' + options.fields + '" is not a valid regular '
+                 'expression')
+
     return options
 
 
@@ -1564,16 +1579,13 @@ def main():
 
     stats = Stats(options)
 
-    if options.fields == "help":
-        event_list = "\n"
-        s = stats.get()
-        for key in s.keys():
-            if key.find('(') != -1:
-                key = key[0:key.find('(')]
-            if event_list.find('\n' + key + '\n') == -1:
-                event_list += key + '\n'
-        sys.stdout.write(event_list)
-        return ""
+    if options.fields == 'help':
+        stats.fields_filter = None
+        event_list = []
+        for key in stats.get().keys():
+            event_list.append(key.split('(', 1)[0])
+        sys.stdout.write('  ' + '\n  '.join(sorted(set(event_list))) + '\n')
+        sys.exit(0)
 
     if options.log:
         log(stats)
index e5cf836..b5b3810 100644 (file)
@@ -50,6 +50,8 @@ INTERACTIVE COMMANDS
 *s*::   set update interval
 
 *x*::  toggle reporting of stats for child trace events
+ ::     *Note*: The stats for the parents summarize the respective child trace
+                events
 
 Press any other key to refresh statistics immediately.
 
@@ -86,7 +88,7 @@ OPTIONS
 
 -f<fields>::
 --fields=<fields>::
-       fields to display (regex)
+       fields to display (regex), "-f help" for a list of available events
 
 -h::
 --help::
index 792af7c..05fc4e2 100644 (file)
@@ -11,7 +11,7 @@ ifneq ($(wildcard $(GENHDR)),)
 endif
 
 CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include
-LDLIBS += -lcap -lelf
+LDLIBS += -lcap -lelf -lrt
 
 TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
        test_align test_verifier_log test_dev_cgroup
index 6942753..6761be1 100644 (file)
@@ -351,7 +351,7 @@ static void test_bpf_obj_id(void)
                          info_len != sizeof(struct bpf_map_info) ||
                          strcmp((char *)map_infos[i].name, expected_map_name),
                          "get-map-info(fd)",
-                         "err %d errno %d type %d(%d) info_len %u(%lu) key_size %u value_size %u max_entries %u map_flags %X name %s(%s)\n",
+                         "err %d errno %d type %d(%d) info_len %u(%Zu) key_size %u value_size %u max_entries %u map_flags %X name %s(%s)\n",
                          err, errno,
                          map_infos[i].type, BPF_MAP_TYPE_ARRAY,
                          info_len, sizeof(struct bpf_map_info),
@@ -395,7 +395,7 @@ static void test_bpf_obj_id(void)
                          *(int *)prog_infos[i].map_ids != map_infos[i].id ||
                          strcmp((char *)prog_infos[i].name, expected_prog_name),
                          "get-prog-info(fd)",
-                         "err %d errno %d i %d type %d(%d) info_len %u(%lu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d load_time %lu(%lu) uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) name %s(%s)\n",
+                         "err %d errno %d i %d type %d(%d) info_len %u(%Zu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d load_time %lu(%lu) uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) name %s(%s)\n",
                          err, errno, i,
                          prog_infos[i].type, BPF_PROG_TYPE_SOCKET_FILTER,
                          info_len, sizeof(struct bpf_prog_info),
@@ -463,7 +463,7 @@ static void test_bpf_obj_id(void)
                      memcmp(&prog_info, &prog_infos[i], info_len) ||
                      *(int *)prog_info.map_ids != saved_map_id,
                      "get-prog-info(next_id->fd)",
-                     "err %d errno %d info_len %u(%lu) memcmp %d map_id %u(%u)\n",
+                     "err %d errno %d info_len %u(%Zu) memcmp %d map_id %u(%u)\n",
                      err, errno, info_len, sizeof(struct bpf_prog_info),
                      memcmp(&prog_info, &prog_infos[i], info_len),
                      *(int *)prog_info.map_ids, saved_map_id);
@@ -509,7 +509,7 @@ static void test_bpf_obj_id(void)
                      memcmp(&map_info, &map_infos[i], info_len) ||
                      array_value != array_magic_value,
                      "check get-map-info(next_id->fd)",
-                     "err %d errno %d info_len %u(%lu) memcmp %d array_value %llu(%llu)\n",
+                     "err %d errno %d info_len %u(%Zu) memcmp %d array_value %llu(%llu)\n",
                      err, errno, info_len, sizeof(struct bpf_map_info),
                      memcmp(&map_info, &map_infos[i], info_len),
                      array_value, array_magic_value);
index 3c64f30..b510174 100644 (file)
@@ -422,9 +422,7 @@ static struct bpf_test tests[] = {
                        BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
                        BPF_EXIT_INSN(),
                },
-               .errstr_unpriv = "R1 subtraction from stack pointer",
-               .result_unpriv = REJECT,
-               .errstr = "R1 invalid mem access",
+               .errstr = "R1 subtraction from stack pointer",
                .result = REJECT,
        },
        {
@@ -606,7 +604,6 @@ static struct bpf_test tests[] = {
                },
                .errstr = "misaligned stack access",
                .result = REJECT,
-               .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
        },
        {
                "invalid map_fd for function call",
@@ -1797,7 +1794,6 @@ static struct bpf_test tests[] = {
                },
                .result = REJECT,
                .errstr = "misaligned stack access off (0x0; 0x0)+-8+2 size 8",
-               .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
        },
        {
                "PTR_TO_STACK store/load - bad alignment on reg",
@@ -1810,7 +1806,6 @@ static struct bpf_test tests[] = {
                },
                .result = REJECT,
                .errstr = "misaligned stack access off (0x0; 0x0)+-10+8 size 8",
-               .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
        },
        {
                "PTR_TO_STACK store/load - out of bounds low",
@@ -1862,9 +1857,8 @@ static struct bpf_test tests[] = {
                        BPF_MOV64_IMM(BPF_REG_0, 0),
                        BPF_EXIT_INSN(),
                },
-               .result = ACCEPT,
-               .result_unpriv = REJECT,
-               .errstr_unpriv = "R1 pointer += pointer",
+               .result = REJECT,
+               .errstr = "R1 pointer += pointer",
        },
        {
                "unpriv: neg pointer",
@@ -2592,7 +2586,8 @@ static struct bpf_test tests[] = {
                        BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
                                    offsetof(struct __sk_buff, data)),
                        BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_4),
-                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_1),
+                       BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+                                   offsetof(struct __sk_buff, len)),
                        BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 49),
                        BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 49),
                        BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_2),
@@ -2899,7 +2894,7 @@ static struct bpf_test tests[] = {
                        BPF_MOV64_IMM(BPF_REG_0, 0),
                        BPF_EXIT_INSN(),
                },
-               .errstr = "invalid access to packet",
+               .errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END",
                .result = REJECT,
                .prog_type = BPF_PROG_TYPE_SCHED_CLS,
        },
@@ -3885,9 +3880,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map2 = { 3, 11 },
-               .errstr_unpriv = "R0 pointer += pointer",
-               .errstr = "R0 invalid mem access 'inv'",
-               .result_unpriv = REJECT,
+               .errstr = "R0 pointer += pointer",
                .result = REJECT,
                .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
        },
@@ -3928,7 +3921,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map1 = { 4 },
-               .errstr = "R4 invalid mem access",
+               .errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL",
                .result = REJECT,
                .prog_type = BPF_PROG_TYPE_SCHED_CLS
        },
@@ -3949,7 +3942,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map1 = { 4 },
-               .errstr = "R4 invalid mem access",
+               .errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL",
                .result = REJECT,
                .prog_type = BPF_PROG_TYPE_SCHED_CLS
        },
@@ -3970,7 +3963,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map1 = { 4 },
-               .errstr = "R4 invalid mem access",
+               .errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL",
                .result = REJECT,
                .prog_type = BPF_PROG_TYPE_SCHED_CLS
        },
@@ -5195,10 +5188,8 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map2 = { 3 },
-               .errstr_unpriv = "R0 bitwise operator &= on pointer",
-               .errstr = "invalid mem access 'inv'",
+               .errstr = "R0 bitwise operator &= on pointer",
                .result = REJECT,
-               .result_unpriv = REJECT,
        },
        {
                "map element value illegal alu op, 2",
@@ -5214,10 +5205,8 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map2 = { 3 },
-               .errstr_unpriv = "R0 32-bit pointer arithmetic prohibited",
-               .errstr = "invalid mem access 'inv'",
+               .errstr = "R0 32-bit pointer arithmetic prohibited",
                .result = REJECT,
-               .result_unpriv = REJECT,
        },
        {
                "map element value illegal alu op, 3",
@@ -5233,10 +5222,8 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map2 = { 3 },
-               .errstr_unpriv = "R0 pointer arithmetic with /= operator",
-               .errstr = "invalid mem access 'inv'",
+               .errstr = "R0 pointer arithmetic with /= operator",
                .result = REJECT,
-               .result_unpriv = REJECT,
        },
        {
                "map element value illegal alu op, 4",
@@ -6019,8 +6006,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map_in_map = { 3 },
-               .errstr = "R1 type=inv expected=map_ptr",
-               .errstr_unpriv = "R1 pointer arithmetic on CONST_PTR_TO_MAP prohibited",
+               .errstr = "R1 pointer arithmetic on CONST_PTR_TO_MAP prohibited",
                .result = REJECT,
        },
        {
@@ -6116,6 +6102,30 @@ static struct bpf_test tests[] = {
                },
                .result = ACCEPT,
        },
+       {
+               "ld_abs: tests on r6 and skb data reload helper",
+               .insns = {
+                       BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+                       BPF_LD_ABS(BPF_B, 0),
+                       BPF_LD_ABS(BPF_H, 0),
+                       BPF_LD_ABS(BPF_W, 0),
+                       BPF_MOV64_REG(BPF_REG_7, BPF_REG_6),
+                       BPF_MOV64_IMM(BPF_REG_6, 0),
+                       BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+                       BPF_MOV64_IMM(BPF_REG_2, 1),
+                       BPF_MOV64_IMM(BPF_REG_3, 2),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_skb_vlan_push),
+                       BPF_MOV64_REG(BPF_REG_6, BPF_REG_7),
+                       BPF_LD_ABS(BPF_B, 0),
+                       BPF_LD_ABS(BPF_H, 0),
+                       BPF_LD_ABS(BPF_W, 0),
+                       BPF_MOV64_IMM(BPF_REG_0, 42),
+                       BPF_EXIT_INSN(),
+               },
+               .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+               .result = ACCEPT,
+       },
        {
                "ld_ind: check calling conv, r1",
                .insns = {
@@ -6300,7 +6310,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map1 = { 3 },
-               .errstr = "R0 min value is negative",
+               .errstr = "unbounded min value",
                .result = REJECT,
        },
        {
@@ -6324,7 +6334,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map1 = { 3 },
-               .errstr = "R0 min value is negative",
+               .errstr = "unbounded min value",
                .result = REJECT,
        },
        {
@@ -6350,7 +6360,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map1 = { 3 },
-               .errstr = "R8 invalid mem access 'inv'",
+               .errstr = "unbounded min value",
                .result = REJECT,
        },
        {
@@ -6375,7 +6385,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map1 = { 3 },
-               .errstr = "R8 invalid mem access 'inv'",
+               .errstr = "unbounded min value",
                .result = REJECT,
        },
        {
@@ -6423,7 +6433,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map1 = { 3 },
-               .errstr = "R0 min value is negative",
+               .errstr = "unbounded min value",
                .result = REJECT,
        },
        {
@@ -6494,7 +6504,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map1 = { 3 },
-               .errstr = "R0 min value is negative",
+               .errstr = "unbounded min value",
                .result = REJECT,
        },
        {
@@ -6545,7 +6555,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map1 = { 3 },
-               .errstr = "R0 min value is negative",
+               .errstr = "unbounded min value",
                .result = REJECT,
        },
        {
@@ -6572,7 +6582,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map1 = { 3 },
-               .errstr = "R0 min value is negative",
+               .errstr = "unbounded min value",
                .result = REJECT,
        },
        {
@@ -6598,7 +6608,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map1 = { 3 },
-               .errstr = "R0 min value is negative",
+               .errstr = "unbounded min value",
                .result = REJECT,
        },
        {
@@ -6627,7 +6637,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map1 = { 3 },
-               .errstr = "R0 min value is negative",
+               .errstr = "unbounded min value",
                .result = REJECT,
        },
        {
@@ -6657,7 +6667,7 @@ static struct bpf_test tests[] = {
                        BPF_JMP_IMM(BPF_JA, 0, 0, -7),
                },
                .fixup_map1 = { 4 },
-               .errstr = "R0 min value is negative",
+               .errstr = "unbounded min value",
                .result = REJECT,
        },
        {
@@ -6685,8 +6695,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map1 = { 3 },
-               .errstr_unpriv = "R0 pointer comparison prohibited",
-               .errstr = "R0 min value is negative",
+               .errstr = "unbounded min value",
                .result = REJECT,
                .result_unpriv = REJECT,
        },
@@ -6741,6 +6750,462 @@ static struct bpf_test tests[] = {
                .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
                .result = REJECT,
        },
+       {
+               "bounds check based on zero-extended MOV",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+                       /* r2 = 0x0000'0000'ffff'ffff */
+                       BPF_MOV32_IMM(BPF_REG_2, 0xffffffff),
+                       /* r2 = 0 */
+                       BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 32),
+                       /* no-op */
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
+                       /* access at offset 0 */
+                       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+                       /* exit */
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               .result = ACCEPT
+       },
+       {
+               "bounds check based on sign-extended MOV. test1",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+                       /* r2 = 0xffff'ffff'ffff'ffff */
+                       BPF_MOV64_IMM(BPF_REG_2, 0xffffffff),
+                       /* r2 = 0xffff'ffff */
+                       BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 32),
+                       /* r0 = <oob pointer> */
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
+                       /* access to OOB pointer */
+                       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+                       /* exit */
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               .errstr = "map_value pointer and 4294967295",
+               .result = REJECT
+       },
+       {
+               "bounds check based on sign-extended MOV. test2",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+                       /* r2 = 0xffff'ffff'ffff'ffff */
+                       BPF_MOV64_IMM(BPF_REG_2, 0xffffffff),
+                       /* r2 = 0xfff'ffff */
+                       BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 36),
+                       /* r0 = <oob pointer> */
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
+                       /* access to OOB pointer */
+                       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+                       /* exit */
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               .errstr = "R0 min value is outside of the array range",
+               .result = REJECT
+       },
+       {
+               "bounds check based on reg_off + var_off + insn_off. test1",
+               .insns = {
+                       BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+                                   offsetof(struct __sk_buff, mark)),
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+                       BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 1),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, (1 << 29) - 1),
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_6),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, (1 << 29) - 1),
+                       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 3),
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 4 },
+               .errstr = "value_size=8 off=1073741825",
+               .result = REJECT,
+               .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       },
+       {
+               "bounds check based on reg_off + var_off + insn_off. test2",
+               .insns = {
+                       BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+                                   offsetof(struct __sk_buff, mark)),
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+                       BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 1),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, (1 << 30) - 1),
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_6),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, (1 << 29) - 1),
+                       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 3),
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 4 },
+               .errstr = "value 1073741823",
+               .result = REJECT,
+               .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       },
+       {
+               "bounds check after truncation of non-boundary-crossing range",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+                       /* r1 = [0x00, 0xff] */
+                       BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+                       BPF_MOV64_IMM(BPF_REG_2, 1),
+                       /* r2 = 0x10'0000'0000 */
+                       BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 36),
+                       /* r1 = [0x10'0000'0000, 0x10'0000'00ff] */
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
+                       /* r1 = [0x10'7fff'ffff, 0x10'8000'00fe] */
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
+                       /* r1 = [0x00, 0xff] */
+                       BPF_ALU32_IMM(BPF_SUB, BPF_REG_1, 0x7fffffff),
+                       /* r1 = 0 */
+                       BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
+                       /* no-op */
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+                       /* access at offset 0 */
+                       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+                       /* exit */
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               .result = ACCEPT
+       },
+       {
+               "bounds check after truncation of boundary-crossing range (1)",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+                       /* r1 = [0x00, 0xff] */
+                       BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
+                       /* r1 = [0xffff'ff80, 0x1'0000'007f] */
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
+                       /* r1 = [0xffff'ff80, 0xffff'ffff] or
+                        *      [0x0000'0000, 0x0000'007f]
+                        */
+                       BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 0),
+                       BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
+                       /* r1 = [0x00, 0xff] or
+                        *      [0xffff'ffff'0000'0080, 0xffff'ffff'ffff'ffff]
+                        */
+                       BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
+                       /* r1 = 0 or
+                        *      [0x00ff'ffff'ff00'0000, 0x00ff'ffff'ffff'ffff]
+                        */
+                       BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
+                       /* no-op or OOB pointer computation */
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+                       /* potentially OOB access */
+                       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+                       /* exit */
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               /* not actually fully unbounded, but the bound is very high */
+               .errstr = "R0 unbounded memory access",
+               .result = REJECT
+       },
+       {
+               "bounds check after truncation of boundary-crossing range (2)",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+                       /* r1 = [0x00, 0xff] */
+                       BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
+                       /* r1 = [0xffff'ff80, 0x1'0000'007f] */
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
+                       /* r1 = [0xffff'ff80, 0xffff'ffff] or
+                        *      [0x0000'0000, 0x0000'007f]
+                        * difference to previous test: truncation via MOV32
+                        * instead of ALU32.
+                        */
+                       BPF_MOV32_REG(BPF_REG_1, BPF_REG_1),
+                       BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
+                       /* r1 = [0x00, 0xff] or
+                        *      [0xffff'ffff'0000'0080, 0xffff'ffff'ffff'ffff]
+                        */
+                       BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
+                       /* r1 = 0 or
+                        *      [0x00ff'ffff'ff00'0000, 0x00ff'ffff'ffff'ffff]
+                        */
+                       BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
+                       /* no-op or OOB pointer computation */
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+                       /* potentially OOB access */
+                       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+                       /* exit */
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               /* not actually fully unbounded, but the bound is very high */
+               .errstr = "R0 unbounded memory access",
+               .result = REJECT
+       },
+       {
+               "bounds check after wrapping 32-bit addition",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+                       /* r1 = 0x7fff'ffff */
+                       BPF_MOV64_IMM(BPF_REG_1, 0x7fffffff),
+                       /* r1 = 0xffff'fffe */
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
+                       /* r1 = 0 */
+                       BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 2),
+                       /* no-op */
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+                       /* access at offset 0 */
+                       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+                       /* exit */
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               .result = ACCEPT
+       },
+       {
+               "bounds check after shift with oversized count operand",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+                       BPF_MOV64_IMM(BPF_REG_2, 32),
+                       BPF_MOV64_IMM(BPF_REG_1, 1),
+                       /* r1 = (u32)1 << (u32)32 = ? */
+                       BPF_ALU32_REG(BPF_LSH, BPF_REG_1, BPF_REG_2),
+                       /* r1 = [0x0000, 0xffff] */
+                       BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xffff),
+                       /* computes unknown pointer, potentially OOB */
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+                       /* potentially OOB access */
+                       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+                       /* exit */
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               .errstr = "R0 max value is outside of the array range",
+               .result = REJECT
+       },
+       {
+               "bounds check after right shift of maybe-negative number",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+                       /* r1 = [0x00, 0xff] */
+                       BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+                       /* r1 = [-0x01, 0xfe] */
+                       BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 1),
+                       /* r1 = 0 or 0xff'ffff'ffff'ffff */
+                       BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
+                       /* r1 = 0 or 0xffff'ffff'ffff */
+                       BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
+                       /* computes unknown pointer, potentially OOB */
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+                       /* potentially OOB access */
+                       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+                       /* exit */
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               .errstr = "R0 unbounded memory access",
+               .result = REJECT
+       },
+       {
+               "bounds check map access with off+size signed 32bit overflow. test1",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+                       BPF_EXIT_INSN(),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x7ffffffe),
+                       BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+                       BPF_JMP_A(0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               .errstr = "map_value pointer and 2147483646",
+               .result = REJECT
+       },
+       {
+               "bounds check map access with off+size signed 32bit overflow. test2",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+                       BPF_EXIT_INSN(),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff),
+                       BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+                       BPF_JMP_A(0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               .errstr = "pointer offset 1073741822",
+               .result = REJECT
+       },
+       {
+               "bounds check map access with off+size signed 32bit overflow. test3",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+                       BPF_EXIT_INSN(),
+                       BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 0x1fffffff),
+                       BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 0x1fffffff),
+                       BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 2),
+                       BPF_JMP_A(0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               .errstr = "pointer offset -1073741822",
+               .result = REJECT
+       },
+       {
+               "bounds check map access with off+size signed 32bit overflow. test4",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+                       BPF_EXIT_INSN(),
+                       BPF_MOV64_IMM(BPF_REG_1, 1000000),
+                       BPF_ALU64_IMM(BPF_MUL, BPF_REG_1, 1000000),
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+                       BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 2),
+                       BPF_JMP_A(0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               .errstr = "map_value pointer and 1000000000000",
+               .result = REJECT
+       },
+       {
+               "pointer/scalar confusion in state equality check (way 1)",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+                       BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+                       BPF_JMP_A(1),
+                       BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
+                       BPF_JMP_A(0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               .result = ACCEPT,
+               .result_unpriv = REJECT,
+               .errstr_unpriv = "R0 leaks addr as return value"
+       },
+       {
+               "pointer/scalar confusion in state equality check (way 2)",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+                       BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
+                       BPF_JMP_A(1),
+                       BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               .result = ACCEPT,
+               .result_unpriv = REJECT,
+               .errstr_unpriv = "R0 leaks addr as return value"
+       },
        {
                "variable-offset ctx access",
                .insns = {
@@ -6782,6 +7247,71 @@ static struct bpf_test tests[] = {
                .result = REJECT,
                .prog_type = BPF_PROG_TYPE_LWT_IN,
        },
+       {
+               "indirect variable-offset stack access",
+               .insns = {
+                       /* Fill the top 8 bytes of the stack */
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       /* Get an unknown value */
+                       BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+                       /* Make it small and 4-byte aligned */
+                       BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
+                       BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 8),
+                       /* add it to fp.  We now have either fp-4 or fp-8, but
+                        * we don't know which
+                        */
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
+                       /* dereference it indirectly */
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 5 },
+               .errstr = "variable stack read R2",
+               .result = REJECT,
+               .prog_type = BPF_PROG_TYPE_LWT_IN,
+       },
+       {
+               "direct stack access with 32-bit wraparound. test1",
+               .insns = {
+                       BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
+                       BPF_MOV32_IMM(BPF_REG_0, 0),
+                       BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+                       BPF_EXIT_INSN()
+               },
+               .errstr = "fp pointer and 2147483647",
+               .result = REJECT
+       },
+       {
+               "direct stack access with 32-bit wraparound. test2",
+               .insns = {
+                       BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x3fffffff),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x3fffffff),
+                       BPF_MOV32_IMM(BPF_REG_0, 0),
+                       BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+                       BPF_EXIT_INSN()
+               },
+               .errstr = "fp pointer and 1073741823",
+               .result = REJECT
+       },
+       {
+               "direct stack access with 32-bit wraparound. test3",
+               .insns = {
+                       BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x1fffffff),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x1fffffff),
+                       BPF_MOV32_IMM(BPF_REG_0, 0),
+                       BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+                       BPF_EXIT_INSN()
+               },
+               .errstr = "fp pointer offset 1073741822",
+               .result = REJECT
+       },
        {
                "liveness pruning and write screening",
                .insns = {
@@ -7103,6 +7633,19 @@ static struct bpf_test tests[] = {
                .result = REJECT,
                .prog_type = BPF_PROG_TYPE_SCHED_CLS,
        },
+       {
+               "pkt_end - pkt_start is allowed",
+               .insns = {
+                       BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+                                   offsetof(struct __sk_buff, data_end)),
+                       BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+                                   offsetof(struct __sk_buff, data)),
+                       BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_2),
+                       BPF_EXIT_INSN(),
+               },
+               .result = ACCEPT,
+               .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       },
        {
                "XDP pkt read, pkt_end mangling, bad access 1",
                .insns = {
@@ -7118,7 +7661,7 @@ static struct bpf_test tests[] = {
                        BPF_MOV64_IMM(BPF_REG_0, 0),
                        BPF_EXIT_INSN(),
                },
-               .errstr = "R1 offset is outside of the packet",
+               .errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END",
                .result = REJECT,
                .prog_type = BPF_PROG_TYPE_XDP,
        },
@@ -7137,7 +7680,7 @@ static struct bpf_test tests[] = {
                        BPF_MOV64_IMM(BPF_REG_0, 0),
                        BPF_EXIT_INSN(),
                },
-               .errstr = "R1 offset is outside of the packet",
+               .errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END",
                .result = REJECT,
                .prog_type = BPF_PROG_TYPE_XDP,
        },
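The updated message reflects that the verifier now refuses any arithmetic on a PTR_TO_PACKET_END register up front, rather than waiting for an out-of-packet access. A sketch of the kind of mangling these two tests perform (illustrative; the full instruction sequences are elided above):

    /* R3 = pkt_end, then mangle it: rejected with
     * "R3 pointer arithmetic on PTR_TO_PACKET_END". */
    BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
                offsetof(struct xdp_md, data_end)),
    BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 8),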
index e57b4ac..7177bea 100644
@@ -1,3 +1,4 @@
 CONFIG_USER_NS=y
 CONFIG_BPF_SYSCALL=y
 CONFIG_TEST_BPF=m
+CONFIG_NUMA=y
index 66e5ce5..0304ffb 100644
@@ -627,13 +627,10 @@ static void do_multicpu_tests(void)
 static int finish_exec_test(void)
 {
        /*
-        * In a sensible world, this would be check_invalid_segment(0, 1);
-        * For better or for worse, though, the LDT is inherited across exec.
-        * We can probably change this safely, but for now we test it.
+        * Older kernel versions did inherit the LDT on exec(), which is
+        * wrong because exec() starts from a clean state.
         */
-       check_valid_segment(0, 1,
-                           AR_DPL3 | AR_TYPE_XRCODE | AR_S | AR_P | AR_DB,
-                           42, true);
+       check_invalid_segment(0, 1);
 
        return nerrs ? 1 : 0;
 }
index f9555b1..cc29a81 100644
@@ -92,16 +92,23 @@ static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
 {
        struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
        struct arch_timer_context *vtimer;
+       u32 cnt_ctl;
 
-       if (!vcpu) {
-               pr_warn_once("Spurious arch timer IRQ on non-VCPU thread\n");
-               return IRQ_NONE;
-       }
-       vtimer = vcpu_vtimer(vcpu);
+       /*
+        * We may see a timer interrupt after vcpu_put() has been called which
+        * sets the CPU's vcpu pointer to NULL, because even though the timer
+        * has been disabled in vtimer_save_state(), the hardware interrupt
+        * signal may not have been retired from the interrupt controller yet.
+        */
+       if (!vcpu)
+               return IRQ_HANDLED;
 
+       vtimer = vcpu_vtimer(vcpu);
        if (!vtimer->irq.level) {
-               vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl);
-               if (kvm_timer_irq_can_fire(vtimer))
+               cnt_ctl = read_sysreg_el0(cntv_ctl);
+               cnt_ctl &= ARCH_TIMER_CTRL_ENABLE | ARCH_TIMER_CTRL_IT_STAT |
+                          ARCH_TIMER_CTRL_IT_MASK;
+               if (cnt_ctl == (ARCH_TIMER_CTRL_ENABLE | ARCH_TIMER_CTRL_IT_STAT))
                        kvm_timer_update_irq(vcpu, true, vtimer);
        }
 
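Restated as a sketch (using the same constants and kernel types as the hunk above): the handler now treats the interrupt as a genuine vtimer fire only when the timer is enabled, its status bit is set, and its interrupt is not masked.

    static inline bool vtimer_fired(u32 cnt_ctl)
    {
            /* Keep only the three control bits the check cares about. */
            u32 bits = cnt_ctl & (ARCH_TIMER_CTRL_ENABLE |
                                  ARCH_TIMER_CTRL_IT_STAT |
                                  ARCH_TIMER_CTRL_IT_MASK);

            /* Fired iff enabled and pending, with the mask bit clear. */
            return bits == (ARCH_TIMER_CTRL_ENABLE | ARCH_TIMER_CTRL_IT_STAT);
    }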
@@ -355,6 +362,7 @@ static void vtimer_save_state(struct kvm_vcpu *vcpu)
 
        /* Disable the virtual timer */
        write_sysreg_el0(0, cntv_ctl);
+       isb();
 
        vtimer->loaded = false;
 out:
@@ -720,7 +728,7 @@ static int kvm_timer_dying_cpu(unsigned int cpu)
        return 0;
 }
 
-int kvm_timer_hyp_init(void)
+int kvm_timer_hyp_init(bool has_gic)
 {
        struct arch_timer_kvm_info *info;
        int err;
@@ -756,10 +764,13 @@ int kvm_timer_hyp_init(void)
                return err;
        }
 
-       err = irq_set_vcpu_affinity(host_vtimer_irq, kvm_get_running_vcpus());
-       if (err) {
-               kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
-               goto out_free_irq;
+       if (has_gic) {
+               err = irq_set_vcpu_affinity(host_vtimer_irq,
+                                           kvm_get_running_vcpus());
+               if (err) {
+                       kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
+                       goto out_free_irq;
+               }
        }
 
        kvm_info("virtual timer IRQ%d\n", host_vtimer_irq);
@@ -835,10 +846,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
 no_vgic:
        preempt_disable();
        timer->enabled = 1;
-       if (!irqchip_in_kernel(vcpu->kvm))
-               kvm_timer_vcpu_load_user(vcpu);
-       else
-               kvm_timer_vcpu_load_vgic(vcpu);
+       kvm_timer_vcpu_load(vcpu);
        preempt_enable();
 
        return 0;
index 6b60c98..2e43f9d 100644
@@ -1326,7 +1326,7 @@ static int init_subsystems(void)
        /*
         * Init HYP architected timer support
         */
-       err = kvm_timer_hyp_init();
+       err = kvm_timer_hyp_init(vgic_present);
        if (err)
                goto out;
 
index b6e715f..dac7ceb 100644
@@ -112,7 +112,7 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
                }
 
                trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr,
-                              data);
+                              &data);
                data = vcpu_data_host_to_guest(vcpu, data, len);
                vcpu_set_reg(vcpu, vcpu->arch.mmio_decode.rt, data);
        }
@@ -182,14 +182,14 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
                data = vcpu_data_guest_to_host(vcpu, vcpu_get_reg(vcpu, rt),
                                               len);
 
-               trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, data);
+               trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, &data);
                kvm_mmio_write_buf(data_buf, len, data);
 
                ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, fault_ipa, len,
                                       data_buf);
        } else {
                trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, len,
-                              fault_ipa, 0);
+                              fault_ipa, NULL);
 
                ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, fault_ipa, len,
                                      data_buf);
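A hedged reading of this hunk, with an illustrative userspace analogue (trace_mmio_sketch is hypothetical, not the kernel tracepoint): passing a pointer lets the READ_UNSATISFIED path hand the tracer NULL, meaning "no data yet", instead of a fabricated 0.

    #include <stdio.h>

    /* Hypothetical analogue of a pointer-taking trace helper. */
    static void trace_mmio_sketch(const unsigned long *val)
    {
            if (val)
                    printf("mmio data: %#lx\n", *val);
            else
                    printf("mmio data: (not yet available)\n");
    }

    int main(void)
    {
            unsigned long data = 0xab;

            trace_mmio_sketch(&data); /* completed access: value known */
            trace_mmio_sketch(NULL);  /* unsatisfied read: no value yet */
            return 0;
    }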
index b36945d..b4b69c2 100644
@@ -509,8 +509,6 @@ static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size)
  */
 void free_hyp_pgds(void)
 {
-       unsigned long addr;
-
        mutex_lock(&kvm_hyp_pgd_mutex);
 
        if (boot_hyp_pgd) {
@@ -521,10 +519,10 @@ void free_hyp_pgds(void)
 
        if (hyp_pgd) {
                unmap_hyp_range(hyp_pgd, hyp_idmap_start, PAGE_SIZE);
-               for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE)
-                       unmap_hyp_range(hyp_pgd, kern_hyp_va(addr), PGDIR_SIZE);
-               for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)
-                       unmap_hyp_range(hyp_pgd, kern_hyp_va(addr), PGDIR_SIZE);
+               unmap_hyp_range(hyp_pgd, kern_hyp_va(PAGE_OFFSET),
+                               (uintptr_t)high_memory - PAGE_OFFSET);
+               unmap_hyp_range(hyp_pgd, kern_hyp_va(VMALLOC_START),
+                               VMALLOC_END - VMALLOC_START);
 
                free_pages((unsigned long)hyp_pgd, hyp_pgd_order);
                hyp_pgd = NULL;