Merge tag 'sched_urgent_for_v5.13_rc6' of git://git.kernel.org/pub/scm/linux/kernel...
author Linus Torvalds <torvalds@linux-foundation.org>
Sun, 20 Jun 2021 16:44:52 +0000 (09:44 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sun, 20 Jun 2021 16:44:52 +0000 (09:44 -0700)
Pull scheduler fix from Borislav Petkov:
 "A single fix to restore fairness between control groups with equal
  priority"

* tag 'sched_urgent_for_v5.13_rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/fair: Correctly insert cfs_rq's to list on unthrottle

244 files changed:
Documentation/riscv/vm-layout.rst
Documentation/vm/slub.rst
MAINTAINERS
Makefile
arch/arc/include/uapi/asm/sigcontext.h
arch/arc/kernel/signal.c
arch/arc/kernel/vmlinux.lds.S
arch/powerpc/include/asm/jump_label.h
arch/powerpc/kernel/signal_64.c
arch/powerpc/mm/mem.c
arch/powerpc/perf/core-book3s.c
arch/riscv/Kconfig.socs
arch/riscv/Makefile
arch/riscv/boot/dts/sifive/fu740-c000.dtsi
arch/riscv/include/asm/pgtable.h
arch/riscv/mm/kasan_init.c
arch/s390/kernel/entry.S
arch/x86/include/asm/fpu/internal.h
arch/x86/kernel/cpu/sgx/virt.c
arch/x86/kernel/fpu/signal.c
arch/x86/kvm/cpuid.c
arch/x86/kvm/lapic.c
arch/x86/kvm/mmu/mmu.c
arch/x86/kvm/svm/avic.c
arch/x86/kvm/svm/sev.c
arch/x86/kvm/vmx/vmx.c
arch/x86/kvm/x86.c
arch/x86/mm/ioremap.c
arch/x86/mm/numa.c
arch/x86/pci/fixup.c
drivers/cpufreq/Kconfig.arm
drivers/cpufreq/cppc_cpufreq.c
drivers/dma/Kconfig
drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c
drivers/dma/idxd/cdev.c
drivers/dma/idxd/init.c
drivers/dma/ipu/ipu_irq.c
drivers/dma/mediatek/mtk-uart-apdma.c
drivers/dma/pl330.c
drivers/dma/qcom/Kconfig
drivers/dma/sf-pdma/Kconfig
drivers/dma/sh/rcar-dmac.c
drivers/dma/ste_dma40.c
drivers/dma/stm32-mdma.c
drivers/dma/xilinx/xilinx_dpdma.c
drivers/dma/xilinx/zynqmp_dma.c
drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
drivers/irqchip/irq-gic-v3.c
drivers/net/caif/caif_serial.c
drivers/net/can/usb/mcba_usb.c
drivers/net/ethernet/amazon/ena/ena_netdev.c
drivers/net/ethernet/atheros/alx/main.c
drivers/net/ethernet/broadcom/bnxt/bnxt.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
drivers/net/ethernet/ec_bhf.c
drivers/net/ethernet/emulex/benet/be_main.c
drivers/net/ethernet/freescale/fec_ptp.c
drivers/net/ethernet/intel/ice/ice_lib.c
drivers/net/ethernet/intel/ice/ice_main.c
drivers/net/ethernet/lantiq_xrx200.c
drivers/net/ethernet/mellanox/mlx5/core/dev.c
drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c
drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c
drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
drivers/net/ethernet/mellanox/mlx5/core/eq.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
drivers/net/ethernet/mellanox/mlx5/core/main.c
drivers/net/ethernet/mellanox/mlx5/core/mr.c
drivers/net/ethernet/mellanox/mlx5/core/rdma.c
drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
drivers/net/ethernet/mellanox/mlx5/core/transobj.c
drivers/net/ethernet/mellanox/mlx5/core/vport.c
drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
drivers/net/ethernet/mellanox/mlxsw/reg.h
drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
drivers/net/ethernet/mscc/ocelot.c
drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
drivers/net/ethernet/qlogic/qed/qed_dcbx.c
drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
drivers/net/ethernet/realtek/r8169_main.c
drivers/net/ethernet/renesas/sh_eth.c
drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
drivers/net/ethernet/xilinx/ll_temac_main.c
drivers/net/hamradio/mkiss.c
drivers/net/mhi/net.c
drivers/net/phy/dp83867.c
drivers/net/usb/cdc_eem.c
drivers/net/usb/cdc_ncm.c
drivers/net/usb/qmi_wwan.c
drivers/net/usb/r8152.c
drivers/net/usb/smsc75xx.c
drivers/net/vrf.c
drivers/net/wireless/mac80211_hwsim.c
drivers/pci/controller/dwc/Makefile
drivers/pci/controller/dwc/pcie-tegra194-acpi.c [new file with mode: 0644]
drivers/pci/controller/dwc/pcie-tegra194.c
drivers/pci/controller/pci-aardvark.c
drivers/pci/of.c
drivers/pci/quirks.c
drivers/ptp/ptp_clock.c
drivers/s390/crypto/ap_queue.c
drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c
drivers/usb/chipidea/usbmisc_imx.c
drivers/usb/core/hub.c
drivers/usb/dwc3/core.c
fs/afs/main.c
fs/afs/write.c
fs/btrfs/block-group.c
fs/hugetlbfs/inode.c
fs/notify/fanotify/fanotify_user.c
fs/proc/base.c
include/linux/arch_topology.h
include/linux/huge_mm.h
include/linux/hugetlb.h
include/linux/mlx5/driver.h
include/linux/mlx5/transobj.h
include/linux/mm.h
include/linux/ptp_clock_kernel.h
include/linux/rmap.h
include/linux/socket.h
include/linux/swapops.h
include/net/mac80211.h
include/net/net_namespace.h
include/net/sock.h
include/uapi/asm-generic/unistd.h
include/uapi/linux/in.h
kernel/bpf/verifier.c
kernel/crash_core.c
kernel/printk/printk_safe.c
kernel/sched/core.c
kernel/trace/trace.c
kernel/trace/trace_clock.c
mm/huge_memory.c
mm/hugetlb.c
mm/internal.h
mm/memory-failure.c
mm/memory.c
mm/migrate.c
mm/page_vma_mapped.c
mm/pgtable-generic.c
mm/rmap.c
mm/slab_common.c
mm/slub.c
mm/sparse.c
mm/swapfile.c
mm/truncate.c
net/appletalk/aarp.c
net/batman-adv/bat_iv_ogm.c
net/bluetooth/smp.c
net/bridge/br_private.h
net/bridge/br_vlan_tunnel.c
net/can/bcm.c
net/can/isotp.c
net/can/j1939/transport.c
net/can/raw.c
net/core/neighbour.c
net/core/net_namespace.c
net/core/rtnetlink.c
net/core/skbuff.c
net/ethtool/eeprom.c
net/ethtool/ioctl.c
net/ethtool/strset.c
net/ipv4/af_inet.c
net/ipv4/cipso_ipv4.c
net/ipv4/devinet.c
net/ipv4/icmp.c
net/ipv4/igmp.c
net/ipv4/ping.c
net/ipv4/route.c
net/ipv4/udp.c
net/ipv6/addrconf.c
net/ipv6/netfilter/nft_fib_ipv6.c
net/ipv6/udp.c
net/kcm/kcmsock.c
net/mac80211/debugfs.c
net/mac80211/ieee80211_i.h
net/mac80211/iface.c
net/mac80211/main.c
net/mac80211/mlme.c
net/mac80211/rc80211_minstrel_ht.c
net/mac80211/rx.c
net/mac80211/scan.c
net/mac80211/tx.c
net/mac80211/util.c
net/mptcp/options.c
net/mptcp/protocol.c
net/mptcp/protocol.h
net/mptcp/subflow.c
net/netfilter/nf_synproxy_core.c
net/netfilter/nf_tables_api.c
net/packet/af_packet.c
net/qrtr/qrtr.c
net/rds/recv.c
net/sched/act_ct.c
net/sched/sch_cake.c
net/socket.c
net/unix/af_unix.c
net/wireless/Makefile
net/wireless/core.c
net/wireless/pmsr.c
net/wireless/sysfs.c
net/wireless/util.c
scripts/recordmcount.h
tools/include/uapi/asm-generic/unistd.h
tools/include/uapi/linux/in.h
tools/lib/bpf/xsk.c
tools/perf/tests/shell/stat_bpf_counters.sh
tools/perf/trace/beauty/include/linux/socket.h
tools/perf/util/machine.c
tools/perf/util/metricgroup.c
tools/testing/selftests/bpf/test_verifier.c
tools/testing/selftests/bpf/verifier/and.c
tools/testing/selftests/bpf/verifier/bounds.c
tools/testing/selftests/bpf/verifier/dead_code.c
tools/testing/selftests/bpf/verifier/jmp32.c
tools/testing/selftests/bpf/verifier/jset.c
tools/testing/selftests/bpf/verifier/unpriv.c
tools/testing/selftests/bpf/verifier/value_ptr_arith.c
tools/testing/selftests/kvm/lib/kvm_util.c
tools/testing/selftests/kvm/lib/test_util.c
tools/testing/selftests/net/fib_tests.sh
tools/testing/selftests/net/icmp.sh [new file with mode: 0755]
tools/testing/selftests/net/mptcp/mptcp_connect.sh
tools/testing/selftests/net/udpgro_fwd.sh
tools/testing/selftests/net/veth.sh
tools/testing/selftests/netfilter/Makefile
tools/testing/selftests/netfilter/nft_fib.sh [new file with mode: 0755]

index 329d320..b7f9893 100644 (file)
@@ -58,6 +58,6 @@ RISC-V Linux Kernel SV39
                                                               |
   ____________________________________________________________|____________________________________________________________
                     |            |                  |         |
-   ffffffff00000000 |   -4    GB | ffffffff7fffffff |    2 GB | modules
-   ffffffff80000000 |   -2    GB | ffffffffffffffff |    2 GB | kernel, BPF
+   ffffffff00000000 |   -4    GB | ffffffff7fffffff |    2 GB | modules, BPF
+   ffffffff80000000 |   -2    GB | ffffffffffffffff |    2 GB | kernel
   __________________|____________|__________________|_________|____________________________________________________________
index 03f294a..d302855 100644 (file)
@@ -181,7 +181,7 @@ SLUB Debug output
 Here is a sample of slub debug output::
 
  ====================================================================
- BUG kmalloc-8: Redzone overwritten
+ BUG kmalloc-8: Right Redzone overwritten
  --------------------------------------------------------------------
 
  INFO: 0xc90f6d28-0xc90f6d2b. First byte 0x00 instead of 0xcc
@@ -189,10 +189,10 @@ Here is a sample of slub debug output::
  INFO: Object 0xc90f6d20 @offset=3360 fp=0xc90f6d58
  INFO: Allocated in get_modalias+0x61/0xf5 age=53 cpu=1 pid=554
 
- Bytes b4 0xc90f6d10:  00 00 00 00 00 00 00 00 5a 5a 5a 5a 5a 5a 5a 5a ........ZZZZZZZZ
- Object 0xc90f6d20:  31 30 31 39 2e 30 30 35                         1019.005
- Redzone 0xc90f6d28:  00 cc cc cc                                     .
- Padding 0xc90f6d50:  5a 5a 5a 5a 5a 5a 5a 5a                         ZZZZZZZZ
+ Bytes b4 (0xc90f6d10): 00 00 00 00 00 00 00 00 5a 5a 5a 5a 5a 5a 5a 5a ........ZZZZZZZZ
+ Object   (0xc90f6d20): 31 30 31 39 2e 30 30 35                         1019.005
+ Redzone  (0xc90f6d28): 00 cc cc cc                                     .
+ Padding  (0xc90f6d50): 5a 5a 5a 5a 5a 5a 5a 5a                         ZZZZZZZZ
 
    [<c010523d>] dump_trace+0x63/0x1eb
    [<c01053df>] show_trace_log_lvl+0x1a/0x2f
index bc0ceef..8c5ee00 100644 (file)
@@ -16560,6 +16560,7 @@ F:      drivers/misc/sgi-xp/
 
 SHARED MEMORY COMMUNICATIONS (SMC) SOCKETS
 M:     Karsten Graul <kgraul@linux.ibm.com>
+M:     Guvenc Gulce <guvenc@linux.ibm.com>
 L:     linux-s390@vger.kernel.org
 S:     Supported
 W:     http://www.ibm.com/developerworks/linux/linux390/
index ed669b2..2d7a8df 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -929,11 +929,14 @@ CC_FLAGS_LTO      += -fvisibility=hidden
 # Limit inlining across translation units to reduce binary size
 KBUILD_LDFLAGS += -mllvm -import-instr-limit=5
 
-# Check for frame size exceeding threshold during prolog/epilog insertion.
+# Check for frame size exceeding threshold during prolog/epilog insertion
+# when using lld < 13.0.0.
 ifneq ($(CONFIG_FRAME_WARN),0)
+ifeq ($(shell test $(CONFIG_LLD_VERSION) -lt 130000; echo $$?),0)
 KBUILD_LDFLAGS += -plugin-opt=-warn-stack-size=$(CONFIG_FRAME_WARN)
 endif
 endif
+endif
 
 ifdef CONFIG_LTO
 KBUILD_CFLAGS  += -fno-lto $(CC_FLAGS_LTO)
index 95f8a43..7a5449d 100644 (file)
@@ -18,6 +18,7 @@
  */
 struct sigcontext {
        struct user_regs_struct regs;
+       struct user_regs_arcv2 v2abi;
 };
 
 #endif /* _ASM_ARC_SIGCONTEXT_H */
index b3ccb9e..cb2f885 100644 (file)
@@ -61,6 +61,41 @@ struct rt_sigframe {
        unsigned int sigret_magic;
 };
 
+static int save_arcv2_regs(struct sigcontext *mctx, struct pt_regs *regs)
+{
+       int err = 0;
+#ifndef CONFIG_ISA_ARCOMPACT
+       struct user_regs_arcv2 v2abi;
+
+       v2abi.r30 = regs->r30;
+#ifdef CONFIG_ARC_HAS_ACCL_REGS
+       v2abi.r58 = regs->r58;
+       v2abi.r59 = regs->r59;
+#else
+       v2abi.r58 = v2abi.r59 = 0;
+#endif
+       err = __copy_to_user(&mctx->v2abi, &v2abi, sizeof(v2abi));
+#endif
+       return err;
+}
+
+static int restore_arcv2_regs(struct sigcontext *mctx, struct pt_regs *regs)
+{
+       int err = 0;
+#ifndef CONFIG_ISA_ARCOMPACT
+       struct user_regs_arcv2 v2abi;
+
+       err = __copy_from_user(&v2abi, &mctx->v2abi, sizeof(v2abi));
+
+       regs->r30 = v2abi.r30;
+#ifdef CONFIG_ARC_HAS_ACCL_REGS
+       regs->r58 = v2abi.r58;
+       regs->r59 = v2abi.r59;
+#endif
+#endif
+       return err;
+}
+
 static int
 stash_usr_regs(struct rt_sigframe __user *sf, struct pt_regs *regs,
               sigset_t *set)
@@ -94,6 +129,10 @@ stash_usr_regs(struct rt_sigframe __user *sf, struct pt_regs *regs,
 
        err = __copy_to_user(&(sf->uc.uc_mcontext.regs.scratch), &uregs.scratch,
                             sizeof(sf->uc.uc_mcontext.regs.scratch));
+
+       if (is_isa_arcv2())
+               err |= save_arcv2_regs(&(sf->uc.uc_mcontext), regs);
+
        err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(sigset_t));
 
        return err ? -EFAULT : 0;
@@ -109,6 +148,10 @@ static int restore_usr_regs(struct pt_regs *regs, struct rt_sigframe __user *sf)
        err |= __copy_from_user(&uregs.scratch,
                                &(sf->uc.uc_mcontext.regs.scratch),
                                sizeof(sf->uc.uc_mcontext.regs.scratch));
+
+       if (is_isa_arcv2())
+               err |= restore_arcv2_regs(&(sf->uc.uc_mcontext), regs);
+
        if (err)
                return -EFAULT;
 
index 33ce59d..e2146a8 100644 (file)
@@ -57,7 +57,6 @@ SECTIONS
        .init.ramfs : { INIT_RAM_FS }
 
        . = ALIGN(PAGE_SIZE);
-       _stext = .;
 
        HEAD_TEXT_SECTION
        INIT_TEXT_SECTION(L1_CACHE_BYTES)
@@ -83,6 +82,7 @@ SECTIONS
 
        .text : {
                _text = .;
+               _stext = .;
                TEXT_TEXT
                SCHED_TEXT
                CPUIDLE_TEXT
index 2d5c6be..93ce3ec 100644 (file)
@@ -50,7 +50,7 @@ l_yes:
 1098:  nop;                                    \
        .pushsection __jump_table, "aw";        \
        .long 1098b - ., LABEL - .;             \
-       FTR_ENTRY_LONG KEY;                     \
+       FTR_ENTRY_LONG KEY - .;                 \
        .popsection
 #endif
 
index dca6648..f9e1f54 100644 (file)
@@ -902,6 +902,10 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
        unsafe_copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set), badframe_block);
        user_write_access_end();
 
+       /* Save the siginfo outside of the unsafe block. */
+       if (copy_siginfo_to_user(&frame->info, &ksig->info))
+               goto badframe;
+
        /* Make sure signal handler doesn't get spurious FP exceptions */
        tsk->thread.fp_state.fpscr = 0;
 
@@ -915,11 +919,6 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
                regs->nip = (unsigned long) &frame->tramp[0];
        }
 
-
-       /* Save the siginfo outside of the unsafe block. */
-       if (copy_siginfo_to_user(&frame->info, &ksig->info))
-               goto badframe;
-
        /* Allocate a dummy caller frame for the signal handler. */
        newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE;
        err |= put_user(regs->gpr[1], (unsigned long __user *)newsp);
index 043bbea..a6b36a4 100644 (file)
@@ -20,6 +20,7 @@
 #include <asm/machdep.h>
 #include <asm/rtas.h>
 #include <asm/kasan.h>
+#include <asm/sparsemem.h>
 #include <asm/svm.h>
 
 #include <mm/mmu_decl.h>
index 16d4d1b..5162241 100644 (file)
@@ -2254,7 +2254,7 @@ unsigned long perf_instruction_pointer(struct pt_regs *regs)
        bool use_siar = regs_use_siar(regs);
        unsigned long siar = mfspr(SPRN_SIAR);
 
-       if (ppmu->flags & PPMU_P10_DD1) {
+       if (ppmu && (ppmu->flags & PPMU_P10_DD1)) {
                if (siar)
                        return siar;
                else
index ed96376..30676eb 100644 (file)
@@ -14,6 +14,7 @@ config SOC_SIFIVE
        select CLK_SIFIVE
        select CLK_SIFIVE_PRCI
        select SIFIVE_PLIC
+       select RISCV_ERRATA_ALTERNATIVE
        select ERRATA_SIFIVE
        help
          This enables support for SiFive SoC platform hardware.
index 4be0206..99ecd8b 100644 (file)
@@ -16,7 +16,7 @@ ifeq ($(CONFIG_DYNAMIC_FTRACE),y)
        CC_FLAGS_FTRACE := -fpatchable-function-entry=8
 endif
 
-ifeq ($(CONFIG_64BIT)$(CONFIG_CMODEL_MEDLOW),yy)
+ifeq ($(CONFIG_CMODEL_MEDLOW),y)
 KBUILD_CFLAGS_MODULE += -mcmodel=medany
 endif
 
index 8eef82e..abbb960 100644 (file)
                        cache-size = <2097152>;
                        cache-unified;
                        interrupt-parent = <&plic0>;
-                       interrupts = <19 20 21 22>;
+                       interrupts = <19 21 22 20>;
                        reg = <0x0 0x2010000 0x0 0x1000>;
                };
                gpio: gpio@10060000 {
index 9469f46..380cd3a 100644 (file)
@@ -30,9 +30,8 @@
 
 #define BPF_JIT_REGION_SIZE    (SZ_128M)
 #ifdef CONFIG_64BIT
-/* KASLR should leave at least 128MB for BPF after the kernel */
-#define BPF_JIT_REGION_START   PFN_ALIGN((unsigned long)&_end)
-#define BPF_JIT_REGION_END     (BPF_JIT_REGION_START + BPF_JIT_REGION_SIZE)
+#define BPF_JIT_REGION_START   (BPF_JIT_REGION_END - BPF_JIT_REGION_SIZE)
+#define BPF_JIT_REGION_END     (MODULES_END)
 #else
 #define BPF_JIT_REGION_START   (PAGE_OFFSET - BPF_JIT_REGION_SIZE)
 #define BPF_JIT_REGION_END     (VMALLOC_END)
index 9daacae..d7189c8 100644 (file)
@@ -169,7 +169,7 @@ static void __init kasan_shallow_populate(void *start, void *end)
 
 void __init kasan_init(void)
 {
-       phys_addr_t _start, _end;
+       phys_addr_t p_start, p_end;
        u64 i;
 
        /*
@@ -189,9 +189,9 @@ void __init kasan_init(void)
                        (void *)kasan_mem_to_shadow((void *)VMALLOC_END));
 
        /* Populate the linear mapping */
-       for_each_mem_range(i, &_start, &_end) {
-               void *start = (void *)__va(_start);
-               void *end = (void *)__va(_end);
+       for_each_mem_range(i, &p_start, &p_end) {
+               void *start = (void *)__va(p_start);
+               void *end = (void *)__va(p_end);
 
                if (start >= end)
                        break;
@@ -201,7 +201,7 @@ void __init kasan_init(void)
 
        /* Populate kernel, BPF, modules mapping */
        kasan_populate(kasan_mem_to_shadow((const void *)MODULES_VADDR),
-                      kasan_mem_to_shadow((const void *)BPF_JIT_REGION_END));
+                      kasan_mem_to_shadow((const void *)MODULES_VADDR + SZ_2G));
 
        for (i = 0; i < PTRS_PER_PTE; i++)
                set_pte(&kasan_early_shadow_pte[i],
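
The rename from _start/_end to p_start/p_end matters because _start and _end are also linker-generated symbols in the kernel image; locals with those names shadow the symbols for the rest of the function. A minimal standalone illustration of the shadowing hazard (plain C, with an ordinary global standing in for the linker symbol):

    #include <stdio.h>

    /* Stand-in for a linker symbol such as _end. */
    unsigned long _end = 0xffffffff81000000UL;

    static void shadowed(void)
    {
            unsigned long _end = 0;  /* local shadows the global */

            /* Any use of _end here sees 0, not the symbol's value. */
            printf("inside:  %#lx\n", _end);
    }

    int main(void)
    {
            shadowed();
            printf("outside: %#lx\n", _end);  /* the real value */
            return 0;
    }
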
index 12de7a9..9cc71ca 100644 (file)
@@ -651,9 +651,9 @@ ENDPROC(stack_overflow)
 .Lcleanup_sie_mcck:
        larl    %r13,.Lsie_entry
        slgr    %r9,%r13
-       larl    %r13,.Lsie_skip
+       lghi    %r13,.Lsie_skip - .Lsie_entry
        clgr    %r9,%r13
-       jh      .Lcleanup_sie_int
+       jhe     .Lcleanup_sie_int
        oi      __LC_CPU_FLAGS+7, _CIF_MCCK_GUEST
 .Lcleanup_sie_int:
        BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
index ceeba9f..fdee23e 100644 (file)
@@ -578,10 +578,17 @@ static inline void switch_fpu_finish(struct fpu *new_fpu)
         * PKRU state is switched eagerly because it needs to be valid before we
         * return to userland e.g. for a copy_to_user() operation.
         */
-       if (current->mm) {
+       if (!(current->flags & PF_KTHREAD)) {
+               /*
+                * If the PKRU bit in xsave.header.xfeatures is not set,
+                * then the PKRU component was in init state, which means
+                * XRSTOR will set PKRU to 0. If the bit is not set then
+                * get_xsave_addr() will return NULL because the PKRU value
+                * in memory is not valid. This means pkru_val has to be
+                * set to 0 and not to init_pkru_value.
+                */
                pk = get_xsave_addr(&new_fpu->state.xsave, XFEATURE_PKRU);
-               if (pk)
-                       pkru_val = pk->pkru;
+               pkru_val = pk ? pk->pkru : 0;
        }
        __write_pkru(pkru_val);
 }
index 6ad165a..64511c4 100644 (file)
@@ -212,6 +212,7 @@ static int sgx_vepc_release(struct inode *inode, struct file *file)
                list_splice_tail(&secs_pages, &zombie_secs_pages);
        mutex_unlock(&zombie_secs_pages_lock);
 
+       xa_destroy(&vepc->page_array);
        kfree(vepc);
 
        return 0;
index a4ec653..ec3ae30 100644 (file)
@@ -307,13 +307,17 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
                return 0;
        }
 
-       if (!access_ok(buf, size))
-               return -EACCES;
+       if (!access_ok(buf, size)) {
+               ret = -EACCES;
+               goto out;
+       }
 
-       if (!static_cpu_has(X86_FEATURE_FPU))
-               return fpregs_soft_set(current, NULL,
-                                      0, sizeof(struct user_i387_ia32_struct),
-                                      NULL, buf) != 0;
+       if (!static_cpu_has(X86_FEATURE_FPU)) {
+               ret = fpregs_soft_set(current, NULL, 0,
+                                     sizeof(struct user_i387_ia32_struct),
+                                     NULL, buf);
+               goto out;
+       }
 
        if (use_xsave()) {
                struct _fpx_sw_bytes fx_sw_user;
@@ -369,6 +373,25 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
                        fpregs_unlock();
                        return 0;
                }
+
+               /*
+                * The above did an FPU restore operation, restricted to
+                * the user portion of the registers, and failed, but the
+                * microcode might have modified the FPU registers
+                * nevertheless.
+                *
+                * If the FPU registers do not belong to current, then
+                * invalidate the FPU register state otherwise the task might
+                * preempt current and return to user space with corrupted
+                * FPU registers.
+                *
+                * In case current owns the FPU registers then no further
+                * action is required. The fixup below will handle it
+                * correctly.
+                */
+               if (test_thread_flag(TIF_NEED_FPU_LOAD))
+                       __cpu_invalidate_fpregs_state();
+
                fpregs_unlock();
        } else {
                /*
@@ -377,7 +400,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
                 */
                ret = __copy_from_user(&env, buf, sizeof(env));
                if (ret)
-                       goto err_out;
+                       goto out;
                envp = &env;
        }
 
@@ -405,16 +428,9 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
        if (use_xsave() && !fx_only) {
                u64 init_bv = xfeatures_mask_user() & ~user_xfeatures;
 
-               if (using_compacted_format()) {
-                       ret = copy_user_to_xstate(&fpu->state.xsave, buf_fx);
-               } else {
-                       ret = __copy_from_user(&fpu->state.xsave, buf_fx, state_size);
-
-                       if (!ret && state_size > offsetof(struct xregs_state, header))
-                               ret = validate_user_xstate_header(&fpu->state.xsave.header);
-               }
+               ret = copy_user_to_xstate(&fpu->state.xsave, buf_fx);
                if (ret)
-                       goto err_out;
+                       goto out;
 
                sanitize_restored_user_xstate(&fpu->state, envp, user_xfeatures,
                                              fx_only);
@@ -434,7 +450,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
                ret = __copy_from_user(&fpu->state.fxsave, buf_fx, state_size);
                if (ret) {
                        ret = -EFAULT;
-                       goto err_out;
+                       goto out;
                }
 
                sanitize_restored_user_xstate(&fpu->state, envp, user_xfeatures,
@@ -452,7 +468,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
        } else {
                ret = __copy_from_user(&fpu->state.fsave, buf_fx, state_size);
                if (ret)
-                       goto err_out;
+                       goto out;
 
                fpregs_lock();
                ret = copy_kernel_to_fregs_err(&fpu->state.fsave);
@@ -463,7 +479,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
                fpregs_deactivate(fpu);
        fpregs_unlock();
 
-err_out:
+out:
        if (ret)
                fpu__clear_user_states(fpu);
        return ret;
index 9a48f13..b4da665 100644 (file)
@@ -655,6 +655,7 @@ static int __do_cpuid_func_emulated(struct kvm_cpuid_array *array, u32 func)
                if (kvm_cpu_cap_has(X86_FEATURE_RDTSCP))
                        entry->ecx = F(RDPID);
                ++array->nent;
+               break;
        default:
                break;
        }
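
The added break keeps the emulated-CPUID case from falling through into default. The default arm here is empty, so the fallthrough was harmless at runtime, but making the termination explicit avoids implicit-fallthrough diagnostics and accidental breakage if default ever grows code. A minimal standalone sketch of the pattern (illustrative values, not KVM code):

    #include <stdio.h>

    /* Without the break, case 7 would fall through into default
     * and run whatever lands there later. */
    static void handle(int func)
    {
            switch (func) {
            case 7:
                    printf("emulated leaf handled\n");
                    break;          /* the fix: stop falling into default */
            default:
                    printf("unhandled leaf\n");
                    break;
            }
    }

    int main(void)
    {
            handle(7);      /* prints only "emulated leaf handled" */
            handle(3);      /* prints "unhandled leaf" */
            return 0;
    }
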
index 6d72d8f..17fa4ab 100644 (file)
@@ -1410,6 +1410,9 @@ int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
        if (!apic_x2apic_mode(apic))
                valid_reg_mask |= APIC_REG_MASK(APIC_ARBPRI);
 
+       if (alignment + len > 4)
+               return 1;
+
        if (offset > 0x3f0 || !(valid_reg_mask & APIC_REG_MASK(offset)))
                return 1;
 
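
The new alignment + len > 4 check rejects APIC register reads that would run past a single 4-byte register; registers sit on 16-byte boundaries, and alignment is the offset within that slot in the surrounding function. A standalone model of the bounds math, with illustrative names:

    #include <stdio.h>

    /* Model: 4-byte registers on 16-byte boundaries. A read of `len`
     * bytes at `offset` stays inside one register only if the offset
     * within the 16-byte slot plus the length fits in 4 bytes. */
    static int reg_read_ok(unsigned int offset, int len)
    {
            unsigned int alignment = offset & 0xf;

            return alignment + len <= 4;
    }

    int main(void)
    {
            printf("%d\n", reg_read_ok(0x30, 4));  /* 1: whole register */
            printf("%d\n", reg_read_ok(0x32, 4));  /* 0: runs past byte 4 */
            printf("%d\n", reg_read_ok(0x30, 13)); /* 0: spills into padding */
            return 0;
    }
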
index 0144c40..8d5876d 100644 (file)
@@ -4739,9 +4739,33 @@ static void init_kvm_softmmu(struct kvm_vcpu *vcpu)
        context->inject_page_fault = kvm_inject_page_fault;
 }
 
+static union kvm_mmu_role kvm_calc_nested_mmu_role(struct kvm_vcpu *vcpu)
+{
+       union kvm_mmu_role role = kvm_calc_shadow_root_page_role_common(vcpu, false);
+
+       /*
+        * Nested MMUs are used only for walking L2's gva->gpa, they never have
+        * shadow pages of their own and so "direct" has no meaning.   Set it
+        * to "true" to try to detect bogus usage of the nested MMU.
+        */
+       role.base.direct = true;
+
+       if (!is_paging(vcpu))
+               role.base.level = 0;
+       else if (is_long_mode(vcpu))
+               role.base.level = is_la57_mode(vcpu) ? PT64_ROOT_5LEVEL :
+                                                      PT64_ROOT_4LEVEL;
+       else if (is_pae(vcpu))
+               role.base.level = PT32E_ROOT_LEVEL;
+       else
+               role.base.level = PT32_ROOT_LEVEL;
+
+       return role;
+}
+
 static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
 {
-       union kvm_mmu_role new_role = kvm_calc_mmu_role_common(vcpu, false);
+       union kvm_mmu_role new_role = kvm_calc_nested_mmu_role(vcpu);
        struct kvm_mmu *g_context = &vcpu->arch.nested_mmu;
 
        if (new_role.as_u64 == g_context->mmu_role.as_u64)
index 0e62e6a..5e7e920 100644 (file)
@@ -221,7 +221,7 @@ static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu,
        return &avic_physical_id_table[index];
 }
 
-/**
+/*
  * Note:
  * AVIC hardware walks the nested page table to check permissions,
  * but does not use the SPA address specified in the leaf page
@@ -764,7 +764,7 @@ out:
        return ret;
 }
 
-/**
+/*
  * Note:
  * The HW cannot support posting multicast/broadcast
  * interrupts to a vCPU. So, we still use legacy interrupt
@@ -1005,7 +1005,7 @@ void avic_vcpu_put(struct kvm_vcpu *vcpu)
        WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
 }
 
-/**
+/*
  * This function is called during VCPU halt/unhalt.
  */
 static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
index e0ce5da..8d36f0c 100644 (file)
@@ -199,9 +199,19 @@ static void sev_asid_free(struct kvm_sev_info *sev)
        sev->misc_cg = NULL;
 }
 
-static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
+static void sev_decommission(unsigned int handle)
 {
        struct sev_data_decommission decommission;
+
+       if (!handle)
+               return;
+
+       decommission.handle = handle;
+       sev_guest_decommission(&decommission, NULL);
+}
+
+static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
+{
        struct sev_data_deactivate deactivate;
 
        if (!handle)
@@ -214,9 +224,7 @@ static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
        sev_guest_deactivate(&deactivate, NULL);
        up_read(&sev_deactivate_lock);
 
-       /* decommission handle */
-       decommission.handle = handle;
-       sev_guest_decommission(&decommission, NULL);
+       sev_decommission(handle);
 }
 
 static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
@@ -341,8 +349,10 @@ static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
 
        /* Bind ASID to this guest */
        ret = sev_bind_asid(kvm, start.handle, error);
-       if (ret)
+       if (ret) {
+               sev_decommission(start.handle);
                goto e_free_session;
+       }
 
        /* return handle to userspace */
        params.handle = start.handle;
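
The refactor gives LAUNCH_START's error path a way to release the firmware handle: sev_decommission() is now called when binding the ASID fails, where previously the handle leaked in firmware. A toy standalone model of the "undo step one when step two fails" shape of the fix (all names hypothetical):

    #include <stdio.h>

    static int start(void)  { return 42; }          /* allocates a handle */
    static int bind(int h)  { (void)h; return -1; } /* fails in this model */
    static void decommission(int h) { printf("decommission %d\n", h); }

    int main(void)
    {
            int handle = start();

            if (bind(handle) < 0) {
                    /* Before the fix the handle leaked here; the error
                     * path must release what start() created. */
                    decommission(handle);
                    return 1;
            }
            return 0;
    }
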
index 50b42d7..c2a779b 100644 (file)
@@ -6247,6 +6247,7 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
        switch (kvm_get_apic_mode(vcpu)) {
        case LAPIC_MODE_INVALID:
                WARN_ONCE(true, "Invalid local APIC state");
+               break;
        case LAPIC_MODE_DISABLED:
                break;
        case LAPIC_MODE_XAPIC:
index 6d3955a..e0f4a46 100644 (file)
@@ -7106,7 +7106,10 @@ static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt)
 
 static void emulator_set_hflags(struct x86_emulate_ctxt *ctxt, unsigned emul_flags)
 {
-       emul_to_vcpu(ctxt)->arch.hflags = emul_flags;
+       struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+
+       vcpu->arch.hflags = emul_flags;
+       kvm_mmu_reset_context(vcpu);
 }
 
 static int emulator_pre_leave_smm(struct x86_emulate_ctxt *ctxt,
@@ -8258,6 +8261,7 @@ void kvm_arch_exit(void)
        kvm_x86_ops.hardware_enable = NULL;
        kvm_mmu_module_exit();
        free_percpu(user_return_msrs);
+       kmem_cache_destroy(x86_emulator_cache);
        kmem_cache_destroy(x86_fpu_cache);
 #ifdef CONFIG_KVM_XEN
        static_key_deferred_flush(&kvm_xen_enabled);
index 12c686c..60ade7d 100644 (file)
@@ -118,7 +118,9 @@ static void __ioremap_check_other(resource_size_t addr, struct ioremap_desc *des
        if (!IS_ENABLED(CONFIG_EFI))
                return;
 
-       if (efi_mem_type(addr) == EFI_RUNTIME_SERVICES_DATA)
+       if (efi_mem_type(addr) == EFI_RUNTIME_SERVICES_DATA ||
+           (efi_mem_type(addr) == EFI_BOOT_SERVICES_DATA &&
+            efi_mem_attributes(addr) & EFI_MEMORY_RUNTIME))
                desc->flags |= IORES_MAP_ENCRYPTED;
 }
 
index 5eb4dc2..e94da74 100644 (file)
@@ -254,7 +254,13 @@ int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
 
                /* make sure all non-reserved blocks are inside the limits */
                bi->start = max(bi->start, low);
-               bi->end = min(bi->end, high);
+
+               /* preserve info for non-RAM areas above 'max_pfn': */
+               if (bi->end > high) {
+                       numa_add_memblk_to(bi->nid, high, bi->end,
+                                          &numa_reserved_meminfo);
+                       bi->end = high;
+               }
 
                /* and there's no empty block */
                if (bi->start >= bi->end)
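
Previously the block was clamped to high unconditionally, discarding any non-RAM range above max_pfn; now that tail is recorded in numa_reserved_meminfo before clamping. A standalone sketch of the split, using plain structs rather than the kernel's numa_meminfo:

    #include <stdio.h>

    struct blk { unsigned long start, end; };

    /* Split [start, end) at `high`: the low part stays usable, the
     * high part is handed to a "reserved" record instead of being
     * silently dropped, mirroring the numa_cleanup_meminfo() change. */
    static void clamp_preserve(struct blk *bi, unsigned long high,
                               struct blk *reserved)
    {
            if (bi->end > high) {
                    reserved->start = high;
                    reserved->end = bi->end;
                    bi->end = high;
            }
    }

    int main(void)
    {
            struct blk bi = { 0x1000, 0x9000 }, resv = { 0, 0 };

            clamp_preserve(&bi, 0x4000, &resv);
            printf("usable:   %#lx-%#lx\n", bi.start, bi.end);
            printf("reserved: %#lx-%#lx\n", resv.start, resv.end);
            return 0;
    }
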
index 02dc646..2edd866 100644 (file)
@@ -779,4 +779,48 @@ DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x1571, pci_amd_enable_64bit_bar);
 DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x15b1, pci_amd_enable_64bit_bar);
 DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x1601, pci_amd_enable_64bit_bar);
 
+#define RS690_LOWER_TOP_OF_DRAM2       0x30
+#define RS690_LOWER_TOP_OF_DRAM2_VALID 0x1
+#define RS690_UPPER_TOP_OF_DRAM2       0x31
+#define RS690_HTIU_NB_INDEX            0xA8
+#define RS690_HTIU_NB_INDEX_WR_ENABLE  0x100
+#define RS690_HTIU_NB_DATA             0xAC
+
+/*
+ * Some BIOS implementations support RAM above 4GB, but do not configure the
+ * PCI host to respond to bus master accesses for these addresses. These
+ * implementations set the TOP_OF_DRAM_SLOT1 register correctly, so PCI DMA
+ * works as expected for addresses below 4GB.
+ *
+ * Reference: "AMD RS690 ASIC Family Register Reference Guide" (pg. 2-57)
+ * https://www.amd.com/system/files/TechDocs/43372_rs690_rrg_3.00o.pdf
+ */
+static void rs690_fix_64bit_dma(struct pci_dev *pdev)
+{
+       u32 val = 0;
+       phys_addr_t top_of_dram = __pa(high_memory - 1) + 1;
+
+       if (top_of_dram <= (1ULL << 32))
+               return;
+
+       pci_write_config_dword(pdev, RS690_HTIU_NB_INDEX,
+                               RS690_LOWER_TOP_OF_DRAM2);
+       pci_read_config_dword(pdev, RS690_HTIU_NB_DATA, &val);
+
+       if (val)
+               return;
+
+       pci_info(pdev, "Adjusting top of DRAM to %pa for 64-bit DMA support\n", &top_of_dram);
+
+       pci_write_config_dword(pdev, RS690_HTIU_NB_INDEX,
+               RS690_UPPER_TOP_OF_DRAM2 | RS690_HTIU_NB_INDEX_WR_ENABLE);
+       pci_write_config_dword(pdev, RS690_HTIU_NB_DATA, top_of_dram >> 32);
+
+       pci_write_config_dword(pdev, RS690_HTIU_NB_INDEX,
+               RS690_LOWER_TOP_OF_DRAM2 | RS690_HTIU_NB_INDEX_WR_ENABLE);
+       pci_write_config_dword(pdev, RS690_HTIU_NB_DATA,
+               top_of_dram | RS690_LOWER_TOP_OF_DRAM2_VALID);
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7910, rs690_fix_64bit_dma);
+
 #endif
index a5c5f70..e65e0a4 100644 (file)
@@ -19,16 +19,6 @@ config ACPI_CPPC_CPUFREQ
 
          If in doubt, say N.
 
-config ACPI_CPPC_CPUFREQ_FIE
-       bool "Frequency Invariance support for CPPC cpufreq driver"
-       depends on ACPI_CPPC_CPUFREQ && GENERIC_ARCH_TOPOLOGY
-       default y
-       help
-         This extends frequency invariance support in the CPPC cpufreq driver,
-         by using CPPC delivered and reference performance counters.
-
-         If in doubt, say N.
-
 config ARM_ALLWINNER_SUN50I_CPUFREQ_NVMEM
        tristate "Allwinner nvmem based SUN50I CPUFreq driver"
        depends on ARCH_SUNXI
index 3848b4c..2f769b1 100644 (file)
 
 #define pr_fmt(fmt)    "CPPC Cpufreq:" fmt
 
-#include <linux/arch_topology.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/delay.h>
 #include <linux/cpu.h>
 #include <linux/cpufreq.h>
 #include <linux/dmi.h>
-#include <linux/irq_work.h>
-#include <linux/kthread.h>
 #include <linux/time.h>
 #include <linux/vmalloc.h>
-#include <uapi/linux/sched/types.h>
 
 #include <asm/unaligned.h>
 
@@ -61,204 +57,6 @@ static struct cppc_workaround_oem_info wa_info[] = {
        }
 };
 
-#ifdef CONFIG_ACPI_CPPC_CPUFREQ_FIE
-
-/* Frequency invariance support */
-struct cppc_freq_invariance {
-       int cpu;
-       struct irq_work irq_work;
-       struct kthread_work work;
-       struct cppc_perf_fb_ctrs prev_perf_fb_ctrs;
-       struct cppc_cpudata *cpu_data;
-};
-
-static DEFINE_PER_CPU(struct cppc_freq_invariance, cppc_freq_inv);
-static struct kthread_worker *kworker_fie;
-static bool fie_disabled;
-
-static struct cpufreq_driver cppc_cpufreq_driver;
-static unsigned int hisi_cppc_cpufreq_get_rate(unsigned int cpu);
-static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data,
-                                struct cppc_perf_fb_ctrs fb_ctrs_t0,
-                                struct cppc_perf_fb_ctrs fb_ctrs_t1);
-
-/**
- * cppc_scale_freq_workfn - CPPC arch_freq_scale updater for frequency invariance
- * @work: The work item.
- *
- * The CPPC driver register itself with the topology core to provide its own
- * implementation (cppc_scale_freq_tick()) of topology_scale_freq_tick() which
- * gets called by the scheduler on every tick.
- *
- * Note that the arch specific counters have higher priority than CPPC counters,
- * if available, though the CPPC driver doesn't need to have any special
- * handling for that.
- *
- * On an invocation of cppc_scale_freq_tick(), we schedule an irq work (since we
- * reach here from hard-irq context), which then schedules a normal work item
- * and cppc_scale_freq_workfn() updates the per_cpu arch_freq_scale variable
- * based on the counter updates since the last tick.
- */
-static void cppc_scale_freq_workfn(struct kthread_work *work)
-{
-       struct cppc_freq_invariance *cppc_fi;
-       struct cppc_perf_fb_ctrs fb_ctrs = {0};
-       struct cppc_cpudata *cpu_data;
-       unsigned long local_freq_scale;
-       u64 perf;
-
-       cppc_fi = container_of(work, struct cppc_freq_invariance, work);
-       cpu_data = cppc_fi->cpu_data;
-
-       if (cppc_get_perf_ctrs(cppc_fi->cpu, &fb_ctrs)) {
-               pr_warn("%s: failed to read perf counters\n", __func__);
-               return;
-       }
-
-       cppc_fi->prev_perf_fb_ctrs = fb_ctrs;
-       perf = cppc_perf_from_fbctrs(cpu_data, cppc_fi->prev_perf_fb_ctrs,
-                                    fb_ctrs);
-
-       perf <<= SCHED_CAPACITY_SHIFT;
-       local_freq_scale = div64_u64(perf, cpu_data->perf_caps.highest_perf);
-       if (WARN_ON(local_freq_scale > 1024))
-               local_freq_scale = 1024;
-
-       per_cpu(arch_freq_scale, cppc_fi->cpu) = local_freq_scale;
-}
-
-static void cppc_irq_work(struct irq_work *irq_work)
-{
-       struct cppc_freq_invariance *cppc_fi;
-
-       cppc_fi = container_of(irq_work, struct cppc_freq_invariance, irq_work);
-       kthread_queue_work(kworker_fie, &cppc_fi->work);
-}
-
-static void cppc_scale_freq_tick(void)
-{
-       struct cppc_freq_invariance *cppc_fi = &per_cpu(cppc_freq_inv, smp_processor_id());
-
-       /*
-        * cppc_get_perf_ctrs() can potentially sleep, call that from the right
-        * context.
-        */
-       irq_work_queue(&cppc_fi->irq_work);
-}
-
-static struct scale_freq_data cppc_sftd = {
-       .source = SCALE_FREQ_SOURCE_CPPC,
-       .set_freq_scale = cppc_scale_freq_tick,
-};
-
-static void cppc_freq_invariance_policy_init(struct cpufreq_policy *policy,
-                                            struct cppc_cpudata *cpu_data)
-{
-       struct cppc_perf_fb_ctrs fb_ctrs = {0};
-       struct cppc_freq_invariance *cppc_fi;
-       int i, ret;
-
-       if (cppc_cpufreq_driver.get == hisi_cppc_cpufreq_get_rate)
-               return;
-
-       if (fie_disabled)
-               return;
-
-       for_each_cpu(i, policy->cpus) {
-               cppc_fi = &per_cpu(cppc_freq_inv, i);
-               cppc_fi->cpu = i;
-               cppc_fi->cpu_data = cpu_data;
-               kthread_init_work(&cppc_fi->work, cppc_scale_freq_workfn);
-               init_irq_work(&cppc_fi->irq_work, cppc_irq_work);
-
-               ret = cppc_get_perf_ctrs(i, &fb_ctrs);
-               if (ret) {
-                       pr_warn("%s: failed to read perf counters: %d\n",
-                               __func__, ret);
-                       fie_disabled = true;
-               } else {
-                       cppc_fi->prev_perf_fb_ctrs = fb_ctrs;
-               }
-       }
-}
-
-static void __init cppc_freq_invariance_init(void)
-{
-       struct sched_attr attr = {
-               .size           = sizeof(struct sched_attr),
-               .sched_policy   = SCHED_DEADLINE,
-               .sched_nice     = 0,
-               .sched_priority = 0,
-               /*
-                * Fake (unused) bandwidth; workaround to "fix"
-                * priority inheritance.
-                */
-               .sched_runtime  = 1000000,
-               .sched_deadline = 10000000,
-               .sched_period   = 10000000,
-       };
-       int ret;
-
-       if (cppc_cpufreq_driver.get == hisi_cppc_cpufreq_get_rate)
-               return;
-
-       if (fie_disabled)
-               return;
-
-       kworker_fie = kthread_create_worker(0, "cppc_fie");
-       if (IS_ERR(kworker_fie))
-               return;
-
-       ret = sched_setattr_nocheck(kworker_fie->task, &attr);
-       if (ret) {
-               pr_warn("%s: failed to set SCHED_DEADLINE: %d\n", __func__,
-                       ret);
-               kthread_destroy_worker(kworker_fie);
-               return;
-       }
-
-       /* Register for freq-invariance */
-       topology_set_scale_freq_source(&cppc_sftd, cpu_present_mask);
-}
-
-static void cppc_freq_invariance_exit(void)
-{
-       struct cppc_freq_invariance *cppc_fi;
-       int i;
-
-       if (cppc_cpufreq_driver.get == hisi_cppc_cpufreq_get_rate)
-               return;
-
-       if (fie_disabled)
-               return;
-
-       topology_clear_scale_freq_source(SCALE_FREQ_SOURCE_CPPC, cpu_present_mask);
-
-       for_each_possible_cpu(i) {
-               cppc_fi = &per_cpu(cppc_freq_inv, i);
-               irq_work_sync(&cppc_fi->irq_work);
-       }
-
-       kthread_destroy_worker(kworker_fie);
-       kworker_fie = NULL;
-}
-
-#else
-static inline void
-cppc_freq_invariance_policy_init(struct cpufreq_policy *policy,
-                                struct cppc_cpudata *cpu_data)
-{
-}
-
-static inline void cppc_freq_invariance_init(void)
-{
-}
-
-static inline void cppc_freq_invariance_exit(void)
-{
-}
-#endif /* CONFIG_ACPI_CPPC_CPUFREQ_FIE */
-
 /* Callback function used to retrieve the max frequency from DMI */
 static void cppc_find_dmi_mhz(const struct dmi_header *dm, void *private)
 {
@@ -547,12 +345,9 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy)
        cpu_data->perf_ctrls.desired_perf =  caps->highest_perf;
 
        ret = cppc_set_perf(cpu, &cpu_data->perf_ctrls);
-       if (ret) {
+       if (ret)
                pr_debug("Err setting perf value:%d on CPU:%d. ret:%d\n",
                         caps->highest_perf, cpu, ret);
-       } else {
-               cppc_freq_invariance_policy_init(policy, cpu_data);
-       }
 
        return ret;
 }
@@ -565,12 +360,12 @@ static inline u64 get_delta(u64 t1, u64 t0)
        return (u32)t1 - (u32)t0;
 }
 
-static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data,
-                                struct cppc_perf_fb_ctrs fb_ctrs_t0,
-                                struct cppc_perf_fb_ctrs fb_ctrs_t1)
+static int cppc_get_rate_from_fbctrs(struct cppc_cpudata *cpu_data,
+                                    struct cppc_perf_fb_ctrs fb_ctrs_t0,
+                                    struct cppc_perf_fb_ctrs fb_ctrs_t1)
 {
        u64 delta_reference, delta_delivered;
-       u64 reference_perf;
+       u64 reference_perf, delivered_perf;
 
        reference_perf = fb_ctrs_t0.reference_perf;
 
@@ -579,21 +374,12 @@ static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data,
        delta_delivered = get_delta(fb_ctrs_t1.delivered,
                                    fb_ctrs_t0.delivered);
 
-       /* Check to avoid divide-by zero and invalid delivered_perf */
-       if (!delta_reference || !delta_delivered)
-               return cpu_data->perf_ctrls.desired_perf;
-
-       return (reference_perf * delta_delivered) / delta_reference;
-}
-
-static int cppc_get_rate_from_fbctrs(struct cppc_cpudata *cpu_data,
-                                    struct cppc_perf_fb_ctrs fb_ctrs_t0,
-                                    struct cppc_perf_fb_ctrs fb_ctrs_t1)
-{
-       u64 delivered_perf;
-
-       delivered_perf = cppc_perf_from_fbctrs(cpu_data, fb_ctrs_t0,
-                                              fb_ctrs_t1);
+       /* Check to avoid divide-by zero */
+       if (delta_reference || delta_delivered)
+               delivered_perf = (reference_perf * delta_delivered) /
+                                       delta_reference;
+       else
+               delivered_perf = cpu_data->perf_ctrls.desired_perf;
 
        return cppc_cpufreq_perf_to_khz(cpu_data, delivered_perf);
 }
@@ -718,8 +504,6 @@ static void cppc_check_hisi_workaround(void)
 
 static int __init cppc_cpufreq_init(void)
 {
-       int ret;
-
        if ((acpi_disabled) || !acpi_cpc_valid())
                return -ENODEV;
 
@@ -727,11 +511,7 @@ static int __init cppc_cpufreq_init(void)
 
        cppc_check_hisi_workaround();
 
-       ret = cpufreq_register_driver(&cppc_cpufreq_driver);
-       if (!ret)
-               cppc_freq_invariance_init();
-
-       return ret;
+       return cpufreq_register_driver(&cppc_cpufreq_driver);
 }
 
 static inline void free_cpu_data(void)
@@ -748,7 +528,6 @@ static inline void free_cpu_data(void)
 
 static void __exit cppc_cpufreq_exit(void)
 {
-       cppc_freq_invariance_exit();
        cpufreq_unregister_driver(&cppc_cpufreq_driver);
 
        free_cpu_data();
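
With the frequency-invariance code reverted, rate readback again lives entirely in cppc_get_rate_from_fbctrs(): delivered_perf = reference_perf * delta_delivered / delta_reference, with the last requested performance as a fallback when the counters did not move. A standalone sketch of that arithmetic, including the wrap-safe delta from the driver's get_delta() helper (illustrative numbers; the guard here checks both deltas):

    #include <stdio.h>
    #include <stdint.h>

    /* Wrap-safe delta for counters that may be only 32 bits wide,
     * mirroring the driver's get_delta() helper. */
    static uint64_t get_delta(uint64_t t1, uint64_t t0)
    {
            if (t1 > t0 || t0 > ~(uint32_t)0)
                    return t1 - t0;
            return (uint32_t)t1 - (uint32_t)t0;
    }

    static uint64_t delivered_perf(uint64_t ref_perf,
                                   uint64_t ref1, uint64_t ref0,
                                   uint64_t del1, uint64_t del0,
                                   uint64_t desired)
    {
            uint64_t dref = get_delta(ref1, ref0);
            uint64_t ddel = get_delta(del1, del0);

            if (!dref || !ddel)             /* avoid divide-by-zero */
                    return desired;
            return ref_perf * ddel / dref;
    }

    int main(void)
    {
            /* CPU ran at half the reference rate over the window. */
            printf("%llu\n", (unsigned long long)
                   delivered_perf(100, 2000, 1000, 1500, 1000, 80)); /* 50 */
            return 0;
    }
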
index 6ab9d9a..39b5b46 100644 (file)
@@ -59,6 +59,7 @@ config DMA_OF
 #devices
 config ALTERA_MSGDMA
        tristate "Altera / Intel mSGDMA Engine"
+       depends on HAS_IOMEM
        select DMA_ENGINE
        help
          Enable support for Altera / Intel mSGDMA controller.
@@ -701,6 +702,7 @@ config XILINX_ZYNQMP_DMA
 
 config XILINX_ZYNQMP_DPDMA
        tristate "Xilinx DPDMA Engine"
+       depends on HAS_IOMEM && OF
        select DMA_ENGINE
        select DMA_VIRTUAL_CHANNELS
        help
index 4ec909e..4ae0579 100644 (file)
@@ -332,6 +332,7 @@ static int __cold dpaa2_qdma_setup(struct fsl_mc_device *ls_dev)
        }
 
        if (priv->dpdmai_attr.version.major > DPDMAI_VER_MAJOR) {
+               err = -EINVAL;
                dev_err(dev, "DPDMAI major version mismatch\n"
                             "Found %u.%u, supported version is %u.%u\n",
                                priv->dpdmai_attr.version.major,
@@ -341,6 +342,7 @@ static int __cold dpaa2_qdma_setup(struct fsl_mc_device *ls_dev)
        }
 
        if (priv->dpdmai_attr.version.minor > DPDMAI_VER_MINOR) {
+               err = -EINVAL;
                dev_err(dev, "DPDMAI minor version mismatch\n"
                             "Found %u.%u, supported version is %u.%u\n",
                                priv->dpdmai_attr.version.major,
@@ -475,6 +477,7 @@ static int __cold dpaa2_qdma_dpio_setup(struct dpaa2_qdma_priv *priv)
                ppriv->store =
                        dpaa2_io_store_create(DPAA2_QDMA_STORE_SIZE, dev);
                if (!ppriv->store) {
+                       err = -ENOMEM;
                        dev_err(dev, "dpaa2_io_store_create() failed\n");
                        goto err_store;
                }
index 302cba5..d4419bf 100644 (file)
@@ -110,6 +110,7 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp)
                pasid = iommu_sva_get_pasid(sva);
                if (pasid == IOMMU_PASID_INVALID) {
                        iommu_sva_unbind_device(sva);
+                       rc = -EINVAL;
                        goto failed;
                }
 
index 776fd44..442d55c 100644 (file)
@@ -168,6 +168,32 @@ static int idxd_setup_interrupts(struct idxd_device *idxd)
        return rc;
 }
 
+static void idxd_cleanup_interrupts(struct idxd_device *idxd)
+{
+       struct pci_dev *pdev = idxd->pdev;
+       struct idxd_irq_entry *irq_entry;
+       int i, msixcnt;
+
+       msixcnt = pci_msix_vec_count(pdev);
+       if (msixcnt <= 0)
+               return;
+
+       irq_entry = &idxd->irq_entries[0];
+       free_irq(irq_entry->vector, irq_entry);
+
+       for (i = 1; i < msixcnt; i++) {
+
+               irq_entry = &idxd->irq_entries[i];
+               if (idxd->hw.cmd_cap & BIT(IDXD_CMD_RELEASE_INT_HANDLE))
+                       idxd_device_release_int_handle(idxd, idxd->int_handles[i],
+                                                      IDXD_IRQ_MSIX);
+               free_irq(irq_entry->vector, irq_entry);
+       }
+
+       idxd_mask_error_interrupts(idxd);
+       pci_free_irq_vectors(pdev);
+}
+
 static int idxd_setup_wqs(struct idxd_device *idxd)
 {
        struct device *dev = &idxd->pdev->dev;
@@ -242,6 +268,7 @@ static int idxd_setup_engines(struct idxd_device *idxd)
                engine->idxd = idxd;
                device_initialize(&engine->conf_dev);
                engine->conf_dev.parent = &idxd->conf_dev;
+               engine->conf_dev.bus = &dsa_bus_type;
                engine->conf_dev.type = &idxd_engine_device_type;
                rc = dev_set_name(&engine->conf_dev, "engine%d.%d", idxd->id, engine->id);
                if (rc < 0) {
@@ -303,6 +330,19 @@ static int idxd_setup_groups(struct idxd_device *idxd)
        return rc;
 }
 
+static void idxd_cleanup_internals(struct idxd_device *idxd)
+{
+       int i;
+
+       for (i = 0; i < idxd->max_groups; i++)
+               put_device(&idxd->groups[i]->conf_dev);
+       for (i = 0; i < idxd->max_engines; i++)
+               put_device(&idxd->engines[i]->conf_dev);
+       for (i = 0; i < idxd->max_wqs; i++)
+               put_device(&idxd->wqs[i]->conf_dev);
+       destroy_workqueue(idxd->wq);
+}
+
 static int idxd_setup_internals(struct idxd_device *idxd)
 {
        struct device *dev = &idxd->pdev->dev;
@@ -531,12 +571,12 @@ static int idxd_probe(struct idxd_device *idxd)
                dev_dbg(dev, "Loading RO device config\n");
                rc = idxd_device_load_config(idxd);
                if (rc < 0)
-                       goto err;
+                       goto err_config;
        }
 
        rc = idxd_setup_interrupts(idxd);
        if (rc)
-               goto err;
+               goto err_config;
 
        dev_dbg(dev, "IDXD interrupt setup complete.\n");
 
@@ -549,6 +589,8 @@ static int idxd_probe(struct idxd_device *idxd)
        dev_dbg(dev, "IDXD device %d probed successfully\n", idxd->id);
        return 0;
 
+ err_config:
+       idxd_cleanup_internals(idxd);
  err:
        if (device_pasid_enabled(idxd))
                idxd_disable_system_pasid(idxd);
@@ -556,6 +598,18 @@ static int idxd_probe(struct idxd_device *idxd)
        return rc;
 }
 
+static void idxd_cleanup(struct idxd_device *idxd)
+{
+       struct device *dev = &idxd->pdev->dev;
+
+       perfmon_pmu_remove(idxd);
+       idxd_cleanup_interrupts(idxd);
+       idxd_cleanup_internals(idxd);
+       if (device_pasid_enabled(idxd))
+               idxd_disable_system_pasid(idxd);
+       iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_SVA);
+}
+
 static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
        struct device *dev = &pdev->dev;
@@ -608,7 +662,7 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        rc = idxd_register_devices(idxd);
        if (rc) {
                dev_err(dev, "IDXD sysfs setup failed\n");
-               goto err;
+               goto err_dev_register;
        }
 
        idxd->state = IDXD_DEV_CONF_READY;
@@ -618,6 +672,8 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
        return 0;
 
+ err_dev_register:
+       idxd_cleanup(idxd);
  err:
        pci_iounmap(pdev, idxd->reg_base);
  err_iomap:
@@ -787,6 +843,7 @@ module_init(idxd_init_module);
 
 static void __exit idxd_exit_module(void)
 {
+       idxd_unregister_driver();
        pci_unregister_driver(&idxd_pci_driver);
        idxd_cdev_remove();
        idxd_unregister_bus_type();
index 0d5c42f..97d9a6f 100644 (file)
@@ -230,7 +230,7 @@ out:
 }
 
 /**
- * ipu_irq_map() - map an IPU interrupt source to an IRQ number
+ * ipu_irq_unmap() - unmap an IPU interrupt source
  * @source:    interrupt source bit position (see ipu_irq_map())
  * @return:    0 or negative error code
  */
index 27c0735..375e7e6 100644 (file)
@@ -131,10 +131,7 @@ static unsigned int mtk_uart_apdma_read(struct mtk_chan *c, unsigned int reg)
 
 static void mtk_uart_apdma_desc_free(struct virt_dma_desc *vd)
 {
-       struct dma_chan *chan = vd->tx.chan;
-       struct mtk_chan *c = to_mtk_uart_apdma_chan(chan);
-
-       kfree(c->desc);
+       kfree(container_of(vd, struct mtk_uart_apdma_desc, vd));
 }
 
 static void mtk_uart_apdma_start_tx(struct mtk_chan *c)
@@ -207,14 +204,9 @@ static void mtk_uart_apdma_start_rx(struct mtk_chan *c)
 
 static void mtk_uart_apdma_tx_handler(struct mtk_chan *c)
 {
-       struct mtk_uart_apdma_desc *d = c->desc;
-
        mtk_uart_apdma_write(c, VFF_INT_FLAG, VFF_TX_INT_CLR_B);
        mtk_uart_apdma_write(c, VFF_INT_EN, VFF_INT_EN_CLR_B);
        mtk_uart_apdma_write(c, VFF_EN, VFF_EN_CLR_B);
-
-       list_del(&d->vd.node);
-       vchan_cookie_complete(&d->vd);
 }
 
 static void mtk_uart_apdma_rx_handler(struct mtk_chan *c)
@@ -245,9 +237,17 @@ static void mtk_uart_apdma_rx_handler(struct mtk_chan *c)
 
        c->rx_status = d->avail_len - cnt;
        mtk_uart_apdma_write(c, VFF_RPT, wg);
+}
 
-       list_del(&d->vd.node);
-       vchan_cookie_complete(&d->vd);
+static void mtk_uart_apdma_chan_complete_handler(struct mtk_chan *c)
+{
+       struct mtk_uart_apdma_desc *d = c->desc;
+
+       if (d) {
+               list_del(&d->vd.node);
+               vchan_cookie_complete(&d->vd);
+               c->desc = NULL;
+       }
 }
 
 static irqreturn_t mtk_uart_apdma_irq_handler(int irq, void *dev_id)
@@ -261,6 +261,7 @@ static irqreturn_t mtk_uart_apdma_irq_handler(int irq, void *dev_id)
                mtk_uart_apdma_rx_handler(c);
        else if (c->dir == DMA_MEM_TO_DEV)
                mtk_uart_apdma_tx_handler(c);
+       mtk_uart_apdma_chan_complete_handler(c);
        spin_unlock_irqrestore(&c->vc.lock, flags);
 
        return IRQ_HANDLED;
@@ -348,7 +349,7 @@ static struct dma_async_tx_descriptor *mtk_uart_apdma_prep_slave_sg
                return NULL;
 
        /* Now allocate and setup the descriptor */
-       d = kzalloc(sizeof(*d), GFP_ATOMIC);
+       d = kzalloc(sizeof(*d), GFP_NOWAIT);
        if (!d)
                return NULL;
 
@@ -366,7 +367,7 @@ static void mtk_uart_apdma_issue_pending(struct dma_chan *chan)
        unsigned long flags;
 
        spin_lock_irqsave(&c->vc.lock, flags);
-       if (vchan_issue_pending(&c->vc)) {
+       if (vchan_issue_pending(&c->vc) && !c->desc) {
                vd = vchan_next_desc(&c->vc);
                c->desc = to_mtk_uart_apdma_desc(&vd->tx);
 
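
The descriptor-free path above now derives the driver descriptor from the embedded virt_dma_desc with container_of() rather than trusting c->desc, which may already point at a newer descriptor by the time the free callback runs. A standalone sketch of the container_of() idiom (the macro is spelled out here; in the kernel it comes from the core headers):

    #include <stdio.h>
    #include <stddef.h>

    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    struct virt_desc { int cookie; };

    struct drv_desc {
            unsigned int avail_len;
            struct virt_desc vd;    /* embedded member */
    };

    /* A callback that only receives the embedded member can still
     * recover (and free) the enclosing object. */
    static void desc_free(struct virt_desc *vd)
    {
            struct drv_desc *d = container_of(vd, struct drv_desc, vd);

            printf("freeing desc with avail_len=%u\n", d->avail_len);
            /* kfree(d) in the kernel */
    }

    int main(void)
    {
            struct drv_desc d = { .avail_len = 64 };

            desc_free(&d.vd);
            return 0;
    }
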
index fd8d2bc..110de8a 100644 (file)
@@ -2694,13 +2694,15 @@ static struct dma_async_tx_descriptor *pl330_prep_dma_cyclic(
        for (i = 0; i < len / period_len; i++) {
                desc = pl330_get_desc(pch);
                if (!desc) {
+                       unsigned long iflags;
+
                        dev_err(pch->dmac->ddma.dev, "%s:%d Unable to fetch desc\n",
                                __func__, __LINE__);
 
                        if (!first)
                                return NULL;
 
-                       spin_lock_irqsave(&pl330->pool_lock, flags);
+                       spin_lock_irqsave(&pl330->pool_lock, iflags);
 
                        while (!list_empty(&first->node)) {
                                desc = list_entry(first->node.next,
@@ -2710,7 +2712,7 @@ static struct dma_async_tx_descriptor *pl330_prep_dma_cyclic(
 
                        list_move_tail(&first->node, &pl330->desc_pool);
 
-                       spin_unlock_irqrestore(&pl330->pool_lock, flags);
+                       spin_unlock_irqrestore(&pl330->pool_lock, iflags);
 
                        return NULL;
                }
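
The new iflags local exists because flags in pl330_prep_dma_cyclic() is the caller-supplied DMA flags argument; passing it to spin_lock_irqsave() overwrote that value. A toy standalone model of the clobber (the save/restore pair is faked with a global):

    #include <stdio.h>

    /* Toy stand-ins: "irqsave" records prior state into *f,
     * "irqrestore" reapplies it. */
    static int irq_state = 1;       /* 1 = enabled */

    static void lock_irqsave(unsigned long *f)
    {
            *f = irq_state;
            irq_state = 0;
    }

    static void unlock_irqrestore(unsigned long f)
    {
            irq_state = (int)f;
    }

    static void prep(unsigned long flags /* caller's DMA flags */)
    {
            unsigned long iflags;   /* the fix: a dedicated variable */

            lock_irqsave(&iflags);  /* reusing `flags` here would wipe
                                     * the caller's transfer flags */
            unlock_irqrestore(iflags);
            printf("flags preserved: %#lx, irqs back on: %d\n",
                   flags, irq_state);
    }

    int main(void)
    {
            prep(0x3);
            return 0;
    }
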
index 365f94e..3f926a6 100644 (file)
@@ -33,6 +33,7 @@ config QCOM_GPI_DMA
 
 config QCOM_HIDMA_MGMT
        tristate "Qualcomm Technologies HIDMA Management support"
+       depends on HAS_IOMEM
        select DMA_ENGINE
        help
          Enable support for the Qualcomm Technologies HIDMA Management.
index f8ffa02..ba46a0a 100644 (file)
@@ -1,5 +1,6 @@
 config SF_PDMA
        tristate "Sifive PDMA controller driver"
+       depends on HAS_IOMEM
        select DMA_ENGINE
        select DMA_VIRTUAL_CHANNELS
        help
index d530c1b..6885b3d 100644 (file)
@@ -1913,7 +1913,7 @@ static int rcar_dmac_probe(struct platform_device *pdev)
 
        /* Enable runtime PM and initialize the device. */
        pm_runtime_enable(&pdev->dev);
-       ret = pm_runtime_get_sync(&pdev->dev);
+       ret = pm_runtime_resume_and_get(&pdev->dev);
        if (ret < 0) {
                dev_err(&pdev->dev, "runtime PM get sync failed (%d)\n", ret);
                return ret;
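
Several drivers in this batch convert pm_runtime_get_sync() to pm_runtime_resume_and_get(). The difference is the error path: get_sync raises the usage counter even when the resume fails, so callers must drop it themselves, while resume_and_get does that internally. A sketch of the helper's contract expressed with the older calls (kernel-context code; approximate, not the upstream implementation):

    /* Assumes <linux/pm_runtime.h>. */
    static int resume_and_get_sketch(struct device *dev)
    {
            int ret = pm_runtime_get_sync(dev); /* bumps the count even on error */

            if (ret < 0) {
                    pm_runtime_put_noidle(dev); /* drop the count the caller
                                                 * would otherwise leak */
                    return ret;
            }
            return 0;                           /* resumed, count held */
    }
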
index 265d7c0..e182739 100644 (file)
@@ -3675,6 +3675,9 @@ static int __init d40_probe(struct platform_device *pdev)
 
        kfree(base->lcla_pool.base_unaligned);
 
+       if (base->lcpa_base)
+               iounmap(base->lcpa_base);
+
        if (base->phy_lcpa)
                release_mem_region(base->phy_lcpa,
                                   base->lcpa_size);
index 36ba8b4..18cbd1e 100644 (file)
@@ -1452,7 +1452,7 @@ static int stm32_mdma_alloc_chan_resources(struct dma_chan *c)
                return -ENOMEM;
        }
 
-       ret = pm_runtime_get_sync(dmadev->ddev.dev);
+       ret = pm_runtime_resume_and_get(dmadev->ddev.dev);
        if (ret < 0)
                return ret;
 
@@ -1718,7 +1718,7 @@ static int stm32_mdma_pm_suspend(struct device *dev)
        u32 ccr, id;
        int ret;
 
-       ret = pm_runtime_get_sync(dev);
+       ret = pm_runtime_resume_and_get(dev);
        if (ret < 0)
                return ret;
 
index 70b29bd..6c70980 100644 (file)
 #define XILINX_DPDMA_CH_VDO                            0x020
 #define XILINX_DPDMA_CH_PYLD_SZ                                0x024
 #define XILINX_DPDMA_CH_DESC_ID                                0x028
+#define XILINX_DPDMA_CH_DESC_ID_MASK                   GENMASK(15, 0)
 
 /* DPDMA descriptor fields */
 #define XILINX_DPDMA_DESC_CONTROL_PREEMBLE             0xa5
@@ -866,7 +867,8 @@ static void xilinx_dpdma_chan_queue_transfer(struct xilinx_dpdma_chan *chan)
         * will be used, but it should be enough.
         */
        list_for_each_entry(sw_desc, &desc->descriptors, node)
-               sw_desc->hw.desc_id = desc->vdesc.tx.cookie;
+               sw_desc->hw.desc_id = desc->vdesc.tx.cookie
+                                   & XILINX_DPDMA_CH_DESC_ID_MASK;
 
        sw_desc = list_first_entry(&desc->descriptors,
                                   struct xilinx_dpdma_sw_desc, node);
@@ -1086,7 +1088,8 @@ static void xilinx_dpdma_chan_vsync_irq(struct  xilinx_dpdma_chan *chan)
        if (!chan->running || !pending)
                goto out;
 
-       desc_id = dpdma_read(chan->reg, XILINX_DPDMA_CH_DESC_ID);
+       desc_id = dpdma_read(chan->reg, XILINX_DPDMA_CH_DESC_ID)
+               & XILINX_DPDMA_CH_DESC_ID_MASK;
 
        /* If the retrigger raced with vsync, retry at the next frame. */
        sw_desc = list_first_entry(&pending->descriptors,
@@ -1459,7 +1462,7 @@ static void xilinx_dpdma_enable_irq(struct xilinx_dpdma_device *xdev)
  */
 static void xilinx_dpdma_disable_irq(struct xilinx_dpdma_device *xdev)
 {
-       dpdma_write(xdev->reg, XILINX_DPDMA_IDS, XILINX_DPDMA_INTR_ERR_ALL);
+       dpdma_write(xdev->reg, XILINX_DPDMA_IDS, XILINX_DPDMA_INTR_ALL);
        dpdma_write(xdev->reg, XILINX_DPDMA_EIDS, XILINX_DPDMA_EINTR_ALL);
 }
 
@@ -1596,6 +1599,26 @@ static struct dma_chan *of_dma_xilinx_xlate(struct of_phandle_args *dma_spec,
        return dma_get_slave_channel(&xdev->chan[chan_id]->vchan.chan);
 }
 
+static void dpdma_hw_init(struct xilinx_dpdma_device *xdev)
+{
+       unsigned int i;
+       void __iomem *reg;
+
+       /* Disable all interrupts */
+       xilinx_dpdma_disable_irq(xdev);
+
+       /* Stop all channels */
+       for (i = 0; i < ARRAY_SIZE(xdev->chan); i++) {
+               reg = xdev->reg + XILINX_DPDMA_CH_BASE
+                               + XILINX_DPDMA_CH_OFFSET * i;
+               dpdma_clr(reg, XILINX_DPDMA_CH_CNTL, XILINX_DPDMA_CH_CNTL_ENABLE);
+       }
+
+       /* Clear the interrupt status registers */
+       dpdma_write(xdev->reg, XILINX_DPDMA_ISR, XILINX_DPDMA_INTR_ALL);
+       dpdma_write(xdev->reg, XILINX_DPDMA_EISR, XILINX_DPDMA_EINTR_ALL);
+}
+
 static int xilinx_dpdma_probe(struct platform_device *pdev)
 {
        struct xilinx_dpdma_device *xdev;
@@ -1622,6 +1645,8 @@ static int xilinx_dpdma_probe(struct platform_device *pdev)
        if (IS_ERR(xdev->reg))
                return PTR_ERR(xdev->reg);
 
+       dpdma_hw_init(xdev);
+
        xdev->irq = platform_get_irq(pdev, 0);
        if (xdev->irq < 0) {
                dev_err(xdev->dev, "failed to get platform irq\n");
index d841956..5fecf5a 100644 (file)
@@ -468,7 +468,7 @@ static int zynqmp_dma_alloc_chan_resources(struct dma_chan *dchan)
        struct zynqmp_dma_desc_sw *desc;
        int i, ret;
 
-       ret = pm_runtime_get_sync(chan->dev);
+       ret = pm_runtime_resume_and_get(chan->dev);
        if (ret < 0)
                return ret;
 
index 0597aeb..327b1f8 100644 (file)
@@ -6871,8 +6871,12 @@ static int gfx_v10_0_kiq_init_register(struct amdgpu_ring *ring)
        if (ring->use_doorbell) {
                WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
                        (adev->doorbell_index.kiq * 2) << 2);
+               /* If GC has entered CGPG, ringing a doorbell beyond the first
+                * page doesn't wake up GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER
+                * to work around this issue.
+                */
                WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
-                       (adev->doorbell_index.userqueue_end * 2) << 2);
+                       (adev->doorbell.size - 4));
        }
 
        WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
index 516467e..c09225d 100644 (file)
@@ -3673,8 +3673,12 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
        if (ring->use_doorbell) {
                WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
                                        (adev->doorbell_index.kiq * 2) << 2);
+               /* If GC has entered CGPG, ringing a doorbell beyond the first
+                * page doesn't wake up GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER
+                * to work around this issue.
+                */
                WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
-                                       (adev->doorbell_index.userqueue_end * 2) << 2);
+                                       (adev->doorbell.size - 4));
        }
 
        WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
index 37a23aa..66d623f 100644 (file)
@@ -642,11 +642,45 @@ static inline void gic_handle_nmi(u32 irqnr, struct pt_regs *regs)
                nmi_exit();
 }
 
+static u32 do_read_iar(struct pt_regs *regs)
+{
+       u32 iar;
+
+       if (gic_supports_nmi() && unlikely(!interrupts_enabled(regs))) {
+               u64 pmr;
+
+               /*
+                * We were in a context with IRQs disabled. However, the
+                * entry code has set PMR to a value that allows any
+                * interrupt to be acknowledged, and not just NMIs. This can
+                * lead to surprising effects if the NMI has been retired in
+                * the meantime and an IRQ is pending. The IRQ
+                * would then be taken in NMI context, something that nobody
+                * wants to debug twice.
+                *
+                * Until we sort this, drop PMR again to a level that will
+                * actually only allow NMIs before reading IAR, and then
+                * restore it to what it was.
+                */
+               pmr = gic_read_pmr();
+               gic_pmr_mask_irqs();
+               isb();
+
+               iar = gic_read_iar();
+
+               gic_write_pmr(pmr);
+       } else {
+               iar = gic_read_iar();
+       }
+
+       return iar;
+}
+
 static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs)
 {
        u32 irqnr;
 
-       irqnr = gic_read_iar();
+       irqnr = do_read_iar(regs);
 
        /* Check for special IDs first */
        if ((irqnr >= 1020 && irqnr <= 1023))
index d174823..4ffbfd5 100644 (file)
@@ -350,6 +350,7 @@ static int ldisc_open(struct tty_struct *tty)
        rtnl_lock();
        result = register_netdevice(dev);
        if (result) {
+               tty_kref_put(tty);
                rtnl_unlock();
                free_netdev(dev);
                return -ENODEV;
index 029e77d..a45865b 100644 (file)
@@ -82,6 +82,8 @@ struct mcba_priv {
        bool can_ka_first_pass;
        bool can_speed_check;
        atomic_t free_ctx_cnt;
+       void *rxbuf[MCBA_MAX_RX_URBS];
+       dma_addr_t rxbuf_dma[MCBA_MAX_RX_URBS];
 };
 
 /* CAN frame */
@@ -633,6 +635,7 @@ static int mcba_usb_start(struct mcba_priv *priv)
        for (i = 0; i < MCBA_MAX_RX_URBS; i++) {
                struct urb *urb = NULL;
                u8 *buf;
+               dma_addr_t buf_dma;
 
                /* create a URB, and a buffer for it */
                urb = usb_alloc_urb(0, GFP_KERNEL);
@@ -642,7 +645,7 @@ static int mcba_usb_start(struct mcba_priv *priv)
                }
 
                buf = usb_alloc_coherent(priv->udev, MCBA_USB_RX_BUFF_SIZE,
-                                        GFP_KERNEL, &urb->transfer_dma);
+                                        GFP_KERNEL, &buf_dma);
                if (!buf) {
                        netdev_err(netdev, "No memory left for USB buffer\n");
                        usb_free_urb(urb);
@@ -661,11 +664,14 @@ static int mcba_usb_start(struct mcba_priv *priv)
                if (err) {
                        usb_unanchor_urb(urb);
                        usb_free_coherent(priv->udev, MCBA_USB_RX_BUFF_SIZE,
-                                         buf, urb->transfer_dma);
+                                         buf, buf_dma);
                        usb_free_urb(urb);
                        break;
                }
 
+               priv->rxbuf[i] = buf;
+               priv->rxbuf_dma[i] = buf_dma;
+
                /* Drop reference, USB core will take care of freeing it */
                usb_free_urb(urb);
        }
@@ -708,7 +714,14 @@ static int mcba_usb_open(struct net_device *netdev)
 
 static void mcba_urb_unlink(struct mcba_priv *priv)
 {
+       int i;
+
        usb_kill_anchored_urbs(&priv->rx_submitted);
+
+       for (i = 0; i < MCBA_MAX_RX_URBS; ++i)
+               usb_free_coherent(priv->udev, MCBA_USB_RX_BUFF_SIZE,
+                                 priv->rxbuf[i], priv->rxbuf_dma[i]);
+
        usb_kill_anchored_urbs(&priv->tx_submitted);
 }
 
index 881f887..5257148 100644 (file)
@@ -236,36 +236,48 @@ static int ena_xdp_io_poll(struct napi_struct *napi, int budget)
 static int ena_xdp_tx_map_frame(struct ena_ring *xdp_ring,
                                struct ena_tx_buffer *tx_info,
                                struct xdp_frame *xdpf,
-                               void **push_hdr,
-                               u32 *push_len)
+                               struct ena_com_tx_ctx *ena_tx_ctx)
 {
        struct ena_adapter *adapter = xdp_ring->adapter;
        struct ena_com_buf *ena_buf;
-       dma_addr_t dma = 0;
+       int push_len = 0;
+       dma_addr_t dma;
+       void *data;
        u32 size;
 
        tx_info->xdpf = xdpf;
+       data = tx_info->xdpf->data;
        size = tx_info->xdpf->len;
-       ena_buf = tx_info->bufs;
 
-       /* llq push buffer */
-       *push_len = min_t(u32, size, xdp_ring->tx_max_header_size);
-       *push_hdr = tx_info->xdpf->data;
+       if (xdp_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
+               /* Designate part of the packet for LLQ */
+               push_len = min_t(u32, size, xdp_ring->tx_max_header_size);
+
+               ena_tx_ctx->push_header = data;
+
+               size -= push_len;
+               data += push_len;
+       }
+
+       ena_tx_ctx->header_len = push_len;
 
-       if (size - *push_len > 0) {
+       if (size > 0) {
                dma = dma_map_single(xdp_ring->dev,
-                                    *push_hdr + *push_len,
-                                    size - *push_len,
+                                    data,
+                                    size,
                                     DMA_TO_DEVICE);
                if (unlikely(dma_mapping_error(xdp_ring->dev, dma)))
                        goto error_report_dma_error;
 
-               tx_info->map_linear_data = 1;
-               tx_info->num_of_bufs = 1;
-       }
+               tx_info->map_linear_data = 0;
 
-       ena_buf->paddr = dma;
-       ena_buf->len = size;
+               ena_buf = tx_info->bufs;
+               ena_buf->paddr = dma;
+               ena_buf->len = size;
+
+               ena_tx_ctx->ena_bufs = ena_buf;
+               ena_tx_ctx->num_bufs = tx_info->num_of_bufs = 1;
+       }
 
        return 0;
 
@@ -274,10 +286,6 @@ error_report_dma_error:
                          &xdp_ring->syncp);
        netif_warn(adapter, tx_queued, adapter->netdev, "Failed to map xdp buff\n");
 
-       xdp_return_frame_rx_napi(tx_info->xdpf);
-       tx_info->xdpf = NULL;
-       tx_info->num_of_bufs = 0;
-
        return -EINVAL;
 }
 
@@ -289,8 +297,6 @@ static int ena_xdp_xmit_frame(struct ena_ring *xdp_ring,
        struct ena_com_tx_ctx ena_tx_ctx = {};
        struct ena_tx_buffer *tx_info;
        u16 next_to_use, req_id;
-       void *push_hdr;
-       u32 push_len;
        int rc;
 
        next_to_use = xdp_ring->next_to_use;
@@ -298,15 +304,11 @@ static int ena_xdp_xmit_frame(struct ena_ring *xdp_ring,
        tx_info = &xdp_ring->tx_buffer_info[req_id];
        tx_info->num_of_bufs = 0;
 
-       rc = ena_xdp_tx_map_frame(xdp_ring, tx_info, xdpf, &push_hdr, &push_len);
+       rc = ena_xdp_tx_map_frame(xdp_ring, tx_info, xdpf, &ena_tx_ctx);
        if (unlikely(rc))
                return rc;
 
-       ena_tx_ctx.ena_bufs = tx_info->bufs;
-       ena_tx_ctx.push_header = push_hdr;
-       ena_tx_ctx.num_bufs = tx_info->num_of_bufs;
        ena_tx_ctx.req_id = req_id;
-       ena_tx_ctx.header_len = push_len;
 
        rc = ena_xmit_common(dev,
                             xdp_ring,
index b3d7433..7748b27 100644 (file)
@@ -1849,6 +1849,7 @@ out_free_netdev:
        free_netdev(netdev);
 out_pci_release:
        pci_release_mem_regions(pdev);
+       pci_disable_pcie_error_reporting(pdev);
 out_pci_disable:
        pci_disable_device(pdev);
        return err;
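
The same one-line fix appears below for be_main.c, netxen and qlcnic: probe enabled PCIe AER reporting early, but the error ladder never disabled it, leaving reporting on for a device whose probe failed. A hedged sketch of the mirrored-unwind pattern (labels and region name hypothetical):

	err = pci_enable_device(pdev);
	if (err)
		return err;

	pci_enable_pcie_error_reporting(pdev);

	err = pci_request_mem_regions(pdev, "sketch");
	if (err)
		goto err_disable_aer;

	/* ... further setup, each step unwound by a later label ... */
	return 0;

err_disable_aer:
	pci_disable_pcie_error_reporting(pdev);
	pci_disable_device(pdev);
	return err;
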
index fcc729d..aef3fcc 100644 (file)
@@ -7308,7 +7308,7 @@ skip_rdma:
        entries_sp = ctx->vnic_max_vnic_entries + ctx->qp_max_l2_entries +
                     2 * (extra_qps + ctx->qp_min_qp1_entries) + min;
        entries_sp = roundup(entries_sp, ctx->tqm_entries_multiple);
-       entries = ctx->qp_max_l2_entries + extra_qps + ctx->qp_min_qp1_entries;
+       entries = ctx->qp_max_l2_entries + 2 * (extra_qps + ctx->qp_min_qp1_entries);
        entries = roundup(entries, ctx->tqm_entries_multiple);
        entries = clamp_t(u32, entries, min, ctx->tqm_max_entries_per_ring);
        for (i = 0; i < ctx->tqm_fp_rings_count + 1; i++) {
@@ -11750,6 +11750,8 @@ static void bnxt_fw_init_one_p3(struct bnxt *bp)
        bnxt_hwrm_coal_params_qcaps(bp);
 }
 
+static int bnxt_probe_phy(struct bnxt *bp, bool fw_dflt);
+
 static int bnxt_fw_init_one(struct bnxt *bp)
 {
        int rc;
@@ -11764,6 +11766,9 @@ static int bnxt_fw_init_one(struct bnxt *bp)
                netdev_err(bp->dev, "Firmware init phase 2 failed\n");
                return rc;
        }
+       rc = bnxt_probe_phy(bp, false);
+       if (rc)
+               return rc;
        rc = bnxt_approve_mac(bp, bp->dev->dev_addr, false);
        if (rc)
                return rc;
@@ -13155,6 +13160,7 @@ init_err_pci_clean:
        bnxt_hwrm_func_drv_unrgtr(bp);
        bnxt_free_hwrm_short_cmd_req(bp);
        bnxt_free_hwrm_resources(bp);
+       bnxt_ethtool_free(bp);
        kfree(bp->fw_health);
        bp->fw_health = NULL;
        bnxt_cleanup_pci(bp);
index 61ea3ec..83ed10a 100644 (file)
@@ -1337,13 +1337,27 @@ static int cxgb4_ethtool_flash_phy(struct net_device *netdev,
                return ret;
        }
 
-       spin_lock_bh(&adap->win0_lock);
+       /* We have to RESET the chip/firmware because we need the
+        * chip in an uninitialized state to load a new PHY image.
+        * Otherwise, the running firmware will only store the PHY
+        * image in local RAM, which will be lost after the next reset.
+        */
+       ret = t4_fw_reset(adap, adap->mbox, PIORSTMODE_F | PIORST_F);
+       if (ret < 0) {
+               dev_err(adap->pdev_dev,
+                       "Set FW to RESET for flashing PHY FW failed. ret: %d\n",
+                       ret);
+               return ret;
+       }
+
        ret = t4_load_phy_fw(adap, MEMWIN_NIC, NULL, data, size);
-       spin_unlock_bh(&adap->win0_lock);
-       if (ret)
-               dev_err(adap->pdev_dev, "Failed to load PHY FW\n");
+       if (ret < 0) {
+               dev_err(adap->pdev_dev, "Failed to load PHY FW. ret: %d\n",
+                       ret);
+               return ret;
+       }
 
-       return ret;
+       return 0;
 }
 
 static int cxgb4_ethtool_flash_fw(struct net_device *netdev,
@@ -1610,16 +1624,14 @@ static struct filter_entry *cxgb4_get_filter_entry(struct adapter *adap,
                                                   u32 ftid)
 {
        struct tid_info *t = &adap->tids;
-       struct filter_entry *f;
 
-       if (ftid < t->nhpftids)
-               f = &adap->tids.hpftid_tab[ftid];
-       else if (ftid < t->nftids)
-               f = &adap->tids.ftid_tab[ftid - t->nhpftids];
-       else
-               f = lookup_tid(&adap->tids, ftid);
+       if (ftid >= t->hpftid_base && ftid < t->hpftid_base + t->nhpftids)
+               return &t->hpftid_tab[ftid - t->hpftid_base];
 
-       return f;
+       if (ftid >= t->ftid_base && ftid < t->ftid_base + t->nftids)
+               return &t->ftid_tab[ftid - t->ftid_base];
+
+       return lookup_tid(t, ftid);
 }
 
 static void cxgb4_fill_filter_rule(struct ethtool_rx_flow_spec *fs,
@@ -1826,6 +1838,11 @@ static int cxgb4_ntuple_del_filter(struct net_device *dev,
        filter_id = filter_info->loc_array[cmd->fs.location];
        f = cxgb4_get_filter_entry(adapter, filter_id);
 
+       if (f->fs.prio)
+               filter_id -= adapter->tids.hpftid_base;
+       else if (!f->fs.hash)
+               filter_id -= (adapter->tids.ftid_base - adapter->tids.nhpftids);
+
        ret = cxgb4_flow_rule_destroy(dev, f->fs.tc_prio, &f->fs, filter_id);
        if (ret)
                goto err;
@@ -1885,6 +1902,11 @@ static int cxgb4_ntuple_set_filter(struct net_device *netdev,
 
        filter_info = &adapter->ethtool_filters->port[pi->port_id];
 
+       if (fs.prio)
+               tid += adapter->tids.hpftid_base;
+       else if (!fs.hash)
+               tid += (adapter->tids.ftid_base - adapter->tids.nhpftids);
+
        filter_info->loc_array[cmd->fs.location] = tid;
        set_bit(cmd->fs.location, filter_info->bmap);
        filter_info->in_use++;
index 22c9ac9..6260b3b 100644 (file)
@@ -198,7 +198,7 @@ static void set_nat_params(struct adapter *adap, struct filter_entry *f,
                                      WORD_MASK, f->fs.nat_lip[3] |
                                      f->fs.nat_lip[2] << 8 |
                                      f->fs.nat_lip[1] << 16 |
-                                     (u64)f->fs.nat_lip[0] << 25, 1);
+                                     (u64)f->fs.nat_lip[0] << 24, 1);
                }
        }
 
index 1f601de..762113a 100644 (file)
@@ -4424,10 +4424,8 @@ static int adap_init0_phy(struct adapter *adap)
 
        /* Load PHY Firmware onto adapter.
         */
-       spin_lock_bh(&adap->win0_lock);
        ret = t4_load_phy_fw(adap, MEMWIN_NIC, phy_info->phy_fw_version,
                             (u8 *)phyf->data, phyf->size);
-       spin_unlock_bh(&adap->win0_lock);
        if (ret < 0)
                dev_err(adap->pdev_dev, "PHY Firmware transfer error %d\n",
                        -ret);
index 9428ef1..a0555f4 100644 (file)
@@ -3060,16 +3060,19 @@ int t4_read_flash(struct adapter *adapter, unsigned int addr,
  *     @addr: the start address to write
  *     @n: length of data to write in bytes
  *     @data: the data to write
+ *     @byte_oriented: whether to store data as bytes or as words
  *
  *     Writes up to a page of data (256 bytes) to the serial flash starting
  *     at the given address.  All the data must be written to the same page.
+ *     If @byte_oriented is set, the write data is stored as a byte stream
+ *     (i.e. it matches what is on disk); otherwise it is stored big-endian.
  */
 static int t4_write_flash(struct adapter *adapter, unsigned int addr,
-                         unsigned int n, const u8 *data)
+                         unsigned int n, const u8 *data, bool byte_oriented)
 {
-       int ret;
-       u32 buf[64];
        unsigned int i, c, left, val, offset = addr & 0xff;
+       u32 buf[64];
+       int ret;
 
        if (addr >= adapter->params.sf_size || offset + n > SF_PAGE_SIZE)
                return -EINVAL;
@@ -3080,10 +3083,14 @@ static int t4_write_flash(struct adapter *adapter, unsigned int addr,
            (ret = sf1_write(adapter, 4, 1, 1, val)) != 0)
                goto unlock;
 
-       for (left = n; left; left -= c) {
+       for (left = n; left; left -= c, data += c) {
                c = min(left, 4U);
-               for (val = 0, i = 0; i < c; ++i)
-                       val = (val << 8) + *data++;
+               for (val = 0, i = 0; i < c; ++i) {
+                       if (byte_oriented)
+                               val = (val << 8) + data[i];
+                       else
+                               val = (val << 8) + data[c - i - 1];
+               }
 
                ret = sf1_write(adapter, c, c != left, 1, val);
                if (ret)
@@ -3096,7 +3103,8 @@ static int t4_write_flash(struct adapter *adapter, unsigned int addr,
        t4_write_reg(adapter, SF_OP_A, 0);    /* unlock SF */
 
        /* Read the page to verify the write succeeded */
-       ret = t4_read_flash(adapter, addr & ~0xff, ARRAY_SIZE(buf), buf, 1);
+       ret = t4_read_flash(adapter, addr & ~0xff, ARRAY_SIZE(buf), buf,
+                           byte_oriented);
        if (ret)
                return ret;
 
@@ -3692,7 +3700,7 @@ int t4_load_fw(struct adapter *adap, const u8 *fw_data, unsigned int size)
         */
        memcpy(first_page, fw_data, SF_PAGE_SIZE);
        ((struct fw_hdr *)first_page)->fw_ver = cpu_to_be32(0xffffffff);
-       ret = t4_write_flash(adap, fw_start, SF_PAGE_SIZE, first_page);
+       ret = t4_write_flash(adap, fw_start, SF_PAGE_SIZE, first_page, true);
        if (ret)
                goto out;
 
@@ -3700,14 +3708,14 @@ int t4_load_fw(struct adapter *adap, const u8 *fw_data, unsigned int size)
        for (size -= SF_PAGE_SIZE; size; size -= SF_PAGE_SIZE) {
                addr += SF_PAGE_SIZE;
                fw_data += SF_PAGE_SIZE;
-               ret = t4_write_flash(adap, addr, SF_PAGE_SIZE, fw_data);
+               ret = t4_write_flash(adap, addr, SF_PAGE_SIZE, fw_data, true);
                if (ret)
                        goto out;
        }
 
-       ret = t4_write_flash(adap,
-                            fw_start + offsetof(struct fw_hdr, fw_ver),
-                            sizeof(hdr->fw_ver), (const u8 *)&hdr->fw_ver);
+       ret = t4_write_flash(adap, fw_start + offsetof(struct fw_hdr, fw_ver),
+                            sizeof(hdr->fw_ver), (const u8 *)&hdr->fw_ver,
+                            true);
 out:
        if (ret)
                dev_err(adap->pdev_dev, "firmware download failed, error %d\n",
@@ -3812,9 +3820,11 @@ int t4_load_phy_fw(struct adapter *adap, int win,
        /* Copy the supplied PHY Firmware image to the adapter memory location
         * allocated by the adapter firmware.
         */
+       spin_lock_bh(&adap->win0_lock);
        ret = t4_memory_rw(adap, win, mtype, maddr,
                           phy_fw_size, (__be32 *)phy_fw_data,
                           T4_MEMORY_WRITE);
+       spin_unlock_bh(&adap->win0_lock);
        if (ret)
                return ret;
 
@@ -10208,7 +10218,7 @@ int t4_load_cfg(struct adapter *adap, const u8 *cfg_data, unsigned int size)
                        n = size - i;
                else
                        n = SF_PAGE_SIZE;
-               ret = t4_write_flash(adap, addr, n, cfg_data);
+               ret = t4_write_flash(adap, addr, n, cfg_data, true);
                if (ret)
                        goto out;
 
@@ -10677,13 +10687,14 @@ int t4_load_boot(struct adapter *adap, u8 *boot_data,
        for (size -= SF_PAGE_SIZE; size; size -= SF_PAGE_SIZE) {
                addr += SF_PAGE_SIZE;
                boot_data += SF_PAGE_SIZE;
-               ret = t4_write_flash(adap, addr, SF_PAGE_SIZE, boot_data);
+               ret = t4_write_flash(adap, addr, SF_PAGE_SIZE, boot_data,
+                                    false);
                if (ret)
                        goto out;
        }
 
        ret = t4_write_flash(adap, boot_sector, SF_PAGE_SIZE,
-                            (const u8 *)header);
+                            (const u8 *)header, false);
 
 out:
        if (ret)
@@ -10758,7 +10769,7 @@ int t4_load_bootcfg(struct adapter *adap, const u8 *cfg_data, unsigned int size)
        for (i = 0; i < size; i += SF_PAGE_SIZE) {
                n = min_t(u32, size - i, SF_PAGE_SIZE);
 
-               ret = t4_write_flash(adap, addr, n, cfg_data);
+               ret = t4_write_flash(adap, addr, n, cfg_data, false);
                if (ret)
                        goto out;
 
@@ -10770,7 +10781,8 @@ int t4_load_bootcfg(struct adapter *adap, const u8 *cfg_data, unsigned int size)
        for (i = 0; i < npad; i++) {
                u8 data = 0;
 
-               ret = t4_write_flash(adap, cfg_addr + size + i, 1, &data);
+               ret = t4_write_flash(adap, cfg_addr + size + i, 1, &data,
+                                    false);
                if (ret)
                        goto out;
        }
index 46b0dba..7c99217 100644 (file)
@@ -576,10 +576,12 @@ static void ec_bhf_remove(struct pci_dev *dev)
        struct ec_bhf_priv *priv = netdev_priv(net_dev);
 
        unregister_netdev(net_dev);
-       free_netdev(net_dev);
 
        pci_iounmap(dev, priv->dma_io);
        pci_iounmap(dev, priv->io);
+
+       free_netdev(net_dev);
+
        pci_release_regions(dev);
        pci_clear_master(dev);
        pci_disable_device(dev);
index b6eba29..7968568 100644 (file)
@@ -5897,6 +5897,7 @@ drv_cleanup:
 unmap_bars:
        be_unmap_pci_bars(adapter);
 free_netdev:
+       pci_disable_pcie_error_reporting(pdev);
        free_netdev(netdev);
 rel_reg:
        pci_release_regions(pdev);
index 1753807..d71eac7 100644 (file)
@@ -215,15 +215,13 @@ static u64 fec_ptp_read(const struct cyclecounter *cc)
 {
        struct fec_enet_private *fep =
                container_of(cc, struct fec_enet_private, cc);
-       const struct platform_device_id *id_entry =
-               platform_get_device_id(fep->pdev);
        u32 tempval;
 
        tempval = readl(fep->hwp + FEC_ATIME_CTRL);
        tempval |= FEC_T_CTRL_CAPTURE;
        writel(tempval, fep->hwp + FEC_ATIME_CTRL);
 
-       if (id_entry->driver_data & FEC_QUIRK_BUG_CAPTURE)
+       if (fep->quirks & FEC_QUIRK_BUG_CAPTURE)
                udelay(1);
 
        return readl(fep->hwp + FEC_ATIME);
@@ -604,6 +602,10 @@ void fec_ptp_init(struct platform_device *pdev, int irq_idx)
        fep->ptp_caps.enable = fec_ptp_enable;
 
        fep->cycle_speed = clk_get_rate(fep->clk_ptp);
+       if (!fep->cycle_speed) {
+               fep->cycle_speed = NSEC_PER_SEC;
+               dev_err(&fep->pdev->dev, "clk_ptp clock rate is zero\n");
+       }
        fep->ptp_inc = NSEC_PER_SEC / fep->cycle_speed;
 
        spin_lock_init(&fep->tmreg_lock);
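
A short worked note on the guard above: fep->ptp_inc is computed as NSEC_PER_SEC / fep->cycle_speed immediately afterwards, so a clk_get_rate() result of zero was a straight divide-by-zero at probe time. With the fallback of NSEC_PER_SEC (10^9 Hz), the division yields ptp_inc = 1 ns per tick, which keeps the PTP clock sane while the dev_err() reports the broken clock rate.
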
index d70ee57..27f9dac 100644 (file)
@@ -1717,12 +1717,13 @@ setup_rings:
  * ice_vsi_cfg_txqs - Configure the VSI for Tx
  * @vsi: the VSI being configured
  * @rings: Tx ring array to be configured
+ * @count: number of Tx ring array elements
  *
  * Return 0 on success and a negative value on error
  * Configure the Tx VSI for operation.
  */
 static int
-ice_vsi_cfg_txqs(struct ice_vsi *vsi, struct ice_ring **rings)
+ice_vsi_cfg_txqs(struct ice_vsi *vsi, struct ice_ring **rings, u16 count)
 {
        struct ice_aqc_add_tx_qgrp *qg_buf;
        u16 q_idx = 0;
@@ -1734,7 +1735,7 @@ ice_vsi_cfg_txqs(struct ice_vsi *vsi, struct ice_ring **rings)
 
        qg_buf->num_txqs = 1;
 
-       for (q_idx = 0; q_idx < vsi->num_txq; q_idx++) {
+       for (q_idx = 0; q_idx < count; q_idx++) {
                err = ice_vsi_cfg_txq(vsi, rings[q_idx], qg_buf);
                if (err)
                        goto err_cfg_txqs;
@@ -1754,7 +1755,7 @@ err_cfg_txqs:
  */
 int ice_vsi_cfg_lan_txqs(struct ice_vsi *vsi)
 {
-       return ice_vsi_cfg_txqs(vsi, vsi->tx_rings);
+       return ice_vsi_cfg_txqs(vsi, vsi->tx_rings, vsi->num_txq);
 }
 
 /**
@@ -1769,7 +1770,7 @@ int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi)
        int ret;
        int i;
 
-       ret = ice_vsi_cfg_txqs(vsi, vsi->xdp_rings);
+       ret = ice_vsi_cfg_txqs(vsi, vsi->xdp_rings, vsi->num_xdp_txq);
        if (ret)
                return ret;
 
@@ -2009,17 +2010,18 @@ int ice_vsi_stop_all_rx_rings(struct ice_vsi *vsi)
  * @rst_src: reset source
  * @rel_vmvf_num: Relative ID of VF/VM
  * @rings: Tx ring array to be stopped
+ * @count: number of Tx ring array elements
  */
 static int
 ice_vsi_stop_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
-                     u16 rel_vmvf_num, struct ice_ring **rings)
+                     u16 rel_vmvf_num, struct ice_ring **rings, u16 count)
 {
        u16 q_idx;
 
        if (vsi->num_txq > ICE_LAN_TXQ_MAX_QDIS)
                return -EINVAL;
 
-       for (q_idx = 0; q_idx < vsi->num_txq; q_idx++) {
+       for (q_idx = 0; q_idx < count; q_idx++) {
                struct ice_txq_meta txq_meta = { };
                int status;
 
@@ -2047,7 +2049,7 @@ int
 ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
                          u16 rel_vmvf_num)
 {
-       return ice_vsi_stop_tx_rings(vsi, rst_src, rel_vmvf_num, vsi->tx_rings);
+       return ice_vsi_stop_tx_rings(vsi, rst_src, rel_vmvf_num, vsi->tx_rings, vsi->num_txq);
 }
 
 /**
@@ -2056,7 +2058,7 @@ ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
  */
 int ice_vsi_stop_xdp_tx_rings(struct ice_vsi *vsi)
 {
-       return ice_vsi_stop_tx_rings(vsi, ICE_NO_RESET, 0, vsi->xdp_rings);
+       return ice_vsi_stop_tx_rings(vsi, ICE_NO_RESET, 0, vsi->xdp_rings, vsi->num_xdp_txq);
 }
 
 /**
index 4ee85a2..0eb2307 100644 (file)
@@ -2555,6 +2555,20 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
        return (ret || xdp_ring_err) ? -ENOMEM : 0;
 }
 
+/**
+ * ice_xdp_safe_mode - XDP handler for safe mode
+ * @dev: netdevice
+ * @xdp: XDP command
+ */
+static int ice_xdp_safe_mode(struct net_device __always_unused *dev,
+                            struct netdev_bpf *xdp)
+{
+       NL_SET_ERR_MSG_MOD(xdp->extack,
+                          "Please provide working DDP firmware package in order to use XDP\n"
+                          "Refer to Documentation/networking/device_drivers/ethernet/intel/ice.rst");
+       return -EOPNOTSUPP;
+}
+
 /**
  * ice_xdp - implements XDP handler
  * @dev: netdevice
@@ -6937,6 +6951,7 @@ static const struct net_device_ops ice_netdev_safe_mode_ops = {
        .ndo_change_mtu = ice_change_mtu,
        .ndo_get_stats64 = ice_get_stats64,
        .ndo_tx_timeout = ice_tx_timeout,
+       .ndo_bpf = ice_xdp_safe_mode,
 };
 
 static const struct net_device_ops ice_netdev_ops = {
index 36dc3e5..21ef2f1 100644 (file)
@@ -154,6 +154,7 @@ static int xrx200_close(struct net_device *net_dev)
 
 static int xrx200_alloc_skb(struct xrx200_chan *ch)
 {
+       struct sk_buff *skb = ch->skb[ch->dma.desc];
        dma_addr_t mapping;
        int ret = 0;
 
@@ -168,6 +169,7 @@ static int xrx200_alloc_skb(struct xrx200_chan *ch)
                                 XRX200_DMA_DATA_LEN, DMA_FROM_DEVICE);
        if (unlikely(dma_mapping_error(ch->priv->dev, mapping))) {
                dev_kfree_skb_any(ch->skb[ch->dma.desc]);
+               ch->skb[ch->dma.desc] = skb;
                ret = -ENOMEM;
                goto skip;
        }
@@ -198,7 +200,6 @@ static int xrx200_hw_receive(struct xrx200_chan *ch)
        ch->dma.desc %= LTQ_DESC_NUM;
 
        if (ret) {
-               ch->skb[ch->dma.desc] = skb;
                net_dev->stats.rx_dropped++;
                netdev_err(net_dev, "failed to allocate new rx buffer\n");
                return ret;
@@ -352,8 +353,8 @@ static irqreturn_t xrx200_dma_irq(int irq, void *ptr)
        struct xrx200_chan *ch = ptr;
 
        if (napi_schedule_prep(&ch->napi)) {
-               __napi_schedule(&ch->napi);
                ltq_dma_disable_irq(&ch->dma);
+               __napi_schedule(&ch->napi);
        }
 
        ltq_dma_ack_irq(&ch->dma);
index a9166cd..ceebfc2 100644 (file)
@@ -303,6 +303,7 @@ int mlx5_attach_device(struct mlx5_core_dev *dev)
        int ret = 0, i;
 
        mutex_lock(&mlx5_intf_mutex);
+       priv->flags &= ~MLX5_PRIV_FLAGS_DETACH;
        for (i = 0; i < ARRAY_SIZE(mlx5_adev_devices); i++) {
                if (!priv->adev[i]) {
                        bool is_supported = false;
@@ -320,6 +321,16 @@ int mlx5_attach_device(struct mlx5_core_dev *dev)
                        }
                } else {
                        adev = &priv->adev[i]->adev;
+
+                       /* Note that this is not the PCI driver that
+                        * mlx5_core_dev is bound to, but an auxiliary driver.
+                        *
+                        * Here we can race module unload against devlink
+                        * reload, but we don't need an extra lock because
+                        * we are holding the global mlx5_intf_mutex.
+                        */
+                       if (!adev->dev.driver)
+                               continue;
                        adrv = to_auxiliary_drv(adev->dev.driver);
 
                        if (adrv->resume)
@@ -350,6 +361,10 @@ void mlx5_detach_device(struct mlx5_core_dev *dev)
                        continue;
 
                adev = &priv->adev[i]->adev;
+               /* The auxiliary driver was unbound manually through sysfs */
+               if (!adev->dev.driver)
+                       goto skip_suspend;
+
                adrv = to_auxiliary_drv(adev->dev.driver);
 
                if (adrv->suspend) {
@@ -357,9 +372,11 @@ void mlx5_detach_device(struct mlx5_core_dev *dev)
                        continue;
                }
 
+skip_suspend:
                del_adev(&priv->adev[i]->adev);
                priv->adev[i] = NULL;
        }
+       priv->flags |= MLX5_PRIV_FLAGS_DETACH;
        mutex_unlock(&mlx5_intf_mutex);
 }
 
@@ -448,6 +465,8 @@ int mlx5_rescan_drivers_locked(struct mlx5_core_dev *dev)
        struct mlx5_priv *priv = &dev->priv;
 
        lockdep_assert_held(&mlx5_intf_mutex);
+       if (priv->flags & MLX5_PRIV_FLAGS_DETACH)
+               return 0;
 
        delete_drivers(dev);
        if (priv->flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
index 0dd7615..bc33eaa 100644 (file)
@@ -64,6 +64,8 @@ struct devlink_port *mlx5e_get_devlink_port(struct net_device *dev)
        struct mlx5e_priv *priv = netdev_priv(dev);
        struct devlink_port *port;
 
+       if (!netif_device_present(dev))
+               return NULL;
        port = mlx5e_devlink_get_dl_port(priv);
        if (port->registered)
                return port;
index d907c1a..778e229 100644 (file)
@@ -1,7 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
 // Copyright (c) 2020 Mellanox Technologies
 
-#include <linux/ptp_classify.h>
 #include "en/ptp.h"
 #include "en/txrx.h"
 #include "en/params.h"
index ab935cc..c96668b 100644 (file)
@@ -6,6 +6,7 @@
 
 #include "en.h"
 #include "en_stats.h"
+#include <linux/ptp_classify.h>
 
 struct mlx5e_ptpsq {
        struct mlx5e_txqsq       txqsq;
@@ -43,6 +44,27 @@ struct mlx5e_ptp {
        DECLARE_BITMAP(state, MLX5E_PTP_STATE_NUM_STATES);
 };
 
+static inline bool mlx5e_use_ptpsq(struct sk_buff *skb)
+{
+       struct flow_keys fk;
+
+       if (!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
+               return false;
+
+       if (!skb_flow_dissect_flow_keys(skb, &fk, 0))
+               return false;
+
+       if (fk.basic.n_proto == htons(ETH_P_1588))
+               return true;
+
+       if (fk.basic.n_proto != htons(ETH_P_IP) &&
+           fk.basic.n_proto != htons(ETH_P_IPV6))
+               return false;
+
+       return (fk.basic.ip_proto == IPPROTO_UDP &&
+               fk.ports.dst == htons(PTP_EV_PORT));
+}
+
 int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params,
                   u8 lag_port, struct mlx5e_ptp **cp);
 void mlx5e_ptp_close(struct mlx5e_ptp *c);
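
For reference on the mlx5e_use_ptpsq() helper above (moved here from en_tx.c, as a later hunk removes it there): PTP_EV_PORT from linux/ptp_classify.h is 319, the PTP event-message port, so e.g. a Sync message sent to 224.0.1.129:319 over IPv4 matches, while general messages on port 320 deliberately do not, since only event messages carry hardware timestamps.
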
index be0ee03..2e9bee4 100644 (file)
@@ -129,10 +129,9 @@ static void mlx5e_rep_neigh_update(struct work_struct *work)
                                                             work);
        struct mlx5e_neigh_hash_entry *nhe = update_work->nhe;
        struct neighbour *n = update_work->n;
+       struct mlx5e_encap_entry *e = NULL;
        bool neigh_connected, same_dev;
-       struct mlx5e_encap_entry *e;
        unsigned char ha[ETH_ALEN];
-       struct mlx5e_priv *priv;
        u8 nud_state, dead;
 
        rtnl_lock();
@@ -156,14 +155,12 @@ static void mlx5e_rep_neigh_update(struct work_struct *work)
        if (!same_dev)
                goto out;
 
-       list_for_each_entry(e, &nhe->encap_list, encap_list) {
-               if (!mlx5e_encap_take(e))
-                       continue;
+       /* mlx5e_get_next_init_encap() releases the previous encap before
+        * returning the next one.
+        */
+       while ((e = mlx5e_get_next_init_encap(nhe, e)) != NULL)
+               mlx5e_rep_update_flows(netdev_priv(e->out_dev), e, neigh_connected, ha);
 
-               priv = netdev_priv(e->out_dev);
-               mlx5e_rep_update_flows(priv, e, neigh_connected, ha);
-               mlx5e_encap_put(priv, e);
-       }
 out:
        rtnl_unlock();
        mlx5e_release_neigh_update_work(update_work);
index 3113822..85eaadc 100644 (file)
@@ -94,13 +94,9 @@ void mlx5e_rep_update_flows(struct mlx5e_priv *priv,
 
        ASSERT_RTNL();
 
-       /* wait for encap to be fully initialized */
-       wait_for_completion(&e->res_ready);
-
        mutex_lock(&esw->offloads.encap_tbl_lock);
        encap_connected = !!(e->flags & MLX5_ENCAP_ENTRY_VALID);
-       if (e->compl_result < 0 || (encap_connected == neigh_connected &&
-                                   ether_addr_equal(e->h_dest, ha)))
+       if (encap_connected == neigh_connected && ether_addr_equal(e->h_dest, ha))
                goto unlock;
 
        mlx5e_take_all_encap_flows(e, &flow_list);
index f1fb116..490131e 100644 (file)
@@ -251,9 +251,12 @@ static void mlx5e_take_all_route_decap_flows(struct mlx5e_route_entry *r,
                mlx5e_take_tmp_flow(flow, flow_list, 0);
 }
 
+typedef bool (match_cb)(struct mlx5e_encap_entry *);
+
 static struct mlx5e_encap_entry *
-mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
-                          struct mlx5e_encap_entry *e)
+mlx5e_get_next_matching_encap(struct mlx5e_neigh_hash_entry *nhe,
+                             struct mlx5e_encap_entry *e,
+                             match_cb match)
 {
        struct mlx5e_encap_entry *next = NULL;
 
@@ -288,7 +291,7 @@ retry:
        /* wait for encap to be fully initialized */
        wait_for_completion(&next->res_ready);
        /* continue searching if encap entry is not in valid state after completion */
-       if (!(next->flags & MLX5_ENCAP_ENTRY_VALID)) {
+       if (!match(next)) {
                e = next;
                goto retry;
        }
@@ -296,6 +299,30 @@ retry:
        return next;
 }
 
+static bool mlx5e_encap_valid(struct mlx5e_encap_entry *e)
+{
+       return e->flags & MLX5_ENCAP_ENTRY_VALID;
+}
+
+static struct mlx5e_encap_entry *
+mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
+                          struct mlx5e_encap_entry *e)
+{
+       return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_valid);
+}
+
+static bool mlx5e_encap_initialized(struct mlx5e_encap_entry *e)
+{
+       return e->compl_result >= 0;
+}
+
+struct mlx5e_encap_entry *
+mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe,
+                         struct mlx5e_encap_entry *e)
+{
+       return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_initialized);
+}
+
 void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
 {
        struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
index 3d45341..26f7fab 100644 (file)
@@ -532,9 +532,6 @@ void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
        struct mlx5_core_dev *mdev = priv->mdev;
        struct net_device *netdev = priv->netdev;
 
-       if (!priv->ipsec)
-               return;
-
        if (!(mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_CAP_ESP) ||
            !MLX5_CAP_ETH(mdev, swp)) {
                mlx5_core_dbg(mdev, "mlx5e: ESP and SWP offload not supported\n");
index 5cd466e..25403af 100644 (file)
@@ -356,7 +356,7 @@ err:
 
 int mlx5e_arfs_create_tables(struct mlx5e_priv *priv)
 {
-       int err = 0;
+       int err = -ENOMEM;
        int i;
 
        if (!(priv->netdev->hw_features & NETIF_F_NTUPLE))
index ec6bafe..d26b8ed 100644 (file)
@@ -2705,8 +2705,6 @@ static int mlx5e_update_netdev_queues(struct mlx5e_priv *priv)
        nch = priv->channels.params.num_channels;
        ntc = priv->channels.params.num_tc;
        num_rxqs = nch * priv->profile->rq_groups;
-       if (priv->channels.params.ptp_rx)
-               num_rxqs++;
 
        mlx5e_netdev_set_tcs(netdev, nch, ntc);
 
@@ -4824,22 +4822,15 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
        }
 
        if (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev)) {
-               netdev->hw_features     |= NETIF_F_GSO_UDP_TUNNEL |
-                                          NETIF_F_GSO_UDP_TUNNEL_CSUM;
-               netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL |
-                                          NETIF_F_GSO_UDP_TUNNEL_CSUM;
-               netdev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM;
-               netdev->vlan_features |= NETIF_F_GSO_UDP_TUNNEL |
-                                        NETIF_F_GSO_UDP_TUNNEL_CSUM;
+               netdev->hw_features     |= NETIF_F_GSO_UDP_TUNNEL;
+               netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL;
+               netdev->vlan_features |= NETIF_F_GSO_UDP_TUNNEL;
        }
 
        if (mlx5e_tunnel_proto_supported_tx(mdev, IPPROTO_GRE)) {
-               netdev->hw_features     |= NETIF_F_GSO_GRE |
-                                          NETIF_F_GSO_GRE_CSUM;
-               netdev->hw_enc_features |= NETIF_F_GSO_GRE |
-                                          NETIF_F_GSO_GRE_CSUM;
-               netdev->gso_partial_features |= NETIF_F_GSO_GRE |
-                                               NETIF_F_GSO_GRE_CSUM;
+               netdev->hw_features     |= NETIF_F_GSO_GRE;
+               netdev->hw_enc_features |= NETIF_F_GSO_GRE;
+               netdev->gso_partial_features |= NETIF_F_GSO_GRE;
        }
 
        if (mlx5e_tunnel_proto_supported_tx(mdev, IPPROTO_IPIP)) {
index dd64878..d4b0f27 100644 (file)
@@ -4765,7 +4765,7 @@ static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv,
        list_for_each_entry_safe(hpe, tmp, &init_wait_list, dead_peer_wait_list) {
                wait_for_completion(&hpe->res_ready);
                if (!IS_ERR_OR_NULL(hpe->hp) && hpe->peer_vhca_id == peer_vhca_id)
-                       hpe->hp->pair->peer_gone = true;
+                       mlx5_core_hairpin_clear_dead_peer(hpe->hp->pair);
 
                mlx5e_hairpin_put(priv, hpe);
        }
index 25c0917..1702753 100644 (file)
@@ -178,6 +178,9 @@ void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *f
 void mlx5e_put_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list);
 
 struct mlx5e_neigh_hash_entry;
+struct mlx5e_encap_entry *
+mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe,
+                         struct mlx5e_encap_entry *e);
 void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe);
 
 void mlx5e_tc_reoffload_flows_work(struct work_struct *work);
index 8ba6267..320fe0c 100644 (file)
@@ -32,7 +32,6 @@
 
 #include <linux/tcp.h>
 #include <linux/if_vlan.h>
-#include <linux/ptp_classify.h>
 #include <net/geneve.h>
 #include <net/dsfield.h>
 #include "en.h"
@@ -67,24 +66,6 @@ static inline int mlx5e_get_dscp_up(struct mlx5e_priv *priv, struct sk_buff *skb
 }
 #endif
 
-static bool mlx5e_use_ptpsq(struct sk_buff *skb)
-{
-       struct flow_keys fk;
-
-       if (!skb_flow_dissect_flow_keys(skb, &fk, 0))
-               return false;
-
-       if (fk.basic.n_proto == htons(ETH_P_1588))
-               return true;
-
-       if (fk.basic.n_proto != htons(ETH_P_IP) &&
-           fk.basic.n_proto != htons(ETH_P_IPV6))
-               return false;
-
-       return (fk.basic.ip_proto == IPPROTO_UDP &&
-               fk.ports.dst == htons(PTP_EV_PORT));
-}
-
 static u16 mlx5e_select_ptpsq(struct net_device *dev, struct sk_buff *skb)
 {
        struct mlx5e_priv *priv = netdev_priv(dev);
@@ -145,9 +126,9 @@ u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
                }
 
                ptp_channel = READ_ONCE(priv->channels.ptp);
-               if (unlikely(ptp_channel) &&
-                   test_bit(MLX5E_PTP_STATE_TX, ptp_channel->state) &&
-                   mlx5e_use_ptpsq(skb))
+               if (unlikely(ptp_channel &&
+                            test_bit(MLX5E_PTP_STATE_TX, ptp_channel->state) &&
+                            mlx5e_use_ptpsq(skb)))
                        return mlx5e_select_ptpsq(dev, skb);
 
                txq_ix = netdev_pick_tx(dev, skb, NULL);
index 77c0ca6..9403334 100644 (file)
@@ -136,7 +136,7 @@ static int mlx5_eq_comp_int(struct notifier_block *nb,
 
        eqe = next_eqe_sw(eq);
        if (!eqe)
-               return 0;
+               goto out;
 
        do {
                struct mlx5_core_cq *cq;
@@ -161,6 +161,8 @@ static int mlx5_eq_comp_int(struct notifier_block *nb,
                ++eq->cons_index;
 
        } while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq)));
+
+out:
        eq_update_ci(eq, 1);
 
        if (cqn != -1)
@@ -248,9 +250,9 @@ static int mlx5_eq_async_int(struct notifier_block *nb,
                ++eq->cons_index;
 
        } while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq)));
-       eq_update_ci(eq, 1);
 
 out:
+       eq_update_ci(eq, 1);
        mlx5_eq_async_int_unlock(eq_async, recovery, &flags);
 
        return unlikely(recovery) ? num_eqes : 0;
index b88705a..97e6cb6 100644 (file)
@@ -1054,6 +1054,12 @@ int mlx5_esw_vport_enable(struct mlx5_eswitch *esw, u16 vport_num,
                        goto err_vhca_mapping;
        }
 
+       /* The external controller host PF has a factory-programmed MAC.
+        * Read it from the device.
+        */
+       if (mlx5_core_is_ecpf(esw->dev) && vport_num == MLX5_VPORT_PF)
+               mlx5_query_nic_vport_mac_address(esw->dev, vport_num, true, vport->info.mac);
+
        esw_vport_change_handle_locked(vport);
 
        esw->enabled_vports++;
index a1d67bd..0d0f63a 100644 (file)
@@ -1161,7 +1161,7 @@ static int mlx5_load(struct mlx5_core_dev *dev)
        err = mlx5_core_set_hca_defaults(dev);
        if (err) {
                mlx5_core_err(dev, "Failed to set hca defaults\n");
-               goto err_sriov;
+               goto err_set_hca;
        }
 
        mlx5_vhca_event_start(dev);
@@ -1194,6 +1194,7 @@ err_ec:
        mlx5_sf_hw_table_destroy(dev);
 err_vhca:
        mlx5_vhca_event_stop(dev);
+err_set_hca:
        mlx5_cleanup_fs(dev);
 err_fs:
        mlx5_accel_tls_cleanup(dev);
index 50af84e..174f71e 100644 (file)
@@ -54,7 +54,7 @@ int mlx5_core_create_mkey(struct mlx5_core_dev *dev,
        mkey_index = MLX5_GET(create_mkey_out, lout, mkey_index);
        mkey->iova = MLX5_GET64(mkc, mkc, start_addr);
        mkey->size = MLX5_GET64(mkc, mkc, len);
-       mkey->key |= mlx5_idx_to_mkey(mkey_index);
+       mkey->key = (u32)mlx5_mkey_variant(mkey->key) | mlx5_idx_to_mkey(mkey_index);
        mkey->pd = MLX5_GET(mkc, mkc, pd);
        init_waitqueue_head(&mkey->wait);
 
index 441b545..540cf05 100644 (file)
@@ -156,6 +156,9 @@ void mlx5_rdma_enable_roce(struct mlx5_core_dev *dev)
 {
        int err;
 
+       if (!MLX5_CAP_GEN(dev, roce))
+               return;
+
        err = mlx5_nic_vport_enable_roce(dev);
        if (err) {
                mlx5_core_err(dev, "Failed to enable RoCE: %d\n", err);
index 6a0c6f9..fa0288a 100644 (file)
@@ -163,6 +163,7 @@ mlx5_sf_dev_state_change_handler(struct notifier_block *nb, unsigned long event_
        sf_index = event->function_id - base_id;
        sf_dev = xa_load(&table->devices, sf_index);
        switch (event->new_vhca_state) {
+       case MLX5_VHCA_STATE_INVALID:
        case MLX5_VHCA_STATE_ALLOCATED:
                if (sf_dev)
                        mlx5_sf_dev_del(table->dev, sf_dev, sf_index);
index 054c2e2..7466f01 100644 (file)
@@ -694,7 +694,11 @@ static int dr_ste_v1_set_action_decap_l3_list(void *data,
        if (hw_action_sz / DR_STE_ACTION_DOUBLE_SZ < DR_STE_DECAP_L3_ACTION_NUM)
                return -EINVAL;
 
-       memcpy(padded_data, data, data_sz);
+       inline_data_sz =
+               MLX5_FLD_SZ_BYTES(ste_double_action_insert_with_inline_v1, inline_data);
+
+       /* Add alignment padding */
+       memcpy(padded_data + data_sz % inline_data_sz, data, data_sz);
 
        /* Remove L2L3 outer headers */
        MLX5_SET(ste_single_action_remove_header_v1, hw_action, action_id,
@@ -706,32 +710,34 @@ static int dr_ste_v1_set_action_decap_l3_list(void *data,
        hw_action += DR_STE_ACTION_DOUBLE_SZ;
        used_actions++; /* Remove and NOP are a single double action */
 
-       inline_data_sz =
-               MLX5_FLD_SZ_BYTES(ste_double_action_insert_with_inline_v1, inline_data);
+       /* Point to the last dword of the header */
+       data_ptr += (data_sz / inline_data_sz) * inline_data_sz;
 
-       /* Add the new header inline + 2 extra bytes */
+       /* Add the new header using the inline action, 4 bytes at a time.
+        * The header is added in reverse order to the start of the packet to
+        * avoid incorrect parsing by the HW. Since the header is 14B or 18B,
+        * two extra pad bytes are inserted and later removed (worked example below).
+        */
        for (i = 0; i < data_sz / inline_data_sz + 1; i++) {
                void *addr_inline;
 
                MLX5_SET(ste_double_action_insert_with_inline_v1, hw_action, action_id,
                         DR_STE_V1_ACTION_ID_INSERT_INLINE);
                /* The hardware expects here offset to words (2 bytes) */
-               MLX5_SET(ste_double_action_insert_with_inline_v1, hw_action, start_offset,
-                        i * 2);
+               MLX5_SET(ste_double_action_insert_with_inline_v1, hw_action, start_offset, 0);
 
                /* Copy bytes one by one to avoid endianness problem */
                addr_inline = MLX5_ADDR_OF(ste_double_action_insert_with_inline_v1,
                                           hw_action, inline_data);
-               memcpy(addr_inline, data_ptr, inline_data_sz);
+               memcpy(addr_inline, data_ptr - i * inline_data_sz, inline_data_sz);
                hw_action += DR_STE_ACTION_DOUBLE_SZ;
-               data_ptr += inline_data_sz;
                used_actions++;
        }
 
-       /* Remove 2 extra bytes */
+       /* Remove the first 2 extra bytes */
        MLX5_SET(ste_single_action_remove_header_size_v1, hw_action, action_id,
                 DR_STE_V1_ACTION_ID_REMOVE_BY_SIZE);
-       MLX5_SET(ste_single_action_remove_header_size_v1, hw_action, start_offset, data_sz / 2);
+       MLX5_SET(ste_single_action_remove_header_size_v1, hw_action, start_offset, 0);
        /* The hardware expects here size in words (2 bytes) */
        MLX5_SET(ste_single_action_remove_header_size_v1, hw_action, remove_size, 1);
        used_actions++;
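
To see why the offset memcpy and reversed copy work, take a worked example with data_sz = 14 (an Ethernet L2 header) and inline_data_sz = 4, assuming data_ptr initially points at padded_data as the removed line in the previous hunk implies. The memcpy lands the header at offset 14 % 4 = 2, so padded_data holds [p p d0 d1 | d2..d5 | d6..d9 | d10..d13] with two leading pad bytes. data_ptr then advances to offset (14 / 4) * 4 = 12, and the loop runs 14 / 4 + 1 = 4 times, copying the chunks at offsets 12, 8, 4 and 0. Because every insert-inline action writes at start_offset 0, each chunk pushes the previously inserted ones deeper into the packet, giving the on-wire order p p d0 d1 d2 ... d13 -- after which the remove-by-size action (remove_size of one 2-byte word at offset 0) strips the two pad bytes, leaving the header intact and in order.
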
index 612b0ac..9737565 100644 (file)
@@ -124,10 +124,11 @@ int mlx5dr_action_destroy(struct mlx5dr_action *action);
 static inline bool
 mlx5dr_is_supported(struct mlx5_core_dev *dev)
 {
-       return MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner) ||
-              (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner_v2) &&
-               (MLX5_CAP_GEN(dev, steering_format_version) <=
-                MLX5_STEERING_FORMAT_CONNECTX_6DX));
+       return MLX5_CAP_GEN(dev, roce) &&
+              (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner) ||
+               (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner_v2) &&
+                (MLX5_CAP_GEN(dev, steering_format_version) <=
+                 MLX5_STEERING_FORMAT_CONNECTX_6DX)));
 }
 
 /* buddy functions & structure */
index 01cc00a..b6931bb 100644 (file)
@@ -424,6 +424,15 @@ err_modify_sq:
        return err;
 }
 
+static void mlx5_hairpin_unpair_peer_sq(struct mlx5_hairpin *hp)
+{
+       int i;
+
+       for (i = 0; i < hp->num_channels; i++)
+               mlx5_hairpin_modify_sq(hp->peer_mdev, hp->sqn[i], MLX5_SQC_STATE_RDY,
+                                      MLX5_SQC_STATE_RST, 0, 0);
+}
+
 static void mlx5_hairpin_unpair_queues(struct mlx5_hairpin *hp)
 {
        int i;
@@ -432,13 +441,9 @@ static void mlx5_hairpin_unpair_queues(struct mlx5_hairpin *hp)
        for (i = 0; i < hp->num_channels; i++)
                mlx5_hairpin_modify_rq(hp->func_mdev, hp->rqn[i], MLX5_RQC_STATE_RDY,
                                       MLX5_RQC_STATE_RST, 0, 0);
-
        /* unset peer SQs */
-       if (hp->peer_gone)
-               return;
-       for (i = 0; i < hp->num_channels; i++)
-               mlx5_hairpin_modify_sq(hp->peer_mdev, hp->sqn[i], MLX5_SQC_STATE_RDY,
-                                      MLX5_SQC_STATE_RST, 0, 0);
+       if (!hp->peer_gone)
+               mlx5_hairpin_unpair_peer_sq(hp);
 }
 
 struct mlx5_hairpin *
@@ -485,3 +490,16 @@ void mlx5_core_hairpin_destroy(struct mlx5_hairpin *hp)
        mlx5_hairpin_destroy_queues(hp);
        kfree(hp);
 }
+
+void mlx5_core_hairpin_clear_dead_peer(struct mlx5_hairpin *hp)
+{
+       int i;
+
+       mlx5_hairpin_unpair_peer_sq(hp);
+
+       /* destroy peer SQ */
+       for (i = 0; i < hp->num_channels; i++)
+               mlx5_core_destroy_sq(hp->peer_mdev, hp->sqn[i]);
+
+       hp->peer_gone = true;
+}
index 457ad42..4c1440a 100644 (file)
@@ -465,8 +465,6 @@ int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev,
        void *in;
        int err;
 
-       if (!vport)
-               return -EINVAL;
        if (!MLX5_CAP_GEN(mdev, vport_group_manager))
                return -EACCES;
 
index dfea143..85f0ce2 100644 (file)
@@ -693,7 +693,8 @@ mlxsw_thermal_module_tz_init(struct mlxsw_thermal_module *module_tz)
                                                        MLXSW_THERMAL_TRIP_MASK,
                                                        module_tz,
                                                        &mlxsw_thermal_module_ops,
-                                                       NULL, 0, 0);
+                                                       NULL, 0,
+                                                       module_tz->parent->polling_delay);
        if (IS_ERR(module_tz->tzdev)) {
                err = PTR_ERR(module_tz->tzdev);
                return err;
@@ -815,7 +816,8 @@ mlxsw_thermal_gearbox_tz_init(struct mlxsw_thermal_module *gearbox_tz)
                                                MLXSW_THERMAL_TRIP_MASK,
                                                gearbox_tz,
                                                &mlxsw_thermal_gearbox_ops,
-                                               NULL, 0, 0);
+                                               NULL, 0,
+                                               gearbox_tz->parent->polling_delay);
        if (IS_ERR(gearbox_tz->tzdev))
                return PTR_ERR(gearbox_tz->tzdev);
 
index 900b4bf..2bc5a90 100644 (file)
@@ -3907,7 +3907,7 @@ MLXSW_ITEM32(reg, qeec, max_shaper_bs, 0x1C, 0, 6);
 #define MLXSW_REG_QEEC_HIGHEST_SHAPER_BS       25
 #define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP1    5
 #define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP2    11
-#define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP3    5
+#define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP3    11
 
 static inline void mlxsw_reg_qeec_pack(char *payload, u8 local_port,
                                       enum mlxsw_reg_qeec_hr hr, u8 index,
index 04672eb..9958d50 100644 (file)
@@ -1332,6 +1332,7 @@ __mlxsw_sp_qdisc_ets_graft(struct mlxsw_sp_port *mlxsw_sp_port,
                           u8 band, u32 child_handle)
 {
        struct mlxsw_sp_qdisc *old_qdisc;
+       u32 parent;
 
        if (band < mlxsw_sp_qdisc->num_classes &&
            mlxsw_sp_qdisc->qdiscs[band].handle == child_handle)
@@ -1352,7 +1353,9 @@ __mlxsw_sp_qdisc_ets_graft(struct mlxsw_sp_port *mlxsw_sp_port,
        if (old_qdisc)
                mlxsw_sp_qdisc_destroy(mlxsw_sp_port, old_qdisc);
 
-       mlxsw_sp_qdisc = mlxsw_sp_qdisc->ops->find_class(mlxsw_sp_qdisc, band);
+       parent = TC_H_MAKE(mlxsw_sp_qdisc->handle, band + 1);
+       mlxsw_sp_qdisc = mlxsw_sp_qdisc->ops->find_class(mlxsw_sp_qdisc,
+                                                        parent);
        if (!WARN_ON(!mlxsw_sp_qdisc))
                mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc);
 
index 0c42833..adfb978 100644 (file)
@@ -379,6 +379,7 @@ static u32 ocelot_read_eq_avail(struct ocelot *ocelot, int port)
 
 int ocelot_port_flush(struct ocelot *ocelot, int port)
 {
+       unsigned int pause_ena;
        int err, val;
 
        /* Disable dequeuing from the egress queues */
@@ -387,6 +388,7 @@ int ocelot_port_flush(struct ocelot *ocelot, int port)
                       QSYS_PORT_MODE, port);
 
        /* Disable flow control */
+       ocelot_fields_read(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, &pause_ena);
        ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, 0);
 
        /* Disable priority flow control */
@@ -422,6 +424,9 @@ int ocelot_port_flush(struct ocelot *ocelot, int port)
        /* Clear flushing again. */
        ocelot_rmw_gix(ocelot, 0, REW_PORT_CFG_FLUSH_ENA, REW_PORT_CFG, port);
 
+       /* Re-enable flow control */
+       ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, pause_ena);
+
        return err;
 }
 EXPORT_SYMBOL(ocelot_port_flush);
index 7e6bac8..344ea11 100644 (file)
@@ -1602,6 +1602,8 @@ err_out_free_netdev:
        free_netdev(netdev);
 
 err_out_free_res:
+       if (NX_IS_REVISION_P3(pdev->revision))
+               pci_disable_pcie_error_reporting(pdev);
        pci_release_regions(pdev);
 
 err_out_disable_pdev:
index 17d5b64..e81dd34 100644 (file)
@@ -1266,9 +1266,11 @@ int qed_dcbx_get_config_params(struct qed_hwfn *p_hwfn,
                p_hwfn->p_dcbx_info->set.ver_num |= DCBX_CONFIG_VERSION_STATIC;
 
        p_hwfn->p_dcbx_info->set.enabled = dcbx_info->operational.enabled;
+       BUILD_BUG_ON(sizeof(dcbx_info->operational.params) !=
+                    sizeof(p_hwfn->p_dcbx_info->set.config.params));
        memcpy(&p_hwfn->p_dcbx_info->set.config.params,
               &dcbx_info->operational.params,
-              sizeof(struct qed_dcbx_admin_params));
+              sizeof(p_hwfn->p_dcbx_info->set.config.params));
        p_hwfn->p_dcbx_info->set.config.valid = true;
 
        memcpy(params, &p_hwfn->p_dcbx_info->set, sizeof(struct qed_dcbx_set));
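Sizing the copy by the destination buffer and asserting at build time that source and destination layouts agree turns a future structure change into a compile error instead of a silent overrun. A standalone sketch of the same guard, with _Static_assert standing in for the kernel's BUILD_BUG_ON() and hypothetical structure names:

    #include <string.h>

    struct operational_params { int fields[8]; };
    struct admin_params       { int fields[8]; };

    static void copy_params(struct admin_params *dst,
                            const struct operational_params *src)
    {
            /* refuse to build if the two layouts ever drift apart */
            _Static_assert(sizeof(*src) == sizeof(*dst), "param size mismatch");
            memcpy(dst, src, sizeof(*dst));         /* sized by the destination */
    }

    int main(void)
    {
            struct operational_params src = { { 1 } };
            struct admin_params dst;

            copy_params(&dst, &src);
            return dst.fields[0] != 1;
    }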
index 96b947f..3beafc6 100644 (file)
@@ -2690,6 +2690,7 @@ err_out_free_hw_res:
        kfree(ahw);
 
 err_out_free_res:
+       pci_disable_pcie_error_reporting(pdev);
        pci_release_regions(pdev);
 
 err_out_disable_pdev:
index 41fbd2c..ab1e0fc 100644 (file)
@@ -126,24 +126,24 @@ static void rmnet_get_stats64(struct net_device *dev,
                              struct rtnl_link_stats64 *s)
 {
        struct rmnet_priv *priv = netdev_priv(dev);
-       struct rmnet_vnd_stats total_stats;
+       struct rmnet_vnd_stats total_stats = { };
        struct rmnet_pcpu_stats *pcpu_ptr;
+       struct rmnet_vnd_stats snapshot;
        unsigned int cpu, start;
 
-       memset(&total_stats, 0, sizeof(struct rmnet_vnd_stats));
-
        for_each_possible_cpu(cpu) {
                pcpu_ptr = per_cpu_ptr(priv->pcpu_stats, cpu);
 
                do {
                        start = u64_stats_fetch_begin_irq(&pcpu_ptr->syncp);
-                       total_stats.rx_pkts += pcpu_ptr->stats.rx_pkts;
-                       total_stats.rx_bytes += pcpu_ptr->stats.rx_bytes;
-                       total_stats.tx_pkts += pcpu_ptr->stats.tx_pkts;
-                       total_stats.tx_bytes += pcpu_ptr->stats.tx_bytes;
+                       snapshot = pcpu_ptr->stats;     /* struct assignment */
                } while (u64_stats_fetch_retry_irq(&pcpu_ptr->syncp, start));
 
-               total_stats.tx_drops += pcpu_ptr->stats.tx_drops;
+               total_stats.rx_pkts += snapshot.rx_pkts;
+               total_stats.rx_bytes += snapshot.rx_bytes;
+               total_stats.tx_pkts += snapshot.tx_pkts;
+               total_stats.tx_bytes += snapshot.tx_bytes;
+               total_stats.tx_drops += snapshot.tx_drops;
        }
 
        s->rx_packets = total_stats.rx_pkts;
@@ -354,4 +354,4 @@ int rmnet_vnd_update_dev_mtu(struct rmnet_port *port,
        }
 
        return 0;
-}
\ No newline at end of file
+}
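The rewritten loop above copies each per-CPU block into a local snapshot inside the u64_stats retry loop and folds it into the totals only once the sequence counter confirms the read was consistent; the tx_drops accumulation moves inside the protected read as well. A compilable sketch of the retry idiom, using C11 atomics in place of the kernel's u64_stats helpers (layout and names hypothetical):

    #include <stdatomic.h>
    #include <stdint.h>

    struct vnd_stats { uint64_t rx_pkts, rx_bytes; };

    struct pcpu_stats {
            _Atomic unsigned int seq;   /* even = stable, odd = writer active */
            struct vnd_stats stats;
    };

    static struct vnd_stats read_snapshot(struct pcpu_stats *p)
    {
            struct vnd_stats snap;
            unsigned int start;

            do {
                    start = atomic_load_explicit(&p->seq, memory_order_acquire);
                    snap = p->stats;    /* struct copy; may race with a writer */
            } while ((start & 1) ||
                     atomic_load_explicit(&p->seq, memory_order_acquire) != start);

            return snap;                /* consistent; safe to fold into totals */
    }

    int main(void)
    {
            struct pcpu_stats p = { .stats = { 5, 640 } };

            return read_snapshot(&p).rx_pkts != 5;
    }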
index 2c89cde..2ee72dc 100644 (file)
@@ -1671,7 +1671,7 @@ static void rtl8169_get_strings(struct net_device *dev, u32 stringset, u8 *data)
 {
        switch(stringset) {
        case ETH_SS_STATS:
-               memcpy(data, *rtl8169_gstrings, sizeof(rtl8169_gstrings));
+               memcpy(data, rtl8169_gstrings, sizeof(rtl8169_gstrings));
                break;
        }
 }
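This hunk (and the identical sh_eth and r8152 changes below) drops a dereference: *rtl8169_gstrings and rtl8169_gstrings decay to the same address, but the former names only the first ETH_GSTRING_LEN-byte row, so copying the size of the whole table from it reads as an over-read to fortified memcpy() and static checkers. A standalone illustration (table contents hypothetical):

    #include <stdio.h>
    #include <string.h>

    #define ETH_GSTRING_LEN 32

    static const char gstrings[3][ETH_GSTRING_LEN] = { "tx_ok", "rx_ok", "tx_err" };

    int main(void)
    {
            char buf[sizeof(gstrings)];

            /* Same pointer value either way... */
            printf("%p == %p\n", (const void *)gstrings, (const void *)*gstrings);
            /* ...but *gstrings is one 32-byte row, not the 96-byte table */
            printf("%zu vs %zu\n", sizeof(*gstrings), sizeof(gstrings));

            memcpy(buf, gstrings, sizeof(gstrings));    /* the corrected form */
            return buf[0] != 't';
    }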
index c5b1548..713d362 100644 (file)
@@ -2287,7 +2287,7 @@ static void sh_eth_get_strings(struct net_device *ndev, u32 stringset, u8 *data)
 {
        switch (stringset) {
        case ETH_SS_STATS:
-               memcpy(data, *sh_eth_gstrings_stats,
+               memcpy(data, sh_eth_gstrings_stats,
                       sizeof(sh_eth_gstrings_stats));
                break;
        }
index b70d44a..3c73453 100644 (file)
@@ -76,10 +76,10 @@ enum power_event {
 #define LPI_CTRL_STATUS_TLPIEN 0x00000001      /* Transmit LPI Entry */
 
 /* GMAC HW ADDR regs */
-#define GMAC_ADDR_HIGH(reg)    (((reg > 15) ? 0x00000800 : 0x00000040) + \
-                               (reg * 8))
-#define GMAC_ADDR_LOW(reg)     (((reg > 15) ? 0x00000804 : 0x00000044) + \
-                               (reg * 8))
+#define GMAC_ADDR_HIGH(reg)    ((reg > 15) ? 0x00000800 + (reg - 16) * 8 : \
+                                0x00000040 + (reg * 8))
+#define GMAC_ADDR_LOW(reg)     ((reg > 15) ? 0x00000804 + (reg - 16) * 8 : \
+                                0x00000044 + (reg * 8))
 #define GMAC_MAX_PERFECT_ADDRESSES     1
 
 #define GMAC_PCS_BASE          0x000000c0      /* PCS register base */
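The old macros kept scaling the raw index by 8 even after switching to the high register bank, so entry 16 landed at 0x880 instead of 0x800; the fix restarts the index at the bank boundary. A quick check of the corrected arithmetic:

    #include <stdio.h>

    /* Entries 0-15 live at 0x40/0x44, entries 16 and up restart at 0x800/0x804 */
    #define GMAC_ADDR_HIGH(reg)     ((reg > 15) ? 0x00000800 + (reg - 16) * 8 : \
                                     0x00000040 + (reg * 8))

    int main(void)
    {
            int reg;

            for (reg = 14; reg <= 17; reg++)
                    printf("entry %2d -> high word at 0x%03x\n",
                           reg, GMAC_ADDR_HIGH(reg));
            /* prints 0x0b0, 0x0b8, 0x800, 0x808; the old macro gave 0x880 for 16 */
            return 0;
    }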
index 1e17a23..a696ada 100644 (file)
@@ -622,6 +622,8 @@ error_pclk_get:
 void stmmac_remove_config_dt(struct platform_device *pdev,
                             struct plat_stmmacenet_data *plat)
 {
+       clk_disable_unprepare(plat->stmmac_clk);
+       clk_disable_unprepare(plat->pclk);
        of_node_put(plat->phy_node);
        of_node_put(plat->mdio_node);
 }
index a1f5f07..9a13953 100644 (file)
@@ -774,12 +774,15 @@ static void temac_start_xmit_done(struct net_device *ndev)
        stat = be32_to_cpu(cur_p->app0);
 
        while (stat & STS_CTRL_APP0_CMPLT) {
+               /* Make sure that the other fields are read after the BD is
+                * released by the DMA engine
+                */
+               rmb();
                dma_unmap_single(ndev->dev.parent, be32_to_cpu(cur_p->phys),
                                 be32_to_cpu(cur_p->len), DMA_TO_DEVICE);
                skb = (struct sk_buff *)ptr_from_txbd(cur_p);
                if (skb)
                        dev_consume_skb_irq(skb);
-               cur_p->app0 = 0;
                cur_p->app1 = 0;
                cur_p->app2 = 0;
                cur_p->app3 = 0;
@@ -788,6 +791,12 @@ static void temac_start_xmit_done(struct net_device *ndev)
                ndev->stats.tx_packets++;
                ndev->stats.tx_bytes += be32_to_cpu(cur_p->len);
 
+               /* app0 must be visible last, as it is used to flag
+                * availability of the BD
+                */
+               smp_mb();
+               cur_p->app0 = 0;
+
                lp->tx_bd_ci++;
                if (lp->tx_bd_ci >= lp->tx_bd_num)
                        lp->tx_bd_ci = 0;
@@ -814,6 +823,9 @@ static inline int temac_check_tx_bd_space(struct temac_local *lp, int num_frag)
                if (cur_p->app0)
                        return NETDEV_TX_BUSY;
 
+               /* Make sure to read the next BD's app0 after this one */
+               rmb();
+
                tail++;
                if (tail >= lp->tx_bd_num)
                        tail = 0;
@@ -849,7 +861,7 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
                smp_mb();
 
                /* Space might have just been freed - check again */
-               if (temac_check_tx_bd_space(lp, num_frag))
+               if (temac_check_tx_bd_space(lp, num_frag + 1))
                        return NETDEV_TX_BUSY;
 
                netif_wake_queue(ndev);
@@ -876,7 +888,6 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
                return NETDEV_TX_OK;
        }
        cur_p->phys = cpu_to_be32(skb_dma_addr);
-       ptr_to_txbd((void *)skb, cur_p);
 
        for (ii = 0; ii < num_frag; ii++) {
                if (++lp->tx_bd_tail >= lp->tx_bd_num)
@@ -915,6 +926,11 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
        }
        cur_p->app0 |= cpu_to_be32(STS_CTRL_APP0_EOP);
 
+       /* Mark last fragment with skb address, so it can be consumed
+        * in temac_start_xmit_done()
+        */
+       ptr_to_txbd((void *)skb, cur_p);
+
        tail_p = lp->tx_bd_p + sizeof(*lp->tx_bd_v) * lp->tx_bd_tail;
        lp->tx_bd_tail++;
        if (lp->tx_bd_tail >= lp->tx_bd_num)
@@ -926,6 +942,11 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
        wmb();
        lp->dma_out(lp, TX_TAILDESC_PTR, tail_p); /* DMA start */
 
+       if (temac_check_tx_bd_space(lp, MAX_SKB_FRAGS + 1)) {
+               netdev_info(ndev, "%s -> netif_stop_queue\n", __func__);
+               netif_stop_queue(ndev);
+       }
+
        return NETDEV_TX_OK;
 }
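The barriers added above pair a consumer-side rmb() (read the rest of the descriptor only after seeing its completion flag) with an smp_mb() before clearing app0 (hand the descriptor back only after every other field has been consumed). A compilable C11 analogue of that acquire/release pairing, with a hypothetical descriptor layout:

    #include <stdatomic.h>
    #include <stdint.h>

    struct bd {
            uint32_t len;               /* payload fields */
            _Atomic uint32_t app0;      /* completion/ownership flag */
    };

    static uint32_t consume(struct bd *bd)
    {
            uint32_t len;

            /* acquire: later reads are ordered after the flag check */
            while (!atomic_load_explicit(&bd->app0, memory_order_acquire))
                    ;                   /* descriptor not complete yet */

            len = bd->len;              /* safe to read the other fields now */

            /* release: the cleared flag becomes visible only after the
             * reads above have happened
             */
            atomic_store_explicit(&bd->app0, 0, memory_order_release);
            return len;
    }

    int main(void)
    {
            struct bd b = { .len = 64 };

            atomic_store(&b.app0, 1);   /* pretend the DMA engine finished */
            return consume(&b) != 64;
    }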
 
index 6515422..7685a17 100644 (file)
@@ -799,6 +799,7 @@ static void mkiss_close(struct tty_struct *tty)
        ax->tty = NULL;
 
        unregister_netdev(ax->dev);
+       free_netdev(ax->dev);
 }
 
 /* Perform I/O control on an active ax25 channel. */
index 0d8293a..b806f2f 100644 (file)
@@ -49,7 +49,7 @@ static int mhi_ndo_stop(struct net_device *ndev)
        return 0;
 }
 
-static int mhi_ndo_xmit(struct sk_buff *skb, struct net_device *ndev)
+static netdev_tx_t mhi_ndo_xmit(struct sk_buff *skb, struct net_device *ndev)
 {
        struct mhi_net_dev *mhi_netdev = netdev_priv(ndev);
        const struct mhi_net_proto *proto = mhi_netdev->proto;
index 9bd9a5c..6bbc81a 100644 (file)
@@ -826,16 +826,12 @@ static int dp83867_phy_reset(struct phy_device *phydev)
 {
        int err;
 
-       err = phy_write(phydev, DP83867_CTRL, DP83867_SW_RESET);
+       err = phy_write(phydev, DP83867_CTRL, DP83867_SW_RESTART);
        if (err < 0)
                return err;
 
        usleep_range(10, 20);
 
-       /* After reset FORCE_LINK_GOOD bit is set. Although the
-        * default value should be unset. Disable FORCE_LINK_GOOD
-        * for the phy to work properly.
-        */
        return phy_modify(phydev, MII_DP83867_PHYCTRL,
                         DP83867_PHYCR_FORCE_LINK_GOOD, 0);
 }
index 2e60bc1..359ea0d 100644 (file)
@@ -123,10 +123,10 @@ static struct sk_buff *eem_tx_fixup(struct usbnet *dev, struct sk_buff *skb,
        }
 
        skb2 = skb_copy_expand(skb, EEM_HEAD, ETH_FCS_LEN + padlen, flags);
+       dev_kfree_skb_any(skb);
        if (!skb2)
                return NULL;
 
-       dev_kfree_skb_any(skb);
        skb = skb2;
 
 done:
index b04055f..df0d183 100644 (file)
@@ -1880,7 +1880,7 @@ static void cdc_ncm_status(struct usbnet *dev, struct urb *urb)
 static const struct driver_info cdc_ncm_info = {
        .description = "CDC NCM",
        .flags = FLAG_POINTTOPOINT | FLAG_NO_SETINT | FLAG_MULTI_PACKET
-                       | FLAG_LINK_INTR,
+                       | FLAG_LINK_INTR | FLAG_ETHER,
        .bind = cdc_ncm_bind,
        .unbind = cdc_ncm_unbind,
        .manage_power = usbnet_manage_power,
index 6700f19..bc55ec7 100644 (file)
@@ -575,7 +575,7 @@ static int qmi_wwan_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
 
        if (info->flags & QMI_WWAN_FLAG_PASS_THROUGH) {
                skb->protocol = htons(ETH_P_MAP);
-               return (netif_rx(skb) == NET_RX_SUCCESS);
+               return 1;
        }
 
        switch (skb->data[0] & 0xf0) {
index f6abb2f..e25bfb7 100644 (file)
@@ -8678,7 +8678,7 @@ static void rtl8152_get_strings(struct net_device *dev, u32 stringset, u8 *data)
 {
        switch (stringset) {
        case ETH_SS_STATS:
-               memcpy(data, *rtl8152_gstrings, sizeof(rtl8152_gstrings));
+               memcpy(data, rtl8152_gstrings, sizeof(rtl8152_gstrings));
                break;
        }
 }
index b286993..13141db 100644 (file)
@@ -1483,7 +1483,7 @@ static int smsc75xx_bind(struct usbnet *dev, struct usb_interface *intf)
        ret = smsc75xx_wait_ready(dev, 0);
        if (ret < 0) {
                netdev_warn(dev->net, "device not ready in smsc75xx_bind\n");
-               goto err;
+               goto free_pdata;
        }
 
        smsc75xx_init_mac_address(dev);
@@ -1492,7 +1492,7 @@ static int smsc75xx_bind(struct usbnet *dev, struct usb_interface *intf)
        ret = smsc75xx_reset(dev);
        if (ret < 0) {
                netdev_warn(dev->net, "smsc75xx_reset error %d\n", ret);
-               goto err;
+               goto cancel_work;
        }
 
        dev->net->netdev_ops = &smsc75xx_netdev_ops;
@@ -1503,8 +1503,11 @@ static int smsc75xx_bind(struct usbnet *dev, struct usb_interface *intf)
        dev->net->max_mtu = MAX_SINGLE_PACKET_SIZE;
        return 0;
 
-err:
+cancel_work:
+       cancel_work_sync(&pdata->set_multicast);
+free_pdata:
        kfree(pdata);
+       dev->data[0] = 0;
        return ret;
 }
 
@@ -1515,7 +1518,6 @@ static void smsc75xx_unbind(struct usbnet *dev, struct usb_interface *intf)
                cancel_work_sync(&pdata->set_multicast);
                netif_dbg(dev, ifdown, dev->net, "free pdata\n");
                kfree(pdata);
-               pdata = NULL;
                dev->data[0] = 0;
        }
 }
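The relabeled error path follows the usual kernel unwind convention: each failure jumps to a label that undoes exactly the state that existed at that point, so a failure after the work item is set up now cancels it before the structure embedding it is freed. A compilable sketch of the convention (all helpers hypothetical):

    #include <stdlib.h>

    static int  start_work(void)     { return 0; }  /* e.g. INIT_WORK + arm it */
    static void cancel_pending(void) { }            /* e.g. cancel_work_sync() */
    static int  do_reset(void)       { return -1; } /* pretend this step fails */

    static int bind(void)
    {
            char *pdata;
            int ret;

            pdata = malloc(64);
            if (!pdata)
                    return -1;

            ret = start_work();
            if (ret < 0)
                    goto free_pdata;    /* nothing else to undo yet */

            ret = do_reset();
            if (ret < 0)
                    goto cancel_work;   /* undo the later step first */

            return 0;

    cancel_work:
            cancel_pending();           /* must run before pdata is freed */
    free_pdata:
            free(pdata);
            return ret;
    }

    int main(void)
    {
            return bind() == 0;         /* the sketch simulates a failure */
    }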
index 503e2fd..28a6c4c 100644 (file)
@@ -1183,9 +1183,6 @@ static int vrf_dev_init(struct net_device *dev)
 
        dev->flags = IFF_MASTER | IFF_NOARP;
 
-       /* MTU is irrelevant for VRF device; set to 64k similar to lo */
-       dev->mtu = 64 * 1024;
-
        /* similarly, oper state is irrelevant; set to up to avoid confusion */
        dev->operstate = IF_OPER_UP;
        netdev_lockdep_set_classes(dev);
@@ -1685,7 +1682,8 @@ static void vrf_setup(struct net_device *dev)
         * which breaks networking.
         */
        dev->min_mtu = IPV6_MIN_MTU;
-       dev->max_mtu = ETH_MAX_MTU;
+       dev->max_mtu = IP6_MAX_MTU;
+       dev->mtu = dev->max_mtu;
 }
 
 static int vrf_validate(struct nlattr *tb[], struct nlattr *data[],
index 51ce767..7a6fd46 100644 (file)
@@ -1693,8 +1693,13 @@ static int mac80211_hwsim_start(struct ieee80211_hw *hw)
 static void mac80211_hwsim_stop(struct ieee80211_hw *hw)
 {
        struct mac80211_hwsim_data *data = hw->priv;
+
        data->started = false;
        hrtimer_cancel(&data->beacon_timer);
+
+       while (!skb_queue_empty(&data->pending))
+               ieee80211_free_txskb(hw, skb_dequeue(&data->pending));
+
        wiphy_dbg(hw->wiphy, "%s\n", __func__);
 }
 
index eca805c..9e6ce0d 100644 (file)
@@ -18,6 +18,7 @@ obj-$(CONFIG_PCIE_INTEL_GW) += pcie-intel-gw.o
 obj-$(CONFIG_PCIE_KIRIN) += pcie-kirin.o
 obj-$(CONFIG_PCIE_HISI_STB) += pcie-histb.o
 obj-$(CONFIG_PCI_MESON) += pci-meson.o
+obj-$(CONFIG_PCIE_TEGRA194) += pcie-tegra194.o
 obj-$(CONFIG_PCIE_UNIPHIER) += pcie-uniphier.o
 obj-$(CONFIG_PCIE_UNIPHIER_EP) += pcie-uniphier-ep.o
 
@@ -38,6 +39,6 @@ ifdef CONFIG_ACPI
 ifdef CONFIG_PCI_QUIRKS
 obj-$(CONFIG_ARM64) += pcie-al.o
 obj-$(CONFIG_ARM64) += pcie-hisi.o
-obj-$(CONFIG_ARM64) += pcie-tegra194.o
+obj-$(CONFIG_ARM64) += pcie-tegra194-acpi.o
 endif
 endif
diff --git a/drivers/pci/controller/dwc/pcie-tegra194-acpi.c b/drivers/pci/controller/dwc/pcie-tegra194-acpi.c
new file mode 100644 (file)
index 0000000..c2de6ed
--- /dev/null
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * ACPI quirks for Tegra194 PCIe host controller
+ *
+ * Copyright (C) 2021 NVIDIA Corporation.
+ *
+ * Author: Vidya Sagar <vidyas@nvidia.com>
+ */
+
+#include <linux/pci.h>
+#include <linux/pci-acpi.h>
+#include <linux/pci-ecam.h>
+
+#include "pcie-designware.h"
+
+struct tegra194_pcie_ecam  {
+       void __iomem *config_base;
+       void __iomem *iatu_base;
+       void __iomem *dbi_base;
+};
+
+static int tegra194_acpi_init(struct pci_config_window *cfg)
+{
+       struct device *dev = cfg->parent;
+       struct tegra194_pcie_ecam *pcie_ecam;
+
+       pcie_ecam = devm_kzalloc(dev, sizeof(*pcie_ecam), GFP_KERNEL);
+       if (!pcie_ecam)
+               return -ENOMEM;
+
+       pcie_ecam->config_base = cfg->win;
+       pcie_ecam->iatu_base = cfg->win + SZ_256K;
+       pcie_ecam->dbi_base = cfg->win + SZ_512K;
+       cfg->priv = pcie_ecam;
+
+       return 0;
+}
+
+static void atu_reg_write(struct tegra194_pcie_ecam *pcie_ecam, int index,
+                         u32 val, u32 reg)
+{
+       u32 offset = PCIE_GET_ATU_OUTB_UNR_REG_OFFSET(index);
+
+       writel(val, pcie_ecam->iatu_base + offset + reg);
+}
+
+static void program_outbound_atu(struct tegra194_pcie_ecam *pcie_ecam,
+                                int index, int type, u64 cpu_addr,
+                                u64 pci_addr, u64 size)
+{
+       atu_reg_write(pcie_ecam, index, lower_32_bits(cpu_addr),
+                     PCIE_ATU_LOWER_BASE);
+       atu_reg_write(pcie_ecam, index, upper_32_bits(cpu_addr),
+                     PCIE_ATU_UPPER_BASE);
+       atu_reg_write(pcie_ecam, index, lower_32_bits(pci_addr),
+                     PCIE_ATU_LOWER_TARGET);
+       atu_reg_write(pcie_ecam, index, lower_32_bits(cpu_addr + size - 1),
+                     PCIE_ATU_LIMIT);
+       atu_reg_write(pcie_ecam, index, upper_32_bits(pci_addr),
+                     PCIE_ATU_UPPER_TARGET);
+       atu_reg_write(pcie_ecam, index, type, PCIE_ATU_CR1);
+       atu_reg_write(pcie_ecam, index, PCIE_ATU_ENABLE, PCIE_ATU_CR2);
+}
+
+static void __iomem *tegra194_map_bus(struct pci_bus *bus,
+                                     unsigned int devfn, int where)
+{
+       struct pci_config_window *cfg = bus->sysdata;
+       struct tegra194_pcie_ecam *pcie_ecam = cfg->priv;
+       u32 busdev;
+       int type;
+
+       if (bus->number < cfg->busr.start || bus->number > cfg->busr.end)
+               return NULL;
+
+       if (bus->number == cfg->busr.start) {
+               if (PCI_SLOT(devfn) == 0)
+                       return pcie_ecam->dbi_base + where;
+               else
+                       return NULL;
+       }
+
+       busdev = PCIE_ATU_BUS(bus->number) | PCIE_ATU_DEV(PCI_SLOT(devfn)) |
+                PCIE_ATU_FUNC(PCI_FUNC(devfn));
+
+       if (bus->parent->number == cfg->busr.start) {
+               if (PCI_SLOT(devfn) == 0)
+                       type = PCIE_ATU_TYPE_CFG0;
+               else
+                       return NULL;
+       } else {
+               type = PCIE_ATU_TYPE_CFG1;
+       }
+
+       program_outbound_atu(pcie_ecam, 0, type, cfg->res.start, busdev,
+                            SZ_256K);
+
+       return pcie_ecam->config_base + where;
+}
+
+const struct pci_ecam_ops tegra194_pcie_ops = {
+       .init           = tegra194_acpi_init,
+       .pci_ops        = {
+               .map_bus        = tegra194_map_bus,
+               .read           = pci_generic_config_read,
+               .write          = pci_generic_config_write,
+       }
+};
index bafd2c6..504669e 100644 (file)
@@ -22,8 +22,6 @@
 #include <linux/of_irq.h>
 #include <linux/of_pci.h>
 #include <linux/pci.h>
-#include <linux/pci-acpi.h>
-#include <linux/pci-ecam.h>
 #include <linux/phy/phy.h>
 #include <linux/pinctrl/consumer.h>
 #include <linux/platform_device.h>
@@ -247,24 +245,6 @@ static const unsigned int pcie_gen_freq[] = {
        GEN4_CORE_CLK_FREQ
 };
 
-static const u32 event_cntr_ctrl_offset[] = {
-       0x1d8,
-       0x1a8,
-       0x1a8,
-       0x1a8,
-       0x1c4,
-       0x1d8
-};
-
-static const u32 event_cntr_data_offset[] = {
-       0x1dc,
-       0x1ac,
-       0x1ac,
-       0x1ac,
-       0x1c8,
-       0x1dc
-};
-
 struct tegra_pcie_dw {
        struct device *dev;
        struct resource *appl_res;
@@ -313,104 +293,6 @@ struct tegra_pcie_dw_of_data {
        enum dw_pcie_device_mode mode;
 };
 
-#if defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS)
-struct tegra194_pcie_ecam  {
-       void __iomem *config_base;
-       void __iomem *iatu_base;
-       void __iomem *dbi_base;
-};
-
-static int tegra194_acpi_init(struct pci_config_window *cfg)
-{
-       struct device *dev = cfg->parent;
-       struct tegra194_pcie_ecam *pcie_ecam;
-
-       pcie_ecam = devm_kzalloc(dev, sizeof(*pcie_ecam), GFP_KERNEL);
-       if (!pcie_ecam)
-               return -ENOMEM;
-
-       pcie_ecam->config_base = cfg->win;
-       pcie_ecam->iatu_base = cfg->win + SZ_256K;
-       pcie_ecam->dbi_base = cfg->win + SZ_512K;
-       cfg->priv = pcie_ecam;
-
-       return 0;
-}
-
-static void atu_reg_write(struct tegra194_pcie_ecam *pcie_ecam, int index,
-                         u32 val, u32 reg)
-{
-       u32 offset = PCIE_GET_ATU_OUTB_UNR_REG_OFFSET(index);
-
-       writel(val, pcie_ecam->iatu_base + offset + reg);
-}
-
-static void program_outbound_atu(struct tegra194_pcie_ecam *pcie_ecam,
-                                int index, int type, u64 cpu_addr,
-                                u64 pci_addr, u64 size)
-{
-       atu_reg_write(pcie_ecam, index, lower_32_bits(cpu_addr),
-                     PCIE_ATU_LOWER_BASE);
-       atu_reg_write(pcie_ecam, index, upper_32_bits(cpu_addr),
-                     PCIE_ATU_UPPER_BASE);
-       atu_reg_write(pcie_ecam, index, lower_32_bits(pci_addr),
-                     PCIE_ATU_LOWER_TARGET);
-       atu_reg_write(pcie_ecam, index, lower_32_bits(cpu_addr + size - 1),
-                     PCIE_ATU_LIMIT);
-       atu_reg_write(pcie_ecam, index, upper_32_bits(pci_addr),
-                     PCIE_ATU_UPPER_TARGET);
-       atu_reg_write(pcie_ecam, index, type, PCIE_ATU_CR1);
-       atu_reg_write(pcie_ecam, index, PCIE_ATU_ENABLE, PCIE_ATU_CR2);
-}
-
-static void __iomem *tegra194_map_bus(struct pci_bus *bus,
-                                     unsigned int devfn, int where)
-{
-       struct pci_config_window *cfg = bus->sysdata;
-       struct tegra194_pcie_ecam *pcie_ecam = cfg->priv;
-       u32 busdev;
-       int type;
-
-       if (bus->number < cfg->busr.start || bus->number > cfg->busr.end)
-               return NULL;
-
-       if (bus->number == cfg->busr.start) {
-               if (PCI_SLOT(devfn) == 0)
-                       return pcie_ecam->dbi_base + where;
-               else
-                       return NULL;
-       }
-
-       busdev = PCIE_ATU_BUS(bus->number) | PCIE_ATU_DEV(PCI_SLOT(devfn)) |
-                PCIE_ATU_FUNC(PCI_FUNC(devfn));
-
-       if (bus->parent->number == cfg->busr.start) {
-               if (PCI_SLOT(devfn) == 0)
-                       type = PCIE_ATU_TYPE_CFG0;
-               else
-                       return NULL;
-       } else {
-               type = PCIE_ATU_TYPE_CFG1;
-       }
-
-       program_outbound_atu(pcie_ecam, 0, type, cfg->res.start, busdev,
-                            SZ_256K);
-
-       return pcie_ecam->config_base + where;
-}
-
-const struct pci_ecam_ops tegra194_pcie_ops = {
-       .init           = tegra194_acpi_init,
-       .pci_ops        = {
-               .map_bus        = tegra194_map_bus,
-               .read           = pci_generic_config_read,
-               .write          = pci_generic_config_write,
-       }
-};
-#endif /* defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS) */
-
-#ifdef CONFIG_PCIE_TEGRA194
-
 static inline struct tegra_pcie_dw *to_tegra_pcie(struct dw_pcie *pci)
 {
        return container_of(pci, struct tegra_pcie_dw, pci);
@@ -694,6 +576,24 @@ static struct pci_ops tegra_pci_ops = {
 };
 
 #if defined(CONFIG_PCIEASPM)
+static const u32 event_cntr_ctrl_offset[] = {
+       0x1d8,
+       0x1a8,
+       0x1a8,
+       0x1a8,
+       0x1c4,
+       0x1d8
+};
+
+static const u32 event_cntr_data_offset[] = {
+       0x1dc,
+       0x1ac,
+       0x1ac,
+       0x1ac,
+       0x1c8,
+       0x1dc
+};
+
 static void disable_aspm_l11(struct tegra_pcie_dw *pcie)
 {
        u32 val;
@@ -2411,5 +2311,3 @@ MODULE_DEVICE_TABLE(of, tegra_pcie_dw_of_match);
 MODULE_AUTHOR("Vidya Sagar <vidyas@nvidia.com>");
 MODULE_DESCRIPTION("NVIDIA PCIe host controller driver");
 MODULE_LICENSE("GPL v2");
-
-#endif /* CONFIG_PCIE_TEGRA194 */
index 051b48b..e3f5e7a 100644 (file)
@@ -514,7 +514,7 @@ static int advk_pcie_wait_pio(struct advk_pcie *pcie)
                udelay(PIO_RETRY_DELAY);
        }
 
-       dev_err(dev, "config read/write timed out\n");
+       dev_err(dev, "PIO read/write transfer time out\n");
        return -ETIMEDOUT;
 }
 
@@ -657,6 +657,35 @@ static bool advk_pcie_valid_device(struct advk_pcie *pcie, struct pci_bus *bus,
        return true;
 }
 
+static bool advk_pcie_pio_is_running(struct advk_pcie *pcie)
+{
+       struct device *dev = &pcie->pdev->dev;
+
+       /*
+        * Trying to start a new PIO transfer when previous has not completed
+        * cause External Abort on CPU which results in kernel panic:
+        *
+        *     SError Interrupt on CPU0, code 0xbf000002 -- SError
+        *     Kernel panic - not syncing: Asynchronous SError Interrupt
+        *
+        * Functions advk_pcie_rd_conf() and advk_pcie_wr_conf() are protected
+        * by raw_spin_lock_irqsave() at pci_lock_config() level to prevent
+        * concurrent calls at the same time. But because a PIO transfer may
+        * take about 1.5s when the link is down or the card is disconnected,
+        * advk_pcie_wait_pio() can time out before the transfer actually
+        * completes.
+        *
+        * Some versions of ARM Trusted Firmware handle this External Abort at
+        * the EL3 level and mask it to prevent the kernel panic. Relevant
+        * TF-A commit:
+        * https://git.trustedfirmware.org/TF-A/trusted-firmware-a.git/commit/?id=3c7dcdac5c50
+        */
+       if (advk_readl(pcie, PIO_START)) {
+               dev_err(dev, "Previous PIO read/write transfer is still running\n");
+               return true;
+       }
+
+       return false;
+}
+
 static int advk_pcie_rd_conf(struct pci_bus *bus, u32 devfn,
                             int where, int size, u32 *val)
 {
@@ -673,9 +702,10 @@ static int advk_pcie_rd_conf(struct pci_bus *bus, u32 devfn,
                return pci_bridge_emul_conf_read(&pcie->bridge, where,
                                                 size, val);
 
-       /* Start PIO */
-       advk_writel(pcie, 0, PIO_START);
-       advk_writel(pcie, 1, PIO_ISR);
+       if (advk_pcie_pio_is_running(pcie)) {
+               *val = 0xffffffff;
+               return PCIBIOS_SET_FAILED;
+       }
 
        /* Program the control register */
        reg = advk_readl(pcie, PIO_CTRL);
@@ -694,7 +724,8 @@ static int advk_pcie_rd_conf(struct pci_bus *bus, u32 devfn,
        /* Program the data strobe */
        advk_writel(pcie, 0xf, PIO_WR_DATA_STRB);
 
-       /* Start the transfer */
+       /* Clear PIO DONE ISR and start the transfer */
+       advk_writel(pcie, 1, PIO_ISR);
        advk_writel(pcie, 1, PIO_START);
 
        ret = advk_pcie_wait_pio(pcie);
@@ -734,9 +765,8 @@ static int advk_pcie_wr_conf(struct pci_bus *bus, u32 devfn,
        if (where % size)
                return PCIBIOS_SET_FAILED;
 
-       /* Start PIO */
-       advk_writel(pcie, 0, PIO_START);
-       advk_writel(pcie, 1, PIO_ISR);
+       if (advk_pcie_pio_is_running(pcie))
+               return PCIBIOS_SET_FAILED;
 
        /* Program the control register */
        reg = advk_readl(pcie, PIO_CTRL);
@@ -763,7 +793,8 @@ static int advk_pcie_wr_conf(struct pci_bus *bus, u32 devfn,
        /* Program the data strobe */
        advk_writel(pcie, data_strobe, PIO_WR_DATA_STRB);
 
-       /* Start the transfer */
+       /* Clear PIO DONE ISR and start the transfer */
+       advk_writel(pcie, 1, PIO_ISR);
        advk_writel(pcie, 1, PIO_START);
 
        ret = advk_pcie_wait_pio(pcie);
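The guard added above turns a concurrent-start situation into a clean PCIBIOS_SET_FAILED instead of the SError described in the comment, and reads additionally return all-ones, the conventional "no data" reply. A minimal sketch of the check-before-start shape, with the register modelled as a plain variable:

    #include <stdbool.h>

    static unsigned int pio_start;      /* stand-in for the PIO_START register */

    static bool pio_is_running(void)
    {
            return pio_start != 0;      /* previous transfer still in flight */
    }

    static int config_read(unsigned int *val)
    {
            if (pio_is_running()) {
                    *val = 0xffffffff;  /* all-ones "no data" reply */
                    return -1;          /* PCIBIOS_SET_FAILED in the driver */
            }

            pio_start = 1;              /* safe to start a transfer now */
            return 0;
    }

    int main(void)
    {
            unsigned int v;

            return config_read(&v);     /* 0: started cleanly */
    }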
index 85dcb70..a143b02 100644 (file)
@@ -353,6 +353,8 @@ static int devm_of_pci_get_host_bridge_resources(struct device *dev,
                                dev_warn(dev, "More than one I/O resource converted for %pOF. CPU base address for old range lost!\n",
                                         dev_node);
                        *io_base = range.cpu_addr;
+               } else if (resource_type(res) == IORESOURCE_MEM) {
+                       res->flags &= ~IORESOURCE_MEM_64;
                }
 
                pci_add_resource_offset(resources, res, res->start - range.pci_addr);
index dcb229d..22b2bb1 100644 (file)
@@ -3546,6 +3546,18 @@ static void quirk_no_bus_reset(struct pci_dev *dev)
        dev->dev_flags |= PCI_DEV_FLAGS_NO_BUS_RESET;
 }
 
+/*
+ * Some NVIDIA GPU devices do not work with bus reset; SBR needs to be
+ * prevented for the affected devices.
+ */
+static void quirk_nvidia_no_bus_reset(struct pci_dev *dev)
+{
+       if ((dev->device & 0xffc0) == 0x2340)
+               quirk_no_bus_reset(dev);
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID,
+                        quirk_nvidia_no_bus_reset);
+
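The mask test above is a compact range check: clearing the low six bits of the device ID before comparing against 0x2340 lets one fixup entry cover 64 consecutive IDs. A quick verification:

    #include <stdio.h>

    int main(void)
    {
            unsigned int id, lo = 0xffff, hi = 0;

            for (id = 0; id <= 0xffff; id++)
                    if ((id & 0xffc0) == 0x2340) {
                            if (id < lo)
                                    lo = id;
                            if (id > hi)
                                    hi = id;
                    }

            printf("quirk matches 0x%04x..0x%04x\n", lo, hi); /* 0x2340..0x237f */
            return 0;
    }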
 /*
  * Some Atheros AR9xxx and QCA988x chips do not behave after a bus reset.
  * The device will throw a Link Down error on AER-capable systems and
@@ -3566,6 +3578,16 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATHEROS, 0x0034, quirk_no_bus_reset);
  */
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_CAVIUM, 0xa100, quirk_no_bus_reset);
 
+/*
+ * Some TI KeyStone C667X devices do not support bus/hot reset.  The PCIESS
+ * automatically disables LTSSM when Secondary Bus Reset is received and
+ * the device stops working.  Prevent bus reset for these devices.  With
+ * this change, the device can be assigned to VMs with VFIO, but it will
+ * leak state between VMs.  Reference
+ * https://e2e.ti.com/support/processors/f/791/t/954382
+ */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TI, 0xb005, quirk_no_bus_reset);
+
 static void quirk_no_pm_reset(struct pci_dev *dev)
 {
        /*
@@ -3901,6 +3923,69 @@ static int delay_250ms_after_flr(struct pci_dev *dev, int probe)
        return 0;
 }
 
+#define PCI_DEVICE_ID_HINIC_VF      0x375E
+#define HINIC_VF_FLR_TYPE           0x1000
+#define HINIC_VF_FLR_CAP_BIT        (1UL << 30)
+#define HINIC_VF_OP                 0xE80
+#define HINIC_VF_FLR_PROC_BIT       (1UL << 18)
+#define HINIC_OPERATION_TIMEOUT     15000      /* 15 seconds */
+
+/* Device-specific reset method for Huawei Intelligent NIC virtual functions */
+static int reset_hinic_vf_dev(struct pci_dev *pdev, int probe)
+{
+       unsigned long timeout;
+       void __iomem *bar;
+       u32 val;
+
+       if (probe)
+               return 0;
+
+       bar = pci_iomap(pdev, 0, 0);
+       if (!bar)
+               return -ENOTTY;
+
+       /* Get and check firmware capabilities */
+       val = ioread32be(bar + HINIC_VF_FLR_TYPE);
+       if (!(val & HINIC_VF_FLR_CAP_BIT)) {
+               pci_iounmap(pdev, bar);
+               return -ENOTTY;
+       }
+
+       /* Set HINIC_VF_FLR_PROC_BIT for the start of FLR */
+       val = ioread32be(bar + HINIC_VF_OP);
+       val = val | HINIC_VF_FLR_PROC_BIT;
+       iowrite32be(val, bar + HINIC_VF_OP);
+
+       pcie_flr(pdev);
+
+       /*
+        * The device must recapture its Bus and Device Numbers after FLR
+        * in order to generate Completions.  Issue a config write to let the
+        * device capture this information.
+        */
+       pci_write_config_word(pdev, PCI_VENDOR_ID, 0);
+
+       /* Firmware clears HINIC_VF_FLR_PROC_BIT when reset is complete */
+       timeout = jiffies + msecs_to_jiffies(HINIC_OPERATION_TIMEOUT);
+       do {
+               val = ioread32be(bar + HINIC_VF_OP);
+               if (!(val & HINIC_VF_FLR_PROC_BIT))
+                       goto reset_complete;
+               msleep(20);
+       } while (time_before(jiffies, timeout));
+
+       val = ioread32be(bar + HINIC_VF_OP);
+       if (!(val & HINIC_VF_FLR_PROC_BIT))
+               goto reset_complete;
+
+       pci_warn(pdev, "Reset dev timeout, FLR ack reg: %#010x\n", val);
+
+reset_complete:
+       pci_iounmap(pdev, bar);
+
+       return 0;
+}
+
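Note the shape of the polling loop in reset_hinic_vf_dev(): after the deadline expires it reads the ack register one final time, the usual guard against the task being scheduled out past the deadline between a successful poll and the time check, which must not be reported as a timeout. The idiom in isolation, with a hypothetical completion check:

    #include <stdbool.h>
    #include <stdio.h>
    #include <time.h>

    static bool flr_acked(void)         /* stand-in for the register read */
    {
            return true;
    }

    int main(void)
    {
            time_t deadline = time(NULL) + 15;

            while (time(NULL) < deadline) {
                    if (flr_acked())
                            goto complete;
                    /* real code sleeps between polls (msleep(20) above) */
            }

            /* re-check once: the deadline may have passed while preempted */
            if (flr_acked())
                    goto complete;

            fprintf(stderr, "timeout\n");
            return 1;

    complete:
            return 0;
    }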
 static const struct pci_dev_reset_methods pci_dev_reset_methods[] = {
        { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82599_SFP_VF,
                 reset_intel_82599_sfp_virtfn },
@@ -3913,6 +3998,8 @@ static const struct pci_dev_reset_methods pci_dev_reset_methods[] = {
        { PCI_VENDOR_ID_INTEL, 0x0a54, delay_250ms_after_flr },
        { PCI_VENDOR_ID_CHELSIO, PCI_ANY_ID,
                reset_chelsio_generic_dev },
+       { PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_HINIC_VF,
+               reset_hinic_vf_dev },
        { 0 }
 };
 
@@ -4753,6 +4840,8 @@ static const struct pci_dev_acs_enabled {
        { PCI_VENDOR_ID_AMPERE, 0xE00A, pci_quirk_xgene_acs },
        { PCI_VENDOR_ID_AMPERE, 0xE00B, pci_quirk_xgene_acs },
        { PCI_VENDOR_ID_AMPERE, 0xE00C, pci_quirk_xgene_acs },
+       /* Broadcom multi-function device */
+       { PCI_VENDOR_ID_BROADCOM, 0x16D7, pci_quirk_mf_endpoint_acs },
        { PCI_VENDOR_ID_BROADCOM, 0xD714, pci_quirk_brcm_acs },
        /* Amazon Annapurna Labs */
        { PCI_VENDOR_ID_AMAZON_ANNAPURNA_LABS, 0x0031, pci_quirk_al_acs },
@@ -5154,7 +5243,8 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0422, quirk_no_ext_tags);
 static void quirk_amd_harvest_no_ats(struct pci_dev *pdev)
 {
        if ((pdev->device == 0x7312 && pdev->revision != 0x00) ||
-           (pdev->device == 0x7340 && pdev->revision != 0xc5))
+           (pdev->device == 0x7340 && pdev->revision != 0xc5) ||
+           (pdev->device == 0x7341 && pdev->revision != 0x00))
                return;
 
        if (pdev->device == 0x15d8) {
@@ -5181,6 +5271,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x6900, quirk_amd_harvest_no_ats);
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7312, quirk_amd_harvest_no_ats);
 /* AMD Navi14 dGPU */
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7340, quirk_amd_harvest_no_ats);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7341, quirk_amd_harvest_no_ats);
 /* AMD Raven platform iGPU */
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x15d8, quirk_amd_harvest_no_ats);
 #endif /* CONFIG_PCI_ATS */
index 03a246e..21c4c34 100644 (file)
@@ -63,7 +63,7 @@ static void enqueue_external_timestamp(struct timestamp_event_queue *queue,
        spin_unlock_irqrestore(&queue->lock, flags);
 }
 
-s32 scaled_ppm_to_ppb(long ppm)
+long scaled_ppm_to_ppb(long ppm)
 {
        /*
         * The 'freq' field in the 'struct timex' is in parts per
@@ -80,7 +80,7 @@ s32 scaled_ppm_to_ppb(long ppm)
        s64 ppb = 1 + ppm;
        ppb *= 125;
        ppb >>= 13;
-       return (s32) ppb;
+       return (long) ppb;
 }
 EXPORT_SYMBOL(scaled_ppm_to_ppb);
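The arithmetic is worth spelling out: scaled ppm carries a 16-bit binary fraction, so ppb = ppm * 1000 / 2^16, which the helper reduces to ppm * 125 >> 13. Widening the return type to long presumably avoids truncating large results to 32 bits on 64-bit systems, where a wrapped value could slip past the max_adj range check; that motivation is inferred from the type change, not stated in the hunk. A runnable check of the math:

    #include <stdio.h>

    /* Mirrors the widened helper: ppb = ppm * 1000 / 2^16 = ppm * 125 >> 13 */
    static long scaled_ppm_to_ppb(long ppm)
    {
            long long ppb = 1 + ppm;

            ppb *= 125;
            ppb >>= 13;
            return (long)ppb;
    }

    int main(void)
    {
            /* 1 ppm is 1 << 16 in scaled-ppm units and maps to 1000 ppb */
            printf("%ld\n", scaled_ppm_to_ppb(1L << 16));   /* prints 1000 */
            return 0;
    }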
 
@@ -138,7 +138,7 @@ static int ptp_clock_adjtime(struct posix_clock *pc, struct __kernel_timex *tx)
                delta = ktime_to_ns(kt);
                err = ops->adjtime(ops, delta);
        } else if (tx->modes & ADJ_FREQUENCY) {
-               s32 ppb = scaled_ppm_to_ppb(tx->freq);
+               long ppb = scaled_ppm_to_ppb(tx->freq);
                if (ppb > ops->max_adj || ppb < -ops->max_adj)
                        return -ERANGE;
                if (ops->adjfine)
index ecefc25..337353c 100644 (file)
@@ -135,12 +135,13 @@ static struct ap_queue_status ap_sm_recv(struct ap_queue *aq)
 {
        struct ap_queue_status status;
        struct ap_message *ap_msg;
+       bool found = false;
 
        status = ap_dqap(aq->qid, &aq->reply->psmid,
                         aq->reply->msg, aq->reply->len);
        switch (status.response_code) {
        case AP_RESPONSE_NORMAL:
-               aq->queue_count--;
+               aq->queue_count = max_t(int, 0, aq->queue_count - 1);
                if (aq->queue_count > 0)
                        mod_timer(&aq->timeout,
                                  jiffies + aq->request_timeout);
@@ -150,8 +151,14 @@ static struct ap_queue_status ap_sm_recv(struct ap_queue *aq)
                        list_del_init(&ap_msg->list);
                        aq->pendingq_count--;
                        ap_msg->receive(aq, ap_msg, aq->reply);
+                       found = true;
                        break;
                }
+               if (!found) {
+                       AP_DBF_WARN("%s unassociated reply psmid=0x%016llx on 0x%02x.%04x\n",
+                                   __func__, aq->reply->psmid,
+                                   AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid));
+               }
                fallthrough;
        case AP_RESPONSE_NO_PENDING_REPLY:
                if (!status.queue_empty || aq->queue_count <= 0)
@@ -232,7 +239,7 @@ static enum ap_sm_wait ap_sm_write(struct ap_queue *aq)
                           ap_msg->flags & AP_MSG_FLAG_SPECIAL);
        switch (status.response_code) {
        case AP_RESPONSE_NORMAL:
-               aq->queue_count++;
+               aq->queue_count = max_t(int, 1, aq->queue_count + 1);
                if (aq->queue_count == 1)
                        mod_timer(&aq->timeout, jiffies + aq->request_timeout);
                list_move_tail(&ap_msg->list, &aq->pendingq);
index ffa1cf4..4378592 100644 (file)
@@ -2284,7 +2284,7 @@ static int rtw_cfg80211_add_monitor_if(struct adapter *padapter, char *name, str
        mon_wdev->iftype = NL80211_IFTYPE_MONITOR;
        mon_ndev->ieee80211_ptr = mon_wdev;
 
-       ret = register_netdevice(mon_ndev);
+       ret = cfg80211_register_netdevice(mon_ndev);
        if (ret) {
                goto out;
        }
@@ -2360,7 +2360,7 @@ static int cfg80211_rtw_del_virtual_intf(struct wiphy *wiphy,
        adapter = rtw_netdev_priv(ndev);
        pwdev_priv = adapter_wdev_data(adapter);
 
-       unregister_netdevice(ndev);
+       cfg80211_unregister_netdevice(ndev);
 
        if (ndev == pwdev_priv->pmon_ndev) {
                pwdev_priv->pmon_ndev = NULL;
index 4545b23..bac0f54 100644 (file)
@@ -686,6 +686,16 @@ static int imx7d_charger_secondary_detection(struct imx_usbmisc_data *data)
        int val;
        unsigned long flags;
 
+       /* Clear VDATSRCENB0 to disable VDP_SRC and IDM_SNK, as required by the BC 1.2 spec */
+       spin_lock_irqsave(&usbmisc->lock, flags);
+       val = readl(usbmisc->base + MX7D_USB_OTG_PHY_CFG2);
+       val &= ~MX7D_USB_OTG_PHY_CFG2_CHRG_VDATSRCENB0;
+       writel(val, usbmisc->base + MX7D_USB_OTG_PHY_CFG2);
+       spin_unlock_irqrestore(&usbmisc->lock, flags);
+
+       /* TVDMSRC_DIS */
+       msleep(20);
+
        /* VDM_SRC is connected to D- and IDP_SINK is connected to D+ */
        spin_lock_irqsave(&usbmisc->lock, flags);
        val = readl(usbmisc->base + MX7D_USB_OTG_PHY_CFG2);
@@ -695,7 +705,8 @@ static int imx7d_charger_secondary_detection(struct imx_usbmisc_data *data)
                                usbmisc->base + MX7D_USB_OTG_PHY_CFG2);
        spin_unlock_irqrestore(&usbmisc->lock, flags);
 
-       usleep_range(1000, 2000);
+       /* TVDMSRC_ON */
+       msleep(40);
 
        /*
         * Per BC 1.2, check voltage of D+:
@@ -798,7 +809,8 @@ static int imx7d_charger_primary_detection(struct imx_usbmisc_data *data)
                                usbmisc->base + MX7D_USB_OTG_PHY_CFG2);
        spin_unlock_irqrestore(&usbmisc->lock, flags);
 
-       usleep_range(1000, 2000);
+       /* TVDPSRC_ON */
+       msleep(40);
 
        /* Check if D- is less than VDAT_REF to determine an SDP per BC 1.2 */
        val = readl(usbmisc->base + MX7D_USB_OTG_PHY_STATUS);
index fc7d6cd..df8e69e 100644 (file)
@@ -41,6 +41,8 @@
 #define USB_VENDOR_GENESYS_LOGIC               0x05e3
 #define USB_VENDOR_SMSC                                0x0424
 #define USB_PRODUCT_USB5534B                   0x5534
+#define USB_VENDOR_CYPRESS                     0x04b4
+#define USB_PRODUCT_CY7C65632                  0x6570
 #define HUB_QUIRK_CHECK_PORT_AUTOSUSPEND       0x01
 #define HUB_QUIRK_DISABLE_AUTOSUSPEND          0x02
 
@@ -5697,6 +5699,11 @@ static const struct usb_device_id hub_id_table[] = {
       .idProduct = USB_PRODUCT_USB5534B,
       .bInterfaceClass = USB_CLASS_HUB,
       .driver_info = HUB_QUIRK_DISABLE_AUTOSUSPEND},
+    { .match_flags = USB_DEVICE_ID_MATCH_VENDOR
+                   | USB_DEVICE_ID_MATCH_PRODUCT,
+      .idVendor = USB_VENDOR_CYPRESS,
+      .idProduct = USB_PRODUCT_CY7C65632,
+      .driver_info = HUB_QUIRK_DISABLE_AUTOSUSPEND},
     { .match_flags = USB_DEVICE_ID_MATCH_VENDOR
                        | USB_DEVICE_ID_MATCH_INT_CLASS,
       .idVendor = USB_VENDOR_GENESYS_LOGIC,
index 21129d3..4ac397e 100644 (file)
@@ -1671,8 +1671,8 @@ static int dwc3_remove(struct platform_device *pdev)
 
        pm_runtime_get_sync(&pdev->dev);
 
-       dwc3_debugfs_exit(dwc);
        dwc3_core_exit_mode(dwc);
+       dwc3_debugfs_exit(dwc);
 
        dwc3_core_exit(dwc);
        dwc3_ulpi_exit(dwc);
index b297525..179004b 100644 (file)
@@ -203,8 +203,8 @@ static int __init afs_init(void)
                goto error_fs;
 
        afs_proc_symlink = proc_symlink("fs/afs", NULL, "../self/net/afs");
-       if (IS_ERR(afs_proc_symlink)) {
-               ret = PTR_ERR(afs_proc_symlink);
+       if (!afs_proc_symlink) {
+               ret = -ENOMEM;
                goto error_proc;
        }
 
index a523bb8..e9ccaa3 100644 (file)
@@ -837,6 +837,7 @@ vm_fault_t afs_page_mkwrite(struct vm_fault *vmf)
        struct inode *inode = file_inode(file);
        struct afs_vnode *vnode = AFS_FS_I(inode);
        unsigned long priv;
+       vm_fault_t ret = VM_FAULT_RETRY;
 
        _enter("{{%llx:%llu}},{%lx}", vnode->fid.vid, vnode->fid.vnode, page->index);
 
@@ -848,14 +849,14 @@ vm_fault_t afs_page_mkwrite(struct vm_fault *vmf)
 #ifdef CONFIG_AFS_FSCACHE
        if (PageFsCache(page) &&
            wait_on_page_fscache_killable(page) < 0)
-               return VM_FAULT_RETRY;
+               goto out;
 #endif
 
        if (wait_on_page_writeback_killable(page))
-               return VM_FAULT_RETRY;
+               goto out;
 
        if (lock_page_killable(page) < 0)
-               return VM_FAULT_RETRY;
+               goto out;
 
        /* We mustn't change page->private until writeback is complete as that
         * details the portion of the page we need to write back and we might
@@ -863,7 +864,7 @@ vm_fault_t afs_page_mkwrite(struct vm_fault *vmf)
         */
        if (wait_on_page_writeback_killable(page) < 0) {
                unlock_page(page);
-               return VM_FAULT_RETRY;
+               goto out;
        }
 
        priv = afs_page_dirty(page, 0, thp_size(page));
@@ -877,8 +878,10 @@ vm_fault_t afs_page_mkwrite(struct vm_fault *vmf)
        }
        file_update_time(file);
 
+       ret = VM_FAULT_LOCKED;
+out:
        sb_end_pagefault(inode->i_sb);
-       return VM_FAULT_LOCKED;
+       return ret;
 }
 
 /*
index aa57bdc..6d5c4e4 100644 (file)
@@ -2442,16 +2442,16 @@ void btrfs_dec_block_group_ro(struct btrfs_block_group *cache)
        spin_lock(&sinfo->lock);
        spin_lock(&cache->lock);
        if (!--cache->ro) {
-               num_bytes = cache->length - cache->reserved -
-                           cache->pinned - cache->bytes_super -
-                           cache->zone_unusable - cache->used;
-               sinfo->bytes_readonly -= num_bytes;
                if (btrfs_is_zoned(cache->fs_info)) {
                        /* Migrate zone_unusable bytes back */
                        cache->zone_unusable = cache->alloc_offset - cache->used;
                        sinfo->bytes_zone_unusable += cache->zone_unusable;
                        sinfo->bytes_readonly -= cache->zone_unusable;
                }
+               num_bytes = cache->length - cache->reserved -
+                           cache->pinned - cache->bytes_super -
+                           cache->zone_unusable - cache->used;
+               sinfo->bytes_readonly -= num_bytes;
                list_del_init(&cache->ro_list);
        }
        spin_unlock(&cache->lock);
index 55efd3d..30dee68 100644 (file)
@@ -735,6 +735,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
                __SetPageUptodate(page);
                error = huge_add_to_page_cache(page, mapping, index);
                if (unlikely(error)) {
+                       restore_reserve_on_error(h, &pseudo_vma, addr, page);
                        put_page(page);
                        mutex_unlock(&hugetlb_fault_mutex_table[hash]);
                        goto out;
index be5b6d2..64864fb 100644 (file)
@@ -471,7 +471,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
                                        info_type, fanotify_info_name(info),
                                        info->name_len, buf, count);
                if (ret < 0)
-                       return ret;
+                       goto out_close_fd;
 
                buf += ret;
                count -= ret;
@@ -519,7 +519,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
                                        fanotify_event_object_fh(event),
                                        info_type, dot, dot_len, buf, count);
                if (ret < 0)
-                       return ret;
+                       goto out_close_fd;
 
                buf += ret;
                count -= ret;
index 7118ebe..9cbd915 100644 (file)
@@ -2676,7 +2676,9 @@ out:
 #ifdef CONFIG_SECURITY
 static int proc_pid_attr_open(struct inode *inode, struct file *file)
 {
-       return __mem_open(inode, file, PTRACE_MODE_READ_FSCREDS);
+       file->private_data = NULL;
+       __mem_open(inode, file, PTRACE_MODE_READ_FSCREDS);
+       return 0;
 }
 
 static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
index f180240..11e555c 100644 (file)
@@ -37,7 +37,6 @@ bool topology_scale_freq_invariant(void);
 enum scale_freq_source {
        SCALE_FREQ_SOURCE_CPUFREQ = 0,
        SCALE_FREQ_SOURCE_ARCH,
-       SCALE_FREQ_SOURCE_CPPC,
 };
 
 struct scale_freq_data {
index 9626fda..2a8ebe6 100644 (file)
@@ -286,6 +286,7 @@ struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
 vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd);
 
 extern struct page *huge_zero_page;
+extern unsigned long huge_zero_pfn;
 
 static inline bool is_huge_zero_page(struct page *page)
 {
@@ -294,7 +295,7 @@ static inline bool is_huge_zero_page(struct page *page)
 
 static inline bool is_huge_zero_pmd(pmd_t pmd)
 {
-       return is_huge_zero_page(pmd_page(pmd));
+       return READ_ONCE(huge_zero_pfn) == pmd_pfn(pmd) && pmd_present(pmd);
 }
 
 static inline bool is_huge_zero_pud(pud_t pud)
@@ -440,6 +441,11 @@ static inline bool is_huge_zero_page(struct page *page)
        return false;
 }
 
+static inline bool is_huge_zero_pmd(pmd_t pmd)
+{
+       return false;
+}
+
 static inline bool is_huge_zero_pud(pud_t pud)
 {
        return false;
index b92f25c..6504346 100644 (file)
@@ -149,6 +149,7 @@ bool hugetlb_reserve_pages(struct inode *inode, long from, long to,
 long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
                                                long freed);
 bool isolate_huge_page(struct page *page, struct list_head *list);
+int get_hwpoison_huge_page(struct page *page, bool *hugetlb);
 void putback_active_hugepage(struct page *page);
 void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason);
 void free_huge_page(struct page *page);
@@ -339,6 +340,11 @@ static inline bool isolate_huge_page(struct page *page, struct list_head *list)
        return false;
 }
 
+static inline int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
+{
+       return 0;
+}
+
 static inline void putback_active_hugepage(struct page *page)
 {
 }
@@ -604,6 +610,8 @@ struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma,
                                unsigned long address);
 int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
                        pgoff_t idx);
+void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
+                               unsigned long address, struct page *page);
 
 /* arch callback */
 int __init __alloc_bootmem_huge_page(struct hstate *h);
index 020a8f7..f8902bc 100644 (file)
@@ -542,6 +542,10 @@ struct mlx5_core_roce {
 enum {
        MLX5_PRIV_FLAGS_DISABLE_IB_ADEV = 1 << 0,
        MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV = 1 << 1,
+       /* Set during device detach to block any further device
+        * creation/deletion on driver rescan. Unset during device attach.
+        */
+       MLX5_PRIV_FLAGS_DETACH = 1 << 2,
 };
 
 struct mlx5_adev {
index 028f442..60ffeb6 100644 (file)
@@ -85,4 +85,5 @@ mlx5_core_hairpin_create(struct mlx5_core_dev *func_mdev,
                         struct mlx5_hairpin_params *params);
 
 void mlx5_core_hairpin_destroy(struct mlx5_hairpin *pair);
+void mlx5_core_hairpin_clear_dead_peer(struct mlx5_hairpin *hp);
 #endif /* __TRANSOBJ_H__ */
index c274f75..8ae3162 100644 (file)
@@ -1719,6 +1719,7 @@ struct zap_details {
        struct address_space *check_mapping;    /* Check page->mapping if set */
        pgoff_t first_index;                    /* Lowest page->index to unmap */
        pgoff_t last_index;                     /* Highest page->index to unmap */
+       struct page *single_page;               /* Locked page to be unmapped */
 };
 
 struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
@@ -1766,6 +1767,7 @@ extern vm_fault_t handle_mm_fault(struct vm_area_struct *vma,
 extern int fixup_user_fault(struct mm_struct *mm,
                            unsigned long address, unsigned int fault_flags,
                            bool *unlocked);
+void unmap_mapping_page(struct page *page);
 void unmap_mapping_pages(struct address_space *mapping,
                pgoff_t start, pgoff_t nr, bool even_cows);
 void unmap_mapping_range(struct address_space *mapping,
@@ -1786,6 +1788,7 @@ static inline int fixup_user_fault(struct mm_struct *mm, unsigned long address,
        BUG();
        return -EFAULT;
 }
+static inline void unmap_mapping_page(struct page *page) { }
 static inline void unmap_mapping_pages(struct address_space *mapping,
                pgoff_t start, pgoff_t nr, bool even_cows) { }
 static inline void unmap_mapping_range(struct address_space *mapping,
index 0d47fd3..51d7f1b 100644 (file)
@@ -235,7 +235,7 @@ extern int ptp_clock_index(struct ptp_clock *ptp);
  * @ppm:    Parts per million, but with a 16 bit binary fractional field
  */
 
-extern s32 scaled_ppm_to_ppb(long ppm);
+extern long scaled_ppm_to_ppb(long ppm);
 
 /**
  * ptp_find_pin() - obtain the pin index of a given auxiliary function
index def5c62..8d04e7d 100644 (file)
@@ -91,6 +91,7 @@ enum ttu_flags {
 
        TTU_SPLIT_HUGE_PMD      = 0x4,  /* split huge PMD if any */
        TTU_IGNORE_MLOCK        = 0x8,  /* ignore mlock */
+       TTU_SYNC                = 0x10, /* avoid racy checks with PVMW_SYNC */
        TTU_IGNORE_HWPOISON     = 0x20, /* corrupted page is recoverable */
        TTU_BATCH_FLUSH         = 0x40, /* Batch TLB flushes where possible
                                         * and caller guarantees they will
index b8fc5c5..0d8e3dc 100644 (file)
@@ -438,6 +438,4 @@ extern int __sys_socketpair(int family, int type, int protocol,
                            int __user *usockvec);
 extern int __sys_shutdown_sock(struct socket *sock, int how);
 extern int __sys_shutdown(int fd, int how);
-
-extern struct ns_common *get_net_ns(struct ns_common *ns);
 #endif /* _LINUX_SOCKET_H */
index d9b7c91..6430a94 100644 (file)
 #define SWP_TYPE_SHIFT (BITS_PER_XA_VALUE - MAX_SWAPFILES_SHIFT)
 #define SWP_OFFSET_MASK        ((1UL << SWP_TYPE_SHIFT) - 1)
 
+/* Clear all flags, keeping only the swp_entry_t related information */
+static inline pte_t pte_swp_clear_flags(pte_t pte)
+{
+       if (pte_swp_soft_dirty(pte))
+               pte = pte_swp_clear_soft_dirty(pte);
+       if (pte_swp_uffd_wp(pte))
+               pte = pte_swp_clear_uffd_wp(pte);
+       return pte;
+}
+
 /*
  * Store a type+offset into a swp_entry_t in an arch-independent format
  */
@@ -66,10 +76,7 @@ static inline swp_entry_t pte_to_swp_entry(pte_t pte)
 {
        swp_entry_t arch_entry;
 
-       if (pte_swp_soft_dirty(pte))
-               pte = pte_swp_clear_soft_dirty(pte);
-       if (pte_swp_uffd_wp(pte))
-               pte = pte_swp_clear_uffd_wp(pte);
+       pte = pte_swp_clear_flags(pte);
        arch_entry = __pte_to_swp_entry(pte);
        return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry));
 }
index 445b66c..e89530d 100644 (file)
@@ -5537,7 +5537,7 @@ void ieee80211_iterate_active_interfaces_atomic(struct ieee80211_hw *hw,
  *
  * This function iterates over the interfaces associated with a given
  * hardware that are currently active and calls the callback for them.
- * This version can only be used while holding the RTNL.
+ * This version can only be used while holding the wiphy mutex.
  *
  * @hw: the hardware struct of which the interfaces should be iterated over
  * @iter_flags: iteration flags, see &enum ieee80211_interface_iteration_flags
@@ -6392,7 +6392,12 @@ bool ieee80211_tx_prepare_skb(struct ieee80211_hw *hw,
 
 /**
  * ieee80211_parse_tx_radiotap - Sanity-check and parse the radiotap header
- *                              of injected frames
+ *                              of injected frames.
+ *
+ * To accurately parse and take into account rate and retransmission fields,
+ * you must initialize the chandef field in the ieee80211_tx_info structure
+ * of the skb before calling this function.
+ *
  * @skb: packet injected by userspace
  * @dev: the &struct device of this 802.11 device
  */
index fa58871..bdc0459 100644 (file)
@@ -184,6 +184,9 @@ struct net *copy_net_ns(unsigned long flags, struct user_namespace *user_ns,
 void net_ns_get_ownership(const struct net *net, kuid_t *uid, kgid_t *gid);
 
 void net_ns_barrier(void);
+
+struct ns_common *get_net_ns(struct ns_common *ns);
+struct net *get_net_ns_by_fd(int fd);
 #else /* CONFIG_NET_NS */
 #include <linux/sched.h>
 #include <linux/nsproxy.h>
@@ -203,13 +206,22 @@ static inline void net_ns_get_ownership(const struct net *net,
 }
 
 static inline void net_ns_barrier(void) {}
+
+static inline struct ns_common *get_net_ns(struct ns_common *ns)
+{
+       return ERR_PTR(-EINVAL);
+}
+
+static inline struct net *get_net_ns_by_fd(int fd)
+{
+       return ERR_PTR(-EINVAL);
+}
 #endif /* CONFIG_NET_NS */
 
 
 extern struct list_head net_namespace_list;
 
 struct net *get_net_ns_by_pid(pid_t pid);
-struct net *get_net_ns_by_fd(int fd);
 
 #ifdef CONFIG_SYSCTL
 void ipx_register_sysctl(void);
index 0e962d8..7a7058f 100644 (file)
@@ -1934,7 +1934,8 @@ static inline u32 net_tx_rndhash(void)
 
 static inline void sk_set_txhash(struct sock *sk)
 {
-       sk->sk_txhash = net_tx_rndhash();
+       /* This pairs with READ_ONCE() in skb_set_hash_from_sk() */
+       WRITE_ONCE(sk->sk_txhash, net_tx_rndhash());
 }
 
 static inline bool sk_rethink_txhash(struct sock *sk)
@@ -2206,9 +2207,12 @@ static inline void sock_poll_wait(struct file *filp, struct socket *sock,
 
 static inline void skb_set_hash_from_sk(struct sk_buff *skb, struct sock *sk)
 {
-       if (sk->sk_txhash) {
+       /* This pairs with WRITE_ONCE() in sk_set_txhash() */
+       u32 txhash = READ_ONCE(sk->sk_txhash);
+
+       if (txhash) {
                skb->l4_hash = 1;
-               skb->hash = sk->sk_txhash;
+               skb->hash = txhash;
        }
 }
 
@@ -2266,8 +2270,13 @@ struct sk_buff *sock_dequeue_err_skb(struct sock *sk);
 static inline int sock_error(struct sock *sk)
 {
        int err;
-       if (likely(!sk->sk_err))
+
+       /* Avoid an atomic operation for the common case.
+        * This is racy since another cpu/thread can change sk_err under us.
+        */
+       if (likely(data_race(!sk->sk_err)))
                return 0;
+
        err = xchg(&sk->sk_err, 0);
        return -err;
 }
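Both hunks apply the same lockless-field discipline: a word read and written without the socket lock gets WRITE_ONCE()/READ_ONCE() on both sides, and the reader loads it once into a local so the test and the use cannot observe two different values; sock_error() instead marks its benign fast-path race explicitly with data_race(). A C11 analogue of the txhash pairing (struct and names hypothetical):

    #include <stdatomic.h>
    #include <stdint.h>

    struct sock_like {
            _Atomic uint32_t txhash;    /* written and read locklessly */
    };

    static void set_txhash(struct sock_like *sk, uint32_t h)
    {
            /* pairs with the relaxed load below, like WRITE_ONCE()/READ_ONCE() */
            atomic_store_explicit(&sk->txhash, h, memory_order_relaxed);
    }

    static uint32_t hash_for_skb(struct sock_like *sk)
    {
            /* single load into a local: the test and the use agree */
            uint32_t txhash = atomic_load_explicit(&sk->txhash,
                                                   memory_order_relaxed);

            return txhash ? txhash : 0;
    }

    int main(void)
    {
            struct sock_like sk = { 0 };

            set_txhash(&sk, 0x1234);
            return hash_for_skb(&sk) != 0x1234;
    }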
index 6de5a7f..d2a9420 100644 (file)
@@ -863,8 +863,7 @@ __SYSCALL(__NR_process_madvise, sys_process_madvise)
 __SC_COMP(__NR_epoll_pwait2, sys_epoll_pwait2, compat_sys_epoll_pwait2)
 #define __NR_mount_setattr 442
 __SYSCALL(__NR_mount_setattr, sys_mount_setattr)
-#define __NR_quotactl_path 443
-__SYSCALL(__NR_quotactl_path, sys_quotactl_path)
+/* 443 is reserved for quotactl_path */
 
 #define __NR_landlock_create_ruleset 444
 __SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset)
index 7d66876..d1b3270 100644 (file)
@@ -289,6 +289,9 @@ struct sockaddr_in {
 /* Address indicating an error return. */
 #define        INADDR_NONE             ((unsigned long int) 0xffffffff)
 
+/* Dummy address for src of ICMP replies if no real address is set (RFC7600). */
+#define        INADDR_DUMMY            ((unsigned long int) 0xc0000008)
+
 /* Network number for local host loopback. */
 #define        IN_LOOPBACKNET          127
 
index 94ba516..c6a2757 100644 (file)
@@ -6483,6 +6483,27 @@ struct bpf_sanitize_info {
        bool mask_to_left;
 };
 
+static struct bpf_verifier_state *
+sanitize_speculative_path(struct bpf_verifier_env *env,
+                         const struct bpf_insn *insn,
+                         u32 next_idx, u32 curr_idx)
+{
+       struct bpf_verifier_state *branch;
+       struct bpf_reg_state *regs;
+
+       branch = push_stack(env, next_idx, curr_idx, true);
+       if (branch && insn) {
+               regs = branch->frame[branch->curframe]->regs;
+               if (BPF_SRC(insn->code) == BPF_K) {
+                       mark_reg_unknown(env, regs, insn->dst_reg);
+               } else if (BPF_SRC(insn->code) == BPF_X) {
+                       mark_reg_unknown(env, regs, insn->dst_reg);
+                       mark_reg_unknown(env, regs, insn->src_reg);
+               }
+       }
+       return branch;
+}
+
 static int sanitize_ptr_alu(struct bpf_verifier_env *env,
                            struct bpf_insn *insn,
                            const struct bpf_reg_state *ptr_reg,
@@ -6566,12 +6587,26 @@ do_sim:
                tmp = *dst_reg;
                *dst_reg = *ptr_reg;
        }
-       ret = push_stack(env, env->insn_idx + 1, env->insn_idx, true);
+       ret = sanitize_speculative_path(env, NULL, env->insn_idx + 1,
+                                       env->insn_idx);
        if (!ptr_is_dst_reg && ret)
                *dst_reg = tmp;
        return !ret ? REASON_STACK : 0;
 }
 
+static void sanitize_mark_insn_seen(struct bpf_verifier_env *env)
+{
+       struct bpf_verifier_state *vstate = env->cur_state;
+
+       /* If we simulate paths under speculation, we don't update the
+        * insn as 'seen' such that when we verify unreachable paths in
+        * the non-speculative domain, sanitize_dead_code() can still
+        * rewrite/sanitize them.
+        */
+       if (!vstate->speculative)
+               env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
+}
+
 static int sanitize_err(struct bpf_verifier_env *env,
                        const struct bpf_insn *insn, int reason,
                        const struct bpf_reg_state *off_reg,
@@ -8750,14 +8785,28 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
                if (err)
                        return err;
        }
+
        if (pred == 1) {
-               /* only follow the goto, ignore fall-through */
+               /* Only follow the goto, ignore fall-through. If needed, push
+                * the fall-through branch for simulation under speculative
+                * execution.
+                */
+               if (!env->bypass_spec_v1 &&
+                   !sanitize_speculative_path(env, insn, *insn_idx + 1,
+                                              *insn_idx))
+                       return -EFAULT;
                *insn_idx += insn->off;
                return 0;
        } else if (pred == 0) {
-               /* only follow fall-through branch, since
-                * that's where the program will go
+               /* Only follow the fall-through branch, since that's where the
+                * program will go. If needed, push the goto branch for
+                * simulation under speculative execution.
                 */
+               if (!env->bypass_spec_v1 &&
+                   !sanitize_speculative_path(env, insn,
+                                              *insn_idx + insn->off + 1,
+                                              *insn_idx))
+                       return -EFAULT;
                return 0;
        }
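
The shape being defended against here is the classic bounds-check-bypass gadget, sketched below in plain C (illustrative only, not code from this patch): even when the verifier proves one side of the branch dead, the CPU may still execute it transiently, so both sides are now pushed for simulation with the compared registers marked unknown.

/* Illustrative Spectre v1 shape; names are made up for the sketch. */
#include <stdint.h>

uint64_t gadget(const uint64_t *arr, uint64_t idx, uint64_t size)
{
        if (idx < size)                 /* predictable branch             */
                return arr[idx];        /* may run transiently even when  */
                                        /* the verifier proves idx >= size */
        return 0;
}
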
 
@@ -10630,7 +10679,7 @@ static int do_check(struct bpf_verifier_env *env)
                }
 
                regs = cur_regs(env);
-               env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
+               sanitize_mark_insn_seen(env);
                prev_insn_idx = env->insn_idx;
 
                if (class == BPF_ALU || class == BPF_ALU64) {
@@ -10857,7 +10906,7 @@ process_bpf_exit:
                                        return err;
 
                                env->insn_idx++;
-                               env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
+                               sanitize_mark_insn_seen(env);
                        } else {
                                verbose(env, "invalid BPF_LD mode\n");
                                return -EINVAL;
@@ -11366,6 +11415,7 @@ static int adjust_insn_aux_data(struct bpf_verifier_env *env,
 {
        struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data;
        struct bpf_insn *insn = new_prog->insnsi;
+       u32 old_seen = old_data[off].seen;
        u32 prog_len;
        int i;
 
@@ -11386,7 +11436,8 @@ static int adjust_insn_aux_data(struct bpf_verifier_env *env,
        memcpy(new_data + off + cnt - 1, old_data + off,
               sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
        for (i = off; i < off + cnt - 1; i++) {
-               new_data[i].seen = env->pass_cnt;
+               /* Expand insn_aux_data[off]'s seen count to the patched range. */
+               new_data[i].seen = old_seen;
                new_data[i].zext_dst = insn_has_def32(env, insn + i);
        }
        env->insn_aux_data = new_data;
@@ -12710,6 +12761,9 @@ static void free_states(struct bpf_verifier_env *env)
  * insn_aux_data was touched. These variables are compared to clear temporary
  * data from failed pass. For testing and experiments do_check_common() can be
  * run multiple times even when prior attempt to verify is unsuccessful.
+ *
+ * Note that special handling is needed on !env->bypass_spec_v1 if this is
+ * ever called outside of error path with subsequent program rejection.
  */
 static void sanitize_insn_aux_data(struct bpf_verifier_env *env)
 {
index 825284b..684a606 100644 (file)
@@ -464,6 +464,7 @@ static int __init crash_save_vmcoreinfo_init(void)
        VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS);
        VMCOREINFO_STRUCT_SIZE(mem_section);
        VMCOREINFO_OFFSET(mem_section, section_mem_map);
+       VMCOREINFO_NUMBER(SECTION_SIZE_BITS);
        VMCOREINFO_NUMBER(MAX_PHYSMEM_BITS);
 #endif
        VMCOREINFO_STRUCT_SIZE(page);
index 7a14146..9423218 100644 (file)
@@ -391,6 +391,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)
        /* No obstacles. */
        return vprintk_default(fmt, args);
 }
+EXPORT_SYMBOL(vprintk);
 
 void __init printk_safe_init(void)
 {
@@ -411,4 +412,3 @@ void __init printk_safe_init(void)
        /* Flush pending messages that did not have scheduled IRQ works. */
        printk_safe_flush();
 }
-EXPORT_SYMBOL(vprintk);
index 5226cc2..4ca80df 100644 (file)
@@ -6389,7 +6389,6 @@ int sched_setattr_nocheck(struct task_struct *p, const struct sched_attr *attr)
 {
        return __sched_setscheduler(p, attr, false, true);
 }
-EXPORT_SYMBOL_GPL(sched_setattr_nocheck);
 
 /**
  * sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace.
index 9299057..d23a09d 100644 (file)
@@ -2198,9 +2198,6 @@ struct saved_cmdlines_buffer {
 };
 static struct saved_cmdlines_buffer *savedcmd;
 
-/* temporary disable recording */
-static atomic_t trace_record_taskinfo_disabled __read_mostly;
-
 static inline char *get_saved_cmdlines(int idx)
 {
        return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
@@ -2486,8 +2483,6 @@ static bool tracing_record_taskinfo_skip(int flags)
 {
        if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
                return true;
-       if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
-               return true;
        if (!__this_cpu_read(trace_taskinfo_save))
                return true;
        return false;
@@ -3998,9 +3993,6 @@ static void *s_start(struct seq_file *m, loff_t *pos)
                return ERR_PTR(-EBUSY);
 #endif
 
-       if (!iter->snapshot)
-               atomic_inc(&trace_record_taskinfo_disabled);
-
        if (*pos != iter->pos) {
                iter->ent = NULL;
                iter->cpu = 0;
@@ -4043,9 +4035,6 @@ static void s_stop(struct seq_file *m, void *p)
                return;
 #endif
 
-       if (!iter->snapshot)
-               atomic_dec(&trace_record_taskinfo_disabled);
-
        trace_access_unlock(iter->cpu_file);
        trace_event_read_unlock();
 }
index c1637f9..4702efb 100644 (file)
@@ -115,9 +115,9 @@ u64 notrace trace_clock_global(void)
        prev_time = READ_ONCE(trace_clock_struct.prev_time);
        now = sched_clock_cpu(this_cpu);
 
-       /* Make sure that now is always greater than prev_time */
+       /* Make sure that now is always greater than or equal to prev_time */
        if ((s64)(now - prev_time) < 0)
-               now = prev_time + 1;
+               now = prev_time;
 
        /*
         * If in an NMI context then dont risk lockups and simply return
@@ -131,7 +131,7 @@ u64 notrace trace_clock_global(void)
                /* Reread prev_time in case it was already updated */
                prev_time = READ_ONCE(trace_clock_struct.prev_time);
                if ((s64)(now - prev_time) < 0)
-                       now = prev_time + 1;
+                       now = prev_time;
 
                trace_clock_struct.prev_time = now;
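
The (s64)(now - prev_time) < 0 test is the usual wraparound-safe ordering check, and with the clamp now landing on prev_time itself rather than prev_time + 1, the global clock becomes monotonically non-decreasing instead of strictly increasing. A standalone demonstration of the signed-difference trick:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint64_t prev = UINT64_MAX - 5;         /* counter about to wrap */
        uint64_t now  = 2;                      /* post-wrap reading     */

        /* Signed difference sees the wrapped value as later: no clamp. */
        if ((int64_t)(now - prev) < 0)
                now = prev;
        printf("%llu\n", (unsigned long long)now);      /* prints 2 */
        return 0;
}
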
 
index 63ed6b2..6d2a011 100644 (file)
@@ -62,6 +62,7 @@ static struct shrinker deferred_split_shrinker;
 
 static atomic_t huge_zero_refcount;
 struct page *huge_zero_page __read_mostly;
+unsigned long huge_zero_pfn __read_mostly = ~0UL;
 
 bool transparent_hugepage_enabled(struct vm_area_struct *vma)
 {
@@ -98,6 +99,7 @@ retry:
                __free_pages(zero_page, compound_order(zero_page));
                goto retry;
        }
+       WRITE_ONCE(huge_zero_pfn, page_to_pfn(zero_page));
 
        /* We take additional reference here. It will be put back by shrinker */
        atomic_set(&huge_zero_refcount, 2);
@@ -147,6 +149,7 @@ static unsigned long shrink_huge_zero_page_scan(struct shrinker *shrink,
        if (atomic_cmpxchg(&huge_zero_refcount, 1, 0) == 1) {
                struct page *zero_page = xchg(&huge_zero_page, NULL);
                BUG_ON(zero_page == NULL);
+               WRITE_ONCE(huge_zero_pfn, ~0UL);
                __free_pages(zero_page, compound_order(zero_page));
                return HPAGE_PMD_NR;
        }
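
The writes above pair with a lock-free reader; presumably a header-side helper (not shown in this excerpt) compares a pfn against the published value, along these lines:

/* Kernel-context sketch of the reader side; the helper name is
 * illustrative, not taken from this hunk. */
static inline bool is_huge_zero_pfn(unsigned long pfn)
{
        /* Pairs with the WRITE_ONCE() updates above; ~0UL never
         * matches a real pfn, so "no huge zero page" reads false. */
        return READ_ONCE(huge_zero_pfn) == pfn;
}
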
@@ -2044,7 +2047,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
        count_vm_event(THP_SPLIT_PMD);
 
        if (!vma_is_anonymous(vma)) {
-               _pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd);
+               old_pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd);
                /*
                 * We are going to unmap this huge page. So
                 * just go ahead and zap it
@@ -2053,16 +2056,25 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
                        zap_deposited_table(mm, pmd);
                if (vma_is_special_huge(vma))
                        return;
-               page = pmd_page(_pmd);
-               if (!PageDirty(page) && pmd_dirty(_pmd))
-                       set_page_dirty(page);
-               if (!PageReferenced(page) && pmd_young(_pmd))
-                       SetPageReferenced(page);
-               page_remove_rmap(page, true);
-               put_page(page);
+               if (unlikely(is_pmd_migration_entry(old_pmd))) {
+                       swp_entry_t entry;
+
+                       entry = pmd_to_swp_entry(old_pmd);
+                       page = migration_entry_to_page(entry);
+               } else {
+                       page = pmd_page(old_pmd);
+                       if (!PageDirty(page) && pmd_dirty(old_pmd))
+                               set_page_dirty(page);
+                       if (!PageReferenced(page) && pmd_young(old_pmd))
+                               SetPageReferenced(page);
+                       page_remove_rmap(page, true);
+                       put_page(page);
+               }
                add_mm_counter(mm, mm_counter_file(page), -HPAGE_PMD_NR);
                return;
-       } else if (pmd_trans_huge(*pmd) && is_huge_zero_pmd(*pmd)) {
+       }
+
+       if (is_huge_zero_pmd(*pmd)) {
                /*
                 * FIXME: Do we want to invalidate secondary mmu by calling
                 * mmu_notifier_invalidate_range() see comments below inside
@@ -2338,17 +2350,17 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma,
 
 static void unmap_page(struct page *page)
 {
-       enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK |
+       enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_SYNC |
                TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD;
-       bool unmap_success;
 
        VM_BUG_ON_PAGE(!PageHead(page), page);
 
        if (PageAnon(page))
                ttu_flags |= TTU_SPLIT_FREEZE;
 
-       unmap_success = try_to_unmap(page, ttu_flags);
-       VM_BUG_ON_PAGE(!unmap_success, page);
+       try_to_unmap(page, ttu_flags);
+
+       VM_WARN_ON_ONCE_PAGE(page_mapped(page), page);
 }
 
 static void remap_page(struct page *page, unsigned int nr)
@@ -2659,7 +2671,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
        struct deferred_split *ds_queue = get_deferred_split_queue(head);
        struct anon_vma *anon_vma = NULL;
        struct address_space *mapping = NULL;
-       int count, mapcount, extra_pins, ret;
+       int extra_pins, ret;
        pgoff_t end;
 
        VM_BUG_ON_PAGE(is_huge_zero_page(head), head);
@@ -2718,7 +2730,6 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
        }
 
        unmap_page(head);
-       VM_BUG_ON_PAGE(compound_mapcount(head), head);
 
        /* block interrupt reentry in xa_lock and spinlock */
        local_irq_disable();
@@ -2736,9 +2747,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 
        /* Prevent deferred_split_scan() touching ->_refcount */
        spin_lock(&ds_queue->split_queue_lock);
-       count = page_count(head);
-       mapcount = total_mapcount(head);
-       if (!mapcount && page_ref_freeze(head, 1 + extra_pins)) {
+       if (page_ref_freeze(head, 1 + extra_pins)) {
                if (!list_empty(page_deferred_list(head))) {
                        ds_queue->split_queue_len--;
                        list_del(page_deferred_list(head));
@@ -2758,16 +2767,9 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
                __split_huge_page(page, list, end);
                ret = 0;
        } else {
-               if (IS_ENABLED(CONFIG_DEBUG_VM) && mapcount) {
-                       pr_alert("total_mapcount: %u, page_count(): %u\n",
-                                       mapcount, count);
-                       if (PageTail(page))
-                               dump_page(head, NULL);
-                       dump_page(page, "total_mapcount(head) > 0");
-                       BUG();
-               }
                spin_unlock(&ds_queue->split_queue_lock);
-fail:          if (mapping)
+fail:
+               if (mapping)
                        xa_unlock(&mapping->i_pages);
                local_irq_enable();
                remap_page(head, thp_nr_pages(head));
index 5560b50..e0a5f9c 100644 (file)
@@ -2121,12 +2121,18 @@ out:
  * be restored when a newly allocated huge page must be freed.  It is
  * to be called after calling vma_needs_reservation to determine if a
  * reservation exists.
+ *
+ * vma_del_reservation is used in error paths where an entry in the reserve
+ * map was created during huge page allocation and must be removed.  It is to
+ * be called after calling vma_needs_reservation to determine if a reservation
+ * exists.
  */
 enum vma_resv_mode {
        VMA_NEEDS_RESV,
        VMA_COMMIT_RESV,
        VMA_END_RESV,
        VMA_ADD_RESV,
+       VMA_DEL_RESV,
 };
 static long __vma_reservation_common(struct hstate *h,
                                struct vm_area_struct *vma, unsigned long addr,
@@ -2170,11 +2176,21 @@ static long __vma_reservation_common(struct hstate *h,
                        ret = region_del(resv, idx, idx + 1);
                }
                break;
+       case VMA_DEL_RESV:
+               if (vma->vm_flags & VM_MAYSHARE) {
+                       region_abort(resv, idx, idx + 1, 1);
+                       ret = region_del(resv, idx, idx + 1);
+               } else {
+                       ret = region_add(resv, idx, idx + 1, 1, NULL, NULL);
+                       /* region_add calls of range 1 should never fail. */
+                       VM_BUG_ON(ret < 0);
+               }
+               break;
        default:
                BUG();
        }
 
-       if (vma->vm_flags & VM_MAYSHARE)
+       if (vma->vm_flags & VM_MAYSHARE || mode == VMA_DEL_RESV)
                return ret;
        /*
         * We know private mapping must have HPAGE_RESV_OWNER set.
@@ -2222,25 +2238,39 @@ static long vma_add_reservation(struct hstate *h,
        return __vma_reservation_common(h, vma, addr, VMA_ADD_RESV);
 }
 
+static long vma_del_reservation(struct hstate *h,
+                       struct vm_area_struct *vma, unsigned long addr)
+{
+       return __vma_reservation_common(h, vma, addr, VMA_DEL_RESV);
+}
+
 /*
- * This routine is called to restore a reservation on error paths.  In the
- * specific error paths, a huge page was allocated (via alloc_huge_page)
- * and is about to be freed.  If a reservation for the page existed,
- * alloc_huge_page would have consumed the reservation and set
- * HPageRestoreReserve in the newly allocated page.  When the page is freed
- * via free_huge_page, the global reservation count will be incremented if
- * HPageRestoreReserve is set.  However, free_huge_page can not adjust the
- * reserve map.  Adjust the reserve map here to be consistent with global
- * reserve count adjustments to be made by free_huge_page.
+ * This routine is called to restore reservation information on error paths.
+ * It should ONLY be called for pages allocated via alloc_huge_page(), and
+ * the hugetlb mutex should remain held when calling this routine.
+ *
+ * It handles two specific cases:
+ * 1) A reservation was in place and the page consumed the reservation.
+ *    HPageRestoreReserve is set in the page.
+ * 2) No reservation was in place for the page, so HPageRestoreReserve is
+ *    not set.  However, alloc_huge_page always updates the reserve map.
+ *
+ * In case 1, free_huge_page later in the error path will increment the
+ * global reserve count.  But, free_huge_page does not have enough context
+ * to adjust the reservation map.  This case deals primarily with private
+ * mappings.  Adjust the reserve map here to be consistent with global
+ * reserve count adjustments to be made by free_huge_page.  Make sure the
+ * reserve map indicates there is a reservation present.
+ *
+ * In case 2, simply undo reserve map modifications done by alloc_huge_page.
  */
-static void restore_reserve_on_error(struct hstate *h,
-                       struct vm_area_struct *vma, unsigned long address,
-                       struct page *page)
+void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
+                       unsigned long address, struct page *page)
 {
-       if (unlikely(HPageRestoreReserve(page))) {
-               long rc = vma_needs_reservation(h, vma, address);
+       long rc = vma_needs_reservation(h, vma, address);
 
-               if (unlikely(rc < 0)) {
+       if (HPageRestoreReserve(page)) {
+               if (unlikely(rc < 0))
                        /*
                         * Rare out of memory condition in reserve map
                         * manipulation.  Clear HPageRestoreReserve so that
@@ -2253,16 +2283,57 @@ static void restore_reserve_on_error(struct hstate *h,
                         * accounting of reserve counts.
                         */
                        ClearHPageRestoreReserve(page);
-               } else if (rc) {
-                       rc = vma_add_reservation(h, vma, address);
-                       if (unlikely(rc < 0))
+               else if (rc)
+                       (void)vma_add_reservation(h, vma, address);
+               else
+                       vma_end_reservation(h, vma, address);
+       } else {
+               if (!rc) {
+                       /*
+                        * This indicates there is an entry in the reserve map
+                        * added by alloc_huge_page.  We know it was not added
+                        * before the alloc_huge_page call, otherwise
+                        * HPageRestoreReserve would be set on the page.
+                        * Remove the entry so that a subsequent allocation
+                        * does not consume a reservation.
+                        */
+                       rc = vma_del_reservation(h, vma, address);
+                       if (rc < 0)
+                               /*
+                                * VERY rare out of memory condition.  Since
+                                * we can not delete the entry, set
+                                * HPageRestoreReserve so that the reserve
+                                * count will be incremented when the page
+                                * is freed.  This reserve will be consumed
+                                * on a subsequent allocation.
+                                */
+                               SetHPageRestoreReserve(page);
+               } else if (rc < 0) {
+                       /*
+                        * Rare out of memory condition from
+                        * vma_needs_reservation call.  Memory allocation is
+                        * only attempted if a new entry is needed.  Therefore,
+                        * this implies there is not an entry in the
+                        * reserve map.
+                        *
+                        * For shared mappings, no entry in the map indicates
+                        * no reservation.  We are done.
+                        */
+                       if (!(vma->vm_flags & VM_MAYSHARE))
                                /*
-                                * See above comment about rare out of
-                                * memory condition.
+                                * For private mappings, no entry indicates
+                                * a reservation is present.  Since we can
+                                * not add an entry, set SetHPageRestoreReserve
+                                * on the page so reserve count will be
+                                * incremented when freed.  This reserve will
+                                * be consumed on a subsequent allocation.
                                 */
-                               ClearHPageRestoreReserve(page);
+                               SetHPageRestoreReserve(page);
                } else
-                       vma_end_reservation(h, vma, address);
+                       /*
+                        * No reservation present, do nothing
+                        */
+                        vma_end_reservation(h, vma, address);
        }
 }
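
Taken together, the new logic covers these outcomes (rc being the vma_needs_reservation() return value); a quick reference, paraphrasing the comments above:

        HPageRestoreReserve set,   rc < 0: clear the flag, skip map accounting
        HPageRestoreReserve set,   rc > 0: re-add the reserve map entry
        HPageRestoreReserve set,   rc = 0: vma_end_reservation()
        HPageRestoreReserve clear, rc = 0: vma_del_reservation(); on failure,
                                           set the flag so the reserve count
                                           is fixed up when the page is freed
        HPageRestoreReserve clear, rc < 0: private mappings set the flag;
                                           shared mappings are already done
        HPageRestoreReserve clear, rc > 0: no reservation present, end lookup
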
 
@@ -4037,6 +4108,8 @@ again:
                                spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
                                entry = huge_ptep_get(src_pte);
                                if (!pte_same(src_pte_old, entry)) {
+                                       restore_reserve_on_error(h, vma, addr,
+                                                               new);
                                        put_page(new);
                                        /* dst_entry won't change as in child */
                                        goto again;
@@ -5006,6 +5079,7 @@ out_release_unlock:
        if (vm_shared || is_continue)
                unlock_page(page);
 out_release_nounlock:
+       restore_reserve_on_error(h, dst_vma, dst_addr, page);
        put_page(page);
        goto out;
 }
@@ -5857,6 +5931,21 @@ unlock:
        return ret;
 }
 
+int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
+{
+       int ret = 0;
+
+       *hugetlb = false;
+       spin_lock_irq(&hugetlb_lock);
+       if (PageHeadHuge(page)) {
+               *hugetlb = true;
+               if (HPageFreed(page) || HPageMigratable(page))
+                       ret = get_page_unless_zero(page);
+       }
+       spin_unlock_irq(&hugetlb_lock);
+       return ret;
+}
+
 void putback_active_hugepage(struct page *page)
 {
        spin_lock_irq(&hugetlb_lock);
index 2f11829..e8fdb53 100644 (file)
@@ -384,27 +384,52 @@ static inline void mlock_migrate_page(struct page *newpage, struct page *page)
 extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma);
 
 /*
- * At what user virtual address is page expected in @vma?
+ * At what user virtual address is page expected in vma?
+ * Returns -EFAULT if all of the page is outside the range of vma.
+ * If page is a compound head, the entire compound page is considered.
  */
 static inline unsigned long
-__vma_address(struct page *page, struct vm_area_struct *vma)
+vma_address(struct page *page, struct vm_area_struct *vma)
 {
-       pgoff_t pgoff = page_to_pgoff(page);
-       return vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
+       pgoff_t pgoff;
+       unsigned long address;
+
+       VM_BUG_ON_PAGE(PageKsm(page), page);    /* KSM page->index unusable */
+       pgoff = page_to_pgoff(page);
+       if (pgoff >= vma->vm_pgoff) {
+               address = vma->vm_start +
+                       ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
+               /* Check for address beyond vma (or wrapped through 0?) */
+               if (address < vma->vm_start || address >= vma->vm_end)
+                       address = -EFAULT;
+       } else if (PageHead(page) &&
+                  pgoff + compound_nr(page) - 1 >= vma->vm_pgoff) {
+               /* Test above avoids possibility of wrap to 0 on 32-bit */
+               address = vma->vm_start;
+       } else {
+               address = -EFAULT;
+       }
+       return address;
 }
 
+/*
+ * Then at what user virtual address will none of the page be found in vma?
+ * Assumes that vma_address() already returned a good starting address.
+ * If page is a compound head, the entire compound page is considered.
+ */
 static inline unsigned long
-vma_address(struct page *page, struct vm_area_struct *vma)
+vma_address_end(struct page *page, struct vm_area_struct *vma)
 {
-       unsigned long start, end;
-
-       start = __vma_address(page, vma);
-       end = start + thp_size(page) - PAGE_SIZE;
-
-       /* page should be within @vma mapping range */
-       VM_BUG_ON_VMA(end < vma->vm_start || start >= vma->vm_end, vma);
-
-       return max(start, vma->vm_start);
+       pgoff_t pgoff;
+       unsigned long address;
+
+       VM_BUG_ON_PAGE(PageKsm(page), page);    /* KSM page->index unusable */
+       pgoff = page_to_pgoff(page) + compound_nr(page);
+       address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
+       /* Check for address beyond vma (or wrapped through 0?) */
+       if (address < vma->vm_start || address > vma->vm_end)
+               address = vma->vm_end;
+       return address;
 }
 
 static inline struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf,
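
A worked instance of the arithmetic above, with made-up numbers (a vma mapping file pages 16..31 at 0x700000000000, asking where file page 20 lands):

#include <stdio.h>

int main(void)
{
        /* Hypothetical values, chosen only to exercise the formula. */
        unsigned long vm_start = 0x700000000000UL;      /* vma->vm_start */
        unsigned long vm_pgoff = 16;                    /* vma->vm_pgoff */
        unsigned long pgoff    = 20;                    /* page_to_pgoff() */

        /* pgoff >= vm_pgoff, so the page starts inside the vma: */
        unsigned long addr = vm_start + ((pgoff - vm_pgoff) << 12);

        printf("0x%lx\n", addr);        /* prints 0x700000004000 */
        return 0;
}
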
index 85ad98c..0143d32 100644 (file)
@@ -949,6 +949,17 @@ static int page_action(struct page_state *ps, struct page *p,
        return (result == MF_RECOVERED || result == MF_DELAYED) ? 0 : -EBUSY;
 }
 
+/*
+ * Return true if the page type of a given page is supported by the
+ * hwpoison mechanism (though handling may still fail), otherwise false.
+ * This function does not return true for hugetlb or device memory pages,
+ * so it is assumed to be called only in contexts where such pages never
+ * occur.
+ */
+static inline bool HWPoisonHandlable(struct page *page)
+{
+       return PageLRU(page) || __PageMovable(page);
+}
+
 /**
  * __get_hwpoison_page() - Get refcount for memory error handling:
  * @page:      raw error page (hit by memory error)
@@ -959,8 +970,22 @@ static int page_action(struct page_state *ps, struct page *p,
 static int __get_hwpoison_page(struct page *page)
 {
        struct page *head = compound_head(page);
+       int ret = 0;
+       bool hugetlb = false;
+
+       ret = get_hwpoison_huge_page(head, &hugetlb);
+       if (hugetlb)
+               return ret;
 
-       if (!PageHuge(head) && PageTransHuge(head)) {
+       /*
+        * This check prevents calling get_hwpoison_unless_zero() for any
+        * unsupported type of page, in order to reduce the risk of
+        * unexpected races caused by taking a page refcount.
+        */
+       if (!HWPoisonHandlable(head))
+               return 0;
+
+       if (PageTransHuge(head)) {
                /*
                 * Non anonymous thp exists only in allocation/free time. We
                 * can't handle such a case correctly, so let's give it up.
@@ -1017,7 +1042,7 @@ try_again:
                        ret = -EIO;
                }
        } else {
-               if (PageHuge(p) || PageLRU(p) || __PageMovable(p)) {
+               if (PageHuge(p) || HWPoisonHandlable(p)) {
                        ret = 1;
                } else {
                        /*
@@ -1527,7 +1552,12 @@ try_again:
                return 0;
        }
 
-       if (!PageTransTail(p) && !PageLRU(p))
+       /*
+        * __munlock_pagevec may clear a writeback page's LRU flag without
+        * page_lock. We need to wait for writeback completion of this page,
+        * or it may trigger a vfs BUG while evicting the inode.
+        */
+       if (!PageTransTail(p) && !PageLRU(p) && !PageWriteback(p))
                goto identify_page_state;
 
        /*
index f3ffab9..486f4a2 100644 (file)
@@ -1361,7 +1361,18 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
                        else if (zap_huge_pmd(tlb, vma, pmd, addr))
                                goto next;
                        /* fall through */
+               } else if (details && details->single_page &&
+                          PageTransCompound(details->single_page) &&
+                          next - addr == HPAGE_PMD_SIZE && pmd_none(*pmd)) {
+                       spinlock_t *ptl = pmd_lock(tlb->mm, pmd);
+                       /*
+                        * Take and drop THP pmd lock so that we cannot return
+                        * prematurely, while zap_huge_pmd() has cleared *pmd,
+                        * but not yet decremented compound_mapcount().
+                        */
+                       spin_unlock(ptl);
                }
+
                /*
                 * Here there can be other concurrent MADV_DONTNEED or
                 * trans huge page faults running, and if the pmd is
@@ -3236,6 +3247,36 @@ static inline void unmap_mapping_range_tree(struct rb_root_cached *root,
        }
 }
 
+/**
+ * unmap_mapping_page() - Unmap single page from processes.
+ * @page: The locked page to be unmapped.
+ *
+ * Unmap this page from any userspace process which still has it mmaped.
+ * Typically, for efficiency, the range of nearby pages has already been
+ * unmapped by unmap_mapping_pages() or unmap_mapping_range().  But once
+ * truncation or invalidation holds the lock on a page, it may find that
+ * the page has been remapped again: and then uses unmap_mapping_page()
+ * to unmap it finally.
+ */
+void unmap_mapping_page(struct page *page)
+{
+       struct address_space *mapping = page->mapping;
+       struct zap_details details = { };
+
+       VM_BUG_ON(!PageLocked(page));
+       VM_BUG_ON(PageTail(page));
+
+       details.check_mapping = mapping;
+       details.first_index = page->index;
+       details.last_index = page->index + thp_nr_pages(page) - 1;
+       details.single_page = page;
+
+       i_mmap_lock_write(mapping);
+       if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)))
+               unmap_mapping_range_tree(&mapping->i_mmap, &details);
+       i_mmap_unlock_write(mapping);
+}
+
 /**
  * unmap_mapping_pages() - Unmap pages from processes.
  * @mapping: The address space containing pages to be unmapped.
index b234c3f..41ff2c9 100644 (file)
@@ -295,6 +295,7 @@ void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
                goto out;
 
        page = migration_entry_to_page(entry);
+       page = compound_head(page);
 
        /*
         * Once page cache replacement of page migration started, page_count
index 2cf01d9..e37bd43 100644 (file)
@@ -212,23 +212,34 @@ restart:
                        pvmw->ptl = NULL;
                }
        } else if (!pmd_present(pmde)) {
+               /*
+                * If PVMW_SYNC, take and drop THP pmd lock so that we
+                * cannot return prematurely, while zap_huge_pmd() has
+                * cleared *pmd but not decremented compound_mapcount().
+                */
+               if ((pvmw->flags & PVMW_SYNC) &&
+                   PageTransCompound(pvmw->page)) {
+                       spinlock_t *ptl = pmd_lock(mm, pvmw->pmd);
+
+                       spin_unlock(ptl);
+               }
                return false;
        }
        if (!map_pte(pvmw))
                goto next_pte;
        while (1) {
+               unsigned long end;
+
                if (check_pte(pvmw))
                        return true;
 next_pte:
                /* Seek to next pte only makes sense for THP */
                if (!PageTransHuge(pvmw->page) || PageHuge(pvmw->page))
                        return not_found(pvmw);
+               end = vma_address_end(pvmw->page, pvmw->vma);
                do {
                        pvmw->address += PAGE_SIZE;
-                       if (pvmw->address >= pvmw->vma->vm_end ||
-                           pvmw->address >=
-                                       __vma_address(pvmw->page, pvmw->vma) +
-                                       thp_size(pvmw->page))
+                       if (pvmw->address >= end)
                                return not_found(pvmw);
                        /* Did we cross page table boundary? */
                        if (pvmw->address % PMD_SIZE == 0) {
@@ -266,14 +277,10 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
                .vma = vma,
                .flags = PVMW_SYNC,
        };
-       unsigned long start, end;
-
-       start = __vma_address(page, vma);
-       end = start + thp_size(page) - PAGE_SIZE;
 
-       if (unlikely(end < vma->vm_start || start >= vma->vm_end))
+       pvmw.address = vma_address(page, vma);
+       if (pvmw.address == -EFAULT)
                return 0;
-       pvmw.address = max(start, vma->vm_start);
        if (!page_vma_mapped_walk(&pvmw))
                return 0;
        page_vma_mapped_walk_done(&pvmw);
index c2210e1..4e640ba 100644 (file)
@@ -135,9 +135,8 @@ pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address,
 {
        pmd_t pmd;
        VM_BUG_ON(address & ~HPAGE_PMD_MASK);
-       VM_BUG_ON(!pmd_present(*pmdp));
-       /* Below assumes pmd_present() is true */
-       VM_BUG_ON(!pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp));
+       VM_BUG_ON(pmd_present(*pmdp) && !pmd_trans_huge(*pmdp) &&
+                          !pmd_devmap(*pmdp));
        pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp);
        flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
        return pmd;
index 693a610..e05c300 100644 (file)
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -707,7 +707,6 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
  */
 unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
 {
-       unsigned long address;
        if (PageAnon(page)) {
                struct anon_vma *page__anon_vma = page_anon_vma(page);
                /*
@@ -717,15 +716,13 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
                if (!vma->anon_vma || !page__anon_vma ||
                    vma->anon_vma->root != page__anon_vma->root)
                        return -EFAULT;
-       } else if (page->mapping) {
-               if (!vma->vm_file || vma->vm_file->f_mapping != page->mapping)
-                       return -EFAULT;
-       } else
+       } else if (!vma->vm_file) {
                return -EFAULT;
-       address = __vma_address(page, vma);
-       if (unlikely(address < vma->vm_start || address >= vma->vm_end))
+       } else if (vma->vm_file->f_mapping != compound_head(page)->mapping) {
                return -EFAULT;
-       return address;
+       }
+
+       return vma_address(page, vma);
 }
 
 pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address)
@@ -919,7 +916,7 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
         */
        mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE,
                                0, vma, vma->vm_mm, address,
-                               min(vma->vm_end, address + page_size(page)));
+                               vma_address_end(page, vma));
        mmu_notifier_invalidate_range_start(&range);
 
        while (page_vma_mapped_walk(&pvmw)) {
@@ -1405,6 +1402,15 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
        struct mmu_notifier_range range;
        enum ttu_flags flags = (enum ttu_flags)(long)arg;
 
+       /*
+        * When racing against e.g. zap_pte_range() on another cpu,
+        * in between its ptep_get_and_clear_full() and page_remove_rmap(),
+        * try_to_unmap() may return false when it is about to become true,
+        * if page table locking is skipped: use TTU_SYNC to wait for that.
+        */
+       if (flags & TTU_SYNC)
+               pvmw.flags = PVMW_SYNC;
+
        /* munlock has nothing to gain from examining un-locked vmas */
        if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED))
                return true;
@@ -1426,9 +1432,10 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
         * Note that the page can not be free in this function as call of
         * try_to_unmap() must hold a reference on the page.
         */
+       range.end = PageKsm(page) ?
+                       address + PAGE_SIZE : vma_address_end(page, vma);
        mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
-                               address,
-                               min(vma->vm_end, address + page_size(page)));
+                               address, range.end);
        if (PageHuge(page)) {
                /*
                 * If sharing is possible, start and end will be adjusted
@@ -1777,7 +1784,13 @@ bool try_to_unmap(struct page *page, enum ttu_flags flags)
        else
                rmap_walk(page, &rwc);
 
-       return !page_mapcount(page) ? true : false;
+       /*
+        * When racing against e.g. zap_pte_range() on another cpu,
+        * in between its ptep_get_and_clear_full() and page_remove_rmap(),
+        * try_to_unmap() may return false when it is about to become true,
+        * if page table locking is skipped: use TTU_SYNC to wait for that.
+        */
+       return !page_mapcount(page);
 }
 
 /**
@@ -1874,6 +1887,7 @@ static void rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc,
                struct vm_area_struct *vma = avc->vma;
                unsigned long address = vma_address(page, vma);
 
+               VM_BUG_ON_VMA(address == -EFAULT, vma);
                cond_resched();
 
                if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
@@ -1928,6 +1942,7 @@ static void rmap_walk_file(struct page *page, struct rmap_walk_control *rwc,
                        pgoff_start, pgoff_end) {
                unsigned long address = vma_address(page, vma);
 
+               VM_BUG_ON_VMA(address == -EFAULT, vma);
                cond_resched();
 
                if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
index a4a5714..7cab776 100644 (file)
@@ -97,8 +97,7 @@ EXPORT_SYMBOL(kmem_cache_size);
 #ifdef CONFIG_DEBUG_VM
 static int kmem_cache_sanity_check(const char *name, unsigned int size)
 {
-       if (!name || in_interrupt() || size < sizeof(void *) ||
-               size > KMALLOC_MAX_SIZE) {
+       if (!name || in_interrupt() || size > KMALLOC_MAX_SIZE) {
                pr_err("kmem_cache_create(%s) integrity check failed\n", name);
                return -EINVAL;
        }
index 3f96e09..61bd40e 100644 (file)
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -15,6 +15,7 @@
 #include <linux/module.h>
 #include <linux/bit_spinlock.h>
 #include <linux/interrupt.h>
+#include <linux/swab.h>
 #include <linux/bitops.h>
 #include <linux/slab.h>
 #include "slab.h"
@@ -712,15 +713,15 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
               p, p - addr, get_freepointer(s, p));
 
        if (s->flags & SLAB_RED_ZONE)
-               print_section(KERN_ERR, "Redzone ", p - s->red_left_pad,
+               print_section(KERN_ERR, "Redzone  ", p - s->red_left_pad,
                              s->red_left_pad);
        else if (p > addr + 16)
                print_section(KERN_ERR, "Bytes b4 ", p - 16, 16);
 
-       print_section(KERN_ERR, "Object ", p,
+       print_section(KERN_ERR,         "Object   ", p,
                      min_t(unsigned int, s->object_size, PAGE_SIZE));
        if (s->flags & SLAB_RED_ZONE)
-               print_section(KERN_ERR, "Redzone ", p + s->object_size,
+               print_section(KERN_ERR, "Redzone  ", p + s->object_size,
                        s->inuse - s->object_size);
 
        off = get_info_end(s);
@@ -732,7 +733,7 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
 
        if (off != size_from_object(s))
                /* Beginning of the filler is the free pointer */
-               print_section(KERN_ERR, "Padding ", p + off,
+               print_section(KERN_ERR, "Padding  ", p + off,
                              size_from_object(s) - off);
 
        dump_stack();
@@ -909,11 +910,11 @@ static int check_object(struct kmem_cache *s, struct page *page,
        u8 *endobject = object + s->object_size;
 
        if (s->flags & SLAB_RED_ZONE) {
-               if (!check_bytes_and_report(s, page, object, "Redzone",
+               if (!check_bytes_and_report(s, page, object, "Left Redzone",
                        object - s->red_left_pad, val, s->red_left_pad))
                        return 0;
 
-               if (!check_bytes_and_report(s, page, object, "Redzone",
+               if (!check_bytes_and_report(s, page, object, "Right Redzone",
                        endobject, val, s->inuse - s->object_size))
                        return 0;
        } else {
@@ -928,7 +929,7 @@ static int check_object(struct kmem_cache *s, struct page *page,
                if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) &&
                        (!check_bytes_and_report(s, page, p, "Poison", p,
                                        POISON_FREE, s->object_size - 1) ||
-                        !check_bytes_and_report(s, page, p, "Poison",
+                        !check_bytes_and_report(s, page, p, "End Poison",
                                p + s->object_size - 1, POISON_END, 1)))
                        return 0;
                /*
@@ -3689,7 +3690,6 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
 {
        slab_flags_t flags = s->flags;
        unsigned int size = s->object_size;
-       unsigned int freepointer_area;
        unsigned int order;
 
        /*
@@ -3698,13 +3698,6 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
         * the possible location of the free pointer.
         */
        size = ALIGN(size, sizeof(void *));
-       /*
-        * This is the area of the object where a freepointer can be
-        * safely written. If redzoning adds more to the inuse size, we
-        * can't use that portion for writing the freepointer, so
-        * s->offset must be limited within this for the general case.
-        */
-       freepointer_area = size;
 
 #ifdef CONFIG_SLUB_DEBUG
        /*
@@ -3730,19 +3723,21 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
 
        /*
         * With that we have determined the number of bytes in actual use
-        * by the object. This is the potential offset to the free pointer.
+        * by the object and redzoning.
         */
        s->inuse = size;
 
-       if (((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) ||
-               s->ctor)) {
+       if ((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) ||
+           ((flags & SLAB_RED_ZONE) && s->object_size < sizeof(void *)) ||
+           s->ctor) {
                /*
                 * Relocate free pointer after the object if it is not
                 * permitted to overwrite the first word of the object on
                 * kmem_cache_free.
                 *
                 * This is the case if we do RCU, have a constructor or
-                * destructor or are poisoning the objects.
+                * destructor, are poisoning the objects, or are
+                * redzoning an object smaller than sizeof(void *).
                 *
                 * The assumption that s->offset >= s->inuse means free
                 * pointer is outside of the object is used in the
@@ -3751,13 +3746,13 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
                 */
                s->offset = size;
                size += sizeof(void *);
-       } else if (freepointer_area > sizeof(void *)) {
+       } else {
                /*
                 * Store freelist pointer near middle of object to keep
                 * it away from the edges of the object to avoid small
                 * sized over/underflows from neighboring allocations.
                 */
-               s->offset = ALIGN(freepointer_area / 2, sizeof(void *));
+               s->offset = ALIGN_DOWN(s->object_size / 2, sizeof(void *));
        }
 
 #ifdef CONFIG_SLUB_DEBUG
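
With the fallback above, the free pointer of a small object now sits at ALIGN_DOWN(object_size / 2, sizeof(void *)) rather than halfway into the possibly larger padded size; for example, a 24-byte object on a 64-bit machine keeps its freelist pointer at offset 8:

#include <stdio.h>

/* Simplified ALIGN_DOWN for power-of-two alignments. */
#define ALIGN_DOWN(x, a) ((x) & ~((a) - 1))

int main(void)
{
        unsigned long object_size = 24;

        /* Offset 8: inside the object, away from both edges. */
        printf("%lu\n", ALIGN_DOWN(object_size / 2, sizeof(void *)));
        return 0;
}
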
index b2ada9d..55c18af 100644 (file)
@@ -344,6 +344,15 @@ size_t mem_section_usage_size(void)
        return sizeof(struct mem_section_usage) + usemap_size();
 }
 
+static inline phys_addr_t pgdat_to_phys(struct pglist_data *pgdat)
+{
+#ifndef CONFIG_NEED_MULTIPLE_NODES
+       return __pa_symbol(pgdat);
+#else
+       return __pa(pgdat);
+#endif
+}
+
 #ifdef CONFIG_MEMORY_HOTREMOVE
 static struct mem_section_usage * __init
 sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
@@ -362,7 +371,7 @@ sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
         * from the same section as the pgdat where possible to avoid
         * this problem.
         */
-       goal = __pa(pgdat) & (PAGE_SECTION_MASK << PAGE_SHIFT);
+       goal = pgdat_to_phys(pgdat) & (PAGE_SECTION_MASK << PAGE_SHIFT);
        limit = goal + (1UL << PA_SECTION_SHIFT);
        nid = early_pfn_to_nid(goal >> PAGE_SHIFT);
 again:
@@ -390,7 +399,7 @@ static void __init check_usemap_section_nr(int nid,
        }
 
        usemap_snr = pfn_to_section_nr(__pa(usage) >> PAGE_SHIFT);
-       pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
+       pgdat_snr = pfn_to_section_nr(pgdat_to_phys(pgdat) >> PAGE_SHIFT);
        if (usemap_snr == pgdat_snr)
                return;
 
index 149e774..996afa8 100644 (file)
@@ -1900,7 +1900,7 @@ unsigned int count_swap_pages(int type, int free)
 
 static inline int pte_same_as_swp(pte_t pte, pte_t swp_pte)
 {
-       return pte_same(pte_swp_clear_soft_dirty(pte), swp_pte);
+       return pte_same(pte_swp_clear_flags(pte), swp_pte);
 }
 
 /*
index 95af244..234ddd8 100644 (file)
@@ -167,13 +167,10 @@ void do_invalidatepage(struct page *page, unsigned int offset,
  * its lock, b) when a concurrent invalidate_mapping_pages got there first and
  * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space.
  */
-static void
-truncate_cleanup_page(struct address_space *mapping, struct page *page)
+static void truncate_cleanup_page(struct page *page)
 {
-       if (page_mapped(page)) {
-               unsigned int nr = thp_nr_pages(page);
-               unmap_mapping_pages(mapping, page->index, nr, false);
-       }
+       if (page_mapped(page))
+               unmap_mapping_page(page);
 
        if (page_has_private(page))
                do_invalidatepage(page, 0, thp_size(page));
@@ -218,7 +215,7 @@ int truncate_inode_page(struct address_space *mapping, struct page *page)
        if (page->mapping != mapping)
                return -EIO;
 
-       truncate_cleanup_page(mapping, page);
+       truncate_cleanup_page(page);
        delete_from_page_cache(page);
        return 0;
 }
@@ -325,7 +322,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
                index = indices[pagevec_count(&pvec) - 1] + 1;
                truncate_exceptional_pvec_entries(mapping, &pvec, indices);
                for (i = 0; i < pagevec_count(&pvec); i++)
-                       truncate_cleanup_page(mapping, pvec.pages[i]);
+                       truncate_cleanup_page(pvec.pages[i]);
                delete_from_page_cache_batch(mapping, &pvec);
                for (i = 0; i < pagevec_count(&pvec); i++)
                        unlock_page(pvec.pages[i]);
@@ -639,6 +636,16 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
                                continue;
                        }
 
+                       if (!did_range_unmap && page_mapped(page)) {
+                               /*
+                                * If page is mapped, before taking its lock,
+                                * zap the rest of the file in one hit.
+                                */
+                               unmap_mapping_pages(mapping, index,
+                                               (1 + end - index), false);
+                               did_range_unmap = 1;
+                       }
+
                        lock_page(page);
                        WARN_ON(page_to_index(page) != index);
                        if (page->mapping != mapping) {
@@ -646,23 +653,11 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
                                continue;
                        }
                        wait_on_page_writeback(page);
-                       if (page_mapped(page)) {
-                               if (!did_range_unmap) {
-                                       /*
-                                        * Zap the rest of the file in one hit.
-                                        */
-                                       unmap_mapping_pages(mapping, index,
-                                               (1 + end - index), false);
-                                       did_range_unmap = 1;
-                               } else {
-                                       /*
-                                        * Just zap this page
-                                        */
-                                       unmap_mapping_pages(mapping, index,
-                                                               1, false);
-                               }
-                       }
+
+                       if (page_mapped(page))
+                               unmap_mapping_page(page);
                        BUG_ON(page_mapped(page));
+
                        ret2 = do_launder_page(mapping, page);
                        if (ret2 == 0) {
                                if (!invalidate_complete_page2(mapping, page))
index be18af4..c7236da 100644 (file)
@@ -768,7 +768,7 @@ static int aarp_rcv(struct sk_buff *skb, struct net_device *dev,
        if (a && a->status & ATIF_PROBE) {
                a->status |= ATIF_PROBE_FAIL;
                /*
-                * we do not respond to probe or request packets for
+                * we do not respond to probe or request packets of
                 * this address while we are probing this address
                 */
                goto unlock;
index 789f257..fc8be49 100644 (file)
@@ -409,8 +409,10 @@ static void batadv_iv_ogm_emit(struct batadv_forw_packet *forw_packet)
        if (WARN_ON(!forw_packet->if_outgoing))
                return;
 
-       if (WARN_ON(forw_packet->if_outgoing->soft_iface != soft_iface))
+       if (forw_packet->if_outgoing->soft_iface != soft_iface) {
+               pr_warn("%s: soft interface switch for queued OGM\n", __func__);
                return;
+       }
 
        if (forw_packet->if_incoming->if_status != BATADV_IF_ACTIVE)
                return;
index 372e3b2..7dd51da 100644 (file)
@@ -3229,7 +3229,7 @@ static inline struct l2cap_chan *smp_new_conn_cb(struct l2cap_chan *pchan)
 {
        struct l2cap_chan *chan;
 
-       bt_dev_dbg(pchan->conn->hcon->hdev, "pchan %p", pchan);
+       BT_DBG("pchan %p", pchan);
 
        chan = l2cap_chan_create();
        if (!chan)
@@ -3250,7 +3250,7 @@ static inline struct l2cap_chan *smp_new_conn_cb(struct l2cap_chan *pchan)
         */
        atomic_set(&chan->nesting, L2CAP_NESTING_SMP);
 
-       bt_dev_dbg(pchan->conn->hcon->hdev, "created chan %p", chan);
+       BT_DBG("created chan %p", chan);
 
        return chan;
 }
@@ -3354,7 +3354,7 @@ static void smp_del_chan(struct l2cap_chan *chan)
 {
        struct smp_dev *smp;
 
-       bt_dev_dbg(chan->conn->hcon->hdev, "chan %p", chan);
+       BT_DBG("chan %p", chan);
 
        smp = chan->data;
        if (smp) {
index 7ce8a77..e013d33 100644 (file)
@@ -90,8 +90,8 @@ struct bridge_mcast_stats {
 #endif
 
 struct br_tunnel_info {
-       __be64                  tunnel_id;
-       struct metadata_dst     *tunnel_dst;
+       __be64                          tunnel_id;
+       struct metadata_dst __rcu       *tunnel_dst;
 };
 
 /* private vlan flags */
index 0d3a8c0..0101744 100644 (file)
@@ -41,26 +41,33 @@ static struct net_bridge_vlan *br_vlan_tunnel_lookup(struct rhashtable *tbl,
                                      br_vlan_tunnel_rht_params);
 }
 
+static void vlan_tunnel_info_release(struct net_bridge_vlan *vlan)
+{
+       struct metadata_dst *tdst = rtnl_dereference(vlan->tinfo.tunnel_dst);
+
+       WRITE_ONCE(vlan->tinfo.tunnel_id, 0);
+       RCU_INIT_POINTER(vlan->tinfo.tunnel_dst, NULL);
+       dst_release(&tdst->dst);
+}
+
 void vlan_tunnel_info_del(struct net_bridge_vlan_group *vg,
                          struct net_bridge_vlan *vlan)
 {
-       if (!vlan->tinfo.tunnel_dst)
+       if (!rcu_access_pointer(vlan->tinfo.tunnel_dst))
                return;
        rhashtable_remove_fast(&vg->tunnel_hash, &vlan->tnode,
                               br_vlan_tunnel_rht_params);
-       vlan->tinfo.tunnel_id = 0;
-       dst_release(&vlan->tinfo.tunnel_dst->dst);
-       vlan->tinfo.tunnel_dst = NULL;
+       vlan_tunnel_info_release(vlan);
 }
 
 static int __vlan_tunnel_info_add(struct net_bridge_vlan_group *vg,
                                  struct net_bridge_vlan *vlan, u32 tun_id)
 {
-       struct metadata_dst *metadata = NULL;
+       struct metadata_dst *metadata = rtnl_dereference(vlan->tinfo.tunnel_dst);
        __be64 key = key32_to_tunnel_id(cpu_to_be32(tun_id));
        int err;
 
-       if (vlan->tinfo.tunnel_dst)
+       if (metadata)
                return -EEXIST;
 
        metadata = __ip_tun_set_dst(0, 0, 0, 0, 0, TUNNEL_KEY,
@@ -69,8 +76,8 @@ static int __vlan_tunnel_info_add(struct net_bridge_vlan_group *vg,
                return -EINVAL;
 
        metadata->u.tun_info.mode |= IP_TUNNEL_INFO_TX | IP_TUNNEL_INFO_BRIDGE;
-       vlan->tinfo.tunnel_dst = metadata;
-       vlan->tinfo.tunnel_id = key;
+       rcu_assign_pointer(vlan->tinfo.tunnel_dst, metadata);
+       WRITE_ONCE(vlan->tinfo.tunnel_id, key);
 
        err = rhashtable_lookup_insert_fast(&vg->tunnel_hash, &vlan->tnode,
                                            br_vlan_tunnel_rht_params);
@@ -79,9 +86,7 @@ static int __vlan_tunnel_info_add(struct net_bridge_vlan_group *vg,
 
        return 0;
 out:
-       dst_release(&vlan->tinfo.tunnel_dst->dst);
-       vlan->tinfo.tunnel_dst = NULL;
-       vlan->tinfo.tunnel_id = 0;
+       vlan_tunnel_info_release(vlan);
 
        return err;
 }
@@ -182,12 +187,15 @@ int br_handle_ingress_vlan_tunnel(struct sk_buff *skb,
 int br_handle_egress_vlan_tunnel(struct sk_buff *skb,
                                 struct net_bridge_vlan *vlan)
 {
+       struct metadata_dst *tunnel_dst;
+       __be64 tunnel_id;
        int err;
 
-       if (!vlan || !vlan->tinfo.tunnel_id)
+       if (!vlan)
                return 0;
 
-       if (unlikely(!skb_vlan_tag_present(skb)))
+       tunnel_id = READ_ONCE(vlan->tinfo.tunnel_id);
+       if (!tunnel_id || unlikely(!skb_vlan_tag_present(skb)))
                return 0;
 
        skb_dst_drop(skb);
@@ -195,7 +203,9 @@ int br_handle_egress_vlan_tunnel(struct sk_buff *skb,
        if (err)
                return err;
 
-       skb_dst_set(skb, dst_clone(&vlan->tinfo.tunnel_dst->dst));
+       tunnel_dst = rcu_dereference(vlan->tinfo.tunnel_dst);
+       if (tunnel_dst && dst_hold_safe(&tunnel_dst->dst))
+               skb_dst_set(skb, &tunnel_dst->dst);
 
        return 0;
 }
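
The two bridge hunks above are one logical change: tunnel_dst becomes an RCU-managed pointer (the __rcu annotation lets sparse flag unannotated accesses), writers update it under RTNL, and the egress fast path now takes its own reference with dst_hold_safe() instead of calling dst_clone() on a pointer that may be concurrently freed. A minimal sketch of that publish/lookup shape; obj, holder and the function names are illustrative, not from the patch:

    #include <linux/rcupdate.h>
    #include <linux/refcount.h>
    #include <linux/rtnetlink.h>
    #include <linux/slab.h>

    struct obj {
            refcount_t refcnt;
            struct rcu_head rcu;
    };

    struct holder {
            struct obj __rcu *cur;  /* sparse now checks every access */
    };

    /* writer side, serialized by RTNL like the bridge code */
    static void publish(struct holder *h, struct obj *o)
    {
            rcu_assign_pointer(h->cur, o);  /* pairs with rcu_dereference() */
    }

    static void retract(struct holder *h)
    {
            struct obj *o = rtnl_dereference(h->cur);

            RCU_INIT_POINTER(h->cur, NULL);
            if (o && refcount_dec_and_test(&o->refcnt))
                    kfree_rcu(o, rcu);      /* free only after readers drain */
    }

    /* reader side, analogous to br_handle_egress_vlan_tunnel() */
    static struct obj *get_ref(struct holder *h)
    {
            struct obj *o;

            rcu_read_lock();
            o = rcu_dereference(h->cur);
            /* like dst_hold_safe(): use the object only if its refcount
             * was still live when we reached it */
            if (o && !refcount_inc_not_zero(&o->refcnt))
                    o = NULL;
            rcu_read_unlock();
            return o;
    }

The WRITE_ONCE()/READ_ONCE() pair on tunnel_id in the patch serves the same purpose for the scalar half of the tuple: lockless readers see either the old or the new value, never a torn one.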
index 909b9e6..f3e4d95 100644 (file)
@@ -125,7 +125,7 @@ struct bcm_sock {
        struct sock sk;
        int bound;
        int ifindex;
-       struct notifier_block notifier;
+       struct list_head notifier;
        struct list_head rx_ops;
        struct list_head tx_ops;
        unsigned long dropped_usr_msgs;
@@ -133,6 +133,10 @@ struct bcm_sock {
        char procname [32]; /* inode number in decimal with \0 */
 };
 
+static LIST_HEAD(bcm_notifier_list);
+static DEFINE_SPINLOCK(bcm_notifier_lock);
+static struct bcm_sock *bcm_busy_notifier;
+
 static inline struct bcm_sock *bcm_sk(const struct sock *sk)
 {
        return (struct bcm_sock *)sk;
@@ -402,6 +406,7 @@ static enum hrtimer_restart bcm_tx_timeout_handler(struct hrtimer *hrtimer)
                if (!op->count && (op->flags & TX_COUNTEVT)) {
 
                        /* create notification to user */
+                       memset(&msg_head, 0, sizeof(msg_head));
                        msg_head.opcode  = TX_EXPIRED;
                        msg_head.flags   = op->flags;
                        msg_head.count   = op->count;
@@ -439,6 +444,7 @@ static void bcm_rx_changed(struct bcm_op *op, struct canfd_frame *data)
        /* this element is not throttled anymore */
        data->flags &= (BCM_CAN_FLAGS_MASK|RX_RECV);
 
+       memset(&head, 0, sizeof(head));
        head.opcode  = RX_CHANGED;
        head.flags   = op->flags;
        head.count   = op->count;
@@ -560,6 +566,7 @@ static enum hrtimer_restart bcm_rx_timeout_handler(struct hrtimer *hrtimer)
        }
 
        /* create notification to user */
+       memset(&msg_head, 0, sizeof(msg_head));
        msg_head.opcode  = RX_TIMEOUT;
        msg_head.flags   = op->flags;
        msg_head.count   = op->count;
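
The three memset() additions in bcm.c (here and in the neighbouring hunks) plug a kernel stack infoleak: struct bcm_msg_head is built on the stack and copied wholesale to userspace as a notification, so any padding byte or field not explicitly assigned would carry stale stack contents. The general rule, sketched with a hypothetical struct:

    #include <linux/string.h>
    #include <linux/uaccess.h>

    struct report {
            u8  opcode;
            /* 3 bytes of implicit padding live here */
            u32 value;
    };

    static int send_report(void __user *uptr, u8 opcode, u32 value)
    {
            struct report r;

            memset(&r, 0, sizeof(r));       /* clears the padding too */
            r.opcode = opcode;
            r.value  = value;
            /* designated initializers zero named members, but are not
             * guaranteed to zero padding; memset() is */
            return copy_to_user(uptr, &r, sizeof(r)) ? -EFAULT : 0;
    }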
@@ -1378,20 +1385,15 @@ static int bcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
 /*
  * notification handler for netdevice status changes
  */
-static int bcm_notifier(struct notifier_block *nb, unsigned long msg,
-                       void *ptr)
+static void bcm_notify(struct bcm_sock *bo, unsigned long msg,
+                      struct net_device *dev)
 {
-       struct net_device *dev = netdev_notifier_info_to_dev(ptr);
-       struct bcm_sock *bo = container_of(nb, struct bcm_sock, notifier);
        struct sock *sk = &bo->sk;
        struct bcm_op *op;
        int notify_enodev = 0;
 
        if (!net_eq(dev_net(dev), sock_net(sk)))
-               return NOTIFY_DONE;
-
-       if (dev->type != ARPHRD_CAN)
-               return NOTIFY_DONE;
+               return;
 
        switch (msg) {
 
@@ -1426,7 +1428,28 @@ static int bcm_notifier(struct notifier_block *nb, unsigned long msg,
                                sk->sk_error_report(sk);
                }
        }
+}
 
+static int bcm_notifier(struct notifier_block *nb, unsigned long msg,
+                       void *ptr)
+{
+       struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+
+       if (dev->type != ARPHRD_CAN)
+               return NOTIFY_DONE;
+       if (msg != NETDEV_UNREGISTER && msg != NETDEV_DOWN)
+               return NOTIFY_DONE;
+       if (unlikely(bcm_busy_notifier)) /* Check for reentrant bug. */
+               return NOTIFY_DONE;
+
+       spin_lock(&bcm_notifier_lock);
+       list_for_each_entry(bcm_busy_notifier, &bcm_notifier_list, notifier) {
+               spin_unlock(&bcm_notifier_lock);
+               bcm_notify(bcm_busy_notifier, msg, dev);
+               spin_lock(&bcm_notifier_lock);
+       }
+       bcm_busy_notifier = NULL;
+       spin_unlock(&bcm_notifier_lock);
        return NOTIFY_DONE;
 }
 
@@ -1446,9 +1469,9 @@ static int bcm_init(struct sock *sk)
        INIT_LIST_HEAD(&bo->rx_ops);
 
        /* set notifier */
-       bo->notifier.notifier_call = bcm_notifier;
-
-       register_netdevice_notifier(&bo->notifier);
+       spin_lock(&bcm_notifier_lock);
+       list_add_tail(&bo->notifier, &bcm_notifier_list);
+       spin_unlock(&bcm_notifier_lock);
 
        return 0;
 }
@@ -1471,7 +1494,14 @@ static int bcm_release(struct socket *sock)
 
        /* remove bcm_ops, timer, rx_unregister(), etc. */
 
-       unregister_netdevice_notifier(&bo->notifier);
+       spin_lock(&bcm_notifier_lock);
+       while (bcm_busy_notifier == bo) {
+               spin_unlock(&bcm_notifier_lock);
+               schedule_timeout_uninterruptible(1);
+               spin_lock(&bcm_notifier_lock);
+       }
+       list_del(&bo->notifier);
+       spin_unlock(&bcm_notifier_lock);
 
        lock_sock(sk);
 
@@ -1692,6 +1722,10 @@ static struct pernet_operations canbcm_pernet_ops __read_mostly = {
        .exit = canbcm_pernet_exit,
 };
 
+static struct notifier_block canbcm_notifier = {
+       .notifier_call = bcm_notifier
+};
+
 static int __init bcm_module_init(void)
 {
        int err;
@@ -1705,12 +1739,14 @@ static int __init bcm_module_init(void)
        }
 
        register_pernet_subsys(&canbcm_pernet_ops);
+       register_netdevice_notifier(&canbcm_notifier);
        return 0;
 }
 
 static void __exit bcm_module_exit(void)
 {
        can_proto_unregister(&bcm_can_proto);
+       unregister_netdevice_notifier(&canbcm_notifier);
        unregister_pernet_subsys(&canbcm_pernet_ops);
 }
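
bcm is the first of three CAN protocols converted the same way in this merge (isotp and raw follow): instead of registering one netdevice notifier per socket, the module registers a single notifier that walks a spinlock-protected list of sockets, dropping the lock around each per-socket callback and parking the current entry in a *_busy_notifier cursor so that release() can wait until the walker has moved past the socket being torn down. Reduced to its skeleton, with proto_sock and proto_notify() as placeholders:

    struct proto_sock {
            struct list_head notifier;
            /* ... */
    };

    static LIST_HEAD(proto_notifier_list);
    static DEFINE_SPINLOCK(proto_notifier_lock);
    static struct proto_sock *proto_busy_notifier;

    static void proto_walk(unsigned long msg, struct net_device *dev)
    {
            spin_lock(&proto_notifier_lock);
            list_for_each_entry(proto_busy_notifier,
                                &proto_notifier_list, notifier) {
                    /* drop the lock: the per-socket handler may sleep */
                    spin_unlock(&proto_notifier_lock);
                    proto_notify(proto_busy_notifier, msg, dev);
                    spin_lock(&proto_notifier_lock);
            }
            proto_busy_notifier = NULL;
            spin_unlock(&proto_notifier_lock);
    }

    static void proto_unlink(struct proto_sock *po)
    {
            spin_lock(&proto_notifier_lock);
            /* wait until the walker is no longer visiting this socket */
            while (proto_busy_notifier == po) {
                    spin_unlock(&proto_notifier_lock);
                    schedule_timeout_uninterruptible(1);
                    spin_lock(&proto_notifier_lock);
            }
            list_del(&po->notifier);
            spin_unlock(&proto_notifier_lock);
    }

Besides keeping the notifier chain at constant length regardless of how many sockets are open, this serializes notifier delivery against socket release, which the per-socket registration did not.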
 
index 253b244..be6183f 100644 (file)
@@ -143,10 +143,14 @@ struct isotp_sock {
        u32 force_tx_stmin;
        u32 force_rx_stmin;
        struct tpcon rx, tx;
-       struct notifier_block notifier;
+       struct list_head notifier;
        wait_queue_head_t wait;
 };
 
+static LIST_HEAD(isotp_notifier_list);
+static DEFINE_SPINLOCK(isotp_notifier_lock);
+static struct isotp_sock *isotp_busy_notifier;
+
 static inline struct isotp_sock *isotp_sk(const struct sock *sk)
 {
        return (struct isotp_sock *)sk;
@@ -1013,7 +1017,14 @@ static int isotp_release(struct socket *sock)
        /* wait for complete transmission of current pdu */
        wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE);
 
-       unregister_netdevice_notifier(&so->notifier);
+       spin_lock(&isotp_notifier_lock);
+       while (isotp_busy_notifier == so) {
+               spin_unlock(&isotp_notifier_lock);
+               schedule_timeout_uninterruptible(1);
+               spin_lock(&isotp_notifier_lock);
+       }
+       list_del(&so->notifier);
+       spin_unlock(&isotp_notifier_lock);
 
        lock_sock(sk);
 
@@ -1317,21 +1328,16 @@ static int isotp_getsockopt(struct socket *sock, int level, int optname,
        return 0;
 }
 
-static int isotp_notifier(struct notifier_block *nb, unsigned long msg,
-                         void *ptr)
+static void isotp_notify(struct isotp_sock *so, unsigned long msg,
+                        struct net_device *dev)
 {
-       struct net_device *dev = netdev_notifier_info_to_dev(ptr);
-       struct isotp_sock *so = container_of(nb, struct isotp_sock, notifier);
        struct sock *sk = &so->sk;
 
        if (!net_eq(dev_net(dev), sock_net(sk)))
-               return NOTIFY_DONE;
-
-       if (dev->type != ARPHRD_CAN)
-               return NOTIFY_DONE;
+               return;
 
        if (so->ifindex != dev->ifindex)
-               return NOTIFY_DONE;
+               return;
 
        switch (msg) {
        case NETDEV_UNREGISTER:
@@ -1357,7 +1363,28 @@ static int isotp_notifier(struct notifier_block *nb, unsigned long msg,
                        sk->sk_error_report(sk);
                break;
        }
+}
 
+static int isotp_notifier(struct notifier_block *nb, unsigned long msg,
+                         void *ptr)
+{
+       struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+
+       if (dev->type != ARPHRD_CAN)
+               return NOTIFY_DONE;
+       if (msg != NETDEV_UNREGISTER && msg != NETDEV_DOWN)
+               return NOTIFY_DONE;
+       if (unlikely(isotp_busy_notifier)) /* Check for reentrant bug. */
+               return NOTIFY_DONE;
+
+       spin_lock(&isotp_notifier_lock);
+       list_for_each_entry(isotp_busy_notifier, &isotp_notifier_list, notifier) {
+               spin_unlock(&isotp_notifier_lock);
+               isotp_notify(isotp_busy_notifier, msg, dev);
+               spin_lock(&isotp_notifier_lock);
+       }
+       isotp_busy_notifier = NULL;
+       spin_unlock(&isotp_notifier_lock);
        return NOTIFY_DONE;
 }
 
@@ -1394,8 +1421,9 @@ static int isotp_init(struct sock *sk)
 
        init_waitqueue_head(&so->wait);
 
-       so->notifier.notifier_call = isotp_notifier;
-       register_netdevice_notifier(&so->notifier);
+       spin_lock(&isotp_notifier_lock);
+       list_add_tail(&so->notifier, &isotp_notifier_list);
+       spin_unlock(&isotp_notifier_lock);
 
        return 0;
 }
@@ -1442,6 +1470,10 @@ static const struct can_proto isotp_can_proto = {
        .prot = &isotp_proto,
 };
 
+static struct notifier_block canisotp_notifier = {
+       .notifier_call = isotp_notifier
+};
+
 static __init int isotp_module_init(void)
 {
        int err;
@@ -1451,6 +1483,8 @@ static __init int isotp_module_init(void)
        err = can_proto_register(&isotp_can_proto);
        if (err < 0)
                pr_err("can: registration of isotp protocol failed\n");
+       else
+               register_netdevice_notifier(&canisotp_notifier);
 
        return err;
 }
@@ -1458,6 +1492,7 @@ static __init int isotp_module_init(void)
 static __exit void isotp_module_exit(void)
 {
        can_proto_unregister(&isotp_can_proto);
+       unregister_netdevice_notifier(&canisotp_notifier);
 }
 
 module_init(isotp_module_init);
index e09d087..c3946c3 100644 (file)
@@ -330,6 +330,9 @@ static void j1939_session_skb_drop_old(struct j1939_session *session)
 
        if ((do_skcb->offset + do_skb->len) < offset_start) {
                __skb_unlink(do_skb, &session->skb_queue);
+               /* drop ref taken in j1939_session_skb_queue() */
+               skb_unref(do_skb);
+
                kfree_skb(do_skb);
        }
        spin_unlock_irqrestore(&session->skb_queue.lock, flags);
@@ -349,12 +352,13 @@ void j1939_session_skb_queue(struct j1939_session *session,
 
        skcb->flags |= J1939_ECU_LOCAL_SRC;
 
+       skb_get(skb);
        skb_queue_tail(&session->skb_queue, skb);
 }
 
 static struct
-sk_buff *j1939_session_skb_find_by_offset(struct j1939_session *session,
-                                         unsigned int offset_start)
+sk_buff *j1939_session_skb_get_by_offset(struct j1939_session *session,
+                                        unsigned int offset_start)
 {
        struct j1939_priv *priv = session->priv;
        struct j1939_sk_buff_cb *do_skcb;
@@ -371,6 +375,10 @@ sk_buff *j1939_session_skb_find_by_offset(struct j1939_session *session,
                        skb = do_skb;
                }
        }
+
+       if (skb)
+               skb_get(skb);
+
        spin_unlock_irqrestore(&session->skb_queue.lock, flags);
 
        if (!skb)
@@ -381,12 +389,12 @@ sk_buff *j1939_session_skb_find_by_offset(struct j1939_session *session,
        return skb;
 }
 
-static struct sk_buff *j1939_session_skb_find(struct j1939_session *session)
+static struct sk_buff *j1939_session_skb_get(struct j1939_session *session)
 {
        unsigned int offset_start;
 
        offset_start = session->pkt.dpo * 7;
-       return j1939_session_skb_find_by_offset(session, offset_start);
+       return j1939_session_skb_get_by_offset(session, offset_start);
 }
 
 /* see if we are receiver
@@ -776,7 +784,7 @@ static int j1939_session_tx_dat(struct j1939_session *session)
        int ret = 0;
        u8 dat[8];
 
-       se_skb = j1939_session_skb_find_by_offset(session, session->pkt.tx * 7);
+       se_skb = j1939_session_skb_get_by_offset(session, session->pkt.tx * 7);
        if (!se_skb)
                return -ENOBUFS;
 
@@ -801,7 +809,8 @@ static int j1939_session_tx_dat(struct j1939_session *session)
                        netdev_err_once(priv->ndev,
                                        "%s: 0x%p: requested data outside of queued buffer: offset %i, len %i, pkt.tx: %i\n",
                                        __func__, session, skcb->offset, se_skb->len , session->pkt.tx);
-                       return -EOVERFLOW;
+                       ret = -EOVERFLOW;
+                       goto out_free;
                }
 
                if (!len) {
@@ -835,6 +844,12 @@ static int j1939_session_tx_dat(struct j1939_session *session)
        if (pkt_done)
                j1939_tp_set_rxtimeout(session, 250);
 
+ out_free:
+       if (ret)
+               kfree_skb(se_skb);
+       else
+               consume_skb(se_skb);
+
        return ret;
 }
 
@@ -1007,7 +1022,7 @@ static int j1939_xtp_txnext_receiver(struct j1939_session *session)
 static int j1939_simple_txnext(struct j1939_session *session)
 {
        struct j1939_priv *priv = session->priv;
-       struct sk_buff *se_skb = j1939_session_skb_find(session);
+       struct sk_buff *se_skb = j1939_session_skb_get(session);
        struct sk_buff *skb;
        int ret;
 
@@ -1015,8 +1030,10 @@ static int j1939_simple_txnext(struct j1939_session *session)
                return 0;
 
        skb = skb_clone(se_skb, GFP_ATOMIC);
-       if (!skb)
-               return -ENOMEM;
+       if (!skb) {
+               ret = -ENOMEM;
+               goto out_free;
+       }
 
        can_skb_set_owner(skb, se_skb->sk);
 
@@ -1024,12 +1041,18 @@ static int j1939_simple_txnext(struct j1939_session *session)
 
        ret = j1939_send_one(priv, skb);
        if (ret)
-               return ret;
+               goto out_free;
 
        j1939_sk_errqueue(session, J1939_ERRQUEUE_SCHED);
        j1939_sk_queue_activate_next(session);
 
-       return 0;
+ out_free:
+       if (ret)
+               kfree_skb(se_skb);
+       else
+               consume_skb(se_skb);
+
+       return ret;
 }
 
 static bool j1939_session_deactivate_locked(struct j1939_session *session)
@@ -1170,9 +1193,10 @@ static void j1939_session_completed(struct j1939_session *session)
        struct sk_buff *skb;
 
        if (!session->transmission) {
-               skb = j1939_session_skb_find(session);
+               skb = j1939_session_skb_get(session);
                /* distribute among j1939 receivers */
                j1939_sk_recv(session->priv, skb);
+               consume_skb(skb);
        }
 
        j1939_session_deactivate_activate_next(session);
@@ -1744,7 +1768,7 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session,
 {
        struct j1939_priv *priv = session->priv;
        struct j1939_sk_buff_cb *skcb;
-       struct sk_buff *se_skb;
+       struct sk_buff *se_skb = NULL;
        const u8 *dat;
        u8 *tpdat;
        int offset;
@@ -1786,7 +1810,7 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session,
                goto out_session_cancel;
        }
 
-       se_skb = j1939_session_skb_find_by_offset(session, packet * 7);
+       se_skb = j1939_session_skb_get_by_offset(session, packet * 7);
        if (!se_skb) {
                netdev_warn(priv->ndev, "%s: 0x%p: no skb found\n", __func__,
                            session);
@@ -1848,11 +1872,13 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session,
                j1939_tp_set_rxtimeout(session, 250);
        }
        session->last_cmd = 0xff;
+       consume_skb(se_skb);
        j1939_session_put(session);
 
        return;
 
  out_session_cancel:
+       kfree_skb(se_skb);
        j1939_session_timers_cancel(session);
        j1939_session_cancel(session, J1939_XTP_ABORT_FAULT);
        j1939_session_put(session);
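
The j1939 renames from *_skb_find() to *_skb_get() encode a new ownership rule: the lookup now takes its own reference (skb_get() while the queue lock still pins the skb), and every caller must drop exactly one reference on every path, consume_skb() on success and kfree_skb() on error. In miniature, with hypothetical helper names:

    #include <linux/skbuff.h>

    /* lookup: take a reference while the queue lock keeps the skb alive */
    static struct sk_buff *queue_get_skb(struct sk_buff_head *q)
    {
            struct sk_buff *skb;
            unsigned long flags;

            spin_lock_irqsave(&q->lock, flags);
            skb = skb_peek(q);
            if (skb)
                    skb_get(skb);   /* caller now owns a reference */
            spin_unlock_irqrestore(&q->lock, flags);
            return skb;
    }

    /* caller: a single exit that always balances the reference */
    static int use_first_skb(struct sk_buff_head *q)
    {
            struct sk_buff *skb = queue_get_skb(q);
            int ret = 0;

            if (!skb)
                    return -ENOBUFS;
            /* ... work with skb, setting ret on failure ... */
            if (ret)
                    kfree_skb(skb);    /* error path: counts as a drop */
            else
                    consume_skb(skb);  /* success: invisible to drop monitor */
            return ret;
    }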
index 139d947..ac96fc2 100644 (file)
@@ -83,7 +83,7 @@ struct raw_sock {
        struct sock sk;
        int bound;
        int ifindex;
-       struct notifier_block notifier;
+       struct list_head notifier;
        int loopback;
        int recv_own_msgs;
        int fd_frames;
@@ -95,6 +95,10 @@ struct raw_sock {
        struct uniqframe __percpu *uniq;
 };
 
+static LIST_HEAD(raw_notifier_list);
+static DEFINE_SPINLOCK(raw_notifier_lock);
+static struct raw_sock *raw_busy_notifier;
+
 /* Return pointer to store the extra msg flags for raw_recvmsg().
  * We use the space of one unsigned int beyond the 'struct sockaddr_can'
  * in skb->cb.
@@ -263,21 +267,16 @@ static int raw_enable_allfilters(struct net *net, struct net_device *dev,
        return err;
 }
 
-static int raw_notifier(struct notifier_block *nb,
-                       unsigned long msg, void *ptr)
+static void raw_notify(struct raw_sock *ro, unsigned long msg,
+                      struct net_device *dev)
 {
-       struct net_device *dev = netdev_notifier_info_to_dev(ptr);
-       struct raw_sock *ro = container_of(nb, struct raw_sock, notifier);
        struct sock *sk = &ro->sk;
 
        if (!net_eq(dev_net(dev), sock_net(sk)))
-               return NOTIFY_DONE;
-
-       if (dev->type != ARPHRD_CAN)
-               return NOTIFY_DONE;
+               return;
 
        if (ro->ifindex != dev->ifindex)
-               return NOTIFY_DONE;
+               return;
 
        switch (msg) {
        case NETDEV_UNREGISTER:
@@ -305,7 +304,28 @@ static int raw_notifier(struct notifier_block *nb,
                        sk->sk_error_report(sk);
                break;
        }
+}
+
+static int raw_notifier(struct notifier_block *nb, unsigned long msg,
+                       void *ptr)
+{
+       struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+
+       if (dev->type != ARPHRD_CAN)
+               return NOTIFY_DONE;
+       if (msg != NETDEV_UNREGISTER && msg != NETDEV_DOWN)
+               return NOTIFY_DONE;
+       if (unlikely(raw_busy_notifier)) /* Check for reentrant bug. */
+               return NOTIFY_DONE;
 
+       spin_lock(&raw_notifier_lock);
+       list_for_each_entry(raw_busy_notifier, &raw_notifier_list, notifier) {
+               spin_unlock(&raw_notifier_lock);
+               raw_notify(raw_busy_notifier, msg, dev);
+               spin_lock(&raw_notifier_lock);
+       }
+       raw_busy_notifier = NULL;
+       spin_unlock(&raw_notifier_lock);
        return NOTIFY_DONE;
 }
 
@@ -334,9 +354,9 @@ static int raw_init(struct sock *sk)
                return -ENOMEM;
 
        /* set notifier */
-       ro->notifier.notifier_call = raw_notifier;
-
-       register_netdevice_notifier(&ro->notifier);
+       spin_lock(&raw_notifier_lock);
+       list_add_tail(&ro->notifier, &raw_notifier_list);
+       spin_unlock(&raw_notifier_lock);
 
        return 0;
 }
@@ -351,7 +371,14 @@ static int raw_release(struct socket *sock)
 
        ro = raw_sk(sk);
 
-       unregister_netdevice_notifier(&ro->notifier);
+       spin_lock(&raw_notifier_lock);
+       while (raw_busy_notifier == ro) {
+               spin_unlock(&raw_notifier_lock);
+               schedule_timeout_uninterruptible(1);
+               spin_lock(&raw_notifier_lock);
+       }
+       list_del(&ro->notifier);
+       spin_unlock(&raw_notifier_lock);
 
        lock_sock(sk);
 
@@ -889,6 +916,10 @@ static const struct can_proto raw_can_proto = {
        .prot       = &raw_proto,
 };
 
+static struct notifier_block canraw_notifier = {
+       .notifier_call = raw_notifier
+};
+
 static __init int raw_module_init(void)
 {
        int err;
@@ -898,6 +929,8 @@ static __init int raw_module_init(void)
        err = can_proto_register(&raw_can_proto);
        if (err < 0)
                pr_err("can: registration of raw protocol failed\n");
+       else
+               register_netdevice_notifier(&canraw_notifier);
 
        return err;
 }
@@ -905,6 +938,7 @@ static __init int raw_module_init(void)
 static __exit void raw_module_exit(void)
 {
        can_proto_unregister(&raw_can_proto);
+       unregister_netdevice_notifier(&canraw_notifier);
 }
 
 module_init(raw_module_init);
index 98f20ef..bf77457 100644 (file)
@@ -238,6 +238,7 @@ static int neigh_forced_gc(struct neigh_table *tbl)
 
                        write_lock(&n->lock);
                        if ((n->nud_state == NUD_FAILED) ||
+                           (n->nud_state == NUD_NOARP) ||
                            (tbl->is_multicast &&
                             tbl->is_multicast(n->primary_key)) ||
                            time_after(tref, n->updated))
index 43b6ac4..9b5a767 100644 (file)
@@ -641,6 +641,18 @@ void __put_net(struct net *net)
 }
 EXPORT_SYMBOL_GPL(__put_net);
 
+/**
+ * get_net_ns - increment the refcount of the network namespace
+ * @ns: common namespace (net)
+ *
+ * Returns the net's common namespace.
+ */
+struct ns_common *get_net_ns(struct ns_common *ns)
+{
+       return &get_net(container_of(ns, struct net, ns))->ns;
+}
+EXPORT_SYMBOL_GPL(get_net_ns);
+
 struct net *get_net_ns_by_fd(int fd)
 {
        struct file *file;
@@ -660,14 +672,8 @@ struct net *get_net_ns_by_fd(int fd)
        fput(file);
        return net;
 }
-
-#else
-struct net *get_net_ns_by_fd(int fd)
-{
-       return ERR_PTR(-EINVAL);
-}
-#endif
 EXPORT_SYMBOL_GPL(get_net_ns_by_fd);
+#endif
 
 struct net *get_net_ns_by_pid(pid_t pid)
 {
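
get_net_ns() becomes a real exported symbol: it maps the generic ns_common handle back to its enclosing struct net with container_of() and takes a reference via get_net(), returning the same handle. A caller holding only the ns_common would pair it like this (sketch; error handling elided):

    /* hold the netns behind an ns_common handle; caller must put_net() */
    static struct net *hold_net_from_ns(struct ns_common *ns)
    {
            return container_of(get_net_ns(ns), struct net, ns);
    }

Exporting it lets other code share one implementation instead of open-coding the container_of() dance.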
index 3e84279..ec931b0 100644 (file)
@@ -4842,10 +4842,12 @@ static int rtnl_bridge_notify(struct net_device *dev)
        if (err < 0)
                goto errout;
 
-       if (!skb->len) {
-               err = -EINVAL;
+       /* Notification info is only filled for bridge ports, not the bridge
+        * device itself. Therefore, a zero notification length is valid and
+        * should not result in an error.
+        */
+       if (!skb->len)
                goto errout;
-       }
 
        rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
        return 0;
index 3ad2287..bbc3b4b 100644 (file)
@@ -1253,6 +1253,7 @@ static void __msg_zerocopy_callback(struct ubuf_info *uarg)
        struct sock *sk = skb->sk;
        struct sk_buff_head *q;
        unsigned long flags;
+       bool is_zerocopy;
        u32 lo, hi;
        u16 len;
 
@@ -1267,6 +1268,7 @@ static void __msg_zerocopy_callback(struct ubuf_info *uarg)
        len = uarg->len;
        lo = uarg->id;
        hi = uarg->id + len - 1;
+       is_zerocopy = uarg->zerocopy;
 
        serr = SKB_EXT_ERR(skb);
        memset(serr, 0, sizeof(*serr));
@@ -1274,7 +1276,7 @@ static void __msg_zerocopy_callback(struct ubuf_info *uarg)
        serr->ee.ee_origin = SO_EE_ORIGIN_ZEROCOPY;
        serr->ee.ee_data = hi;
        serr->ee.ee_info = lo;
-       if (!uarg->zerocopy)
+       if (!is_zerocopy)
                serr->ee.ee_code |= SO_EE_CODE_ZEROCOPY_COPIED;
 
        q = &sk->sk_error_queue;
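
This zerocopy fix is subtle because uarg and serr overlay the same storage: both live in skb->cb[], and the memset() that starts building the error report destroys uarg, so uarg->zerocopy has to be read into a local beforehand, just as len, lo and hi already were. The hazard in isolation, with struct names invented for the example:

    #include <linux/string.h>
    #include <linux/types.h>

    struct src_state  { u32 id; u16 len; u8 zerocopy; };
    struct err_report { u32 data; u32 info; u32 code; };

    static void build_report(char cb[48])   /* shared control buffer */
    {
            struct src_state *st = (void *)cb;
            bool zc = st->zerocopy;               /* cache before reuse */
            struct err_report *rep = (void *)cb;  /* same bytes as *st */

            memset(rep, 0, sizeof(*rep));    /* *st is gone from here on */
            if (!zc)                         /* reading st->zerocopy now
                                              * would see zeroed memory */
                    rep->code |= 1;
    }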
index 2a6733a..5d38e90 100644 (file)
@@ -95,7 +95,7 @@ static int get_module_eeprom_by_page(struct net_device *dev,
        if (dev->sfp_bus)
                return sfp_get_module_eeprom_by_page(dev->sfp_bus, page_data, extack);
 
-       if (ops->get_module_info)
+       if (ops->get_module_eeprom_by_page)
                return ops->get_module_eeprom_by_page(dev, page_data, extack);
 
        return -EOPNOTSUPP;
index 3fa7a39..baa5d10 100644 (file)
@@ -1421,7 +1421,7 @@ static int ethtool_get_any_eeprom(struct net_device *dev, void __user *useraddr,
        if (eeprom.offset + eeprom.len > total_len)
                return -EINVAL;
 
-       data = kmalloc(PAGE_SIZE, GFP_USER);
+       data = kzalloc(PAGE_SIZE, GFP_USER);
        if (!data)
                return -ENOMEM;
 
@@ -1486,7 +1486,7 @@ static int ethtool_set_eeprom(struct net_device *dev, void __user *useraddr)
        if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev))
                return -EINVAL;
 
-       data = kmalloc(PAGE_SIZE, GFP_USER);
+       data = kzalloc(PAGE_SIZE, GFP_USER);
        if (!data)
                return -ENOMEM;
 
@@ -1765,7 +1765,7 @@ static int ethtool_self_test(struct net_device *dev, char __user *useraddr)
                return -EFAULT;
 
        test.len = test_len;
-       data = kmalloc_array(test_len, sizeof(u64), GFP_USER);
+       data = kcalloc(test_len, sizeof(u64), GFP_USER);
        if (!data)
                return -ENOMEM;
 
@@ -2293,7 +2293,7 @@ static int ethtool_get_tunable(struct net_device *dev, void __user *useraddr)
        ret = ethtool_tunable_valid(&tuna);
        if (ret)
                return ret;
-       data = kmalloc(tuna.len, GFP_USER);
+       data = kzalloc(tuna.len, GFP_USER);
        if (!data)
                return -ENOMEM;
        ret = ops->get_tunable(dev, &tuna, data);
@@ -2485,7 +2485,7 @@ static int get_phy_tunable(struct net_device *dev, void __user *useraddr)
        ret = ethtool_phy_tunable_valid(&tuna);
        if (ret)
                return ret;
-       data = kmalloc(tuna.len, GFP_USER);
+       data = kzalloc(tuna.len, GFP_USER);
        if (!data)
                return -ENOMEM;
        if (phy_drv_tunable) {
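
All five ethtool conversions in this file close the same class of heap infoleak: the driver callback may legitimately fill less than the full buffer, and kmalloc() leaves the remainder as stale kernel heap that copy_to_user() would then expose. Zeroing up front makes the copy safe regardless of how much the callback wrote. A generic sketch, not the ethtool code itself:

    #include <linux/slab.h>
    #include <linux/uaccess.h>

    static long get_blob(void __user *uptr, size_t len,
                         int (*fill)(void *buf, size_t len))
    {
            long ret;
            void *data = kzalloc(len, GFP_USER);  /* no stale heap bytes */

            if (!data)
                    return -ENOMEM;
            ret = fill(data, len);   /* may write fewer than len bytes */
            if (!ret && copy_to_user(uptr, data, len))
                    ret = -EFAULT;
            kfree(data);
            return ret;
    }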
index b3029ff..2d51b7a 100644 (file)
@@ -353,6 +353,8 @@ static int strset_reply_size(const struct ethnl_req_info *req_base,
        int len = 0;
        int ret;
 
+       len += nla_total_size(0); /* ETHTOOL_A_STRSET_STRINGSETS */
+
        for (i = 0; i < ETH_SS_COUNT; i++) {
                const struct strset_info *set_info = &data->sets[i];
 
index f17870e..2f94d22 100644 (file)
@@ -575,7 +575,7 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr,
                        return err;
        }
 
-       if (!inet_sk(sk)->inet_num && inet_autobind(sk))
+       if (data_race(!inet_sk(sk)->inet_num) && inet_autobind(sk))
                return -EAGAIN;
        return sk->sk_prot->connect(sk, uaddr, addr_len);
 }
@@ -803,7 +803,7 @@ int inet_send_prepare(struct sock *sk)
        sock_rps_record_flow(sk);
 
        /* We may need to bind the socket. */
-       if (!inet_sk(sk)->inet_num && !sk->sk_prot->no_autobind &&
+       if (data_race(!inet_sk(sk)->inet_num) && !sk->sk_prot->no_autobind &&
            inet_autobind(sk))
                return -EAGAIN;
 
index bfaf327..e0480c6 100644 (file)
@@ -472,6 +472,7 @@ void cipso_v4_doi_free(struct cipso_v4_doi *doi_def)
                kfree(doi_def->map.std->lvl.local);
                kfree(doi_def->map.std->cat.cipso);
                kfree(doi_def->map.std->cat.local);
+               kfree(doi_def->map.std);
                break;
        }
        kfree(doi_def);
index 2e35f68..1c6429c 100644 (file)
@@ -1989,7 +1989,7 @@ static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla,
                return -EAFNOSUPPORT;
 
        if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
-               BUG();
+               return -EINVAL;
 
        if (tb[IFLA_INET_CONF]) {
                nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
index 7b6931a..752e392 100644 (file)
@@ -759,6 +759,13 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
                icmp_param.data_len = room;
        icmp_param.head_len = sizeof(struct icmphdr);
 
+       /* if we don't have a source address at this point, fall back to the
+        * dummy address instead of sending out a packet with a source address
+        * of 0.0.0.0
+        */
+       if (!fl4.saddr)
+               fl4.saddr = htonl(INADDR_DUMMY);
+
        icmp_push_reply(&icmp_param, &fl4, &ipc, &rt);
 ende:
        ip_rt_put(rt);
index 7b272bb..6b3c558 100644 (file)
@@ -1801,6 +1801,7 @@ void ip_mc_destroy_dev(struct in_device *in_dev)
        while ((i = rtnl_dereference(in_dev->mc_list)) != NULL) {
                in_dev->mc_list = i->next_rcu;
                in_dev->mc_count--;
+               ip_mc_clear_src(i);
                ip_ma_put(i);
        }
 }
index 1c9f71a..95a7183 100644 (file)
@@ -954,6 +954,7 @@ bool ping_rcv(struct sk_buff *skb)
        struct sock *sk;
        struct net *net = dev_net(skb->dev);
        struct icmphdr *icmph = icmp_hdr(skb);
+       bool rc = false;
 
        /* We assume the packet has already been checked by icmp_rcv */
 
@@ -968,14 +969,15 @@ bool ping_rcv(struct sk_buff *skb)
                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 
                pr_debug("rcv on socket %p\n", sk);
-               if (skb2)
-                       ping_queue_rcv_skb(sk, skb2);
+               if (skb2 && !ping_queue_rcv_skb(sk, skb2))
+                       rc = true;
                sock_put(sk);
-               return true;
        }
-       pr_debug("no socket, dropping\n");
 
-       return false;
+       if (!rc)
+               pr_debug("no socket, dropping\n");
+
+       return rc;
 }
 EXPORT_SYMBOL_GPL(ping_rcv);
 
index f6787c5..6a36ac9 100644 (file)
@@ -2056,6 +2056,19 @@ martian_source:
        return err;
 }
 
+/* get device for dst_alloc with local routes */
+static struct net_device *ip_rt_get_dev(struct net *net,
+                                       const struct fib_result *res)
+{
+       struct fib_nh_common *nhc = res->fi ? res->nhc : NULL;
+       struct net_device *dev = NULL;
+
+       if (nhc)
+               dev = l3mdev_master_dev_rcu(nhc->nhc_dev);
+
+       return dev ? : net->loopback_dev;
+}
+
 /*
  *     NOTE. We drop all the packets that has local source
  *     addresses, because every properly looped back packet
@@ -2212,7 +2225,7 @@ local_input:
                }
        }
 
-       rth = rt_dst_alloc(l3mdev_master_dev_rcu(dev) ? : net->loopback_dev,
+       rth = rt_dst_alloc(ip_rt_get_dev(net, res),
                           flags | RTCF_LOCAL, res->type,
                           IN_DEV_ORCONF(in_dev, NOPOLICY), false);
        if (!rth)
index 15f5504..1307ad0 100644 (file)
@@ -2607,6 +2607,9 @@ void udp_destroy_sock(struct sock *sk)
 {
        struct udp_sock *up = udp_sk(sk);
        bool slow = lock_sock_fast(sk);
+
+       /* protects from races with udp_abort() */
+       sock_set_flag(sk, SOCK_DEAD);
        udp_flush_pending_frames(sk);
        unlock_sock_fast(sk, slow);
        if (static_branch_unlikely(&udp_encap_needed_key)) {
@@ -2857,10 +2860,17 @@ int udp_abort(struct sock *sk, int err)
 {
        lock_sock(sk);
 
+       /* udp{v6}_destroy_sock() sets it under the sk lock, avoid racing
+        * with close()
+        */
+       if (sock_flag(sk, SOCK_DEAD))
+               goto out;
+
        sk->sk_err = err;
        sk->sk_error_report(sk);
        __udp_disconnect(sk, 0);
 
+out:
        release_sock(sk);
 
        return 0;
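
udp_abort() (reachable via the SOCK_DESTROY netlink command) can race with close(). Both paths serialize on the socket lock, so destroy marks the socket SOCK_DEAD under the lock and abort re-checks the flag under the same lock before touching anything. An ordinary flag-under-lock handshake, simplified (the real destroy side uses lock_sock_fast()):

    /* close path */
    static void proto_destroy(struct sock *sk)
    {
            lock_sock(sk);
            sock_set_flag(sk, SOCK_DEAD);  /* visible to later lock holders */
            /* ... flush pending frames ... */
            release_sock(sk);
    }

    /* abort path */
    static int proto_abort(struct sock *sk, int err)
    {
            lock_sock(sk);
            if (sock_flag(sk, SOCK_DEAD))  /* close() won the race */
                    goto out;
            sk->sk_err = err;
            sk->sk_error_report(sk);
    out:
            release_sock(sk);
            return 0;
    }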
index b0ef65e..701eb82 100644 (file)
@@ -5827,7 +5827,7 @@ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla,
                return -EAFNOSUPPORT;
 
        if (nla_parse_nested_deprecated(tb, IFLA_INET6_MAX, nla, NULL, NULL) < 0)
-               BUG();
+               return -EINVAL;
 
        if (tb[IFLA_INET6_TOKEN]) {
                err = inet6_set_iftoken(idev, nla_data(tb[IFLA_INET6_TOKEN]),
index e204163..92f3235 100644 (file)
@@ -135,6 +135,17 @@ void nft_fib6_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
 }
 EXPORT_SYMBOL_GPL(nft_fib6_eval_type);
 
+static bool nft_fib_v6_skip_icmpv6(const struct sk_buff *skb, u8 next, const struct ipv6hdr *iph)
+{
+       if (likely(next != IPPROTO_ICMPV6))
+               return false;
+
+       if (ipv6_addr_type(&iph->saddr) != IPV6_ADDR_ANY)
+               return false;
+
+       return ipv6_addr_type(&iph->daddr) & IPV6_ADDR_LINKLOCAL;
+}
+
 void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
                   const struct nft_pktinfo *pkt)
 {
@@ -163,10 +174,13 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
 
        lookup_flags = nft_fib6_flowi_init(&fl6, priv, pkt, oif, iph);
 
-       if (nft_hook(pkt) == NF_INET_PRE_ROUTING &&
-           nft_fib_is_loopback(pkt->skb, nft_in(pkt))) {
-               nft_fib_store_result(dest, priv, nft_in(pkt));
-               return;
+       if (nft_hook(pkt) == NF_INET_PRE_ROUTING ||
+           nft_hook(pkt) == NF_INET_INGRESS) {
+               if (nft_fib_is_loopback(pkt->skb, nft_in(pkt)) ||
+                   nft_fib_v6_skip_icmpv6(pkt->skb, pkt->tprot, iph)) {
+                       nft_fib_store_result(dest, priv, nft_in(pkt));
+                       return;
+               }
        }
 
        *dest = 0;
index 199b080..3fcd86f 100644 (file)
@@ -1598,6 +1598,9 @@ void udpv6_destroy_sock(struct sock *sk)
 {
        struct udp_sock *up = udp_sk(sk);
        lock_sock(sk);
+
+       /* protects from races with udp_abort() */
+       sock_set_flag(sk, SOCK_DEAD);
        udp_v6_flush_pending_frames(sk);
        release_sock(sk);
 
index 1c572c8..6201965 100644 (file)
@@ -1066,11 +1066,6 @@ out_error:
                goto partial_message;
        }
 
-       if (skb_has_frag_list(head)) {
-               kfree_skb_list(skb_shinfo(head)->frag_list);
-               skb_shinfo(head)->frag_list = NULL;
-       }
-
        if (head != kcm->seq_skb)
                kfree_skb(head);
 
index 9245c04..fc34ae2 100644 (file)
@@ -4,7 +4,7 @@
  *
  * Copyright 2007      Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
- * Copyright (C) 2018 - 2019 Intel Corporation
+ * Copyright (C) 2018 - 2019, 2021 Intel Corporation
  */
 
 #include <linux/debugfs.h>
@@ -387,10 +387,17 @@ static ssize_t reset_write(struct file *file, const char __user *user_buf,
                           size_t count, loff_t *ppos)
 {
        struct ieee80211_local *local = file->private_data;
+       int ret;
 
        rtnl_lock();
+       wiphy_lock(local->hw.wiphy);
        __ieee80211_suspend(&local->hw, NULL);
-       __ieee80211_resume(&local->hw);
+       ret = __ieee80211_resume(&local->hw);
+       wiphy_unlock(local->hw.wiphy);
+
+       if (ret)
+               cfg80211_shutdown_all_interfaces(local->hw.wiphy);
+
        rtnl_unlock();
 
        return count;
index 214404a..648696b 100644 (file)
@@ -1442,7 +1442,7 @@ ieee80211_get_sband(struct ieee80211_sub_if_data *sdata)
        rcu_read_lock();
        chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
 
-       if (WARN_ON_ONCE(!chanctx_conf)) {
+       if (!chanctx_conf) {
                rcu_read_unlock();
                return NULL;
        }
index 2e2f73a..137fa4c 100644 (file)
@@ -476,14 +476,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do
                                   GFP_KERNEL);
        }
 
-       /* APs need special treatment */
        if (sdata->vif.type == NL80211_IFTYPE_AP) {
-               struct ieee80211_sub_if_data *vlan, *tmpsdata;
-
-               /* down all dependent devices, that is VLANs */
-               list_for_each_entry_safe(vlan, tmpsdata, &sdata->u.ap.vlans,
-                                        u.vlan.list)
-                       dev_close(vlan->dev);
                WARN_ON(!list_empty(&sdata->u.ap.vlans));
        } else if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) {
                /* remove all packets in parent bc_buf pointing to this dev */
@@ -641,6 +634,15 @@ static int ieee80211_stop(struct net_device *dev)
 {
        struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
+       /* close all dependent VLAN interfaces before locking wiphy */
+       if (sdata->vif.type == NL80211_IFTYPE_AP) {
+               struct ieee80211_sub_if_data *vlan, *tmpsdata;
+
+               list_for_each_entry_safe(vlan, tmpsdata, &sdata->u.ap.vlans,
+                                        u.vlan.list)
+                       dev_close(vlan->dev);
+       }
+
        wiphy_lock(sdata->local->hw.wiphy);
        ieee80211_do_stop(sdata, true);
        wiphy_unlock(sdata->local->hw.wiphy);
@@ -1591,6 +1593,9 @@ static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata,
 
        switch (sdata->vif.type) {
        case NL80211_IFTYPE_AP:
+               if (!list_empty(&sdata->u.ap.vlans))
+                       return -EBUSY;
+               break;
        case NL80211_IFTYPE_STATION:
        case NL80211_IFTYPE_ADHOC:
        case NL80211_IFTYPE_OCB:
index 62145e5..f33a3ac 100644 (file)
@@ -252,6 +252,7 @@ static void ieee80211_restart_work(struct work_struct *work)
        struct ieee80211_local *local =
                container_of(work, struct ieee80211_local, restart_work);
        struct ieee80211_sub_if_data *sdata;
+       int ret;
 
        /* wait for scan work complete */
        flush_workqueue(local->workqueue);
@@ -301,8 +302,12 @@ static void ieee80211_restart_work(struct work_struct *work)
        /* wait for all packet processing to be done */
        synchronize_net();
 
-       ieee80211_reconfig(local);
+       ret = ieee80211_reconfig(local);
        wiphy_unlock(local->hw.wiphy);
+
+       if (ret)
+               cfg80211_shutdown_all_interfaces(local->hw.wiphy);
+
        rtnl_unlock();
 }
 
index 2480bd0..3f2aad2 100644 (file)
@@ -4062,10 +4062,14 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
                if (elems.mbssid_config_ie)
                        bss_conf->profile_periodicity =
                                elems.mbssid_config_ie->profile_periodicity;
+               else
+                       bss_conf->profile_periodicity = 0;
 
                if (elems.ext_capab_len >= 11 &&
                    (elems.ext_capab[10] & WLAN_EXT_CAPA11_EMA_SUPPORT))
                        bss_conf->ema_ap = true;
+               else
+                       bss_conf->ema_ap = false;
 
                /* continue assoc process */
                ifmgd->assoc_data->timeout = jiffies;
@@ -5802,12 +5806,16 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
                                              beacon_ies->data, beacon_ies->len);
                if (elem && elem->datalen >= 3)
                        sdata->vif.bss_conf.profile_periodicity = elem->data[2];
+               else
+                       sdata->vif.bss_conf.profile_periodicity = 0;
 
                elem = cfg80211_find_elem(WLAN_EID_EXT_CAPABILITY,
                                          beacon_ies->data, beacon_ies->len);
                if (elem && elem->datalen >= 11 &&
                    (elem->data[10] & WLAN_EXT_CAPA11_EMA_SUPPORT))
                        sdata->vif.bss_conf.ema_ap = true;
+               else
+                       sdata->vif.bss_conf.ema_ap = false;
        } else {
                assoc_data->timeout = jiffies;
                assoc_data->timeout_started = true;
index 6487b05..a6f3fb4 100644 (file)
@@ -1514,7 +1514,7 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta,
            (info->control.flags & IEEE80211_TX_CTRL_PORT_CTRL_PROTO))
                return;
 
-       if (time_is_before_jiffies(mi->sample_time))
+       if (time_is_after_jiffies(mi->sample_time))
                return;
 
        mi->sample_time = jiffies + MINSTREL_SAMPLE_INTERVAL;
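
This one-character minstrel change fixes inverted jiffies polarity: mi->sample_time is a deadline in the future, and time_is_after_jiffies(t) is true while t is still ahead of now, which is exactly when sampling must be skipped. The old time_is_before_jiffies() test returned early in precisely the wrong half of the timeline. The intended rate-limit idiom:

    #include <linux/jiffies.h>

    static unsigned long next_sample;     /* deadline, in jiffies */

    static bool should_sample(void)
    {
            if (time_is_after_jiffies(next_sample))
                    return false;         /* deadline still ahead: too soon */

            /* the interval here is illustrative, not minstrel's value */
            next_sample = jiffies + HZ / 2;
            return true;
    }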
index 1bb43ed..af0ef45 100644 (file)
@@ -2240,17 +2240,15 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
        sc = le16_to_cpu(hdr->seq_ctrl);
        frag = sc & IEEE80211_SCTL_FRAG;
 
-       if (is_multicast_ether_addr(hdr->addr1)) {
-               I802_DEBUG_INC(rx->local->dot11MulticastReceivedFrameCount);
-               goto out_no_led;
-       }
-
        if (rx->sta)
                cache = &rx->sta->frags;
 
        if (likely(!ieee80211_has_morefrags(fc) && frag == 0))
                goto out;
 
+       if (is_multicast_ether_addr(hdr->addr1))
+               return RX_DROP_MONITOR;
+
        I802_DEBUG_INC(rx->local->rx_handlers_fragments);
 
        if (skb_linearize(rx->skb))
@@ -2376,7 +2374,6 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
 
  out:
        ieee80211_led_rx(rx->local);
- out_no_led:
        if (rx->sta)
                rx->sta->rx_stats.packets++;
        return RX_CONTINUE;
index d4cc9ac..6b50cb5 100644 (file)
@@ -251,13 +251,24 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
        struct ieee80211_mgmt *mgmt = (void *)skb->data;
        struct ieee80211_bss *bss;
        struct ieee80211_channel *channel;
+       size_t min_hdr_len = offsetof(struct ieee80211_mgmt,
+                                     u.probe_resp.variable);
+
+       if (!ieee80211_is_probe_resp(mgmt->frame_control) &&
+           !ieee80211_is_beacon(mgmt->frame_control) &&
+           !ieee80211_is_s1g_beacon(mgmt->frame_control))
+               return;
 
        if (ieee80211_is_s1g_beacon(mgmt->frame_control)) {
-               if (skb->len < 15)
-                       return;
-       } else if (skb->len < 24 ||
-                (!ieee80211_is_probe_resp(mgmt->frame_control) &&
-                 !ieee80211_is_beacon(mgmt->frame_control)))
+               if (ieee80211_is_s1g_short_beacon(mgmt->frame_control))
+                       min_hdr_len = offsetof(struct ieee80211_ext,
+                                              u.s1g_short_beacon.variable);
+               else
+                       min_hdr_len = offsetof(struct ieee80211_ext,
+                                              u.s1g_beacon);
+       }
+
+       if (skb->len < min_hdr_len)
                return;
 
        sdata1 = rcu_dereference(local->scan_sdata);
index 0b719f3..2651498 100644 (file)
@@ -2014,6 +2014,26 @@ void ieee80211_xmit(struct ieee80211_sub_if_data *sdata,
        ieee80211_tx(sdata, sta, skb, false);
 }
 
+static bool ieee80211_validate_radiotap_len(struct sk_buff *skb)
+{
+       struct ieee80211_radiotap_header *rthdr =
+               (struct ieee80211_radiotap_header *)skb->data;
+
+       /* check for not even having the fixed radiotap header part */
+       if (unlikely(skb->len < sizeof(struct ieee80211_radiotap_header)))
+               return false; /* too short to be possibly valid */
+
+       /* is it a header version we can trust to find length from? */
+       if (unlikely(rthdr->it_version))
+               return false; /* only version 0 is supported */
+
+       /* does the skb contain enough to deliver on the alleged length? */
+       if (unlikely(skb->len < ieee80211_get_radiotap_len(skb->data)))
+               return false; /* skb too short for claimed rt header extent */
+
+       return true;
+}
+
 bool ieee80211_parse_tx_radiotap(struct sk_buff *skb,
                                 struct net_device *dev)
 {
@@ -2022,8 +2042,6 @@ bool ieee80211_parse_tx_radiotap(struct sk_buff *skb,
        struct ieee80211_radiotap_header *rthdr =
                (struct ieee80211_radiotap_header *) skb->data;
        struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
-       struct ieee80211_supported_band *sband =
-               local->hw.wiphy->bands[info->band];
        int ret = ieee80211_radiotap_iterator_init(&iterator, rthdr, skb->len,
                                                   NULL);
        u16 txflags;
@@ -2036,17 +2054,8 @@ bool ieee80211_parse_tx_radiotap(struct sk_buff *skb,
        u8 vht_mcs = 0, vht_nss = 0;
        int i;
 
-       /* check for not even having the fixed radiotap header part */
-       if (unlikely(skb->len < sizeof(struct ieee80211_radiotap_header)))
-               return false; /* too short to be possibly valid */
-
-       /* is it a header version we can trust to find length from? */
-       if (unlikely(rthdr->it_version))
-               return false; /* only version 0 is supported */
-
-       /* does the skb contain enough to deliver on the alleged length? */
-       if (unlikely(skb->len < ieee80211_get_radiotap_len(skb->data)))
-               return false; /* skb too short for claimed rt header extent */
+       if (!ieee80211_validate_radiotap_len(skb))
+               return false;
 
        info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT |
                       IEEE80211_TX_CTL_DONTFRAG;
@@ -2186,6 +2195,9 @@ bool ieee80211_parse_tx_radiotap(struct sk_buff *skb,
                return false;
 
        if (rate_found) {
+               struct ieee80211_supported_band *sband =
+                       local->hw.wiphy->bands[info->band];
+
                info->control.flags |= IEEE80211_TX_CTRL_RATE_INJECT;
 
                for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) {
@@ -2199,7 +2211,7 @@ bool ieee80211_parse_tx_radiotap(struct sk_buff *skb,
                } else if (rate_flags & IEEE80211_TX_RC_VHT_MCS) {
                        ieee80211_rate_set_vht(info->control.rates, vht_mcs,
                                               vht_nss);
-               } else {
+               } else if (sband) {
                        for (i = 0; i < sband->n_bitrates; i++) {
                                if (rate * 5 != sband->bitrates[i].bitrate)
                                        continue;
@@ -2236,8 +2248,8 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb,
        info->flags = IEEE80211_TX_CTL_REQ_TX_STATUS |
                      IEEE80211_TX_CTL_INJECTED;
 
-       /* Sanity-check and process the injection radiotap header */
-       if (!ieee80211_parse_tx_radiotap(skb, dev))
+       /* Sanity-check the length of the radiotap header */
+       if (!ieee80211_validate_radiotap_len(skb))
                goto fail;
 
        /* we now know there is a radiotap header with a length we can use */
@@ -2351,6 +2363,14 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb,
        ieee80211_select_queue_80211(sdata, skb, hdr);
        skb_set_queue_mapping(skb, ieee80211_ac_from_tid(skb->priority));
 
+       /*
+        * Process the radiotap header. This will now take into account the
+        * selected chandef above to accurately set injection rates and
+        * retransmissions.
+        */
+       if (!ieee80211_parse_tx_radiotap(skb, dev))
+               goto fail_rcu;
+
        /* remove the injection radiotap header */
        skb_pull(skb, len_rthdr);
 
index 0a0481f..060059e 100644 (file)
@@ -947,7 +947,7 @@ static void ieee80211_parse_extension_element(u32 *crc,
 
        switch (elem->data[0]) {
        case WLAN_EID_EXT_HE_MU_EDCA:
-               if (len == sizeof(*elems->mu_edca_param_set)) {
+               if (len >= sizeof(*elems->mu_edca_param_set)) {
                        elems->mu_edca_param_set = data;
                        if (crc)
                                *crc = crc32_be(*crc, (void *)elem,
@@ -968,7 +968,7 @@ static void ieee80211_parse_extension_element(u32 *crc,
                }
                break;
        case WLAN_EID_EXT_UORA:
-               if (len == 1)
+               if (len >= 1)
                        elems->uora_element = data;
                break;
        case WLAN_EID_EXT_MAX_CHANNEL_SWITCH_TIME:
@@ -976,7 +976,7 @@ static void ieee80211_parse_extension_element(u32 *crc,
                        elems->max_channel_switch_time = data;
                break;
        case WLAN_EID_EXT_MULTIPLE_BSSID_CONFIGURATION:
-               if (len == sizeof(*elems->mbssid_config_ie))
+               if (len >= sizeof(*elems->mbssid_config_ie))
                        elems->mbssid_config_ie = data;
                break;
        case WLAN_EID_EXT_HE_SPR:
@@ -985,7 +985,7 @@ static void ieee80211_parse_extension_element(u32 *crc,
                        elems->he_spr = data;
                break;
        case WLAN_EID_EXT_HE_6GHZ_CAPA:
-               if (len == sizeof(*elems->he_6ghz_capa))
+               if (len >= sizeof(*elems->he_6ghz_capa))
                        elems->he_6ghz_capa = data;
                break;
        }
@@ -1074,14 +1074,14 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
 
                switch (id) {
                case WLAN_EID_LINK_ID:
-                       if (elen + 2 != sizeof(struct ieee80211_tdls_lnkie)) {
+                       if (elen + 2 < sizeof(struct ieee80211_tdls_lnkie)) {
                                elem_parse_failed = true;
                                break;
                        }
                        elems->lnk_id = (void *)(pos - 2);
                        break;
                case WLAN_EID_CHAN_SWITCH_TIMING:
-                       if (elen != sizeof(struct ieee80211_ch_switch_timing)) {
+                       if (elen < sizeof(struct ieee80211_ch_switch_timing)) {
                                elem_parse_failed = true;
                                break;
                        }
@@ -1244,7 +1244,7 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
                        elems->sec_chan_offs = (void *)pos;
                        break;
                case WLAN_EID_CHAN_SWITCH_PARAM:
-                       if (elen !=
+                       if (elen <
                            sizeof(*elems->mesh_chansw_params_ie)) {
                                elem_parse_failed = true;
                                break;
@@ -1253,7 +1253,7 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
                        break;
                case WLAN_EID_WIDE_BW_CHANNEL_SWITCH:
                        if (!action ||
-                           elen != sizeof(*elems->wide_bw_chansw_ie)) {
+                           elen < sizeof(*elems->wide_bw_chansw_ie)) {
                                elem_parse_failed = true;
                                break;
                        }
@@ -1272,7 +1272,7 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
                        ie = cfg80211_find_ie(WLAN_EID_WIDE_BW_CHANNEL_SWITCH,
                                              pos, elen);
                        if (ie) {
-                               if (ie[1] == sizeof(*elems->wide_bw_chansw_ie))
+                               if (ie[1] >= sizeof(*elems->wide_bw_chansw_ie))
                                        elems->wide_bw_chansw_ie =
                                                (void *)(ie + 2);
                                else
@@ -1316,7 +1316,7 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
                        elems->cisco_dtpc_elem = pos;
                        break;
                case WLAN_EID_ADDBA_EXT:
-                       if (elen != sizeof(struct ieee80211_addba_ext_ie)) {
+                       if (elen < sizeof(struct ieee80211_addba_ext_ie)) {
                                elem_parse_failed = true;
                                break;
                        }
@@ -1342,7 +1342,7 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
                                                          elem, elems);
                        break;
                case WLAN_EID_S1G_CAPABILITIES:
-                       if (elen == sizeof(*elems->s1g_capab))
+                       if (elen >= sizeof(*elems->s1g_capab))
                                elems->s1g_capab = (void *)pos;
                        else
                                elem_parse_failed = true;
@@ -2178,8 +2178,6 @@ static void ieee80211_handle_reconfig_failure(struct ieee80211_local *local)
        list_for_each_entry(ctx, &local->chanctx_list, list)
                ctx->driver_present = false;
        mutex_unlock(&local->chanctx_mtx);
-
-       cfg80211_shutdown_all_interfaces(local->hw.wiphy);
 }
 
 static void ieee80211_assign_chanctx(struct ieee80211_local *local,
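
The long run of util.c hunks above all make the same substitution: exact-size element checks (elen == sizeof(...)) become minimum-size checks (elen >= sizeof(...), or the inverted < on the failure branch). 802.11 elements are designed to grow, so a parser must accept anything long enough to contain the fields it understands and ignore the tail:

    /* the layout this parser knows how to read */
    struct elem_v1 {
            u8 a;
            u8 b;
    };

    static const struct elem_v1 *parse_elem(const u8 *data, size_t elen)
    {
            /* >= not ==: a longer element from a newer peer is still
             * valid; the trailing fields are simply ignored */
            if (elen < sizeof(struct elem_v1))
                    return NULL;
            return (const struct elem_v1 *)data;
    }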
index 6b825fb..9b263f2 100644 (file)
@@ -356,6 +356,8 @@ void mptcp_get_options(const struct sk_buff *skb,
                        length--;
                        continue;
                default:
+                       if (length < 2)
+                               return;
                        opsize = *ptr++;
                        if (opsize < 2) /* "silly options" */
                                return;
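
The added length check in mptcp_get_options() is the standard TLV parsing rule: never dereference the length octet unless the remaining buffer is known to contain both the kind and length bytes. A generic option walker obeying that rule, sketched after the TCP option layout:

    static void parse_options(const u8 *ptr, int length)
    {
            while (length > 0) {
                    u8 kind = *ptr++;
                    u8 opsize;

                    if (kind == 0)          /* end of option list */
                            return;
                    if (kind == 1) {        /* one-byte no-op */
                            length--;
                            continue;
                    }
                    if (length < 2)         /* no room for a length octet */
                            return;
                    opsize = *ptr++;
                    if (opsize < 2 || opsize > length)
                            return;         /* silly or truncated option */
                    /* ... handle (kind, ptr, opsize - 2) ... */
                    ptr += opsize - 2;
                    length -= opsize;
            }
    }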
index 5edc686..6323500 100644 (file)
@@ -280,11 +280,13 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
 
        /* try to fetch required memory from subflow */
        if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
-               if (ssk->sk_forward_alloc < skb->truesize)
-                       goto drop;
-               __sk_mem_reclaim(ssk, skb->truesize);
-               if (!sk_rmem_schedule(sk, skb, skb->truesize))
+               int amount = sk_mem_pages(skb->truesize) << SK_MEM_QUANTUM_SHIFT;
+
+               if (ssk->sk_forward_alloc < amount)
                        goto drop;
+
+               ssk->sk_forward_alloc -= amount;
+               sk->sk_forward_alloc += amount;
        }
 
        /* the skb map_seq accounts for the skb offset:
@@ -668,18 +670,22 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk)
 /* In most cases we will be able to lock the mptcp socket.  If it's already
  * owned, we need to defer to the work queue to avoid ABBA deadlock.
  */
-static void move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk)
+static bool move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk)
 {
        struct sock *sk = (struct sock *)msk;
        unsigned int moved = 0;
 
        if (inet_sk_state_load(sk) == TCP_CLOSE)
-               return;
-
-       mptcp_data_lock(sk);
+               return false;
 
        __mptcp_move_skbs_from_subflow(msk, ssk, &moved);
        __mptcp_ofo_queue(msk);
+       if (unlikely(ssk->sk_err)) {
+               if (!sock_owned_by_user(sk))
+                       __mptcp_error_report(sk);
+               else
+                       set_bit(MPTCP_ERROR_REPORT, &msk->flags);
+       }
 
        /* If the moves have caught up with the DATA_FIN sequence number
         * it's time to ack the DATA_FIN and change socket state, but
@@ -688,7 +694,7 @@ static void move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk)
         */
        if (mptcp_pending_data_fin(sk, NULL))
                mptcp_schedule_work(sk);
-       mptcp_data_unlock(sk);
+       return moved > 0;
 }
 
 void mptcp_data_ready(struct sock *sk, struct sock *ssk)
@@ -696,7 +702,6 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
        struct mptcp_sock *msk = mptcp_sk(sk);
        int sk_rbuf, ssk_rbuf;
-       bool wake;
 
        /* The peer can send data while we are shutting down this
         * subflow at msk destruction time, but we must avoid enqueuing
@@ -705,28 +710,22 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
        if (unlikely(subflow->disposable))
                return;
 
-       /* move_skbs_to_msk below can legitly clear the data_avail flag,
-        * but we will need later to properly woke the reader, cache its
-        * value
-        */
-       wake = subflow->data_avail == MPTCP_SUBFLOW_DATA_AVAIL;
-       if (wake)
-               set_bit(MPTCP_DATA_READY, &msk->flags);
-
        ssk_rbuf = READ_ONCE(ssk->sk_rcvbuf);
        sk_rbuf = READ_ONCE(sk->sk_rcvbuf);
        if (unlikely(ssk_rbuf > sk_rbuf))
                sk_rbuf = ssk_rbuf;
 
-       /* over limit? can't append more skbs to msk */
+       /* over limit? can't append more skbs to msk; also, no need to wake up */
        if (atomic_read(&sk->sk_rmem_alloc) > sk_rbuf)
-               goto wake;
-
-       move_skbs_to_msk(msk, ssk);
+               return;
 
-wake:
-       if (wake)
+       /* Wake-up the reader only for in-sequence data */
+       mptcp_data_lock(sk);
+       if (move_skbs_to_msk(msk, ssk)) {
+               set_bit(MPTCP_DATA_READY, &msk->flags);
                sk->sk_data_ready(sk);
+       }
+       mptcp_data_unlock(sk);
 }
 
 static bool mptcp_do_flush_join_list(struct mptcp_sock *msk)
@@ -858,7 +857,7 @@ static struct sock *mptcp_subflow_recv_lookup(const struct mptcp_sock *msk)
        sock_owned_by_me(sk);
 
        mptcp_for_each_subflow(msk, subflow) {
-               if (subflow->data_avail)
+               if (READ_ONCE(subflow->data_avail))
                        return mptcp_subflow_tcp_sock(subflow);
        }
 
@@ -1955,6 +1954,9 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk)
                done = __mptcp_move_skbs_from_subflow(msk, ssk, &moved);
                mptcp_data_unlock(sk);
                tcp_cleanup_rbuf(ssk, moved);
+
+               if (unlikely(ssk->sk_err))
+                       __mptcp_error_report(sk);
                unlock_sock_fast(ssk, slowpath);
        } while (!done);
 
index 0c6f99c..385796f 100644 (file)
@@ -362,7 +362,6 @@ mptcp_subflow_rsk(const struct request_sock *rsk)
 enum mptcp_data_avail {
        MPTCP_SUBFLOW_NODATA,
        MPTCP_SUBFLOW_DATA_AVAIL,
-       MPTCP_SUBFLOW_OOO_DATA
 };
 
 struct mptcp_delegated_action {
index ef3d037..be1de40 100644 (file)
@@ -784,10 +784,10 @@ static u64 expand_seq(u64 old_seq, u16 old_data_len, u64 seq)
        return seq | ((old_seq + old_data_len + 1) & GENMASK_ULL(63, 32));
 }
 
-static void warn_bad_map(struct mptcp_subflow_context *subflow, u32 ssn)
+static void dbg_bad_map(struct mptcp_subflow_context *subflow, u32 ssn)
 {
-       WARN_ONCE(1, "Bad mapping: ssn=%d map_seq=%d map_data_len=%d",
-                 ssn, subflow->map_subflow_seq, subflow->map_data_len);
+       pr_debug("Bad mapping: ssn=%d map_seq=%d map_data_len=%d",
+                ssn, subflow->map_subflow_seq, subflow->map_data_len);
 }
 
 static bool skb_is_fully_mapped(struct sock *ssk, struct sk_buff *skb)
@@ -812,13 +812,13 @@ static bool validate_mapping(struct sock *ssk, struct sk_buff *skb)
                /* Mapping covers data later in the subflow stream,
                 * currently unsupported.
                 */
-               warn_bad_map(subflow, ssn);
+               dbg_bad_map(subflow, ssn);
                return false;
        }
        if (unlikely(!before(ssn, subflow->map_subflow_seq +
                                  subflow->map_data_len))) {
                /* Mapping covers past subflow data, invalid */
-               warn_bad_map(subflow, ssn + skb->len);
+               dbg_bad_map(subflow, ssn);
                return false;
        }
        return true;
@@ -1000,7 +1000,7 @@ static bool subflow_check_data_avail(struct sock *ssk)
        struct sk_buff *skb;
 
        if (!skb_peek(&ssk->sk_receive_queue))
-               subflow->data_avail = 0;
+               WRITE_ONCE(subflow->data_avail, 0);
        if (subflow->data_avail)
                return true;
 
@@ -1039,18 +1039,13 @@ static bool subflow_check_data_avail(struct sock *ssk)
                ack_seq = mptcp_subflow_get_mapped_dsn(subflow);
                pr_debug("msk ack_seq=%llx subflow ack_seq=%llx", old_ack,
                         ack_seq);
-               if (ack_seq == old_ack) {
-                       subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL;
-                       break;
-               } else if (after64(ack_seq, old_ack)) {
-                       subflow->data_avail = MPTCP_SUBFLOW_OOO_DATA;
-                       break;
+               if (unlikely(before64(ack_seq, old_ack))) {
+                       mptcp_subflow_discard_data(ssk, skb, old_ack - ack_seq);
+                       continue;
                }
 
-               /* only accept in-sequence mapping. Old values are spurious
-                * retransmission
-                */
-               mptcp_subflow_discard_data(ssk, skb, old_ack - ack_seq);
+               WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL);
+               break;
        }
        return true;
 
@@ -1065,12 +1060,11 @@ fallback:
                 * subflow_error_report() will introduce the appropriate barriers
                 */
                ssk->sk_err = EBADMSG;
-               ssk->sk_error_report(ssk);
                tcp_set_state(ssk, TCP_CLOSE);
                subflow->reset_transient = 0;
                subflow->reset_reason = MPTCP_RST_EMPTCP;
                tcp_send_active_reset(ssk, GFP_ATOMIC);
-               subflow->data_avail = 0;
+               WRITE_ONCE(subflow->data_avail, 0);
                return false;
        }
 
@@ -1080,7 +1074,7 @@ fallback:
        subflow->map_seq = READ_ONCE(msk->ack_seq);
        subflow->map_data_len = skb->len;
        subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq - subflow->ssn_offset;
-       subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL;
+       WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL);
        return true;
 }
 
@@ -1092,7 +1086,7 @@ bool mptcp_subflow_data_available(struct sock *sk)
        if (subflow->map_valid &&
            mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len) {
                subflow->map_valid = 0;
-               subflow->data_avail = 0;
+               WRITE_ONCE(subflow->data_avail, 0);
 
                pr_debug("Done with mapping: seq=%u data_len=%u",
                         subflow->map_subflow_seq,
@@ -1120,41 +1114,6 @@ void mptcp_space(const struct sock *ssk, int *space, int *full_space)
        *full_space = tcp_full_space(sk);
 }
 
-static void subflow_data_ready(struct sock *sk)
-{
-       struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
-       u16 state = 1 << inet_sk_state_load(sk);
-       struct sock *parent = subflow->conn;
-       struct mptcp_sock *msk;
-
-       msk = mptcp_sk(parent);
-       if (state & TCPF_LISTEN) {
-               /* MPJ subflow are removed from accept queue before reaching here,
-                * avoid stray wakeups
-                */
-               if (reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue))
-                       return;
-
-               set_bit(MPTCP_DATA_READY, &msk->flags);
-               parent->sk_data_ready(parent);
-               return;
-       }
-
-       WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable &&
-                    !subflow->mp_join && !(state & TCPF_CLOSE));
-
-       if (mptcp_subflow_data_available(sk))
-               mptcp_data_ready(parent, sk);
-}
-
-static void subflow_write_space(struct sock *ssk)
-{
-       struct sock *sk = mptcp_subflow_ctx(ssk)->conn;
-
-       mptcp_propagate_sndbuf(sk, ssk);
-       mptcp_write_space(sk);
-}
-
 void __mptcp_error_report(struct sock *sk)
 {
        struct mptcp_subflow_context *subflow;
@@ -1195,6 +1154,43 @@ static void subflow_error_report(struct sock *ssk)
        mptcp_data_unlock(sk);
 }
 
+static void subflow_data_ready(struct sock *sk)
+{
+       struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+       u16 state = 1 << inet_sk_state_load(sk);
+       struct sock *parent = subflow->conn;
+       struct mptcp_sock *msk;
+
+       msk = mptcp_sk(parent);
+       if (state & TCPF_LISTEN) {
+                * MPJ subflows are removed from the accept queue before reaching here,
+                * avoid stray wakeups
+                */
+               if (reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue))
+                       return;
+
+               set_bit(MPTCP_DATA_READY, &msk->flags);
+               parent->sk_data_ready(parent);
+               return;
+       }
+
+       WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable &&
+                    !subflow->mp_join && !(state & TCPF_CLOSE));
+
+       if (mptcp_subflow_data_available(sk))
+               mptcp_data_ready(parent, sk);
+       else if (unlikely(sk->sk_err))
+               subflow_error_report(sk);
+}
+
+static void subflow_write_space(struct sock *ssk)
+{
+       struct sock *sk = mptcp_subflow_ctx(ssk)->conn;
+
+       mptcp_propagate_sndbuf(sk, ssk);
+       mptcp_write_space(sk);
+}
+
 static struct inet_connection_sock_af_ops *
 subflow_default_af_ops(struct sock *sk)
 {
@@ -1505,6 +1501,8 @@ static void subflow_state_change(struct sock *sk)
         */
        if (mptcp_subflow_data_available(sk))
                mptcp_data_ready(parent, sk);
+       else if (unlikely(sk->sk_err))
+               subflow_error_report(sk);
 
        subflow_sched_work_if_closed(mptcp_sk(parent), sk);
 
index b100c04..3d6d494 100644 (file)
@@ -31,6 +31,9 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff,
        int length = (th->doff * 4) - sizeof(*th);
        u8 buf[40], *ptr;
 
+       if (unlikely(length < 0))
+               return false;
+
        ptr = skb_header_pointer(skb, doff + sizeof(*th), length, buf);
        if (ptr == NULL)
                return false;
@@ -47,6 +50,8 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff,
                        length--;
                        continue;
                default:
+                       if (length < 2)
+                               return true;
                        opsize = *ptr++;
                        if (opsize < 2)
                                return true;
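
The guard added here, and the identical "length < 2" checks that appear in the sch_cake hunks further down, all harden the same TCP option-walk loop: the size byte must never be read unless at least two bytes of the options area remain, and a declared option size must fit in what is left. A minimal userspace sketch of the loop shape these fixes enforce (illustrative only, with hypothetical names; not the kernel code):

#include <stdint.h>
#include <stdio.h>

/* Walk a TCP options area of `length` bytes.  At the top of the loop,
 * `length` still counts the opcode byte about to be consumed, so two
 * bytes must remain before the size byte may be read.
 */
static int parse_tcp_options(const uint8_t *ptr, int length)
{
	while (length > 0) {
		uint8_t opcode = *ptr++;
		uint8_t opsize;

		if (opcode == 0)	/* TCPOPT_EOL: end of options */
			break;
		if (opcode == 1) {	/* TCPOPT_NOP: one byte, no size */
			length--;
			continue;
		}
		if (length < 2)		/* the guard the patches add */
			return -1;
		opsize = *ptr++;
		if (opsize < 2 || opsize > length)
			return -1;	/* malformed or truncated option */
		printf("option %d, %d bytes\n", opcode, opsize);
		ptr += opsize - 2;
		length -= opsize;
	}
	return 0;
}

int main(void)
{
	/* NOP, NOP, MSS (kind 2, length 4, MSS 1460) */
	const uint8_t opts[] = { 1, 1, 2, 4, 0x05, 0xb4 };
	return parse_tcp_options(opts, (int)sizeof(opts)) ? 1 : 0;
}

Without the "length < 2" check, a lone trailing opcode byte would make the parser read the size byte one past the end of the buffer.
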
index 72bc759..bf4d6ec 100644 (file)
@@ -4364,13 +4364,45 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
        err = nf_tables_set_alloc_name(&ctx, set, name);
        kfree(name);
        if (err < 0)
-               goto err_set_alloc_name;
+               goto err_set_name;
+
+       udata = NULL;
+       if (udlen) {
+               udata = set->data + size;
+               nla_memcpy(udata, nla[NFTA_SET_USERDATA], udlen);
+       }
+
+       INIT_LIST_HEAD(&set->bindings);
+       INIT_LIST_HEAD(&set->catchall_list);
+       set->table = table;
+       write_pnet(&set->net, net);
+       set->ops = ops;
+       set->ktype = ktype;
+       set->klen = desc.klen;
+       set->dtype = dtype;
+       set->objtype = objtype;
+       set->dlen = desc.dlen;
+       set->flags = flags;
+       set->size = desc.size;
+       set->policy = policy;
+       set->udlen = udlen;
+       set->udata = udata;
+       set->timeout = timeout;
+       set->gc_int = gc_int;
+
+       set->field_count = desc.field_count;
+       for (i = 0; i < desc.field_count; i++)
+               set->field_len[i] = desc.field_len[i];
+
+       err = ops->init(set, &desc, nla);
+       if (err < 0)
+               goto err_set_init;
 
        if (nla[NFTA_SET_EXPR]) {
                expr = nft_set_elem_expr_alloc(&ctx, set, nla[NFTA_SET_EXPR]);
                if (IS_ERR(expr)) {
                        err = PTR_ERR(expr);
-                       goto err_set_alloc_name;
+                       goto err_set_expr_alloc;
                }
                set->exprs[0] = expr;
                set->num_exprs++;
@@ -4381,75 +4413,44 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
 
                if (!(flags & NFT_SET_EXPR)) {
                        err = -EINVAL;
-                       goto err_set_alloc_name;
+                       goto err_set_expr_alloc;
                }
                i = 0;
                nla_for_each_nested(tmp, nla[NFTA_SET_EXPRESSIONS], left) {
                        if (i == NFT_SET_EXPR_MAX) {
                                err = -E2BIG;
-                               goto err_set_init;
+                               goto err_set_expr_alloc;
                        }
                        if (nla_type(tmp) != NFTA_LIST_ELEM) {
                                err = -EINVAL;
-                               goto err_set_init;
+                               goto err_set_expr_alloc;
                        }
                        expr = nft_set_elem_expr_alloc(&ctx, set, tmp);
                        if (IS_ERR(expr)) {
                                err = PTR_ERR(expr);
-                               goto err_set_init;
+                               goto err_set_expr_alloc;
                        }
                        set->exprs[i++] = expr;
                        set->num_exprs++;
                }
        }
 
-       udata = NULL;
-       if (udlen) {
-               udata = set->data + size;
-               nla_memcpy(udata, nla[NFTA_SET_USERDATA], udlen);
-       }
-
-       INIT_LIST_HEAD(&set->bindings);
-       INIT_LIST_HEAD(&set->catchall_list);
-       set->table = table;
-       write_pnet(&set->net, net);
-       set->ops   = ops;
-       set->ktype = ktype;
-       set->klen  = desc.klen;
-       set->dtype = dtype;
-       set->objtype = objtype;
-       set->dlen  = desc.dlen;
-       set->flags = flags;
-       set->size  = desc.size;
-       set->policy = policy;
-       set->udlen  = udlen;
-       set->udata  = udata;
-       set->timeout = timeout;
-       set->gc_int = gc_int;
        set->handle = nf_tables_alloc_handle(table);
 
-       set->field_count = desc.field_count;
-       for (i = 0; i < desc.field_count; i++)
-               set->field_len[i] = desc.field_len[i];
-
-       err = ops->init(set, &desc, nla);
-       if (err < 0)
-               goto err_set_init;
-
        err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set);
        if (err < 0)
-               goto err_set_trans;
+               goto err_set_expr_alloc;
 
        list_add_tail_rcu(&set->list, &table->sets);
        table->use++;
        return 0;
 
-err_set_trans:
-       ops->destroy(set);
-err_set_init:
+err_set_expr_alloc:
        for (i = 0; i < set->num_exprs; i++)
                nft_expr_destroy(&ctx, set->exprs[i]);
-err_set_alloc_name:
+
+       ops->destroy(set);
+err_set_init:
        kfree(set->name);
 err_set_name:
        kvfree(set);
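
The large move above is about error unwinding rather than behaviour: running ops->init() before the expression allocations lets every later failure funnel through the single err_set_expr_alloc label, which destroys the expressions, then the set backend, then the name, in strict reverse order of acquisition. A minimal sketch of that goto-ladder discipline (hypothetical names, not the nf_tables code):

#include <stdlib.h>

struct thing {
	char *name;
	void *backend;	/* stands in for the ops->init() state */
	void **exprs;
};

/* Acquire in one fixed order, release in exact reverse order, one
 * label per acquired resource.  A failure at step N jumps to the
 * label that undoes step N-1 and falls through the rest.
 */
static int thing_create(struct thing *t)
{
	t->name = malloc(16);
	if (!t->name)
		return -1;

	t->backend = malloc(32);
	if (!t->backend)
		goto err_name;

	t->exprs = calloc(4, sizeof(*t->exprs));
	if (!t->exprs)
		goto err_backend;

	return 0;

err_backend:
	free(t->backend);
err_name:
	free(t->name);
	return -1;
}
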
index ae906eb..330ba68 100644 (file)
@@ -2683,7 +2683,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
        }
        if (likely(saddr == NULL)) {
                dev     = packet_cached_dev_get(po);
-               proto   = po->num;
+               proto   = READ_ONCE(po->num);
        } else {
                err = -EINVAL;
                if (msg->msg_namelen < sizeof(struct sockaddr_ll))
@@ -2896,7 +2896,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
 
        if (likely(saddr == NULL)) {
                dev     = packet_cached_dev_get(po);
-               proto   = po->num;
+               proto   = READ_ONCE(po->num);
        } else {
                err = -EINVAL;
                if (msg->msg_namelen < sizeof(struct sockaddr_ll))
@@ -3034,10 +3034,13 @@ static int packet_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
        struct sock *sk = sock->sk;
        struct packet_sock *po = pkt_sk(sk);
 
-       if (po->tx_ring.pg_vec)
+       /* Reading tx_ring.pg_vec without holding pg_vec_lock is racy.
+        * tpacket_snd() will redo the check safely.
+        */
+       if (data_race(po->tx_ring.pg_vec))
                return tpacket_snd(po, msg);
-       else
-               return packet_snd(sock, msg, len);
+
+       return packet_snd(sock, msg, len);
 }
 
 /*
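
The data_race() annotation above deserves a note: the lockless read of tx_ring.pg_vec only selects a code path, and tpacket_snd() re-validates the ring under pg_vec_lock before trusting it, so the race is benign and the marker exists to tell KCSAN exactly that. A simplified userspace model of the fast-path-peek plus locked-recheck shape (relaxed atomics stand in for the kernel annotation; this is not the af_packet code):

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

struct ring {
	pthread_mutex_t lock;	/* models pg_vec_lock */
	_Atomic(void *) pg_vec;	/* only set/cleared with lock held */
};

static bool ring_send(struct ring *r)
{
	/* Racy peek: it only chooses a handler, never dereferences. */
	if (atomic_load_explicit(&r->pg_vec, memory_order_relaxed) == NULL)
		return false;	/* take the plain send path */

	/* The chosen handler re-checks under the lock. */
	pthread_mutex_lock(&r->lock);
	bool usable = atomic_load_explicit(&r->pg_vec,
					   memory_order_relaxed) != NULL;
	pthread_mutex_unlock(&r->lock);
	return usable;
}
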
@@ -3168,7 +3171,7 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
                        /* prevents packet_notifier() from calling
                         * register_prot_hook()
                         */
-                       po->num = 0;
+                       WRITE_ONCE(po->num, 0);
                        __unregister_prot_hook(sk, true);
                        rcu_read_lock();
                        dev_curr = po->prot_hook.dev;
@@ -3178,17 +3181,17 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
                }
 
                BUG_ON(po->running);
-               po->num = proto;
+               WRITE_ONCE(po->num, proto);
                po->prot_hook.type = proto;
 
                if (unlikely(unlisted)) {
                        dev_put(dev);
                        po->prot_hook.dev = NULL;
-                       po->ifindex = -1;
+                       WRITE_ONCE(po->ifindex, -1);
                        packet_cached_dev_reset(po);
                } else {
                        po->prot_hook.dev = dev;
-                       po->ifindex = dev ? dev->ifindex : 0;
+                       WRITE_ONCE(po->ifindex, dev ? dev->ifindex : 0);
                        packet_cached_dev_assign(po, dev);
                }
        }
@@ -3502,7 +3505,7 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
        uaddr->sa_family = AF_PACKET;
        memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data));
        rcu_read_lock();
-       dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex);
+       dev = dev_get_by_index_rcu(sock_net(sk), READ_ONCE(pkt_sk(sk)->ifindex));
        if (dev)
                strlcpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data));
        rcu_read_unlock();
@@ -3517,16 +3520,18 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
        struct sock *sk = sock->sk;
        struct packet_sock *po = pkt_sk(sk);
        DECLARE_SOCKADDR(struct sockaddr_ll *, sll, uaddr);
+       int ifindex;
 
        if (peer)
                return -EOPNOTSUPP;
 
+       ifindex = READ_ONCE(po->ifindex);
        sll->sll_family = AF_PACKET;
-       sll->sll_ifindex = po->ifindex;
-       sll->sll_protocol = po->num;
+       sll->sll_ifindex = ifindex;
+       sll->sll_protocol = READ_ONCE(po->num);
        sll->sll_pkttype = 0;
        rcu_read_lock();
-       dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex);
+       dev = dev_get_by_index_rcu(sock_net(sk), ifindex);
        if (dev) {
                sll->sll_hatype = dev->type;
                sll->sll_halen = dev->addr_len;
@@ -4105,7 +4110,7 @@ static int packet_notifier(struct notifier_block *this,
                                }
                                if (msg == NETDEV_UNREGISTER) {
                                        packet_cached_dev_reset(po);
-                                       po->ifindex = -1;
+                                       WRITE_ONCE(po->ifindex, -1);
                                        if (po->prot_hook.dev)
                                                dev_put(po->prot_hook.dev);
                                        po->prot_hook.dev = NULL;
@@ -4411,7 +4416,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
        was_running = po->running;
        num = po->num;
        if (was_running) {
-               po->num = 0;
+               WRITE_ONCE(po->num, 0);
                __unregister_prot_hook(sk, false);
        }
        spin_unlock(&po->bind_lock);
@@ -4446,7 +4451,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
 
        spin_lock(&po->bind_lock);
        if (was_running) {
-               po->num = num;
+               WRITE_ONCE(po->num, num);
                register_prot_hook(sk);
        }
        spin_unlock(&po->bind_lock);
@@ -4616,8 +4621,8 @@ static int packet_seq_show(struct seq_file *seq, void *v)
                           s,
                           refcount_read(&s->sk_refcnt),
                           s->sk_type,
-                          ntohs(po->num),
-                          po->ifindex,
+                          ntohs(READ_ONCE(po->num)),
+                          READ_ONCE(po->ifindex),
                           po->running,
                           atomic_read(&s->sk_rmem_alloc),
                           from_kuid_munged(seq_user_ns(seq), sock_i_uid(s)),
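
All of the po->num and po->ifindex churn in this file follows one pattern: writers still serialize on bind_lock, while readers such as packet_getname() and the /proc handler run lockless, so both sides are annotated to force exactly one untorn access per field. A simplified model of the pairing (volatile casts stand in for the kernel macros; not the af_packet code):

#include <pthread.h>

#define WRITE_ONCE(x, val) (*(volatile __typeof__(x) *)&(x) = (val))
#define READ_ONCE(x)       (*(volatile const __typeof__(x) *)&(x))

struct psock {
	pthread_mutex_t bind_lock;
	int num;
	int ifindex;
};

static void psock_bind(struct psock *po, int proto, int ifindex)
{
	pthread_mutex_lock(&po->bind_lock);
	WRITE_ONCE(po->num, proto);	/* still written under the lock */
	WRITE_ONCE(po->ifindex, ifindex);
	pthread_mutex_unlock(&po->bind_lock);
}

static void psock_name(const struct psock *po, int *proto, int *ifindex)
{
	/* Lockless reader: one load per field, no lock taken. */
	*ifindex = READ_ONCE(po->ifindex);
	*proto = READ_ONCE(po->num);
}

Reading ifindex into a local once, as packet_getname() now does, also guarantees that both uses inside the function observe the same value.
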
index c0477be..f2efaa4 100644 (file)
@@ -436,7 +436,7 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len)
        struct qrtr_sock *ipc;
        struct sk_buff *skb;
        struct qrtr_cb *cb;
-       unsigned int size;
+       size_t size;
        unsigned int ver;
        size_t hdrlen;
 
index 4db109f..5b426dc 100644 (file)
@@ -714,7 +714,7 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
 
                if (rds_cmsg_recv(inc, msg, rs)) {
                        ret = -EFAULT;
-                       goto out;
+                       break;
                }
                rds_recvmsg_zcookie(rs, msg);
 
index 18edd9a..a656baa 100644 (file)
@@ -904,14 +904,19 @@ static int tcf_ct_act_nat(struct sk_buff *skb,
        }
 
        err = ct_nat_execute(skb, ct, ctinfo, range, maniptype);
-       if (err == NF_ACCEPT &&
-           ct->status & IPS_SRC_NAT && ct->status & IPS_DST_NAT) {
-               if (maniptype == NF_NAT_MANIP_SRC)
-                       maniptype = NF_NAT_MANIP_DST;
-               else
-                       maniptype = NF_NAT_MANIP_SRC;
-
-               err = ct_nat_execute(skb, ct, ctinfo, range, maniptype);
+       if (err == NF_ACCEPT && ct->status & IPS_DST_NAT) {
+               if (ct->status & IPS_SRC_NAT) {
+                       if (maniptype == NF_NAT_MANIP_SRC)
+                               maniptype = NF_NAT_MANIP_DST;
+                       else
+                               maniptype = NF_NAT_MANIP_SRC;
+
+                       err = ct_nat_execute(skb, ct, ctinfo, range,
+                                            maniptype);
+               } else if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
+                       err = ct_nat_execute(skb, ct, ctinfo, NULL,
+                                            NF_NAT_MANIP_SRC);
+               }
        }
        return err;
 #else
index 7d37638..9515428 100644 (file)
@@ -943,7 +943,7 @@ static struct tcphdr *cake_get_tcphdr(const struct sk_buff *skb,
        }
 
        tcph = skb_header_pointer(skb, offset, sizeof(_tcph), &_tcph);
-       if (!tcph)
+       if (!tcph || tcph->doff < 5)
                return NULL;
 
        return skb_header_pointer(skb, offset,
@@ -967,6 +967,8 @@ static const void *cake_get_tcpopt(const struct tcphdr *tcph,
                        length--;
                        continue;
                }
+               if (length < 2)
+                       break;
                opsize = *ptr++;
                if (opsize < 2 || opsize > length)
                        break;
@@ -1104,6 +1106,8 @@ static bool cake_tcph_may_drop(const struct tcphdr *tcph,
                        length--;
                        continue;
                }
+               if (length < 2)
+                       break;
                opsize = *ptr++;
                if (opsize < 2 || opsize > length)
                        break;
@@ -2338,7 +2342,7 @@ static int cake_config_precedence(struct Qdisc *sch)
 
 /*     List of known Diffserv codepoints:
  *
- *     Least Effort (CS1)
+ *     Least Effort (CS1, LE)
  *     Best Effort (CS0)
  *     Max Reliability & LLT "Lo" (TOS1)
  *     Max Throughput (TOS2)
@@ -2360,7 +2364,7 @@ static int cake_config_precedence(struct Qdisc *sch)
  *     Total 25 codepoints.
  */
 
-/*     List of traffic classes in RFC 4594:
+/*     List of traffic classes in RFC 4594, updated by RFC 8622:
  *             (roughly descending order of contended priority)
  *             (roughly ascending order of uncontended throughput)
  *
@@ -2375,7 +2379,7 @@ static int cake_config_precedence(struct Qdisc *sch)
  *     Ops, Admin, Management (CS2,TOS1) - eg. ssh
  *     Standard Service (CS0 & unrecognised codepoints)
  *     High Throughput Data (AF1x,TOS2)  - eg. web traffic
- *     Low Priority Data (CS1)           - eg. BitTorrent
+ *     Low Priority Data (CS1,LE)        - eg. BitTorrent
 
  *     Total 12 traffic classes.
  */
@@ -2391,7 +2395,7 @@ static int cake_config_diffserv8(struct Qdisc *sch)
  *             Video Streaming          (AF4x, AF3x, CS3)
  *             Bog Standard             (CS0 etc.)
  *             High Throughput          (AF1x, TOS2)
- *             Background Traffic       (CS1)
+ *             Background Traffic       (CS1, LE)
  *
  *             Total 8 traffic classes.
  */
@@ -2435,7 +2439,7 @@ static int cake_config_diffserv4(struct Qdisc *sch)
  *         Latency Sensitive  (CS7, CS6, EF, VA, CS5, CS4)
  *         Streaming Media    (AF4x, AF3x, CS3, AF2x, TOS4, CS2, TOS1)
  *         Best Effort        (CS0, AF1x, TOS2, and those not specified)
- *         Background Traffic (CS1)
+ *         Background Traffic (CS1, LE)
  *
  *             Total 4 traffic classes.
  */
@@ -2473,7 +2477,7 @@ static int cake_config_diffserv4(struct Qdisc *sch)
 static int cake_config_diffserv3(struct Qdisc *sch)
 {
 /*  Simplified Diffserv structure with 3 tins.
- *             Low Priority            (CS1)
+ *             Low Priority            (CS1, LE)
  *             Best Effort
  *             Latency Sensitive       (TOS4, VA, EF, CS6, CS7)
  */
index 27e3e7d..4f2c6d2 100644 (file)
@@ -1072,19 +1072,6 @@ static long sock_do_ioctl(struct net *net, struct socket *sock,
  *     what to do with it - that's up to the protocol still.
  */
 
-/**
- *     get_net_ns - increment the refcount of the network namespace
- *     @ns: common namespace (net)
- *
- *     Returns the net's common namespace.
- */
-
-struct ns_common *get_net_ns(struct ns_common *ns)
-{
-       return &get_net(container_of(ns, struct net, ns))->ns;
-}
-EXPORT_SYMBOL_GPL(get_net_ns);
-
 static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 {
        struct socket *sock;
index 5a31307..5d1192c 100644 (file)
@@ -535,12 +535,14 @@ static void unix_release_sock(struct sock *sk, int embrion)
        u->path.mnt = NULL;
        state = sk->sk_state;
        sk->sk_state = TCP_CLOSE;
+
+       skpair = unix_peer(sk);
+       unix_peer(sk) = NULL;
+
        unix_state_unlock(sk);
 
        wake_up_interruptible_all(&u->peer_wait);
 
-       skpair = unix_peer(sk);
-
        if (skpair != NULL) {
                if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
                        unix_state_lock(skpair);
@@ -555,7 +557,6 @@ static void unix_release_sock(struct sock *sk, int embrion)
 
                unix_dgram_peer_wake_disconnect(sk, skpair);
                sock_put(skpair); /* It may now die */
-               unix_peer(sk) = NULL;
        }
 
        /* Try to flush out this socket. Throw out buffers at least */
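
The unix_release_sock() change closes a claim race: the peer pointer used to be read after the state lock was dropped, so two threads releasing both ends of a pair could each fetch the other's peer and drop the same reference twice. Reading and clearing the pointer inside the same locked section makes the claim exclusive. A minimal model (simplified; not the AF_UNIX code):

#include <pthread.h>
#include <stddef.h>

struct usock {
	pthread_mutex_t state_lock;
	struct usock *peer;
};

/* Claim the peer exactly once: read it and clear it under the same
 * lock.  A NULL return means the other path already claimed it.
 */
static struct usock *claim_peer(struct usock *sk)
{
	pthread_mutex_lock(&sk->state_lock);
	struct usock *skpair = sk->peer;
	sk->peer = NULL;
	pthread_mutex_unlock(&sk->state_lock);
	return skpair;
}
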
index 2eee939..af590ae 100644 (file)
@@ -28,7 +28,7 @@ $(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.hex)
        @$(kecho) "  GEN     $@"
        @(echo '#include "reg.h"'; \
          echo 'const u8 shipped_regdb_certs[] = {'; \
-         cat $^ ; \
+         echo | cat - $^ ; \
          echo '};'; \
          echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);'; \
         ) > $@
index 6fbf753..8d0883e 100644 (file)
@@ -1340,6 +1340,11 @@ void cfg80211_register_wdev(struct cfg80211_registered_device *rdev,
        rdev->devlist_generation++;
        wdev->registered = true;
 
+       if (wdev->netdev &&
+           sysfs_create_link(&wdev->netdev->dev.kobj, &rdev->wiphy.dev.kobj,
+                             "phy80211"))
+               pr_err("failed to add phy80211 symlink to netdev!\n");
+
        nl80211_notify_iface(rdev, wdev, NL80211_CMD_NEW_INTERFACE);
 }
 
@@ -1365,14 +1370,6 @@ int cfg80211_register_netdevice(struct net_device *dev)
        if (ret)
                goto out;
 
-       if (sysfs_create_link(&dev->dev.kobj, &rdev->wiphy.dev.kobj,
-                             "phy80211")) {
-               pr_err("failed to add phy80211 symlink to netdev!\n");
-               unregister_netdevice(dev);
-               ret = -EINVAL;
-               goto out;
-       }
-
        cfg80211_register_wdev(rdev, wdev);
        ret = 0;
 out:
index 6bdd964..d245968 100644 (file)
@@ -334,6 +334,7 @@ void cfg80211_pmsr_complete(struct wireless_dev *wdev,
                            gfp_t gfp)
 {
        struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
+       struct cfg80211_pmsr_request *tmp, *prev, *to_free = NULL;
        struct sk_buff *msg;
        void *hdr;
 
@@ -364,9 +365,20 @@ free_msg:
        nlmsg_free(msg);
 free_request:
        spin_lock_bh(&wdev->pmsr_lock);
-       list_del(&req->list);
+       /*
+        * cfg80211_pmsr_process_abort() may have already moved this request
+        * to the free list, and will free it later. In this case, don't free
+        * it here.
+        */
+       list_for_each_entry_safe(tmp, prev, &wdev->pmsr_list, list) {
+               if (tmp == req) {
+                       list_del(&req->list);
+                       to_free = req;
+                       break;
+               }
+       }
        spin_unlock_bh(&wdev->pmsr_lock);
-       kfree(req);
+       kfree(to_free);
 }
 EXPORT_SYMBOL_GPL(cfg80211_pmsr_complete);
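
The pmsr completion fix is the "free it only if you still own it" idiom: cfg80211_pmsr_process_abort() may already have unlinked the request, so the completion path frees it only if it still finds it on wdev->pmsr_list. The same idea on a plain singly linked list (illustrative; no kernel list API):

#include <stddef.h>

struct req {
	struct req *next;
};

/* Unlink `victim` only if it is still on the list.  The caller may
 * free a non-NULL return; NULL means another path already claimed
 * (and will free) the node.
 */
static struct req *unlink_if_listed(struct req **head, struct req *victim)
{
	for (struct req **p = head; *p; p = &(*p)->next) {
		if (*p == victim) {
			*p = victim->next;
			return victim;
		}
	}
	return NULL;
}
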
 
index 9b959e3..0c3f05c 100644 (file)
@@ -133,6 +133,10 @@ static int wiphy_resume(struct device *dev)
        if (rdev->wiphy.registered && rdev->ops->resume)
                ret = rdev_resume(rdev);
        wiphy_unlock(&rdev->wiphy);
+
+       if (ret)
+               cfg80211_shutdown_all_interfaces(&rdev->wiphy);
+
        rtnl_unlock();
 
        return ret;
index 7ec021a..18dba3d 100644 (file)
@@ -1059,6 +1059,9 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
                case NL80211_IFTYPE_MESH_POINT:
                        /* mesh should be handled? */
                        break;
+               case NL80211_IFTYPE_OCB:
+                       cfg80211_leave_ocb(rdev, dev);
+                       break;
                default:
                        break;
                }
index f9b1952..1e9baa5 100644 (file)
@@ -192,15 +192,20 @@ static unsigned int get_symindex(Elf_Sym const *sym, Elf32_Word const *symtab,
                                 Elf32_Word const *symtab_shndx)
 {
        unsigned long offset;
+       unsigned short shndx = w2(sym->st_shndx);
        int index;
 
-       if (sym->st_shndx != SHN_XINDEX)
-               return w2(sym->st_shndx);
+       if (shndx > SHN_UNDEF && shndx < SHN_LORESERVE)
+               return shndx;
 
-       offset = (unsigned long)sym - (unsigned long)symtab;
-       index = offset / sizeof(*sym);
+       if (shndx == SHN_XINDEX) {
+               offset = (unsigned long)sym - (unsigned long)symtab;
+               index = offset / sizeof(*sym);
 
-       return w(symtab_shndx[index]);
+               return w(symtab_shndx[index]);
+       }
+
+       return 0;
 }
 
 static unsigned int get_shnum(Elf_Ehdr const *ehdr, Elf_Shdr const *shdr0)
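
The recordmcount fix tightens how a symbol's section index is resolved: reserved values (SHN_ABS, SHN_COMMON and friends) must never be used to subscript the section-header table, and only SHN_XINDEX redirects the lookup to the SHT_SYMTAB_SHNDX array. The same rule as a standalone userspace helper (a sketch, 64-bit ELF only for brevity):

#include <elf.h>
#include <stddef.h>
#include <stdint.h>

static uint32_t sym_shndx(const Elf64_Sym *sym, size_t symidx,
			  const uint32_t *shndx_table)
{
	uint16_t shndx = sym->st_shndx;

	if (shndx > SHN_UNDEF && shndx < SHN_LORESERVE)
		return shndx;			/* ordinary index */
	if (shndx == SHN_XINDEX && shndx_table)
		return shndx_table[symidx];	/* extended index */
	return 0;				/* reserved: no section */
}
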
index 6de5a7f..d2a9420 100644 (file)
@@ -863,8 +863,7 @@ __SYSCALL(__NR_process_madvise, sys_process_madvise)
 __SC_COMP(__NR_epoll_pwait2, sys_epoll_pwait2, compat_sys_epoll_pwait2)
 #define __NR_mount_setattr 442
 __SYSCALL(__NR_mount_setattr, sys_mount_setattr)
-#define __NR_quotactl_path 443
-__SYSCALL(__NR_quotactl_path, sys_quotactl_path)
+/* 443 is reserved for quotactl_path */
 
 #define __NR_landlock_create_ruleset 444
 __SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset)
index 7d66876..d1b3270 100644 (file)
@@ -289,6 +289,9 @@ struct sockaddr_in {
 /* Address indicating an error return. */
 #define        INADDR_NONE             ((unsigned long int) 0xffffffff)
 
+/* Dummy address for src of ICMP replies if no real address is set (RFC7600). */
+#define        INADDR_DUMMY            ((unsigned long int) 0xc0000008)
+
 /* Network number for local host loopback. */
 #define        IN_LOOPBACKNET          127
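
For reference, the new constant is the RFC 7600 "dummy" IPv4 address 192.0.0.8, used as the source of ICMP errors when the replying host owns no suitable IPv4 address; the icmp.sh selftest added below verifies exactly that substitution. A quick userspace check of what the constant renders as (the #define is copied from the hunk above; the kernel sends it with htonl() as shown):

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>

#define INADDR_DUMMY ((unsigned long int) 0xc0000008)

int main(void)
{
	struct in_addr a = { .s_addr = htonl(INADDR_DUMMY) };
	printf("%s\n", inet_ntoa(a));	/* prints 192.0.0.8 */
	return 0;
}
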
 
index 6061431..e9b619a 100644 (file)
@@ -1094,7 +1094,7 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
                        goto out_put_ctx;
                }
                if (xsk->fd == umem->fd)
-                       umem->rx_ring_setup_done = true;
+                       umem->tx_ring_setup_done = true;
        }
 
        err = xsk_get_mmap_offsets(xsk->fd, &off);
index 22eb31e..2f9948b 100755 (executable)
@@ -11,9 +11,9 @@ compare_number()
        second_num=$2
 
        # upper bound is first_num * 110%
-       upper=$(( $first_num + $first_num / 10 ))
+       upper=$(expr $first_num + $first_num / 10 )
        # lower bound is first_num * 90%
-       lower=$(( $first_num - $first_num / 10 ))
+       lower=$(expr $first_num - $first_num / 10 )
 
        if [ $second_num -gt $upper ] || [ $second_num -lt $lower ]; then
                echo "The difference between $first_num and $second_num are greater than 10%."
index b8fc5c5..0d8e3dc 100644 (file)
@@ -438,6 +438,4 @@ extern int __sys_socketpair(int family, int type, int protocol,
                            int __user *usockvec);
 extern int __sys_shutdown_sock(struct socket *sock, int how);
 extern int __sys_shutdown(int fd, int how);
-
-extern struct ns_common *get_net_ns(struct ns_common *ns);
 #endif /* _LINUX_SOCKET_H */
index 3ff4936..da19be7 100644 (file)
@@ -776,10 +776,10 @@ static int machine__process_ksymbol_register(struct machine *machine,
                if (dso) {
                        dso->kernel = DSO_SPACE__KERNEL;
                        map = map__new2(0, dso);
+                       dso__put(dso);
                }
 
                if (!dso || !map) {
-                       dso__put(dso);
                        return -ENOMEM;
                }
 
@@ -792,6 +792,7 @@ static int machine__process_ksymbol_register(struct machine *machine,
                map->start = event->ksymbol.addr;
                map->end = map->start + event->ksymbol.len;
                maps__insert(&machine->kmaps, map);
+               map__put(map);
                dso__set_loaded(dso);
 
                if (is_bpf_image(event->ksymbol.name)) {
index 8336dd8..d3cf2de 100644 (file)
@@ -162,10 +162,10 @@ static bool contains_event(struct evsel **metric_events, int num_events,
        return false;
 }
 
-static bool evsel_same_pmu(struct evsel *ev1, struct evsel *ev2)
+static bool evsel_same_pmu_or_none(struct evsel *ev1, struct evsel *ev2)
 {
        if (!ev1->pmu_name || !ev2->pmu_name)
-               return false;
+               return true;
 
        return !strcmp(ev1->pmu_name, ev2->pmu_name);
 }
@@ -288,7 +288,7 @@ static struct evsel *find_evsel_group(struct evlist *perf_evlist,
                         */
                        if (!has_constraint &&
                            ev->leader != metric_events[i]->leader &&
-                           evsel_same_pmu(ev->leader, metric_events[i]->leader))
+                           evsel_same_pmu_or_none(ev->leader, metric_events[i]->leader))
                                break;
                        if (!strcmp(metric_events[i]->name, ev->name)) {
                                set_bit(ev->idx, evlist_used);
@@ -1073,16 +1073,18 @@ static int metricgroup__add_metric_sys_event_iter(struct pmu_event *pe,
 
        ret = add_metric(d->metric_list, pe, d->metric_no_group, &m, NULL, d->ids);
        if (ret)
-               return ret;
+               goto out;
 
        ret = resolve_metric(d->metric_no_group,
                                     d->metric_list, NULL, d->ids);
        if (ret)
-               return ret;
+               goto out;
 
        *(d->has_match) = true;
 
-       return *d->ret;
+out:
+       *(d->ret) = ret;
+       return ret;
 }
 
 static int metricgroup__add_metric(const char *metric, bool metric_no_group,
index 1512092..3a9e332 100644 (file)
@@ -1147,7 +1147,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
                }
        }
 
-       if (test->insn_processed) {
+       if (!unpriv && test->insn_processed) {
                uint32_t insn_processed;
                char *proc;
 
index ca8fdb1..7d7ebee 100644 (file)
@@ -61,6 +61,8 @@
        BPF_MOV64_IMM(BPF_REG_0, 0),
        BPF_EXIT_INSN(),
        },
+       .errstr_unpriv = "R1 !read_ok",
+       .result_unpriv = REJECT,
        .result = ACCEPT,
        .retval = 0
 },
index 8a1caf4..e061e87 100644 (file)
        BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, -1),
        BPF_EXIT_INSN(),
        },
+       .errstr_unpriv = "R0 invalid mem access 'inv'",
+       .result_unpriv = REJECT,
        .result = ACCEPT
 },
 {
        BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, -1),
        BPF_EXIT_INSN(),
        },
+       .errstr_unpriv = "R0 invalid mem access 'inv'",
+       .result_unpriv = REJECT,
        .result = ACCEPT
 },
 {
        BPF_MOV64_IMM(BPF_REG_0, 0),
        BPF_EXIT_INSN(),
        },
+       .errstr_unpriv = "R0 min value is outside of the allowed memory range",
+       .result_unpriv = REJECT,
        .fixup_map_hash_8b = { 3 },
        .result = ACCEPT,
 },
        BPF_MOV64_IMM(BPF_REG_0, 0),
        BPF_EXIT_INSN(),
        },
+       .errstr_unpriv = "R0 min value is outside of the allowed memory range",
+       .result_unpriv = REJECT,
        .fixup_map_hash_8b = { 3 },
        .result = ACCEPT,
 },
        BPF_MOV64_IMM(BPF_REG_0, 0),
        BPF_EXIT_INSN(),
        },
+       .errstr_unpriv = "R0 min value is outside of the allowed memory range",
+       .result_unpriv = REJECT,
        .fixup_map_hash_8b = { 3 },
        .result = ACCEPT,
 },
        BPF_MOV64_IMM(BPF_REG_0, 0),
        BPF_EXIT_INSN(),
        },
+       .errstr_unpriv = "R0 min value is outside of the allowed memory range",
+       .result_unpriv = REJECT,
        .fixup_map_hash_8b = { 3 },
        .result = ACCEPT,
 },
        BPF_MOV64_IMM(BPF_REG_0, 0),
        BPF_EXIT_INSN(),
        },
+       .errstr_unpriv = "R0 min value is outside of the allowed memory range",
+       .result_unpriv = REJECT,
        .fixup_map_hash_8b = { 3 },
        .result = ACCEPT,
 },
index 17fe33a..2c8935b 100644 (file)
@@ -8,6 +8,8 @@
        BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 10, -4),
        BPF_EXIT_INSN(),
        },
+       .errstr_unpriv = "R9 !read_ok",
+       .result_unpriv = REJECT,
        .result = ACCEPT,
        .retval = 7,
 },
index bd5cae4..1c857b2 100644 (file)
@@ -87,6 +87,8 @@
        BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
        BPF_EXIT_INSN(),
        },
+       .errstr_unpriv = "R9 !read_ok",
+       .result_unpriv = REJECT,
        .result = ACCEPT,
 },
 {
        BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
        BPF_EXIT_INSN(),
        },
+       .errstr_unpriv = "R9 !read_ok",
+       .result_unpriv = REJECT,
        .result = ACCEPT,
 },
 {
        BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
        BPF_EXIT_INSN(),
        },
+       .errstr_unpriv = "R9 !read_ok",
+       .result_unpriv = REJECT,
        .result = ACCEPT,
 },
 {
        BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
        BPF_EXIT_INSN(),
        },
+       .errstr_unpriv = "R0 invalid mem access 'inv'",
+       .result_unpriv = REJECT,
        .result = ACCEPT,
        .retval = 2,
 },
        BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
        BPF_EXIT_INSN(),
        },
+       .errstr_unpriv = "R0 invalid mem access 'inv'",
+       .result_unpriv = REJECT,
        .result = ACCEPT,
        .retval = 2,
 },
        BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
        BPF_EXIT_INSN(),
        },
+       .errstr_unpriv = "R0 invalid mem access 'inv'",
+       .result_unpriv = REJECT,
        .result = ACCEPT,
        .retval = 2,
 },
        BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
        BPF_EXIT_INSN(),
        },
+       .errstr_unpriv = "R0 invalid mem access 'inv'",
+       .result_unpriv = REJECT,
        .result = ACCEPT,
        .retval = 2,
 },
        BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
        BPF_EXIT_INSN(),
        },
+       .errstr_unpriv = "R0 invalid mem access 'inv'",
+       .result_unpriv = REJECT,
        .result = ACCEPT,
        .retval = 2,
 },
        BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
        BPF_EXIT_INSN(),
        },
+       .errstr_unpriv = "R0 invalid mem access 'inv'",
+       .result_unpriv = REJECT,
        .result = ACCEPT,
        .retval = 2,
 },
        BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
        BPF_EXIT_INSN(),
        },
+       .errstr_unpriv = "R0 invalid mem access 'inv'",
+       .result_unpriv = REJECT,
        .result = ACCEPT,
        .retval = 2,
 },
        BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
        BPF_EXIT_INSN(),
        },
+       .errstr_unpriv = "R0 invalid mem access 'inv'",
+       .result_unpriv = REJECT,
        .result = ACCEPT,
        .retval = 2,
 },
index 8dcd4e0..11fc68d 100644 (file)
@@ -82,8 +82,8 @@
        BPF_EXIT_INSN(),
        },
        .prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
-       .retval_unpriv = 1,
-       .result_unpriv = ACCEPT,
+       .errstr_unpriv = "R9 !read_ok",
+       .result_unpriv = REJECT,
        .retval = 1,
        .result = ACCEPT,
 },
        BPF_EXIT_INSN(),
        },
        .prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
-       .result_unpriv = ACCEPT,
+       .errstr_unpriv = "R9 !read_ok",
+       .result_unpriv = REJECT,
        .result = ACCEPT,
 },
 {
        BPF_EXIT_INSN(),
        },
        .prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
-       .result_unpriv = ACCEPT,
+       .errstr_unpriv = "R9 !read_ok",
+       .result_unpriv = REJECT,
        .result = ACCEPT,
 },
index bd436df..111801a 100644 (file)
        BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
        BPF_EXIT_INSN(),
        },
+       .errstr_unpriv = "R7 invalid mem access 'inv'",
+       .result_unpriv = REJECT,
        .result = ACCEPT,
        .retval = 0,
 },
index 7ae2859..a3e593d 100644 (file)
        .fixup_map_array_48b = { 1 },
        .result = ACCEPT,
        .result_unpriv = REJECT,
-       .errstr_unpriv = "R2 tried to add from different maps, paths or scalars",
+       .errstr_unpriv = "R2 pointer comparison prohibited",
        .retval = 0,
 },
 {
        BPF_MOV64_IMM(BPF_REG_0, 0),
        BPF_EXIT_INSN(),
        // fake-dead code; targeted from branch A to
-       // prevent dead code sanitization
+       // prevent dead code sanitization, rejected
+       // via branch B however
        BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
        BPF_MOV64_IMM(BPF_REG_0, 0),
        BPF_EXIT_INSN(),
        .fixup_map_array_48b = { 1 },
        .result = ACCEPT,
        .result_unpriv = REJECT,
-       .errstr_unpriv = "R2 tried to add from different maps, paths or scalars",
+       .errstr_unpriv = "R0 invalid mem access 'inv'",
        .retval = 0,
 },
 {
index 5c70596..a2b732c 100644 (file)
@@ -82,7 +82,7 @@ int kvm_check_cap(long cap)
 
        kvm_fd = open_kvm_dev_path_or_exit();
        ret = ioctl(kvm_fd, KVM_CHECK_EXTENSION, cap);
-       TEST_ASSERT(ret != -1, "KVM_CHECK_EXTENSION IOCTL failed,\n"
+       TEST_ASSERT(ret >= 0, "KVM_CHECK_EXTENSION IOCTL failed,\n"
                "  rc: %i errno: %i", ret, errno);
 
        close(kvm_fd);
index 6ad6c82..af1031f 100644 (file)
@@ -166,75 +166,75 @@ size_t get_def_hugetlb_pagesz(void)
        return 0;
 }
 
+#define ANON_FLAGS     (MAP_PRIVATE | MAP_ANONYMOUS)
+#define ANON_HUGE_FLAGS        (ANON_FLAGS | MAP_HUGETLB)
+
 const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(uint32_t i)
 {
-       static const int anon_flags = MAP_PRIVATE | MAP_ANONYMOUS;
-       static const int anon_huge_flags = anon_flags | MAP_HUGETLB;
-
        static const struct vm_mem_backing_src_alias aliases[] = {
                [VM_MEM_SRC_ANONYMOUS] = {
                        .name = "anonymous",
-                       .flag = anon_flags,
+                       .flag = ANON_FLAGS,
                },
                [VM_MEM_SRC_ANONYMOUS_THP] = {
                        .name = "anonymous_thp",
-                       .flag = anon_flags,
+                       .flag = ANON_FLAGS,
                },
                [VM_MEM_SRC_ANONYMOUS_HUGETLB] = {
                        .name = "anonymous_hugetlb",
-                       .flag = anon_huge_flags,
+                       .flag = ANON_HUGE_FLAGS,
                },
                [VM_MEM_SRC_ANONYMOUS_HUGETLB_16KB] = {
                        .name = "anonymous_hugetlb_16kb",
-                       .flag = anon_huge_flags | MAP_HUGE_16KB,
+                       .flag = ANON_HUGE_FLAGS | MAP_HUGE_16KB,
                },
                [VM_MEM_SRC_ANONYMOUS_HUGETLB_64KB] = {
                        .name = "anonymous_hugetlb_64kb",
-                       .flag = anon_huge_flags | MAP_HUGE_64KB,
+                       .flag = ANON_HUGE_FLAGS | MAP_HUGE_64KB,
                },
                [VM_MEM_SRC_ANONYMOUS_HUGETLB_512KB] = {
                        .name = "anonymous_hugetlb_512kb",
-                       .flag = anon_huge_flags | MAP_HUGE_512KB,
+                       .flag = ANON_HUGE_FLAGS | MAP_HUGE_512KB,
                },
                [VM_MEM_SRC_ANONYMOUS_HUGETLB_1MB] = {
                        .name = "anonymous_hugetlb_1mb",
-                       .flag = anon_huge_flags | MAP_HUGE_1MB,
+                       .flag = ANON_HUGE_FLAGS | MAP_HUGE_1MB,
                },
                [VM_MEM_SRC_ANONYMOUS_HUGETLB_2MB] = {
                        .name = "anonymous_hugetlb_2mb",
-                       .flag = anon_huge_flags | MAP_HUGE_2MB,
+                       .flag = ANON_HUGE_FLAGS | MAP_HUGE_2MB,
                },
                [VM_MEM_SRC_ANONYMOUS_HUGETLB_8MB] = {
                        .name = "anonymous_hugetlb_8mb",
-                       .flag = anon_huge_flags | MAP_HUGE_8MB,
+                       .flag = ANON_HUGE_FLAGS | MAP_HUGE_8MB,
                },
                [VM_MEM_SRC_ANONYMOUS_HUGETLB_16MB] = {
                        .name = "anonymous_hugetlb_16mb",
-                       .flag = anon_huge_flags | MAP_HUGE_16MB,
+                       .flag = ANON_HUGE_FLAGS | MAP_HUGE_16MB,
                },
                [VM_MEM_SRC_ANONYMOUS_HUGETLB_32MB] = {
                        .name = "anonymous_hugetlb_32mb",
-                       .flag = anon_huge_flags | MAP_HUGE_32MB,
+                       .flag = ANON_HUGE_FLAGS | MAP_HUGE_32MB,
                },
                [VM_MEM_SRC_ANONYMOUS_HUGETLB_256MB] = {
                        .name = "anonymous_hugetlb_256mb",
-                       .flag = anon_huge_flags | MAP_HUGE_256MB,
+                       .flag = ANON_HUGE_FLAGS | MAP_HUGE_256MB,
                },
                [VM_MEM_SRC_ANONYMOUS_HUGETLB_512MB] = {
                        .name = "anonymous_hugetlb_512mb",
-                       .flag = anon_huge_flags | MAP_HUGE_512MB,
+                       .flag = ANON_HUGE_FLAGS | MAP_HUGE_512MB,
                },
                [VM_MEM_SRC_ANONYMOUS_HUGETLB_1GB] = {
                        .name = "anonymous_hugetlb_1gb",
-                       .flag = anon_huge_flags | MAP_HUGE_1GB,
+                       .flag = ANON_HUGE_FLAGS | MAP_HUGE_1GB,
                },
                [VM_MEM_SRC_ANONYMOUS_HUGETLB_2GB] = {
                        .name = "anonymous_hugetlb_2gb",
-                       .flag = anon_huge_flags | MAP_HUGE_2GB,
+                       .flag = ANON_HUGE_FLAGS | MAP_HUGE_2GB,
                },
                [VM_MEM_SRC_ANONYMOUS_HUGETLB_16GB] = {
                        .name = "anonymous_hugetlb_16gb",
-                       .flag = anon_huge_flags | MAP_HUGE_16GB,
+                       .flag = ANON_HUGE_FLAGS | MAP_HUGE_16GB,
                },
                [VM_MEM_SRC_SHMEM] = {
                        .name = "shmem",
index 76d9487..5abe92d 100755 (executable)
@@ -1384,12 +1384,37 @@ ipv4_rt_replace()
        ipv4_rt_replace_mpath
 }
 
+# checks that cached input route on VRF port is deleted
+# when VRF is deleted
+ipv4_local_rt_cache()
+{
+       run_cmd "ip addr add 10.0.0.1/32 dev lo"
+       run_cmd "ip netns add test-ns"
+       run_cmd "ip link add veth-outside type veth peer name veth-inside"
+       run_cmd "ip link add vrf-100 type vrf table 1100"
+       run_cmd "ip link set veth-outside master vrf-100"
+       run_cmd "ip link set veth-inside netns test-ns"
+       run_cmd "ip link set veth-outside up"
+       run_cmd "ip link set vrf-100 up"
+       run_cmd "ip route add 10.1.1.1/32 dev veth-outside table 1100"
+       run_cmd "ip netns exec test-ns ip link set veth-inside up"
+       run_cmd "ip netns exec test-ns ip addr add 10.1.1.1/32 dev veth-inside"
+       run_cmd "ip netns exec test-ns ip route add 10.0.0.1/32 dev veth-inside"
+       run_cmd "ip netns exec test-ns ip route add default via 10.0.0.1"
+       run_cmd "ip netns exec test-ns ping 10.0.0.1 -c 1 -i 1"
+       run_cmd "ip link delete vrf-100"
+
+       # if we do not hang, the test is a success
+       log_test $? 0 "Cached route removed from VRF port device"
+}
+
 ipv4_route_test()
 {
        route_setup
 
        ipv4_rt_add
        ipv4_rt_replace
+       ipv4_local_rt_cache
 
        route_cleanup
 }
diff --git a/tools/testing/selftests/net/icmp.sh b/tools/testing/selftests/net/icmp.sh
new file mode 100755 (executable)
index 0000000..e4b04cd
--- /dev/null
@@ -0,0 +1,74 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for checking ICMP response with dummy address instead of 0.0.0.0.
+# Sets up two namespaces like:
+# +----------------------+                          +--------------------+
+# | ns1                  |    v4-via-v6 routes:     | ns2                |
+# |                      |                          |                    |
+# |             +--------+   -> 172.16.1.0/24 ->    +--------+           |
+# |             | veth0  +--------------------------+  veth0 |           |
+# |             +--------+   <- 172.16.0.0/24 <-    +--------+           |
+# |           172.16.0.1 |                          | 2001:db8:1::2/64   |
+# |     2001:db8:1::1/64 |                          |                    |
+# +----------------------+                          +--------------------+
+#
+# And then tries to ping 172.16.1.1 from ns1. This results in a "net
+# unreachable" message being sent from ns2, but there is no IPv4 address set in
+# that address space, so the kernel should substitute the dummy address
+# 192.0.0.8 defined in RFC7600.
+
+NS1=ns1
+NS2=ns2
+H1_IP=172.16.0.1/32
+H1_IP6=2001:db8:1::1
+RT1=172.16.1.0/24
+PINGADDR=172.16.1.1
+RT2=172.16.0.0/24
+H2_IP6=2001:db8:1::2
+
+TMPFILE=$(mktemp)
+
+cleanup()
+{
+    rm -f "$TMPFILE"
+    ip netns del $NS1
+    ip netns del $NS2
+}
+
+trap cleanup EXIT
+
+# Namespaces
+ip netns add $NS1
+ip netns add $NS2
+
+# Connectivity
+ip -netns $NS1 link add veth0 type veth peer name veth0 netns $NS2
+ip -netns $NS1 link set dev veth0 up
+ip -netns $NS2 link set dev veth0 up
+ip -netns $NS1 addr add $H1_IP dev veth0
+ip -netns $NS1 addr add $H1_IP6/64 dev veth0 nodad
+ip -netns $NS2 addr add $H2_IP6/64 dev veth0 nodad
+ip -netns $NS1 route add $RT1 via inet6 $H2_IP6
+ip -netns $NS2 route add $RT2 via inet6 $H1_IP6
+
+# Make sure ns2 will respond with ICMP unreachable
+ip netns exec $NS2 sysctl -qw net.ipv4.icmp_ratelimit=0 net.ipv4.ip_forward=1
+
+# Run the test - a ping runs in the background, and we capture ICMP responses
+# with tcpdump; -c 1 means it should exit on the first ping, but add a timeout
+# in case something goes wrong
+ip netns exec $NS1 ping -w 3 -i 0.5 $PINGADDR >/dev/null &
+ip netns exec $NS1 timeout 10 tcpdump -tpni veth0 -c 1 'icmp and icmp[icmptype] != icmp-echo' > $TMPFILE 2>/dev/null
+
+# Parse response and check for dummy address
+# tcpdump output looks like:
+# IP 192.0.0.8 > 172.16.0.1: ICMP net 172.16.1.1 unreachable, length 92
+RESP_IP=$(awk '{print $2}' < $TMPFILE)
+if [[ "$RESP_IP" != "192.0.0.8" ]]; then
+    echo "FAIL - got ICMP response from $RESP_IP, should be 192.0.0.8"
+    exit 1
+else
+    echo "OK"
+    exit 0
+fi
index 9ca5f1b..2b495dc 100755 (executable)
@@ -197,9 +197,6 @@ ip -net "$ns4" link set ns4eth3 up
 ip -net "$ns4" route add default via 10.0.3.2
 ip -net "$ns4" route add default via dead:beef:3::2
 
-# use TCP syn cookies, even if no flooding was detected.
-ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=2
-
 set_ethtool_flags() {
        local ns="$1"
        local dev="$2"
@@ -737,6 +734,14 @@ for sender in $ns1 $ns2 $ns3 $ns4;do
                exit $ret
        fi
 
+       # ns1<->ns2 is not subject to reordering/tc delays. Use it to test
+       # mptcp syncookie support.
+       if [ $sender = $ns1 ]; then
+               ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=2
+       else
+               ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=1
+       fi
+
        run_tests "$ns2" $sender 10.0.1.2
        run_tests "$ns2" $sender dead:beef:1::2
        run_tests "$ns2" $sender 10.0.2.1
index a8fa641..7f26591 100755 (executable)
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
 readonly BASE="ns-$(mktemp -u XXXXXX)"
index 2fedc07..11d7cdb 100755 (executable)
@@ -18,7 +18,8 @@ ret=0
 
 cleanup() {
        local ns
-       local -r jobs="$(jobs -p)"
+       local jobs
+       readonly jobs="$(jobs -p)"
        [ -n "${jobs}" ] && kill -1 ${jobs} 2>/dev/null
        rm -f $STATS
 
@@ -108,7 +109,7 @@ chk_gro() {
 
 if [ ! -f ../bpf/xdp_dummy.o ]; then
        echo "Missing xdp_dummy helper. Build bpf selftest first"
-       exit -1
+       exit 1
 fi
 
 create_ns
index 3171069..cd6430b 100644 (file)
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 # Makefile for netfilter selftests
 
-TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \
+TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \
        conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \
        nft_concat_range.sh nft_conntrack_helper.sh \
        nft_queue.sh nft_meta.sh nf_nat_edemux.sh \
diff --git a/tools/testing/selftests/netfilter/nft_fib.sh b/tools/testing/selftests/netfilter/nft_fib.sh
new file mode 100755 (executable)
index 0000000..6caf6ac
--- /dev/null
@@ -0,0 +1,221 @@
+#!/bin/bash
+#
+# This tests the fib expression.
+#
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+
+sfx=$(mktemp -u "XXXXXXXX")
+ns1="ns1-$sfx"
+ns2="ns2-$sfx"
+nsrouter="nsrouter-$sfx"
+timeout=4
+
+log_netns=$(sysctl -n net.netfilter.nf_log_all_netns)
+
+cleanup()
+{
+       ip netns del ${ns1}
+       ip netns del ${ns2}
+       ip netns del ${nsrouter}
+
+       [ $log_netns -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns=$log_netns
+}
+
+nft --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+       echo "SKIP: Could not run test without nft tool"
+       exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+       echo "SKIP: Could not run test without ip tool"
+       exit $ksft_skip
+fi
+
+ip netns add ${nsrouter}
+if [ $? -ne 0 ];then
+       echo "SKIP: Could not create net namespace"
+       exit $ksft_skip
+fi
+
+trap cleanup EXIT
+
+dmesg | grep -q ' nft_rpfilter: '
+if [ $? -eq 0 ]; then
+       dmesg -c | grep ' nft_rpfilter: '
+       echo "WARN: a previous test run has failed" 1>&2
+fi
+
+sysctl -q net.netfilter.nf_log_all_netns=1
+ip netns add ${ns1}
+ip netns add ${ns2}
+
+load_ruleset() {
+       local netns=$1
+
+ip netns exec ${netns} nft -f /dev/stdin <<EOF
+table inet filter {
+       chain prerouting {
+               type filter hook prerouting priority 0; policy accept;
+               fib saddr . iif oif missing counter log prefix "$netns nft_rpfilter: " drop
+       }
+}
+EOF
+}
+
+load_ruleset_count() {
+       local netns=$1
+
+ip netns exec ${netns} nft -f /dev/stdin <<EOF
+table inet filter {
+       chain prerouting {
+               type filter hook prerouting priority 0; policy accept;
+               ip daddr 1.1.1.1 fib saddr . iif oif missing counter drop
+               ip6 daddr 1c3::c01d fib saddr . iif oif missing counter drop
+       }
+}
+EOF
+}
+
+check_drops() {
+       dmesg | grep -q ' nft_rpfilter: '
+       if [ $? -eq 0 ]; then
+               dmesg | grep ' nft_rpfilter: '
+               echo "FAIL: rpfilter did drop packets"
+               return 1
+       fi
+
+       return 0
+}
+
+check_fib_counter() {
+       local want=$1
+       local ns=$2
+       local address=$3
+
+       line=$(ip netns exec ${ns} nft list table inet filter | grep 'fib saddr . iif' | grep $address | grep "packets $want" )
+       ret=$?
+
+       if [ $ret -ne 0 ];then
+               echo "Netns $ns fib counter doesn't match expected packet count of $want for $address" 1>&2
+               ip netns exec ${ns} nft list table inet filter
+               return 1
+       fi
+
+       if [ $want -gt 0 ]; then
+               echo "PASS: fib expression did drop packets for $address"
+       fi
+
+       return 0
+}
+
+load_ruleset ${nsrouter}
+load_ruleset ${ns1}
+load_ruleset ${ns2}
+
+ip link add veth0 netns ${nsrouter} type veth peer name eth0 netns ${ns1} > /dev/null 2>&1
+if [ $? -ne 0 ];then
+    echo "SKIP: No virtual ethernet pair device support in kernel"
+    exit $ksft_skip
+fi
+ip link add veth1 netns ${nsrouter} type veth peer name eth0 netns ${ns2}
+
+ip -net ${nsrouter} link set lo up
+ip -net ${nsrouter} link set veth0 up
+ip -net ${nsrouter} addr add 10.0.1.1/24 dev veth0
+ip -net ${nsrouter} addr add dead:1::1/64 dev veth0
+
+ip -net ${nsrouter} link set veth1 up
+ip -net ${nsrouter} addr add 10.0.2.1/24 dev veth1
+ip -net ${nsrouter} addr add dead:2::1/64 dev veth1
+
+ip -net ${ns1} link set lo up
+ip -net ${ns1} link set eth0 up
+
+ip -net ${ns2} link set lo up
+ip -net ${ns2} link set eth0 up
+
+ip -net ${ns1} addr add 10.0.1.99/24 dev eth0
+ip -net ${ns1} addr add dead:1::99/64 dev eth0
+ip -net ${ns1} route add default via 10.0.1.1
+ip -net ${ns1} route add default via dead:1::1
+
+ip -net ${ns2} addr add 10.0.2.99/24 dev eth0
+ip -net ${ns2} addr add dead:2::99/64 dev eth0
+ip -net ${ns2} route add default via 10.0.2.1
+ip -net ${ns2} route add default via dead:2::1
+
+test_ping() {
+  local daddr4=$1
+  local daddr6=$2
+
+  ip netns exec ${ns1} ping -c 1 -q $daddr4 > /dev/null
+  ret=$?
+  if [ $ret -ne 0 ];then
+       check_drops
+       echo "FAIL: ${ns1} cannot reach $daddr4, ret $ret" 1>&2
+       return 1
+  fi
+
+  ip netns exec ${ns1} ping -c 3 -q $daddr6 > /dev/null
+  ret=$?
+  if [ $ret -ne 0 ];then
+       check_drops
+       echo "FAIL: ${ns1} cannot reach $daddr6, ret $ret" 1>&2
+       return 1
+  fi
+
+  return 0
+}
+
+ip netns exec ${nsrouter} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+
+sleep 3
+
+test_ping 10.0.2.1 dead:2::1 || exit 1
+check_drops || exit 1
+
+test_ping 10.0.2.99 dead:2::99 || exit 1
+check_drops || exit 1
+
+echo "PASS: fib expression did not cause unwanted packet drops"
+
+ip netns exec ${nsrouter} nft flush table inet filter
+
+ip -net ${ns1} route del default
+ip -net ${ns1} -6 route del default
+
+ip -net ${ns1} addr del 10.0.1.99/24 dev eth0
+ip -net ${ns1} addr del dead:1::99/64 dev eth0
+
+ip -net ${ns1} addr add 10.0.2.99/24 dev eth0
+ip -net ${ns1} addr add dead:2::99/64 dev eth0
+
+ip -net ${ns1} route add default via 10.0.2.1
+ip -net ${ns1} -6 route add default via dead:2::1
+
+ip -net ${nsrouter} addr add dead:2::1/64 dev veth0
+
+# switch to ruleset that doesn't log, this time
+# it's expected that this does drop the packets.
+load_ruleset_count ${nsrouter}
+
+# ns1 has a default route, but nsrouter does not.
+# must not check return value, ping to 1.1.1.1 will
+# fail.
+check_fib_counter 0 ${nsrouter} 1.1.1.1 || exit 1
+check_fib_counter 0 ${nsrouter} 1c3::c01d || exit 1
+
+ip netns exec ${ns1} ping -c 1 -W 1 -q 1.1.1.1 > /dev/null
+check_fib_counter 1 ${nsrouter} 1.1.1.1 || exit 1
+
+sleep 2
+ip netns exec ${ns1} ping -c 3 -q 1c3::c01d > /dev/null
+check_fib_counter 3 ${nsrouter} 1c3::c01d || exit 1
+
+exit 0