Merge tag 'netfs-fixes-20210621' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 25 Jun 2021 16:41:29 +0000 (09:41 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 25 Jun 2021 16:41:29 +0000 (09:41 -0700)
Pull netfs fixes from David Howells:
 "This contains patches to fix netfs_write_begin() and afs_write_end()
  in the following ways:

  (1) In netfs_write_begin(), extract the decision about whether to skip
      a page out to its own helper and have that helper clear the areas
      around the region to be written, but not the region itself. This
      requires the filesystem to patch the page up afterwards if the
      hole doesn't get completely filled.

  (2) Use offset_in_thp() in (1) rather than manually calculating the
      offset into the page.

  (3) Due to (1), afs_write_end() now needs to handle a short data write
      into the page by generic_perform_write(). I've adopted an approach
      analogous to ceph's of just returning 0 in this case and letting
      the caller go round again.

  It also adds a note that (in the future) the len parameter may extend
  beyond the page allocated. This is because the page allocation is
  deferred to write_begin() and that gets to decide what size of THP to
  allocate."
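
To make (1) and (2) concrete, here is a rough sketch of the shape such a
skip-read helper takes. The name and exact conditions are illustrative
rather than the verbatim netfs code: offset_in_thp() yields the in-page
offset, and zero_user_segments() clears only the areas around the region
about to be written.

/*
 * Illustrative sketch of the skip-read decision described in (1) and (2).
 * The name and exact conditions are approximations, not the verbatim
 * netfs helper.
 */
static bool example_skip_page_read(struct page *page, loff_t pos, size_t len)
{
	struct inode *inode = page->mapping->host;
	loff_t i_size = i_size_read(inode);
	size_t offset = offset_in_thp(page, pos);	/* (2): no manual masking */

	/* The write covers the whole page: nothing needs reading. */
	if (offset == 0 && len >= thp_size(page))
		return true;

	/*
	 * The page lies entirely beyond EOF, or the write runs from the
	 * start of the page to or past EOF: skip the read, but zero the
	 * areas around the region to be written, not the region itself.
	 */
	if (pos - offset >= i_size || (offset == 0 && pos + len >= i_size)) {
		zero_user_segments(page, 0, offset,
				   offset + len, thp_size(page));
		return true;
	}

	return false;
}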

Jeff Layton points out:
 "The netfs fix in particular fixes a data corruption bug in cephfs"

* tag 'netfs-fixes-20210621' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs:
  netfs: fix test for whether we can skip read when writing beyond EOF
  afs: Fix afs_write_end() to handle short writes
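
A minimal sketch of the short-write handling described in (3) follows;
the function name and the elided bookkeeping are illustrative, not the
verbatim afs code. Returning 0 for a short copy into a not-yet-uptodate
page makes generic_perform_write() fault in the rest of the user buffer
and call ->write_begin()/->write_end() again.

/*
 * Minimal sketch of the short-write handling described in (3).  The
 * function name and the elided bookkeeping are illustrative, not the
 * verbatim afs code.
 */
static int example_write_end(struct file *file, struct address_space *mapping,
			     loff_t pos, unsigned int len, unsigned int copied,
			     struct page *page, void *fsdata)
{
	if (!PageUptodate(page)) {
		if (copied < len) {
			/*
			 * Short copy into a page that isn't uptodate yet:
			 * report nothing written so the caller goes round
			 * again, rather than exposing a partially filled
			 * page as uptodate.
			 */
			copied = 0;
			goto out;
		}
		SetPageUptodate(page);
	}

	/* ... update i_size if the write extended the file, mark the
	 * page dirty, etc. (omitted) ... */
out:
	unlock_page(page);
	put_page(page);
	return copied;
}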

273 files changed:
Documentation/riscv/vm-layout.rst
Documentation/vm/slub.rst
MAINTAINERS
Makefile
arch/arc/include/uapi/asm/sigcontext.h
arch/arc/kernel/signal.c
arch/arc/kernel/vmlinux.lds.S
arch/arm/kernel/setup.c
arch/powerpc/include/asm/jump_label.h
arch/powerpc/kernel/signal_64.c
arch/powerpc/mm/mem.c
arch/powerpc/perf/core-book3s.c
arch/riscv/Kconfig.socs
arch/riscv/Makefile
arch/riscv/boot/dts/sifive/fu740-c000.dtsi
arch/riscv/include/asm/pgtable.h
arch/riscv/mm/kasan_init.c
arch/s390/kernel/entry.S
arch/x86/entry/common.c
arch/x86/events/intel/lbr.c
arch/x86/include/asm/fpu/internal.h
arch/x86/include/asm/page_64.h
arch/x86/kernel/cpu/sgx/virt.c
arch/x86/kernel/fpu/signal.c
arch/x86/kvm/cpuid.c
arch/x86/kvm/lapic.c
arch/x86/kvm/mmu/mmu.c
arch/x86/kvm/svm/avic.c
arch/x86/kvm/svm/sev.c
arch/x86/kvm/vmx/vmx.c
arch/x86/kvm/x86.c
arch/x86/lib/retpoline.S
arch/x86/mm/ioremap.c
arch/x86/mm/numa.c
arch/x86/pci/fixup.c
arch/x86/xen/enlighten_pv.c
drivers/cpufreq/Kconfig.arm
drivers/cpufreq/cppc_cpufreq.c
drivers/dma/Kconfig
drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c
drivers/dma/idxd/cdev.c
drivers/dma/idxd/init.c
drivers/dma/ipu/ipu_irq.c
drivers/dma/mediatek/mtk-uart-apdma.c
drivers/dma/pl330.c
drivers/dma/qcom/Kconfig
drivers/dma/sf-pdma/Kconfig
drivers/dma/sh/rcar-dmac.c
drivers/dma/ste_dma40.c
drivers/dma/stm32-mdma.c
drivers/dma/xilinx/xilinx_dpdma.c
drivers/dma/xilinx/zynqmp_dma.c
drivers/gpio/Kconfig
drivers/gpio/gpio-mxc.c
drivers/gpio/gpiolib-cdev.c
drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c
drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c
drivers/gpu/drm/kmb/kmb_drv.c
drivers/gpu/drm/nouveau/nouveau_bo.c
drivers/gpu/drm/nouveau/nouveau_prime.c
drivers/gpu/drm/panel/panel-samsung-ld9040.c
drivers/gpu/drm/radeon/radeon_prime.c
drivers/gpu/drm/vc4/vc4_hdmi.c
drivers/irqchip/irq-gic-v3.c
drivers/mmc/host/meson-gx-mmc.c
drivers/net/caif/caif_serial.c
drivers/net/can/usb/mcba_usb.c
drivers/net/ethernet/amazon/ena/ena_netdev.c
drivers/net/ethernet/atheros/alx/main.c
drivers/net/ethernet/broadcom/bnxt/bnxt.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
drivers/net/ethernet/ec_bhf.c
drivers/net/ethernet/emulex/benet/be_main.c
drivers/net/ethernet/freescale/fec_ptp.c
drivers/net/ethernet/intel/ice/ice_lib.c
drivers/net/ethernet/intel/ice/ice_main.c
drivers/net/ethernet/lantiq_xrx200.c
drivers/net/ethernet/mellanox/mlx5/core/dev.c
drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c
drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c
drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
drivers/net/ethernet/mellanox/mlx5/core/eq.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
drivers/net/ethernet/mellanox/mlx5/core/main.c
drivers/net/ethernet/mellanox/mlx5/core/mr.c
drivers/net/ethernet/mellanox/mlx5/core/rdma.c
drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
drivers/net/ethernet/mellanox/mlx5/core/transobj.c
drivers/net/ethernet/mellanox/mlx5/core/vport.c
drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
drivers/net/ethernet/mellanox/mlxsw/reg.h
drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
drivers/net/ethernet/mscc/ocelot.c
drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
drivers/net/ethernet/qlogic/qed/qed_dcbx.c
drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
drivers/net/ethernet/realtek/r8169_main.c
drivers/net/ethernet/renesas/sh_eth.c
drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
drivers/net/ethernet/xilinx/ll_temac_main.c
drivers/net/hamradio/mkiss.c
drivers/net/mhi/net.c
drivers/net/phy/dp83867.c
drivers/net/usb/cdc_eem.c
drivers/net/usb/cdc_ncm.c
drivers/net/usb/qmi_wwan.c
drivers/net/usb/r8152.c
drivers/net/usb/smsc75xx.c
drivers/net/vrf.c
drivers/net/wireless/mac80211_hwsim.c
drivers/pci/controller/dwc/Makefile
drivers/pci/controller/dwc/pcie-tegra194-acpi.c [new file with mode: 0644]
drivers/pci/controller/dwc/pcie-tegra194.c
drivers/pci/controller/pci-aardvark.c
drivers/pci/of.c
drivers/pci/pci.c
drivers/pci/quirks.c
drivers/ptp/ptp_clock.c
drivers/s390/crypto/ap_queue.c
drivers/spi/spi-nxp-fspi.c
drivers/spi/spi-tegra20-slink.c
drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c
drivers/usb/chipidea/usbmisc_imx.c
drivers/usb/core/hub.c
drivers/usb/dwc3/core.c
fs/afs/main.c
fs/afs/write.c
fs/btrfs/block-group.c
fs/hugetlbfs/inode.c
fs/notify/fanotify/fanotify_user.c
fs/proc/base.c
include/linux/arch_topology.h
include/linux/debug_locks.h
include/linux/huge_mm.h
include/linux/hugetlb.h
include/linux/mlx5/driver.h
include/linux/mlx5/transobj.h
include/linux/mm.h
include/linux/ptp_clock_kernel.h
include/linux/rmap.h
include/linux/socket.h
include/linux/swapops.h
include/net/mac80211.h
include/net/net_namespace.h
include/net/sock.h
include/uapi/asm-generic/unistd.h
include/uapi/linux/in.h
kernel/bpf/verifier.c
kernel/crash_core.c
kernel/dma/swiotlb.c
kernel/locking/lockdep.c
kernel/module.c
kernel/printk/printk_safe.c
kernel/sched/core.c
kernel/sched/fair.c
kernel/signal.c
kernel/trace/trace.c
kernel/trace/trace_clock.c
lib/debug_locks.c
mm/huge_memory.c
mm/hugetlb.c
mm/internal.h
mm/memory-failure.c
mm/memory.c
mm/migrate.c
mm/page_vma_mapped.c
mm/pgtable-generic.c
mm/rmap.c
mm/slab_common.c
mm/slub.c
mm/sparse.c
mm/swapfile.c
mm/truncate.c
net/appletalk/aarp.c
net/batman-adv/bat_iv_ogm.c
net/bluetooth/smp.c
net/bridge/br_private.h
net/bridge/br_vlan_tunnel.c
net/can/bcm.c
net/can/isotp.c
net/can/j1939/transport.c
net/can/raw.c
net/core/neighbour.c
net/core/net_namespace.c
net/core/rtnetlink.c
net/core/skbuff.c
net/ethtool/eeprom.c
net/ethtool/ioctl.c
net/ethtool/strset.c
net/ipv4/af_inet.c
net/ipv4/cipso_ipv4.c
net/ipv4/devinet.c
net/ipv4/icmp.c
net/ipv4/igmp.c
net/ipv4/ping.c
net/ipv4/route.c
net/ipv4/udp.c
net/ipv6/addrconf.c
net/ipv6/netfilter/nft_fib_ipv6.c
net/ipv6/udp.c
net/kcm/kcmsock.c
net/mac80211/debugfs.c
net/mac80211/ieee80211_i.h
net/mac80211/iface.c
net/mac80211/main.c
net/mac80211/mlme.c
net/mac80211/rc80211_minstrel_ht.c
net/mac80211/rx.c
net/mac80211/scan.c
net/mac80211/tx.c
net/mac80211/util.c
net/mptcp/options.c
net/mptcp/protocol.c
net/mptcp/protocol.h
net/mptcp/subflow.c
net/netfilter/nf_synproxy_core.c
net/netfilter/nf_tables_api.c
net/packet/af_packet.c
net/qrtr/qrtr.c
net/rds/recv.c
net/sched/act_ct.c
net/sched/sch_cake.c
net/socket.c
net/unix/af_unix.c
net/wireless/Makefile
net/wireless/core.c
net/wireless/pmsr.c
net/wireless/sysfs.c
net/wireless/util.c
scripts/recordmcount.h
sound/soc/codecs/rt5645.c
tools/include/uapi/asm-generic/unistd.h
tools/include/uapi/linux/in.h
tools/lib/bpf/xsk.c
tools/perf/tests/shell/stat_bpf_counters.sh
tools/perf/trace/beauty/include/linux/socket.h
tools/perf/util/machine.c
tools/perf/util/metricgroup.c
tools/testing/selftests/bpf/test_verifier.c
tools/testing/selftests/bpf/verifier/and.c
tools/testing/selftests/bpf/verifier/bounds.c
tools/testing/selftests/bpf/verifier/dead_code.c
tools/testing/selftests/bpf/verifier/jmp32.c
tools/testing/selftests/bpf/verifier/jset.c
tools/testing/selftests/bpf/verifier/unpriv.c
tools/testing/selftests/bpf/verifier/value_ptr_arith.c
tools/testing/selftests/kvm/lib/kvm_util.c
tools/testing/selftests/kvm/lib/test_util.c
tools/testing/selftests/net/fib_tests.sh
tools/testing/selftests/net/icmp.sh [new file with mode: 0755]
tools/testing/selftests/net/mptcp/mptcp_connect.sh
tools/testing/selftests/net/udpgro_fwd.sh
tools/testing/selftests/net/veth.sh
tools/testing/selftests/netfilter/Makefile
tools/testing/selftests/netfilter/nft_fib.sh [new file with mode: 0755]

index 329d320..b7f9893 100644 (file)
@@ -58,6 +58,6 @@ RISC-V Linux Kernel SV39
                                                               |
   ____________________________________________________________|____________________________________________________________
                     |            |                  |         |
-   ffffffff00000000 |   -4    GB | ffffffff7fffffff |    2 GB | modules
-   ffffffff80000000 |   -2    GB | ffffffffffffffff |    2 GB | kernel, BPF
+   ffffffff00000000 |   -4    GB | ffffffff7fffffff |    2 GB | modules, BPF
+   ffffffff80000000 |   -2    GB | ffffffffffffffff |    2 GB | kernel
   __________________|____________|__________________|_________|____________________________________________________________
index 03f294a..d302855 100644 (file)
@@ -181,7 +181,7 @@ SLUB Debug output
 Here is a sample of slub debug output::
 
  ====================================================================
- BUG kmalloc-8: Redzone overwritten
+ BUG kmalloc-8: Right Redzone overwritten
  --------------------------------------------------------------------
 
  INFO: 0xc90f6d28-0xc90f6d2b. First byte 0x00 instead of 0xcc
@@ -189,10 +189,10 @@ Here is a sample of slub debug output::
  INFO: Object 0xc90f6d20 @offset=3360 fp=0xc90f6d58
  INFO: Allocated in get_modalias+0x61/0xf5 age=53 cpu=1 pid=554
 
- Bytes b4 0xc90f6d10:  00 00 00 00 00 00 00 00 5a 5a 5a 5a 5a 5a 5a 5a ........ZZZZZZZZ
- Object 0xc90f6d20:  31 30 31 39 2e 30 30 35                         1019.005
- Redzone 0xc90f6d28:  00 cc cc cc                                     .
- Padding 0xc90f6d50:  5a 5a 5a 5a 5a 5a 5a 5a                         ZZZZZZZZ
+ Bytes b4 (0xc90f6d10): 00 00 00 00 00 00 00 00 5a 5a 5a 5a 5a 5a 5a 5a ........ZZZZZZZZ
+ Object   (0xc90f6d20): 31 30 31 39 2e 30 30 35                         1019.005
+ Redzone  (0xc90f6d28): 00 cc cc cc                                     .
+ Padding  (0xc90f6d50): 5a 5a 5a 5a 5a 5a 5a 5a                         ZZZZZZZZ
 
    [<c010523d>] dump_trace+0x63/0x1eb
    [<c01053df>] show_trace_log_lvl+0x1a/0x2f
index bc0ceef..b3b9a25 100644 (file)
@@ -7354,7 +7354,6 @@ F:        drivers/net/ethernet/freescale/fs_enet/
 F:     include/linux/fs_enet_pd.h
 
 FREESCALE SOC SOUND DRIVERS
-M:     Timur Tabi <timur@kernel.org>
 M:     Nicolin Chen <nicoleotsuka@gmail.com>
 M:     Xiubo Li <Xiubo.Lee@gmail.com>
 R:     Fabio Estevam <festevam@gmail.com>
@@ -16560,6 +16559,7 @@ F:      drivers/misc/sgi-xp/
 
 SHARED MEMORY COMMUNICATIONS (SMC) SOCKETS
 M:     Karsten Graul <kgraul@linux.ibm.com>
+M:     Guvenc Gulce <guvenc@linux.ibm.com>
 L:     linux-s390@vger.kernel.org
 S:     Supported
 W:     http://www.ibm.com/developerworks/linux/linux390/
index ed669b2..3e8dbe6 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -2,8 +2,8 @@
 VERSION = 5
 PATCHLEVEL = 13
 SUBLEVEL = 0
-EXTRAVERSION = -rc6
-NAME = Frozen Wasteland
+EXTRAVERSION = -rc7
+NAME = Opossums on Parade
 
 # *DOCUMENTATION*
 # To see a list of typical targets execute "make help"
@@ -929,11 +929,14 @@ CC_FLAGS_LTO      += -fvisibility=hidden
 # Limit inlining across translation units to reduce binary size
 KBUILD_LDFLAGS += -mllvm -import-instr-limit=5
 
-# Check for frame size exceeding threshold during prolog/epilog insertion.
+# Check for frame size exceeding threshold during prolog/epilog insertion
+# when using lld < 13.0.0.
 ifneq ($(CONFIG_FRAME_WARN),0)
+ifeq ($(shell test $(CONFIG_LLD_VERSION) -lt 130000; echo $$?),0)
 KBUILD_LDFLAGS += -plugin-opt=-warn-stack-size=$(CONFIG_FRAME_WARN)
 endif
 endif
+endif
 
 ifdef CONFIG_LTO
 KBUILD_CFLAGS  += -fno-lto $(CC_FLAGS_LTO)
index 95f8a43..7a5449d 100644 (file)
@@ -18,6 +18,7 @@
  */
 struct sigcontext {
        struct user_regs_struct regs;
+       struct user_regs_arcv2 v2abi;
 };
 
 #endif /* _ASM_ARC_SIGCONTEXT_H */
index b3ccb9e..cb2f885 100644 (file)
@@ -61,6 +61,41 @@ struct rt_sigframe {
        unsigned int sigret_magic;
 };
 
+static int save_arcv2_regs(struct sigcontext *mctx, struct pt_regs *regs)
+{
+       int err = 0;
+#ifndef CONFIG_ISA_ARCOMPACT
+       struct user_regs_arcv2 v2abi;
+
+       v2abi.r30 = regs->r30;
+#ifdef CONFIG_ARC_HAS_ACCL_REGS
+       v2abi.r58 = regs->r58;
+       v2abi.r59 = regs->r59;
+#else
+       v2abi.r58 = v2abi.r59 = 0;
+#endif
+       err = __copy_to_user(&mctx->v2abi, &v2abi, sizeof(v2abi));
+#endif
+       return err;
+}
+
+static int restore_arcv2_regs(struct sigcontext *mctx, struct pt_regs *regs)
+{
+       int err = 0;
+#ifndef CONFIG_ISA_ARCOMPACT
+       struct user_regs_arcv2 v2abi;
+
+       err = __copy_from_user(&v2abi, &mctx->v2abi, sizeof(v2abi));
+
+       regs->r30 = v2abi.r30;
+#ifdef CONFIG_ARC_HAS_ACCL_REGS
+       regs->r58 = v2abi.r58;
+       regs->r59 = v2abi.r59;
+#endif
+#endif
+       return err;
+}
+
 static int
 stash_usr_regs(struct rt_sigframe __user *sf, struct pt_regs *regs,
               sigset_t *set)
@@ -94,6 +129,10 @@ stash_usr_regs(struct rt_sigframe __user *sf, struct pt_regs *regs,
 
        err = __copy_to_user(&(sf->uc.uc_mcontext.regs.scratch), &uregs.scratch,
                             sizeof(sf->uc.uc_mcontext.regs.scratch));
+
+       if (is_isa_arcv2())
+               err |= save_arcv2_regs(&(sf->uc.uc_mcontext), regs);
+
        err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(sigset_t));
 
        return err ? -EFAULT : 0;
@@ -109,6 +148,10 @@ static int restore_usr_regs(struct pt_regs *regs, struct rt_sigframe __user *sf)
        err |= __copy_from_user(&uregs.scratch,
                                &(sf->uc.uc_mcontext.regs.scratch),
                                sizeof(sf->uc.uc_mcontext.regs.scratch));
+
+       if (is_isa_arcv2())
+               err |= restore_arcv2_regs(&(sf->uc.uc_mcontext), regs);
+
        if (err)
                return -EFAULT;
 
index 33ce59d..e2146a8 100644 (file)
@@ -57,7 +57,6 @@ SECTIONS
        .init.ramfs : { INIT_RAM_FS }
 
        . = ALIGN(PAGE_SIZE);
-       _stext = .;
 
        HEAD_TEXT_SECTION
        INIT_TEXT_SECTION(L1_CACHE_BYTES)
@@ -83,6 +82,7 @@ SECTIONS
 
        .text : {
                _text = .;
+               _stext = .;
                TEXT_TEXT
                SCHED_TEXT
                CPUIDLE_TEXT
index 1a5edf5..73ca779 100644 (file)
@@ -545,9 +545,11 @@ void notrace cpu_init(void)
         * In Thumb-2, msr with an immediate value is not allowed.
         */
 #ifdef CONFIG_THUMB2_KERNEL
-#define PLC    "r"
+#define PLC_l  "l"
+#define PLC_r  "r"
 #else
-#define PLC    "I"
+#define PLC_l  "I"
+#define PLC_r  "I"
 #endif
 
        /*
@@ -569,15 +571,15 @@ void notrace cpu_init(void)
        "msr    cpsr_c, %9"
            :
            : "r" (stk),
-             PLC (PSR_F_BIT | PSR_I_BIT | IRQ_MODE),
+             PLC_r (PSR_F_BIT | PSR_I_BIT | IRQ_MODE),
              "I" (offsetof(struct stack, irq[0])),
-             PLC (PSR_F_BIT | PSR_I_BIT | ABT_MODE),
+             PLC_r (PSR_F_BIT | PSR_I_BIT | ABT_MODE),
              "I" (offsetof(struct stack, abt[0])),
-             PLC (PSR_F_BIT | PSR_I_BIT | UND_MODE),
+             PLC_r (PSR_F_BIT | PSR_I_BIT | UND_MODE),
              "I" (offsetof(struct stack, und[0])),
-             PLC (PSR_F_BIT | PSR_I_BIT | FIQ_MODE),
+             PLC_r (PSR_F_BIT | PSR_I_BIT | FIQ_MODE),
              "I" (offsetof(struct stack, fiq[0])),
-             PLC (PSR_F_BIT | PSR_I_BIT | SVC_MODE)
+             PLC_l (PSR_F_BIT | PSR_I_BIT | SVC_MODE)
            : "r14");
 #endif
 }
index 2d5c6be..93ce3ec 100644 (file)
@@ -50,7 +50,7 @@ l_yes:
 1098:  nop;                                    \
        .pushsection __jump_table, "aw";        \
        .long 1098b - ., LABEL - .;             \
-       FTR_ENTRY_LONG KEY;                     \
+       FTR_ENTRY_LONG KEY - .;                 \
        .popsection
 #endif
 
index dca6648..f9e1f54 100644 (file)
@@ -902,6 +902,10 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
        unsafe_copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set), badframe_block);
        user_write_access_end();
 
+       /* Save the siginfo outside of the unsafe block. */
+       if (copy_siginfo_to_user(&frame->info, &ksig->info))
+               goto badframe;
+
        /* Make sure signal handler doesn't get spurious FP exceptions */
        tsk->thread.fp_state.fpscr = 0;
 
@@ -915,11 +919,6 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
                regs->nip = (unsigned long) &frame->tramp[0];
        }
 
-
-       /* Save the siginfo outside of the unsafe block. */
-       if (copy_siginfo_to_user(&frame->info, &ksig->info))
-               goto badframe;
-
        /* Allocate a dummy caller frame for the signal handler. */
        newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE;
        err |= put_user(regs->gpr[1], (unsigned long __user *)newsp);
index 043bbea..a6b36a4 100644 (file)
@@ -20,6 +20,7 @@
 #include <asm/machdep.h>
 #include <asm/rtas.h>
 #include <asm/kasan.h>
+#include <asm/sparsemem.h>
 #include <asm/svm.h>
 
 #include <mm/mmu_decl.h>
index 16d4d1b..5162241 100644 (file)
@@ -2254,7 +2254,7 @@ unsigned long perf_instruction_pointer(struct pt_regs *regs)
        bool use_siar = regs_use_siar(regs);
        unsigned long siar = mfspr(SPRN_SIAR);
 
-       if (ppmu->flags & PPMU_P10_DD1) {
+       if (ppmu && (ppmu->flags & PPMU_P10_DD1)) {
                if (siar)
                        return siar;
                else
index ed96376..30676eb 100644 (file)
@@ -14,6 +14,7 @@ config SOC_SIFIVE
        select CLK_SIFIVE
        select CLK_SIFIVE_PRCI
        select SIFIVE_PLIC
+       select RISCV_ERRATA_ALTERNATIVE
        select ERRATA_SIFIVE
        help
          This enables support for SiFive SoC platform hardware.
index 4be0206..99ecd8b 100644 (file)
@@ -16,7 +16,7 @@ ifeq ($(CONFIG_DYNAMIC_FTRACE),y)
        CC_FLAGS_FTRACE := -fpatchable-function-entry=8
 endif
 
-ifeq ($(CONFIG_64BIT)$(CONFIG_CMODEL_MEDLOW),yy)
+ifeq ($(CONFIG_CMODEL_MEDLOW),y)
 KBUILD_CFLAGS_MODULE += -mcmodel=medany
 endif
 
index 8eef82e..abbb960 100644 (file)
                        cache-size = <2097152>;
                        cache-unified;
                        interrupt-parent = <&plic0>;
-                       interrupts = <19 20 21 22>;
+                       interrupts = <19 21 22 20>;
                        reg = <0x0 0x2010000 0x0 0x1000>;
                };
                gpio: gpio@10060000 {
index 9469f46..380cd3a 100644 (file)
@@ -30,9 +30,8 @@
 
 #define BPF_JIT_REGION_SIZE    (SZ_128M)
 #ifdef CONFIG_64BIT
-/* KASLR should leave at least 128MB for BPF after the kernel */
-#define BPF_JIT_REGION_START   PFN_ALIGN((unsigned long)&_end)
-#define BPF_JIT_REGION_END     (BPF_JIT_REGION_START + BPF_JIT_REGION_SIZE)
+#define BPF_JIT_REGION_START   (BPF_JIT_REGION_END - BPF_JIT_REGION_SIZE)
+#define BPF_JIT_REGION_END     (MODULES_END)
 #else
 #define BPF_JIT_REGION_START   (PAGE_OFFSET - BPF_JIT_REGION_SIZE)
 #define BPF_JIT_REGION_END     (VMALLOC_END)
index 9daacae..d7189c8 100644 (file)
@@ -169,7 +169,7 @@ static void __init kasan_shallow_populate(void *start, void *end)
 
 void __init kasan_init(void)
 {
-       phys_addr_t _start, _end;
+       phys_addr_t p_start, p_end;
        u64 i;
 
        /*
@@ -189,9 +189,9 @@ void __init kasan_init(void)
                        (void *)kasan_mem_to_shadow((void *)VMALLOC_END));
 
        /* Populate the linear mapping */
-       for_each_mem_range(i, &_start, &_end) {
-               void *start = (void *)__va(_start);
-               void *end = (void *)__va(_end);
+       for_each_mem_range(i, &p_start, &p_end) {
+               void *start = (void *)__va(p_start);
+               void *end = (void *)__va(p_end);
 
                if (start >= end)
                        break;
@@ -201,7 +201,7 @@ void __init kasan_init(void)
 
        /* Populate kernel, BPF, modules mapping */
        kasan_populate(kasan_mem_to_shadow((const void *)MODULES_VADDR),
-                      kasan_mem_to_shadow((const void *)BPF_JIT_REGION_END));
+                      kasan_mem_to_shadow((const void *)MODULES_VADDR + SZ_2G));
 
        for (i = 0; i < PTRS_PER_PTE; i++)
                set_pte(&kasan_early_shadow_pte[i],
index 12de7a9..9cc71ca 100644 (file)
@@ -651,9 +651,9 @@ ENDPROC(stack_overflow)
 .Lcleanup_sie_mcck:
        larl    %r13,.Lsie_entry
        slgr    %r9,%r13
-       larl    %r13,.Lsie_skip
+       lghi    %r13,.Lsie_skip - .Lsie_entry
        clgr    %r9,%r13
-       jh      .Lcleanup_sie_int
+       jhe     .Lcleanup_sie_int
        oi      __LC_CPU_FLAGS+7, _CIF_MCCK_GUEST
 .Lcleanup_sie_int:
        BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
index 7b2542b..04bce95 100644 (file)
@@ -130,8 +130,8 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
                /* User code screwed up. */
                regs->ax = -EFAULT;
 
-               instrumentation_end();
                local_irq_disable();
+               instrumentation_end();
                irqentry_exit_to_user_mode(regs);
                return false;
        }
@@ -269,15 +269,16 @@ __visible noinstr void xen_pv_evtchn_do_upcall(struct pt_regs *regs)
        irqentry_state_t state = irqentry_enter(regs);
        bool inhcall;
 
+       instrumentation_begin();
        run_sysvec_on_irqstack_cond(__xen_pv_evtchn_do_upcall, regs);
 
        inhcall = get_and_clear_inhcall();
        if (inhcall && !WARN_ON_ONCE(state.exit_rcu)) {
-               instrumentation_begin();
                irqentry_exit_cond_resched();
                instrumentation_end();
                restore_inhcall(inhcall);
        } else {
+               instrumentation_end();
                irqentry_exit(regs, state);
        }
 }
index 4409d2c..e8453de 100644 (file)
@@ -731,7 +731,8 @@ void reserve_lbr_buffers(void)
                if (!kmem_cache || cpuc->lbr_xsave)
                        continue;
 
-               cpuc->lbr_xsave = kmem_cache_alloc_node(kmem_cache, GFP_KERNEL,
+               cpuc->lbr_xsave = kmem_cache_alloc_node(kmem_cache,
+                                                       GFP_KERNEL | __GFP_ZERO,
                                                        cpu_to_node(cpu));
        }
 }
index ceeba9f..fdee23e 100644 (file)
@@ -578,10 +578,17 @@ static inline void switch_fpu_finish(struct fpu *new_fpu)
         * PKRU state is switched eagerly because it needs to be valid before we
         * return to userland e.g. for a copy_to_user() operation.
         */
-       if (current->mm) {
+       if (!(current->flags & PF_KTHREAD)) {
+               /*
+                * If the PKRU bit in xsave.header.xfeatures is not set,
+                * then the PKRU component was in init state, which means
+                * XRSTOR will set PKRU to 0. If the bit is not set then
+                * get_xsave_addr() will return NULL because the PKRU value
+                * in memory is not valid. This means pkru_val has to be
+                * set to 0 and not to init_pkru_value.
+                */
                pk = get_xsave_addr(&new_fpu->state.xsave, XFEATURE_PKRU);
-               if (pk)
-                       pkru_val = pk->pkru;
+               pkru_val = pk ? pk->pkru : 0;
        }
        __write_pkru(pkru_val);
 }
index ca840fe..4bde0dc 100644 (file)
@@ -75,7 +75,7 @@ void copy_page(void *to, void *from);
  *
  * With page table isolation enabled, we map the LDT in ... [stay tuned]
  */
-static inline unsigned long task_size_max(void)
+static __always_inline unsigned long task_size_max(void)
 {
        unsigned long ret;
 
index 6ad165a..64511c4 100644 (file)
@@ -212,6 +212,7 @@ static int sgx_vepc_release(struct inode *inode, struct file *file)
                list_splice_tail(&secs_pages, &zombie_secs_pages);
        mutex_unlock(&zombie_secs_pages_lock);
 
+       xa_destroy(&vepc->page_array);
        kfree(vepc);
 
        return 0;
index a4ec653..ec3ae30 100644 (file)
@@ -307,13 +307,17 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
                return 0;
        }
 
-       if (!access_ok(buf, size))
-               return -EACCES;
+       if (!access_ok(buf, size)) {
+               ret = -EACCES;
+               goto out;
+       }
 
-       if (!static_cpu_has(X86_FEATURE_FPU))
-               return fpregs_soft_set(current, NULL,
-                                      0, sizeof(struct user_i387_ia32_struct),
-                                      NULL, buf) != 0;
+       if (!static_cpu_has(X86_FEATURE_FPU)) {
+               ret = fpregs_soft_set(current, NULL, 0,
+                                     sizeof(struct user_i387_ia32_struct),
+                                     NULL, buf);
+               goto out;
+       }
 
        if (use_xsave()) {
                struct _fpx_sw_bytes fx_sw_user;
@@ -369,6 +373,25 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
                        fpregs_unlock();
                        return 0;
                }
+
+               /*
+                * The above did an FPU restore operation, restricted to
+                * the user portion of the registers, and failed, but the
+                * microcode might have modified the FPU registers
+                * nevertheless.
+                *
+                * If the FPU registers do not belong to current, then
+                * invalidate the FPU register state otherwise the task might
+                * preempt current and return to user space with corrupted
+                * FPU registers.
+                *
+                * In case current owns the FPU registers then no further
+                * action is required. The fixup below will handle it
+                * correctly.
+                */
+               if (test_thread_flag(TIF_NEED_FPU_LOAD))
+                       __cpu_invalidate_fpregs_state();
+
                fpregs_unlock();
        } else {
                /*
@@ -377,7 +400,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
                 */
                ret = __copy_from_user(&env, buf, sizeof(env));
                if (ret)
-                       goto err_out;
+                       goto out;
                envp = &env;
        }
 
@@ -405,16 +428,9 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
        if (use_xsave() && !fx_only) {
                u64 init_bv = xfeatures_mask_user() & ~user_xfeatures;
 
-               if (using_compacted_format()) {
-                       ret = copy_user_to_xstate(&fpu->state.xsave, buf_fx);
-               } else {
-                       ret = __copy_from_user(&fpu->state.xsave, buf_fx, state_size);
-
-                       if (!ret && state_size > offsetof(struct xregs_state, header))
-                               ret = validate_user_xstate_header(&fpu->state.xsave.header);
-               }
+               ret = copy_user_to_xstate(&fpu->state.xsave, buf_fx);
                if (ret)
-                       goto err_out;
+                       goto out;
 
                sanitize_restored_user_xstate(&fpu->state, envp, user_xfeatures,
                                              fx_only);
@@ -434,7 +450,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
                ret = __copy_from_user(&fpu->state.fxsave, buf_fx, state_size);
                if (ret) {
                        ret = -EFAULT;
-                       goto err_out;
+                       goto out;
                }
 
                sanitize_restored_user_xstate(&fpu->state, envp, user_xfeatures,
@@ -452,7 +468,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
        } else {
                ret = __copy_from_user(&fpu->state.fsave, buf_fx, state_size);
                if (ret)
-                       goto err_out;
+                       goto out;
 
                fpregs_lock();
                ret = copy_kernel_to_fregs_err(&fpu->state.fsave);
@@ -463,7 +479,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
                fpregs_deactivate(fpu);
        fpregs_unlock();
 
-err_out:
+out:
        if (ret)
                fpu__clear_user_states(fpu);
        return ret;
index 9a48f13..b4da665 100644 (file)
@@ -655,6 +655,7 @@ static int __do_cpuid_func_emulated(struct kvm_cpuid_array *array, u32 func)
                if (kvm_cpu_cap_has(X86_FEATURE_RDTSCP))
                        entry->ecx = F(RDPID);
                ++array->nent;
+               break;
        default:
                break;
        }
index 6d72d8f..17fa4ab 100644 (file)
@@ -1410,6 +1410,9 @@ int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
        if (!apic_x2apic_mode(apic))
                valid_reg_mask |= APIC_REG_MASK(APIC_ARBPRI);
 
+       if (alignment + len > 4)
+               return 1;
+
        if (offset > 0x3f0 || !(valid_reg_mask & APIC_REG_MASK(offset)))
                return 1;
 
index 0144c40..8d5876d 100644 (file)
@@ -4739,9 +4739,33 @@ static void init_kvm_softmmu(struct kvm_vcpu *vcpu)
        context->inject_page_fault = kvm_inject_page_fault;
 }
 
+static union kvm_mmu_role kvm_calc_nested_mmu_role(struct kvm_vcpu *vcpu)
+{
+       union kvm_mmu_role role = kvm_calc_shadow_root_page_role_common(vcpu, false);
+
+       /*
+        * Nested MMUs are used only for walking L2's gva->gpa, they never have
+        * shadow pages of their own and so "direct" has no meaning.   Set it
+        * to "true" to try to detect bogus usage of the nested MMU.
+        */
+       role.base.direct = true;
+
+       if (!is_paging(vcpu))
+               role.base.level = 0;
+       else if (is_long_mode(vcpu))
+               role.base.level = is_la57_mode(vcpu) ? PT64_ROOT_5LEVEL :
+                                                      PT64_ROOT_4LEVEL;
+       else if (is_pae(vcpu))
+               role.base.level = PT32E_ROOT_LEVEL;
+       else
+               role.base.level = PT32_ROOT_LEVEL;
+
+       return role;
+}
+
 static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
 {
-       union kvm_mmu_role new_role = kvm_calc_mmu_role_common(vcpu, false);
+       union kvm_mmu_role new_role = kvm_calc_nested_mmu_role(vcpu);
        struct kvm_mmu *g_context = &vcpu->arch.nested_mmu;
 
        if (new_role.as_u64 == g_context->mmu_role.as_u64)
index 0e62e6a..5e7e920 100644 (file)
@@ -221,7 +221,7 @@ static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu,
        return &avic_physical_id_table[index];
 }
 
-/**
+/*
  * Note:
  * AVIC hardware walks the nested page table to check permissions,
  * but does not use the SPA address specified in the leaf page
@@ -764,7 +764,7 @@ out:
        return ret;
 }
 
-/**
+/*
  * Note:
  * The HW cannot support posting multicast/broadcast
  * interrupts to a vCPU. So, we still use legacy interrupt
@@ -1005,7 +1005,7 @@ void avic_vcpu_put(struct kvm_vcpu *vcpu)
        WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
 }
 
-/**
+/*
  * This function is called during VCPU halt/unhalt.
  */
 static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
index e0ce5da..8d36f0c 100644 (file)
@@ -199,9 +199,19 @@ static void sev_asid_free(struct kvm_sev_info *sev)
        sev->misc_cg = NULL;
 }
 
-static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
+static void sev_decommission(unsigned int handle)
 {
        struct sev_data_decommission decommission;
+
+       if (!handle)
+               return;
+
+       decommission.handle = handle;
+       sev_guest_decommission(&decommission, NULL);
+}
+
+static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
+{
        struct sev_data_deactivate deactivate;
 
        if (!handle)
@@ -214,9 +224,7 @@ static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
        sev_guest_deactivate(&deactivate, NULL);
        up_read(&sev_deactivate_lock);
 
-       /* decommission handle */
-       decommission.handle = handle;
-       sev_guest_decommission(&decommission, NULL);
+       sev_decommission(handle);
 }
 
 static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
@@ -341,8 +349,10 @@ static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
 
        /* Bind ASID to this guest */
        ret = sev_bind_asid(kvm, start.handle, error);
-       if (ret)
+       if (ret) {
+               sev_decommission(start.handle);
                goto e_free_session;
+       }
 
        /* return handle to userspace */
        params.handle = start.handle;
index 50b42d7..c2a779b 100644 (file)
@@ -6247,6 +6247,7 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
        switch (kvm_get_apic_mode(vcpu)) {
        case LAPIC_MODE_INVALID:
                WARN_ONCE(true, "Invalid local APIC state");
+               break;
        case LAPIC_MODE_DISABLED:
                break;
        case LAPIC_MODE_XAPIC:
index 6d3955a..e0f4a46 100644 (file)
@@ -7106,7 +7106,10 @@ static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt)
 
 static void emulator_set_hflags(struct x86_emulate_ctxt *ctxt, unsigned emul_flags)
 {
-       emul_to_vcpu(ctxt)->arch.hflags = emul_flags;
+       struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+
+       vcpu->arch.hflags = emul_flags;
+       kvm_mmu_reset_context(vcpu);
 }
 
 static int emulator_pre_leave_smm(struct x86_emulate_ctxt *ctxt,
@@ -8258,6 +8261,7 @@ void kvm_arch_exit(void)
        kvm_x86_ops.hardware_enable = NULL;
        kvm_mmu_module_exit();
        free_percpu(user_return_msrs);
+       kmem_cache_destroy(x86_emulator_cache);
        kmem_cache_destroy(x86_fpu_cache);
 #ifdef CONFIG_KVM_XEN
        static_key_deferred_flush(&kvm_xen_enabled);
index 4d32cb0..ec9922c 100644 (file)
@@ -58,12 +58,16 @@ SYM_FUNC_START_NOALIGN(__x86_indirect_alt_call_\reg)
 2:     .skip   5-(2b-1b), 0x90
 SYM_FUNC_END(__x86_indirect_alt_call_\reg)
 
+STACK_FRAME_NON_STANDARD(__x86_indirect_alt_call_\reg)
+
 SYM_FUNC_START_NOALIGN(__x86_indirect_alt_jmp_\reg)
        ANNOTATE_RETPOLINE_SAFE
 1:     jmp     *%\reg
 2:     .skip   5-(2b-1b), 0x90
 SYM_FUNC_END(__x86_indirect_alt_jmp_\reg)
 
+STACK_FRAME_NON_STANDARD(__x86_indirect_alt_jmp_\reg)
+
 .endm
 
 /*
index 12c686c..60ade7d 100644 (file)
@@ -118,7 +118,9 @@ static void __ioremap_check_other(resource_size_t addr, struct ioremap_desc *des
        if (!IS_ENABLED(CONFIG_EFI))
                return;
 
-       if (efi_mem_type(addr) == EFI_RUNTIME_SERVICES_DATA)
+       if (efi_mem_type(addr) == EFI_RUNTIME_SERVICES_DATA ||
+           (efi_mem_type(addr) == EFI_BOOT_SERVICES_DATA &&
+            efi_mem_attributes(addr) & EFI_MEMORY_RUNTIME))
                desc->flags |= IORES_MAP_ENCRYPTED;
 }
 
index 5eb4dc2..e94da74 100644 (file)
@@ -254,7 +254,13 @@ int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
 
                /* make sure all non-reserved blocks are inside the limits */
                bi->start = max(bi->start, low);
-               bi->end = min(bi->end, high);
+
+               /* preserve info for non-RAM areas above 'max_pfn': */
+               if (bi->end > high) {
+                       numa_add_memblk_to(bi->nid, high, bi->end,
+                                          &numa_reserved_meminfo);
+                       bi->end = high;
+               }
 
                /* and there's no empty block */
                if (bi->start >= bi->end)
index 02dc646..2edd866 100644 (file)
@@ -779,4 +779,48 @@ DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x1571, pci_amd_enable_64bit_bar);
 DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x15b1, pci_amd_enable_64bit_bar);
 DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x1601, pci_amd_enable_64bit_bar);
 
+#define RS690_LOWER_TOP_OF_DRAM2       0x30
+#define RS690_LOWER_TOP_OF_DRAM2_VALID 0x1
+#define RS690_UPPER_TOP_OF_DRAM2       0x31
+#define RS690_HTIU_NB_INDEX            0xA8
+#define RS690_HTIU_NB_INDEX_WR_ENABLE  0x100
+#define RS690_HTIU_NB_DATA             0xAC
+
+/*
+ * Some BIOS implementations support RAM above 4GB, but do not configure the
+ * PCI host to respond to bus master accesses for these addresses. These
+ * implementations set the TOP_OF_DRAM_SLOT1 register correctly, so PCI DMA
+ * works as expected for addresses below 4GB.
+ *
+ * Reference: "AMD RS690 ASIC Family Register Reference Guide" (pg. 2-57)
+ * https://www.amd.com/system/files/TechDocs/43372_rs690_rrg_3.00o.pdf
+ */
+static void rs690_fix_64bit_dma(struct pci_dev *pdev)
+{
+       u32 val = 0;
+       phys_addr_t top_of_dram = __pa(high_memory - 1) + 1;
+
+       if (top_of_dram <= (1ULL << 32))
+               return;
+
+       pci_write_config_dword(pdev, RS690_HTIU_NB_INDEX,
+                               RS690_LOWER_TOP_OF_DRAM2);
+       pci_read_config_dword(pdev, RS690_HTIU_NB_DATA, &val);
+
+       if (val)
+               return;
+
+       pci_info(pdev, "Adjusting top of DRAM to %pa for 64-bit DMA support\n", &top_of_dram);
+
+       pci_write_config_dword(pdev, RS690_HTIU_NB_INDEX,
+               RS690_UPPER_TOP_OF_DRAM2 | RS690_HTIU_NB_INDEX_WR_ENABLE);
+       pci_write_config_dword(pdev, RS690_HTIU_NB_DATA, top_of_dram >> 32);
+
+       pci_write_config_dword(pdev, RS690_HTIU_NB_INDEX,
+               RS690_LOWER_TOP_OF_DRAM2 | RS690_HTIU_NB_INDEX_WR_ENABLE);
+       pci_write_config_dword(pdev, RS690_HTIU_NB_DATA,
+               top_of_dram | RS690_LOWER_TOP_OF_DRAM2_VALID);
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7910, rs690_fix_64bit_dma);
+
 #endif
index e87699a..0314942 100644 (file)
@@ -592,8 +592,10 @@ DEFINE_IDTENTRY_RAW(xenpv_exc_debug)
 DEFINE_IDTENTRY_RAW(exc_xen_unknown_trap)
 {
        /* This should never happen and there is no way to handle it. */
+       instrumentation_begin();
        pr_err("Unknown trap in Xen PV mode.");
        BUG();
+       instrumentation_end();
 }
 
 #ifdef CONFIG_X86_MCE
index a5c5f70..e65e0a4 100644 (file)
@@ -19,16 +19,6 @@ config ACPI_CPPC_CPUFREQ
 
          If in doubt, say N.
 
-config ACPI_CPPC_CPUFREQ_FIE
-       bool "Frequency Invariance support for CPPC cpufreq driver"
-       depends on ACPI_CPPC_CPUFREQ && GENERIC_ARCH_TOPOLOGY
-       default y
-       help
-         This extends frequency invariance support in the CPPC cpufreq driver,
-         by using CPPC delivered and reference performance counters.
-
-         If in doubt, say N.
-
 config ARM_ALLWINNER_SUN50I_CPUFREQ_NVMEM
        tristate "Allwinner nvmem based SUN50I CPUFreq driver"
        depends on ARCH_SUNXI
index 3848b4c..2f769b1 100644 (file)
 
 #define pr_fmt(fmt)    "CPPC Cpufreq:" fmt
 
-#include <linux/arch_topology.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/delay.h>
 #include <linux/cpu.h>
 #include <linux/cpufreq.h>
 #include <linux/dmi.h>
-#include <linux/irq_work.h>
-#include <linux/kthread.h>
 #include <linux/time.h>
 #include <linux/vmalloc.h>
-#include <uapi/linux/sched/types.h>
 
 #include <asm/unaligned.h>
 
@@ -61,204 +57,6 @@ static struct cppc_workaround_oem_info wa_info[] = {
        }
 };
 
-#ifdef CONFIG_ACPI_CPPC_CPUFREQ_FIE
-
-/* Frequency invariance support */
-struct cppc_freq_invariance {
-       int cpu;
-       struct irq_work irq_work;
-       struct kthread_work work;
-       struct cppc_perf_fb_ctrs prev_perf_fb_ctrs;
-       struct cppc_cpudata *cpu_data;
-};
-
-static DEFINE_PER_CPU(struct cppc_freq_invariance, cppc_freq_inv);
-static struct kthread_worker *kworker_fie;
-static bool fie_disabled;
-
-static struct cpufreq_driver cppc_cpufreq_driver;
-static unsigned int hisi_cppc_cpufreq_get_rate(unsigned int cpu);
-static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data,
-                                struct cppc_perf_fb_ctrs fb_ctrs_t0,
-                                struct cppc_perf_fb_ctrs fb_ctrs_t1);
-
-/**
- * cppc_scale_freq_workfn - CPPC arch_freq_scale updater for frequency invariance
- * @work: The work item.
- *
- * The CPPC driver register itself with the topology core to provide its own
- * implementation (cppc_scale_freq_tick()) of topology_scale_freq_tick() which
- * gets called by the scheduler on every tick.
- *
- * Note that the arch specific counters have higher priority than CPPC counters,
- * if available, though the CPPC driver doesn't need to have any special
- * handling for that.
- *
- * On an invocation of cppc_scale_freq_tick(), we schedule an irq work (since we
- * reach here from hard-irq context), which then schedules a normal work item
- * and cppc_scale_freq_workfn() updates the per_cpu arch_freq_scale variable
- * based on the counter updates since the last tick.
- */
-static void cppc_scale_freq_workfn(struct kthread_work *work)
-{
-       struct cppc_freq_invariance *cppc_fi;
-       struct cppc_perf_fb_ctrs fb_ctrs = {0};
-       struct cppc_cpudata *cpu_data;
-       unsigned long local_freq_scale;
-       u64 perf;
-
-       cppc_fi = container_of(work, struct cppc_freq_invariance, work);
-       cpu_data = cppc_fi->cpu_data;
-
-       if (cppc_get_perf_ctrs(cppc_fi->cpu, &fb_ctrs)) {
-               pr_warn("%s: failed to read perf counters\n", __func__);
-               return;
-       }
-
-       cppc_fi->prev_perf_fb_ctrs = fb_ctrs;
-       perf = cppc_perf_from_fbctrs(cpu_data, cppc_fi->prev_perf_fb_ctrs,
-                                    fb_ctrs);
-
-       perf <<= SCHED_CAPACITY_SHIFT;
-       local_freq_scale = div64_u64(perf, cpu_data->perf_caps.highest_perf);
-       if (WARN_ON(local_freq_scale > 1024))
-               local_freq_scale = 1024;
-
-       per_cpu(arch_freq_scale, cppc_fi->cpu) = local_freq_scale;
-}
-
-static void cppc_irq_work(struct irq_work *irq_work)
-{
-       struct cppc_freq_invariance *cppc_fi;
-
-       cppc_fi = container_of(irq_work, struct cppc_freq_invariance, irq_work);
-       kthread_queue_work(kworker_fie, &cppc_fi->work);
-}
-
-static void cppc_scale_freq_tick(void)
-{
-       struct cppc_freq_invariance *cppc_fi = &per_cpu(cppc_freq_inv, smp_processor_id());
-
-       /*
-        * cppc_get_perf_ctrs() can potentially sleep, call that from the right
-        * context.
-        */
-       irq_work_queue(&cppc_fi->irq_work);
-}
-
-static struct scale_freq_data cppc_sftd = {
-       .source = SCALE_FREQ_SOURCE_CPPC,
-       .set_freq_scale = cppc_scale_freq_tick,
-};
-
-static void cppc_freq_invariance_policy_init(struct cpufreq_policy *policy,
-                                            struct cppc_cpudata *cpu_data)
-{
-       struct cppc_perf_fb_ctrs fb_ctrs = {0};
-       struct cppc_freq_invariance *cppc_fi;
-       int i, ret;
-
-       if (cppc_cpufreq_driver.get == hisi_cppc_cpufreq_get_rate)
-               return;
-
-       if (fie_disabled)
-               return;
-
-       for_each_cpu(i, policy->cpus) {
-               cppc_fi = &per_cpu(cppc_freq_inv, i);
-               cppc_fi->cpu = i;
-               cppc_fi->cpu_data = cpu_data;
-               kthread_init_work(&cppc_fi->work, cppc_scale_freq_workfn);
-               init_irq_work(&cppc_fi->irq_work, cppc_irq_work);
-
-               ret = cppc_get_perf_ctrs(i, &fb_ctrs);
-               if (ret) {
-                       pr_warn("%s: failed to read perf counters: %d\n",
-                               __func__, ret);
-                       fie_disabled = true;
-               } else {
-                       cppc_fi->prev_perf_fb_ctrs = fb_ctrs;
-               }
-       }
-}
-
-static void __init cppc_freq_invariance_init(void)
-{
-       struct sched_attr attr = {
-               .size           = sizeof(struct sched_attr),
-               .sched_policy   = SCHED_DEADLINE,
-               .sched_nice     = 0,
-               .sched_priority = 0,
-               /*
-                * Fake (unused) bandwidth; workaround to "fix"
-                * priority inheritance.
-                */
-               .sched_runtime  = 1000000,
-               .sched_deadline = 10000000,
-               .sched_period   = 10000000,
-       };
-       int ret;
-
-       if (cppc_cpufreq_driver.get == hisi_cppc_cpufreq_get_rate)
-               return;
-
-       if (fie_disabled)
-               return;
-
-       kworker_fie = kthread_create_worker(0, "cppc_fie");
-       if (IS_ERR(kworker_fie))
-               return;
-
-       ret = sched_setattr_nocheck(kworker_fie->task, &attr);
-       if (ret) {
-               pr_warn("%s: failed to set SCHED_DEADLINE: %d\n", __func__,
-                       ret);
-               kthread_destroy_worker(kworker_fie);
-               return;
-       }
-
-       /* Register for freq-invariance */
-       topology_set_scale_freq_source(&cppc_sftd, cpu_present_mask);
-}
-
-static void cppc_freq_invariance_exit(void)
-{
-       struct cppc_freq_invariance *cppc_fi;
-       int i;
-
-       if (cppc_cpufreq_driver.get == hisi_cppc_cpufreq_get_rate)
-               return;
-
-       if (fie_disabled)
-               return;
-
-       topology_clear_scale_freq_source(SCALE_FREQ_SOURCE_CPPC, cpu_present_mask);
-
-       for_each_possible_cpu(i) {
-               cppc_fi = &per_cpu(cppc_freq_inv, i);
-               irq_work_sync(&cppc_fi->irq_work);
-       }
-
-       kthread_destroy_worker(kworker_fie);
-       kworker_fie = NULL;
-}
-
-#else
-static inline void
-cppc_freq_invariance_policy_init(struct cpufreq_policy *policy,
-                                struct cppc_cpudata *cpu_data)
-{
-}
-
-static inline void cppc_freq_invariance_init(void)
-{
-}
-
-static inline void cppc_freq_invariance_exit(void)
-{
-}
-#endif /* CONFIG_ACPI_CPPC_CPUFREQ_FIE */
-
 /* Callback function used to retrieve the max frequency from DMI */
 static void cppc_find_dmi_mhz(const struct dmi_header *dm, void *private)
 {
@@ -547,12 +345,9 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy)
        cpu_data->perf_ctrls.desired_perf =  caps->highest_perf;
 
        ret = cppc_set_perf(cpu, &cpu_data->perf_ctrls);
-       if (ret) {
+       if (ret)
                pr_debug("Err setting perf value:%d on CPU:%d. ret:%d\n",
                         caps->highest_perf, cpu, ret);
-       } else {
-               cppc_freq_invariance_policy_init(policy, cpu_data);
-       }
 
        return ret;
 }
@@ -565,12 +360,12 @@ static inline u64 get_delta(u64 t1, u64 t0)
        return (u32)t1 - (u32)t0;
 }
 
-static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data,
-                                struct cppc_perf_fb_ctrs fb_ctrs_t0,
-                                struct cppc_perf_fb_ctrs fb_ctrs_t1)
+static int cppc_get_rate_from_fbctrs(struct cppc_cpudata *cpu_data,
+                                    struct cppc_perf_fb_ctrs fb_ctrs_t0,
+                                    struct cppc_perf_fb_ctrs fb_ctrs_t1)
 {
        u64 delta_reference, delta_delivered;
-       u64 reference_perf;
+       u64 reference_perf, delivered_perf;
 
        reference_perf = fb_ctrs_t0.reference_perf;
 
@@ -579,21 +374,12 @@ static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data,
        delta_delivered = get_delta(fb_ctrs_t1.delivered,
                                    fb_ctrs_t0.delivered);
 
-       /* Check to avoid divide-by zero and invalid delivered_perf */
-       if (!delta_reference || !delta_delivered)
-               return cpu_data->perf_ctrls.desired_perf;
-
-       return (reference_perf * delta_delivered) / delta_reference;
-}
-
-static int cppc_get_rate_from_fbctrs(struct cppc_cpudata *cpu_data,
-                                    struct cppc_perf_fb_ctrs fb_ctrs_t0,
-                                    struct cppc_perf_fb_ctrs fb_ctrs_t1)
-{
-       u64 delivered_perf;
-
-       delivered_perf = cppc_perf_from_fbctrs(cpu_data, fb_ctrs_t0,
-                                              fb_ctrs_t1);
+       /* Check to avoid divide-by zero */
+       if (delta_reference || delta_delivered)
+               delivered_perf = (reference_perf * delta_delivered) /
+                                       delta_reference;
+       else
+               delivered_perf = cpu_data->perf_ctrls.desired_perf;
 
        return cppc_cpufreq_perf_to_khz(cpu_data, delivered_perf);
 }
@@ -718,8 +504,6 @@ static void cppc_check_hisi_workaround(void)
 
 static int __init cppc_cpufreq_init(void)
 {
-       int ret;
-
        if ((acpi_disabled) || !acpi_cpc_valid())
                return -ENODEV;
 
@@ -727,11 +511,7 @@ static int __init cppc_cpufreq_init(void)
 
        cppc_check_hisi_workaround();
 
-       ret = cpufreq_register_driver(&cppc_cpufreq_driver);
-       if (!ret)
-               cppc_freq_invariance_init();
-
-       return ret;
+       return cpufreq_register_driver(&cppc_cpufreq_driver);
 }
 
 static inline void free_cpu_data(void)
@@ -748,7 +528,6 @@ static inline void free_cpu_data(void)
 
 static void __exit cppc_cpufreq_exit(void)
 {
-       cppc_freq_invariance_exit();
        cpufreq_unregister_driver(&cppc_cpufreq_driver);
 
        free_cpu_data();
index 6ab9d9a..39b5b46 100644 (file)
@@ -59,6 +59,7 @@ config DMA_OF
 #devices
 config ALTERA_MSGDMA
        tristate "Altera / Intel mSGDMA Engine"
+       depends on HAS_IOMEM
        select DMA_ENGINE
        help
          Enable support for Altera / Intel mSGDMA controller.
@@ -701,6 +702,7 @@ config XILINX_ZYNQMP_DMA
 
 config XILINX_ZYNQMP_DPDMA
        tristate "Xilinx DPDMA Engine"
+       depends on HAS_IOMEM && OF
        select DMA_ENGINE
        select DMA_VIRTUAL_CHANNELS
        help
index 4ec909e..4ae0579 100644 (file)
@@ -332,6 +332,7 @@ static int __cold dpaa2_qdma_setup(struct fsl_mc_device *ls_dev)
        }
 
        if (priv->dpdmai_attr.version.major > DPDMAI_VER_MAJOR) {
+               err = -EINVAL;
                dev_err(dev, "DPDMAI major version mismatch\n"
                             "Found %u.%u, supported version is %u.%u\n",
                                priv->dpdmai_attr.version.major,
@@ -341,6 +342,7 @@ static int __cold dpaa2_qdma_setup(struct fsl_mc_device *ls_dev)
        }
 
        if (priv->dpdmai_attr.version.minor > DPDMAI_VER_MINOR) {
+               err = -EINVAL;
                dev_err(dev, "DPDMAI minor version mismatch\n"
                             "Found %u.%u, supported version is %u.%u\n",
                                priv->dpdmai_attr.version.major,
@@ -475,6 +477,7 @@ static int __cold dpaa2_qdma_dpio_setup(struct dpaa2_qdma_priv *priv)
                ppriv->store =
                        dpaa2_io_store_create(DPAA2_QDMA_STORE_SIZE, dev);
                if (!ppriv->store) {
+                       err = -ENOMEM;
                        dev_err(dev, "dpaa2_io_store_create() failed\n");
                        goto err_store;
                }
index 302cba5..d4419bf 100644 (file)
@@ -110,6 +110,7 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp)
                pasid = iommu_sva_get_pasid(sva);
                if (pasid == IOMMU_PASID_INVALID) {
                        iommu_sva_unbind_device(sva);
+                       rc = -EINVAL;
                        goto failed;
                }
 
index 776fd44..442d55c 100644 (file)
@@ -168,6 +168,32 @@ static int idxd_setup_interrupts(struct idxd_device *idxd)
        return rc;
 }
 
+static void idxd_cleanup_interrupts(struct idxd_device *idxd)
+{
+       struct pci_dev *pdev = idxd->pdev;
+       struct idxd_irq_entry *irq_entry;
+       int i, msixcnt;
+
+       msixcnt = pci_msix_vec_count(pdev);
+       if (msixcnt <= 0)
+               return;
+
+       irq_entry = &idxd->irq_entries[0];
+       free_irq(irq_entry->vector, irq_entry);
+
+       for (i = 1; i < msixcnt; i++) {
+
+               irq_entry = &idxd->irq_entries[i];
+               if (idxd->hw.cmd_cap & BIT(IDXD_CMD_RELEASE_INT_HANDLE))
+                       idxd_device_release_int_handle(idxd, idxd->int_handles[i],
+                                                      IDXD_IRQ_MSIX);
+               free_irq(irq_entry->vector, irq_entry);
+       }
+
+       idxd_mask_error_interrupts(idxd);
+       pci_free_irq_vectors(pdev);
+}
+
 static int idxd_setup_wqs(struct idxd_device *idxd)
 {
        struct device *dev = &idxd->pdev->dev;
@@ -242,6 +268,7 @@ static int idxd_setup_engines(struct idxd_device *idxd)
                engine->idxd = idxd;
                device_initialize(&engine->conf_dev);
                engine->conf_dev.parent = &idxd->conf_dev;
+               engine->conf_dev.bus = &dsa_bus_type;
                engine->conf_dev.type = &idxd_engine_device_type;
                rc = dev_set_name(&engine->conf_dev, "engine%d.%d", idxd->id, engine->id);
                if (rc < 0) {
@@ -303,6 +330,19 @@ static int idxd_setup_groups(struct idxd_device *idxd)
        return rc;
 }
 
+static void idxd_cleanup_internals(struct idxd_device *idxd)
+{
+       int i;
+
+       for (i = 0; i < idxd->max_groups; i++)
+               put_device(&idxd->groups[i]->conf_dev);
+       for (i = 0; i < idxd->max_engines; i++)
+               put_device(&idxd->engines[i]->conf_dev);
+       for (i = 0; i < idxd->max_wqs; i++)
+               put_device(&idxd->wqs[i]->conf_dev);
+       destroy_workqueue(idxd->wq);
+}
+
 static int idxd_setup_internals(struct idxd_device *idxd)
 {
        struct device *dev = &idxd->pdev->dev;
@@ -531,12 +571,12 @@ static int idxd_probe(struct idxd_device *idxd)
                dev_dbg(dev, "Loading RO device config\n");
                rc = idxd_device_load_config(idxd);
                if (rc < 0)
-                       goto err;
+                       goto err_config;
        }
 
        rc = idxd_setup_interrupts(idxd);
        if (rc)
-               goto err;
+               goto err_config;
 
        dev_dbg(dev, "IDXD interrupt setup complete.\n");
 
@@ -549,6 +589,8 @@ static int idxd_probe(struct idxd_device *idxd)
        dev_dbg(dev, "IDXD device %d probed successfully\n", idxd->id);
        return 0;
 
+ err_config:
+       idxd_cleanup_internals(idxd);
  err:
        if (device_pasid_enabled(idxd))
                idxd_disable_system_pasid(idxd);
@@ -556,6 +598,18 @@ static int idxd_probe(struct idxd_device *idxd)
        return rc;
 }
 
+static void idxd_cleanup(struct idxd_device *idxd)
+{
+       struct device *dev = &idxd->pdev->dev;
+
+       perfmon_pmu_remove(idxd);
+       idxd_cleanup_interrupts(idxd);
+       idxd_cleanup_internals(idxd);
+       if (device_pasid_enabled(idxd))
+               idxd_disable_system_pasid(idxd);
+       iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_SVA);
+}
+
 static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
        struct device *dev = &pdev->dev;
@@ -608,7 +662,7 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        rc = idxd_register_devices(idxd);
        if (rc) {
                dev_err(dev, "IDXD sysfs setup failed\n");
-               goto err;
+               goto err_dev_register;
        }
 
        idxd->state = IDXD_DEV_CONF_READY;
@@ -618,6 +672,8 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
        return 0;
 
+ err_dev_register:
+       idxd_cleanup(idxd);
  err:
        pci_iounmap(pdev, idxd->reg_base);
  err_iomap:
@@ -787,6 +843,7 @@ module_init(idxd_init_module);
 
 static void __exit idxd_exit_module(void)
 {
+       idxd_unregister_driver();
        pci_unregister_driver(&idxd_pci_driver);
        idxd_cdev_remove();
        idxd_unregister_bus_type();
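
The idxd error-path rework above is the standard kernel ordered-unwind
pattern: each new label (err_config, err_dev_register) undoes exactly the
setup steps that had already succeeded, and idxd_cleanup() /
idxd_cleanup_internals() mirror their setup counterparts in reverse order.
A minimal standalone sketch of that shape (hypothetical names, not the
driver's code):

    #include <stdlib.h>

    /* Each goto label unwinds only what was set up before the failure. */
    static int probe(void)
    {
            void *internals, *irqs;

            internals = malloc(16);         /* models idxd_setup_internals() */
            if (!internals)
                    return -1;

            irqs = malloc(16);              /* models idxd_setup_interrupts() */
            if (!irqs)
                    goto err_config;        /* unwind the setup done so far */

            /* A real probe would keep both resources on success. */
            free(irqs);
            free(internals);
            return 0;

    err_config:
            free(internals);                /* models idxd_cleanup_internals() */
            return -1;
    }

    int main(void)
    {
            return probe() ? EXIT_FAILURE : EXIT_SUCCESS;
    }
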
index 0d5c42f..97d9a6f 100644
@@ -230,7 +230,7 @@ out:
 }
 
 /**
- * ipu_irq_map() - map an IPU interrupt source to an IRQ number
+ * ipu_irq_unmap() - unmap an IPU interrupt source
  * @source:    interrupt source bit position (see ipu_irq_map())
  * @return:    0 or negative error code
  */
index 27c0735..375e7e6 100644
@@ -131,10 +131,7 @@ static unsigned int mtk_uart_apdma_read(struct mtk_chan *c, unsigned int reg)
 
 static void mtk_uart_apdma_desc_free(struct virt_dma_desc *vd)
 {
-       struct dma_chan *chan = vd->tx.chan;
-       struct mtk_chan *c = to_mtk_uart_apdma_chan(chan);
-
-       kfree(c->desc);
+       kfree(container_of(vd, struct mtk_uart_apdma_desc, vd));
 }
 
 static void mtk_uart_apdma_start_tx(struct mtk_chan *c)
@@ -207,14 +204,9 @@ static void mtk_uart_apdma_start_rx(struct mtk_chan *c)
 
 static void mtk_uart_apdma_tx_handler(struct mtk_chan *c)
 {
-       struct mtk_uart_apdma_desc *d = c->desc;
-
        mtk_uart_apdma_write(c, VFF_INT_FLAG, VFF_TX_INT_CLR_B);
        mtk_uart_apdma_write(c, VFF_INT_EN, VFF_INT_EN_CLR_B);
        mtk_uart_apdma_write(c, VFF_EN, VFF_EN_CLR_B);
-
-       list_del(&d->vd.node);
-       vchan_cookie_complete(&d->vd);
 }
 
 static void mtk_uart_apdma_rx_handler(struct mtk_chan *c)
@@ -245,9 +237,17 @@ static void mtk_uart_apdma_rx_handler(struct mtk_chan *c)
 
        c->rx_status = d->avail_len - cnt;
        mtk_uart_apdma_write(c, VFF_RPT, wg);
+}
 
-       list_del(&d->vd.node);
-       vchan_cookie_complete(&d->vd);
+static void mtk_uart_apdma_chan_complete_handler(struct mtk_chan *c)
+{
+       struct mtk_uart_apdma_desc *d = c->desc;
+
+       if (d) {
+               list_del(&d->vd.node);
+               vchan_cookie_complete(&d->vd);
+               c->desc = NULL;
+       }
 }
 
 static irqreturn_t mtk_uart_apdma_irq_handler(int irq, void *dev_id)
@@ -261,6 +261,7 @@ static irqreturn_t mtk_uart_apdma_irq_handler(int irq, void *dev_id)
                mtk_uart_apdma_rx_handler(c);
        else if (c->dir == DMA_MEM_TO_DEV)
                mtk_uart_apdma_tx_handler(c);
+       mtk_uart_apdma_chan_complete_handler(c);
        spin_unlock_irqrestore(&c->vc.lock, flags);
 
        return IRQ_HANDLED;
@@ -348,7 +349,7 @@ static struct dma_async_tx_descriptor *mtk_uart_apdma_prep_slave_sg
                return NULL;
 
        /* Now allocate and setup the descriptor */
-       d = kzalloc(sizeof(*d), GFP_ATOMIC);
+       d = kzalloc(sizeof(*d), GFP_NOWAIT);
        if (!d)
                return NULL;
 
@@ -366,7 +367,7 @@ static void mtk_uart_apdma_issue_pending(struct dma_chan *chan)
        unsigned long flags;
 
        spin_lock_irqsave(&c->vc.lock, flags);
-       if (vchan_issue_pending(&c->vc)) {
+       if (vchan_issue_pending(&c->vc) && !c->desc) {
                vd = vchan_next_desc(&c->vc);
                c->desc = to_mtk_uart_apdma_desc(&vd->tx);
 
index fd8d2bc..110de8a 100644
@@ -2694,13 +2694,15 @@ static struct dma_async_tx_descriptor *pl330_prep_dma_cyclic(
        for (i = 0; i < len / period_len; i++) {
                desc = pl330_get_desc(pch);
                if (!desc) {
+                       unsigned long iflags;
+
                        dev_err(pch->dmac->ddma.dev, "%s:%d Unable to fetch desc\n",
                                __func__, __LINE__);
 
                        if (!first)
                                return NULL;
 
-                       spin_lock_irqsave(&pl330->pool_lock, flags);
+                       spin_lock_irqsave(&pl330->pool_lock, iflags);
 
                        while (!list_empty(&first->node)) {
                                desc = list_entry(first->node.next,
@@ -2710,7 +2712,7 @@ static struct dma_async_tx_descriptor *pl330_prep_dma_cyclic(
 
                        list_move_tail(&first->node, &pl330->desc_pool);
 
-                       spin_unlock_irqrestore(&pl330->pool_lock, flags);
+                       spin_unlock_irqrestore(&pl330->pool_lock, iflags);
 
                        return NULL;
                }
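
The pl330 hunk fixes variable reuse: the buggy code passed the function's
flags argument (the caller's DMA transfer flags) to spin_lock_irqsave(),
which overwrites its argument with the saved IRQ state. Introducing the
separate local iflags keeps the caller's value intact. A standalone model
of the clobbering, with save() standing in for spin_lock_irqsave()
(hypothetical otherwise):

    #include <assert.h>

    /* save() models spin_lock_irqsave(), which writes the saved IRQ
     * state into whatever variable it is handed. */
    static void save(unsigned long *state)
    {
            *state = 0xdead;
    }

    static unsigned long prep(unsigned long flags)
    {
            unsigned long iflags;   /* the fix: a dedicated local */

            save(&iflags);          /* the bug was save(&flags) */
            return flags;           /* caller's transfer flags survive */
    }

    int main(void)
    {
            assert(prep(0x1) == 0x1);
            return 0;
    }
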
index 365f94e..3f926a6 100644
@@ -33,6 +33,7 @@ config QCOM_GPI_DMA
 
 config QCOM_HIDMA_MGMT
        tristate "Qualcomm Technologies HIDMA Management support"
+       depends on HAS_IOMEM
        select DMA_ENGINE
        help
          Enable support for the Qualcomm Technologies HIDMA Management.
index f8ffa02..ba46a0a 100644
@@ -1,5 +1,6 @@
 config SF_PDMA
        tristate "Sifive PDMA controller driver"
+       depends on HAS_IOMEM
        select DMA_ENGINE
        select DMA_VIRTUAL_CHANNELS
        help
index d530c1b..6885b3d 100644
@@ -1913,7 +1913,7 @@ static int rcar_dmac_probe(struct platform_device *pdev)
 
        /* Enable runtime PM and initialize the device. */
        pm_runtime_enable(&pdev->dev);
-       ret = pm_runtime_get_sync(&pdev->dev);
+       ret = pm_runtime_resume_and_get(&pdev->dev);
        if (ret < 0) {
                dev_err(&pdev->dev, "runtime PM get sync failed (%d)\n", ret);
                return ret;
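
This conversion (repeated below for stm32-mdma and zynqmp_dma) matters
because pm_runtime_get_sync() increments the device usage counter even when
the resume fails, so an error path that just returns leaks a reference;
pm_runtime_resume_and_get() drops the reference itself on failure. A
standalone model of the two semantics (hypothetical counter, not the PM
core):

    #include <assert.h>

    static int usage;        /* models dev->power.usage_count */
    static int resume_err;   /* force the resume to fail */

    static int get_sync(void)
    {
            usage++;                      /* counted even on failure */
            return resume_err ? -1 : 0;
    }

    static int resume_and_get(void)
    {
            int ret = get_sync();

            if (ret < 0)
                    usage--;              /* failure drops the reference */
            return ret;
    }

    int main(void)
    {
            resume_err = 1;
            (void)get_sync();
            assert(usage == 1);           /* a caller that just returns leaks this */
            usage = 0;
            (void)resume_and_get();
            assert(usage == 0);           /* balanced even on failure */
            return 0;
    }
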
index 265d7c0..e182739 100644
@@ -3675,6 +3675,9 @@ static int __init d40_probe(struct platform_device *pdev)
 
        kfree(base->lcla_pool.base_unaligned);
 
+       if (base->lcpa_base)
+               iounmap(base->lcpa_base);
+
        if (base->phy_lcpa)
                release_mem_region(base->phy_lcpa,
                                   base->lcpa_size);
index 36ba8b4..18cbd1e 100644
@@ -1452,7 +1452,7 @@ static int stm32_mdma_alloc_chan_resources(struct dma_chan *c)
                return -ENOMEM;
        }
 
-       ret = pm_runtime_get_sync(dmadev->ddev.dev);
+       ret = pm_runtime_resume_and_get(dmadev->ddev.dev);
        if (ret < 0)
                return ret;
 
@@ -1718,7 +1718,7 @@ static int stm32_mdma_pm_suspend(struct device *dev)
        u32 ccr, id;
        int ret;
 
-       ret = pm_runtime_get_sync(dev);
+       ret = pm_runtime_resume_and_get(dev);
        if (ret < 0)
                return ret;
 
index 70b29bd..6c70980 100644
 #define XILINX_DPDMA_CH_VDO                            0x020
 #define XILINX_DPDMA_CH_PYLD_SZ                                0x024
 #define XILINX_DPDMA_CH_DESC_ID                                0x028
+#define XILINX_DPDMA_CH_DESC_ID_MASK                   GENMASK(15, 0)
 
 /* DPDMA descriptor fields */
 #define XILINX_DPDMA_DESC_CONTROL_PREEMBLE             0xa5
@@ -866,7 +867,8 @@ static void xilinx_dpdma_chan_queue_transfer(struct xilinx_dpdma_chan *chan)
         * will be used, but it should be enough.
         */
        list_for_each_entry(sw_desc, &desc->descriptors, node)
-               sw_desc->hw.desc_id = desc->vdesc.tx.cookie;
+               sw_desc->hw.desc_id = desc->vdesc.tx.cookie
+                                   & XILINX_DPDMA_CH_DESC_ID_MASK;
 
        sw_desc = list_first_entry(&desc->descriptors,
                                   struct xilinx_dpdma_sw_desc, node);
@@ -1086,7 +1088,8 @@ static void xilinx_dpdma_chan_vsync_irq(struct  xilinx_dpdma_chan *chan)
        if (!chan->running || !pending)
                goto out;
 
-       desc_id = dpdma_read(chan->reg, XILINX_DPDMA_CH_DESC_ID);
+       desc_id = dpdma_read(chan->reg, XILINX_DPDMA_CH_DESC_ID)
+               & XILINX_DPDMA_CH_DESC_ID_MASK;
 
        /* If the retrigger raced with vsync, retry at the next frame. */
        sw_desc = list_first_entry(&pending->descriptors,
@@ -1459,7 +1462,7 @@ static void xilinx_dpdma_enable_irq(struct xilinx_dpdma_device *xdev)
  */
 static void xilinx_dpdma_disable_irq(struct xilinx_dpdma_device *xdev)
 {
-       dpdma_write(xdev->reg, XILINX_DPDMA_IDS, XILINX_DPDMA_INTR_ERR_ALL);
+       dpdma_write(xdev->reg, XILINX_DPDMA_IDS, XILINX_DPDMA_INTR_ALL);
        dpdma_write(xdev->reg, XILINX_DPDMA_EIDS, XILINX_DPDMA_EINTR_ALL);
 }
 
@@ -1596,6 +1599,26 @@ static struct dma_chan *of_dma_xilinx_xlate(struct of_phandle_args *dma_spec,
        return dma_get_slave_channel(&xdev->chan[chan_id]->vchan.chan);
 }
 
+static void dpdma_hw_init(struct xilinx_dpdma_device *xdev)
+{
+       unsigned int i;
+       void __iomem *reg;
+
+       /* Disable all interrupts */
+       xilinx_dpdma_disable_irq(xdev);
+
+       /* Stop all channels */
+       for (i = 0; i < ARRAY_SIZE(xdev->chan); i++) {
+               reg = xdev->reg + XILINX_DPDMA_CH_BASE
+                               + XILINX_DPDMA_CH_OFFSET * i;
+               dpdma_clr(reg, XILINX_DPDMA_CH_CNTL, XILINX_DPDMA_CH_CNTL_ENABLE);
+       }
+
+       /* Clear the interrupt status registers */
+       dpdma_write(xdev->reg, XILINX_DPDMA_ISR, XILINX_DPDMA_INTR_ALL);
+       dpdma_write(xdev->reg, XILINX_DPDMA_EISR, XILINX_DPDMA_EINTR_ALL);
+}
+
 static int xilinx_dpdma_probe(struct platform_device *pdev)
 {
        struct xilinx_dpdma_device *xdev;
@@ -1622,6 +1645,8 @@ static int xilinx_dpdma_probe(struct platform_device *pdev)
        if (IS_ERR(xdev->reg))
                return PTR_ERR(xdev->reg);
 
+       dpdma_hw_init(xdev);
+
        xdev->irq = platform_get_irq(pdev, 0);
        if (xdev->irq < 0) {
                dev_err(xdev->dev, "failed to get platform irq\n");
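
The DESC_ID register field is only 16 bits wide, while dmaengine cookies
keep growing, so after enough submissions the stored ID wraps; masking with
XILINX_DPDMA_CH_DESC_ID_MASK on both the write side and the read-back
comparison keeps the two views consistent. A quick standalone check
(illustrative values):

    #include <assert.h>
    #include <stdint.h>

    #define DESC_ID_MASK 0xffffu    /* GENMASK(15, 0) */

    int main(void)
    {
            uint32_t cookie = 0x12345;              /* cookie past 16 bits */
            uint32_t hw_id = cookie & DESC_ID_MASK; /* what the register holds */

            assert(hw_id != cookie);                  /* unmasked compare misses */
            assert(hw_id == (cookie & DESC_ID_MASK)); /* masked compare matches */
            return 0;
    }
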
index d841956..5fecf5a 100644
@@ -468,7 +468,7 @@ static int zynqmp_dma_alloc_chan_resources(struct dma_chan *dchan)
        struct zynqmp_dma_desc_sw *desc;
        int i, ret;
 
-       ret = pm_runtime_get_sync(chan->dev);
+       ret = pm_runtime_resume_and_get(chan->dev);
        if (ret < 0)
                return ret;
 
index 1dd0ec6..3c69b78 100644
@@ -1383,6 +1383,7 @@ config GPIO_TPS68470
 config GPIO_TQMX86
        tristate "TQ-Systems QTMX86 GPIO"
        depends on MFD_TQMX86 || COMPILE_TEST
+       depends on HAS_IOPORT_MAP
        select GPIOLIB_IRQCHIP
        help
          This driver supports GPIO on the TQMX86 IO controller.
@@ -1450,6 +1451,7 @@ menu "PCI GPIO expanders"
 config GPIO_AMD8111
        tristate "AMD 8111 GPIO driver"
        depends on X86 || COMPILE_TEST
+       depends on HAS_IOPORT_MAP
        help
          The AMD 8111 south bridge contains 32 GPIO pins which can be used.
 
index 157106e..b9fdf05 100644
@@ -334,7 +334,7 @@ static int mxc_gpio_init_gc(struct mxc_gpio_port *port, int irq_base)
        ct->chip.irq_unmask = irq_gc_mask_set_bit;
        ct->chip.irq_set_type = gpio_set_irq_type;
        ct->chip.irq_set_wake = gpio_set_wake_irq;
-       ct->chip.flags = IRQCHIP_MASK_ON_SUSPEND;
+       ct->chip.flags = IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND;
        ct->regs.ack = GPIO_ISR;
        ct->regs.mask = GPIO_IMR;
 
index 1631727..c7b5446 100644
@@ -1880,6 +1880,7 @@ static void gpio_v2_line_info_changed_to_v1(
                struct gpio_v2_line_info_changed *lic_v2,
                struct gpioline_info_changed *lic_v1)
 {
+       memset(lic_v1, 0, sizeof(*lic_v1));
        gpio_v2_line_info_to_v1(&lic_v2->info, &lic_v1->info);
        lic_v1->timestamp = lic_v2->timestamp_ns;
        lic_v1->event_type = lic_v2->event_type;
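
The one-line memset() matters because the v1 struct is later copied to
userspace: assigning every named member still leaves compiler-inserted
padding (and any fields the converter never touches) holding stale kernel
stack bytes. A standalone illustration (hypothetical layout):

    #include <stdio.h>
    #include <string.h>

    struct v1_info {
            char type;         /* typically followed by padding bytes */
            long timestamp;
            int reserved[5];   /* never written by the converter below */
    };

    static void convert(struct v1_info *out, char type, long ts)
    {
            memset(out, 0, sizeof(*out));  /* the fix: zero padding + reserved */
            out->type = type;
            out->timestamp = ts;
    }

    int main(void)
    {
            struct v1_info info;

            convert(&info, 1, 1234);
            for (size_t i = 0; i < sizeof(info); i++)  /* every byte defined */
                    printf("%02x", ((unsigned char *)&info)[i]);
            printf("\n");
            return 0;
    }
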
index c13985f..2a4cd7d 100644
@@ -1047,11 +1047,12 @@ int amdgpu_display_gem_fb_init(struct drm_device *dev,
 
        rfb->base.obj[0] = obj;
        drm_helper_mode_fill_fb_struct(dev, &rfb->base, mode_cmd);
-       ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs);
+
+       ret = amdgpu_display_framebuffer_init(dev, rfb, mode_cmd, obj);
        if (ret)
                goto err;
 
-       ret = amdgpu_display_framebuffer_init(dev, rfb, mode_cmd, obj);
+       ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs);
        if (ret)
                goto err;
 
@@ -1071,9 +1072,6 @@ int amdgpu_display_gem_fb_verify_and_init(
 
        rfb->base.obj[0] = obj;
        drm_helper_mode_fill_fb_struct(dev, &rfb->base, mode_cmd);
-       ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs);
-       if (ret)
-               goto err;
        /* Verify that the modifier is supported. */
        if (!drm_any_plane_has_format(dev, mode_cmd->pixel_format,
                                      mode_cmd->modifier[0])) {
@@ -1092,6 +1090,10 @@ int amdgpu_display_gem_fb_verify_and_init(
        if (ret)
                goto err;
 
+       ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs);
+       if (ret)
+               goto err;
+
        return 0;
 err:
        drm_dbg_kms(dev, "Failed to verify and init gem fb: %d\n", ret);
index baa980a..37ec593 100644
@@ -214,9 +214,21 @@ static int amdgpu_dma_buf_pin(struct dma_buf_attachment *attach)
 {
        struct drm_gem_object *obj = attach->dmabuf->priv;
        struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+       int r;
 
        /* pin buffer into GTT */
-       return amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
+       r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
+       if (r)
+               return r;
+
+       if (bo->tbo.moving) {
+               r = dma_fence_wait(bo->tbo.moving, true);
+               if (r) {
+                       amdgpu_bo_unpin(bo);
+                       return r;
+               }
+       }
+       return 0;
 }
 
 /**
index 05ad75d..cfe4fc6 100644
@@ -232,7 +232,6 @@ static void atmel_hlcdc_crtc_atomic_enable(struct drm_crtc *c,
 
        pm_runtime_put_sync(dev->dev);
 
-       drm_crtc_vblank_on(c);
 }
 
 #define ATMEL_HLCDC_RGB444_OUTPUT      BIT(0)
@@ -343,8 +342,17 @@ static int atmel_hlcdc_crtc_atomic_check(struct drm_crtc *c,
 
 static void atmel_hlcdc_crtc_atomic_begin(struct drm_crtc *c,
                                          struct drm_atomic_state *state)
+{
+       drm_crtc_vblank_on(c);
+}
+
+static void atmel_hlcdc_crtc_atomic_flush(struct drm_crtc *c,
+                                         struct drm_atomic_state *state)
 {
        struct atmel_hlcdc_crtc *crtc = drm_crtc_to_atmel_hlcdc_crtc(c);
+       unsigned long flags;
+
+       spin_lock_irqsave(&c->dev->event_lock, flags);
 
        if (c->state->event) {
                c->state->event->pipe = drm_crtc_index(c);
@@ -354,12 +362,7 @@ static void atmel_hlcdc_crtc_atomic_begin(struct drm_crtc *c,
                crtc->event = c->state->event;
                c->state->event = NULL;
        }
-}
-
-static void atmel_hlcdc_crtc_atomic_flush(struct drm_crtc *crtc,
-                                         struct drm_atomic_state *state)
-{
-       /* TODO: write common plane control register if available */
+       spin_unlock_irqrestore(&c->dev->event_lock, flags);
 }
 
 static const struct drm_crtc_helper_funcs lcdc_crtc_helper_funcs = {
index 65af56e..f09b6dd 100644
@@ -593,6 +593,7 @@ static int atmel_hlcdc_dc_modeset_init(struct drm_device *dev)
        dev->mode_config.max_width = dc->desc->max_width;
        dev->mode_config.max_height = dc->desc->max_height;
        dev->mode_config.funcs = &mode_config_funcs;
+       dev->mode_config.async_page_flip = true;
 
        return 0;
 }
index f64e06e..96ea1a2 100644
@@ -137,6 +137,7 @@ static int kmb_hw_init(struct drm_device *drm, unsigned long flags)
        /* Allocate LCD interrupt resources */
        irq_lcd = platform_get_irq(pdev, 0);
        if (irq_lcd < 0) {
+               ret = irq_lcd;
                drm_err(&kmb->drm, "irq_lcd not found");
                goto setup_fail;
        }
index 3e09df0..170aba9 100644
@@ -546,7 +546,7 @@ nouveau_bo_sync_for_device(struct nouveau_bo *nvbo)
        struct ttm_tt *ttm_dma = (struct ttm_tt *)nvbo->bo.ttm;
        int i, j;
 
-       if (!ttm_dma)
+       if (!ttm_dma || !ttm_dma->dma_address)
                return;
        if (!ttm_dma->pages) {
                NV_DEBUG(drm, "ttm_dma 0x%p: pages NULL\n", ttm_dma);
@@ -582,7 +582,7 @@ nouveau_bo_sync_for_cpu(struct nouveau_bo *nvbo)
        struct ttm_tt *ttm_dma = (struct ttm_tt *)nvbo->bo.ttm;
        int i, j;
 
-       if (!ttm_dma)
+       if (!ttm_dma || !ttm_dma->dma_address)
                return;
        if (!ttm_dma->pages) {
                NV_DEBUG(drm, "ttm_dma 0x%p: pages NULL\n", ttm_dma);
index 3474886..60019d0 100644
@@ -93,7 +93,22 @@ int nouveau_gem_prime_pin(struct drm_gem_object *obj)
        if (ret)
                return -EINVAL;
 
-       return 0;
+       ret = ttm_bo_reserve(&nvbo->bo, false, false, NULL);
+       if (ret)
+               goto error;
+
+       if (nvbo->bo.moving)
+               ret = dma_fence_wait(nvbo->bo.moving, true);
+
+       ttm_bo_unreserve(&nvbo->bo);
+       if (ret)
+               goto error;
+
+       return ret;
+
+error:
+       nouveau_bo_unpin(nvbo);
+       return ret;
 }
 
 void nouveau_gem_prime_unpin(struct drm_gem_object *obj)
index f484147..c4b3888 100644
@@ -383,6 +383,7 @@ MODULE_DEVICE_TABLE(spi, ld9040_ids);
 static struct spi_driver ld9040_driver = {
        .probe = ld9040_probe,
        .remove = ld9040_remove,
+       .id_table = ld9040_ids,
        .driver = {
                .name = "panel-samsung-ld9040",
                .of_match_table = ld9040_of_match,
index 42a8794..4a90807 100644
@@ -77,9 +77,19 @@ int radeon_gem_prime_pin(struct drm_gem_object *obj)
 
        /* pin buffer into GTT */
        ret = radeon_bo_pin(bo, RADEON_GEM_DOMAIN_GTT, NULL);
-       if (likely(ret == 0))
-               bo->prime_shared_count++;
-
+       if (unlikely(ret))
+               goto error;
+
+       if (bo->tbo.moving) {
+               ret = dma_fence_wait(bo->tbo.moving, false);
+               if (unlikely(ret)) {
+                       radeon_bo_unpin(bo);
+                       goto error;
+               }
+       }
+
+       bo->prime_shared_count++;
+error:
        radeon_bo_unreserve(bo);
        return ret;
 }
index 1fda574..8106b56 100644
@@ -159,6 +159,8 @@ vc4_hdmi_connector_detect(struct drm_connector *connector, bool force)
        struct vc4_hdmi *vc4_hdmi = connector_to_vc4_hdmi(connector);
        bool connected = false;
 
+       WARN_ON(pm_runtime_resume_and_get(&vc4_hdmi->pdev->dev));
+
        if (vc4_hdmi->hpd_gpio) {
                if (gpio_get_value_cansleep(vc4_hdmi->hpd_gpio) ^
                    vc4_hdmi->hpd_active_low)
@@ -180,10 +182,12 @@ vc4_hdmi_connector_detect(struct drm_connector *connector, bool force)
                        }
                }
 
+               pm_runtime_put(&vc4_hdmi->pdev->dev);
                return connector_status_connected;
        }
 
        cec_phys_addr_invalidate(vc4_hdmi->cec_adap);
+       pm_runtime_put(&vc4_hdmi->pdev->dev);
        return connector_status_disconnected;
 }
 
@@ -473,7 +477,6 @@ static void vc4_hdmi_encoder_post_crtc_powerdown(struct drm_encoder *encoder,
                   HDMI_READ(HDMI_VID_CTL) & ~VC4_HD_VID_CTL_ENABLE);
 
        clk_disable_unprepare(vc4_hdmi->pixel_bvb_clock);
-       clk_disable_unprepare(vc4_hdmi->hsm_clock);
        clk_disable_unprepare(vc4_hdmi->pixel_clock);
 
        ret = pm_runtime_put(&vc4_hdmi->pdev->dev);
@@ -784,13 +787,6 @@ static void vc4_hdmi_encoder_pre_crtc_configure(struct drm_encoder *encoder,
                return;
        }
 
-       ret = clk_prepare_enable(vc4_hdmi->hsm_clock);
-       if (ret) {
-               DRM_ERROR("Failed to turn on HSM clock: %d\n", ret);
-               clk_disable_unprepare(vc4_hdmi->pixel_clock);
-               return;
-       }
-
        vc4_hdmi_cec_update_clk_div(vc4_hdmi);
 
        /*
@@ -801,7 +797,6 @@ static void vc4_hdmi_encoder_pre_crtc_configure(struct drm_encoder *encoder,
                               (hsm_rate > VC4_HSM_MID_CLOCK ? 150000000 : 75000000));
        if (ret) {
                DRM_ERROR("Failed to set pixel bvb clock rate: %d\n", ret);
-               clk_disable_unprepare(vc4_hdmi->hsm_clock);
                clk_disable_unprepare(vc4_hdmi->pixel_clock);
                return;
        }
@@ -809,7 +804,6 @@ static void vc4_hdmi_encoder_pre_crtc_configure(struct drm_encoder *encoder,
        ret = clk_prepare_enable(vc4_hdmi->pixel_bvb_clock);
        if (ret) {
                DRM_ERROR("Failed to turn on pixel bvb clock: %d\n", ret);
-               clk_disable_unprepare(vc4_hdmi->hsm_clock);
                clk_disable_unprepare(vc4_hdmi->pixel_clock);
                return;
        }
@@ -1929,6 +1923,29 @@ static int vc5_hdmi_init_resources(struct vc4_hdmi *vc4_hdmi)
        return 0;
 }
 
+#ifdef CONFIG_PM
+static int vc4_hdmi_runtime_suspend(struct device *dev)
+{
+       struct vc4_hdmi *vc4_hdmi = dev_get_drvdata(dev);
+
+       clk_disable_unprepare(vc4_hdmi->hsm_clock);
+
+       return 0;
+}
+
+static int vc4_hdmi_runtime_resume(struct device *dev)
+{
+       struct vc4_hdmi *vc4_hdmi = dev_get_drvdata(dev);
+       int ret;
+
+       ret = clk_prepare_enable(vc4_hdmi->hsm_clock);
+       if (ret)
+               return ret;
+
+       return 0;
+}
+#endif
+
 static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data)
 {
        const struct vc4_hdmi_variant *variant = of_device_get_match_data(dev);
@@ -2165,11 +2182,18 @@ static const struct of_device_id vc4_hdmi_dt_match[] = {
        {}
 };
 
+static const struct dev_pm_ops vc4_hdmi_pm_ops = {
+       SET_RUNTIME_PM_OPS(vc4_hdmi_runtime_suspend,
+                          vc4_hdmi_runtime_resume,
+                          NULL)
+};
+
 struct platform_driver vc4_hdmi_driver = {
        .probe = vc4_hdmi_dev_probe,
        .remove = vc4_hdmi_dev_remove,
        .driver = {
                .name = "vc4_hdmi",
                .of_match_table = vc4_hdmi_dt_match,
+               .pm = &vc4_hdmi_pm_ops,
        },
 };
index 37a23aa..66d623f 100644
@@ -642,11 +642,45 @@ static inline void gic_handle_nmi(u32 irqnr, struct pt_regs *regs)
                nmi_exit();
 }
 
+static u32 do_read_iar(struct pt_regs *regs)
+{
+       u32 iar;
+
+       if (gic_supports_nmi() && unlikely(!interrupts_enabled(regs))) {
+               u64 pmr;
+
+               /*
+                * We were in a context with IRQs disabled. However, the
+                * entry code has set PMR to a value that allows any
+                * interrupt to be acknowledged, and not just NMIs. This can
+                * lead to surprising effects if the NMI has been retired in
+                * the meantime and an IRQ is pending. The IRQ
+                * would then be taken in NMI context, something that nobody
+                * wants to debug twice.
+                *
+                * Until we sort this, drop PMR again to a level that will
+                * actually only allow NMIs before reading IAR, and then
+                * restore it to what it was.
+                */
+               pmr = gic_read_pmr();
+               gic_pmr_mask_irqs();
+               isb();
+
+               iar = gic_read_iar();
+
+               gic_write_pmr(pmr);
+       } else {
+               iar = gic_read_iar();
+       }
+
+       return iar;
+}
+
 static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs)
 {
        u32 irqnr;
 
-       irqnr = gic_read_iar();
+       irqnr = do_read_iar(regs);
 
        /* Check for special IDs first */
        if ((irqnr >= 1020 && irqnr <= 1023))
index 016a610..3f28eb4 100644
@@ -165,6 +165,7 @@ struct meson_host {
 
        unsigned int bounce_buf_size;
        void *bounce_buf;
+       void __iomem *bounce_iomem_buf;
        dma_addr_t bounce_dma_addr;
        struct sd_emmc_desc *descs;
        dma_addr_t descs_dma_addr;
@@ -745,6 +746,47 @@ static void meson_mmc_desc_chain_transfer(struct mmc_host *mmc, u32 cmd_cfg)
        writel(start, host->regs + SD_EMMC_START);
 }
 
+/* Local sg copy to/from buffer, using memcpy_toio/fromio when dram_access_quirk is set */
+static void meson_mmc_copy_buffer(struct meson_host *host, struct mmc_data *data,
+                                 size_t buflen, bool to_buffer)
+{
+       unsigned int sg_flags = SG_MITER_ATOMIC;
+       struct scatterlist *sgl = data->sg;
+       unsigned int nents = data->sg_len;
+       struct sg_mapping_iter miter;
+       unsigned int offset = 0;
+
+       if (to_buffer)
+               sg_flags |= SG_MITER_FROM_SG;
+       else
+               sg_flags |= SG_MITER_TO_SG;
+
+       sg_miter_start(&miter, sgl, nents, sg_flags);
+
+       while ((offset < buflen) && sg_miter_next(&miter)) {
+               unsigned int len;
+
+               len = min(miter.length, buflen - offset);
+
+               /* When dram_access_quirk is set, the bounce buffer is an iomem mapping */
+               if (host->dram_access_quirk) {
+                       if (to_buffer)
+                               memcpy_toio(host->bounce_iomem_buf + offset, miter.addr, len);
+                       else
+                               memcpy_fromio(miter.addr, host->bounce_iomem_buf + offset, len);
+               } else {
+                       if (to_buffer)
+                               memcpy(host->bounce_buf + offset, miter.addr, len);
+                       else
+                               memcpy(miter.addr, host->bounce_buf + offset, len);
+               }
+
+               offset += len;
+       }
+
+       sg_miter_stop(&miter);
+}
+
 static void meson_mmc_start_cmd(struct mmc_host *mmc, struct mmc_command *cmd)
 {
        struct meson_host *host = mmc_priv(mmc);
@@ -788,8 +830,7 @@ static void meson_mmc_start_cmd(struct mmc_host *mmc, struct mmc_command *cmd)
                if (data->flags & MMC_DATA_WRITE) {
                        cmd_cfg |= CMD_CFG_DATA_WR;
                        WARN_ON(xfer_bytes > host->bounce_buf_size);
-                       sg_copy_to_buffer(data->sg, data->sg_len,
-                                         host->bounce_buf, xfer_bytes);
+                       meson_mmc_copy_buffer(host, data, xfer_bytes, true);
                        dma_wmb();
                }
 
@@ -958,8 +999,7 @@ static irqreturn_t meson_mmc_irq_thread(int irq, void *dev_id)
        if (meson_mmc_bounce_buf_read(data)) {
                xfer_bytes = data->blksz * data->blocks;
                WARN_ON(xfer_bytes > host->bounce_buf_size);
-               sg_copy_from_buffer(data->sg, data->sg_len,
-                                   host->bounce_buf, xfer_bytes);
+               meson_mmc_copy_buffer(host, data, xfer_bytes, false);
        }
 
        next_cmd = meson_mmc_get_next_command(cmd);
@@ -1179,7 +1219,7 @@ static int meson_mmc_probe(struct platform_device *pdev)
                 * instead of the DDR memory
                 */
                host->bounce_buf_size = SD_EMMC_SRAM_DATA_BUF_LEN;
-               host->bounce_buf = host->regs + SD_EMMC_SRAM_DATA_BUF_OFF;
+               host->bounce_iomem_buf = host->regs + SD_EMMC_SRAM_DATA_BUF_OFF;
                host->bounce_dma_addr = res->start + SD_EMMC_SRAM_DATA_BUF_OFF;
        } else {
                /* data bounce buffer */
index d174823..4ffbfd5 100644
@@ -350,6 +350,7 @@ static int ldisc_open(struct tty_struct *tty)
        rtnl_lock();
        result = register_netdevice(dev);
        if (result) {
+               tty_kref_put(tty);
                rtnl_unlock();
                free_netdev(dev);
                return -ENODEV;
index 029e77d..a45865b 100644
@@ -82,6 +82,8 @@ struct mcba_priv {
        bool can_ka_first_pass;
        bool can_speed_check;
        atomic_t free_ctx_cnt;
+       void *rxbuf[MCBA_MAX_RX_URBS];
+       dma_addr_t rxbuf_dma[MCBA_MAX_RX_URBS];
 };
 
 /* CAN frame */
@@ -633,6 +635,7 @@ static int mcba_usb_start(struct mcba_priv *priv)
        for (i = 0; i < MCBA_MAX_RX_URBS; i++) {
                struct urb *urb = NULL;
                u8 *buf;
+               dma_addr_t buf_dma;
 
                /* create a URB, and a buffer for it */
                urb = usb_alloc_urb(0, GFP_KERNEL);
@@ -642,7 +645,7 @@ static int mcba_usb_start(struct mcba_priv *priv)
                }
 
                buf = usb_alloc_coherent(priv->udev, MCBA_USB_RX_BUFF_SIZE,
-                                        GFP_KERNEL, &urb->transfer_dma);
+                                        GFP_KERNEL, &buf_dma);
                if (!buf) {
                        netdev_err(netdev, "No memory left for USB buffer\n");
                        usb_free_urb(urb);
@@ -661,11 +664,14 @@ static int mcba_usb_start(struct mcba_priv *priv)
                if (err) {
                        usb_unanchor_urb(urb);
                        usb_free_coherent(priv->udev, MCBA_USB_RX_BUFF_SIZE,
-                                         buf, urb->transfer_dma);
+                                         buf, buf_dma);
                        usb_free_urb(urb);
                        break;
                }
 
+               priv->rxbuf[i] = buf;
+               priv->rxbuf_dma[i] = buf_dma;
+
                /* Drop reference, USB core will take care of freeing it */
                usb_free_urb(urb);
        }
@@ -708,7 +714,14 @@ static int mcba_usb_open(struct net_device *netdev)
 
 static void mcba_urb_unlink(struct mcba_priv *priv)
 {
+       int i;
+
        usb_kill_anchored_urbs(&priv->rx_submitted);
+
+       for (i = 0; i < MCBA_MAX_RX_URBS; ++i)
+               usb_free_coherent(priv->udev, MCBA_USB_RX_BUFF_SIZE,
+                                 priv->rxbuf[i], priv->rxbuf_dma[i]);
+
        usb_kill_anchored_urbs(&priv->tx_submitted);
 }
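
The leak being fixed here: usb_free_urb() releases only the URB itself, not
a buffer obtained from usb_alloc_coherent(), so once the driver drops its
URB references it must still remember each buffer and its DMA handle in
order to free them at teardown, hence the new rxbuf[]/rxbuf_dma[] arrays. A
standalone model of that bookkeeping (malloc/free standing in for the
coherent API):

    #include <stdlib.h>

    #define NBUF 20

    static void *rxbuf[NBUF];       /* models priv->rxbuf[] */

    static int start(void)
    {
            for (int i = 0; i < NBUF; i++) {
                    rxbuf[i] = malloc(64);  /* models usb_alloc_coherent() */
                    if (!rxbuf[i])
                            return -1;
            }
            return 0;
    }

    static void unlink_all(void)
    {
            for (int i = 0; i < NBUF; i++)
                    free(rxbuf[i]);         /* models usb_free_coherent() */
    }

    int main(void)
    {
            (void)start();
            unlink_all();   /* free(NULL) is safe for unfilled slots */
            return 0;
    }
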
 
index 881f887..5257148 100644
@@ -236,36 +236,48 @@ static int ena_xdp_io_poll(struct napi_struct *napi, int budget)
 static int ena_xdp_tx_map_frame(struct ena_ring *xdp_ring,
                                struct ena_tx_buffer *tx_info,
                                struct xdp_frame *xdpf,
-                               void **push_hdr,
-                               u32 *push_len)
+                               struct ena_com_tx_ctx *ena_tx_ctx)
 {
        struct ena_adapter *adapter = xdp_ring->adapter;
        struct ena_com_buf *ena_buf;
-       dma_addr_t dma = 0;
+       int push_len = 0;
+       dma_addr_t dma;
+       void *data;
        u32 size;
 
        tx_info->xdpf = xdpf;
+       data = tx_info->xdpf->data;
        size = tx_info->xdpf->len;
-       ena_buf = tx_info->bufs;
 
-       /* llq push buffer */
-       *push_len = min_t(u32, size, xdp_ring->tx_max_header_size);
-       *push_hdr = tx_info->xdpf->data;
+       if (xdp_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
+               /* Designate part of the packet for LLQ */
+               push_len = min_t(u32, size, xdp_ring->tx_max_header_size);
+
+               ena_tx_ctx->push_header = data;
+
+               size -= push_len;
+               data += push_len;
+       }
+
+       ena_tx_ctx->header_len = push_len;
 
-       if (size - *push_len > 0) {
+       if (size > 0) {
                dma = dma_map_single(xdp_ring->dev,
-                                    *push_hdr + *push_len,
-                                    size - *push_len,
+                                    data,
+                                    size,
                                     DMA_TO_DEVICE);
                if (unlikely(dma_mapping_error(xdp_ring->dev, dma)))
                        goto error_report_dma_error;
 
-               tx_info->map_linear_data = 1;
-               tx_info->num_of_bufs = 1;
-       }
+               tx_info->map_linear_data = 0;
 
-       ena_buf->paddr = dma;
-       ena_buf->len = size;
+               ena_buf = tx_info->bufs;
+               ena_buf->paddr = dma;
+               ena_buf->len = size;
+
+               ena_tx_ctx->ena_bufs = ena_buf;
+               ena_tx_ctx->num_bufs = tx_info->num_of_bufs = 1;
+       }
 
        return 0;
 
@@ -274,10 +286,6 @@ error_report_dma_error:
                          &xdp_ring->syncp);
        netif_warn(adapter, tx_queued, adapter->netdev, "Failed to map xdp buff\n");
 
-       xdp_return_frame_rx_napi(tx_info->xdpf);
-       tx_info->xdpf = NULL;
-       tx_info->num_of_bufs = 0;
-
        return -EINVAL;
 }
 
@@ -289,8 +297,6 @@ static int ena_xdp_xmit_frame(struct ena_ring *xdp_ring,
        struct ena_com_tx_ctx ena_tx_ctx = {};
        struct ena_tx_buffer *tx_info;
        u16 next_to_use, req_id;
-       void *push_hdr;
-       u32 push_len;
        int rc;
 
        next_to_use = xdp_ring->next_to_use;
@@ -298,15 +304,11 @@ static int ena_xdp_xmit_frame(struct ena_ring *xdp_ring,
        tx_info = &xdp_ring->tx_buffer_info[req_id];
        tx_info->num_of_bufs = 0;
 
-       rc = ena_xdp_tx_map_frame(xdp_ring, tx_info, xdpf, &push_hdr, &push_len);
+       rc = ena_xdp_tx_map_frame(xdp_ring, tx_info, xdpf, &ena_tx_ctx);
        if (unlikely(rc))
                return rc;
 
-       ena_tx_ctx.ena_bufs = tx_info->bufs;
-       ena_tx_ctx.push_header = push_hdr;
-       ena_tx_ctx.num_bufs = tx_info->num_of_bufs;
        ena_tx_ctx.req_id = req_id;
-       ena_tx_ctx.header_len = push_len;
 
        rc = ena_xmit_common(dev,
                             xdp_ring,
index b3d7433..7748b27 100644
@@ -1849,6 +1849,7 @@ out_free_netdev:
        free_netdev(netdev);
 out_pci_release:
        pci_release_mem_regions(pdev);
+       pci_disable_pcie_error_reporting(pdev);
 out_pci_disable:
        pci_disable_device(pdev);
        return err;
index fcc729d..aef3fcc 100644
@@ -7308,7 +7308,7 @@ skip_rdma:
        entries_sp = ctx->vnic_max_vnic_entries + ctx->qp_max_l2_entries +
                     2 * (extra_qps + ctx->qp_min_qp1_entries) + min;
        entries_sp = roundup(entries_sp, ctx->tqm_entries_multiple);
-       entries = ctx->qp_max_l2_entries + extra_qps + ctx->qp_min_qp1_entries;
+       entries = ctx->qp_max_l2_entries + 2 * (extra_qps + ctx->qp_min_qp1_entries);
        entries = roundup(entries, ctx->tqm_entries_multiple);
        entries = clamp_t(u32, entries, min, ctx->tqm_max_entries_per_ring);
        for (i = 0; i < ctx->tqm_fp_rings_count + 1; i++) {
@@ -11750,6 +11750,8 @@ static void bnxt_fw_init_one_p3(struct bnxt *bp)
        bnxt_hwrm_coal_params_qcaps(bp);
 }
 
+static int bnxt_probe_phy(struct bnxt *bp, bool fw_dflt);
+
 static int bnxt_fw_init_one(struct bnxt *bp)
 {
        int rc;
@@ -11764,6 +11766,9 @@ static int bnxt_fw_init_one(struct bnxt *bp)
                netdev_err(bp->dev, "Firmware init phase 2 failed\n");
                return rc;
        }
+       rc = bnxt_probe_phy(bp, false);
+       if (rc)
+               return rc;
        rc = bnxt_approve_mac(bp, bp->dev->dev_addr, false);
        if (rc)
                return rc;
@@ -13155,6 +13160,7 @@ init_err_pci_clean:
        bnxt_hwrm_func_drv_unrgtr(bp);
        bnxt_free_hwrm_short_cmd_req(bp);
        bnxt_free_hwrm_resources(bp);
+       bnxt_ethtool_free(bp);
        kfree(bp->fw_health);
        bp->fw_health = NULL;
        bnxt_cleanup_pci(bp);
index 61ea3ec..83ed10a 100644
@@ -1337,13 +1337,27 @@ static int cxgb4_ethtool_flash_phy(struct net_device *netdev,
                return ret;
        }
 
-       spin_lock_bh(&adap->win0_lock);
+       /* We have to RESET the chip/firmware because we need the
+        * chip in an uninitialized state to load the new PHY image.
+        * Otherwise, the running firmware will only store the PHY
+        * image in local RAM, which will be lost after the next reset.
+        */
+       ret = t4_fw_reset(adap, adap->mbox, PIORSTMODE_F | PIORST_F);
+       if (ret < 0) {
+               dev_err(adap->pdev_dev,
+                       "Set FW to RESET for flashing PHY FW failed. ret: %d\n",
+                       ret);
+               return ret;
+       }
+
        ret = t4_load_phy_fw(adap, MEMWIN_NIC, NULL, data, size);
-       spin_unlock_bh(&adap->win0_lock);
-       if (ret)
-               dev_err(adap->pdev_dev, "Failed to load PHY FW\n");
+       if (ret < 0) {
+               dev_err(adap->pdev_dev, "Failed to load PHY FW. ret: %d\n",
+                       ret);
+               return ret;
+       }
 
-       return ret;
+       return 0;
 }
 
 static int cxgb4_ethtool_flash_fw(struct net_device *netdev,
@@ -1610,16 +1624,14 @@ static struct filter_entry *cxgb4_get_filter_entry(struct adapter *adap,
                                                   u32 ftid)
 {
        struct tid_info *t = &adap->tids;
-       struct filter_entry *f;
 
-       if (ftid < t->nhpftids)
-               f = &adap->tids.hpftid_tab[ftid];
-       else if (ftid < t->nftids)
-               f = &adap->tids.ftid_tab[ftid - t->nhpftids];
-       else
-               f = lookup_tid(&adap->tids, ftid);
+       if (ftid >= t->hpftid_base && ftid < t->hpftid_base + t->nhpftids)
+               return &t->hpftid_tab[ftid - t->hpftid_base];
 
-       return f;
+       if (ftid >= t->ftid_base && ftid < t->ftid_base + t->nftids)
+               return &t->ftid_tab[ftid - t->ftid_base];
+
+       return lookup_tid(t, ftid);
 }
 
 static void cxgb4_fill_filter_rule(struct ethtool_rx_flow_spec *fs,
@@ -1826,6 +1838,11 @@ static int cxgb4_ntuple_del_filter(struct net_device *dev,
        filter_id = filter_info->loc_array[cmd->fs.location];
        f = cxgb4_get_filter_entry(adapter, filter_id);
 
+       if (f->fs.prio)
+               filter_id -= adapter->tids.hpftid_base;
+       else if (!f->fs.hash)
+               filter_id -= (adapter->tids.ftid_base - adapter->tids.nhpftids);
+
        ret = cxgb4_flow_rule_destroy(dev, f->fs.tc_prio, &f->fs, filter_id);
        if (ret)
                goto err;
@@ -1885,6 +1902,11 @@ static int cxgb4_ntuple_set_filter(struct net_device *netdev,
 
        filter_info = &adapter->ethtool_filters->port[pi->port_id];
 
+       if (fs.prio)
+               tid += adapter->tids.hpftid_base;
+       else if (!fs.hash)
+               tid += (adapter->tids.ftid_base - adapter->tids.nhpftids);
+
        filter_info->loc_array[cmd->fs.location] = tid;
        set_bit(cmd->fs.location, filter_info->bmap);
        filter_info->in_use++;
index 22c9ac9..6260b3b 100644
@@ -198,7 +198,7 @@ static void set_nat_params(struct adapter *adap, struct filter_entry *f,
                                      WORD_MASK, f->fs.nat_lip[3] |
                                      f->fs.nat_lip[2] << 8 |
                                      f->fs.nat_lip[1] << 16 |
-                                     (u64)f->fs.nat_lip[0] << 25, 1);
+                                     (u64)f->fs.nat_lip[0] << 24, 1);
                }
        }
 
index 1f601de..762113a 100644
@@ -4424,10 +4424,8 @@ static int adap_init0_phy(struct adapter *adap)
 
        /* Load PHY Firmware onto adapter.
         */
-       spin_lock_bh(&adap->win0_lock);
        ret = t4_load_phy_fw(adap, MEMWIN_NIC, phy_info->phy_fw_version,
                             (u8 *)phyf->data, phyf->size);
-       spin_unlock_bh(&adap->win0_lock);
        if (ret < 0)
                dev_err(adap->pdev_dev, "PHY Firmware transfer error %d\n",
                        -ret);
index 9428ef1..a0555f4 100644
@@ -3060,16 +3060,19 @@ int t4_read_flash(struct adapter *adapter, unsigned int addr,
  *     @addr: the start address to write
  *     @n: length of data to write in bytes
  *     @data: the data to write
+ *     @byte_oriented: whether to store data as bytes or as words
  *
  *     Writes up to a page of data (256 bytes) to the serial flash starting
  *     at the given address.  All the data must be written to the same page.
+ *     If @byte_oriented is set, the write data is stored as a byte stream
+ *     (i.e. it matches what is on disk); otherwise it is stored as
+ *     big-endian words.
  */
 static int t4_write_flash(struct adapter *adapter, unsigned int addr,
-                         unsigned int n, const u8 *data)
+                         unsigned int n, const u8 *data, bool byte_oriented)
 {
-       int ret;
-       u32 buf[64];
        unsigned int i, c, left, val, offset = addr & 0xff;
+       u32 buf[64];
+       int ret;
 
        if (addr >= adapter->params.sf_size || offset + n > SF_PAGE_SIZE)
                return -EINVAL;
@@ -3080,10 +3083,14 @@ static int t4_write_flash(struct adapter *adapter, unsigned int addr,
            (ret = sf1_write(adapter, 4, 1, 1, val)) != 0)
                goto unlock;
 
-       for (left = n; left; left -= c) {
+       for (left = n; left; left -= c, data += c) {
                c = min(left, 4U);
-               for (val = 0, i = 0; i < c; ++i)
-                       val = (val << 8) + *data++;
+               for (val = 0, i = 0; i < c; ++i) {
+                       if (byte_oriented)
+                               val = (val << 8) + data[i];
+                       else
+                               val = (val << 8) + data[c - i - 1];
+               }
 
                ret = sf1_write(adapter, c, c != left, 1, val);
                if (ret)
@@ -3096,7 +3103,8 @@ static int t4_write_flash(struct adapter *adapter, unsigned int addr,
        t4_write_reg(adapter, SF_OP_A, 0);    /* unlock SF */
 
        /* Read the page to verify the write succeeded */
-       ret = t4_read_flash(adapter, addr & ~0xff, ARRAY_SIZE(buf), buf, 1);
+       ret = t4_read_flash(adapter, addr & ~0xff, ARRAY_SIZE(buf), buf,
+                           byte_oriented);
        if (ret)
                return ret;
 
@@ -3692,7 +3700,7 @@ int t4_load_fw(struct adapter *adap, const u8 *fw_data, unsigned int size)
         */
        memcpy(first_page, fw_data, SF_PAGE_SIZE);
        ((struct fw_hdr *)first_page)->fw_ver = cpu_to_be32(0xffffffff);
-       ret = t4_write_flash(adap, fw_start, SF_PAGE_SIZE, first_page);
+       ret = t4_write_flash(adap, fw_start, SF_PAGE_SIZE, first_page, true);
        if (ret)
                goto out;
 
@@ -3700,14 +3708,14 @@ int t4_load_fw(struct adapter *adap, const u8 *fw_data, unsigned int size)
        for (size -= SF_PAGE_SIZE; size; size -= SF_PAGE_SIZE) {
                addr += SF_PAGE_SIZE;
                fw_data += SF_PAGE_SIZE;
-               ret = t4_write_flash(adap, addr, SF_PAGE_SIZE, fw_data);
+               ret = t4_write_flash(adap, addr, SF_PAGE_SIZE, fw_data, true);
                if (ret)
                        goto out;
        }
 
-       ret = t4_write_flash(adap,
-                            fw_start + offsetof(struct fw_hdr, fw_ver),
-                            sizeof(hdr->fw_ver), (const u8 *)&hdr->fw_ver);
+       ret = t4_write_flash(adap, fw_start + offsetof(struct fw_hdr, fw_ver),
+                            sizeof(hdr->fw_ver), (const u8 *)&hdr->fw_ver,
+                            true);
 out:
        if (ret)
                dev_err(adap->pdev_dev, "firmware download failed, error %d\n",
@@ -3812,9 +3820,11 @@ int t4_load_phy_fw(struct adapter *adap, int win,
        /* Copy the supplied PHY Firmware image to the adapter memory location
         * allocated by the adapter firmware.
         */
+       spin_lock_bh(&adap->win0_lock);
        ret = t4_memory_rw(adap, win, mtype, maddr,
                           phy_fw_size, (__be32 *)phy_fw_data,
                           T4_MEMORY_WRITE);
+       spin_unlock_bh(&adap->win0_lock);
        if (ret)
                return ret;
 
@@ -10208,7 +10218,7 @@ int t4_load_cfg(struct adapter *adap, const u8 *cfg_data, unsigned int size)
                        n = size - i;
                else
                        n = SF_PAGE_SIZE;
-               ret = t4_write_flash(adap, addr, n, cfg_data);
+               ret = t4_write_flash(adap, addr, n, cfg_data, true);
                if (ret)
                        goto out;
 
@@ -10677,13 +10687,14 @@ int t4_load_boot(struct adapter *adap, u8 *boot_data,
        for (size -= SF_PAGE_SIZE; size; size -= SF_PAGE_SIZE) {
                addr += SF_PAGE_SIZE;
                boot_data += SF_PAGE_SIZE;
-               ret = t4_write_flash(adap, addr, SF_PAGE_SIZE, boot_data);
+               ret = t4_write_flash(adap, addr, SF_PAGE_SIZE, boot_data,
+                                    false);
                if (ret)
                        goto out;
        }
 
        ret = t4_write_flash(adap, boot_sector, SF_PAGE_SIZE,
-                            (const u8 *)header);
+                            (const u8 *)header, false);
 
 out:
        if (ret)
@@ -10758,7 +10769,7 @@ int t4_load_bootcfg(struct adapter *adap, const u8 *cfg_data, unsigned int size)
        for (i = 0; i < size; i += SF_PAGE_SIZE) {
                n = min_t(u32, size - i, SF_PAGE_SIZE);
 
-               ret = t4_write_flash(adap, addr, n, cfg_data);
+               ret = t4_write_flash(adap, addr, n, cfg_data, false);
                if (ret)
                        goto out;
 
@@ -10770,7 +10781,8 @@ int t4_load_bootcfg(struct adapter *adap, const u8 *cfg_data, unsigned int size)
        for (i = 0; i < npad; i++) {
                u8 data = 0;
 
-               ret = t4_write_flash(adap, cfg_addr + size + i, 1, &data);
+               ret = t4_write_flash(adap, cfg_addr + size + i, 1, &data,
+                                    false);
                if (ret)
                        goto out;
        }
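
The new byte_oriented flag controls how each group of up to four data bytes
is folded into the 32-bit word handed to sf1_write(): in byte-oriented mode
data[0] lands in the most significant byte, preserving the on-disk byte
order, while the word mode reverses each group so little-endian host words
end up big-endian in flash. A standalone check of the two packings, using
the same loop as the diff (values are illustrative):

    #include <assert.h>
    #include <stdint.h>

    static uint32_t pack(const uint8_t *data, int c, int byte_oriented)
    {
            uint32_t val = 0;

            for (int i = 0; i < c; i++)
                    val = (val << 8) + (byte_oriented ? data[i]
                                                      : data[c - i - 1]);
            return val;
    }

    int main(void)
    {
            const uint8_t d[4] = { 0x11, 0x22, 0x33, 0x44 };

            assert(pack(d, 4, 1) == 0x11223344);    /* byte stream preserved */
            assert(pack(d, 4, 0) == 0x44332211);    /* per-word byte swap */
            return 0;
    }
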
index 46b0dba..7c99217 100644
@@ -576,10 +576,12 @@ static void ec_bhf_remove(struct pci_dev *dev)
        struct ec_bhf_priv *priv = netdev_priv(net_dev);
 
        unregister_netdev(net_dev);
-       free_netdev(net_dev);
 
        pci_iounmap(dev, priv->dma_io);
        pci_iounmap(dev, priv->io);
+
+       free_netdev(net_dev);
+
        pci_release_regions(dev);
        pci_clear_master(dev);
        pci_disable_device(dev);
index b6eba29..7968568 100644
@@ -5897,6 +5897,7 @@ drv_cleanup:
 unmap_bars:
        be_unmap_pci_bars(adapter);
 free_netdev:
+       pci_disable_pcie_error_reporting(pdev);
        free_netdev(netdev);
 rel_reg:
        pci_release_regions(pdev);
index 1753807..d71eac7 100644
@@ -215,15 +215,13 @@ static u64 fec_ptp_read(const struct cyclecounter *cc)
 {
        struct fec_enet_private *fep =
                container_of(cc, struct fec_enet_private, cc);
-       const struct platform_device_id *id_entry =
-               platform_get_device_id(fep->pdev);
        u32 tempval;
 
        tempval = readl(fep->hwp + FEC_ATIME_CTRL);
        tempval |= FEC_T_CTRL_CAPTURE;
        writel(tempval, fep->hwp + FEC_ATIME_CTRL);
 
-       if (id_entry->driver_data & FEC_QUIRK_BUG_CAPTURE)
+       if (fep->quirks & FEC_QUIRK_BUG_CAPTURE)
                udelay(1);
 
        return readl(fep->hwp + FEC_ATIME);
@@ -604,6 +602,10 @@ void fec_ptp_init(struct platform_device *pdev, int irq_idx)
        fep->ptp_caps.enable = fec_ptp_enable;
 
        fep->cycle_speed = clk_get_rate(fep->clk_ptp);
+       if (!fep->cycle_speed) {
+               fep->cycle_speed = NSEC_PER_SEC;
+               dev_err(&fep->pdev->dev, "clk_ptp clock rate is zero\n");
+       }
        fep->ptp_inc = NSEC_PER_SEC / fep->cycle_speed;
 
        spin_lock_init(&fep->tmreg_lock);
index d70ee57..27f9dac 100644
@@ -1717,12 +1717,13 @@ setup_rings:
  * ice_vsi_cfg_txqs - Configure the VSI for Tx
  * @vsi: the VSI being configured
  * @rings: Tx ring array to be configured
+ * @count: number of Tx ring array elements
  *
  * Return 0 on success and a negative value on error
  * Configure the Tx VSI for operation.
  */
 static int
-ice_vsi_cfg_txqs(struct ice_vsi *vsi, struct ice_ring **rings)
+ice_vsi_cfg_txqs(struct ice_vsi *vsi, struct ice_ring **rings, u16 count)
 {
        struct ice_aqc_add_tx_qgrp *qg_buf;
        u16 q_idx = 0;
@@ -1734,7 +1735,7 @@ ice_vsi_cfg_txqs(struct ice_vsi *vsi, struct ice_ring **rings)
 
        qg_buf->num_txqs = 1;
 
-       for (q_idx = 0; q_idx < vsi->num_txq; q_idx++) {
+       for (q_idx = 0; q_idx < count; q_idx++) {
                err = ice_vsi_cfg_txq(vsi, rings[q_idx], qg_buf);
                if (err)
                        goto err_cfg_txqs;
@@ -1754,7 +1755,7 @@ err_cfg_txqs:
  */
 int ice_vsi_cfg_lan_txqs(struct ice_vsi *vsi)
 {
-       return ice_vsi_cfg_txqs(vsi, vsi->tx_rings);
+       return ice_vsi_cfg_txqs(vsi, vsi->tx_rings, vsi->num_txq);
 }
 
 /**
@@ -1769,7 +1770,7 @@ int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi)
        int ret;
        int i;
 
-       ret = ice_vsi_cfg_txqs(vsi, vsi->xdp_rings);
+       ret = ice_vsi_cfg_txqs(vsi, vsi->xdp_rings, vsi->num_xdp_txq);
        if (ret)
                return ret;
 
@@ -2009,17 +2010,18 @@ int ice_vsi_stop_all_rx_rings(struct ice_vsi *vsi)
  * @rst_src: reset source
  * @rel_vmvf_num: Relative ID of VF/VM
  * @rings: Tx ring array to be stopped
+ * @count: number of Tx ring array elements
  */
 static int
 ice_vsi_stop_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
-                     u16 rel_vmvf_num, struct ice_ring **rings)
+                     u16 rel_vmvf_num, struct ice_ring **rings, u16 count)
 {
        u16 q_idx;
 
        if (vsi->num_txq > ICE_LAN_TXQ_MAX_QDIS)
                return -EINVAL;
 
-       for (q_idx = 0; q_idx < vsi->num_txq; q_idx++) {
+       for (q_idx = 0; q_idx < count; q_idx++) {
                struct ice_txq_meta txq_meta = { };
                int status;
 
@@ -2047,7 +2049,7 @@ int
 ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
                          u16 rel_vmvf_num)
 {
-       return ice_vsi_stop_tx_rings(vsi, rst_src, rel_vmvf_num, vsi->tx_rings);
+       return ice_vsi_stop_tx_rings(vsi, rst_src, rel_vmvf_num, vsi->tx_rings, vsi->num_txq);
 }
 
 /**
@@ -2056,7 +2058,7 @@ ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
  */
 int ice_vsi_stop_xdp_tx_rings(struct ice_vsi *vsi)
 {
-       return ice_vsi_stop_tx_rings(vsi, ICE_NO_RESET, 0, vsi->xdp_rings);
+       return ice_vsi_stop_tx_rings(vsi, ICE_NO_RESET, 0, vsi->xdp_rings, vsi->num_xdp_txq);
 }
 
 /**
index 4ee85a2..0eb2307 100644
@@ -2555,6 +2555,20 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
        return (ret || xdp_ring_err) ? -ENOMEM : 0;
 }
 
+/**
+ * ice_xdp_safe_mode - XDP handler for safe mode
+ * @dev: netdevice
+ * @xdp: XDP command
+ */
+static int ice_xdp_safe_mode(struct net_device __always_unused *dev,
+                            struct netdev_bpf *xdp)
+{
+       NL_SET_ERR_MSG_MOD(xdp->extack,
+                          "Please provide working DDP firmware package in order to use XDP\n"
+                          "Refer to Documentation/networking/device_drivers/ethernet/intel/ice.rst");
+       return -EOPNOTSUPP;
+}
+
 /**
  * ice_xdp - implements XDP handler
  * @dev: netdevice
@@ -6937,6 +6951,7 @@ static const struct net_device_ops ice_netdev_safe_mode_ops = {
        .ndo_change_mtu = ice_change_mtu,
        .ndo_get_stats64 = ice_get_stats64,
        .ndo_tx_timeout = ice_tx_timeout,
+       .ndo_bpf = ice_xdp_safe_mode,
 };
 
 static const struct net_device_ops ice_netdev_ops = {
index 36dc3e5..21ef2f1 100644
@@ -154,6 +154,7 @@ static int xrx200_close(struct net_device *net_dev)
 
 static int xrx200_alloc_skb(struct xrx200_chan *ch)
 {
+       struct sk_buff *skb = ch->skb[ch->dma.desc];
        dma_addr_t mapping;
        int ret = 0;
 
@@ -168,6 +169,7 @@ static int xrx200_alloc_skb(struct xrx200_chan *ch)
                                 XRX200_DMA_DATA_LEN, DMA_FROM_DEVICE);
        if (unlikely(dma_mapping_error(ch->priv->dev, mapping))) {
                dev_kfree_skb_any(ch->skb[ch->dma.desc]);
+               ch->skb[ch->dma.desc] = skb;
                ret = -ENOMEM;
                goto skip;
        }
@@ -198,7 +200,6 @@ static int xrx200_hw_receive(struct xrx200_chan *ch)
        ch->dma.desc %= LTQ_DESC_NUM;
 
        if (ret) {
-               ch->skb[ch->dma.desc] = skb;
                net_dev->stats.rx_dropped++;
                netdev_err(net_dev, "failed to allocate new rx buffer\n");
                return ret;
@@ -352,8 +353,8 @@ static irqreturn_t xrx200_dma_irq(int irq, void *ptr)
        struct xrx200_chan *ch = ptr;
 
        if (napi_schedule_prep(&ch->napi)) {
-               __napi_schedule(&ch->napi);
                ltq_dma_disable_irq(&ch->dma);
+               __napi_schedule(&ch->napi);
        }
 
        ltq_dma_ack_irq(&ch->dma);
index a9166cd..ceebfc2 100644
@@ -303,6 +303,7 @@ int mlx5_attach_device(struct mlx5_core_dev *dev)
        int ret = 0, i;
 
        mutex_lock(&mlx5_intf_mutex);
+       priv->flags &= ~MLX5_PRIV_FLAGS_DETACH;
        for (i = 0; i < ARRAY_SIZE(mlx5_adev_devices); i++) {
                if (!priv->adev[i]) {
                        bool is_supported = false;
@@ -320,6 +321,16 @@ int mlx5_attach_device(struct mlx5_core_dev *dev)
                        }
                } else {
                        adev = &priv->adev[i]->adev;
+
+                       /* Note that this is the auxiliary driver, not the
+                        * PCI driver that mlx5_core_dev is bound to.
+                        *
+                        * Module unload can race with devlink reload here,
+                        * but we don't need to take an extra lock because
+                        * we are holding the global mlx5_intf_mutex.
+                        */
+                       if (!adev->dev.driver)
+                               continue;
                        adrv = to_auxiliary_drv(adev->dev.driver);
 
                        if (adrv->resume)
@@ -350,6 +361,10 @@ void mlx5_detach_device(struct mlx5_core_dev *dev)
                        continue;
 
                adev = &priv->adev[i]->adev;
+               /* Auxiliary driver was unbound manually through sysfs */
+               if (!adev->dev.driver)
+                       goto skip_suspend;
+
                adrv = to_auxiliary_drv(adev->dev.driver);
 
                if (adrv->suspend) {
@@ -357,9 +372,11 @@ void mlx5_detach_device(struct mlx5_core_dev *dev)
                        continue;
                }
 
+skip_suspend:
                del_adev(&priv->adev[i]->adev);
                priv->adev[i] = NULL;
        }
+       priv->flags |= MLX5_PRIV_FLAGS_DETACH;
        mutex_unlock(&mlx5_intf_mutex);
 }
 
@@ -448,6 +465,8 @@ int mlx5_rescan_drivers_locked(struct mlx5_core_dev *dev)
        struct mlx5_priv *priv = &dev->priv;
 
        lockdep_assert_held(&mlx5_intf_mutex);
+       if (priv->flags & MLX5_PRIV_FLAGS_DETACH)
+               return 0;
 
        delete_drivers(dev);
        if (priv->flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
index 0dd7615..bc33eaa 100644
@@ -64,6 +64,8 @@ struct devlink_port *mlx5e_get_devlink_port(struct net_device *dev)
        struct mlx5e_priv *priv = netdev_priv(dev);
        struct devlink_port *port;
 
+       if (!netif_device_present(dev))
+               return NULL;
        port = mlx5e_devlink_get_dl_port(priv);
        if (port->registered)
                return port;
index d907c1a..778e229 100644
@@ -1,7 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
 // Copyright (c) 2020 Mellanox Technologies
 
-#include <linux/ptp_classify.h>
 #include "en/ptp.h"
 #include "en/txrx.h"
 #include "en/params.h"
index ab935cc..c96668b 100644
@@ -6,6 +6,7 @@
 
 #include "en.h"
 #include "en_stats.h"
+#include <linux/ptp_classify.h>
 
 struct mlx5e_ptpsq {
        struct mlx5e_txqsq       txqsq;
@@ -43,6 +44,27 @@ struct mlx5e_ptp {
        DECLARE_BITMAP(state, MLX5E_PTP_STATE_NUM_STATES);
 };
 
+static inline bool mlx5e_use_ptpsq(struct sk_buff *skb)
+{
+       struct flow_keys fk;
+
+       if (!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
+               return false;
+
+       if (!skb_flow_dissect_flow_keys(skb, &fk, 0))
+               return false;
+
+       if (fk.basic.n_proto == htons(ETH_P_1588))
+               return true;
+
+       if (fk.basic.n_proto != htons(ETH_P_IP) &&
+           fk.basic.n_proto != htons(ETH_P_IPV6))
+               return false;
+
+       return (fk.basic.ip_proto == IPPROTO_UDP &&
+               fk.ports.dst == htons(PTP_EV_PORT));
+}
+
 int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params,
                   u8 lag_port, struct mlx5e_ptp **cp);
 void mlx5e_ptp_close(struct mlx5e_ptp *c);
index be0ee03..2e9bee4 100644
@@ -129,10 +129,9 @@ static void mlx5e_rep_neigh_update(struct work_struct *work)
                                                             work);
        struct mlx5e_neigh_hash_entry *nhe = update_work->nhe;
        struct neighbour *n = update_work->n;
+       struct mlx5e_encap_entry *e = NULL;
        bool neigh_connected, same_dev;
-       struct mlx5e_encap_entry *e;
        unsigned char ha[ETH_ALEN];
-       struct mlx5e_priv *priv;
        u8 nud_state, dead;
 
        rtnl_lock();
@@ -156,14 +155,12 @@ static void mlx5e_rep_neigh_update(struct work_struct *work)
        if (!same_dev)
                goto out;
 
-       list_for_each_entry(e, &nhe->encap_list, encap_list) {
-               if (!mlx5e_encap_take(e))
-                       continue;
+       /* mlx5e_get_next_init_encap() releases previous encap before returning
+        * the next one.
+        */
+       while ((e = mlx5e_get_next_init_encap(nhe, e)) != NULL)
+               mlx5e_rep_update_flows(netdev_priv(e->out_dev), e, neigh_connected, ha);
 
-               priv = netdev_priv(e->out_dev);
-               mlx5e_rep_update_flows(priv, e, neigh_connected, ha);
-               mlx5e_encap_put(priv, e);
-       }
 out:
        rtnl_unlock();
        mlx5e_release_neigh_update_work(update_work);
index 3113822..85eaadc 100644 (file)
@@ -94,13 +94,9 @@ void mlx5e_rep_update_flows(struct mlx5e_priv *priv,
 
        ASSERT_RTNL();
 
-       /* wait for encap to be fully initialized */
-       wait_for_completion(&e->res_ready);
-
        mutex_lock(&esw->offloads.encap_tbl_lock);
        encap_connected = !!(e->flags & MLX5_ENCAP_ENTRY_VALID);
-       if (e->compl_result < 0 || (encap_connected == neigh_connected &&
-                                   ether_addr_equal(e->h_dest, ha)))
+       if (encap_connected == neigh_connected && ether_addr_equal(e->h_dest, ha))
                goto unlock;
 
        mlx5e_take_all_encap_flows(e, &flow_list);
index f1fb116..490131e 100644 (file)
@@ -251,9 +251,12 @@ static void mlx5e_take_all_route_decap_flows(struct mlx5e_route_entry *r,
                mlx5e_take_tmp_flow(flow, flow_list, 0);
 }
 
+typedef bool (match_cb)(struct mlx5e_encap_entry *);
+
 static struct mlx5e_encap_entry *
-mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
-                          struct mlx5e_encap_entry *e)
+mlx5e_get_next_matching_encap(struct mlx5e_neigh_hash_entry *nhe,
+                             struct mlx5e_encap_entry *e,
+                             match_cb match)
 {
        struct mlx5e_encap_entry *next = NULL;
 
@@ -288,7 +291,7 @@ retry:
        /* wait for encap to be fully initialized */
        wait_for_completion(&next->res_ready);
        /* continue searching if encap entry is not in valid state after completion */
-       if (!(next->flags & MLX5_ENCAP_ENTRY_VALID)) {
+       if (!match(next)) {
                e = next;
                goto retry;
        }
@@ -296,6 +299,30 @@ retry:
        return next;
 }
 
+static bool mlx5e_encap_valid(struct mlx5e_encap_entry *e)
+{
+       return e->flags & MLX5_ENCAP_ENTRY_VALID;
+}
+
+static struct mlx5e_encap_entry *
+mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
+                          struct mlx5e_encap_entry *e)
+{
+       return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_valid);
+}
+
+static bool mlx5e_encap_initialized(struct mlx5e_encap_entry *e)
+{
+       return e->compl_result >= 0;
+}
+
+struct mlx5e_encap_entry *
+mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe,
+                         struct mlx5e_encap_entry *e)
+{
+       return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_initialized);
+}
+
 void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
 {
        struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
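The hunk above factors the encap walk into mlx5e_get_next_matching_encap(), parameterized by a match_cb predicate, so "next valid" and "next initialized" share one loop (including the res_ready wait and the reference hand-off, which the sketch below omits). The predicate-parameterized iterator shape, with hypothetical types:

    #include <stdbool.h>
    #include <stddef.h>

    struct entry { struct entry *next; };

    typedef bool (match_cb)(struct entry *);

    static struct entry *next_matching(struct entry *head, struct entry *prev,
                                       match_cb match)
    {
            struct entry *e = prev ? prev->next : head;

            while (e && !match(e))
                    e = e->next;
            return e;       /* NULL when the list is exhausted */
    }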
index 3d45341..26f7fab 100644 (file)
@@ -532,9 +532,6 @@ void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
        struct mlx5_core_dev *mdev = priv->mdev;
        struct net_device *netdev = priv->netdev;
 
-       if (!priv->ipsec)
-               return;
-
        if (!(mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_CAP_ESP) ||
            !MLX5_CAP_ETH(mdev, swp)) {
                mlx5_core_dbg(mdev, "mlx5e: ESP and SWP offload not supported\n");
index 5cd466e..25403af 100644 (file)
@@ -356,7 +356,7 @@ err:
 
 int mlx5e_arfs_create_tables(struct mlx5e_priv *priv)
 {
-       int err = 0;
+       int err = -ENOMEM;
        int i;
 
        if (!(priv->netdev->hw_features & NETIF_F_NTUPLE))
index ec6bafe..d26b8ed 100644 (file)
@@ -2705,8 +2705,6 @@ static int mlx5e_update_netdev_queues(struct mlx5e_priv *priv)
        nch = priv->channels.params.num_channels;
        ntc = priv->channels.params.num_tc;
        num_rxqs = nch * priv->profile->rq_groups;
-       if (priv->channels.params.ptp_rx)
-               num_rxqs++;
 
        mlx5e_netdev_set_tcs(netdev, nch, ntc);
 
@@ -4824,22 +4822,15 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
        }
 
        if (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev)) {
-               netdev->hw_features     |= NETIF_F_GSO_UDP_TUNNEL |
-                                          NETIF_F_GSO_UDP_TUNNEL_CSUM;
-               netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL |
-                                          NETIF_F_GSO_UDP_TUNNEL_CSUM;
-               netdev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM;
-               netdev->vlan_features |= NETIF_F_GSO_UDP_TUNNEL |
-                                        NETIF_F_GSO_UDP_TUNNEL_CSUM;
+               netdev->hw_features     |= NETIF_F_GSO_UDP_TUNNEL;
+               netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL;
+               netdev->vlan_features |= NETIF_F_GSO_UDP_TUNNEL;
        }
 
        if (mlx5e_tunnel_proto_supported_tx(mdev, IPPROTO_GRE)) {
-               netdev->hw_features     |= NETIF_F_GSO_GRE |
-                                          NETIF_F_GSO_GRE_CSUM;
-               netdev->hw_enc_features |= NETIF_F_GSO_GRE |
-                                          NETIF_F_GSO_GRE_CSUM;
-               netdev->gso_partial_features |= NETIF_F_GSO_GRE |
-                                               NETIF_F_GSO_GRE_CSUM;
+               netdev->hw_features     |= NETIF_F_GSO_GRE;
+               netdev->hw_enc_features |= NETIF_F_GSO_GRE;
+               netdev->gso_partial_features |= NETIF_F_GSO_GRE;
        }
 
        if (mlx5e_tunnel_proto_supported_tx(mdev, IPPROTO_IPIP)) {
index dd64878..d4b0f27 100644 (file)
@@ -4765,7 +4765,7 @@ static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv,
        list_for_each_entry_safe(hpe, tmp, &init_wait_list, dead_peer_wait_list) {
                wait_for_completion(&hpe->res_ready);
                if (!IS_ERR_OR_NULL(hpe->hp) && hpe->peer_vhca_id == peer_vhca_id)
-                       hpe->hp->pair->peer_gone = true;
+                       mlx5_core_hairpin_clear_dead_peer(hpe->hp->pair);
 
                mlx5e_hairpin_put(priv, hpe);
        }
index 25c0917..1702753 100644 (file)
@@ -178,6 +178,9 @@ void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *f
 void mlx5e_put_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list);
 
 struct mlx5e_neigh_hash_entry;
+struct mlx5e_encap_entry *
+mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe,
+                         struct mlx5e_encap_entry *e);
 void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe);
 
 void mlx5e_tc_reoffload_flows_work(struct work_struct *work);
index 8ba6267..320fe0c 100644 (file)
@@ -32,7 +32,6 @@
 
 #include <linux/tcp.h>
 #include <linux/if_vlan.h>
-#include <linux/ptp_classify.h>
 #include <net/geneve.h>
 #include <net/dsfield.h>
 #include "en.h"
@@ -67,24 +66,6 @@ static inline int mlx5e_get_dscp_up(struct mlx5e_priv *priv, struct sk_buff *skb
 }
 #endif
 
-static bool mlx5e_use_ptpsq(struct sk_buff *skb)
-{
-       struct flow_keys fk;
-
-       if (!skb_flow_dissect_flow_keys(skb, &fk, 0))
-               return false;
-
-       if (fk.basic.n_proto == htons(ETH_P_1588))
-               return true;
-
-       if (fk.basic.n_proto != htons(ETH_P_IP) &&
-           fk.basic.n_proto != htons(ETH_P_IPV6))
-               return false;
-
-       return (fk.basic.ip_proto == IPPROTO_UDP &&
-               fk.ports.dst == htons(PTP_EV_PORT));
-}
-
 static u16 mlx5e_select_ptpsq(struct net_device *dev, struct sk_buff *skb)
 {
        struct mlx5e_priv *priv = netdev_priv(dev);
@@ -145,9 +126,9 @@ u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
                }
 
                ptp_channel = READ_ONCE(priv->channels.ptp);
-               if (unlikely(ptp_channel) &&
-                   test_bit(MLX5E_PTP_STATE_TX, ptp_channel->state) &&
-                   mlx5e_use_ptpsq(skb))
+               if (unlikely(ptp_channel &&
+                            test_bit(MLX5E_PTP_STATE_TX, ptp_channel->state) &&
+                            mlx5e_use_ptpsq(skb)))
                        return mlx5e_select_ptpsq(dev, skb);
 
                txq_ix = netdev_pick_tx(dev, skb, NULL);
index 77c0ca6..9403334 100644 (file)
@@ -136,7 +136,7 @@ static int mlx5_eq_comp_int(struct notifier_block *nb,
 
        eqe = next_eqe_sw(eq);
        if (!eqe)
-               return 0;
+               goto out;
 
        do {
                struct mlx5_core_cq *cq;
@@ -161,6 +161,8 @@ static int mlx5_eq_comp_int(struct notifier_block *nb,
                ++eq->cons_index;
 
        } while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq)));
+
+out:
        eq_update_ci(eq, 1);
 
        if (cqn != -1)
@@ -248,9 +250,9 @@ static int mlx5_eq_async_int(struct notifier_block *nb,
                ++eq->cons_index;
 
        } while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq)));
-       eq_update_ci(eq, 1);
 
 out:
+       eq_update_ci(eq, 1);
        mlx5_eq_async_int_unlock(eq_async, recovery, &flags);
 
        return unlikely(recovery) ? num_eqes : 0;
index b88705a..97e6cb6 100644 (file)
@@ -1054,6 +1054,12 @@ int mlx5_esw_vport_enable(struct mlx5_eswitch *esw, u16 vport_num,
                        goto err_vhca_mapping;
        }
 
+       /* External controller host PF has a factory-programmed MAC.
+        * Read it from the device.
+        */
+       if (mlx5_core_is_ecpf(esw->dev) && vport_num == MLX5_VPORT_PF)
+               mlx5_query_nic_vport_mac_address(esw->dev, vport_num, true, vport->info.mac);
+
        esw_vport_change_handle_locked(vport);
 
        esw->enabled_vports++;
index a1d67bd..0d0f63a 100644 (file)
@@ -1161,7 +1161,7 @@ static int mlx5_load(struct mlx5_core_dev *dev)
        err = mlx5_core_set_hca_defaults(dev);
        if (err) {
                mlx5_core_err(dev, "Failed to set hca defaults\n");
-               goto err_sriov;
+               goto err_set_hca;
        }
 
        mlx5_vhca_event_start(dev);
@@ -1194,6 +1194,7 @@ err_ec:
        mlx5_sf_hw_table_destroy(dev);
 err_vhca:
        mlx5_vhca_event_stop(dev);
+err_set_hca:
        mlx5_cleanup_fs(dev);
 err_fs:
        mlx5_accel_tls_cleanup(dev);
index 50af84e..174f71e 100644 (file)
@@ -54,7 +54,7 @@ int mlx5_core_create_mkey(struct mlx5_core_dev *dev,
        mkey_index = MLX5_GET(create_mkey_out, lout, mkey_index);
        mkey->iova = MLX5_GET64(mkc, mkc, start_addr);
        mkey->size = MLX5_GET64(mkc, mkc, len);
-       mkey->key |= mlx5_idx_to_mkey(mkey_index);
+       mkey->key = (u32)mlx5_mkey_variant(mkey->key) | mlx5_idx_to_mkey(mkey_index);
        mkey->pd = MLX5_GET(mkc, mkc, pd);
        init_waitqueue_head(&mkey->wait);
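The create_mkey change above replaces '|=' with a full rebuild of the key: OR-ing the new index into a recycled mkey could keep stale index bits. Assuming the layout implied by the mlx5_idx_to_mkey()/mlx5_mkey_variant() helpers (variant in the low byte, index above it), a sketch:

    #include <stdint.h>

    static inline uint32_t idx_to_mkey(uint32_t idx)   { return idx << 8; }
    static inline uint32_t mkey_variant(uint32_t mkey) { return mkey & 0xff; }

    static uint32_t rebuild_key(uint32_t old_key, uint32_t new_idx)
    {
            /* '|=' in the old code could keep stale index bits from
             * old_key; rebuilding from the variant alone cannot.
             */
            return mkey_variant(old_key) | idx_to_mkey(new_idx);
    }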
 
index 441b545..540cf05 100644 (file)
@@ -156,6 +156,9 @@ void mlx5_rdma_enable_roce(struct mlx5_core_dev *dev)
 {
        int err;
 
+       if (!MLX5_CAP_GEN(dev, roce))
+               return;
+
        err = mlx5_nic_vport_enable_roce(dev);
        if (err) {
                mlx5_core_err(dev, "Failed to enable RoCE: %d\n", err);
index 6a0c6f9..fa0288a 100644 (file)
@@ -163,6 +163,7 @@ mlx5_sf_dev_state_change_handler(struct notifier_block *nb, unsigned long event_
        sf_index = event->function_id - base_id;
        sf_dev = xa_load(&table->devices, sf_index);
        switch (event->new_vhca_state) {
+       case MLX5_VHCA_STATE_INVALID:
        case MLX5_VHCA_STATE_ALLOCATED:
                if (sf_dev)
                        mlx5_sf_dev_del(table->dev, sf_dev, sf_index);
index 054c2e2..7466f01 100644 (file)
@@ -694,7 +694,11 @@ static int dr_ste_v1_set_action_decap_l3_list(void *data,
        if (hw_action_sz / DR_STE_ACTION_DOUBLE_SZ < DR_STE_DECAP_L3_ACTION_NUM)
                return -EINVAL;
 
-       memcpy(padded_data, data, data_sz);
+       inline_data_sz =
+               MLX5_FLD_SZ_BYTES(ste_double_action_insert_with_inline_v1, inline_data);
+
+       /* Add alignment padding */
+       memcpy(padded_data + data_sz % inline_data_sz, data, data_sz);
 
        /* Remove L2L3 outer headers */
        MLX5_SET(ste_single_action_remove_header_v1, hw_action, action_id,
@@ -706,32 +710,34 @@ static int dr_ste_v1_set_action_decap_l3_list(void *data,
        hw_action += DR_STE_ACTION_DOUBLE_SZ;
        used_actions++; /* Remove and NOP are a single double action */
 
-       inline_data_sz =
-               MLX5_FLD_SZ_BYTES(ste_double_action_insert_with_inline_v1, inline_data);
+       /* Point to the last dword of the header */
+       data_ptr += (data_sz / inline_data_sz) * inline_data_sz;
 
-       /* Add the new header inline + 2 extra bytes */
+       /* Add the new header using the inline action, 4 bytes at a time.
+        * The header is inserted in reverse order at the beginning of the
+        * packet to avoid incorrect parsing by the HW. Since the header is
+        * 14B or 18B, an extra two bytes are padded and later removed.
+        */
        for (i = 0; i < data_sz / inline_data_sz + 1; i++) {
                void *addr_inline;
 
                MLX5_SET(ste_double_action_insert_with_inline_v1, hw_action, action_id,
                         DR_STE_V1_ACTION_ID_INSERT_INLINE);
                /* The hardware expects here offset to words (2 bytes) */
-               MLX5_SET(ste_double_action_insert_with_inline_v1, hw_action, start_offset,
-                        i * 2);
+               MLX5_SET(ste_double_action_insert_with_inline_v1, hw_action, start_offset, 0);
 
                /* Copy bytes one by one to avoid endianness problem */
                addr_inline = MLX5_ADDR_OF(ste_double_action_insert_with_inline_v1,
                                           hw_action, inline_data);
-               memcpy(addr_inline, data_ptr, inline_data_sz);
+               memcpy(addr_inline, data_ptr - i * inline_data_sz, inline_data_sz);
                hw_action += DR_STE_ACTION_DOUBLE_SZ;
-               data_ptr += inline_data_sz;
                used_actions++;
        }
 
-       /* Remove 2 extra bytes */
+       /* Remove first 2 extra bytes */
        MLX5_SET(ste_single_action_remove_header_size_v1, hw_action, action_id,
                 DR_STE_V1_ACTION_ID_REMOVE_BY_SIZE);
-       MLX5_SET(ste_single_action_remove_header_size_v1, hw_action, start_offset, data_sz / 2);
+       MLX5_SET(ste_single_action_remove_header_size_v1, hw_action, start_offset, 0);
        /* The hardware expects here size in words (2 bytes) */
        MLX5_SET(ste_single_action_remove_header_size_v1, hw_action, remove_size, 1);
        used_actions++;
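The arithmetic behind the decap-L3 rewrite above, assuming a 4-byte inline action payload and an L2 header of 14 bytes (untagged) or 18 bytes (one VLAN tag). In both cases the front padding is 2 bytes, and a single remove-by-size action then strips one word (2 bytes) at offset 0:

    #include <stdio.h>

    int main(void)
    {
            const int inline_sz = 4;
            int sizes[] = { 14, 18 };

            for (int i = 0; i < 2; i++) {
                    int data_sz = sizes[i];
                    int pad = data_sz % inline_sz;          /* 2 */
                    int inserts = data_sz / inline_sz + 1;  /* 4 or 5 */

                    printf("hdr %dB: pad %dB, %d inserts, %dB written\n",
                           data_sz, pad, inserts, inserts * inline_sz);
            }
            return 0;
    }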
index 612b0ac..9737565 100644 (file)
@@ -124,10 +124,11 @@ int mlx5dr_action_destroy(struct mlx5dr_action *action);
 static inline bool
 mlx5dr_is_supported(struct mlx5_core_dev *dev)
 {
-       return MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner) ||
-              (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner_v2) &&
-               (MLX5_CAP_GEN(dev, steering_format_version) <=
-                MLX5_STEERING_FORMAT_CONNECTX_6DX));
+       return MLX5_CAP_GEN(dev, roce) &&
+              (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner) ||
+               (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner_v2) &&
+                (MLX5_CAP_GEN(dev, steering_format_version) <=
+                 MLX5_STEERING_FORMAT_CONNECTX_6DX)));
 }
 
 /* buddy functions & structure */
index 01cc00a..b6931bb 100644 (file)
@@ -424,6 +424,15 @@ err_modify_sq:
        return err;
 }
 
+static void mlx5_hairpin_unpair_peer_sq(struct mlx5_hairpin *hp)
+{
+       int i;
+
+       for (i = 0; i < hp->num_channels; i++)
+               mlx5_hairpin_modify_sq(hp->peer_mdev, hp->sqn[i], MLX5_SQC_STATE_RDY,
+                                      MLX5_SQC_STATE_RST, 0, 0);
+}
+
 static void mlx5_hairpin_unpair_queues(struct mlx5_hairpin *hp)
 {
        int i;
@@ -432,13 +441,9 @@ static void mlx5_hairpin_unpair_queues(struct mlx5_hairpin *hp)
        for (i = 0; i < hp->num_channels; i++)
                mlx5_hairpin_modify_rq(hp->func_mdev, hp->rqn[i], MLX5_RQC_STATE_RDY,
                                       MLX5_RQC_STATE_RST, 0, 0);
-
        /* unset peer SQs */
-       if (hp->peer_gone)
-               return;
-       for (i = 0; i < hp->num_channels; i++)
-               mlx5_hairpin_modify_sq(hp->peer_mdev, hp->sqn[i], MLX5_SQC_STATE_RDY,
-                                      MLX5_SQC_STATE_RST, 0, 0);
+       if (!hp->peer_gone)
+               mlx5_hairpin_unpair_peer_sq(hp);
 }
 
 struct mlx5_hairpin *
@@ -485,3 +490,16 @@ void mlx5_core_hairpin_destroy(struct mlx5_hairpin *hp)
        mlx5_hairpin_destroy_queues(hp);
        kfree(hp);
 }
+
+void mlx5_core_hairpin_clear_dead_peer(struct mlx5_hairpin *hp)
+{
+       int i;
+
+       mlx5_hairpin_unpair_peer_sq(hp);
+
+       /* destroy peer SQ */
+       for (i = 0; i < hp->num_channels; i++)
+               mlx5_core_destroy_sq(hp->peer_mdev, hp->sqn[i]);
+
+       hp->peer_gone = true;
+}
index 457ad42..4c1440a 100644 (file)
@@ -465,8 +465,6 @@ int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev,
        void *in;
        int err;
 
-       if (!vport)
-               return -EINVAL;
        if (!MLX5_CAP_GEN(mdev, vport_group_manager))
                return -EACCES;
 
index dfea143..85f0ce2 100644 (file)
@@ -693,7 +693,8 @@ mlxsw_thermal_module_tz_init(struct mlxsw_thermal_module *module_tz)
                                                        MLXSW_THERMAL_TRIP_MASK,
                                                        module_tz,
                                                        &mlxsw_thermal_module_ops,
-                                                       NULL, 0, 0);
+                                                       NULL, 0,
+                                                       module_tz->parent->polling_delay);
        if (IS_ERR(module_tz->tzdev)) {
                err = PTR_ERR(module_tz->tzdev);
                return err;
@@ -815,7 +816,8 @@ mlxsw_thermal_gearbox_tz_init(struct mlxsw_thermal_module *gearbox_tz)
                                                MLXSW_THERMAL_TRIP_MASK,
                                                gearbox_tz,
                                                &mlxsw_thermal_gearbox_ops,
-                                               NULL, 0, 0);
+                                               NULL, 0,
+                                               gearbox_tz->parent->polling_delay);
        if (IS_ERR(gearbox_tz->tzdev))
                return PTR_ERR(gearbox_tz->tzdev);
 
index 900b4bf..2bc5a90 100644 (file)
@@ -3907,7 +3907,7 @@ MLXSW_ITEM32(reg, qeec, max_shaper_bs, 0x1C, 0, 6);
 #define MLXSW_REG_QEEC_HIGHEST_SHAPER_BS       25
 #define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP1    5
 #define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP2    11
-#define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP3    5
+#define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP3    11
 
 static inline void mlxsw_reg_qeec_pack(char *payload, u8 local_port,
                                       enum mlxsw_reg_qeec_hr hr, u8 index,
index 04672eb..9958d50 100644 (file)
@@ -1332,6 +1332,7 @@ __mlxsw_sp_qdisc_ets_graft(struct mlxsw_sp_port *mlxsw_sp_port,
                           u8 band, u32 child_handle)
 {
        struct mlxsw_sp_qdisc *old_qdisc;
+       u32 parent;
 
        if (band < mlxsw_sp_qdisc->num_classes &&
            mlxsw_sp_qdisc->qdiscs[band].handle == child_handle)
@@ -1352,7 +1353,9 @@ __mlxsw_sp_qdisc_ets_graft(struct mlxsw_sp_port *mlxsw_sp_port,
        if (old_qdisc)
                mlxsw_sp_qdisc_destroy(mlxsw_sp_port, old_qdisc);
 
-       mlxsw_sp_qdisc = mlxsw_sp_qdisc->ops->find_class(mlxsw_sp_qdisc, band);
+       parent = TC_H_MAKE(mlxsw_sp_qdisc->handle, band + 1);
+       mlxsw_sp_qdisc = mlxsw_sp_qdisc->ops->find_class(mlxsw_sp_qdisc,
+                                                        parent);
        if (!WARN_ON(!mlxsw_sp_qdisc))
                mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc);
 
index 0c42833..adfb978 100644 (file)
@@ -379,6 +379,7 @@ static u32 ocelot_read_eq_avail(struct ocelot *ocelot, int port)
 
 int ocelot_port_flush(struct ocelot *ocelot, int port)
 {
+       unsigned int pause_ena;
        int err, val;
 
        /* Disable dequeuing from the egress queues */
@@ -387,6 +388,7 @@ int ocelot_port_flush(struct ocelot *ocelot, int port)
                       QSYS_PORT_MODE, port);
 
        /* Disable flow control */
+       ocelot_fields_read(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, &pause_ena);
        ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, 0);
 
        /* Disable priority flow control */
@@ -422,6 +424,9 @@ int ocelot_port_flush(struct ocelot *ocelot, int port)
        /* Clear flushing again. */
        ocelot_rmw_gix(ocelot, 0, REW_PORT_CFG_FLUSH_ENA, REW_PORT_CFG, port);
 
+       /* Re-enable flow control */
+       ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, pause_ena);
+
        return err;
 }
 EXPORT_SYMBOL(ocelot_port_flush);
index 7e6bac8..344ea11 100644 (file)
@@ -1602,6 +1602,8 @@ err_out_free_netdev:
        free_netdev(netdev);
 
 err_out_free_res:
+       if (NX_IS_REVISION_P3(pdev->revision))
+               pci_disable_pcie_error_reporting(pdev);
        pci_release_regions(pdev);
 
 err_out_disable_pdev:
index 17d5b64..e81dd34 100644 (file)
@@ -1266,9 +1266,11 @@ int qed_dcbx_get_config_params(struct qed_hwfn *p_hwfn,
                p_hwfn->p_dcbx_info->set.ver_num |= DCBX_CONFIG_VERSION_STATIC;
 
        p_hwfn->p_dcbx_info->set.enabled = dcbx_info->operational.enabled;
+       BUILD_BUG_ON(sizeof(dcbx_info->operational.params) !=
+                    sizeof(p_hwfn->p_dcbx_info->set.config.params));
        memcpy(&p_hwfn->p_dcbx_info->set.config.params,
               &dcbx_info->operational.params,
-              sizeof(struct qed_dcbx_admin_params));
+              sizeof(p_hwfn->p_dcbx_info->set.config.params));
        p_hwfn->p_dcbx_info->set.config.valid = true;
 
        memcpy(params, &p_hwfn->p_dcbx_info->set, sizeof(struct qed_dcbx_set));
index 96b947f..3beafc6 100644 (file)
@@ -2690,6 +2690,7 @@ err_out_free_hw_res:
        kfree(ahw);
 
 err_out_free_res:
+       pci_disable_pcie_error_reporting(pdev);
        pci_release_regions(pdev);
 
 err_out_disable_pdev:
index 41fbd2c..ab1e0fc 100644 (file)
@@ -126,24 +126,24 @@ static void rmnet_get_stats64(struct net_device *dev,
                              struct rtnl_link_stats64 *s)
 {
        struct rmnet_priv *priv = netdev_priv(dev);
-       struct rmnet_vnd_stats total_stats;
+       struct rmnet_vnd_stats total_stats = { };
        struct rmnet_pcpu_stats *pcpu_ptr;
+       struct rmnet_vnd_stats snapshot;
        unsigned int cpu, start;
 
-       memset(&total_stats, 0, sizeof(struct rmnet_vnd_stats));
-
        for_each_possible_cpu(cpu) {
                pcpu_ptr = per_cpu_ptr(priv->pcpu_stats, cpu);
 
                do {
                        start = u64_stats_fetch_begin_irq(&pcpu_ptr->syncp);
-                       total_stats.rx_pkts += pcpu_ptr->stats.rx_pkts;
-                       total_stats.rx_bytes += pcpu_ptr->stats.rx_bytes;
-                       total_stats.tx_pkts += pcpu_ptr->stats.tx_pkts;
-                       total_stats.tx_bytes += pcpu_ptr->stats.tx_bytes;
+                       snapshot = pcpu_ptr->stats;     /* struct assignment */
                } while (u64_stats_fetch_retry_irq(&pcpu_ptr->syncp, start));
 
-               total_stats.tx_drops += pcpu_ptr->stats.tx_drops;
+               total_stats.rx_pkts += snapshot.rx_pkts;
+               total_stats.rx_bytes += snapshot.rx_bytes;
+               total_stats.tx_pkts += snapshot.tx_pkts;
+               total_stats.tx_bytes += snapshot.tx_bytes;
+               total_stats.tx_drops += snapshot.tx_drops;
        }
 
        s->rx_packets = total_stats.rx_pkts;
@@ -354,4 +354,4 @@ int rmnet_vnd_update_dev_mtu(struct rmnet_port *port,
        }
 
        return 0;
-}
\ No newline at end of file
+}
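The rmnet change above adopts a snapshot-then-accumulate pattern: the per-CPU counters are copied to a local struct inside the u64_stats retry loop and folded into the totals only once the snapshot is known consistent, so a retry can no longer double-count. A kernel-style sketch of the pattern (struct names are illustrative, not the rmnet ones):

    #include <linux/u64_stats_sync.h>

    struct vnd_stats {
            u64 rx_pkts, rx_bytes, tx_pkts, tx_bytes, tx_drops;
    };

    struct pcpu_stats {
            struct vnd_stats stats;
            struct u64_stats_sync syncp;
    };

    static void fold_cpu(struct vnd_stats *total, struct pcpu_stats *pcpu)
    {
            struct vnd_stats snap;
            unsigned int start;

            do {
                    start = u64_stats_fetch_begin_irq(&pcpu->syncp);
                    snap = pcpu->stats;     /* struct copy; may be retried */
            } while (u64_stats_fetch_retry_irq(&pcpu->syncp, start));

            /* accumulate exactly once, outside the retry loop */
            total->rx_pkts  += snap.rx_pkts;
            total->rx_bytes += snap.rx_bytes;
            total->tx_pkts  += snap.tx_pkts;
            total->tx_bytes += snap.tx_bytes;
            total->tx_drops += snap.tx_drops;
    }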
index 2c89cde..2ee72dc 100644 (file)
@@ -1671,7 +1671,7 @@ static void rtl8169_get_strings(struct net_device *dev, u32 stringset, u8 *data)
 {
        switch(stringset) {
        case ETH_SS_STATS:
-               memcpy(data, *rtl8169_gstrings, sizeof(rtl8169_gstrings));
+               memcpy(data, rtl8169_gstrings, sizeof(rtl8169_gstrings));
                break;
        }
 }
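The rtl8169 one-character fix above (and the matching sh_eth and rtl8152 hunks below) hinges on 2-D array semantics: arr and *arr evaluate to the same address, but *arr denotes only the first row, so a fortified memcpy() treats copying sizeof(arr) from it as an overread. A small illustration:

    #include <string.h>

    #define ROWS 3
    #define LEN  8

    static const char table[ROWS][LEN] = { "one", "two", "three" };

    void copy_all(char *dst)
    {
            /* OK: the source object is the whole table (ROWS * LEN bytes) */
            memcpy(dst, table, sizeof(table));

            /* Same address, but the source object is table[0] (LEN bytes),
             * so copying sizeof(table) from it overreads the first row:
             *
             *     memcpy(dst, *table, sizeof(table));
             */
    }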
index c5b1548..713d362 100644 (file)
@@ -2287,7 +2287,7 @@ static void sh_eth_get_strings(struct net_device *ndev, u32 stringset, u8 *data)
 {
        switch (stringset) {
        case ETH_SS_STATS:
-               memcpy(data, *sh_eth_gstrings_stats,
+               memcpy(data, sh_eth_gstrings_stats,
                       sizeof(sh_eth_gstrings_stats));
                break;
        }
index b70d44a..3c73453 100644 (file)
@@ -76,10 +76,10 @@ enum power_event {
 #define LPI_CTRL_STATUS_TLPIEN 0x00000001      /* Transmit LPI Entry */
 
 /* GMAC HW ADDR regs */
-#define GMAC_ADDR_HIGH(reg)    (((reg > 15) ? 0x00000800 : 0x00000040) + \
-                               (reg * 8))
-#define GMAC_ADDR_LOW(reg)     (((reg > 15) ? 0x00000804 : 0x00000044) + \
-                               (reg * 8))
+#define GMAC_ADDR_HIGH(reg)    ((reg > 15) ? 0x00000800 + (reg - 16) * 8 : \
+                                0x00000040 + (reg * 8))
+#define GMAC_ADDR_LOW(reg)     ((reg > 15) ? 0x00000804 + (reg - 16) * 8 : \
+                                0x00000044 + (reg * 8))
 #define GMAC_MAX_PERFECT_ADDRESSES     1
 
 #define GMAC_PCS_BASE          0x000000c0      /* PCS register base */
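The macro fix above can be sanity-checked numerically: the second MAC-address bank starts at 0x800 and must be indexed relative to register 16, whereas the old formula kept adding the full register index. A quick check:

    #include <stdio.h>

    #define OLD_HIGH(reg) ((((reg) > 15) ? 0x800 : 0x40) + (reg) * 8)
    #define NEW_HIGH(reg) ((reg) > 15 ? 0x800 + ((reg) - 16) * 8 : \
                           0x40 + (reg) * 8)

    int main(void)
    {
            /* reg 15: both give 0x40 + 15*8 = 0xb8 */
            printf("reg 15: old %#x new %#x\n", OLD_HIGH(15), NEW_HIGH(15));
            /* reg 16: old wrongly gives 0x880, new correctly 0x800 */
            printf("reg 16: old %#x new %#x\n", OLD_HIGH(16), NEW_HIGH(16));
            return 0;
    }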
index 1e17a23..a696ada 100644 (file)
@@ -622,6 +622,8 @@ error_pclk_get:
 void stmmac_remove_config_dt(struct platform_device *pdev,
                             struct plat_stmmacenet_data *plat)
 {
+       clk_disable_unprepare(plat->stmmac_clk);
+       clk_disable_unprepare(plat->pclk);
        of_node_put(plat->phy_node);
        of_node_put(plat->mdio_node);
 }
index a1f5f07..9a13953 100644 (file)
@@ -774,12 +774,15 @@ static void temac_start_xmit_done(struct net_device *ndev)
        stat = be32_to_cpu(cur_p->app0);
 
        while (stat & STS_CTRL_APP0_CMPLT) {
+               /* Make sure that the other fields are read after the bd is
+                * released by DMA
+                */
+               rmb();
                dma_unmap_single(ndev->dev.parent, be32_to_cpu(cur_p->phys),
                                 be32_to_cpu(cur_p->len), DMA_TO_DEVICE);
                skb = (struct sk_buff *)ptr_from_txbd(cur_p);
                if (skb)
                        dev_consume_skb_irq(skb);
-               cur_p->app0 = 0;
                cur_p->app1 = 0;
                cur_p->app2 = 0;
                cur_p->app3 = 0;
@@ -788,6 +791,12 @@ static void temac_start_xmit_done(struct net_device *ndev)
                ndev->stats.tx_packets++;
                ndev->stats.tx_bytes += be32_to_cpu(cur_p->len);
 
+               /* app0 must be visible last, as it is used to flag
+                * availability of the bd
+                */
+               smp_mb();
+               cur_p->app0 = 0;
+
                lp->tx_bd_ci++;
                if (lp->tx_bd_ci >= lp->tx_bd_num)
                        lp->tx_bd_ci = 0;
@@ -814,6 +823,9 @@ static inline int temac_check_tx_bd_space(struct temac_local *lp, int num_frag)
                if (cur_p->app0)
                        return NETDEV_TX_BUSY;
 
+               /* Make sure to read next bd app0 after this one */
+               rmb();
+
                tail++;
                if (tail >= lp->tx_bd_num)
                        tail = 0;
@@ -849,7 +861,7 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
                smp_mb();
 
                /* Space might have just been freed - check again */
-               if (temac_check_tx_bd_space(lp, num_frag))
+               if (temac_check_tx_bd_space(lp, num_frag + 1))
                        return NETDEV_TX_BUSY;
 
                netif_wake_queue(ndev);
@@ -876,7 +888,6 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
                return NETDEV_TX_OK;
        }
        cur_p->phys = cpu_to_be32(skb_dma_addr);
-       ptr_to_txbd((void *)skb, cur_p);
 
        for (ii = 0; ii < num_frag; ii++) {
                if (++lp->tx_bd_tail >= lp->tx_bd_num)
@@ -915,6 +926,11 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
        }
        cur_p->app0 |= cpu_to_be32(STS_CTRL_APP0_EOP);
 
+       /* Mark last fragment with skb address, so it can be consumed
+        * in temac_start_xmit_done()
+        */
+       ptr_to_txbd((void *)skb, cur_p);
+
        tail_p = lp->tx_bd_p + sizeof(*lp->tx_bd_v) * lp->tx_bd_tail;
        lp->tx_bd_tail++;
        if (lp->tx_bd_tail >= lp->tx_bd_num)
@@ -926,6 +942,11 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
        wmb();
        lp->dma_out(lp, TX_TAILDESC_PTR, tail_p); /* DMA start */
 
+       if (temac_check_tx_bd_space(lp, MAX_SKB_FRAGS + 1)) {
+               netdev_info(ndev, "%s -> netif_stop_queue\n", __func__);
+               netif_stop_queue(ndev);
+       }
+
        return NETDEV_TX_OK;
 }
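The ll_temac hunks above are a paired-barrier fix for the Tx descriptor ring: app0 is the ownership flag, so the completion path must read it before the other descriptor fields (rmb()) and must clear it only after the payload teardown is visible (smp_mb()); the skb pointer also moves to the last fragment so a reclaimed descriptor never still references a live skb. The ordering shape, as a hedged kernel-style sketch rather than the driver code:

    struct bd_sketch {
            u32 app0;       /* status/ownership word, written by hw and sw */
            u32 phys;       /* DMA address of the buffer */
            u32 len;
    };

    static bool reclaim_one(struct bd_sketch *p)
    {
            if (!(p->app0 & STS_CTRL_APP0_CMPLT))
                    return false;
            rmb();          /* read app0 before the other fields */
            /* ... unmap p->phys, consume the skb ... */
            smp_mb();       /* teardown visible before releasing the bd */
            p->app0 = 0;    /* descriptor now free for the xmit path */
            return true;
    }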
 
index 6515422..7685a17 100644 (file)
@@ -799,6 +799,7 @@ static void mkiss_close(struct tty_struct *tty)
        ax->tty = NULL;
 
        unregister_netdev(ax->dev);
+       free_netdev(ax->dev);
 }
 
 /* Perform I/O control on an active ax25 channel. */
index 0d8293a..b806f2f 100644 (file)
@@ -49,7 +49,7 @@ static int mhi_ndo_stop(struct net_device *ndev)
        return 0;
 }
 
-static int mhi_ndo_xmit(struct sk_buff *skb, struct net_device *ndev)
+static netdev_tx_t mhi_ndo_xmit(struct sk_buff *skb, struct net_device *ndev)
 {
        struct mhi_net_dev *mhi_netdev = netdev_priv(ndev);
        const struct mhi_net_proto *proto = mhi_netdev->proto;
index 9bd9a5c..6bbc81a 100644 (file)
@@ -826,16 +826,12 @@ static int dp83867_phy_reset(struct phy_device *phydev)
 {
        int err;
 
-       err = phy_write(phydev, DP83867_CTRL, DP83867_SW_RESET);
+       err = phy_write(phydev, DP83867_CTRL, DP83867_SW_RESTART);
        if (err < 0)
                return err;
 
        usleep_range(10, 20);
 
-       /* After reset FORCE_LINK_GOOD bit is set. Although the
-        * default value should be unset. Disable FORCE_LINK_GOOD
-        * for the phy to work properly.
-        */
        return phy_modify(phydev, MII_DP83867_PHYCTRL,
                         DP83867_PHYCR_FORCE_LINK_GOOD, 0);
 }
index 2e60bc1..359ea0d 100644 (file)
@@ -123,10 +123,10 @@ static struct sk_buff *eem_tx_fixup(struct usbnet *dev, struct sk_buff *skb,
        }
 
        skb2 = skb_copy_expand(skb, EEM_HEAD, ETH_FCS_LEN + padlen, flags);
+       dev_kfree_skb_any(skb);
        if (!skb2)
                return NULL;
 
-       dev_kfree_skb_any(skb);
        skb = skb2;
 
 done:
index b04055f..df0d183 100644 (file)
@@ -1880,7 +1880,7 @@ static void cdc_ncm_status(struct usbnet *dev, struct urb *urb)
 static const struct driver_info cdc_ncm_info = {
        .description = "CDC NCM",
        .flags = FLAG_POINTTOPOINT | FLAG_NO_SETINT | FLAG_MULTI_PACKET
-                       | FLAG_LINK_INTR,
+                       | FLAG_LINK_INTR | FLAG_ETHER,
        .bind = cdc_ncm_bind,
        .unbind = cdc_ncm_unbind,
        .manage_power = usbnet_manage_power,
index 6700f19..bc55ec7 100644 (file)
@@ -575,7 +575,7 @@ static int qmi_wwan_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
 
        if (info->flags & QMI_WWAN_FLAG_PASS_THROUGH) {
                skb->protocol = htons(ETH_P_MAP);
-               return (netif_rx(skb) == NET_RX_SUCCESS);
+               return 1;
        }
 
        switch (skb->data[0] & 0xf0) {
index f6abb2f..e25bfb7 100644 (file)
@@ -8678,7 +8678,7 @@ static void rtl8152_get_strings(struct net_device *dev, u32 stringset, u8 *data)
 {
        switch (stringset) {
        case ETH_SS_STATS:
-               memcpy(data, *rtl8152_gstrings, sizeof(rtl8152_gstrings));
+               memcpy(data, rtl8152_gstrings, sizeof(rtl8152_gstrings));
                break;
        }
 }
index b286993..13141db 100644 (file)
@@ -1483,7 +1483,7 @@ static int smsc75xx_bind(struct usbnet *dev, struct usb_interface *intf)
        ret = smsc75xx_wait_ready(dev, 0);
        if (ret < 0) {
                netdev_warn(dev->net, "device not ready in smsc75xx_bind\n");
-               goto err;
+               goto free_pdata;
        }
 
        smsc75xx_init_mac_address(dev);
@@ -1492,7 +1492,7 @@ static int smsc75xx_bind(struct usbnet *dev, struct usb_interface *intf)
        ret = smsc75xx_reset(dev);
        if (ret < 0) {
                netdev_warn(dev->net, "smsc75xx_reset error %d\n", ret);
-               goto err;
+               goto cancel_work;
        }
 
        dev->net->netdev_ops = &smsc75xx_netdev_ops;
@@ -1503,8 +1503,11 @@ static int smsc75xx_bind(struct usbnet *dev, struct usb_interface *intf)
        dev->net->max_mtu = MAX_SINGLE_PACKET_SIZE;
        return 0;
 
-err:
+cancel_work:
+       cancel_work_sync(&pdata->set_multicast);
+free_pdata:
        kfree(pdata);
+       dev->data[0] = 0;
        return ret;
 }
 
@@ -1515,7 +1518,6 @@ static void smsc75xx_unbind(struct usbnet *dev, struct usb_interface *intf)
                cancel_work_sync(&pdata->set_multicast);
                netif_dbg(dev, ifdown, dev->net, "free pdata\n");
                kfree(pdata);
-               pdata = NULL;
                dev->data[0] = 0;
        }
 }
index 503e2fd..28a6c4c 100644 (file)
@@ -1183,9 +1183,6 @@ static int vrf_dev_init(struct net_device *dev)
 
        dev->flags = IFF_MASTER | IFF_NOARP;
 
-       /* MTU is irrelevant for VRF device; set to 64k similar to lo */
-       dev->mtu = 64 * 1024;
-
        /* similarly, oper state is irrelevant; set to up to avoid confusion */
        dev->operstate = IF_OPER_UP;
        netdev_lockdep_set_classes(dev);
@@ -1685,7 +1682,8 @@ static void vrf_setup(struct net_device *dev)
         * which breaks networking.
         */
        dev->min_mtu = IPV6_MIN_MTU;
-       dev->max_mtu = ETH_MAX_MTU;
+       dev->max_mtu = IP6_MAX_MTU;
+       dev->mtu = dev->max_mtu;
 }
 
 static int vrf_validate(struct nlattr *tb[], struct nlattr *data[],
index 51ce767..7a6fd46 100644 (file)
@@ -1693,8 +1693,13 @@ static int mac80211_hwsim_start(struct ieee80211_hw *hw)
 static void mac80211_hwsim_stop(struct ieee80211_hw *hw)
 {
        struct mac80211_hwsim_data *data = hw->priv;
+
        data->started = false;
        hrtimer_cancel(&data->beacon_timer);
+
+       while (!skb_queue_empty(&data->pending))
+               ieee80211_free_txskb(hw, skb_dequeue(&data->pending));
+
        wiphy_dbg(hw->wiphy, "%s\n", __func__);
 }
 
index eca805c..9e6ce0d 100644 (file)
@@ -18,6 +18,7 @@ obj-$(CONFIG_PCIE_INTEL_GW) += pcie-intel-gw.o
 obj-$(CONFIG_PCIE_KIRIN) += pcie-kirin.o
 obj-$(CONFIG_PCIE_HISI_STB) += pcie-histb.o
 obj-$(CONFIG_PCI_MESON) += pci-meson.o
+obj-$(CONFIG_PCIE_TEGRA194) += pcie-tegra194.o
 obj-$(CONFIG_PCIE_UNIPHIER) += pcie-uniphier.o
 obj-$(CONFIG_PCIE_UNIPHIER_EP) += pcie-uniphier-ep.o
 
@@ -38,6 +39,6 @@ ifdef CONFIG_ACPI
 ifdef CONFIG_PCI_QUIRKS
 obj-$(CONFIG_ARM64) += pcie-al.o
 obj-$(CONFIG_ARM64) += pcie-hisi.o
-obj-$(CONFIG_ARM64) += pcie-tegra194.o
+obj-$(CONFIG_ARM64) += pcie-tegra194-acpi.o
 endif
 endif
diff --git a/drivers/pci/controller/dwc/pcie-tegra194-acpi.c b/drivers/pci/controller/dwc/pcie-tegra194-acpi.c
new file mode 100644 (file)
index 0000000..c2de6ed
--- /dev/null
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * ACPI quirks for Tegra194 PCIe host controller
+ *
+ * Copyright (C) 2021 NVIDIA Corporation.
+ *
+ * Author: Vidya Sagar <vidyas@nvidia.com>
+ */
+
+#include <linux/pci.h>
+#include <linux/pci-acpi.h>
+#include <linux/pci-ecam.h>
+
+#include "pcie-designware.h"
+
+struct tegra194_pcie_ecam  {
+       void __iomem *config_base;
+       void __iomem *iatu_base;
+       void __iomem *dbi_base;
+};
+
+static int tegra194_acpi_init(struct pci_config_window *cfg)
+{
+       struct device *dev = cfg->parent;
+       struct tegra194_pcie_ecam *pcie_ecam;
+
+       pcie_ecam = devm_kzalloc(dev, sizeof(*pcie_ecam), GFP_KERNEL);
+       if (!pcie_ecam)
+               return -ENOMEM;
+
+       pcie_ecam->config_base = cfg->win;
+       pcie_ecam->iatu_base = cfg->win + SZ_256K;
+       pcie_ecam->dbi_base = cfg->win + SZ_512K;
+       cfg->priv = pcie_ecam;
+
+       return 0;
+}
+
+static void atu_reg_write(struct tegra194_pcie_ecam *pcie_ecam, int index,
+                         u32 val, u32 reg)
+{
+       u32 offset = PCIE_GET_ATU_OUTB_UNR_REG_OFFSET(index);
+
+       writel(val, pcie_ecam->iatu_base + offset + reg);
+}
+
+static void program_outbound_atu(struct tegra194_pcie_ecam *pcie_ecam,
+                                int index, int type, u64 cpu_addr,
+                                u64 pci_addr, u64 size)
+{
+       atu_reg_write(pcie_ecam, index, lower_32_bits(cpu_addr),
+                     PCIE_ATU_LOWER_BASE);
+       atu_reg_write(pcie_ecam, index, upper_32_bits(cpu_addr),
+                     PCIE_ATU_UPPER_BASE);
+       atu_reg_write(pcie_ecam, index, lower_32_bits(pci_addr),
+                     PCIE_ATU_LOWER_TARGET);
+       atu_reg_write(pcie_ecam, index, lower_32_bits(cpu_addr + size - 1),
+                     PCIE_ATU_LIMIT);
+       atu_reg_write(pcie_ecam, index, upper_32_bits(pci_addr),
+                     PCIE_ATU_UPPER_TARGET);
+       atu_reg_write(pcie_ecam, index, type, PCIE_ATU_CR1);
+       atu_reg_write(pcie_ecam, index, PCIE_ATU_ENABLE, PCIE_ATU_CR2);
+}
+
+static void __iomem *tegra194_map_bus(struct pci_bus *bus,
+                                     unsigned int devfn, int where)
+{
+       struct pci_config_window *cfg = bus->sysdata;
+       struct tegra194_pcie_ecam *pcie_ecam = cfg->priv;
+       u32 busdev;
+       int type;
+
+       if (bus->number < cfg->busr.start || bus->number > cfg->busr.end)
+               return NULL;
+
+       if (bus->number == cfg->busr.start) {
+               if (PCI_SLOT(devfn) == 0)
+                       return pcie_ecam->dbi_base + where;
+               else
+                       return NULL;
+       }
+
+       busdev = PCIE_ATU_BUS(bus->number) | PCIE_ATU_DEV(PCI_SLOT(devfn)) |
+                PCIE_ATU_FUNC(PCI_FUNC(devfn));
+
+       if (bus->parent->number == cfg->busr.start) {
+               if (PCI_SLOT(devfn) == 0)
+                       type = PCIE_ATU_TYPE_CFG0;
+               else
+                       return NULL;
+       } else {
+               type = PCIE_ATU_TYPE_CFG1;
+       }
+
+       program_outbound_atu(pcie_ecam, 0, type, cfg->res.start, busdev,
+                            SZ_256K);
+
+       return pcie_ecam->config_base + where;
+}
+
+const struct pci_ecam_ops tegra194_pcie_ops = {
+       .init           = tegra194_acpi_init,
+       .pci_ops        = {
+               .map_bus        = tegra194_map_bus,
+               .read           = pci_generic_config_read,
+               .write          = pci_generic_config_write,
+       }
+};
index bafd2c6..504669e 100644 (file)
@@ -22,8 +22,6 @@
 #include <linux/of_irq.h>
 #include <linux/of_pci.h>
 #include <linux/pci.h>
-#include <linux/pci-acpi.h>
-#include <linux/pci-ecam.h>
 #include <linux/phy/phy.h>
 #include <linux/pinctrl/consumer.h>
 #include <linux/platform_device.h>
@@ -247,24 +245,6 @@ static const unsigned int pcie_gen_freq[] = {
        GEN4_CORE_CLK_FREQ
 };
 
-static const u32 event_cntr_ctrl_offset[] = {
-       0x1d8,
-       0x1a8,
-       0x1a8,
-       0x1a8,
-       0x1c4,
-       0x1d8
-};
-
-static const u32 event_cntr_data_offset[] = {
-       0x1dc,
-       0x1ac,
-       0x1ac,
-       0x1ac,
-       0x1c8,
-       0x1dc
-};
-
 struct tegra_pcie_dw {
        struct device *dev;
        struct resource *appl_res;
@@ -313,104 +293,6 @@ struct tegra_pcie_dw_of_data {
        enum dw_pcie_device_mode mode;
 };
 
-#if defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS)
-struct tegra194_pcie_ecam  {
-       void __iomem *config_base;
-       void __iomem *iatu_base;
-       void __iomem *dbi_base;
-};
-
-static int tegra194_acpi_init(struct pci_config_window *cfg)
-{
-       struct device *dev = cfg->parent;
-       struct tegra194_pcie_ecam *pcie_ecam;
-
-       pcie_ecam = devm_kzalloc(dev, sizeof(*pcie_ecam), GFP_KERNEL);
-       if (!pcie_ecam)
-               return -ENOMEM;
-
-       pcie_ecam->config_base = cfg->win;
-       pcie_ecam->iatu_base = cfg->win + SZ_256K;
-       pcie_ecam->dbi_base = cfg->win + SZ_512K;
-       cfg->priv = pcie_ecam;
-
-       return 0;
-}
-
-static void atu_reg_write(struct tegra194_pcie_ecam *pcie_ecam, int index,
-                         u32 val, u32 reg)
-{
-       u32 offset = PCIE_GET_ATU_OUTB_UNR_REG_OFFSET(index);
-
-       writel(val, pcie_ecam->iatu_base + offset + reg);
-}
-
-static void program_outbound_atu(struct tegra194_pcie_ecam *pcie_ecam,
-                                int index, int type, u64 cpu_addr,
-                                u64 pci_addr, u64 size)
-{
-       atu_reg_write(pcie_ecam, index, lower_32_bits(cpu_addr),
-                     PCIE_ATU_LOWER_BASE);
-       atu_reg_write(pcie_ecam, index, upper_32_bits(cpu_addr),
-                     PCIE_ATU_UPPER_BASE);
-       atu_reg_write(pcie_ecam, index, lower_32_bits(pci_addr),
-                     PCIE_ATU_LOWER_TARGET);
-       atu_reg_write(pcie_ecam, index, lower_32_bits(cpu_addr + size - 1),
-                     PCIE_ATU_LIMIT);
-       atu_reg_write(pcie_ecam, index, upper_32_bits(pci_addr),
-                     PCIE_ATU_UPPER_TARGET);
-       atu_reg_write(pcie_ecam, index, type, PCIE_ATU_CR1);
-       atu_reg_write(pcie_ecam, index, PCIE_ATU_ENABLE, PCIE_ATU_CR2);
-}
-
-static void __iomem *tegra194_map_bus(struct pci_bus *bus,
-                                     unsigned int devfn, int where)
-{
-       struct pci_config_window *cfg = bus->sysdata;
-       struct tegra194_pcie_ecam *pcie_ecam = cfg->priv;
-       u32 busdev;
-       int type;
-
-       if (bus->number < cfg->busr.start || bus->number > cfg->busr.end)
-               return NULL;
-
-       if (bus->number == cfg->busr.start) {
-               if (PCI_SLOT(devfn) == 0)
-                       return pcie_ecam->dbi_base + where;
-               else
-                       return NULL;
-       }
-
-       busdev = PCIE_ATU_BUS(bus->number) | PCIE_ATU_DEV(PCI_SLOT(devfn)) |
-                PCIE_ATU_FUNC(PCI_FUNC(devfn));
-
-       if (bus->parent->number == cfg->busr.start) {
-               if (PCI_SLOT(devfn) == 0)
-                       type = PCIE_ATU_TYPE_CFG0;
-               else
-                       return NULL;
-       } else {
-               type = PCIE_ATU_TYPE_CFG1;
-       }
-
-       program_outbound_atu(pcie_ecam, 0, type, cfg->res.start, busdev,
-                            SZ_256K);
-
-       return pcie_ecam->config_base + where;
-}
-
-const struct pci_ecam_ops tegra194_pcie_ops = {
-       .init           = tegra194_acpi_init,
-       .pci_ops        = {
-               .map_bus        = tegra194_map_bus,
-               .read           = pci_generic_config_read,
-               .write          = pci_generic_config_write,
-       }
-};
-#endif /* defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS) */
-
-#ifdef CONFIG_PCIE_TEGRA194
-
 static inline struct tegra_pcie_dw *to_tegra_pcie(struct dw_pcie *pci)
 {
        return container_of(pci, struct tegra_pcie_dw, pci);
@@ -694,6 +576,24 @@ static struct pci_ops tegra_pci_ops = {
 };
 
 #if defined(CONFIG_PCIEASPM)
+static const u32 event_cntr_ctrl_offset[] = {
+       0x1d8,
+       0x1a8,
+       0x1a8,
+       0x1a8,
+       0x1c4,
+       0x1d8
+};
+
+static const u32 event_cntr_data_offset[] = {
+       0x1dc,
+       0x1ac,
+       0x1ac,
+       0x1ac,
+       0x1c8,
+       0x1dc
+};
+
 static void disable_aspm_l11(struct tegra_pcie_dw *pcie)
 {
        u32 val;
@@ -2411,5 +2311,3 @@ MODULE_DEVICE_TABLE(of, tegra_pcie_dw_of_match);
 MODULE_AUTHOR("Vidya Sagar <vidyas@nvidia.com>");
 MODULE_DESCRIPTION("NVIDIA PCIe host controller driver");
 MODULE_LICENSE("GPL v2");
-
-#endif /* CONFIG_PCIE_TEGRA194 */
index 051b48b..e3f5e7a 100644 (file)
@@ -514,7 +514,7 @@ static int advk_pcie_wait_pio(struct advk_pcie *pcie)
                udelay(PIO_RETRY_DELAY);
        }
 
-       dev_err(dev, "config read/write timed out\n");
+       dev_err(dev, "PIO read/write transfer time out\n");
        return -ETIMEDOUT;
 }
 
@@ -657,6 +657,35 @@ static bool advk_pcie_valid_device(struct advk_pcie *pcie, struct pci_bus *bus,
        return true;
 }
 
+static bool advk_pcie_pio_is_running(struct advk_pcie *pcie)
+{
+       struct device *dev = &pcie->pdev->dev;
+
+       /*
+        * Trying to start a new PIO transfer when the previous one has not
+        * completed causes an External Abort on the CPU, which results in a
+        * kernel panic:
+        *
+        *     SError Interrupt on CPU0, code 0xbf000002 -- SError
+        *     Kernel panic - not syncing: Asynchronous SError Interrupt
+        *
+        * Functions advk_pcie_rd_conf() and advk_pcie_wr_conf() are protected
+        * by raw_spin_lock_irqsave() at pci_lock_config() level to prevent
+        * concurrent calls. But because a PIO transfer may take about 1.5s
+        * when the link is down or the card is disconnected,
+        * advk_pcie_wait_pio() does not always wait for completion.
+        *
+        * Some versions of ARM Trusted Firmware handle this External Abort at
+        * EL3 level and mask it to prevent a kernel panic. Relevant TF-A commit:
+        * https://git.trustedfirmware.org/TF-A/trusted-firmware-a.git/commit/?id=3c7dcdac5c50
+        */
+       if (advk_readl(pcie, PIO_START)) {
+               dev_err(dev, "Previous PIO read/write transfer is still running\n");
+               return true;
+       }
+
+       return false;
+}
+
 static int advk_pcie_rd_conf(struct pci_bus *bus, u32 devfn,
                             int where, int size, u32 *val)
 {
@@ -673,9 +702,10 @@ static int advk_pcie_rd_conf(struct pci_bus *bus, u32 devfn,
                return pci_bridge_emul_conf_read(&pcie->bridge, where,
                                                 size, val);
 
-       /* Start PIO */
-       advk_writel(pcie, 0, PIO_START);
-       advk_writel(pcie, 1, PIO_ISR);
+       if (advk_pcie_pio_is_running(pcie)) {
+               *val = 0xffffffff;
+               return PCIBIOS_SET_FAILED;
+       }
 
        /* Program the control register */
        reg = advk_readl(pcie, PIO_CTRL);
@@ -694,7 +724,8 @@ static int advk_pcie_rd_conf(struct pci_bus *bus, u32 devfn,
        /* Program the data strobe */
        advk_writel(pcie, 0xf, PIO_WR_DATA_STRB);
 
-       /* Start the transfer */
+       /* Clear PIO DONE ISR and start the transfer */
+       advk_writel(pcie, 1, PIO_ISR);
        advk_writel(pcie, 1, PIO_START);
 
        ret = advk_pcie_wait_pio(pcie);
@@ -734,9 +765,8 @@ static int advk_pcie_wr_conf(struct pci_bus *bus, u32 devfn,
        if (where % size)
                return PCIBIOS_SET_FAILED;
 
-       /* Start PIO */
-       advk_writel(pcie, 0, PIO_START);
-       advk_writel(pcie, 1, PIO_ISR);
+       if (advk_pcie_pio_is_running(pcie))
+               return PCIBIOS_SET_FAILED;
 
        /* Program the control register */
        reg = advk_readl(pcie, PIO_CTRL);
@@ -763,7 +793,8 @@ static int advk_pcie_wr_conf(struct pci_bus *bus, u32 devfn,
        /* Program the data strobe */
        advk_writel(pcie, data_strobe, PIO_WR_DATA_STRB);
 
-       /* Start the transfer */
+       /* Clear PIO DONE ISR and start the transfer */
+       advk_writel(pcie, 1, PIO_ISR);
        advk_writel(pcie, 1, PIO_START);
 
        ret = advk_pcie_wait_pio(pcie);
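The aardvark change above converts a would-be asynchronous SError into a graceful config-access failure: if the PIO engine is still busy, the read returns all-ones data with PCIBIOS_SET_FAILED instead of re-triggering PIO_START. The shape of that fail-fast guard, with hypothetical names:

    struct pcie_sketch;                             /* hypothetical */
    bool pio_is_running(struct pcie_sketch *p);     /* reads PIO_START */

    static int rd_conf_sketch(struct pcie_sketch *p, u32 *val)
    {
            if (pio_is_running(p)) {
                    *val = 0xffffffff;      /* conventional "no device" data */
                    return PCIBIOS_SET_FAILED;
            }
            /* ... program PIO_CTRL/PIO_ADDR, clear PIO_ISR, set PIO_START,
             * then wait for completion ...
             */
            return PCIBIOS_SUCCESSFUL;
    }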
index 85dcb70..a143b02 100644 (file)
@@ -353,6 +353,8 @@ static int devm_of_pci_get_host_bridge_resources(struct device *dev,
                                dev_warn(dev, "More than one I/O resource converted for %pOF. CPU base address for old range lost!\n",
                                         dev_node);
                        *io_base = range.cpu_addr;
+               } else if (resource_type(res) == IORESOURCE_MEM) {
+                       res->flags &= ~IORESOURCE_MEM_64;
                }
 
                pci_add_resource_offset(resources, res, res->start - range.pci_addr);
index b717680..8d4ebe0 100644 (file)
@@ -1900,11 +1900,21 @@ static int pci_enable_device_flags(struct pci_dev *dev, unsigned long flags)
        int err;
        int i, bars = 0;
 
-       if (atomic_inc_return(&dev->enable_cnt) > 1) {
-               pci_update_current_state(dev, dev->current_state);
-               return 0;               /* already enabled */
+       /*
+        * Power state could be unknown at this point, either due to a fresh
+        * boot or a device removal call.  So get the current power state
+        * so that things like MSI message writing will behave as expected
+        * (e.g. if the device really is in D0 at enable time).
+        */
+       if (dev->pm_cap) {
+               u16 pmcsr;
+               pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr);
+               dev->current_state = (pmcsr & PCI_PM_CTRL_STATE_MASK);
        }
 
+       if (atomic_inc_return(&dev->enable_cnt) > 1)
+               return 0;               /* already enabled */
+
        bridge = pci_upstream_bridge(dev);
        if (bridge)
                pci_enable_bridge(bridge);
index dcb229d..22b2bb1 100644 (file)
@@ -3546,6 +3546,18 @@ static void quirk_no_bus_reset(struct pci_dev *dev)
        dev->dev_flags |= PCI_DEV_FLAGS_NO_BUS_RESET;
 }
 
+/*
+ * Some NVIDIA GPU devices do not work with bus reset; SBR needs to be
+ * prevented for the affected devices.
+ */
+static void quirk_nvidia_no_bus_reset(struct pci_dev *dev)
+{
+       if ((dev->device & 0xffc0) == 0x2340)
+               quirk_no_bus_reset(dev);
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID,
+                        quirk_nvidia_no_bus_reset);
+
 /*
  * Some Atheros AR9xxx and QCA988x chips do not behave after a bus reset.
  * The device will throw a Link Down error on AER-capable systems and
@@ -3566,6 +3578,16 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATHEROS, 0x0034, quirk_no_bus_reset);
  */
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_CAVIUM, 0xa100, quirk_no_bus_reset);
 
+/*
+ * Some TI KeyStone C667X devices do not support bus/hot reset.  The PCIESS
+ * automatically disables LTSSM when Secondary Bus Reset is received and
+ * the device stops working.  Prevent bus reset for these devices.  With
+ * this change, the device can be assigned to VMs with VFIO, but it will
+ * leak state between VMs.  Reference
+ * https://e2e.ti.com/support/processors/f/791/t/954382
+ */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TI, 0xb005, quirk_no_bus_reset);
+
 static void quirk_no_pm_reset(struct pci_dev *dev)
 {
        /*
@@ -3901,6 +3923,69 @@ static int delay_250ms_after_flr(struct pci_dev *dev, int probe)
        return 0;
 }
 
+#define PCI_DEVICE_ID_HINIC_VF      0x375E
+#define HINIC_VF_FLR_TYPE           0x1000
+#define HINIC_VF_FLR_CAP_BIT        (1UL << 30)
+#define HINIC_VF_OP                 0xE80
+#define HINIC_VF_FLR_PROC_BIT       (1UL << 18)
+#define HINIC_OPERATION_TIMEOUT     15000      /* 15 seconds */
+
+/* Device-specific reset method for Huawei Intelligent NIC virtual functions */
+static int reset_hinic_vf_dev(struct pci_dev *pdev, int probe)
+{
+       unsigned long timeout;
+       void __iomem *bar;
+       u32 val;
+
+       if (probe)
+               return 0;
+
+       bar = pci_iomap(pdev, 0, 0);
+       if (!bar)
+               return -ENOTTY;
+
+       /* Get and check firmware capabilities */
+       val = ioread32be(bar + HINIC_VF_FLR_TYPE);
+       if (!(val & HINIC_VF_FLR_CAP_BIT)) {
+               pci_iounmap(pdev, bar);
+               return -ENOTTY;
+       }
+
+       /* Set HINIC_VF_FLR_PROC_BIT for the start of FLR */
+       val = ioread32be(bar + HINIC_VF_OP);
+       val = val | HINIC_VF_FLR_PROC_BIT;
+       iowrite32be(val, bar + HINIC_VF_OP);
+
+       pcie_flr(pdev);
+
+       /*
+        * The device must recapture its Bus and Device Numbers after FLR
+        * in order to generate Completions.  Issue a config write to let the
+        * device capture this information.
+        */
+       pci_write_config_word(pdev, PCI_VENDOR_ID, 0);
+
+       /* Firmware clears HINIC_VF_FLR_PROC_BIT when reset is complete */
+       timeout = jiffies + msecs_to_jiffies(HINIC_OPERATION_TIMEOUT);
+       do {
+               val = ioread32be(bar + HINIC_VF_OP);
+               if (!(val & HINIC_VF_FLR_PROC_BIT))
+                       goto reset_complete;
+               msleep(20);
+       } while (time_before(jiffies, timeout));
+
+       val = ioread32be(bar + HINIC_VF_OP);
+       if (!(val & HINIC_VF_FLR_PROC_BIT))
+               goto reset_complete;
+
+       pci_warn(pdev, "Reset dev timeout, FLR ack reg: %#010x\n", val);
+
+reset_complete:
+       pci_iounmap(pdev, bar);
+
+       return 0;
+}
+
 static const struct pci_dev_reset_methods pci_dev_reset_methods[] = {
        { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82599_SFP_VF,
                 reset_intel_82599_sfp_virtfn },
@@ -3913,6 +3998,8 @@ static const struct pci_dev_reset_methods pci_dev_reset_methods[] = {
        { PCI_VENDOR_ID_INTEL, 0x0a54, delay_250ms_after_flr },
        { PCI_VENDOR_ID_CHELSIO, PCI_ANY_ID,
                reset_chelsio_generic_dev },
+       { PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_HINIC_VF,
+               reset_hinic_vf_dev },
        { 0 }
 };
 
@@ -4753,6 +4840,8 @@ static const struct pci_dev_acs_enabled {
        { PCI_VENDOR_ID_AMPERE, 0xE00A, pci_quirk_xgene_acs },
        { PCI_VENDOR_ID_AMPERE, 0xE00B, pci_quirk_xgene_acs },
        { PCI_VENDOR_ID_AMPERE, 0xE00C, pci_quirk_xgene_acs },
+       /* Broadcom multi-function device */
+       { PCI_VENDOR_ID_BROADCOM, 0x16D7, pci_quirk_mf_endpoint_acs },
        { PCI_VENDOR_ID_BROADCOM, 0xD714, pci_quirk_brcm_acs },
        /* Amazon Annapurna Labs */
        { PCI_VENDOR_ID_AMAZON_ANNAPURNA_LABS, 0x0031, pci_quirk_al_acs },
@@ -5154,7 +5243,8 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0422, quirk_no_ext_tags);
 static void quirk_amd_harvest_no_ats(struct pci_dev *pdev)
 {
        if ((pdev->device == 0x7312 && pdev->revision != 0x00) ||
-           (pdev->device == 0x7340 && pdev->revision != 0xc5))
+           (pdev->device == 0x7340 && pdev->revision != 0xc5) ||
+           (pdev->device == 0x7341 && pdev->revision != 0x00))
                return;
 
        if (pdev->device == 0x15d8) {
@@ -5181,6 +5271,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x6900, quirk_amd_harvest_no_ats);
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7312, quirk_amd_harvest_no_ats);
 /* AMD Navi14 dGPU */
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7340, quirk_amd_harvest_no_ats);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7341, quirk_amd_harvest_no_ats);
 /* AMD Raven platform iGPU */
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x15d8, quirk_amd_harvest_no_ats);
 #endif /* CONFIG_PCI_ATS */
index 03a246e..21c4c34 100644 (file)
@@ -63,7 +63,7 @@ static void enqueue_external_timestamp(struct timestamp_event_queue *queue,
        spin_unlock_irqrestore(&queue->lock, flags);
 }
 
-s32 scaled_ppm_to_ppb(long ppm)
+long scaled_ppm_to_ppb(long ppm)
 {
        /*
         * The 'freq' field in the 'struct timex' is in parts per
@@ -80,7 +80,7 @@ s32 scaled_ppm_to_ppb(long ppm)
        s64 ppb = 1 + ppm;
        ppb *= 125;
        ppb >>= 13;
-       return (s32) ppb;
+       return (long) ppb;
 }
 EXPORT_SYMBOL(scaled_ppm_to_ppb);
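Worked numbers for the conversion above: the scaled-ppm format carries a 16-bit binary fraction (1 ppm == 65536), so ppb = scaled_ppm * 1000 / 2^16, which the code computes as (scaled_ppm * 125) >> 13. Widening the return type from s32 to long matters because truncating a very large request could wrap it back inside the max_adj range check in ptp_clock_adjtime(). A standalone check of the arithmetic:

    #include <stdio.h>

    static long scaled_ppm_to_ppb(long ppm)
    {
            long long ppb = 1 + (long long)ppm;     /* round as the kernel does */

            ppb *= 125;
            ppb >>= 13;                             /* == * 1000 / 65536 */
            return (long)ppb;
    }

    int main(void)
    {
            printf("%ld\n", scaled_ppm_to_ppb(65536));      /* 1 ppm -> 1000 ppb */
            printf("%ld\n", scaled_ppm_to_ppb(32768000));   /* 500 ppm -> 500000 ppb */
            return 0;
    }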
 
@@ -138,7 +138,7 @@ static int ptp_clock_adjtime(struct posix_clock *pc, struct __kernel_timex *tx)
                delta = ktime_to_ns(kt);
                err = ops->adjtime(ops, delta);
        } else if (tx->modes & ADJ_FREQUENCY) {
-               s32 ppb = scaled_ppm_to_ppb(tx->freq);
+               long ppb = scaled_ppm_to_ppb(tx->freq);
                if (ppb > ops->max_adj || ppb < -ops->max_adj)
                        return -ERANGE;
                if (ops->adjfine)
index ecefc25..337353c 100644 (file)
@@ -135,12 +135,13 @@ static struct ap_queue_status ap_sm_recv(struct ap_queue *aq)
 {
        struct ap_queue_status status;
        struct ap_message *ap_msg;
+       bool found = false;
 
        status = ap_dqap(aq->qid, &aq->reply->psmid,
                         aq->reply->msg, aq->reply->len);
        switch (status.response_code) {
        case AP_RESPONSE_NORMAL:
-               aq->queue_count--;
+               aq->queue_count = max_t(int, 0, aq->queue_count - 1);
                if (aq->queue_count > 0)
                        mod_timer(&aq->timeout,
                                  jiffies + aq->request_timeout);
@@ -150,8 +151,14 @@ static struct ap_queue_status ap_sm_recv(struct ap_queue *aq)
                        list_del_init(&ap_msg->list);
                        aq->pendingq_count--;
                        ap_msg->receive(aq, ap_msg, aq->reply);
+                       found = true;
                        break;
                }
+               if (!found) {
+                       AP_DBF_WARN("%s unassociated reply psmid=0x%016llx on 0x%02x.%04x\n",
+                                   __func__, aq->reply->psmid,
+                                   AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid));
+               }
                fallthrough;
        case AP_RESPONSE_NO_PENDING_REPLY:
                if (!status.queue_empty || aq->queue_count <= 0)
@@ -232,7 +239,7 @@ static enum ap_sm_wait ap_sm_write(struct ap_queue *aq)
                           ap_msg->flags & AP_MSG_FLAG_SPECIAL);
        switch (status.response_code) {
        case AP_RESPONSE_NORMAL:
-               aq->queue_count++;
+               aq->queue_count = max_t(int, 1, aq->queue_count + 1);
                if (aq->queue_count == 1)
                        mod_timer(&aq->timeout, jiffies + aq->request_timeout);
                list_move_tail(&ap_msg->list, &aq->pendingq);
index 6e6c240..a66fa97 100644 (file)
@@ -1124,12 +1124,6 @@ static int nxp_fspi_probe(struct platform_device *pdev)
                goto err_put_ctrl;
        }
 
-       /* Clear potential interrupts */
-       reg = fspi_readl(f, f->iobase + FSPI_INTR);
-       if (reg)
-               fspi_writel(f, reg, f->iobase + FSPI_INTR);
-
-
        /* find the resources - controller memory mapped space */
        if (is_acpi_node(f->dev->fwnode))
                res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
@@ -1167,6 +1161,11 @@ static int nxp_fspi_probe(struct platform_device *pdev)
                }
        }
 
+       /* Clear potential interrupts */
+       reg = fspi_readl(f, f->iobase + FSPI_INTR);
+       if (reg)
+               fspi_writel(f, reg, f->iobase + FSPI_INTR);
+
        /* find the irq */
        ret = platform_get_irq(pdev, 0);
        if (ret < 0)
index f7c832f..6a726c9 100644 (file)
@@ -1118,6 +1118,11 @@ static int tegra_slink_probe(struct platform_device *pdev)
                pm_runtime_put_noidle(&pdev->dev);
                goto exit_pm_disable;
        }
+
+       reset_control_assert(tspi->rst);
+       udelay(2);
+       reset_control_deassert(tspi->rst);
+
        tspi->def_command_reg  = SLINK_M_S;
        tspi->def_command2_reg = SLINK_CS_ACTIVE_BETWEEN;
        tegra_slink_writel(tspi, tspi->def_command_reg, SLINK_COMMAND);
index ffa1cf4..4378592 100644 (file)
@@ -2284,7 +2284,7 @@ static int rtw_cfg80211_add_monitor_if(struct adapter *padapter, char *name, str
        mon_wdev->iftype = NL80211_IFTYPE_MONITOR;
        mon_ndev->ieee80211_ptr = mon_wdev;
 
-       ret = register_netdevice(mon_ndev);
+       ret = cfg80211_register_netdevice(mon_ndev);
        if (ret) {
                goto out;
        }
@@ -2360,7 +2360,7 @@ static int cfg80211_rtw_del_virtual_intf(struct wiphy *wiphy,
        adapter = rtw_netdev_priv(ndev);
        pwdev_priv = adapter_wdev_data(adapter);
 
-       unregister_netdevice(ndev);
+       cfg80211_unregister_netdevice(ndev);
 
        if (ndev == pwdev_priv->pmon_ndev) {
                pwdev_priv->pmon_ndev = NULL;
index 4545b23..bac0f54 100644 (file)
@@ -686,6 +686,16 @@ static int imx7d_charger_secondary_detection(struct imx_usbmisc_data *data)
        int val;
        unsigned long flags;
 
+       /* Clear VDATSRCENB0 to disable VDP_SRC and IDM_SNK required by BC 1.2 spec */
+       spin_lock_irqsave(&usbmisc->lock, flags);
+       val = readl(usbmisc->base + MX7D_USB_OTG_PHY_CFG2);
+       val &= ~MX7D_USB_OTG_PHY_CFG2_CHRG_VDATSRCENB0;
+       writel(val, usbmisc->base + MX7D_USB_OTG_PHY_CFG2);
+       spin_unlock_irqrestore(&usbmisc->lock, flags);
+
+       /* TVDMSRC_DIS */
+       msleep(20);
+
        /* VDM_SRC is connected to D- and IDP_SINK is connected to D+ */
        spin_lock_irqsave(&usbmisc->lock, flags);
        val = readl(usbmisc->base + MX7D_USB_OTG_PHY_CFG2);
@@ -695,7 +705,8 @@ static int imx7d_charger_secondary_detection(struct imx_usbmisc_data *data)
                                usbmisc->base + MX7D_USB_OTG_PHY_CFG2);
        spin_unlock_irqrestore(&usbmisc->lock, flags);
 
-       usleep_range(1000, 2000);
+       /* TVDMSRC_ON */
+       msleep(40);
 
        /*
         * Per BC 1.2, check voltage of D+:
@@ -798,7 +809,8 @@ static int imx7d_charger_primary_detection(struct imx_usbmisc_data *data)
                                usbmisc->base + MX7D_USB_OTG_PHY_CFG2);
        spin_unlock_irqrestore(&usbmisc->lock, flags);
 
-       usleep_range(1000, 2000);
+       /* TVDPSRC_ON */
+       msleep(40);
 
        /* Check if D- is less than VDAT_REF to determine an SDP per BC 1.2 */
        val = readl(usbmisc->base + MX7D_USB_OTG_PHY_STATUS);
index fc7d6cd..df8e69e 100644 (file)
@@ -41,6 +41,8 @@
 #define USB_VENDOR_GENESYS_LOGIC               0x05e3
 #define USB_VENDOR_SMSC                                0x0424
 #define USB_PRODUCT_USB5534B                   0x5534
+#define USB_VENDOR_CYPRESS                     0x04b4
+#define USB_PRODUCT_CY7C65632                  0x6570
 #define HUB_QUIRK_CHECK_PORT_AUTOSUSPEND       0x01
 #define HUB_QUIRK_DISABLE_AUTOSUSPEND          0x02
 
@@ -5697,6 +5699,11 @@ static const struct usb_device_id hub_id_table[] = {
       .idProduct = USB_PRODUCT_USB5534B,
       .bInterfaceClass = USB_CLASS_HUB,
       .driver_info = HUB_QUIRK_DISABLE_AUTOSUSPEND},
+    { .match_flags = USB_DEVICE_ID_MATCH_VENDOR
+                   | USB_DEVICE_ID_MATCH_PRODUCT,
+      .idVendor = USB_VENDOR_CYPRESS,
+      .idProduct = USB_PRODUCT_CY7C65632,
+      .driver_info = HUB_QUIRK_DISABLE_AUTOSUSPEND},
     { .match_flags = USB_DEVICE_ID_MATCH_VENDOR
                        | USB_DEVICE_ID_MATCH_INT_CLASS,
       .idVendor = USB_VENDOR_GENESYS_LOGIC,
index 21129d3..4ac397e 100644 (file)
@@ -1671,8 +1671,8 @@ static int dwc3_remove(struct platform_device *pdev)
 
        pm_runtime_get_sync(&pdev->dev);
 
-       dwc3_debugfs_exit(dwc);
        dwc3_core_exit_mode(dwc);
+       dwc3_debugfs_exit(dwc);
 
        dwc3_core_exit(dwc);
        dwc3_ulpi_exit(dwc);
index b297525..179004b 100644 (file)
@@ -203,8 +203,8 @@ static int __init afs_init(void)
                goto error_fs;
 
        afs_proc_symlink = proc_symlink("fs/afs", NULL, "../self/net/afs");
-       if (IS_ERR(afs_proc_symlink)) {
-               ret = PTR_ERR(afs_proc_symlink);
+       if (!afs_proc_symlink) {
+               ret = -ENOMEM;
                goto error_proc;
        }
 
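
The subtlety in this fix is that proc_symlink(), like most procfs creation helpers, reports failure with a plain NULL rather than an encoded error pointer, so the old IS_ERR() test could never fire. A self-contained sketch of the two conventions, with the err.h-style helpers re-derived locally for illustration:

#include <errno.h>
#include <stdio.h>

#define MAX_ERRNO 4095

static inline void *ERR_PTR(long error) { return (void *)error; }
static inline int IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

int main(void)
{
	void *err_style  = ERR_PTR(-ENOMEM);	/* "ERR_PTR on failure" convention */
	void *null_style = NULL;		/* "NULL on failure" convention */

	printf("IS_ERR(err_style)  = %d\n", IS_ERR(err_style));  /* 1 */
	printf("IS_ERR(null_style) = %d\n", IS_ERR(null_style)); /* 0: why the old check never fired */
	return 0;
}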
index 641c546..3104b62 100644 (file)
@@ -844,6 +844,7 @@ vm_fault_t afs_page_mkwrite(struct vm_fault *vmf)
        struct inode *inode = file_inode(file);
        struct afs_vnode *vnode = AFS_FS_I(inode);
        unsigned long priv;
+       vm_fault_t ret = VM_FAULT_RETRY;
 
        _enter("{{%llx:%llu}},{%lx}", vnode->fid.vid, vnode->fid.vnode, page->index);
 
@@ -855,14 +856,14 @@ vm_fault_t afs_page_mkwrite(struct vm_fault *vmf)
 #ifdef CONFIG_AFS_FSCACHE
        if (PageFsCache(page) &&
            wait_on_page_fscache_killable(page) < 0)
-               return VM_FAULT_RETRY;
+               goto out;
 #endif
 
        if (wait_on_page_writeback_killable(page))
-               return VM_FAULT_RETRY;
+               goto out;
 
        if (lock_page_killable(page) < 0)
-               return VM_FAULT_RETRY;
+               goto out;
 
        /* We mustn't change page->private until writeback is complete as that
         * details the portion of the page we need to write back and we might
@@ -870,7 +871,7 @@ vm_fault_t afs_page_mkwrite(struct vm_fault *vmf)
         */
        if (wait_on_page_writeback_killable(page) < 0) {
                unlock_page(page);
-               return VM_FAULT_RETRY;
+               goto out;
        }
 
        priv = afs_page_dirty(page, 0, thp_size(page));
@@ -884,8 +885,10 @@ vm_fault_t afs_page_mkwrite(struct vm_fault *vmf)
        }
        file_update_time(file);
 
+       ret = VM_FAULT_LOCKED;
+out:
        sb_end_pagefault(inode->i_sb);
-       return VM_FAULT_LOCKED;
+       return ret;
 }
 
 /*
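
The conversion above funnels every bail-out through one label so that sb_end_pagefault() always runs once per sb_start_pagefault(). A compilable toy of the same acquire/goto-out/release shape (all names are stand-ins, not the AFS code):

#include <stdio.h>

struct resource { int held; };

static void acquire(struct resource *r) { r->held = 1; }
static void release(struct resource *r) { r->held = 0; }
static int step_one(struct resource *r) { (void)r; return 0; }
static int step_two(struct resource *r) { (void)r; return -1; } /* force a bail-out */

static int guarded_operation(struct resource *r)
{
	int ret = -1;			/* default: failure/retry */

	acquire(r);			/* pairs with release() below */

	if (step_one(r) < 0)
		goto out;
	if (step_two(r) < 0)
		goto out;		/* an early return here would skip release() */

	ret = 0;			/* success only if every step passed */
out:
	release(r);			/* runs on every path, success or not */
	return ret;
}

int main(void)
{
	struct resource r = { 0 };

	printf("ret=%d held=%d\n", guarded_operation(&r), r.held); /* ret=-1 held=0 */
	return 0;
}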
index aa57bdc..6d5c4e4 100644 (file)
@@ -2442,16 +2442,16 @@ void btrfs_dec_block_group_ro(struct btrfs_block_group *cache)
        spin_lock(&sinfo->lock);
        spin_lock(&cache->lock);
        if (!--cache->ro) {
-               num_bytes = cache->length - cache->reserved -
-                           cache->pinned - cache->bytes_super -
-                           cache->zone_unusable - cache->used;
-               sinfo->bytes_readonly -= num_bytes;
                if (btrfs_is_zoned(cache->fs_info)) {
                        /* Migrate zone_unusable bytes back */
                        cache->zone_unusable = cache->alloc_offset - cache->used;
                        sinfo->bytes_zone_unusable += cache->zone_unusable;
                        sinfo->bytes_readonly -= cache->zone_unusable;
                }
+               num_bytes = cache->length - cache->reserved -
+                           cache->pinned - cache->bytes_super -
+                           cache->zone_unusable - cache->used;
+               sinfo->bytes_readonly -= num_bytes;
                list_del_init(&cache->ro_list);
        }
        spin_unlock(&cache->lock);
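
The reorder matters because num_bytes is derived from cache->zone_unusable: on zoned filesystems the migration back must happen first, or bytes_readonly is adjusted with a stale value. A toy calculation with made-up numbers:

#include <stdio.h>

int main(void)
{
	long length = 1024, used = 256, alloc_offset = 512;
	long reserved = 0, pinned = 0, bytes_super = 0;
	long zone_unusable = 64;		/* stale value before migration */

	/* fixed order: refresh zone_unusable first ... */
	zone_unusable = alloc_offset - used;	/* now 256 */

	/* ... then derive num_bytes from the fresh value */
	long num_bytes = length - reserved - pinned - bytes_super -
			 zone_unusable - used;

	printf("num_bytes = %ld\n", num_bytes);	/* 512; the stale order gave 704 */
	return 0;
}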
index 55efd3d..30dee68 100644 (file)
@@ -735,6 +735,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
                __SetPageUptodate(page);
                error = huge_add_to_page_cache(page, mapping, index);
                if (unlikely(error)) {
+                       restore_reserve_on_error(h, &pseudo_vma, addr, page);
                        put_page(page);
                        mutex_unlock(&hugetlb_fault_mutex_table[hash]);
                        goto out;
index be5b6d2..64864fb 100644 (file)
@@ -471,7 +471,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
                                        info_type, fanotify_info_name(info),
                                        info->name_len, buf, count);
                if (ret < 0)
-                       return ret;
+                       goto out_close_fd;
 
                buf += ret;
                count -= ret;
@@ -519,7 +519,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
                                        fanotify_event_object_fh(event),
                                        info_type, dot, dot_len, buf, count);
                if (ret < 0)
-                       return ret;
+                       goto out_close_fd;
 
                buf += ret;
                count -= ret;
index 7118ebe..9cbd915 100644 (file)
@@ -2676,7 +2676,9 @@ out:
 #ifdef CONFIG_SECURITY
 static int proc_pid_attr_open(struct inode *inode, struct file *file)
 {
-       return __mem_open(inode, file, PTRACE_MODE_READ_FSCREDS);
+       file->private_data = NULL;
+       __mem_open(inode, file, PTRACE_MODE_READ_FSCREDS);
+       return 0;
 }
 
 static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
index f180240..11e555c 100644 (file)
@@ -37,7 +37,6 @@ bool topology_scale_freq_invariant(void);
 enum scale_freq_source {
        SCALE_FREQ_SOURCE_CPUFREQ = 0,
        SCALE_FREQ_SOURCE_ARCH,
-       SCALE_FREQ_SOURCE_CPPC,
 };
 
 struct scale_freq_data {
index 2915f56..edb5c18 100644 (file)
@@ -27,8 +27,10 @@ extern int debug_locks_off(void);
        int __ret = 0;                                                  \
                                                                        \
        if (!oops_in_progress && unlikely(c)) {                         \
+               instrumentation_begin();                                \
                if (debug_locks_off() && !debug_locks_silent)           \
                        WARN(1, "DEBUG_LOCKS_WARN_ON(%s)", #c);         \
+               instrumentation_end();                                  \
                __ret = 1;                                              \
        }                                                               \
        __ret;                                                          \
index 9626fda..2a8ebe6 100644 (file)
@@ -286,6 +286,7 @@ struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
 vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd);
 
 extern struct page *huge_zero_page;
+extern unsigned long huge_zero_pfn;
 
 static inline bool is_huge_zero_page(struct page *page)
 {
@@ -294,7 +295,7 @@ static inline bool is_huge_zero_page(struct page *page)
 
 static inline bool is_huge_zero_pmd(pmd_t pmd)
 {
-       return is_huge_zero_page(pmd_page(pmd));
+       return READ_ONCE(huge_zero_pfn) == pmd_pfn(pmd) && pmd_present(pmd);
 }
 
 static inline bool is_huge_zero_pud(pud_t pud)
@@ -440,6 +441,11 @@ static inline bool is_huge_zero_page(struct page *page)
        return false;
 }
 
+static inline bool is_huge_zero_pmd(pmd_t pmd)
+{
+       return false;
+}
+
 static inline bool is_huge_zero_pud(pud_t pud)
 {
        return false;
index b92f25c..6504346 100644 (file)
@@ -149,6 +149,7 @@ bool hugetlb_reserve_pages(struct inode *inode, long from, long to,
 long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
                                                long freed);
 bool isolate_huge_page(struct page *page, struct list_head *list);
+int get_hwpoison_huge_page(struct page *page, bool *hugetlb);
 void putback_active_hugepage(struct page *page);
 void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason);
 void free_huge_page(struct page *page);
@@ -339,6 +340,11 @@ static inline bool isolate_huge_page(struct page *page, struct list_head *list)
        return false;
 }
 
+static inline int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
+{
+       return 0;
+}
+
 static inline void putback_active_hugepage(struct page *page)
 {
 }
@@ -604,6 +610,8 @@ struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma,
                                unsigned long address);
 int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
                        pgoff_t idx);
+void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
+                               unsigned long address, struct page *page);
 
 /* arch callback */
 int __init __alloc_bootmem_huge_page(struct hstate *h);
index 020a8f7..f8902bc 100644 (file)
@@ -542,6 +542,10 @@ struct mlx5_core_roce {
 enum {
        MLX5_PRIV_FLAGS_DISABLE_IB_ADEV = 1 << 0,
        MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV = 1 << 1,
+       /* Set during device detach to block any further device
+        * creation/deletion on driver rescan. Unset during device attach.
+        */
+       MLX5_PRIV_FLAGS_DETACH = 1 << 2,
 };
 
 struct mlx5_adev {
index 028f442..60ffeb6 100644 (file)
@@ -85,4 +85,5 @@ mlx5_core_hairpin_create(struct mlx5_core_dev *func_mdev,
                         struct mlx5_hairpin_params *params);
 
 void mlx5_core_hairpin_destroy(struct mlx5_hairpin *pair);
+void mlx5_core_hairpin_clear_dead_peer(struct mlx5_hairpin *hp);
 #endif /* __TRANSOBJ_H__ */
index c274f75..8ae3162 100644 (file)
@@ -1719,6 +1719,7 @@ struct zap_details {
        struct address_space *check_mapping;    /* Check page->mapping if set */
        pgoff_t first_index;                    /* Lowest page->index to unmap */
        pgoff_t last_index;                     /* Highest page->index to unmap */
+       struct page *single_page;               /* Locked page to be unmapped */
 };
 
 struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
@@ -1766,6 +1767,7 @@ extern vm_fault_t handle_mm_fault(struct vm_area_struct *vma,
 extern int fixup_user_fault(struct mm_struct *mm,
                            unsigned long address, unsigned int fault_flags,
                            bool *unlocked);
+void unmap_mapping_page(struct page *page);
 void unmap_mapping_pages(struct address_space *mapping,
                pgoff_t start, pgoff_t nr, bool even_cows);
 void unmap_mapping_range(struct address_space *mapping,
@@ -1786,6 +1788,7 @@ static inline int fixup_user_fault(struct mm_struct *mm, unsigned long address,
        BUG();
        return -EFAULT;
 }
+static inline void unmap_mapping_page(struct page *page) { }
 static inline void unmap_mapping_pages(struct address_space *mapping,
                pgoff_t start, pgoff_t nr, bool even_cows) { }
 static inline void unmap_mapping_range(struct address_space *mapping,
index 0d47fd3..51d7f1b 100644 (file)
@@ -235,7 +235,7 @@ extern int ptp_clock_index(struct ptp_clock *ptp);
  * @ppm:    Parts per million, but with a 16 bit binary fractional field
  */
 
-extern s32 scaled_ppm_to_ppb(long ppm);
+extern long scaled_ppm_to_ppb(long ppm);
 
 /**
  * ptp_find_pin() - obtain the pin index of a given auxiliary function
index def5c62..8d04e7d 100644 (file)
@@ -91,6 +91,7 @@ enum ttu_flags {
 
        TTU_SPLIT_HUGE_PMD      = 0x4,  /* split huge PMD if any */
        TTU_IGNORE_MLOCK        = 0x8,  /* ignore mlock */
+       TTU_SYNC                = 0x10, /* avoid racy checks with PVMW_SYNC */
        TTU_IGNORE_HWPOISON     = 0x20, /* corrupted page is recoverable */
        TTU_BATCH_FLUSH         = 0x40, /* Batch TLB flushes where possible
                                         * and caller guarantees they will
index b8fc5c5..0d8e3dc 100644 (file)
@@ -438,6 +438,4 @@ extern int __sys_socketpair(int family, int type, int protocol,
                            int __user *usockvec);
 extern int __sys_shutdown_sock(struct socket *sock, int how);
 extern int __sys_shutdown(int fd, int how);
-
-extern struct ns_common *get_net_ns(struct ns_common *ns);
 #endif /* _LINUX_SOCKET_H */
index d9b7c91..6430a94 100644 (file)
 #define SWP_TYPE_SHIFT (BITS_PER_XA_VALUE - MAX_SWAPFILES_SHIFT)
 #define SWP_OFFSET_MASK        ((1UL << SWP_TYPE_SHIFT) - 1)
 
+/* Clear all flags but only keep swp_entry_t related information */
+static inline pte_t pte_swp_clear_flags(pte_t pte)
+{
+       if (pte_swp_soft_dirty(pte))
+               pte = pte_swp_clear_soft_dirty(pte);
+       if (pte_swp_uffd_wp(pte))
+               pte = pte_swp_clear_uffd_wp(pte);
+       return pte;
+}
+
 /*
  * Store a type+offset into a swp_entry_t in an arch-independent format
  */
@@ -66,10 +76,7 @@ static inline swp_entry_t pte_to_swp_entry(pte_t pte)
 {
        swp_entry_t arch_entry;
 
-       if (pte_swp_soft_dirty(pte))
-               pte = pte_swp_clear_soft_dirty(pte);
-       if (pte_swp_uffd_wp(pte))
-               pte = pte_swp_clear_uffd_wp(pte);
+       pte = pte_swp_clear_flags(pte);
        arch_entry = __pte_to_swp_entry(pte);
        return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry));
 }
index 445b66c..e89530d 100644 (file)
@@ -5537,7 +5537,7 @@ void ieee80211_iterate_active_interfaces_atomic(struct ieee80211_hw *hw,
  *
  * This function iterates over the interfaces associated with a given
  * hardware that are currently active and calls the callback for them.
- * This version can only be used while holding the RTNL.
+ * This version can only be used while holding the wiphy mutex.
  *
  * @hw: the hardware struct of which the interfaces should be iterated over
  * @iter_flags: iteration flags, see &enum ieee80211_interface_iteration_flags
@@ -6392,7 +6392,12 @@ bool ieee80211_tx_prepare_skb(struct ieee80211_hw *hw,
 
 /**
  * ieee80211_parse_tx_radiotap - Sanity-check and parse the radiotap header
- *                              of injected frames
+ *                              of injected frames.
+ *
+ * To accurately parse and take into account rate and retransmission fields,
+ * you must initialize the chandef field in the ieee80211_tx_info structure
+ * of the skb before calling this function.
+ *
  * @skb: packet injected by userspace
  * @dev: the &struct device of this 802.11 device
  */
index fa58871..bdc0459 100644 (file)
@@ -184,6 +184,9 @@ struct net *copy_net_ns(unsigned long flags, struct user_namespace *user_ns,
 void net_ns_get_ownership(const struct net *net, kuid_t *uid, kgid_t *gid);
 
 void net_ns_barrier(void);
+
+struct ns_common *get_net_ns(struct ns_common *ns);
+struct net *get_net_ns_by_fd(int fd);
 #else /* CONFIG_NET_NS */
 #include <linux/sched.h>
 #include <linux/nsproxy.h>
@@ -203,13 +206,22 @@ static inline void net_ns_get_ownership(const struct net *net,
 }
 
 static inline void net_ns_barrier(void) {}
+
+static inline struct ns_common *get_net_ns(struct ns_common *ns)
+{
+       return ERR_PTR(-EINVAL);
+}
+
+static inline struct net *get_net_ns_by_fd(int fd)
+{
+       return ERR_PTR(-EINVAL);
+}
 #endif /* CONFIG_NET_NS */
 
 
 extern struct list_head net_namespace_list;
 
 struct net *get_net_ns_by_pid(pid_t pid);
-struct net *get_net_ns_by_fd(int fd);
 
 #ifdef CONFIG_SYSCTL
 void ipx_register_sysctl(void);
index 0e962d8..7a7058f 100644 (file)
@@ -1934,7 +1934,8 @@ static inline u32 net_tx_rndhash(void)
 
 static inline void sk_set_txhash(struct sock *sk)
 {
-       sk->sk_txhash = net_tx_rndhash();
+       /* This pairs with READ_ONCE() in skb_set_hash_from_sk() */
+       WRITE_ONCE(sk->sk_txhash, net_tx_rndhash());
 }
 
 static inline bool sk_rethink_txhash(struct sock *sk)
@@ -2206,9 +2207,12 @@ static inline void sock_poll_wait(struct file *filp, struct socket *sock,
 
 static inline void skb_set_hash_from_sk(struct sk_buff *skb, struct sock *sk)
 {
-       if (sk->sk_txhash) {
+       /* This pairs with WRITE_ONCE() in sk_set_txhash() */
+       u32 txhash = READ_ONCE(sk->sk_txhash);
+
+       if (txhash) {
                skb->l4_hash = 1;
-               skb->hash = sk->sk_txhash;
+               skb->hash = txhash;
        }
 }
 
@@ -2266,8 +2270,13 @@ struct sk_buff *sock_dequeue_err_skb(struct sock *sk);
 static inline int sock_error(struct sock *sk)
 {
        int err;
-       if (likely(!sk->sk_err))
+
+       /* Avoid an atomic operation for the common case.
+        * This is racy since another cpu/thread can change sk_err under us.
+        */
+       if (likely(data_race(!sk->sk_err)))
                return 0;
+
        err = xchg(&sk->sk_err, 0);
        return -err;
 }
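
Both hunks above apply the same lockless discipline: a field read and written without a lock gets a WRITE_ONCE()/READ_ONCE() pairing, and the reader loads the value once into a local so the non-zero check and the use cannot observe two different values. A userspace model of that single-load pattern, using C11 relaxed atomics in place of the kernel macros:

#include <stdatomic.h>
#include <stdio.h>

static _Atomic unsigned int txhash;

static void set_txhash(unsigned int h)
{
	atomic_store_explicit(&txhash, h, memory_order_relaxed);
}

static void use_txhash(void)
{
	/* one load, then reuse the local copy everywhere */
	unsigned int h = atomic_load_explicit(&txhash, memory_order_relaxed);

	if (h)
		printf("using hash %u\n", h);
	else
		printf("no hash set\n");
}

int main(void)
{
	use_txhash();
	set_txhash(0x1234);
	use_txhash();
	return 0;
}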
index 6de5a7f..d2a9420 100644 (file)
@@ -863,8 +863,7 @@ __SYSCALL(__NR_process_madvise, sys_process_madvise)
 __SC_COMP(__NR_epoll_pwait2, sys_epoll_pwait2, compat_sys_epoll_pwait2)
 #define __NR_mount_setattr 442
 __SYSCALL(__NR_mount_setattr, sys_mount_setattr)
-#define __NR_quotactl_path 443
-__SYSCALL(__NR_quotactl_path, sys_quotactl_path)
+/* 443 is reserved for quotactl_path */
 
 #define __NR_landlock_create_ruleset 444
 __SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset)
index 7d66876..d1b3270 100644 (file)
@@ -289,6 +289,9 @@ struct sockaddr_in {
 /* Address indicating an error return. */
 #define        INADDR_NONE             ((unsigned long int) 0xffffffff)
 
+/* Dummy address for src of ICMP replies if no real address is set (RFC7600). */
+#define        INADDR_DUMMY            ((unsigned long int) 0xc0000008)
+
 /* Network number for local host loopback. */
 #define        IN_LOOPBACKNET          127
 
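
For reference, 0xc0000008 decodes to 192.0.0.8, the dummy address RFC 7600 sets aside; a quick check of the byte breakdown:

#include <stdio.h>

int main(void)
{
	unsigned long addr = 0xc0000008UL;	/* INADDR_DUMMY, host order */

	printf("%lu.%lu.%lu.%lu\n",		/* prints 192.0.0.8 */
	       (addr >> 24) & 0xff, (addr >> 16) & 0xff,
	       (addr >> 8) & 0xff, addr & 0xff);
	return 0;
}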
index 94ba516..c6a2757 100644 (file)
@@ -6483,6 +6483,27 @@ struct bpf_sanitize_info {
        bool mask_to_left;
 };
 
+static struct bpf_verifier_state *
+sanitize_speculative_path(struct bpf_verifier_env *env,
+                         const struct bpf_insn *insn,
+                         u32 next_idx, u32 curr_idx)
+{
+       struct bpf_verifier_state *branch;
+       struct bpf_reg_state *regs;
+
+       branch = push_stack(env, next_idx, curr_idx, true);
+       if (branch && insn) {
+               regs = branch->frame[branch->curframe]->regs;
+               if (BPF_SRC(insn->code) == BPF_K) {
+                       mark_reg_unknown(env, regs, insn->dst_reg);
+               } else if (BPF_SRC(insn->code) == BPF_X) {
+                       mark_reg_unknown(env, regs, insn->dst_reg);
+                       mark_reg_unknown(env, regs, insn->src_reg);
+               }
+       }
+       return branch;
+}
+
 static int sanitize_ptr_alu(struct bpf_verifier_env *env,
                            struct bpf_insn *insn,
                            const struct bpf_reg_state *ptr_reg,
@@ -6566,12 +6587,26 @@ do_sim:
                tmp = *dst_reg;
                *dst_reg = *ptr_reg;
        }
-       ret = push_stack(env, env->insn_idx + 1, env->insn_idx, true);
+       ret = sanitize_speculative_path(env, NULL, env->insn_idx + 1,
+                                       env->insn_idx);
        if (!ptr_is_dst_reg && ret)
                *dst_reg = tmp;
        return !ret ? REASON_STACK : 0;
 }
 
+static void sanitize_mark_insn_seen(struct bpf_verifier_env *env)
+{
+       struct bpf_verifier_state *vstate = env->cur_state;
+
+       /* If we simulate paths under speculation, we don't update the
+        * insn as 'seen' such that when we verify unreachable paths in
+        * the non-speculative domain, sanitize_dead_code() can still
+        * rewrite/sanitize them.
+        */
+       if (!vstate->speculative)
+               env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
+}
+
 static int sanitize_err(struct bpf_verifier_env *env,
                        const struct bpf_insn *insn, int reason,
                        const struct bpf_reg_state *off_reg,
@@ -8750,14 +8785,28 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
                if (err)
                        return err;
        }
+
        if (pred == 1) {
-               /* only follow the goto, ignore fall-through */
+               /* Only follow the goto, ignore fall-through. If needed, push
+                * the fall-through branch for simulation under speculative
+                * execution.
+                */
+               if (!env->bypass_spec_v1 &&
+                   !sanitize_speculative_path(env, insn, *insn_idx + 1,
+                                              *insn_idx))
+                       return -EFAULT;
                *insn_idx += insn->off;
                return 0;
        } else if (pred == 0) {
-               /* only follow fall-through branch, since
-                * that's where the program will go
+               /* Only follow the fall-through branch, since that's where the
+                * program will go. If needed, push the goto branch for
+                * simulation under speculative execution.
                 */
+               if (!env->bypass_spec_v1 &&
+                   !sanitize_speculative_path(env, insn,
+                                              *insn_idx + insn->off + 1,
+                                              *insn_idx))
+                       return -EFAULT;
                return 0;
        }
 
@@ -10630,7 +10679,7 @@ static int do_check(struct bpf_verifier_env *env)
                }
 
                regs = cur_regs(env);
-               env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
+               sanitize_mark_insn_seen(env);
                prev_insn_idx = env->insn_idx;
 
                if (class == BPF_ALU || class == BPF_ALU64) {
@@ -10857,7 +10906,7 @@ process_bpf_exit:
                                        return err;
 
                                env->insn_idx++;
-                               env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
+                               sanitize_mark_insn_seen(env);
                        } else {
                                verbose(env, "invalid BPF_LD mode\n");
                                return -EINVAL;
@@ -11366,6 +11415,7 @@ static int adjust_insn_aux_data(struct bpf_verifier_env *env,
 {
        struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data;
        struct bpf_insn *insn = new_prog->insnsi;
+       u32 old_seen = old_data[off].seen;
        u32 prog_len;
        int i;
 
@@ -11386,7 +11436,8 @@ static int adjust_insn_aux_data(struct bpf_verifier_env *env,
        memcpy(new_data + off + cnt - 1, old_data + off,
               sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
        for (i = off; i < off + cnt - 1; i++) {
-               new_data[i].seen = env->pass_cnt;
+               /* Expand insni[off]'s seen count to the patched range. */
+               new_data[i].seen = old_seen;
                new_data[i].zext_dst = insn_has_def32(env, insn + i);
        }
        env->insn_aux_data = new_data;
@@ -12710,6 +12761,9 @@ static void free_states(struct bpf_verifier_env *env)
  * insn_aux_data was touched. These variables are compared to clear temporary
  * data from failed pass. For testing and experiments do_check_common() can be
  * run multiple times even when prior attempt to verify is unsuccessful.
+ *
+ * Note that special handling is needed on !env->bypass_spec_v1 if this is
+ * ever called outside of the error path with subsequent program rejection.
  */
 static void sanitize_insn_aux_data(struct bpf_verifier_env *env)
 {
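
The overall scheme of these verifier hunks: even when pred proves a branch direction, the architecturally dead arm is still pushed as a speculative state with its registers degraded to unknown, because Spectre v1 lets the CPU run that arm transiently. A toy model of the queueing (hypothetical structures, not the verifier's real types):

#include <stdio.h>

struct sim_state {
	int insn_idx;
	int speculative;	/* 1: only reachable transiently */
};

static struct sim_state queue[8];
static int n;

static void push_state(int idx, int spec)
{
	queue[n].insn_idx = idx;
	queue[n].speculative = spec;
	n++;
}

int main(void)
{
	int insn_idx = 10, off = 5;
	int pred = 1;		/* verifier proved: jump always taken */

	if (pred == 1) {
		push_state(insn_idx + off + 1, 0);	/* real path: jump target */
		push_state(insn_idx + 1, 1);		/* speculative fall-through */
	}

	for (int i = 0; i < n; i++)
		printf("analyze insn %d (speculative=%d)\n",
		       queue[i].insn_idx, queue[i].speculative);
	return 0;
}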
index 825284b..684a606 100644 (file)
@@ -464,6 +464,7 @@ static int __init crash_save_vmcoreinfo_init(void)
        VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS);
        VMCOREINFO_STRUCT_SIZE(mem_section);
        VMCOREINFO_OFFSET(mem_section, section_mem_map);
+       VMCOREINFO_NUMBER(SECTION_SIZE_BITS);
        VMCOREINFO_NUMBER(MAX_PHYSMEM_BITS);
 #endif
        VMCOREINFO_STRUCT_SIZE(page);
index 8ca7d50..e50df8d 100644 (file)
@@ -334,6 +334,14 @@ void __init swiotlb_exit(void)
        io_tlb_default_mem = NULL;
 }
 
+/*
+ * Return the offset into an iotlb slot required to keep the device happy.
+ */
+static unsigned int swiotlb_align_offset(struct device *dev, u64 addr)
+{
+       return addr & dma_get_min_align_mask(dev) & (IO_TLB_SIZE - 1);
+}
+
 /*
  * Bounce: copy the swiotlb buffer from or back to the original dma location
  */
@@ -346,10 +354,17 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size
        size_t alloc_size = mem->slots[index].alloc_size;
        unsigned long pfn = PFN_DOWN(orig_addr);
        unsigned char *vaddr = phys_to_virt(tlb_addr);
+       unsigned int tlb_offset;
 
        if (orig_addr == INVALID_PHYS_ADDR)
                return;
 
+       tlb_offset = (tlb_addr & (IO_TLB_SIZE - 1)) -
+                    swiotlb_align_offset(dev, orig_addr);
+
+       orig_addr += tlb_offset;
+       alloc_size -= tlb_offset;
+
        if (size > alloc_size) {
                dev_WARN_ONCE(dev, 1,
                        "Buffer overflow detected. Allocation size: %zu. Mapping size: %zu.\n",
@@ -390,14 +405,6 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size
 
 #define slot_addr(start, idx)  ((start) + ((idx) << IO_TLB_SHIFT))
 
-/*
- * Return the offset into a iotlb slot required to keep the device happy.
- */
-static unsigned int swiotlb_align_offset(struct device *dev, u64 addr)
-{
-       return addr & dma_get_min_align_mask(dev) & (IO_TLB_SIZE - 1);
-}
-
 /*
  * Carefully handle integer overflow which can occur when boundary_mask == ~0UL.
  */
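
The new tlb_offset keeps partial syncs honest: a device's min-align mask can force the bounce slot to preserve the original buffer's low address bits, so a sync that starts partway into the mapping must bounce from the matching offset in the original buffer, not from its start. Worked numbers (IO_TLB_SIZE and the mask are assumptions for the example):

#include <stdint.h>
#include <stdio.h>

#define IO_TLB_SIZE 2048u	/* one swiotlb slot */

static unsigned int swiotlb_align_offset(uint64_t addr, uint64_t min_align_mask)
{
	return addr & min_align_mask & (IO_TLB_SIZE - 1);
}

int main(void)
{
	uint64_t orig_addr = 0x1230;		/* buffer starts 0x230 into its window */
	uint64_t tlb_addr  = 0x8000 + 0x430;	/* sync request 0x200 bytes into the mapping */
	uint64_t mask = 0xfff;			/* device's min-align mask */

	unsigned int tlb_offset = (tlb_addr & (IO_TLB_SIZE - 1)) -
				  swiotlb_align_offset(orig_addr, mask);

	/* bounce from orig_addr + 0x200, not from orig_addr itself */
	printf("tlb_offset = %#x\n", tlb_offset);	/* 0x200 */
	return 0;
}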
index 7641bd4..e323130 100644 (file)
@@ -843,7 +843,7 @@ static int count_matching_names(struct lock_class *new_class)
 }
 
 /* used from NMI context -- must be lockless */
-static __always_inline struct lock_class *
+static noinstr struct lock_class *
 look_up_lock_class(const struct lockdep_map *lock, unsigned int subclass)
 {
        struct lockdep_subclass_key *key;
@@ -851,12 +851,14 @@ look_up_lock_class(const struct lockdep_map *lock, unsigned int subclass)
        struct lock_class *class;
 
        if (unlikely(subclass >= MAX_LOCKDEP_SUBCLASSES)) {
+               instrumentation_begin();
                debug_locks_off();
                printk(KERN_ERR
                        "BUG: looking up invalid subclass: %u\n", subclass);
                printk(KERN_ERR
                        "turning off the locking correctness validator.\n");
                dump_stack();
+               instrumentation_end();
                return NULL;
        }
 
index 7e78dfa..927d46c 100644 (file)
@@ -266,9 +266,18 @@ static void module_assert_mutex_or_preempt(void)
 #endif
 }
 
+#ifdef CONFIG_MODULE_SIG
 static bool sig_enforce = IS_ENABLED(CONFIG_MODULE_SIG_FORCE);
 module_param(sig_enforce, bool_enable_only, 0644);
 
+void set_module_sig_enforced(void)
+{
+       sig_enforce = true;
+}
+#else
+#define sig_enforce false
+#endif
+
 /*
  * Export sig_enforce kernel cmdline parameter to allow other subsystems rely
  * on that instead of directly to CONFIG_MODULE_SIG_FORCE config.
@@ -279,11 +288,6 @@ bool is_module_sig_enforced(void)
 }
 EXPORT_SYMBOL(is_module_sig_enforced);
 
-void set_module_sig_enforced(void)
-{
-       sig_enforce = true;
-}
-
 /* Block module loading/unloading? */
 int modules_disabled = 0;
 core_param(nomodule, modules_disabled, bint, 0);
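
The #ifdef shuffle above is a common kernel shape: with the config off, the variable becomes a constant macro so every reader constant-folds, and the setter simply does not exist to be misused. A userspace sketch (toggle -DFEATURE_SIG at compile time to see both shapes):

#include <stdio.h>

#ifdef FEATURE_SIG
static int sig_enforce;
static void set_sig_enforced(void) { sig_enforce = 1; }
#else
#define sig_enforce 0	/* constant: users compile to a fixed value */
#endif

int main(void)
{
#ifdef FEATURE_SIG
	set_sig_enforced();
#endif
	printf("enforced: %d\n", sig_enforce);
	return 0;
}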
index 7a14146..9423218 100644 (file)
@@ -391,6 +391,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)
        /* No obstacles. */
        return vprintk_default(fmt, args);
 }
+EXPORT_SYMBOL(vprintk);
 
 void __init printk_safe_init(void)
 {
@@ -411,4 +412,3 @@ void __init printk_safe_init(void)
        /* Flush pending messages that did not have scheduled IRQ works. */
        printk_safe_flush();
 }
-EXPORT_SYMBOL(vprintk);
index 5226cc2..4ca80df 100644 (file)
@@ -6389,7 +6389,6 @@ int sched_setattr_nocheck(struct task_struct *p, const struct sched_attr *attr)
 {
        return __sched_setscheduler(p, attr, false, true);
 }
-EXPORT_SYMBOL_GPL(sched_setattr_nocheck);
 
 /**
  * sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace.
index 2c8a935..2366331 100644 (file)
@@ -3298,6 +3298,52 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq, int flags)
 
 #ifdef CONFIG_SMP
 #ifdef CONFIG_FAIR_GROUP_SCHED
+/*
+ * Because list_add_leaf_cfs_rq always places a child cfs_rq on the list
+ * immediately before a parent cfs_rq, and cfs_rqs are removed from the list
+ * bottom-up, we only have to test whether the cfs_rq before us on the list
+ * is our child.
+ * If cfs_rq is not on the list, test whether a child needs its to be added to
+ * connect a branch to the tree  * (see list_add_leaf_cfs_rq() for details).
+ */
+static inline bool child_cfs_rq_on_list(struct cfs_rq *cfs_rq)
+{
+       struct cfs_rq *prev_cfs_rq;
+       struct list_head *prev;
+
+       if (cfs_rq->on_list) {
+               prev = cfs_rq->leaf_cfs_rq_list.prev;
+       } else {
+               struct rq *rq = rq_of(cfs_rq);
+
+               prev = rq->tmp_alone_branch;
+       }
+
+       prev_cfs_rq = container_of(prev, struct cfs_rq, leaf_cfs_rq_list);
+
+       return (prev_cfs_rq->tg->parent == cfs_rq->tg);
+}
+
+static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq)
+{
+       if (cfs_rq->load.weight)
+               return false;
+
+       if (cfs_rq->avg.load_sum)
+               return false;
+
+       if (cfs_rq->avg.util_sum)
+               return false;
+
+       if (cfs_rq->avg.runnable_sum)
+               return false;
+
+       if (child_cfs_rq_on_list(cfs_rq))
+               return false;
+
+       return true;
+}
+
 /**
  * update_tg_load_avg - update the tg's load avg
  * @cfs_rq: the cfs_rq whose avg changed
@@ -4091,6 +4137,11 @@ static inline void update_misfit_status(struct task_struct *p, struct rq *rq)
 
 #else /* CONFIG_SMP */
 
+static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq)
+{
+       return true;
+}
+
 #define UPDATE_TG      0x0
 #define SKIP_AGE_LOAD  0x0
 #define DO_ATTACH      0x0
@@ -4749,8 +4800,8 @@ static int tg_unthrottle_up(struct task_group *tg, void *data)
                cfs_rq->throttled_clock_task_time += rq_clock_task(rq) -
                                             cfs_rq->throttled_clock_task;
 
-               /* Add cfs_rq with already running entity in the list */
-               if (cfs_rq->nr_running >= 1)
+               /* Add cfs_rq with load or one or more already running entities to the list */
+               if (!cfs_rq_is_decayed(cfs_rq) || cfs_rq->nr_running)
                        list_add_leaf_cfs_rq(cfs_rq);
        }
 
@@ -7996,23 +8047,6 @@ static bool __update_blocked_others(struct rq *rq, bool *done)
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 
-static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq)
-{
-       if (cfs_rq->load.weight)
-               return false;
-
-       if (cfs_rq->avg.load_sum)
-               return false;
-
-       if (cfs_rq->avg.util_sum)
-               return false;
-
-       if (cfs_rq->avg.runnable_sum)
-               return false;
-
-       return true;
-}
-
 static bool __update_blocked_fair(struct rq *rq, bool *done)
 {
        struct cfs_rq *cfs_rq, *pos;
index f7c6ffc..f1ecd8f 100644 (file)
@@ -435,6 +435,12 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t gfp_flags,
                 * Preallocation does not hold sighand::siglock so it can't
                 * use the cache. The lockless caching requires that only
                 * one consumer and only one producer run at a time.
+                *
+                * For the regular allocation case it is sufficient to
+                * check @q for NULL because this code can only be called
+                * if the target task @t has not been reaped yet, which
+                * means this code can never observe the error pointer
+                * written to @t->sigqueue_cache in exit_task_sigqueue_cache().
                 */
                q = READ_ONCE(t->sigqueue_cache);
                if (!q || sigqueue_flags)
@@ -463,13 +469,18 @@ void exit_task_sigqueue_cache(struct task_struct *tsk)
        struct sigqueue *q = tsk->sigqueue_cache;
 
        if (q) {
-               tsk->sigqueue_cache = NULL;
                /*
                 * Hand it back to the cache as the task might
                 * be self reaping which would leak the object.
                 */
                 kmem_cache_free(sigqueue_cachep, q);
        }
+
+       /*
+        * Set an error pointer to ensure that @tsk will not cache a
+        * sigqueue when it is reaping it's child tasks
+        */
+       tsk->sigqueue_cache = ERR_PTR(-1);
 }
 
 static void sigqueue_cache_or_free(struct sigqueue *q)
@@ -481,6 +492,10 @@ static void sigqueue_cache_or_free(struct sigqueue *q)
         * is intentional when run without holding current->sighand->siglock,
         * which is fine as current obviously cannot run __sigqueue_free()
         * concurrently.
+        *
+        * The NULL check is safe even if current has been reaped already,
+        * in which case exit_task_sigqueue_cache() wrote an error pointer
+        * into current->sigqueue_cache.
         */
        if (!READ_ONCE(current->sigqueue_cache))
                WRITE_ONCE(current->sigqueue_cache, q);
index 9299057..d23a09d 100644 (file)
@@ -2198,9 +2198,6 @@ struct saved_cmdlines_buffer {
 };
 static struct saved_cmdlines_buffer *savedcmd;
 
-/* temporary disable recording */
-static atomic_t trace_record_taskinfo_disabled __read_mostly;
-
 static inline char *get_saved_cmdlines(int idx)
 {
        return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
@@ -2486,8 +2483,6 @@ static bool tracing_record_taskinfo_skip(int flags)
 {
        if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
                return true;
-       if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
-               return true;
        if (!__this_cpu_read(trace_taskinfo_save))
                return true;
        return false;
@@ -3998,9 +3993,6 @@ static void *s_start(struct seq_file *m, loff_t *pos)
                return ERR_PTR(-EBUSY);
 #endif
 
-       if (!iter->snapshot)
-               atomic_inc(&trace_record_taskinfo_disabled);
-
        if (*pos != iter->pos) {
                iter->ent = NULL;
                iter->cpu = 0;
@@ -4043,9 +4035,6 @@ static void s_stop(struct seq_file *m, void *p)
                return;
 #endif
 
-       if (!iter->snapshot)
-               atomic_dec(&trace_record_taskinfo_disabled);
-
        trace_access_unlock(iter->cpu_file);
        trace_event_read_unlock();
 }
index c1637f9..4702efb 100644 (file)
@@ -115,9 +115,9 @@ u64 notrace trace_clock_global(void)
        prev_time = READ_ONCE(trace_clock_struct.prev_time);
        now = sched_clock_cpu(this_cpu);
 
-       /* Make sure that now is always greater than prev_time */
+       /* Make sure that now is always greater than or equal to prev_time */
        if ((s64)(now - prev_time) < 0)
-               now = prev_time + 1;
+               now = prev_time;
 
        /*
         * If in an NMI context then dont risk lockups and simply return
@@ -131,7 +131,7 @@ u64 notrace trace_clock_global(void)
                /* Reread prev_time in case it was already updated */
                prev_time = READ_ONCE(trace_clock_struct.prev_time);
                if ((s64)(now - prev_time) < 0)
-                       now = prev_time + 1;
+                       now = prev_time;
 
                trace_clock_struct.prev_time = now;
 
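
Clamping to prev_time rather than prev_time + 1 still keeps the global clock monotonic, but stops it creeping ahead of the underlying clock when many CPUs read behind it. A toy model of the clamp:

#include <stdio.h>

static unsigned long long prev_time;

static unsigned long long global_clock(unsigned long long now)
{
	if ((long long)(now - prev_time) < 0)
		now = prev_time;	/* was: prev_time + 1 */
	prev_time = now;
	return now;
}

int main(void)
{
	printf("%llu\n", global_clock(100));	/* 100 */
	printf("%llu\n", global_clock(90));	/* clamped to 100, not 101 */
	printf("%llu\n", global_clock(105));	/* 105 */
	return 0;
}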
index 06d3135..a75ee30 100644 (file)
@@ -36,7 +36,7 @@ EXPORT_SYMBOL_GPL(debug_locks_silent);
 /*
  * Generic 'turn off all lock debugging' function:
  */
-noinstr int debug_locks_off(void)
+int debug_locks_off(void)
 {
        if (debug_locks && __debug_locks_off()) {
                if (!debug_locks_silent) {
index 63ed6b2..6d2a011 100644 (file)
@@ -62,6 +62,7 @@ static struct shrinker deferred_split_shrinker;
 
 static atomic_t huge_zero_refcount;
 struct page *huge_zero_page __read_mostly;
+unsigned long huge_zero_pfn __read_mostly = ~0UL;
 
 bool transparent_hugepage_enabled(struct vm_area_struct *vma)
 {
@@ -98,6 +99,7 @@ retry:
                __free_pages(zero_page, compound_order(zero_page));
                goto retry;
        }
+       WRITE_ONCE(huge_zero_pfn, page_to_pfn(zero_page));
 
        /* We take additional reference here. It will be put back by shrinker */
        atomic_set(&huge_zero_refcount, 2);
@@ -147,6 +149,7 @@ static unsigned long shrink_huge_zero_page_scan(struct shrinker *shrink,
        if (atomic_cmpxchg(&huge_zero_refcount, 1, 0) == 1) {
                struct page *zero_page = xchg(&huge_zero_page, NULL);
                BUG_ON(zero_page == NULL);
+               WRITE_ONCE(huge_zero_pfn, ~0UL);
                __free_pages(zero_page, compound_order(zero_page));
                return HPAGE_PMD_NR;
        }
@@ -2044,7 +2047,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
        count_vm_event(THP_SPLIT_PMD);
 
        if (!vma_is_anonymous(vma)) {
-               _pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd);
+               old_pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd);
                /*
                 * We are going to unmap this huge page. So
                 * just go ahead and zap it
@@ -2053,16 +2056,25 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
                        zap_deposited_table(mm, pmd);
                if (vma_is_special_huge(vma))
                        return;
-               page = pmd_page(_pmd);
-               if (!PageDirty(page) && pmd_dirty(_pmd))
-                       set_page_dirty(page);
-               if (!PageReferenced(page) && pmd_young(_pmd))
-                       SetPageReferenced(page);
-               page_remove_rmap(page, true);
-               put_page(page);
+               if (unlikely(is_pmd_migration_entry(old_pmd))) {
+                       swp_entry_t entry;
+
+                       entry = pmd_to_swp_entry(old_pmd);
+                       page = migration_entry_to_page(entry);
+               } else {
+                       page = pmd_page(old_pmd);
+                       if (!PageDirty(page) && pmd_dirty(old_pmd))
+                               set_page_dirty(page);
+                       if (!PageReferenced(page) && pmd_young(old_pmd))
+                               SetPageReferenced(page);
+                       page_remove_rmap(page, true);
+                       put_page(page);
+               }
                add_mm_counter(mm, mm_counter_file(page), -HPAGE_PMD_NR);
                return;
-       } else if (pmd_trans_huge(*pmd) && is_huge_zero_pmd(*pmd)) {
+       }
+
+       if (is_huge_zero_pmd(*pmd)) {
                /*
                 * FIXME: Do we want to invalidate secondary mmu by calling
                 * mmu_notifier_invalidate_range() see comments below inside
@@ -2338,17 +2350,17 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma,
 
 static void unmap_page(struct page *page)
 {
-       enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK |
+       enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_SYNC |
                TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD;
-       bool unmap_success;
 
        VM_BUG_ON_PAGE(!PageHead(page), page);
 
        if (PageAnon(page))
                ttu_flags |= TTU_SPLIT_FREEZE;
 
-       unmap_success = try_to_unmap(page, ttu_flags);
-       VM_BUG_ON_PAGE(!unmap_success, page);
+       try_to_unmap(page, ttu_flags);
+
+       VM_WARN_ON_ONCE_PAGE(page_mapped(page), page);
 }
 
 static void remap_page(struct page *page, unsigned int nr)
@@ -2659,7 +2671,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
        struct deferred_split *ds_queue = get_deferred_split_queue(head);
        struct anon_vma *anon_vma = NULL;
        struct address_space *mapping = NULL;
-       int count, mapcount, extra_pins, ret;
+       int extra_pins, ret;
        pgoff_t end;
 
        VM_BUG_ON_PAGE(is_huge_zero_page(head), head);
@@ -2718,7 +2730,6 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
        }
 
        unmap_page(head);
-       VM_BUG_ON_PAGE(compound_mapcount(head), head);
 
        /* block interrupt reentry in xa_lock and spinlock */
        local_irq_disable();
@@ -2736,9 +2747,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 
        /* Prevent deferred_split_scan() touching ->_refcount */
        spin_lock(&ds_queue->split_queue_lock);
-       count = page_count(head);
-       mapcount = total_mapcount(head);
-       if (!mapcount && page_ref_freeze(head, 1 + extra_pins)) {
+       if (page_ref_freeze(head, 1 + extra_pins)) {
                if (!list_empty(page_deferred_list(head))) {
                        ds_queue->split_queue_len--;
                        list_del(page_deferred_list(head));
@@ -2758,16 +2767,9 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
                __split_huge_page(page, list, end);
                ret = 0;
        } else {
-               if (IS_ENABLED(CONFIG_DEBUG_VM) && mapcount) {
-                       pr_alert("total_mapcount: %u, page_count(): %u\n",
-                                       mapcount, count);
-                       if (PageTail(page))
-                               dump_page(head, NULL);
-                       dump_page(page, "total_mapcount(head) > 0");
-                       BUG();
-               }
                spin_unlock(&ds_queue->split_queue_lock);
-fail:          if (mapping)
+fail:
+               if (mapping)
                        xa_unlock(&mapping->i_pages);
                local_irq_enable();
                remap_page(head, thp_nr_pages(head));
index 5560b50..e0a5f9c 100644 (file)
@@ -2121,12 +2121,18 @@ out:
  * be restored when a newly allocated huge page must be freed.  It is
  * to be called after calling vma_needs_reservation to determine if a
  * reservation exists.
+ *
+ * vma_del_reservation is used in error paths where an entry in the reserve
+ * map was created during huge page allocation and must be removed.  It is to
+ * be called after calling vma_needs_reservation to determine if a reservation
+ * exists.
  */
 enum vma_resv_mode {
        VMA_NEEDS_RESV,
        VMA_COMMIT_RESV,
        VMA_END_RESV,
        VMA_ADD_RESV,
+       VMA_DEL_RESV,
 };
 static long __vma_reservation_common(struct hstate *h,
                                struct vm_area_struct *vma, unsigned long addr,
@@ -2170,11 +2176,21 @@ static long __vma_reservation_common(struct hstate *h,
                        ret = region_del(resv, idx, idx + 1);
                }
                break;
+       case VMA_DEL_RESV:
+               if (vma->vm_flags & VM_MAYSHARE) {
+                       region_abort(resv, idx, idx + 1, 1);
+                       ret = region_del(resv, idx, idx + 1);
+               } else {
+                       ret = region_add(resv, idx, idx + 1, 1, NULL, NULL);
+                       /* region_add calls of range 1 should never fail. */
+                       VM_BUG_ON(ret < 0);
+               }
+               break;
        default:
                BUG();
        }
 
-       if (vma->vm_flags & VM_MAYSHARE)
+       if (vma->vm_flags & VM_MAYSHARE || mode == VMA_DEL_RESV)
                return ret;
        /*
         * We know private mapping must have HPAGE_RESV_OWNER set.
@@ -2222,25 +2238,39 @@ static long vma_add_reservation(struct hstate *h,
        return __vma_reservation_common(h, vma, addr, VMA_ADD_RESV);
 }
 
+static long vma_del_reservation(struct hstate *h,
+                       struct vm_area_struct *vma, unsigned long addr)
+{
+       return __vma_reservation_common(h, vma, addr, VMA_DEL_RESV);
+}
+
 /*
- * This routine is called to restore a reservation on error paths.  In the
- * specific error paths, a huge page was allocated (via alloc_huge_page)
- * and is about to be freed.  If a reservation for the page existed,
- * alloc_huge_page would have consumed the reservation and set
- * HPageRestoreReserve in the newly allocated page.  When the page is freed
- * via free_huge_page, the global reservation count will be incremented if
- * HPageRestoreReserve is set.  However, free_huge_page can not adjust the
- * reserve map.  Adjust the reserve map here to be consistent with global
- * reserve count adjustments to be made by free_huge_page.
+ * This routine is called to restore reservation information on error paths.
+ * It should ONLY be called for pages allocated via alloc_huge_page(), and
+ * the hugetlb mutex should remain held when calling this routine.
+ *
+ * It handles two specific cases:
+ * 1) A reservation was in place and the page consumed the reservation.
+ *    HPageRestoreReserve is set in the page.
+ * 2) No reservation was in place for the page, so HPageRestoreReserve is
+ *    not set.  However, alloc_huge_page always updates the reserve map.
+ *
+ * In case 1, free_huge_page later in the error path will increment the
+ * global reserve count.  But, free_huge_page does not have enough context
+ * to adjust the reservation map.  This case deals primarily with private
+ * mappings.  Adjust the reserve map here to be consistent with global
+ * reserve count adjustments to be made by free_huge_page.  Make sure the
+ * reserve map indicates there is a reservation present.
+ *
+ * In case 2, simply undo reserve map modifications done by alloc_huge_page.
  */
-static void restore_reserve_on_error(struct hstate *h,
-                       struct vm_area_struct *vma, unsigned long address,
-                       struct page *page)
+void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
+                       unsigned long address, struct page *page)
 {
-       if (unlikely(HPageRestoreReserve(page))) {
-               long rc = vma_needs_reservation(h, vma, address);
+       long rc = vma_needs_reservation(h, vma, address);
 
-               if (unlikely(rc < 0)) {
+       if (HPageRestoreReserve(page)) {
+               if (unlikely(rc < 0))
                        /*
                         * Rare out of memory condition in reserve map
                         * manipulation.  Clear HPageRestoreReserve so that
@@ -2253,16 +2283,57 @@ static void restore_reserve_on_error(struct hstate *h,
                         * accounting of reserve counts.
                         */
                        ClearHPageRestoreReserve(page);
-               } else if (rc) {
-                       rc = vma_add_reservation(h, vma, address);
-                       if (unlikely(rc < 0))
+               else if (rc)
+                       (void)vma_add_reservation(h, vma, address);
+               else
+                       vma_end_reservation(h, vma, address);
+       } else {
+               if (!rc) {
+                       /*
+                        * This indicates there is an entry in the reserve map
+                        * added by alloc_huge_page.  We know it was added
+                        * before the alloc_huge_page call, otherwise
+                        * HPageRestoreReserve would be set on the page.
+                        * Remove the entry so that a subsequent allocation
+                        * does not consume a reservation.
+                        */
+                       rc = vma_del_reservation(h, vma, address);
+                       if (rc < 0)
+                               /*
+                                * VERY rare out of memory condition.  Since
+                                * we can not delete the entry, set
+                                * HPageRestoreReserve so that the reserve
+                                * count will be incremented when the page
+                                * is freed.  This reserve will be consumed
+                                * on a subsequent allocation.
+                                */
+                               SetHPageRestoreReserve(page);
+               } else if (rc < 0) {
+                       /*
+                        * Rare out of memory condition from
+                        * vma_needs_reservation call.  Memory allocation is
+                        * only attempted if a new entry is needed.  Therefore,
+                        * this implies there is not an entry in the
+                        * reserve map.
+                        *
+                        * For shared mappings, no entry in the map indicates
+                        * no reservation.  We are done.
+                        */
+                       if (!(vma->vm_flags & VM_MAYSHARE))
                                /*
-                                * See above comment about rare out of
-                                * memory condition.
+                                * For private mappings, no entry indicates
+                                * a reservation is present.  Since we can
+                                * not add an entry, set SetHPageRestoreReserve
+                                * on the page so reserve count will be
+                                * incremented when freed.  This reserve will
+                                * be consumed on a subsequent allocation.
                                 */
-                               ClearHPageRestoreReserve(page);
+                               SetHPageRestoreReserve(page);
                } else
-                       vma_end_reservation(h, vma, address);
+                       /*
+                        * No reservation present, do nothing
+                        */
+                        vma_end_reservation(h, vma, address);
        }
 }
 
@@ -4037,6 +4108,8 @@ again:
                                spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
                                entry = huge_ptep_get(src_pte);
                                if (!pte_same(src_pte_old, entry)) {
+                                       restore_reserve_on_error(h, vma, addr,
+                                                               new);
                                        put_page(new);
                                        /* dst_entry won't change as in child */
                                        goto again;
@@ -5006,6 +5079,7 @@ out_release_unlock:
        if (vm_shared || is_continue)
                unlock_page(page);
 out_release_nounlock:
+       restore_reserve_on_error(h, dst_vma, dst_addr, page);
        put_page(page);
        goto out;
 }
@@ -5857,6 +5931,21 @@ unlock:
        return ret;
 }
 
+int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
+{
+       int ret = 0;
+
+       *hugetlb = false;
+       spin_lock_irq(&hugetlb_lock);
+       if (PageHeadHuge(page)) {
+               *hugetlb = true;
+               if (HPageFreed(page) || HPageMigratable(page))
+                       ret = get_page_unless_zero(page);
+       }
+       spin_unlock_irq(&hugetlb_lock);
+       return ret;
+}
+
 void putback_active_hugepage(struct page *page)
 {
        spin_lock_irq(&hugetlb_lock);
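
The hunks above make restore_reserve_on_error() handle every failure mode so
that an aborted fault never leaks a huge page reservation: whenever the
reserve map cannot be fixed up, the page itself is tagged and the free path
hands the reservation back. A minimal userspace toy of that bookkeeping
(plain C with illustrative toy_* names, not kernel code):

#include <stdbool.h>
#include <stdio.h>

struct toy_page { bool restore_reserve; };  /* HPageRestoreReserve stand-in */

static long resv_count = 1;                 /* one reservation outstanding */

static void free_toy_page(struct toy_page *p)
{
	if (p->restore_reserve)
		resv_count++;               /* hand the consumed reserve back */
}

int main(void)
{
	struct toy_page page = { false };

	resv_count--;                       /* allocation consumed the reserve */
	/* the fault fails and the reserve map cannot be adjusted, so: */
	page.restore_reserve = true;        /* SetHPageRestoreReserve() analogue */
	free_toy_page(&page);               /* the free path restores the count */
	printf("reservations: %ld\n", resv_count);  /* prints 1: nothing leaked */
	return 0;
}
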
index 2f11829..e8fdb53 100644 (file)
@@ -384,27 +384,52 @@ static inline void mlock_migrate_page(struct page *newpage, struct page *page)
 extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma);
 
 /*
- * At what user virtual address is page expected in @vma?
+ * At what user virtual address is page expected in vma?
+ * Returns -EFAULT if all of the page is outside the range of vma.
+ * If page is a compound head, the entire compound page is considered.
  */
 static inline unsigned long
-__vma_address(struct page *page, struct vm_area_struct *vma)
+vma_address(struct page *page, struct vm_area_struct *vma)
 {
-       pgoff_t pgoff = page_to_pgoff(page);
-       return vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
+       pgoff_t pgoff;
+       unsigned long address;
+
+       VM_BUG_ON_PAGE(PageKsm(page), page);    /* KSM page->index unusable */
+       pgoff = page_to_pgoff(page);
+       if (pgoff >= vma->vm_pgoff) {
+               address = vma->vm_start +
+                       ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
+               /* Check for address beyond vma (or wrapped through 0?) */
+               if (address < vma->vm_start || address >= vma->vm_end)
+                       address = -EFAULT;
+       } else if (PageHead(page) &&
+                  pgoff + compound_nr(page) - 1 >= vma->vm_pgoff) {
+               /* Test above avoids possibility of wrap to 0 on 32-bit */
+               address = vma->vm_start;
+       } else {
+               address = -EFAULT;
+       }
+       return address;
 }
 
+/*
+ * Then at what user virtual address will none of the page be found in vma?
+ * Assumes that vma_address() already returned a good starting address.
+ * If page is a compound head, the entire compound page is considered.
+ */
 static inline unsigned long
-vma_address(struct page *page, struct vm_area_struct *vma)
+vma_address_end(struct page *page, struct vm_area_struct *vma)
 {
-       unsigned long start, end;
-
-       start = __vma_address(page, vma);
-       end = start + thp_size(page) - PAGE_SIZE;
-
-       /* page should be within @vma mapping range */
-       VM_BUG_ON_VMA(end < vma->vm_start || start >= vma->vm_end, vma);
-
-       return max(start, vma->vm_start);
+       pgoff_t pgoff;
+       unsigned long address;
+
+       VM_BUG_ON_PAGE(PageKsm(page), page);    /* KSM page->index unusable */
+       pgoff = page_to_pgoff(page) + compound_nr(page);
+       address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
+       /* Check for address beyond vma (or wrapped through 0?) */
+       if (address < vma->vm_start || address > vma->vm_end)
+               address = vma->vm_end;
+       return address;
 }
 
 static inline struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf,
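
The new vma_address() above folds the old __vma_address() arithmetic and its
bounds check into one helper that returns -EFAULT instead of relying on a
VM_BUG_ON in the caller. A compilable userspace sketch of the same
arithmetic (simplified toy_* types; the real code additionally rejects KSM
pages and takes the compound size from the page itself):

#include <errno.h>
#include <stdio.h>

#define TOY_PAGE_SHIFT 12

struct toy_vma { unsigned long vm_start, vm_end, vm_pgoff; };

static unsigned long toy_vma_address(unsigned long pgoff,
				     unsigned long nr_pages,
				     const struct toy_vma *vma)
{
	unsigned long address;

	if (pgoff >= vma->vm_pgoff) {
		address = vma->vm_start +
			((pgoff - vma->vm_pgoff) << TOY_PAGE_SHIFT);
		/* page starts past the end of the vma (or wrapped)? */
		if (address < vma->vm_start || address >= vma->vm_end)
			address = -EFAULT;
	} else if (nr_pages > 1 &&
		   pgoff + nr_pages - 1 >= vma->vm_pgoff) {
		/* compound page straddles vm_start: clamp to it */
		address = vma->vm_start;
	} else {
		address = -EFAULT;
	}
	return address;
}

int main(void)
{
	struct toy_vma vma = { 0x400000, 0x500000, 16 };

	printf("%lx\n", toy_vma_address(20, 1, &vma)); /* 404000: in range */
	printf("%lx\n", toy_vma_address(8, 1, &vma));  /* -EFAULT sentinel */
	return 0;
}
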
index 85ad98c..0143d32 100644 (file)
@@ -949,6 +949,17 @@ static int page_action(struct page_state *ps, struct page *p,
        return (result == MF_RECOVERED || result == MF_DELAYED) ? 0 : -EBUSY;
 }
 
+/*
+ * Return true if the page type of the given page is supported by the
+ * hwpoison mechanism (though handling could still fail), otherwise false.
+ * This function does not return true for hugetlb or device memory pages,
+ * so it is assumed to be called only in contexts where such pages never
+ * occur.
+ */
+static inline bool HWPoisonHandlable(struct page *page)
+{
+       return PageLRU(page) || __PageMovable(page);
+}
+
 /**
  * __get_hwpoison_page() - Get refcount for memory error handling:
  * @page:      raw error page (hit by memory error)
@@ -959,8 +970,22 @@ static int page_action(struct page_state *ps, struct page *p,
 static int __get_hwpoison_page(struct page *page)
 {
        struct page *head = compound_head(page);
+       int ret = 0;
+       bool hugetlb = false;
+
+       ret = get_hwpoison_huge_page(head, &hugetlb);
+       if (hugetlb)
+               return ret;
 
-       if (!PageHuge(head) && PageTransHuge(head)) {
+       /*
+        * This check prevents calling get_page_unless_zero() for any
+        * unsupported type of page, in order to reduce the risk of
+        * unexpected races caused by taking a page refcount.
+        */
+       if (!HWPoisonHandlable(head))
+               return 0;
+
+       if (PageTransHuge(head)) {
                /*
                 * Non anonymous thp exists only in allocation/free time. We
                 * can't handle such a case correctly, so let's give it up.
@@ -1017,7 +1042,7 @@ try_again:
                        ret = -EIO;
                }
        } else {
-               if (PageHuge(p) || PageLRU(p) || __PageMovable(p)) {
+               if (PageHuge(p) || HWPoisonHandlable(p)) {
                        ret = 1;
                } else {
                        /*
@@ -1527,7 +1552,12 @@ try_again:
                return 0;
        }
 
-       if (!PageTransTail(p) && !PageLRU(p))
+       /*
+        * __munlock_pagevec may clear a writeback page's LRU flag without
+        * the page lock. We need to wait for writeback completion for this
+        * page, or it may trigger a vfs BUG while evicting the inode.
+        */
+       if (!PageTransTail(p) && !PageLRU(p) && !PageWriteback(p))
                goto identify_page_state;
 
        /*
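
The ordering __get_hwpoison_page() now enforces (classify the page first,
take a speculative refcount only on types the handler understands) can be
sketched in plain C. Toy names throughout; in the kernel the refcount grab
is get_page_unless_zero(), and hugetlb pages are resolved separately under
hugetlb_lock by get_hwpoison_huge_page():

#include <assert.h>
#include <stdbool.h>

enum toy_type { TOY_LRU, TOY_MOVABLE, TOY_HUGETLB, TOY_OTHER };

static bool toy_handlable(enum toy_type t)
{
	/* HWPoisonHandlable() analogue: LRU or __PageMovable only */
	return t == TOY_LRU || t == TOY_MOVABLE;
}

/* returns 1 if a reference was taken, 0 if the page is left alone */
static int toy_get_hwpoison_page(enum toy_type t, int *refcount)
{
	if (!toy_handlable(t) && t != TOY_HUGETLB)
		return 0;              /* no refcount games on unknown types */
	if (*refcount == 0)            /* get_page_unless_zero() analogue */
		return 0;
	++*refcount;
	return 1;
}

int main(void)
{
	int ref = 1;

	assert(toy_get_hwpoison_page(TOY_OTHER, &ref) == 0 && ref == 1);
	assert(toy_get_hwpoison_page(TOY_LRU, &ref) == 1 && ref == 2);
	return 0;
}
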
index f3ffab9..486f4a2 100644 (file)
@@ -1361,7 +1361,18 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
                        else if (zap_huge_pmd(tlb, vma, pmd, addr))
                                goto next;
                        /* fall through */
+               } else if (details && details->single_page &&
+                          PageTransCompound(details->single_page) &&
+                          next - addr == HPAGE_PMD_SIZE && pmd_none(*pmd)) {
+                       spinlock_t *ptl = pmd_lock(tlb->mm, pmd);
+                       /*
+                        * Take and drop THP pmd lock so that we cannot return
+                        * prematurely, while zap_huge_pmd() has cleared *pmd,
+                        * but not yet decremented compound_mapcount().
+                        */
+                       spin_unlock(ptl);
                }
+
                /*
                 * Here there can be other concurrent MADV_DONTNEED or
                 * trans huge page faults running, and if the pmd is
@@ -3236,6 +3247,36 @@ static inline void unmap_mapping_range_tree(struct rb_root_cached *root,
        }
 }
 
+/**
+ * unmap_mapping_page() - Unmap single page from processes.
+ * @page: The locked page to be unmapped.
+ *
+ * Unmap this page from any userspace process which still has it mmapped.
+ * Typically, for efficiency, the range of nearby pages has already been
+ * unmapped by unmap_mapping_pages() or unmap_mapping_range().  But once
+ * truncation or invalidation holds the lock on a page, it may find that
+ * the page has been remapped again: and then it uses unmap_mapping_page()
+ * to finally unmap it.
+ */
+void unmap_mapping_page(struct page *page)
+{
+       struct address_space *mapping = page->mapping;
+       struct zap_details details = { };
+
+       VM_BUG_ON(!PageLocked(page));
+       VM_BUG_ON(PageTail(page));
+
+       details.check_mapping = mapping;
+       details.first_index = page->index;
+       details.last_index = page->index + thp_nr_pages(page) - 1;
+       details.single_page = page;
+
+       i_mmap_lock_write(mapping);
+       if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)))
+               unmap_mapping_range_tree(&mapping->i_mmap, &details);
+       i_mmap_unlock_write(mapping);
+}
+
 /**
  * unmap_mapping_pages() - Unmap pages from processes.
  * @mapping: The address space containing pages to be unmapped.
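
The pmd_lock()/spin_unlock() pair added to zap_pmd_range() above (and
mirrored in the page_vma_mapped_walk() hunk further down) is the classic
take-and-drop pattern: briefly acquiring a lock that a racing
zap_huge_pmd() holds across its whole critical section guarantees we
cannot return until that zap has finished. A minimal pthread illustration
of the idea (userspace sketch, not kernel code):

#include <pthread.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static void wait_for_racing_update(void)
{
	/*
	 * If an updater currently holds 'lock' for its whole critical
	 * section, this blocks until it is done; we need no work of our
	 * own inside the critical section, only the ordering guarantee.
	 */
	pthread_mutex_lock(&lock);
	pthread_mutex_unlock(&lock);
}
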
index b234c3f..41ff2c9 100644 (file)
@@ -295,6 +295,7 @@ void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
                goto out;
 
        page = migration_entry_to_page(entry);
+       page = compound_head(page);
 
        /*
         * Once page cache replacement of page migration started, page_count
index 2cf01d9..e37bd43 100644 (file)
@@ -212,23 +212,34 @@ restart:
                        pvmw->ptl = NULL;
                }
        } else if (!pmd_present(pmde)) {
+               /*
+                * If PVMW_SYNC, take and drop THP pmd lock so that we
+                * cannot return prematurely, while zap_huge_pmd() has
+                * cleared *pmd but not decremented compound_mapcount().
+                */
+               if ((pvmw->flags & PVMW_SYNC) &&
+                   PageTransCompound(pvmw->page)) {
+                       spinlock_t *ptl = pmd_lock(mm, pvmw->pmd);
+
+                       spin_unlock(ptl);
+               }
                return false;
        }
        if (!map_pte(pvmw))
                goto next_pte;
        while (1) {
+               unsigned long end;
+
                if (check_pte(pvmw))
                        return true;
 next_pte:
                /* Seek to next pte only makes sense for THP */
                if (!PageTransHuge(pvmw->page) || PageHuge(pvmw->page))
                        return not_found(pvmw);
+               end = vma_address_end(pvmw->page, pvmw->vma);
                do {
                        pvmw->address += PAGE_SIZE;
-                       if (pvmw->address >= pvmw->vma->vm_end ||
-                           pvmw->address >=
-                                       __vma_address(pvmw->page, pvmw->vma) +
-                                       thp_size(pvmw->page))
+                       if (pvmw->address >= end)
                                return not_found(pvmw);
                        /* Did we cross page table boundary? */
                        if (pvmw->address % PMD_SIZE == 0) {
@@ -266,14 +277,10 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
                .vma = vma,
                .flags = PVMW_SYNC,
        };
-       unsigned long start, end;
-
-       start = __vma_address(page, vma);
-       end = start + thp_size(page) - PAGE_SIZE;
 
-       if (unlikely(end < vma->vm_start || start >= vma->vm_end))
+       pvmw.address = vma_address(page, vma);
+       if (pvmw.address == -EFAULT)
                return 0;
-       pvmw.address = max(start, vma->vm_start);
        if (!page_vma_mapped_walk(&pvmw))
                return 0;
        page_vma_mapped_walk_done(&pvmw);
index c2210e1..4e640ba 100644 (file)
@@ -135,9 +135,8 @@ pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address,
 {
        pmd_t pmd;
        VM_BUG_ON(address & ~HPAGE_PMD_MASK);
-       VM_BUG_ON(!pmd_present(*pmdp));
-       /* Below assumes pmd_present() is true */
-       VM_BUG_ON(!pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp));
+       VM_BUG_ON(pmd_present(*pmdp) && !pmd_trans_huge(*pmdp) &&
+                          !pmd_devmap(*pmdp));
        pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp);
        flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
        return pmd;
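
The reworked assertion tolerates a non-present pmd (for example a THP
migration entry) while still catching a present pmd that is neither
trans-huge nor devmap. The difference, as a compilable truth-table check
(toy predicates, not kernel code):

#include <assert.h>
#include <stdbool.h>

static bool old_bug(bool present, bool huge, bool devmap)
{
	/* the two removed VM_BUG_ON conditions, OR-ed together */
	return !present || (!huge && !devmap);
}

static bool new_bug(bool present, bool huge, bool devmap)
{
	return present && !huge && !devmap;
}

int main(void)
{
	/* migration entry: not present, not huge, not devmap */
	assert(old_bug(false, false, false));   /* old assertion would fire */
	assert(!new_bug(false, false, false));  /* new one tolerates it */
	/* a present pmd that is neither huge nor devmap is still a bug */
	assert(new_bug(true, false, false));
	return 0;
}
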
index 693a610..e05c300 100644 (file)
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -707,7 +707,6 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
  */
 unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
 {
-       unsigned long address;
        if (PageAnon(page)) {
                struct anon_vma *page__anon_vma = page_anon_vma(page);
                /*
@@ -717,15 +716,13 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
                if (!vma->anon_vma || !page__anon_vma ||
                    vma->anon_vma->root != page__anon_vma->root)
                        return -EFAULT;
-       } else if (page->mapping) {
-               if (!vma->vm_file || vma->vm_file->f_mapping != page->mapping)
-                       return -EFAULT;
-       } else
+       } else if (!vma->vm_file) {
                return -EFAULT;
-       address = __vma_address(page, vma);
-       if (unlikely(address < vma->vm_start || address >= vma->vm_end))
+       } else if (vma->vm_file->f_mapping != compound_head(page)->mapping) {
                return -EFAULT;
-       return address;
+       }
+
+       return vma_address(page, vma);
 }
 
 pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address)
@@ -919,7 +916,7 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
         */
        mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE,
                                0, vma, vma->vm_mm, address,
-                               min(vma->vm_end, address + page_size(page)));
+                               vma_address_end(page, vma));
        mmu_notifier_invalidate_range_start(&range);
 
        while (page_vma_mapped_walk(&pvmw)) {
@@ -1405,6 +1402,15 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
        struct mmu_notifier_range range;
        enum ttu_flags flags = (enum ttu_flags)(long)arg;
 
+       /*
+        * When racing against e.g. zap_pte_range() on another cpu,
+        * in between its ptep_get_and_clear_full() and page_remove_rmap(),
+        * try_to_unmap() may return false when it is about to become true,
+        * if page table locking is skipped: use TTU_SYNC to wait for that.
+        */
+       if (flags & TTU_SYNC)
+               pvmw.flags = PVMW_SYNC;
+
        /* munlock has nothing to gain from examining un-locked vmas */
        if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED))
                return true;
@@ -1426,9 +1432,10 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
         * Note that the page can not be free in this function as call of
         * try_to_unmap() must hold a reference on the page.
         */
+       range.end = PageKsm(page) ?
+                       address + PAGE_SIZE : vma_address_end(page, vma);
        mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
-                               address,
-                               min(vma->vm_end, address + page_size(page)));
+                               address, range.end);
        if (PageHuge(page)) {
                /*
                 * If sharing is possible, start and end will be adjusted
@@ -1777,7 +1784,13 @@ bool try_to_unmap(struct page *page, enum ttu_flags flags)
        else
                rmap_walk(page, &rwc);
 
-       return !page_mapcount(page) ? true : false;
+       /*
+        * When racing against e.g. zap_pte_range() on another cpu,
+        * in between its ptep_get_and_clear_full() and page_remove_rmap(),
+        * try_to_unmap() may return false when it is about to become true,
+        * if page table locking is skipped: use TTU_SYNC to wait for that.
+        */
+       return !page_mapcount(page);
 }
 
 /**
@@ -1874,6 +1887,7 @@ static void rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc,
                struct vm_area_struct *vma = avc->vma;
                unsigned long address = vma_address(page, vma);
 
+               VM_BUG_ON_VMA(address == -EFAULT, vma);
                cond_resched();
 
                if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
@@ -1928,6 +1942,7 @@ static void rmap_walk_file(struct page *page, struct rmap_walk_control *rwc,
                        pgoff_start, pgoff_end) {
                unsigned long address = vma_address(page, vma);
 
+               VM_BUG_ON_VMA(address == -EFAULT, vma);
                cond_resched();
 
                if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
index a4a5714..7cab776 100644 (file)
@@ -97,8 +97,7 @@ EXPORT_SYMBOL(kmem_cache_size);
 #ifdef CONFIG_DEBUG_VM
 static int kmem_cache_sanity_check(const char *name, unsigned int size)
 {
-       if (!name || in_interrupt() || size < sizeof(void *) ||
-               size > KMALLOC_MAX_SIZE) {
+       if (!name || in_interrupt() || size > KMALLOC_MAX_SIZE) {
                pr_err("kmem_cache_create(%s) integrity check failed\n", name);
                return -EINVAL;
        }
index 3f96e09..61bd40e 100644 (file)
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -15,6 +15,7 @@
 #include <linux/module.h>
 #include <linux/bit_spinlock.h>
 #include <linux/interrupt.h>
+#include <linux/swab.h>
 #include <linux/bitops.h>
 #include <linux/slab.h>
 #include "slab.h"
@@ -712,15 +713,15 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
               p, p - addr, get_freepointer(s, p));
 
        if (s->flags & SLAB_RED_ZONE)
-               print_section(KERN_ERR, "Redzone ", p - s->red_left_pad,
+               print_section(KERN_ERR, "Redzone  ", p - s->red_left_pad,
                              s->red_left_pad);
        else if (p > addr + 16)
                print_section(KERN_ERR, "Bytes b4 ", p - 16, 16);
 
-       print_section(KERN_ERR, "Object ", p,
+       print_section(KERN_ERR,         "Object   ", p,
                      min_t(unsigned int, s->object_size, PAGE_SIZE));
        if (s->flags & SLAB_RED_ZONE)
-               print_section(KERN_ERR, "Redzone ", p + s->object_size,
+               print_section(KERN_ERR, "Redzone  ", p + s->object_size,
                        s->inuse - s->object_size);
 
        off = get_info_end(s);
@@ -732,7 +733,7 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
 
        if (off != size_from_object(s))
                /* Beginning of the filler is the free pointer */
-               print_section(KERN_ERR, "Padding ", p + off,
+               print_section(KERN_ERR, "Padding  ", p + off,
                              size_from_object(s) - off);
 
        dump_stack();
@@ -909,11 +910,11 @@ static int check_object(struct kmem_cache *s, struct page *page,
        u8 *endobject = object + s->object_size;
 
        if (s->flags & SLAB_RED_ZONE) {
-               if (!check_bytes_and_report(s, page, object, "Redzone",
+               if (!check_bytes_and_report(s, page, object, "Left Redzone",
                        object - s->red_left_pad, val, s->red_left_pad))
                        return 0;
 
-               if (!check_bytes_and_report(s, page, object, "Redzone",
+               if (!check_bytes_and_report(s, page, object, "Right Redzone",
                        endobject, val, s->inuse - s->object_size))
                        return 0;
        } else {
@@ -928,7 +929,7 @@ static int check_object(struct kmem_cache *s, struct page *page,
                if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) &&
                        (!check_bytes_and_report(s, page, p, "Poison", p,
                                        POISON_FREE, s->object_size - 1) ||
-                        !check_bytes_and_report(s, page, p, "Poison",
+                        !check_bytes_and_report(s, page, p, "End Poison",
                                p + s->object_size - 1, POISON_END, 1)))
                        return 0;
                /*
@@ -3689,7 +3690,6 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
 {
        slab_flags_t flags = s->flags;
        unsigned int size = s->object_size;
-       unsigned int freepointer_area;
        unsigned int order;
 
        /*
@@ -3698,13 +3698,6 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
         * the possible location of the free pointer.
         */
        size = ALIGN(size, sizeof(void *));
-       /*
-        * This is the area of the object where a freepointer can be
-        * safely written. If redzoning adds more to the inuse size, we
-        * can't use that portion for writing the freepointer, so
-        * s->offset must be limited within this for the general case.
-        */
-       freepointer_area = size;
 
 #ifdef CONFIG_SLUB_DEBUG
        /*
@@ -3730,19 +3723,21 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
 
        /*
         * With that we have determined the number of bytes in actual use
-        * by the object. This is the potential offset to the free pointer.
+        * by the object and redzoning.
         */
        s->inuse = size;
 
-       if (((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) ||
-               s->ctor)) {
+       if ((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) ||
+           ((flags & SLAB_RED_ZONE) && s->object_size < sizeof(void *)) ||
+           s->ctor) {
                /*
                 * Relocate free pointer after the object if it is not
                 * permitted to overwrite the first word of the object on
                 * kmem_cache_free.
                 *
                 * This is the case if we do RCU, have a constructor or
-                * destructor or are poisoning the objects.
+                * destructor, are poisoning the objects, or are
+                * redzoning an object smaller than sizeof(void *).
                 *
                 * The assumption that s->offset >= s->inuse means free
                 * pointer is outside of the object is used in the
@@ -3751,13 +3746,13 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
                 */
                s->offset = size;
                size += sizeof(void *);
-       } else if (freepointer_area > sizeof(void *)) {
+       } else {
                /*
                 * Store freelist pointer near middle of object to keep
                 * it away from the edges of the object to avoid small
                 * sized over/underflows from neighboring allocations.
                 */
-               s->offset = ALIGN(freepointer_area / 2, sizeof(void *));
+               s->offset = ALIGN_DOWN(s->object_size / 2, sizeof(void *));
        }
 
 #ifdef CONFIG_SLUB_DEBUG
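
Two slub changes interact in calculate_sizes(): a cache whose object_size
is smaller than a pointer now relocates the free pointer past the object
when redzoning (so the right redzone is never overwritten), and in the
ordinary case the pointer is centred within the object itself rather than
within the old, larger freepointer_area. A toy computation of the new
placement (userspace C; the macro mimics the kernel's ALIGN_DOWN):

#include <stdio.h>

#define TOY_ALIGN_DOWN(x, a)  ((x) & ~((a) - 1))

int main(void)
{
	unsigned long object_size = 24;  /* user-visible bytes in the object */

	/* mirrors: s->offset = ALIGN_DOWN(s->object_size / 2, sizeof(void *)) */
	unsigned long offset = TOY_ALIGN_DOWN(object_size / 2, sizeof(void *));

	printf("free pointer at byte %lu of %lu\n", offset, object_size);
	/* prints "8 of 24" on LP64: inside the object, away from both edges */
	return 0;
}
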
index b2ada9d..55c18af 100644 (file)
@@ -344,6 +344,15 @@ size_t mem_section_usage_size(void)
        return sizeof(struct mem_section_usage) + usemap_size();
 }
 
+static inline phys_addr_t pgdat_to_phys(struct pglist_data *pgdat)
+{
+#ifndef CONFIG_NEED_MULTIPLE_NODES
+       return __pa_symbol(pgdat);
+#else
+       return __pa(pgdat);
+#endif
+}
+
 #ifdef CONFIG_MEMORY_HOTREMOVE
 static struct mem_section_usage * __init
 sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
@@ -362,7 +371,7 @@ sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
         * from the same section as the pgdat where possible to avoid
         * this problem.
         */
-       goal = __pa(pgdat) & (PAGE_SECTION_MASK << PAGE_SHIFT);
+       goal = pgdat_to_phys(pgdat) & (PAGE_SECTION_MASK << PAGE_SHIFT);
        limit = goal + (1UL << PA_SECTION_SHIFT);
        nid = early_pfn_to_nid(goal >> PAGE_SHIFT);
 again:
@@ -390,7 +399,7 @@ static void __init check_usemap_section_nr(int nid,
        }
 
        usemap_snr = pfn_to_section_nr(__pa(usage) >> PAGE_SHIFT);
-       pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
+       pgdat_snr = pfn_to_section_nr(pgdat_to_phys(pgdat) >> PAGE_SHIFT);
        if (usemap_snr == pgdat_snr)
                return;
 
index 149e774..996afa8 100644 (file)
@@ -1900,7 +1900,7 @@ unsigned int count_swap_pages(int type, int free)
 
 static inline int pte_same_as_swp(pte_t pte, pte_t swp_pte)
 {
-       return pte_same(pte_swp_clear_soft_dirty(pte), swp_pte);
+       return pte_same(pte_swp_clear_flags(pte), swp_pte);
 }
 
 /*
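
pte_same_as_swp() previously stripped only the soft-dirty bit before
comparing; pte_swp_clear_flags() strips every per-pte software flag
(soft-dirty and uffd-wp), so a uffd-wp-marked pte still matches its swap
entry. A toy of the masking (bit values are invented for illustration):

#define TOY_SWP_SOFT_DIRTY	0x1UL
#define TOY_SWP_UFFD_WP		0x2UL
#define TOY_SWP_FLAGS		(TOY_SWP_SOFT_DIRTY | TOY_SWP_UFFD_WP)

/* swp_pte comes from swp_entry_to_pte() and carries no software flags */
static int toy_pte_same_as_swp(unsigned long pte, unsigned long swp_pte)
{
	return (pte & ~TOY_SWP_FLAGS) == swp_pte;
}
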
index 95af244..234ddd8 100644 (file)
@@ -167,13 +167,10 @@ void do_invalidatepage(struct page *page, unsigned int offset,
  * its lock, b) when a concurrent invalidate_mapping_pages got there first and
  * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space.
  */
-static void
-truncate_cleanup_page(struct address_space *mapping, struct page *page)
+static void truncate_cleanup_page(struct page *page)
 {
-       if (page_mapped(page)) {
-               unsigned int nr = thp_nr_pages(page);
-               unmap_mapping_pages(mapping, page->index, nr, false);
-       }
+       if (page_mapped(page))
+               unmap_mapping_page(page);
 
        if (page_has_private(page))
                do_invalidatepage(page, 0, thp_size(page));
@@ -218,7 +215,7 @@ int truncate_inode_page(struct address_space *mapping, struct page *page)
        if (page->mapping != mapping)
                return -EIO;
 
-       truncate_cleanup_page(mapping, page);
+       truncate_cleanup_page(page);
        delete_from_page_cache(page);
        return 0;
 }
@@ -325,7 +322,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
                index = indices[pagevec_count(&pvec) - 1] + 1;
                truncate_exceptional_pvec_entries(mapping, &pvec, indices);
                for (i = 0; i < pagevec_count(&pvec); i++)
-                       truncate_cleanup_page(mapping, pvec.pages[i]);
+                       truncate_cleanup_page(pvec.pages[i]);
                delete_from_page_cache_batch(mapping, &pvec);
                for (i = 0; i < pagevec_count(&pvec); i++)
                        unlock_page(pvec.pages[i]);
@@ -639,6 +636,16 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
                                continue;
                        }
 
+                       if (!did_range_unmap && page_mapped(page)) {
+                               /*
+                                * If page is mapped, before taking its lock,
+                                * zap the rest of the file in one hit.
+                                */
+                               unmap_mapping_pages(mapping, index,
+                                               (1 + end - index), false);
+                               did_range_unmap = 1;
+                       }
+
                        lock_page(page);
                        WARN_ON(page_to_index(page) != index);
                        if (page->mapping != mapping) {
@@ -646,23 +653,11 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
                                continue;
                        }
                        wait_on_page_writeback(page);
-                       if (page_mapped(page)) {
-                               if (!did_range_unmap) {
-                                       /*
-                                        * Zap the rest of the file in one hit.
-                                        */
-                                       unmap_mapping_pages(mapping, index,
-                                               (1 + end - index), false);
-                                       did_range_unmap = 1;
-                               } else {
-                                       /*
-                                        * Just zap this page
-                                        */
-                                       unmap_mapping_pages(mapping, index,
-                                                               1, false);
-                               }
-                       }
+
+                       if (page_mapped(page))
+                               unmap_mapping_page(page);
                        BUG_ON(page_mapped(page));
+
                        ret2 = do_launder_page(mapping, page);
                        if (ret2 == 0) {
                                if (!invalidate_complete_page2(mapping, page))
index be18af4..c7236da 100644 (file)
@@ -768,7 +768,7 @@ static int aarp_rcv(struct sk_buff *skb, struct net_device *dev,
        if (a && a->status & ATIF_PROBE) {
                a->status |= ATIF_PROBE_FAIL;
                /*
-                * we do not respond to probe or request packets for
+                * we do not respond to probe or request packets of
                 * this address while we are probing this address
                 */
                goto unlock;
index 789f257..fc8be49 100644 (file)
@@ -409,8 +409,10 @@ static void batadv_iv_ogm_emit(struct batadv_forw_packet *forw_packet)
        if (WARN_ON(!forw_packet->if_outgoing))
                return;
 
-       if (WARN_ON(forw_packet->if_outgoing->soft_iface != soft_iface))
+       if (forw_packet->if_outgoing->soft_iface != soft_iface) {
+               pr_warn("%s: soft interface switch for queued OGM\n", __func__);
                return;
+       }
 
        if (forw_packet->if_incoming->if_status != BATADV_IF_ACTIVE)
                return;
index 372e3b2..7dd51da 100644 (file)
@@ -3229,7 +3229,7 @@ static inline struct l2cap_chan *smp_new_conn_cb(struct l2cap_chan *pchan)
 {
        struct l2cap_chan *chan;
 
-       bt_dev_dbg(pchan->conn->hcon->hdev, "pchan %p", pchan);
+       BT_DBG("pchan %p", pchan);
 
        chan = l2cap_chan_create();
        if (!chan)
@@ -3250,7 +3250,7 @@ static inline struct l2cap_chan *smp_new_conn_cb(struct l2cap_chan *pchan)
         */
        atomic_set(&chan->nesting, L2CAP_NESTING_SMP);
 
-       bt_dev_dbg(pchan->conn->hcon->hdev, "created chan %p", chan);
+       BT_DBG("created chan %p", chan);
 
        return chan;
 }
@@ -3354,7 +3354,7 @@ static void smp_del_chan(struct l2cap_chan *chan)
 {
        struct smp_dev *smp;
 
-       bt_dev_dbg(chan->conn->hcon->hdev, "chan %p", chan);
+       BT_DBG("chan %p", chan);
 
        smp = chan->data;
        if (smp) {
index 7ce8a77..e013d33 100644 (file)
@@ -90,8 +90,8 @@ struct bridge_mcast_stats {
 #endif
 
 struct br_tunnel_info {
-       __be64                  tunnel_id;
-       struct metadata_dst     *tunnel_dst;
+       __be64                          tunnel_id;
+       struct metadata_dst __rcu       *tunnel_dst;
 };
 
 /* private vlan flags */
index 0d3a8c0..0101744 100644 (file)
@@ -41,26 +41,33 @@ static struct net_bridge_vlan *br_vlan_tunnel_lookup(struct rhashtable *tbl,
                                      br_vlan_tunnel_rht_params);
 }
 
+static void vlan_tunnel_info_release(struct net_bridge_vlan *vlan)
+{
+       struct metadata_dst *tdst = rtnl_dereference(vlan->tinfo.tunnel_dst);
+
+       WRITE_ONCE(vlan->tinfo.tunnel_id, 0);
+       RCU_INIT_POINTER(vlan->tinfo.tunnel_dst, NULL);
+       dst_release(&tdst->dst);
+}
+
 void vlan_tunnel_info_del(struct net_bridge_vlan_group *vg,
                          struct net_bridge_vlan *vlan)
 {
-       if (!vlan->tinfo.tunnel_dst)
+       if (!rcu_access_pointer(vlan->tinfo.tunnel_dst))
                return;
        rhashtable_remove_fast(&vg->tunnel_hash, &vlan->tnode,
                               br_vlan_tunnel_rht_params);
-       vlan->tinfo.tunnel_id = 0;
-       dst_release(&vlan->tinfo.tunnel_dst->dst);
-       vlan->tinfo.tunnel_dst = NULL;
+       vlan_tunnel_info_release(vlan);
 }
 
 static int __vlan_tunnel_info_add(struct net_bridge_vlan_group *vg,
                                  struct net_bridge_vlan *vlan, u32 tun_id)
 {
-       struct metadata_dst *metadata = NULL;
+       struct metadata_dst *metadata = rtnl_dereference(vlan->tinfo.tunnel_dst);
        __be64 key = key32_to_tunnel_id(cpu_to_be32(tun_id));
        int err;
 
-       if (vlan->tinfo.tunnel_dst)
+       if (metadata)
                return -EEXIST;
 
        metadata = __ip_tun_set_dst(0, 0, 0, 0, 0, TUNNEL_KEY,
@@ -69,8 +76,8 @@ static int __vlan_tunnel_info_add(struct net_bridge_vlan_group *vg,
                return -EINVAL;
 
        metadata->u.tun_info.mode |= IP_TUNNEL_INFO_TX | IP_TUNNEL_INFO_BRIDGE;
-       vlan->tinfo.tunnel_dst = metadata;
-       vlan->tinfo.tunnel_id = key;
+       rcu_assign_pointer(vlan->tinfo.tunnel_dst, metadata);
+       WRITE_ONCE(vlan->tinfo.tunnel_id, key);
 
        err = rhashtable_lookup_insert_fast(&vg->tunnel_hash, &vlan->tnode,
                                            br_vlan_tunnel_rht_params);
@@ -79,9 +86,7 @@ static int __vlan_tunnel_info_add(struct net_bridge_vlan_group *vg,
 
        return 0;
 out:
-       dst_release(&vlan->tinfo.tunnel_dst->dst);
-       vlan->tinfo.tunnel_dst = NULL;
-       vlan->tinfo.tunnel_id = 0;
+       vlan_tunnel_info_release(vlan);
 
        return err;
 }
@@ -182,12 +187,15 @@ int br_handle_ingress_vlan_tunnel(struct sk_buff *skb,
 int br_handle_egress_vlan_tunnel(struct sk_buff *skb,
                                 struct net_bridge_vlan *vlan)
 {
+       struct metadata_dst *tunnel_dst;
+       __be64 tunnel_id;
        int err;
 
-       if (!vlan || !vlan->tinfo.tunnel_id)
+       if (!vlan)
                return 0;
 
-       if (unlikely(!skb_vlan_tag_present(skb)))
+       tunnel_id = READ_ONCE(vlan->tinfo.tunnel_id);
+       if (!tunnel_id || unlikely(!skb_vlan_tag_present(skb)))
                return 0;
 
        skb_dst_drop(skb);
@@ -195,7 +203,9 @@ int br_handle_egress_vlan_tunnel(struct sk_buff *skb,
        if (err)
                return err;
 
-       skb_dst_set(skb, dst_clone(&vlan->tinfo.tunnel_dst->dst));
+       tunnel_dst = rcu_dereference(vlan->tinfo.tunnel_dst);
+       if (tunnel_dst && dst_hold_safe(&tunnel_dst->dst))
+               skb_dst_set(skb, &tunnel_dst->dst);
 
        return 0;
 }
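
The bridge hunks annotate tunnel_dst as __rcu and replace the bare
dst_clone() with rcu_dereference() plus dst_hold_safe(), so an egress
reader never resurrects a dst whose refcount has already dropped to zero.
A userspace sketch of that read side, with C11 atomics standing in for RCU
and the dst refcount (illustrative toy_* names only):

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

struct toy_dst { atomic_int refcnt; };

static struct toy_dst *_Atomic tunnel_dst;  /* __rcu pointer stand-in */

static bool toy_dst_hold_safe(struct toy_dst *dst)
{
	int old = atomic_load(&dst->refcnt);

	while (old != 0)    /* 0 means the dst is already being freed */
		if (atomic_compare_exchange_weak(&dst->refcnt, &old, old + 1))
			return true;
	return false;
}

static struct toy_dst *toy_get_tunnel_dst(void)
{
	/* one annotated load, like rcu_dereference(vlan->tinfo.tunnel_dst) */
	struct toy_dst *dst = atomic_load(&tunnel_dst);

	if (dst && toy_dst_hold_safe(dst))
		return dst;             /* caller now owns a reference */
	return NULL;                    /* racing teardown: skip, do not crash */
}
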
index 909b9e6..f3e4d95 100644 (file)
@@ -125,7 +125,7 @@ struct bcm_sock {
        struct sock sk;
        int bound;
        int ifindex;
-       struct notifier_block notifier;
+       struct list_head notifier;
        struct list_head rx_ops;
        struct list_head tx_ops;
        unsigned long dropped_usr_msgs;
@@ -133,6 +133,10 @@ struct bcm_sock {
        char procname [32]; /* inode number in decimal with \0 */
 };
 
+static LIST_HEAD(bcm_notifier_list);
+static DEFINE_SPINLOCK(bcm_notifier_lock);
+static struct bcm_sock *bcm_busy_notifier;
+
 static inline struct bcm_sock *bcm_sk(const struct sock *sk)
 {
        return (struct bcm_sock *)sk;
@@ -402,6 +406,7 @@ static enum hrtimer_restart bcm_tx_timeout_handler(struct hrtimer *hrtimer)
                if (!op->count && (op->flags & TX_COUNTEVT)) {
 
                        /* create notification to user */
+                       memset(&msg_head, 0, sizeof(msg_head));
                        msg_head.opcode  = TX_EXPIRED;
                        msg_head.flags   = op->flags;
                        msg_head.count   = op->count;
@@ -439,6 +444,7 @@ static void bcm_rx_changed(struct bcm_op *op, struct canfd_frame *data)
        /* this element is not throttled anymore */
        data->flags &= (BCM_CAN_FLAGS_MASK|RX_RECV);
 
+       memset(&head, 0, sizeof(head));
        head.opcode  = RX_CHANGED;
        head.flags   = op->flags;
        head.count   = op->count;
@@ -560,6 +566,7 @@ static enum hrtimer_restart bcm_rx_timeout_handler(struct hrtimer *hrtimer)
        }
 
        /* create notification to user */
+       memset(&msg_head, 0, sizeof(msg_head));
        msg_head.opcode  = RX_TIMEOUT;
        msg_head.flags   = op->flags;
        msg_head.count   = op->count;
@@ -1378,20 +1385,15 @@ static int bcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
 /*
  * notification handler for netdevice status changes
  */
-static int bcm_notifier(struct notifier_block *nb, unsigned long msg,
-                       void *ptr)
+static void bcm_notify(struct bcm_sock *bo, unsigned long msg,
+                      struct net_device *dev)
 {
-       struct net_device *dev = netdev_notifier_info_to_dev(ptr);
-       struct bcm_sock *bo = container_of(nb, struct bcm_sock, notifier);
        struct sock *sk = &bo->sk;
        struct bcm_op *op;
        int notify_enodev = 0;
 
        if (!net_eq(dev_net(dev), sock_net(sk)))
-               return NOTIFY_DONE;
-
-       if (dev->type != ARPHRD_CAN)
-               return NOTIFY_DONE;
+               return;
 
        switch (msg) {
 
@@ -1426,7 +1428,28 @@ static int bcm_notifier(struct notifier_block *nb, unsigned long msg,
                                sk->sk_error_report(sk);
                }
        }
+}
 
+static int bcm_notifier(struct notifier_block *nb, unsigned long msg,
+                       void *ptr)
+{
+       struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+
+       if (dev->type != ARPHRD_CAN)
+               return NOTIFY_DONE;
+       if (msg != NETDEV_UNREGISTER && msg != NETDEV_DOWN)
+               return NOTIFY_DONE;
+       if (unlikely(bcm_busy_notifier)) /* Check for reentrant bug. */
+               return NOTIFY_DONE;
+
+       spin_lock(&bcm_notifier_lock);
+       list_for_each_entry(bcm_busy_notifier, &bcm_notifier_list, notifier) {
+               spin_unlock(&bcm_notifier_lock);
+               bcm_notify(bcm_busy_notifier, msg, dev);
+               spin_lock(&bcm_notifier_lock);
+       }
+       bcm_busy_notifier = NULL;
+       spin_unlock(&bcm_notifier_lock);
        return NOTIFY_DONE;
 }
 
@@ -1446,9 +1469,9 @@ static int bcm_init(struct sock *sk)
        INIT_LIST_HEAD(&bo->rx_ops);
 
        /* set notifier */
-       bo->notifier.notifier_call = bcm_notifier;
-
-       register_netdevice_notifier(&bo->notifier);
+       spin_lock(&bcm_notifier_lock);
+       list_add_tail(&bo->notifier, &bcm_notifier_list);
+       spin_unlock(&bcm_notifier_lock);
 
        return 0;
 }
@@ -1471,7 +1494,14 @@ static int bcm_release(struct socket *sock)
 
        /* remove bcm_ops, timer, rx_unregister(), etc. */
 
-       unregister_netdevice_notifier(&bo->notifier);
+       spin_lock(&bcm_notifier_lock);
+       while (bcm_busy_notifier == bo) {
+               spin_unlock(&bcm_notifier_lock);
+               schedule_timeout_uninterruptible(1);
+               spin_lock(&bcm_notifier_lock);
+       }
+       list_del(&bo->notifier);
+       spin_unlock(&bcm_notifier_lock);
 
        lock_sock(sk);
 
@@ -1692,6 +1722,10 @@ static struct pernet_operations canbcm_pernet_ops __read_mostly = {
        .exit = canbcm_pernet_exit,
 };
 
+static struct notifier_block canbcm_notifier = {
+       .notifier_call = bcm_notifier
+};
+
 static int __init bcm_module_init(void)
 {
        int err;
@@ -1705,12 +1739,14 @@ static int __init bcm_module_init(void)
        }
 
        register_pernet_subsys(&canbcm_pernet_ops);
+       register_netdevice_notifier(&canbcm_notifier);
        return 0;
 }
 
 static void __exit bcm_module_exit(void)
 {
        can_proto_unregister(&bcm_can_proto);
+       unregister_netdevice_notifier(&canbcm_notifier);
        unregister_pernet_subsys(&canbcm_pernet_ops);
 }
 
index 253b244..be6183f 100644 (file)
@@ -143,10 +143,14 @@ struct isotp_sock {
        u32 force_tx_stmin;
        u32 force_rx_stmin;
        struct tpcon rx, tx;
-       struct notifier_block notifier;
+       struct list_head notifier;
        wait_queue_head_t wait;
 };
 
+static LIST_HEAD(isotp_notifier_list);
+static DEFINE_SPINLOCK(isotp_notifier_lock);
+static struct isotp_sock *isotp_busy_notifier;
+
 static inline struct isotp_sock *isotp_sk(const struct sock *sk)
 {
        return (struct isotp_sock *)sk;
@@ -1013,7 +1017,14 @@ static int isotp_release(struct socket *sock)
        /* wait for complete transmission of current pdu */
        wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE);
 
-       unregister_netdevice_notifier(&so->notifier);
+       spin_lock(&isotp_notifier_lock);
+       while (isotp_busy_notifier == so) {
+               spin_unlock(&isotp_notifier_lock);
+               schedule_timeout_uninterruptible(1);
+               spin_lock(&isotp_notifier_lock);
+       }
+       list_del(&so->notifier);
+       spin_unlock(&isotp_notifier_lock);
 
        lock_sock(sk);
 
@@ -1317,21 +1328,16 @@ static int isotp_getsockopt(struct socket *sock, int level, int optname,
        return 0;
 }
 
-static int isotp_notifier(struct notifier_block *nb, unsigned long msg,
-                         void *ptr)
+static void isotp_notify(struct isotp_sock *so, unsigned long msg,
+                        struct net_device *dev)
 {
-       struct net_device *dev = netdev_notifier_info_to_dev(ptr);
-       struct isotp_sock *so = container_of(nb, struct isotp_sock, notifier);
        struct sock *sk = &so->sk;
 
        if (!net_eq(dev_net(dev), sock_net(sk)))
-               return NOTIFY_DONE;
-
-       if (dev->type != ARPHRD_CAN)
-               return NOTIFY_DONE;
+               return;
 
        if (so->ifindex != dev->ifindex)
-               return NOTIFY_DONE;
+               return;
 
        switch (msg) {
        case NETDEV_UNREGISTER:
@@ -1357,7 +1363,28 @@ static int isotp_notifier(struct notifier_block *nb, unsigned long msg,
                        sk->sk_error_report(sk);
                break;
        }
+}
 
+static int isotp_notifier(struct notifier_block *nb, unsigned long msg,
+                         void *ptr)
+{
+       struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+
+       if (dev->type != ARPHRD_CAN)
+               return NOTIFY_DONE;
+       if (msg != NETDEV_UNREGISTER && msg != NETDEV_DOWN)
+               return NOTIFY_DONE;
+       if (unlikely(isotp_busy_notifier)) /* Check for reentrant bug. */
+               return NOTIFY_DONE;
+
+       spin_lock(&isotp_notifier_lock);
+       list_for_each_entry(isotp_busy_notifier, &isotp_notifier_list, notifier) {
+               spin_unlock(&isotp_notifier_lock);
+               isotp_notify(isotp_busy_notifier, msg, dev);
+               spin_lock(&isotp_notifier_lock);
+       }
+       isotp_busy_notifier = NULL;
+       spin_unlock(&isotp_notifier_lock);
        return NOTIFY_DONE;
 }
 
@@ -1394,8 +1421,9 @@ static int isotp_init(struct sock *sk)
 
        init_waitqueue_head(&so->wait);
 
-       so->notifier.notifier_call = isotp_notifier;
-       register_netdevice_notifier(&so->notifier);
+       spin_lock(&isotp_notifier_lock);
+       list_add_tail(&so->notifier, &isotp_notifier_list);
+       spin_unlock(&isotp_notifier_lock);
 
        return 0;
 }
@@ -1442,6 +1470,10 @@ static const struct can_proto isotp_can_proto = {
        .prot = &isotp_proto,
 };
 
+static struct notifier_block canisotp_notifier = {
+       .notifier_call = isotp_notifier
+};
+
 static __init int isotp_module_init(void)
 {
        int err;
@@ -1451,6 +1483,8 @@ static __init int isotp_module_init(void)
        err = can_proto_register(&isotp_can_proto);
        if (err < 0)
                pr_err("can: registration of isotp protocol failed\n");
+       else
+               register_netdevice_notifier(&canisotp_notifier);
 
        return err;
 }
@@ -1458,6 +1492,7 @@ static __init int isotp_module_init(void)
 static __exit void isotp_module_exit(void)
 {
        can_proto_unregister(&isotp_can_proto);
+       unregister_netdevice_notifier(&canisotp_notifier);
 }
 
 module_init(isotp_module_init);
index e09d087..c3946c3 100644 (file)
@@ -330,6 +330,9 @@ static void j1939_session_skb_drop_old(struct j1939_session *session)
 
        if ((do_skcb->offset + do_skb->len) < offset_start) {
                __skb_unlink(do_skb, &session->skb_queue);
+               /* drop ref taken in j1939_session_skb_queue() */
+               skb_unref(do_skb);
+
                kfree_skb(do_skb);
        }
        spin_unlock_irqrestore(&session->skb_queue.lock, flags);
@@ -349,12 +352,13 @@ void j1939_session_skb_queue(struct j1939_session *session,
 
        skcb->flags |= J1939_ECU_LOCAL_SRC;
 
+       skb_get(skb);
        skb_queue_tail(&session->skb_queue, skb);
 }
 
 static struct
-sk_buff *j1939_session_skb_find_by_offset(struct j1939_session *session,
-                                         unsigned int offset_start)
+sk_buff *j1939_session_skb_get_by_offset(struct j1939_session *session,
+                                        unsigned int offset_start)
 {
        struct j1939_priv *priv = session->priv;
        struct j1939_sk_buff_cb *do_skcb;
@@ -371,6 +375,10 @@ sk_buff *j1939_session_skb_find_by_offset(struct j1939_session *session,
                        skb = do_skb;
                }
        }
+
+       if (skb)
+               skb_get(skb);
+
        spin_unlock_irqrestore(&session->skb_queue.lock, flags);
 
        if (!skb)
@@ -381,12 +389,12 @@ sk_buff *j1939_session_skb_find_by_offset(struct j1939_session *session,
        return skb;
 }
 
-static struct sk_buff *j1939_session_skb_find(struct j1939_session *session)
+static struct sk_buff *j1939_session_skb_get(struct j1939_session *session)
 {
        unsigned int offset_start;
 
        offset_start = session->pkt.dpo * 7;
-       return j1939_session_skb_find_by_offset(session, offset_start);
+       return j1939_session_skb_get_by_offset(session, offset_start);
 }
 
 /* see if we are receiver
@@ -776,7 +784,7 @@ static int j1939_session_tx_dat(struct j1939_session *session)
        int ret = 0;
        u8 dat[8];
 
-       se_skb = j1939_session_skb_find_by_offset(session, session->pkt.tx * 7);
+       se_skb = j1939_session_skb_get_by_offset(session, session->pkt.tx * 7);
        if (!se_skb)
                return -ENOBUFS;
 
@@ -801,7 +809,8 @@ static int j1939_session_tx_dat(struct j1939_session *session)
                        netdev_err_once(priv->ndev,
                                        "%s: 0x%p: requested data outside of queued buffer: offset %i, len %i, pkt.tx: %i\n",
                                        __func__, session, skcb->offset, se_skb->len , session->pkt.tx);
-                       return -EOVERFLOW;
+                       ret = -EOVERFLOW;
+                       goto out_free;
                }
 
                if (!len) {
@@ -835,6 +844,12 @@ static int j1939_session_tx_dat(struct j1939_session *session)
        if (pkt_done)
                j1939_tp_set_rxtimeout(session, 250);
 
+ out_free:
+       if (ret)
+               kfree_skb(se_skb);
+       else
+               consume_skb(se_skb);
+
        return ret;
 }
 
@@ -1007,7 +1022,7 @@ static int j1939_xtp_txnext_receiver(struct j1939_session *session)
 static int j1939_simple_txnext(struct j1939_session *session)
 {
        struct j1939_priv *priv = session->priv;
-       struct sk_buff *se_skb = j1939_session_skb_find(session);
+       struct sk_buff *se_skb = j1939_session_skb_get(session);
        struct sk_buff *skb;
        int ret;
 
@@ -1015,8 +1030,10 @@ static int j1939_simple_txnext(struct j1939_session *session)
                return 0;
 
        skb = skb_clone(se_skb, GFP_ATOMIC);
-       if (!skb)
-               return -ENOMEM;
+       if (!skb) {
+               ret = -ENOMEM;
+               goto out_free;
+       }
 
        can_skb_set_owner(skb, se_skb->sk);
 
@@ -1024,12 +1041,18 @@ static int j1939_simple_txnext(struct j1939_session *session)
 
        ret = j1939_send_one(priv, skb);
        if (ret)
-               return ret;
+               goto out_free;
 
        j1939_sk_errqueue(session, J1939_ERRQUEUE_SCHED);
        j1939_sk_queue_activate_next(session);
 
-       return 0;
+ out_free:
+       if (ret)
+               kfree_skb(se_skb);
+       else
+               consume_skb(se_skb);
+
+       return ret;
 }
 
 static bool j1939_session_deactivate_locked(struct j1939_session *session)
@@ -1170,9 +1193,10 @@ static void j1939_session_completed(struct j1939_session *session)
        struct sk_buff *skb;
 
        if (!session->transmission) {
-               skb = j1939_session_skb_find(session);
+               skb = j1939_session_skb_get(session);
                /* distribute among j1939 receivers */
                j1939_sk_recv(session->priv, skb);
+               consume_skb(skb);
        }
 
        j1939_session_deactivate_activate_next(session);
@@ -1744,7 +1768,7 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session,
 {
        struct j1939_priv *priv = session->priv;
        struct j1939_sk_buff_cb *skcb;
-       struct sk_buff *se_skb;
+       struct sk_buff *se_skb = NULL;
        const u8 *dat;
        u8 *tpdat;
        int offset;
@@ -1786,7 +1810,7 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session,
                goto out_session_cancel;
        }
 
-       se_skb = j1939_session_skb_find_by_offset(session, packet * 7);
+       se_skb = j1939_session_skb_get_by_offset(session, packet * 7);
        if (!se_skb) {
                netdev_warn(priv->ndev, "%s: 0x%p: no skb found\n", __func__,
                            session);
@@ -1848,11 +1872,13 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session,
                j1939_tp_set_rxtimeout(session, 250);
        }
        session->last_cmd = 0xff;
+       consume_skb(se_skb);
        j1939_session_put(session);
 
        return;
 
  out_session_cancel:
+       kfree_skb(se_skb);
        j1939_session_timers_cancel(session);
        j1939_session_cancel(session, J1939_XTP_ABORT_FAULT);
        j1939_session_put(session);
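
The j1939 hunks rename j1939_session_skb_find*() to *_skb_get*() and make
every lookup return a held reference that the caller must drop on all exit
paths, with consume_skb() on success and kfree_skb() on error. A toy of
that discipline (userspace C, toy_* names):

#include <stdatomic.h>
#include <stdlib.h>

struct toy_skb { atomic_int users; };

static struct toy_skb *toy_skb_get(struct toy_skb *skb)
{
	atomic_fetch_add(&skb->users, 1);       /* like skb_get() */
	return skb;
}

static void toy_consume_skb(struct toy_skb *skb)
{
	if (atomic_fetch_sub(&skb->users, 1) == 1)
		free(skb);                      /* last reference gone */
}

static int toy_tx_one(struct toy_skb *queued)
{
	struct toy_skb *skb = toy_skb_get(queued);
	int ret = 0;

	/* ... clone and transmit, possibly failing and setting ret ... */

	toy_consume_skb(skb);   /* dropped on every path, success or error */
	return ret;
}
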
index 139d947..ac96fc2 100644 (file)
@@ -83,7 +83,7 @@ struct raw_sock {
        struct sock sk;
        int bound;
        int ifindex;
-       struct notifier_block notifier;
+       struct list_head notifier;
        int loopback;
        int recv_own_msgs;
        int fd_frames;
@@ -95,6 +95,10 @@ struct raw_sock {
        struct uniqframe __percpu *uniq;
 };
 
+static LIST_HEAD(raw_notifier_list);
+static DEFINE_SPINLOCK(raw_notifier_lock);
+static struct raw_sock *raw_busy_notifier;
+
 /* Return pointer to store the extra msg flags for raw_recvmsg().
  * We use the space of one unsigned int beyond the 'struct sockaddr_can'
  * in skb->cb.
@@ -263,21 +267,16 @@ static int raw_enable_allfilters(struct net *net, struct net_device *dev,
        return err;
 }
 
-static int raw_notifier(struct notifier_block *nb,
-                       unsigned long msg, void *ptr)
+static void raw_notify(struct raw_sock *ro, unsigned long msg,
+                      struct net_device *dev)
 {
-       struct net_device *dev = netdev_notifier_info_to_dev(ptr);
-       struct raw_sock *ro = container_of(nb, struct raw_sock, notifier);
        struct sock *sk = &ro->sk;
 
        if (!net_eq(dev_net(dev), sock_net(sk)))
-               return NOTIFY_DONE;
-
-       if (dev->type != ARPHRD_CAN)
-               return NOTIFY_DONE;
+               return;
 
        if (ro->ifindex != dev->ifindex)
-               return NOTIFY_DONE;
+               return;
 
        switch (msg) {
        case NETDEV_UNREGISTER:
@@ -305,7 +304,28 @@ static int raw_notifier(struct notifier_block *nb,
                        sk->sk_error_report(sk);
                break;
        }
+}
+
+static int raw_notifier(struct notifier_block *nb, unsigned long msg,
+                       void *ptr)
+{
+       struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+
+       if (dev->type != ARPHRD_CAN)
+               return NOTIFY_DONE;
+       if (msg != NETDEV_UNREGISTER && msg != NETDEV_DOWN)
+               return NOTIFY_DONE;
+       if (unlikely(raw_busy_notifier)) /* Check for reentrant bug. */
+               return NOTIFY_DONE;
 
+       spin_lock(&raw_notifier_lock);
+       list_for_each_entry(raw_busy_notifier, &raw_notifier_list, notifier) {
+               spin_unlock(&raw_notifier_lock);
+               raw_notify(raw_busy_notifier, msg, dev);
+               spin_lock(&raw_notifier_lock);
+       }
+       raw_busy_notifier = NULL;
+       spin_unlock(&raw_notifier_lock);
        return NOTIFY_DONE;
 }
 
@@ -334,9 +354,9 @@ static int raw_init(struct sock *sk)
                return -ENOMEM;
 
        /* set notifier */
-       ro->notifier.notifier_call = raw_notifier;
-
-       register_netdevice_notifier(&ro->notifier);
+       spin_lock(&raw_notifier_lock);
+       list_add_tail(&ro->notifier, &raw_notifier_list);
+       spin_unlock(&raw_notifier_lock);
 
        return 0;
 }
@@ -351,7 +371,14 @@ static int raw_release(struct socket *sock)
 
        ro = raw_sk(sk);
 
-       unregister_netdevice_notifier(&ro->notifier);
+       spin_lock(&raw_notifier_lock);
+       while (raw_busy_notifier == ro) {
+               spin_unlock(&raw_notifier_lock);
+               schedule_timeout_uninterruptible(1);
+               spin_lock(&raw_notifier_lock);
+       }
+       list_del(&ro->notifier);
+       spin_unlock(&raw_notifier_lock);
 
        lock_sock(sk);
 
@@ -889,6 +916,10 @@ static const struct can_proto raw_can_proto = {
        .prot       = &raw_proto,
 };
 
+static struct notifier_block canraw_notifier = {
+       .notifier_call = raw_notifier
+};
+
 static __init int raw_module_init(void)
 {
        int err;
@@ -898,6 +929,8 @@ static __init int raw_module_init(void)
        err = can_proto_register(&raw_can_proto);
        if (err < 0)
                pr_err("can: registration of raw protocol failed\n");
+       else
+               register_netdevice_notifier(&canraw_notifier);
 
        return err;
 }
@@ -905,6 +938,7 @@ static __init int raw_module_init(void)
 static __exit void raw_module_exit(void)
 {
        can_proto_unregister(&raw_can_proto);
+       unregister_netdevice_notifier(&canraw_notifier);
 }
 
 module_init(raw_module_init);
index 98f20ef..bf77457 100644 (file)
@@ -238,6 +238,7 @@ static int neigh_forced_gc(struct neigh_table *tbl)
 
                        write_lock(&n->lock);
                        if ((n->nud_state == NUD_FAILED) ||
+                           (n->nud_state == NUD_NOARP) ||
                            (tbl->is_multicast &&
                             tbl->is_multicast(n->primary_key)) ||
                            time_after(tref, n->updated))
index 43b6ac4..9b5a767 100644 (file)
@@ -641,6 +641,18 @@ void __put_net(struct net *net)
 }
 EXPORT_SYMBOL_GPL(__put_net);
 
+/**
+ * get_net_ns - increment the refcount of the network namespace
+ * @ns: common namespace (net)
+ *
+ * Returns the net's common namespace.
+ */
+struct ns_common *get_net_ns(struct ns_common *ns)
+{
+       return &get_net(container_of(ns, struct net, ns))->ns;
+}
+EXPORT_SYMBOL_GPL(get_net_ns);
+
 struct net *get_net_ns_by_fd(int fd)
 {
        struct file *file;
@@ -660,14 +672,8 @@ struct net *get_net_ns_by_fd(int fd)
        fput(file);
        return net;
 }
-
-#else
-struct net *get_net_ns_by_fd(int fd)
-{
-       return ERR_PTR(-EINVAL);
-}
-#endif
 EXPORT_SYMBOL_GPL(get_net_ns_by_fd);
+#endif
 
 struct net *get_net_ns_by_pid(pid_t pid)
 {
index 3e84279..ec931b0 100644 (file)
@@ -4842,10 +4842,12 @@ static int rtnl_bridge_notify(struct net_device *dev)
        if (err < 0)
                goto errout;
 
-       if (!skb->len) {
-               err = -EINVAL;
+       /* Notification info is only filled for bridge ports, not the bridge
+        * device itself. Therefore, a zero notification length is valid and
+        * should not result in an error.
+        */
+       if (!skb->len)
                goto errout;
-       }
 
        rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
        return 0;
index 3ad2287..bbc3b4b 100644 (file)
@@ -1253,6 +1253,7 @@ static void __msg_zerocopy_callback(struct ubuf_info *uarg)
        struct sock *sk = skb->sk;
        struct sk_buff_head *q;
        unsigned long flags;
+       bool is_zerocopy;
        u32 lo, hi;
        u16 len;
 
@@ -1267,6 +1268,7 @@ static void __msg_zerocopy_callback(struct ubuf_info *uarg)
        len = uarg->len;
        lo = uarg->id;
        hi = uarg->id + len - 1;
+       is_zerocopy = uarg->zerocopy;
 
        serr = SKB_EXT_ERR(skb);
        memset(serr, 0, sizeof(*serr));
@@ -1274,7 +1276,7 @@ static void __msg_zerocopy_callback(struct ubuf_info *uarg)
        serr->ee.ee_origin = SO_EE_ORIGIN_ZEROCOPY;
        serr->ee.ee_data = hi;
        serr->ee.ee_info = lo;
-       if (!uarg->zerocopy)
+       if (!is_zerocopy)
                serr->ee.ee_code |= SO_EE_CODE_ZEROCOPY_COPIED;
 
        q = &sk->sk_error_queue;
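
The skbuff fix reads uarg->zerocopy into a local before the memset(): serr
and uarg share the same skb->cb storage, so the flag is destroyed the
moment the error block is zeroed. A toy with explicitly overlapping
storage (illustrative layout, not the real skb->cb layout):

#include <stdbool.h>
#include <string.h>

struct toy_uarg { bool zerocopy; };
struct toy_serr { int ee_code; };

union toy_cb {          /* both structures occupy the same cb bytes */
	struct toy_uarg uarg;
	struct toy_serr serr;
};

static int toy_build_error(union toy_cb *cb)
{
	bool is_zerocopy = cb->uarg.zerocopy;    /* read BEFORE the wipe */

	memset(&cb->serr, 0, sizeof(cb->serr));  /* clobbers uarg.zerocopy */
	return is_zerocopy ? 0 : 1;              /* stale read was the bug */
}
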
index 2a6733a..5d38e90 100644 (file)
@@ -95,7 +95,7 @@ static int get_module_eeprom_by_page(struct net_device *dev,
        if (dev->sfp_bus)
                return sfp_get_module_eeprom_by_page(dev->sfp_bus, page_data, extack);
 
-       if (ops->get_module_info)
+       if (ops->get_module_eeprom_by_page)
                return ops->get_module_eeprom_by_page(dev, page_data, extack);
 
        return -EOPNOTSUPP;
index 3fa7a39..baa5d10 100644 (file)
@@ -1421,7 +1421,7 @@ static int ethtool_get_any_eeprom(struct net_device *dev, void __user *useraddr,
        if (eeprom.offset + eeprom.len > total_len)
                return -EINVAL;
 
-       data = kmalloc(PAGE_SIZE, GFP_USER);
+       data = kzalloc(PAGE_SIZE, GFP_USER);
        if (!data)
                return -ENOMEM;
 
@@ -1486,7 +1486,7 @@ static int ethtool_set_eeprom(struct net_device *dev, void __user *useraddr)
        if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev))
                return -EINVAL;
 
-       data = kmalloc(PAGE_SIZE, GFP_USER);
+       data = kzalloc(PAGE_SIZE, GFP_USER);
        if (!data)
                return -ENOMEM;
 
@@ -1765,7 +1765,7 @@ static int ethtool_self_test(struct net_device *dev, char __user *useraddr)
                return -EFAULT;
 
        test.len = test_len;
-       data = kmalloc_array(test_len, sizeof(u64), GFP_USER);
+       data = kcalloc(test_len, sizeof(u64), GFP_USER);
        if (!data)
                return -ENOMEM;
 
@@ -2293,7 +2293,7 @@ static int ethtool_get_tunable(struct net_device *dev, void __user *useraddr)
        ret = ethtool_tunable_valid(&tuna);
        if (ret)
                return ret;
-       data = kmalloc(tuna.len, GFP_USER);
+       data = kzalloc(tuna.len, GFP_USER);
        if (!data)
                return -ENOMEM;
        ret = ops->get_tunable(dev, &tuna, data);
@@ -2485,7 +2485,7 @@ static int get_phy_tunable(struct net_device *dev, void __user *useraddr)
        ret = ethtool_phy_tunable_valid(&tuna);
        if (ret)
                return ret;
-       data = kmalloc(tuna.len, GFP_USER);
+       data = kzalloc(tuna.len, GFP_USER);
        if (!data)
                return -ENOMEM;
        if (phy_drv_tunable) {
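
All five hunks above apply the same hardening: each buffer is later copied back to userspace, and if the driver fills less than was allocated, kmalloc() would leak stale kernel heap bytes through the untouched tail; the zeroing allocators guarantee the padding reads as zeros. A userspace sketch of the difference — fill_some() is a hypothetical stand-in for a driver callback:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define BUF_LEN 16

/* Stand-in for a driver callback that fills only part of the buffer. */
static size_t fill_some(unsigned char *buf, size_t len)
{
        size_t filled = len / 2;

        memset(buf, 0xAB, filled);
        return filled;
}

int main(void)
{
        /* calloc (like kzalloc) zeroes the tail that fill_some() skips;
         * plain malloc (like kmalloc) would hand back whatever stale
         * bytes happened to sit in that heap chunk. */
        unsigned char *data = calloc(1, BUF_LEN);

        if (!data)
                return 1;
        fill_some(data, BUF_LEN);
        for (size_t i = 0; i < BUF_LEN; i++)
                printf("%02x ", (unsigned)data[i]);  /* ab..ab 00..00 */
        printf("\n");
        free(data);
        return 0;
}
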
index b3029ff..2d51b7a 100644
@@ -353,6 +353,8 @@ static int strset_reply_size(const struct ethnl_req_info *req_base,
        int len = 0;
        int ret;
 
+       len += nla_total_size(0); /* ETHTOOL_A_STRSET_STRINGSETS */
+
        for (i = 0; i < ETH_SS_COUNT; i++) {
                const struct strset_info *set_info = &data->sets[i];
 
index f17870e..2f94d22 100644
@@ -575,7 +575,7 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr,
                        return err;
        }
 
-       if (!inet_sk(sk)->inet_num && inet_autobind(sk))
+       if (data_race(!inet_sk(sk)->inet_num) && inet_autobind(sk))
                return -EAGAIN;
        return sk->sk_prot->connect(sk, uaddr, addr_len);
 }
@@ -803,7 +803,7 @@ int inet_send_prepare(struct sock *sk)
        sock_rps_record_flow(sk);
 
        /* We may need to bind the socket. */
-       if (!inet_sk(sk)->inet_num && !sk->sk_prot->no_autobind &&
+       if (data_race(!inet_sk(sk)->inet_num) && !sk->sk_prot->no_autobind &&
            inet_autobind(sk))
                return -EAGAIN;
 
index bfaf327..e0480c6 100644
@@ -472,6 +472,7 @@ void cipso_v4_doi_free(struct cipso_v4_doi *doi_def)
                kfree(doi_def->map.std->lvl.local);
                kfree(doi_def->map.std->cat.cipso);
                kfree(doi_def->map.std->cat.local);
+               kfree(doi_def->map.std);
                break;
        }
        kfree(doi_def);
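
The added kfree(doi_def->map.std) plugs a leak in cipso_v4_doi_free(): the switch arm freed the arrays hanging off map.std but never map.std itself before freeing the outer structure. Nested allocations have to be released inside-out. A compact sketch of the same shape, all names hypothetical:

#include <stdlib.h>

struct std_map {
        int *lvl_local;
        int *cat_cipso;
};

struct doi_def {
        struct std_map *std;
};

/* Free inner arrays, then the map they hang off, then the container. */
static void doi_free(struct doi_def *doi)
{
        if (doi->std) {
                free(doi->std->lvl_local);
                free(doi->std->cat_cipso);
                free(doi->std);         /* the line the fix adds */
        }
        free(doi);
}

int main(void)
{
        struct doi_def *doi = calloc(1, sizeof(*doi));

        if (!doi)
                return 1;
        doi->std = calloc(1, sizeof(*doi->std));
        if (doi->std) {
                doi->std->lvl_local = calloc(4, sizeof(int));
                doi->std->cat_cipso = calloc(4, sizeof(int));
        }
        doi_free(doi);          /* inside-out: members, map, container */
        return 0;
}
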
index 2e35f68..1c6429c 100644
@@ -1989,7 +1989,7 @@ static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla,
                return -EAFNOSUPPORT;
 
        if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
-               BUG();
+               return -EINVAL;
 
        if (tb[IFLA_INET_CONF]) {
                nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
index 7b6931a..752e392 100644
@@ -759,6 +759,13 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
                icmp_param.data_len = room;
        icmp_param.head_len = sizeof(struct icmphdr);
 
+       /* if we don't have a source address at this point, fall back to the
+        * dummy address instead of sending out a packet with a source address
+        * of 0.0.0.0
+        */
+       if (!fl4.saddr)
+               fl4.saddr = htonl(INADDR_DUMMY);
+
        icmp_push_reply(&icmp_param, &fl4, &ipc, &rt);
 ende:
        ip_rt_put(rt);
index 7b272bb..6b3c558 100644
@@ -1801,6 +1801,7 @@ void ip_mc_destroy_dev(struct in_device *in_dev)
        while ((i = rtnl_dereference(in_dev->mc_list)) != NULL) {
                in_dev->mc_list = i->next_rcu;
                in_dev->mc_count--;
+               ip_mc_clear_src(i);
                ip_ma_put(i);
        }
 }
index 1c9f71a..95a7183 100644
@@ -954,6 +954,7 @@ bool ping_rcv(struct sk_buff *skb)
        struct sock *sk;
        struct net *net = dev_net(skb->dev);
        struct icmphdr *icmph = icmp_hdr(skb);
+       bool rc = false;
 
        /* We assume the packet has already been checked by icmp_rcv */
 
@@ -968,14 +969,15 @@ bool ping_rcv(struct sk_buff *skb)
                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 
                pr_debug("rcv on socket %p\n", sk);
-               if (skb2)
-                       ping_queue_rcv_skb(sk, skb2);
+               if (skb2 && !ping_queue_rcv_skb(sk, skb2))
+                       rc = true;
                sock_put(sk);
-               return true;
        }
-       pr_debug("no socket, dropping\n");
 
-       return false;
+       if (!rc)
+               pr_debug("no socket, dropping\n");
+
+       return rc;
 }
 EXPORT_SYMBOL_GPL(ping_rcv);
 
index f6787c5..6a36ac9 100644
@@ -2056,6 +2056,19 @@ martian_source:
        return err;
 }
 
+/* get device for dst_alloc with local routes */
+static struct net_device *ip_rt_get_dev(struct net *net,
+                                       const struct fib_result *res)
+{
+       struct fib_nh_common *nhc = res->fi ? res->nhc : NULL;
+       struct net_device *dev = NULL;
+
+       if (nhc)
+               dev = l3mdev_master_dev_rcu(nhc->nhc_dev);
+
+       return dev ? : net->loopback_dev;
+}
+
 /*
  *     NOTE. We drop all the packets that has local source
  *     addresses, because every properly looped back packet
@@ -2212,7 +2225,7 @@ local_input:
                }
        }
 
-       rth = rt_dst_alloc(l3mdev_master_dev_rcu(dev) ? : net->loopback_dev,
+       rth = rt_dst_alloc(ip_rt_get_dev(net, res),
                           flags | RTCF_LOCAL, res->type,
                           IN_DEV_ORCONF(in_dev, NOPOLICY), false);
        if (!rth)
index 15f5504..1307ad0 100644
@@ -2607,6 +2607,9 @@ void udp_destroy_sock(struct sock *sk)
 {
        struct udp_sock *up = udp_sk(sk);
        bool slow = lock_sock_fast(sk);
+
+       /* protects from races with udp_abort() */
+       sock_set_flag(sk, SOCK_DEAD);
        udp_flush_pending_frames(sk);
        unlock_sock_fast(sk, slow);
        if (static_branch_unlikely(&udp_encap_needed_key)) {
@@ -2857,10 +2860,17 @@ int udp_abort(struct sock *sk, int err)
 {
        lock_sock(sk);
 
+       /* udp{v6}_destroy_sock() sets it under the sk lock, avoid racing
+        * with close()
+        */
+       if (sock_flag(sk, SOCK_DEAD))
+               goto out;
+
        sk->sk_err = err;
        sk->sk_error_report(sk);
        __udp_disconnect(sk, 0);
 
+out:
        release_sock(sk);
 
        return 0;
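
udp_destroy_sock() and udp_abort() both run under the socket lock, but nothing told a late udp_abort() that close() had already begun tearing the socket down; publishing SOCK_DEAD under the lock lets the abort path bail out. A pthread sketch of the same close-versus-abort handshake, all names hypothetical:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct sock_stub {
        pthread_mutex_t lock;
        bool dead;              /* set under lock by close() */
        int err;
};

static void sock_close(struct sock_stub *sk)
{
        pthread_mutex_lock(&sk->lock);
        sk->dead = true;        /* mirrors sock_set_flag(sk, SOCK_DEAD) */
        /* ... flush pending frames ... */
        pthread_mutex_unlock(&sk->lock);
}

static int sock_abort(struct sock_stub *sk, int err)
{
        pthread_mutex_lock(&sk->lock);
        if (sk->dead)           /* close() won the race: do nothing */
                goto out;
        sk->err = err;
        /* ... report error, disconnect ... */
out:
        pthread_mutex_unlock(&sk->lock);
        return 0;
}

int main(void)
{
        struct sock_stub sk = { .lock = PTHREAD_MUTEX_INITIALIZER };

        sock_close(&sk);
        sock_abort(&sk, 111);   /* arrives late; skipped safely */
        printf("err=%d (0 means the abort was skipped)\n", sk.err);
        return 0;
}
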
index b0ef65e..701eb82 100644
@@ -5827,7 +5827,7 @@ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla,
                return -EAFNOSUPPORT;
 
        if (nla_parse_nested_deprecated(tb, IFLA_INET6_MAX, nla, NULL, NULL) < 0)
-               BUG();
+               return -EINVAL;
 
        if (tb[IFLA_INET6_TOKEN]) {
                err = inet6_set_iftoken(idev, nla_data(tb[IFLA_INET6_TOKEN]),
index e204163..92f3235 100644
@@ -135,6 +135,17 @@ void nft_fib6_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
 }
 EXPORT_SYMBOL_GPL(nft_fib6_eval_type);
 
+static bool nft_fib_v6_skip_icmpv6(const struct sk_buff *skb, u8 next, const struct ipv6hdr *iph)
+{
+       if (likely(next != IPPROTO_ICMPV6))
+               return false;
+
+       if (ipv6_addr_type(&iph->saddr) != IPV6_ADDR_ANY)
+               return false;
+
+       return ipv6_addr_type(&iph->daddr) & IPV6_ADDR_LINKLOCAL;
+}
+
 void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
                   const struct nft_pktinfo *pkt)
 {
@@ -163,10 +174,13 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
 
        lookup_flags = nft_fib6_flowi_init(&fl6, priv, pkt, oif, iph);
 
-       if (nft_hook(pkt) == NF_INET_PRE_ROUTING &&
-           nft_fib_is_loopback(pkt->skb, nft_in(pkt))) {
-               nft_fib_store_result(dest, priv, nft_in(pkt));
-               return;
+       if (nft_hook(pkt) == NF_INET_PRE_ROUTING ||
+           nft_hook(pkt) == NF_INET_INGRESS) {
+               if (nft_fib_is_loopback(pkt->skb, nft_in(pkt)) ||
+                   nft_fib_v6_skip_icmpv6(pkt->skb, pkt->tprot, iph)) {
+                       nft_fib_store_result(dest, priv, nft_in(pkt));
+                       return;
+               }
        }
 
        *dest = 0;
index 199b080..3fcd86f 100644
@@ -1598,6 +1598,9 @@ void udpv6_destroy_sock(struct sock *sk)
 {
        struct udp_sock *up = udp_sk(sk);
        lock_sock(sk);
+
+       /* protects from races with udp_abort() */
+       sock_set_flag(sk, SOCK_DEAD);
        udp_v6_flush_pending_frames(sk);
        release_sock(sk);
 
index 1c572c8..6201965 100644
@@ -1066,11 +1066,6 @@ out_error:
                goto partial_message;
        }
 
-       if (skb_has_frag_list(head)) {
-               kfree_skb_list(skb_shinfo(head)->frag_list);
-               skb_shinfo(head)->frag_list = NULL;
-       }
-
        if (head != kcm->seq_skb)
                kfree_skb(head);
 
index 9245c04..fc34ae2 100644
@@ -4,7 +4,7 @@
  *
  * Copyright 2007      Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
- * Copyright (C) 2018 - 2019 Intel Corporation
+ * Copyright (C) 2018 - 2019, 2021 Intel Corporation
  */
 
 #include <linux/debugfs.h>
@@ -387,10 +387,17 @@ static ssize_t reset_write(struct file *file, const char __user *user_buf,
                           size_t count, loff_t *ppos)
 {
        struct ieee80211_local *local = file->private_data;
+       int ret;
 
        rtnl_lock();
+       wiphy_lock(local->hw.wiphy);
        __ieee80211_suspend(&local->hw, NULL);
-       __ieee80211_resume(&local->hw);
+       ret = __ieee80211_resume(&local->hw);
+       wiphy_unlock(local->hw.wiphy);
+
+       if (ret)
+               cfg80211_shutdown_all_interfaces(local->hw.wiphy);
+
        rtnl_unlock();
 
        return count;
index 214404a..648696b 100644
@@ -1442,7 +1442,7 @@ ieee80211_get_sband(struct ieee80211_sub_if_data *sdata)
        rcu_read_lock();
        chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
 
-       if (WARN_ON_ONCE(!chanctx_conf)) {
+       if (!chanctx_conf) {
                rcu_read_unlock();
                return NULL;
        }
index 2e2f73a..137fa4c 100644
@@ -476,14 +476,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do
                                   GFP_KERNEL);
        }
 
-       /* APs need special treatment */
        if (sdata->vif.type == NL80211_IFTYPE_AP) {
-               struct ieee80211_sub_if_data *vlan, *tmpsdata;
-
-               /* down all dependent devices, that is VLANs */
-               list_for_each_entry_safe(vlan, tmpsdata, &sdata->u.ap.vlans,
-                                        u.vlan.list)
-                       dev_close(vlan->dev);
                WARN_ON(!list_empty(&sdata->u.ap.vlans));
        } else if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) {
                /* remove all packets in parent bc_buf pointing to this dev */
@@ -641,6 +634,15 @@ static int ieee80211_stop(struct net_device *dev)
 {
        struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
+       /* close all dependent VLAN interfaces before locking wiphy */
+       if (sdata->vif.type == NL80211_IFTYPE_AP) {
+               struct ieee80211_sub_if_data *vlan, *tmpsdata;
+
+               list_for_each_entry_safe(vlan, tmpsdata, &sdata->u.ap.vlans,
+                                        u.vlan.list)
+                       dev_close(vlan->dev);
+       }
+
        wiphy_lock(sdata->local->hw.wiphy);
        ieee80211_do_stop(sdata, true);
        wiphy_unlock(sdata->local->hw.wiphy);
@@ -1591,6 +1593,9 @@ static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata,
 
        switch (sdata->vif.type) {
        case NL80211_IFTYPE_AP:
+               if (!list_empty(&sdata->u.ap.vlans))
+                       return -EBUSY;
+               break;
        case NL80211_IFTYPE_STATION:
        case NL80211_IFTYPE_ADHOC:
        case NL80211_IFTYPE_OCB:
index 62145e5..f33a3ac 100644
@@ -252,6 +252,7 @@ static void ieee80211_restart_work(struct work_struct *work)
        struct ieee80211_local *local =
                container_of(work, struct ieee80211_local, restart_work);
        struct ieee80211_sub_if_data *sdata;
+       int ret;
 
        /* wait for scan work complete */
        flush_workqueue(local->workqueue);
@@ -301,8 +302,12 @@ static void ieee80211_restart_work(struct work_struct *work)
        /* wait for all packet processing to be done */
        synchronize_net();
 
-       ieee80211_reconfig(local);
+       ret = ieee80211_reconfig(local);
        wiphy_unlock(local->hw.wiphy);
+
+       if (ret)
+               cfg80211_shutdown_all_interfaces(local->hw.wiphy);
+
        rtnl_unlock();
 }
 
index 2480bd0..3f2aad2 100644
@@ -4062,10 +4062,14 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
                if (elems.mbssid_config_ie)
                        bss_conf->profile_periodicity =
                                elems.mbssid_config_ie->profile_periodicity;
+               else
+                       bss_conf->profile_periodicity = 0;
 
                if (elems.ext_capab_len >= 11 &&
                    (elems.ext_capab[10] & WLAN_EXT_CAPA11_EMA_SUPPORT))
                        bss_conf->ema_ap = true;
+               else
+                       bss_conf->ema_ap = false;
 
                /* continue assoc process */
                ifmgd->assoc_data->timeout = jiffies;
@@ -5802,12 +5806,16 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
                                              beacon_ies->data, beacon_ies->len);
                if (elem && elem->datalen >= 3)
                        sdata->vif.bss_conf.profile_periodicity = elem->data[2];
+               else
+                       sdata->vif.bss_conf.profile_periodicity = 0;
 
                elem = cfg80211_find_elem(WLAN_EID_EXT_CAPABILITY,
                                          beacon_ies->data, beacon_ies->len);
                if (elem && elem->datalen >= 11 &&
                    (elem->data[10] & WLAN_EXT_CAPA11_EMA_SUPPORT))
                        sdata->vif.bss_conf.ema_ap = true;
+               else
+                       sdata->vif.bss_conf.ema_ap = false;
        } else {
                assoc_data->timeout = jiffies;
                assoc_data->timeout_started = true;
index 6487b05..a6f3fb4 100644
@@ -1514,7 +1514,7 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta,
            (info->control.flags & IEEE80211_TX_CTRL_PORT_CTRL_PROTO))
                return;
 
-       if (time_is_before_jiffies(mi->sample_time))
+       if (time_is_after_jiffies(mi->sample_time))
                return;
 
        mi->sample_time = jiffies + MINSTREL_SAMPLE_INTERVAL;
index 1bb43ed..af0ef45 100644
@@ -2240,17 +2240,15 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
        sc = le16_to_cpu(hdr->seq_ctrl);
        frag = sc & IEEE80211_SCTL_FRAG;
 
-       if (is_multicast_ether_addr(hdr->addr1)) {
-               I802_DEBUG_INC(rx->local->dot11MulticastReceivedFrameCount);
-               goto out_no_led;
-       }
-
        if (rx->sta)
                cache = &rx->sta->frags;
 
        if (likely(!ieee80211_has_morefrags(fc) && frag == 0))
                goto out;
 
+       if (is_multicast_ether_addr(hdr->addr1))
+               return RX_DROP_MONITOR;
+
        I802_DEBUG_INC(rx->local->rx_handlers_fragments);
 
        if (skb_linearize(rx->skb))
@@ -2376,7 +2374,6 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
 
  out:
        ieee80211_led_rx(rx->local);
- out_no_led:
        if (rx->sta)
                rx->sta->rx_stats.packets++;
        return RX_CONTINUE;
index d4cc9ac..6b50cb5 100644
@@ -251,13 +251,24 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
        struct ieee80211_mgmt *mgmt = (void *)skb->data;
        struct ieee80211_bss *bss;
        struct ieee80211_channel *channel;
+       size_t min_hdr_len = offsetof(struct ieee80211_mgmt,
+                                     u.probe_resp.variable);
+
+       if (!ieee80211_is_probe_resp(mgmt->frame_control) &&
+           !ieee80211_is_beacon(mgmt->frame_control) &&
+           !ieee80211_is_s1g_beacon(mgmt->frame_control))
+               return;
 
        if (ieee80211_is_s1g_beacon(mgmt->frame_control)) {
-               if (skb->len < 15)
-                       return;
-       } else if (skb->len < 24 ||
-                (!ieee80211_is_probe_resp(mgmt->frame_control) &&
-                 !ieee80211_is_beacon(mgmt->frame_control)))
+               if (ieee80211_is_s1g_short_beacon(mgmt->frame_control))
+                       min_hdr_len = offsetof(struct ieee80211_ext,
+                                              u.s1g_short_beacon.variable);
+               else
+                       min_hdr_len = offsetof(struct ieee80211_ext,
+                                              u.s1g_beacon);
+       }
+
+       if (skb->len < min_hdr_len)
                return;
 
        sdata1 = rcu_dereference(local->scan_sdata);
index 0b719f3..2651498 100644
@@ -2014,6 +2014,26 @@ void ieee80211_xmit(struct ieee80211_sub_if_data *sdata,
        ieee80211_tx(sdata, sta, skb, false);
 }
 
+static bool ieee80211_validate_radiotap_len(struct sk_buff *skb)
+{
+       struct ieee80211_radiotap_header *rthdr =
+               (struct ieee80211_radiotap_header *)skb->data;
+
+       /* check for not even having the fixed radiotap header part */
+       if (unlikely(skb->len < sizeof(struct ieee80211_radiotap_header)))
+               return false; /* too short to be possibly valid */
+
+       /* is it a header version we can trust to find length from? */
+       if (unlikely(rthdr->it_version))
+               return false; /* only version 0 is supported */
+
+       /* does the skb contain enough to deliver on the alleged length? */
+       if (unlikely(skb->len < ieee80211_get_radiotap_len(skb->data)))
+               return false; /* skb too short for claimed rt header extent */
+
+       return true;
+}
+
 bool ieee80211_parse_tx_radiotap(struct sk_buff *skb,
                                 struct net_device *dev)
 {
@@ -2022,8 +2042,6 @@ bool ieee80211_parse_tx_radiotap(struct sk_buff *skb,
        struct ieee80211_radiotap_header *rthdr =
                (struct ieee80211_radiotap_header *) skb->data;
        struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
-       struct ieee80211_supported_band *sband =
-               local->hw.wiphy->bands[info->band];
        int ret = ieee80211_radiotap_iterator_init(&iterator, rthdr, skb->len,
                                                   NULL);
        u16 txflags;
@@ -2036,17 +2054,8 @@ bool ieee80211_parse_tx_radiotap(struct sk_buff *skb,
        u8 vht_mcs = 0, vht_nss = 0;
        int i;
 
-       /* check for not even having the fixed radiotap header part */
-       if (unlikely(skb->len < sizeof(struct ieee80211_radiotap_header)))
-               return false; /* too short to be possibly valid */
-
-       /* is it a header version we can trust to find length from? */
-       if (unlikely(rthdr->it_version))
-               return false; /* only version 0 is supported */
-
-       /* does the skb contain enough to deliver on the alleged length? */
-       if (unlikely(skb->len < ieee80211_get_radiotap_len(skb->data)))
-               return false; /* skb too short for claimed rt header extent */
+       if (!ieee80211_validate_radiotap_len(skb))
+               return false;
 
        info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT |
                       IEEE80211_TX_CTL_DONTFRAG;
@@ -2186,6 +2195,9 @@ bool ieee80211_parse_tx_radiotap(struct sk_buff *skb,
                return false;
 
        if (rate_found) {
+               struct ieee80211_supported_band *sband =
+                       local->hw.wiphy->bands[info->band];
+
                info->control.flags |= IEEE80211_TX_CTRL_RATE_INJECT;
 
                for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) {
@@ -2199,7 +2211,7 @@ bool ieee80211_parse_tx_radiotap(struct sk_buff *skb,
                } else if (rate_flags & IEEE80211_TX_RC_VHT_MCS) {
                        ieee80211_rate_set_vht(info->control.rates, vht_mcs,
                                               vht_nss);
-               } else {
+               } else if (sband) {
                        for (i = 0; i < sband->n_bitrates; i++) {
                                if (rate * 5 != sband->bitrates[i].bitrate)
                                        continue;
@@ -2236,8 +2248,8 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb,
        info->flags = IEEE80211_TX_CTL_REQ_TX_STATUS |
                      IEEE80211_TX_CTL_INJECTED;
 
-       /* Sanity-check and process the injection radiotap header */
-       if (!ieee80211_parse_tx_radiotap(skb, dev))
+       /* Sanity-check the length of the radiotap header */
+       if (!ieee80211_validate_radiotap_len(skb))
                goto fail;
 
        /* we now know there is a radiotap header with a length we can use */
@@ -2351,6 +2363,14 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb,
        ieee80211_select_queue_80211(sdata, skb, hdr);
        skb_set_queue_mapping(skb, ieee80211_ac_from_tid(skb->priority));
 
+       /*
+        * Process the radiotap header. This will now take into account the
+        * selected chandef above to accurately set injection rates and
+        * retransmissions.
+        */
+       if (!ieee80211_parse_tx_radiotap(skb, dev))
+               goto fail_rcu;
+
        /* remove the injection radiotap header */
        skb_pull(skb, len_rthdr);
 
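
The refactor splits radiotap handling in two: a cheap ieee80211_validate_radiotap_len() length/version check runs up front, and the full ieee80211_parse_tx_radiotap() pass now runs only after the chandef is selected, so injected rate information is interpreted against the right band (hence the new sband NULL check). A sketch of the validate-then-parse split — rt_hdr and its field layout are hypothetical, not the real radiotap format, and the example assumes a little-endian host:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct rt_hdr {                 /* fixed part of a radiotap-like header */
        uint8_t version;        /* only version 0 is parseable */
        uint8_t pad;
        uint16_t len;           /* claimed total header length */
};

/* Phase 1: is the buffer long enough to even trust the claimed length? */
static bool validate_len(const uint8_t *buf, size_t buflen)
{
        struct rt_hdr h;

        if (buflen < sizeof(h))
                return false;
        memcpy(&h, buf, sizeof(h));
        if (h.version != 0)
                return false;
        return buflen >= h.len;
}

/* Phase 2: full parse, safe to defer until more context is known. */
static bool parse(const uint8_t *buf, size_t buflen)
{
        if (!validate_len(buf, buflen))
                return false;
        /* ... walk the individual fields ... */
        return true;
}

int main(void)
{
        uint8_t frame[8] = { 0, 0, 8, 0 };  /* version 0, len 8 (LE) */

        printf("validate=%d parse=%d\n",
               validate_len(frame, sizeof(frame)),
               parse(frame, sizeof(frame)));
        return 0;
}
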
index 0a0481f..060059e 100644
@@ -947,7 +947,7 @@ static void ieee80211_parse_extension_element(u32 *crc,
 
        switch (elem->data[0]) {
        case WLAN_EID_EXT_HE_MU_EDCA:
-               if (len == sizeof(*elems->mu_edca_param_set)) {
+               if (len >= sizeof(*elems->mu_edca_param_set)) {
                        elems->mu_edca_param_set = data;
                        if (crc)
                                *crc = crc32_be(*crc, (void *)elem,
@@ -968,7 +968,7 @@ static void ieee80211_parse_extension_element(u32 *crc,
                }
                break;
        case WLAN_EID_EXT_UORA:
-               if (len == 1)
+               if (len >= 1)
                        elems->uora_element = data;
                break;
        case WLAN_EID_EXT_MAX_CHANNEL_SWITCH_TIME:
@@ -976,7 +976,7 @@ static void ieee80211_parse_extension_element(u32 *crc,
                        elems->max_channel_switch_time = data;
                break;
        case WLAN_EID_EXT_MULTIPLE_BSSID_CONFIGURATION:
-               if (len == sizeof(*elems->mbssid_config_ie))
+               if (len >= sizeof(*elems->mbssid_config_ie))
                        elems->mbssid_config_ie = data;
                break;
        case WLAN_EID_EXT_HE_SPR:
@@ -985,7 +985,7 @@ static void ieee80211_parse_extension_element(u32 *crc,
                        elems->he_spr = data;
                break;
        case WLAN_EID_EXT_HE_6GHZ_CAPA:
-               if (len == sizeof(*elems->he_6ghz_capa))
+               if (len >= sizeof(*elems->he_6ghz_capa))
                        elems->he_6ghz_capa = data;
                break;
        }
@@ -1074,14 +1074,14 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
 
                switch (id) {
                case WLAN_EID_LINK_ID:
-                       if (elen + 2 != sizeof(struct ieee80211_tdls_lnkie)) {
+                       if (elen + 2 < sizeof(struct ieee80211_tdls_lnkie)) {
                                elem_parse_failed = true;
                                break;
                        }
                        elems->lnk_id = (void *)(pos - 2);
                        break;
                case WLAN_EID_CHAN_SWITCH_TIMING:
-                       if (elen != sizeof(struct ieee80211_ch_switch_timing)) {
+                       if (elen < sizeof(struct ieee80211_ch_switch_timing)) {
                                elem_parse_failed = true;
                                break;
                        }
@@ -1244,7 +1244,7 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
                        elems->sec_chan_offs = (void *)pos;
                        break;
                case WLAN_EID_CHAN_SWITCH_PARAM:
-                       if (elen !=
+                       if (elen <
                            sizeof(*elems->mesh_chansw_params_ie)) {
                                elem_parse_failed = true;
                                break;
@@ -1253,7 +1253,7 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
                        break;
                case WLAN_EID_WIDE_BW_CHANNEL_SWITCH:
                        if (!action ||
-                           elen != sizeof(*elems->wide_bw_chansw_ie)) {
+                           elen < sizeof(*elems->wide_bw_chansw_ie)) {
                                elem_parse_failed = true;
                                break;
                        }
@@ -1272,7 +1272,7 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
                        ie = cfg80211_find_ie(WLAN_EID_WIDE_BW_CHANNEL_SWITCH,
                                              pos, elen);
                        if (ie) {
-                               if (ie[1] == sizeof(*elems->wide_bw_chansw_ie))
+                               if (ie[1] >= sizeof(*elems->wide_bw_chansw_ie))
                                        elems->wide_bw_chansw_ie =
                                                (void *)(ie + 2);
                                else
@@ -1316,7 +1316,7 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
                        elems->cisco_dtpc_elem = pos;
                        break;
                case WLAN_EID_ADDBA_EXT:
-                       if (elen != sizeof(struct ieee80211_addba_ext_ie)) {
+                       if (elen < sizeof(struct ieee80211_addba_ext_ie)) {
                                elem_parse_failed = true;
                                break;
                        }
@@ -1342,7 +1342,7 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
                                                          elem, elems);
                        break;
                case WLAN_EID_S1G_CAPABILITIES:
-                       if (elen == sizeof(*elems->s1g_capab))
+                       if (elen >= sizeof(*elems->s1g_capab))
                                elems->s1g_capab = (void *)pos;
                        else
                                elem_parse_failed = true;
@@ -2178,8 +2178,6 @@ static void ieee80211_handle_reconfig_failure(struct ieee80211_local *local)
        list_for_each_entry(ctx, &local->chanctx_list, list)
                ctx->driver_present = false;
        mutex_unlock(&local->chanctx_mtx);
-
-       cfg80211_shutdown_all_interfaces(local->hw.wiphy);
 }
 
 static void ieee80211_assign_chanctx(struct ieee80211_local *local,
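
The parsing hunks above share one pattern: element-length checks move from == to >= (and exact-size tests become minimum-size tests), so ieee80211_parse_extension_element() and _ieee802_11_parse_elems_crc() keep accepting elements that a newer spec revision extends with trailing fields the parser does not yet understand. A tiny sketch of that convention, with a hypothetical element layout:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct capa_v1 {                /* the fields this parser knows about */
        uint8_t flags;
        uint8_t max_chan;
};

/* Accept any element at least as long as what we understand; a peer
 * speaking a newer revision may append fields we simply ignore. */
static const struct capa_v1 *parse_capa(const uint8_t *elem, size_t elen)
{
        if (elen < sizeof(struct capa_v1))  /* was: elen != sizeof(...) */
                return NULL;
        return (const struct capa_v1 *)elem;
}

int main(void)
{
        uint8_t newer[4] = { 0x01, 0x24, 0xff, 0xff };  /* 2 extra bytes */
        const struct capa_v1 *c = parse_capa(newer, sizeof(newer));

        printf("%s\n", c ? "accepted" : "rejected");    /* accepted */
        return 0;
}
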
index 6b825fb..9b263f2 100644
@@ -356,6 +356,8 @@ void mptcp_get_options(const struct sk_buff *skb,
                        length--;
                        continue;
                default:
+                       if (length < 2)
+                               return;
                        opsize = *ptr++;
                        if (opsize < 2) /* "silly options" */
                                return;
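
This is the classic kind/length option-walking bug: after consuming the kind byte, the old loop read the size byte without checking that one remained, so a truncated option list walked off the end of the buffer. The same length-below-2 guard lands in synproxy_parse_options() and the sch_cake helpers further down. A standalone sketch of the guarded loop, with hypothetical option constants:

#include <stdint.h>
#include <stdio.h>

#define OPT_EOL  0
#define OPT_NOP  1

/* Walk kind/length/value options; returns the number of options seen. */
static int walk_options(const uint8_t *ptr, int length)
{
        int seen = 0;

        while (length > 0) {
                int opcode = *ptr++;
                int opsize;

                if (opcode == OPT_EOL)
                        break;
                if (opcode == OPT_NOP) {
                        length--;
                        continue;
                }
                if (length < 2)     /* no room for the size byte: stop */
                        break;
                opsize = *ptr++;
                if (opsize < 2 || opsize > length)
                        break;      /* silly or overlong option */
                seen++;
                ptr += opsize - 2;
                length -= opsize;
        }
        return seen;
}

int main(void)
{
        /* a NOP, then a 4-byte option, then a truncated lone kind byte */
        uint8_t opts[] = { OPT_NOP, 8, 4, 0xaa, 0xbb, 42 };

        printf("options seen: %d\n", walk_options(opts, sizeof(opts)));
        return 0;
}
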
index 5edc686..6323500 100644
@@ -280,11 +280,13 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
 
        /* try to fetch required memory from subflow */
        if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
-               if (ssk->sk_forward_alloc < skb->truesize)
-                       goto drop;
-               __sk_mem_reclaim(ssk, skb->truesize);
-               if (!sk_rmem_schedule(sk, skb, skb->truesize))
+               int amount = sk_mem_pages(skb->truesize) << SK_MEM_QUANTUM_SHIFT;
+
+               if (ssk->sk_forward_alloc < amount)
                        goto drop;
+
+               ssk->sk_forward_alloc -= amount;
+               sk->sk_forward_alloc += amount;
        }
 
        /* the skb map_seq accounts for the skb offset:
@@ -668,18 +670,22 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk)
 /* In most cases we will be able to lock the mptcp socket.  If it's already
  * owned, we need to defer to the work queue to avoid ABBA deadlock.
  */
-static void move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk)
+static bool move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk)
 {
        struct sock *sk = (struct sock *)msk;
        unsigned int moved = 0;
 
        if (inet_sk_state_load(sk) == TCP_CLOSE)
-               return;
-
-       mptcp_data_lock(sk);
+               return false;
 
        __mptcp_move_skbs_from_subflow(msk, ssk, &moved);
        __mptcp_ofo_queue(msk);
+       if (unlikely(ssk->sk_err)) {
+               if (!sock_owned_by_user(sk))
+                       __mptcp_error_report(sk);
+               else
+                       set_bit(MPTCP_ERROR_REPORT,  &msk->flags);
+       }
 
        /* If the moves have caught up with the DATA_FIN sequence number
         * it's time to ack the DATA_FIN and change socket state, but
@@ -688,7 +694,7 @@ static void move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk)
         */
        if (mptcp_pending_data_fin(sk, NULL))
                mptcp_schedule_work(sk);
-       mptcp_data_unlock(sk);
+       return moved > 0;
 }
 
 void mptcp_data_ready(struct sock *sk, struct sock *ssk)
@@ -696,7 +702,6 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
        struct mptcp_sock *msk = mptcp_sk(sk);
        int sk_rbuf, ssk_rbuf;
-       bool wake;
 
        /* The peer can send data while we are shutting down this
         * subflow at msk destruction time, but we must avoid enqueuing
@@ -705,28 +710,22 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
        if (unlikely(subflow->disposable))
                return;
 
-       /* move_skbs_to_msk below can legitly clear the data_avail flag,
-        * but we will need later to properly woke the reader, cache its
-        * value
-        */
-       wake = subflow->data_avail == MPTCP_SUBFLOW_DATA_AVAIL;
-       if (wake)
-               set_bit(MPTCP_DATA_READY, &msk->flags);
-
        ssk_rbuf = READ_ONCE(ssk->sk_rcvbuf);
        sk_rbuf = READ_ONCE(sk->sk_rcvbuf);
        if (unlikely(ssk_rbuf > sk_rbuf))
                sk_rbuf = ssk_rbuf;
 
-       /* over limit? can't append more skbs to msk */
+       /* over limit? can't append more skbs to msk; also, no need to wake up */
        if (atomic_read(&sk->sk_rmem_alloc) > sk_rbuf)
-               goto wake;
-
-       move_skbs_to_msk(msk, ssk);
+               return;
 
-wake:
-       if (wake)
+       /* Wake-up the reader only for in-sequence data */
+       mptcp_data_lock(sk);
+       if (move_skbs_to_msk(msk, ssk)) {
+               set_bit(MPTCP_DATA_READY, &msk->flags);
                sk->sk_data_ready(sk);
+       }
+       mptcp_data_unlock(sk);
 }
 
 static bool mptcp_do_flush_join_list(struct mptcp_sock *msk)
@@ -858,7 +857,7 @@ static struct sock *mptcp_subflow_recv_lookup(const struct mptcp_sock *msk)
        sock_owned_by_me(sk);
 
        mptcp_for_each_subflow(msk, subflow) {
-               if (subflow->data_avail)
+               if (READ_ONCE(subflow->data_avail))
                        return mptcp_subflow_tcp_sock(subflow);
        }
 
@@ -1955,6 +1954,9 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk)
                done = __mptcp_move_skbs_from_subflow(msk, ssk, &moved);
                mptcp_data_unlock(sk);
                tcp_cleanup_rbuf(ssk, moved);
+
+               if (unlikely(ssk->sk_err))
+                       __mptcp_error_report(sk);
                unlock_sock_fast(ssk, slowpath);
        } while (!done);
 
index 0c6f99c..385796f 100644
@@ -362,7 +362,6 @@ mptcp_subflow_rsk(const struct request_sock *rsk)
 enum mptcp_data_avail {
        MPTCP_SUBFLOW_NODATA,
        MPTCP_SUBFLOW_DATA_AVAIL,
-       MPTCP_SUBFLOW_OOO_DATA
 };
 
 struct mptcp_delegated_action {
index ef3d037..be1de40 100644
@@ -784,10 +784,10 @@ static u64 expand_seq(u64 old_seq, u16 old_data_len, u64 seq)
        return seq | ((old_seq + old_data_len + 1) & GENMASK_ULL(63, 32));
 }
 
-static void warn_bad_map(struct mptcp_subflow_context *subflow, u32 ssn)
+static void dbg_bad_map(struct mptcp_subflow_context *subflow, u32 ssn)
 {
-       WARN_ONCE(1, "Bad mapping: ssn=%d map_seq=%d map_data_len=%d",
-                 ssn, subflow->map_subflow_seq, subflow->map_data_len);
+       pr_debug("Bad mapping: ssn=%d map_seq=%d map_data_len=%d",
+                ssn, subflow->map_subflow_seq, subflow->map_data_len);
 }
 
 static bool skb_is_fully_mapped(struct sock *ssk, struct sk_buff *skb)
@@ -812,13 +812,13 @@ static bool validate_mapping(struct sock *ssk, struct sk_buff *skb)
                /* Mapping covers data later in the subflow stream,
                 * currently unsupported.
                 */
-               warn_bad_map(subflow, ssn);
+               dbg_bad_map(subflow, ssn);
                return false;
        }
        if (unlikely(!before(ssn, subflow->map_subflow_seq +
                                  subflow->map_data_len))) {
                /* Mapping covers past subflow data, invalid */
-               warn_bad_map(subflow, ssn + skb->len);
+               dbg_bad_map(subflow, ssn);
                return false;
        }
        return true;
@@ -1000,7 +1000,7 @@ static bool subflow_check_data_avail(struct sock *ssk)
        struct sk_buff *skb;
 
        if (!skb_peek(&ssk->sk_receive_queue))
-               subflow->data_avail = 0;
+               WRITE_ONCE(subflow->data_avail, 0);
        if (subflow->data_avail)
                return true;
 
@@ -1039,18 +1039,13 @@ static bool subflow_check_data_avail(struct sock *ssk)
                ack_seq = mptcp_subflow_get_mapped_dsn(subflow);
                pr_debug("msk ack_seq=%llx subflow ack_seq=%llx", old_ack,
                         ack_seq);
-               if (ack_seq == old_ack) {
-                       subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL;
-                       break;
-               } else if (after64(ack_seq, old_ack)) {
-                       subflow->data_avail = MPTCP_SUBFLOW_OOO_DATA;
-                       break;
+               if (unlikely(before64(ack_seq, old_ack))) {
+                       mptcp_subflow_discard_data(ssk, skb, old_ack - ack_seq);
+                       continue;
                }
 
-               /* only accept in-sequence mapping. Old values are spurious
-                * retransmission
-                */
-               mptcp_subflow_discard_data(ssk, skb, old_ack - ack_seq);
+               WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL);
+               break;
        }
        return true;
 
@@ -1065,12 +1060,11 @@ fallback:
                 * subflow_error_report() will introduce the appropriate barriers
                 */
                ssk->sk_err = EBADMSG;
-               ssk->sk_error_report(ssk);
                tcp_set_state(ssk, TCP_CLOSE);
                subflow->reset_transient = 0;
                subflow->reset_reason = MPTCP_RST_EMPTCP;
                tcp_send_active_reset(ssk, GFP_ATOMIC);
-               subflow->data_avail = 0;
+               WRITE_ONCE(subflow->data_avail, 0);
                return false;
        }
 
@@ -1080,7 +1074,7 @@ fallback:
        subflow->map_seq = READ_ONCE(msk->ack_seq);
        subflow->map_data_len = skb->len;
        subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq - subflow->ssn_offset;
-       subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL;
+       WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL);
        return true;
 }
 
@@ -1092,7 +1086,7 @@ bool mptcp_subflow_data_available(struct sock *sk)
        if (subflow->map_valid &&
            mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len) {
                subflow->map_valid = 0;
-               subflow->data_avail = 0;
+               WRITE_ONCE(subflow->data_avail, 0);
 
                pr_debug("Done with mapping: seq=%u data_len=%u",
                         subflow->map_subflow_seq,
@@ -1120,41 +1114,6 @@ void mptcp_space(const struct sock *ssk, int *space, int *full_space)
        *full_space = tcp_full_space(sk);
 }
 
-static void subflow_data_ready(struct sock *sk)
-{
-       struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
-       u16 state = 1 << inet_sk_state_load(sk);
-       struct sock *parent = subflow->conn;
-       struct mptcp_sock *msk;
-
-       msk = mptcp_sk(parent);
-       if (state & TCPF_LISTEN) {
-               /* MPJ subflow are removed from accept queue before reaching here,
-                * avoid stray wakeups
-                */
-               if (reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue))
-                       return;
-
-               set_bit(MPTCP_DATA_READY, &msk->flags);
-               parent->sk_data_ready(parent);
-               return;
-       }
-
-       WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable &&
-                    !subflow->mp_join && !(state & TCPF_CLOSE));
-
-       if (mptcp_subflow_data_available(sk))
-               mptcp_data_ready(parent, sk);
-}
-
-static void subflow_write_space(struct sock *ssk)
-{
-       struct sock *sk = mptcp_subflow_ctx(ssk)->conn;
-
-       mptcp_propagate_sndbuf(sk, ssk);
-       mptcp_write_space(sk);
-}
-
 void __mptcp_error_report(struct sock *sk)
 {
        struct mptcp_subflow_context *subflow;
@@ -1195,6 +1154,43 @@ static void subflow_error_report(struct sock *ssk)
        mptcp_data_unlock(sk);
 }
 
+static void subflow_data_ready(struct sock *sk)
+{
+       struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+       u16 state = 1 << inet_sk_state_load(sk);
+       struct sock *parent = subflow->conn;
+       struct mptcp_sock *msk;
+
+       msk = mptcp_sk(parent);
+       if (state & TCPF_LISTEN) {
+               /* MPJ subflow are removed from accept queue before reaching here,
+                * avoid stray wakeups
+                */
+               if (reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue))
+                       return;
+
+               set_bit(MPTCP_DATA_READY, &msk->flags);
+               parent->sk_data_ready(parent);
+               return;
+       }
+
+       WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable &&
+                    !subflow->mp_join && !(state & TCPF_CLOSE));
+
+       if (mptcp_subflow_data_available(sk))
+               mptcp_data_ready(parent, sk);
+       else if (unlikely(sk->sk_err))
+               subflow_error_report(sk);
+}
+
+static void subflow_write_space(struct sock *ssk)
+{
+       struct sock *sk = mptcp_subflow_ctx(ssk)->conn;
+
+       mptcp_propagate_sndbuf(sk, ssk);
+       mptcp_write_space(sk);
+}
+
 static struct inet_connection_sock_af_ops *
 subflow_default_af_ops(struct sock *sk)
 {
@@ -1505,6 +1501,8 @@ static void subflow_state_change(struct sock *sk)
         */
        if (mptcp_subflow_data_available(sk))
                mptcp_data_ready(parent, sk);
+       else if (unlikely(sk->sk_err))
+               subflow_error_report(sk);
 
        subflow_sched_work_if_closed(mptcp_sk(parent), sk);
 
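
subflow->data_avail is now written with WRITE_ONCE() and read with READ_ONCE() because mptcp_subflow_recv_lookup() inspects it without holding the subflow lock; the annotations stop the compiler from tearing, caching, or re-reading the access. Relaxed C11 atomics are the closest portable analogue. A minimal sketch, names hypothetical:

#include <stdatomic.h>
#include <stdio.h>

enum data_avail { NODATA, DATA_AVAIL };

struct subflow_stub {
        _Atomic int data_avail;     /* read locklessly by the other side */
};

/* Producer side: equivalent in spirit to WRITE_ONCE(). */
static void set_avail(struct subflow_stub *s, enum data_avail v)
{
        atomic_store_explicit(&s->data_avail, v, memory_order_relaxed);
}

/* Consumer side: equivalent in spirit to READ_ONCE(). */
static int peek_avail(struct subflow_stub *s)
{
        return atomic_load_explicit(&s->data_avail, memory_order_relaxed);
}

int main(void)
{
        struct subflow_stub s = { .data_avail = NODATA };

        set_avail(&s, DATA_AVAIL);
        printf("avail=%d\n", peek_avail(&s));
        return 0;
}
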
index b100c04..3d6d494 100644
@@ -31,6 +31,9 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff,
        int length = (th->doff * 4) - sizeof(*th);
        u8 buf[40], *ptr;
 
+       if (unlikely(length < 0))
+               return false;
+
        ptr = skb_header_pointer(skb, doff + sizeof(*th), length, buf);
        if (ptr == NULL)
                return false;
@@ -47,6 +50,8 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff,
                        length--;
                        continue;
                default:
+                       if (length < 2)
+                               return true;
                        opsize = *ptr++;
                        if (opsize < 2)
                                return true;
index 72bc759..bf4d6ec 100644
@@ -4364,13 +4364,45 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
        err = nf_tables_set_alloc_name(&ctx, set, name);
        kfree(name);
        if (err < 0)
-               goto err_set_alloc_name;
+               goto err_set_name;
+
+       udata = NULL;
+       if (udlen) {
+               udata = set->data + size;
+               nla_memcpy(udata, nla[NFTA_SET_USERDATA], udlen);
+       }
+
+       INIT_LIST_HEAD(&set->bindings);
+       INIT_LIST_HEAD(&set->catchall_list);
+       set->table = table;
+       write_pnet(&set->net, net);
+       set->ops = ops;
+       set->ktype = ktype;
+       set->klen = desc.klen;
+       set->dtype = dtype;
+       set->objtype = objtype;
+       set->dlen = desc.dlen;
+       set->flags = flags;
+       set->size = desc.size;
+       set->policy = policy;
+       set->udlen = udlen;
+       set->udata = udata;
+       set->timeout = timeout;
+       set->gc_int = gc_int;
+
+       set->field_count = desc.field_count;
+       for (i = 0; i < desc.field_count; i++)
+               set->field_len[i] = desc.field_len[i];
+
+       err = ops->init(set, &desc, nla);
+       if (err < 0)
+               goto err_set_init;
 
        if (nla[NFTA_SET_EXPR]) {
                expr = nft_set_elem_expr_alloc(&ctx, set, nla[NFTA_SET_EXPR]);
                if (IS_ERR(expr)) {
                        err = PTR_ERR(expr);
-                       goto err_set_alloc_name;
+                       goto err_set_expr_alloc;
                }
                set->exprs[0] = expr;
                set->num_exprs++;
@@ -4381,75 +4413,44 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
 
                if (!(flags & NFT_SET_EXPR)) {
                        err = -EINVAL;
-                       goto err_set_alloc_name;
+                       goto err_set_expr_alloc;
                }
                i = 0;
                nla_for_each_nested(tmp, nla[NFTA_SET_EXPRESSIONS], left) {
                        if (i == NFT_SET_EXPR_MAX) {
                                err = -E2BIG;
-                               goto err_set_init;
+                               goto err_set_expr_alloc;
                        }
                        if (nla_type(tmp) != NFTA_LIST_ELEM) {
                                err = -EINVAL;
-                               goto err_set_init;
+                               goto err_set_expr_alloc;
                        }
                        expr = nft_set_elem_expr_alloc(&ctx, set, tmp);
                        if (IS_ERR(expr)) {
                                err = PTR_ERR(expr);
-                               goto err_set_init;
+                               goto err_set_expr_alloc;
                        }
                        set->exprs[i++] = expr;
                        set->num_exprs++;
                }
        }
 
-       udata = NULL;
-       if (udlen) {
-               udata = set->data + size;
-               nla_memcpy(udata, nla[NFTA_SET_USERDATA], udlen);
-       }
-
-       INIT_LIST_HEAD(&set->bindings);
-       INIT_LIST_HEAD(&set->catchall_list);
-       set->table = table;
-       write_pnet(&set->net, net);
-       set->ops   = ops;
-       set->ktype = ktype;
-       set->klen  = desc.klen;
-       set->dtype = dtype;
-       set->objtype = objtype;
-       set->dlen  = desc.dlen;
-       set->flags = flags;
-       set->size  = desc.size;
-       set->policy = policy;
-       set->udlen  = udlen;
-       set->udata  = udata;
-       set->timeout = timeout;
-       set->gc_int = gc_int;
        set->handle = nf_tables_alloc_handle(table);
 
-       set->field_count = desc.field_count;
-       for (i = 0; i < desc.field_count; i++)
-               set->field_len[i] = desc.field_len[i];
-
-       err = ops->init(set, &desc, nla);
-       if (err < 0)
-               goto err_set_init;
-
        err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set);
        if (err < 0)
-               goto err_set_trans;
+               goto err_set_expr_alloc;
 
        list_add_tail_rcu(&set->list, &table->sets);
        table->use++;
        return 0;
 
-err_set_trans:
-       ops->destroy(set);
-err_set_init:
+err_set_expr_alloc:
        for (i = 0; i < set->num_exprs; i++)
                nft_expr_destroy(&ctx, set->exprs[i]);
-err_set_alloc_name:
+
+       ops->destroy(set);
+err_set_init:
        kfree(set->name);
 err_set_name:
        kvfree(set);
index ae906eb..330ba68 100644
@@ -2683,7 +2683,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
        }
        if (likely(saddr == NULL)) {
                dev     = packet_cached_dev_get(po);
-               proto   = po->num;
+               proto   = READ_ONCE(po->num);
        } else {
                err = -EINVAL;
                if (msg->msg_namelen < sizeof(struct sockaddr_ll))
@@ -2896,7 +2896,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
 
        if (likely(saddr == NULL)) {
                dev     = packet_cached_dev_get(po);
-               proto   = po->num;
+               proto   = READ_ONCE(po->num);
        } else {
                err = -EINVAL;
                if (msg->msg_namelen < sizeof(struct sockaddr_ll))
@@ -3034,10 +3034,13 @@ static int packet_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
        struct sock *sk = sock->sk;
        struct packet_sock *po = pkt_sk(sk);
 
-       if (po->tx_ring.pg_vec)
+       /* Reading tx_ring.pg_vec without holding pg_vec_lock is racy.
+        * tpacket_snd() will redo the check safely.
+        */
+       if (data_race(po->tx_ring.pg_vec))
                return tpacket_snd(po, msg);
-       else
-               return packet_snd(sock, msg, len);
+
+       return packet_snd(sock, msg, len);
 }
 
 /*
@@ -3168,7 +3171,7 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
                        /* prevents packet_notifier() from calling
                         * register_prot_hook()
                         */
-                       po->num = 0;
+                       WRITE_ONCE(po->num, 0);
                        __unregister_prot_hook(sk, true);
                        rcu_read_lock();
                        dev_curr = po->prot_hook.dev;
@@ -3178,17 +3181,17 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
                }
 
                BUG_ON(po->running);
-               po->num = proto;
+               WRITE_ONCE(po->num, proto);
                po->prot_hook.type = proto;
 
                if (unlikely(unlisted)) {
                        dev_put(dev);
                        po->prot_hook.dev = NULL;
-                       po->ifindex = -1;
+                       WRITE_ONCE(po->ifindex, -1);
                        packet_cached_dev_reset(po);
                } else {
                        po->prot_hook.dev = dev;
-                       po->ifindex = dev ? dev->ifindex : 0;
+                       WRITE_ONCE(po->ifindex, dev ? dev->ifindex : 0);
                        packet_cached_dev_assign(po, dev);
                }
        }
@@ -3502,7 +3505,7 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
        uaddr->sa_family = AF_PACKET;
        memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data));
        rcu_read_lock();
-       dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex);
+       dev = dev_get_by_index_rcu(sock_net(sk), READ_ONCE(pkt_sk(sk)->ifindex));
        if (dev)
                strlcpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data));
        rcu_read_unlock();
@@ -3517,16 +3520,18 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
        struct sock *sk = sock->sk;
        struct packet_sock *po = pkt_sk(sk);
        DECLARE_SOCKADDR(struct sockaddr_ll *, sll, uaddr);
+       int ifindex;
 
        if (peer)
                return -EOPNOTSUPP;
 
+       ifindex = READ_ONCE(po->ifindex);
        sll->sll_family = AF_PACKET;
-       sll->sll_ifindex = po->ifindex;
-       sll->sll_protocol = po->num;
+       sll->sll_ifindex = ifindex;
+       sll->sll_protocol = READ_ONCE(po->num);
        sll->sll_pkttype = 0;
        rcu_read_lock();
-       dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex);
+       dev = dev_get_by_index_rcu(sock_net(sk), ifindex);
        if (dev) {
                sll->sll_hatype = dev->type;
                sll->sll_halen = dev->addr_len;
@@ -4105,7 +4110,7 @@ static int packet_notifier(struct notifier_block *this,
                                }
                                if (msg == NETDEV_UNREGISTER) {
                                        packet_cached_dev_reset(po);
-                                       po->ifindex = -1;
+                                       WRITE_ONCE(po->ifindex, -1);
                                        if (po->prot_hook.dev)
                                                dev_put(po->prot_hook.dev);
                                        po->prot_hook.dev = NULL;
@@ -4411,7 +4416,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
        was_running = po->running;
        num = po->num;
        if (was_running) {
-               po->num = 0;
+               WRITE_ONCE(po->num, 0);
                __unregister_prot_hook(sk, false);
        }
        spin_unlock(&po->bind_lock);
@@ -4446,7 +4451,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
 
        spin_lock(&po->bind_lock);
        if (was_running) {
-               po->num = num;
+               WRITE_ONCE(po->num, num);
                register_prot_hook(sk);
        }
        spin_unlock(&po->bind_lock);
@@ -4616,8 +4621,8 @@ static int packet_seq_show(struct seq_file *seq, void *v)
                           s,
                           refcount_read(&s->sk_refcnt),
                           s->sk_type,
-                          ntohs(po->num),
-                          po->ifindex,
+                          ntohs(READ_ONCE(po->num)),
+                          READ_ONCE(po->ifindex),
                           po->running,
                           atomic_read(&s->sk_rmem_alloc),
                           from_kuid_munged(seq_user_ns(seq), sock_i_uid(s)),
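
packet_sendmsg() peeks at tx_ring.pg_vec without pg_vec_lock purely to choose a path, and the data_race() annotation records that the race is deliberate: a stale answer only sends the call down the other path, and tpacket_snd() revalidates under the lock. A pthread sketch of that check-then-recheck shape, names hypothetical:

#include <pthread.h>
#include <stdio.h>

struct ring_stub {
        pthread_mutex_t lock;
        void *pg_vec;           /* NULL means "no ring configured" */
};

/* Slow path: the authoritative check happens under the lock. */
static int ring_send(struct ring_stub *r)
{
        int ret;

        pthread_mutex_lock(&r->lock);
        ret = r->pg_vec ? 0 : -1;   /* revalidate; the hint may be stale */
        pthread_mutex_unlock(&r->lock);
        return ret;
}

static int plain_send(struct ring_stub *r)
{
        (void)r;
        return 0;
}

static int send_msg(struct ring_stub *r)
{
        /* Racy hint, like data_race(po->tx_ring.pg_vec): a wrong answer
         * only costs a bounce to the other path, never correctness. */
        if (r->pg_vec)
                return ring_send(r);
        return plain_send(r);
}

int main(void)
{
        struct ring_stub r = { .lock = PTHREAD_MUTEX_INITIALIZER };

        printf("ret=%d\n", send_msg(&r));   /* plain path: ret=0 */
        return 0;
}
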
index c0477be..f2efaa4 100644
@@ -436,7 +436,7 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len)
        struct qrtr_sock *ipc;
        struct sk_buff *skb;
        struct qrtr_cb *cb;
-       unsigned int size;
+       size_t size;
        unsigned int ver;
        size_t hdrlen;
 
index 4db109f..5b426dc 100644
@@ -714,7 +714,7 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
 
                if (rds_cmsg_recv(inc, msg, rs)) {
                        ret = -EFAULT;
-                       goto out;
+                       break;
                }
                rds_recvmsg_zcookie(rs, msg);
 
index 18edd9a..a656baa 100644
@@ -904,14 +904,19 @@ static int tcf_ct_act_nat(struct sk_buff *skb,
        }
 
        err = ct_nat_execute(skb, ct, ctinfo, range, maniptype);
-       if (err == NF_ACCEPT &&
-           ct->status & IPS_SRC_NAT && ct->status & IPS_DST_NAT) {
-               if (maniptype == NF_NAT_MANIP_SRC)
-                       maniptype = NF_NAT_MANIP_DST;
-               else
-                       maniptype = NF_NAT_MANIP_SRC;
-
-               err = ct_nat_execute(skb, ct, ctinfo, range, maniptype);
+       if (err == NF_ACCEPT && ct->status & IPS_DST_NAT) {
+               if (ct->status & IPS_SRC_NAT) {
+                       if (maniptype == NF_NAT_MANIP_SRC)
+                               maniptype = NF_NAT_MANIP_DST;
+                       else
+                               maniptype = NF_NAT_MANIP_SRC;
+
+                       err = ct_nat_execute(skb, ct, ctinfo, range,
+                                            maniptype);
+               } else if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
+                       err = ct_nat_execute(skb, ct, ctinfo, NULL,
+                                            NF_NAT_MANIP_SRC);
+               }
        }
        return err;
 #else
index 7d37638..9515428 100644
@@ -943,7 +943,7 @@ static struct tcphdr *cake_get_tcphdr(const struct sk_buff *skb,
        }
 
        tcph = skb_header_pointer(skb, offset, sizeof(_tcph), &_tcph);
-       if (!tcph)
+       if (!tcph || tcph->doff < 5)
                return NULL;
 
        return skb_header_pointer(skb, offset,
@@ -967,6 +967,8 @@ static const void *cake_get_tcpopt(const struct tcphdr *tcph,
                        length--;
                        continue;
                }
+               if (length < 2)
+                       break;
                opsize = *ptr++;
                if (opsize < 2 || opsize > length)
                        break;
@@ -1104,6 +1106,8 @@ static bool cake_tcph_may_drop(const struct tcphdr *tcph,
                        length--;
                        continue;
                }
+               if (length < 2)
+                       break;
                opsize = *ptr++;
                if (opsize < 2 || opsize > length)
                        break;
@@ -2338,7 +2342,7 @@ static int cake_config_precedence(struct Qdisc *sch)
 
 /*     List of known Diffserv codepoints:
  *
- *     Least Effort (CS1)
+ *     Least Effort (CS1, LE)
  *     Best Effort (CS0)
  *     Max Reliability & LLT "Lo" (TOS1)
  *     Max Throughput (TOS2)
@@ -2360,7 +2364,7 @@ static int cake_config_precedence(struct Qdisc *sch)
  *     Total 25 codepoints.
  */
 
-/*     List of traffic classes in RFC 4594:
+/*     List of traffic classes in RFC 4594, updated by RFC 8622:
  *             (roughly descending order of contended priority)
  *             (roughly ascending order of uncontended throughput)
  *
@@ -2375,7 +2379,7 @@ static int cake_config_precedence(struct Qdisc *sch)
  *     Ops, Admin, Management (CS2,TOS1) - eg. ssh
  *     Standard Service (CS0 & unrecognised codepoints)
  *     High Throughput Data (AF1x,TOS2)  - eg. web traffic
- *     Low Priority Data (CS1)           - eg. BitTorrent
+ *     Low Priority Data (CS1,LE)        - eg. BitTorrent
 
  *     Total 12 traffic classes.
  */
@@ -2391,7 +2395,7 @@ static int cake_config_diffserv8(struct Qdisc *sch)
  *             Video Streaming          (AF4x, AF3x, CS3)
  *             Bog Standard             (CS0 etc.)
  *             High Throughput          (AF1x, TOS2)
- *             Background Traffic       (CS1)
+ *             Background Traffic       (CS1, LE)
  *
  *             Total 8 traffic classes.
  */
@@ -2435,7 +2439,7 @@ static int cake_config_diffserv4(struct Qdisc *sch)
  *         Latency Sensitive  (CS7, CS6, EF, VA, CS5, CS4)
  *         Streaming Media    (AF4x, AF3x, CS3, AF2x, TOS4, CS2, TOS1)
  *         Best Effort        (CS0, AF1x, TOS2, and those not specified)
- *         Background Traffic (CS1)
+ *         Background Traffic (CS1, LE)
  *
  *             Total 4 traffic classes.
  */
@@ -2473,7 +2477,7 @@ static int cake_config_diffserv4(struct Qdisc *sch)
 static int cake_config_diffserv3(struct Qdisc *sch)
 {
 /*  Simplified Diffserv structure with 3 tins.
- *             Low Priority            (CS1)
+ *             Low Priority            (CS1, LE)
  *             Best Effort
  *             Latency Sensitive       (TOS4, VA, EF, CS6, CS7)
  */
index 27e3e7d..4f2c6d2 100644
@@ -1072,19 +1072,6 @@ static long sock_do_ioctl(struct net *net, struct socket *sock,
  *     what to do with it - that's up to the protocol still.
  */
 
-/**
- *     get_net_ns - increment the refcount of the network namespace
- *     @ns: common namespace (net)
- *
- *     Returns the net's common namespace.
- */
-
-struct ns_common *get_net_ns(struct ns_common *ns)
-{
-       return &get_net(container_of(ns, struct net, ns))->ns;
-}
-EXPORT_SYMBOL_GPL(get_net_ns);
-
 static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 {
        struct socket *sock;
index 5a31307..5d1192c 100644
@@ -535,12 +535,14 @@ static void unix_release_sock(struct sock *sk, int embrion)
        u->path.mnt = NULL;
        state = sk->sk_state;
        sk->sk_state = TCP_CLOSE;
+
+       skpair = unix_peer(sk);
+       unix_peer(sk) = NULL;
+
        unix_state_unlock(sk);
 
        wake_up_interruptible_all(&u->peer_wait);
 
-       skpair = unix_peer(sk);
-
        if (skpair != NULL) {
                if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
                        unix_state_lock(skpair);
@@ -555,7 +557,6 @@ static void unix_release_sock(struct sock *sk, int embrion)
 
                unix_dgram_peer_wake_disconnect(sk, skpair);
                sock_put(skpair); /* It may now die */
-               unix_peer(sk) = NULL;
        }
 
        /* Try to flush out this socket. Throw out buffers at least */
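
The reordering makes unix_release_sock() fetch and clear the peer pointer inside the same locked section that marks the socket closed, rather than reading it unlocked afterwards and clearing it only at the end. A minimal sketch of the detach-under-lock pattern in portable C, with a pthread mutex standing in for unix_state_lock():

	#include <pthread.h>

	struct node {
		pthread_mutex_t lock;
		struct node *peer;	/* protected by lock */
	};

	/* Atomically detach and return the peer; the caller now owns it
	 * and no concurrent path can observe a half-cleared pointer.
	 */
	static struct node *node_detach_peer(struct node *n)
	{
		struct node *peer;

		pthread_mutex_lock(&n->lock);
		peer = n->peer;		/* fetch and clear together */
		n->peer = NULL;
		pthread_mutex_unlock(&n->lock);

		return peer;
	}
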
index 2eee939..af590ae 100644 (file)
@@ -28,7 +28,7 @@ $(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.hex)
        @$(kecho) "  GEN     $@"
        @(echo '#include "reg.h"'; \
          echo 'const u8 shipped_regdb_certs[] = {'; \
-         cat $^ ; \
+         echo | cat - $^ ; \
          echo '};'; \
          echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);'; \
         ) > $@
index 6fbf753..8d0883e 100644 (file)
@@ -1340,6 +1340,11 @@ void cfg80211_register_wdev(struct cfg80211_registered_device *rdev,
        rdev->devlist_generation++;
        wdev->registered = true;
 
+       if (wdev->netdev &&
+           sysfs_create_link(&wdev->netdev->dev.kobj, &rdev->wiphy.dev.kobj,
+                             "phy80211"))
+               pr_err("failed to add phy80211 symlink to netdev!\n");
+
        nl80211_notify_iface(rdev, wdev, NL80211_CMD_NEW_INTERFACE);
 }
 
@@ -1365,14 +1370,6 @@ int cfg80211_register_netdevice(struct net_device *dev)
        if (ret)
                goto out;
 
-       if (sysfs_create_link(&dev->dev.kobj, &rdev->wiphy.dev.kobj,
-                             "phy80211")) {
-               pr_err("failed to add phy80211 symlink to netdev!\n");
-               unregister_netdevice(dev);
-               ret = -EINVAL;
-               goto out;
-       }
-
        cfg80211_register_wdev(rdev, wdev);
        ret = 0;
 out:
index 6bdd964..d245968 100644 (file)
@@ -334,6 +334,7 @@ void cfg80211_pmsr_complete(struct wireless_dev *wdev,
                            gfp_t gfp)
 {
        struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
+       struct cfg80211_pmsr_request *tmp, *prev, *to_free = NULL;
        struct sk_buff *msg;
        void *hdr;
 
@@ -364,9 +365,20 @@ free_msg:
        nlmsg_free(msg);
 free_request:
        spin_lock_bh(&wdev->pmsr_lock);
-       list_del(&req->list);
+       /*
+        * cfg80211_pmsr_process_abort() may have already moved this request
+        * to the free list, and will free it later. In this case, don't free
+        * it here.
+        */
+       list_for_each_entry_safe(tmp, prev, &wdev->pmsr_list, list) {
+               if (tmp == req) {
+                       list_del(&req->list);
+                       to_free = req;
+                       break;
+               }
+       }
        spin_unlock_bh(&wdev->pmsr_lock);
-       kfree(req);
+       kfree(to_free);
 }
 EXPORT_SYMBOL_GPL(cfg80211_pmsr_complete);
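
The double free is avoided by transferring ownership through list membership: the completion path frees the request only if it is still on wdev->pmsr_list, since cfg80211_pmsr_process_abort() may already have moved it to a private free list. A generic sketch of the "free only if still enqueued" idiom, with a plain singly linked list in place of the kernel's list_head:

	#include <pthread.h>
	#include <stdlib.h>

	struct req {
		struct req *next;
		/* ... payload ... */
	};

	struct req_list {
		pthread_mutex_t lock;
		struct req *head;
	};

	static void req_complete(struct req_list *list, struct req *req)
	{
		struct req **pp, *to_free = NULL;

		pthread_mutex_lock(&list->lock);
		for (pp = &list->head; *pp; pp = &(*pp)->next) {
			if (*pp == req) {
				*pp = req->next;	/* still ours: unlink */
				to_free = req;
				break;
			}
		}
		pthread_mutex_unlock(&list->lock);

		free(to_free);		/* free(NULL) is a no-op */
	}
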
 
index 9b959e3..0c3f05c 100644 (file)
@@ -133,6 +133,10 @@ static int wiphy_resume(struct device *dev)
        if (rdev->wiphy.registered && rdev->ops->resume)
                ret = rdev_resume(rdev);
        wiphy_unlock(&rdev->wiphy);
+
+       if (ret)
+               cfg80211_shutdown_all_interfaces(&rdev->wiphy);
+
        rtnl_unlock();
 
        return ret;
index 7ec021a..18dba3d 100644 (file)
@@ -1059,6 +1059,9 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
                case NL80211_IFTYPE_MESH_POINT:
                        /* mesh should be handled? */
                        break;
+               case NL80211_IFTYPE_OCB:
+                       cfg80211_leave_ocb(rdev, dev);
+                       break;
                default:
                        break;
                }
index f9b1952..1e9baa5 100644 (file)
@@ -192,15 +192,20 @@ static unsigned int get_symindex(Elf_Sym const *sym, Elf32_Word const *symtab,
                                 Elf32_Word const *symtab_shndx)
 {
        unsigned long offset;
+       unsigned short shndx = w2(sym->st_shndx);
        int index;
 
-       if (sym->st_shndx != SHN_XINDEX)
-               return w2(sym->st_shndx);
+       if (shndx > SHN_UNDEF && shndx < SHN_LORESERVE)
+               return shndx;
 
-       offset = (unsigned long)sym - (unsigned long)symtab;
-       index = offset / sizeof(*sym);
+       if (shndx == SHN_XINDEX) {
+               offset = (unsigned long)sym - (unsigned long)symtab;
+               index = offset / sizeof(*sym);
 
-       return w(symtab_shndx[index]);
+               return w(symtab_shndx[index]);
+       }
+
+       return 0;
 }
 
 static unsigned int get_shnum(Elf_Ehdr const *ehdr, Elf_Shdr const *shdr0)
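
get_symindex() now distinguishes three cases instead of two: ordinary section indices are returned directly, SHN_XINDEX is resolved through the SHT_SYMTAB_SHNDX table, and the remaining reserved values (SHN_ABS, SHN_COMMON, ...) fall back to 0 rather than being returned as if they were real sections. A standalone sketch using <elf.h>; the byte-order conversions recordmcount does via w()/w2() are omitted:

	#include <elf.h>

	static unsigned int sym_section_index(const Elf32_Sym *sym,
					      const Elf32_Sym *symtab,
					      const Elf32_Word *shndx_tab)
	{
		unsigned short shndx = sym->st_shndx;

		if (shndx > SHN_UNDEF && shndx < SHN_LORESERVE)
			return shndx;			/* ordinary index */

		if (shndx == SHN_XINDEX && shndx_tab)
			return shndx_tab[sym - symtab];	/* extended index */

		return 0;	/* SHN_UNDEF, SHN_ABS, SHN_COMMON, ... */
	}
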
index 438fa18..9408ee6 100644 (file)
@@ -3388,44 +3388,30 @@ static int rt5645_probe(struct snd_soc_component *component)
 {
        struct snd_soc_dapm_context *dapm = snd_soc_component_get_dapm(component);
        struct rt5645_priv *rt5645 = snd_soc_component_get_drvdata(component);
-       int ret = 0;
 
        rt5645->component = component;
 
        switch (rt5645->codec_type) {
        case CODEC_TYPE_RT5645:
-               ret = snd_soc_dapm_new_controls(dapm,
+               snd_soc_dapm_new_controls(dapm,
                        rt5645_specific_dapm_widgets,
                        ARRAY_SIZE(rt5645_specific_dapm_widgets));
-               if (ret < 0)
-                       goto exit;
-
-               ret = snd_soc_dapm_add_routes(dapm,
+               snd_soc_dapm_add_routes(dapm,
                        rt5645_specific_dapm_routes,
                        ARRAY_SIZE(rt5645_specific_dapm_routes));
-               if (ret < 0)
-                       goto exit;
-
                if (rt5645->v_id < 3) {
-                       ret = snd_soc_dapm_add_routes(dapm,
+                       snd_soc_dapm_add_routes(dapm,
                                rt5645_old_dapm_routes,
                                ARRAY_SIZE(rt5645_old_dapm_routes));
-                       if (ret < 0)
-                               goto exit;
                }
                break;
        case CODEC_TYPE_RT5650:
-               ret = snd_soc_dapm_new_controls(dapm,
+               snd_soc_dapm_new_controls(dapm,
                        rt5650_specific_dapm_widgets,
                        ARRAY_SIZE(rt5650_specific_dapm_widgets));
-               if (ret < 0)
-                       goto exit;
-
-               ret = snd_soc_dapm_add_routes(dapm,
+               snd_soc_dapm_add_routes(dapm,
                        rt5650_specific_dapm_routes,
                        ARRAY_SIZE(rt5650_specific_dapm_routes));
-               if (ret < 0)
-                       goto exit;
                break;
        }
 
@@ -3433,17 +3419,9 @@ static int rt5645_probe(struct snd_soc_component *component)
 
        /* for JD function */
        if (rt5645->pdata.jd_mode) {
-               ret = snd_soc_dapm_force_enable_pin(dapm, "JD Power");
-               if (ret < 0)
-                       goto exit;
-
-               ret = snd_soc_dapm_force_enable_pin(dapm, "LDO2");
-               if (ret < 0)
-                       goto exit;
-
-               ret = snd_soc_dapm_sync(dapm);
-               if (ret < 0)
-                       goto exit;
+               snd_soc_dapm_force_enable_pin(dapm, "JD Power");
+               snd_soc_dapm_force_enable_pin(dapm, "LDO2");
+               snd_soc_dapm_sync(dapm);
        }
 
        if (rt5645->pdata.long_name)
@@ -3454,14 +3432,9 @@ static int rt5645_probe(struct snd_soc_component *component)
                GFP_KERNEL);
 
        if (!rt5645->eq_param)
-               ret = -ENOMEM;
-exit:
-       /*
-        * If there was an error above, everything will be cleaned up by the
-        * caller if we return an error here.  This will be done with a later
-        * call to rt5645_remove().
-        */
-       return ret;
+               return -ENOMEM;
+
+       return 0;
 }
 
 static void rt5645_remove(struct snd_soc_component *component)
index 6de5a7f..d2a9420 100644 (file)
@@ -863,8 +863,7 @@ __SYSCALL(__NR_process_madvise, sys_process_madvise)
 __SC_COMP(__NR_epoll_pwait2, sys_epoll_pwait2, compat_sys_epoll_pwait2)
 #define __NR_mount_setattr 442
 __SYSCALL(__NR_mount_setattr, sys_mount_setattr)
-#define __NR_quotactl_path 443
-__SYSCALL(__NR_quotactl_path, sys_quotactl_path)
+/* 443 is reserved for quotactl_path */
 
 #define __NR_landlock_create_ruleset 444
 __SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset)
index 7d66876..d1b3270 100644 (file)
@@ -289,6 +289,9 @@ struct sockaddr_in {
 /* Address indicating an error return. */
 #define        INADDR_NONE             ((unsigned long int) 0xffffffff)
 
+/* Dummy address for src of ICMP replies if no real address is set (RFC7600). */
+#define        INADDR_DUMMY            ((unsigned long int) 0xc0000008)
+
 /* Network number for local host loopback. */
 #define        IN_LOOPBACKNET          127
 
index 6061431..e9b619a 100644 (file)
@@ -1094,7 +1094,7 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
                        goto out_put_ctx;
                }
                if (xsk->fd == umem->fd)
-                       umem->rx_ring_setup_done = true;
+                       umem->tx_ring_setup_done = true;
        }
 
        err = xsk_get_mmap_offsets(xsk->fd, &off);
index 22eb31e..2f9948b 100755 (executable)
@@ -11,9 +11,9 @@ compare_number()
        second_num=$2
 
        # upper bound is first_num * 110%
-       upper=$(( $first_num + $first_num / 10 ))
+       upper=$(expr $first_num + $first_num / 10 )
        # lower bound is first_num * 90%
-       lower=$(( $first_num - $first_num / 10 ))
+       lower=$(expr $first_num - $first_num / 10 )
 
        if [ $second_num -gt $upper ] || [ $second_num -lt $lower ]; then
                echo "The difference between $first_num and $second_num are greater than 10%."
index b8fc5c5..0d8e3dc 100644 (file)
@@ -438,6 +438,4 @@ extern int __sys_socketpair(int family, int type, int protocol,
                            int __user *usockvec);
 extern int __sys_shutdown_sock(struct socket *sock, int how);
 extern int __sys_shutdown(int fd, int how);
-
-extern struct ns_common *get_net_ns(struct ns_common *ns);
 #endif /* _LINUX_SOCKET_H */
index 3ff4936..da19be7 100644 (file)
@@ -776,10 +776,10 @@ static int machine__process_ksymbol_register(struct machine *machine,
                if (dso) {
                        dso->kernel = DSO_SPACE__KERNEL;
                        map = map__new2(0, dso);
+                       dso__put(dso);
                }
 
                if (!dso || !map) {
-                       dso__put(dso);
                        return -ENOMEM;
                }
 
@@ -792,6 +792,7 @@ static int machine__process_ksymbol_register(struct machine *machine,
                map->start = event->ksymbol.addr;
                map->end = map->start + event->ksymbol.len;
                maps__insert(&machine->kmaps, map);
+               map__put(map);
                dso__set_loaded(dso);
 
                if (is_bpf_image(event->ksymbol.name)) {
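
Both hunks apply the same reference-counting rule: once the consumer holds its own reference (map__new2() takes one on dso, maps__insert() one on map), the producer drops its reference immediately instead of carrying it across later error paths, where it is easy to leak. A toy single-threaded sketch of the handoff:

	#include <stdlib.h>

	struct obj { int refcnt; };

	static struct obj *obj_get(struct obj *o)
	{
		if (o)
			o->refcnt++;
		return o;
	}

	static void obj_put(struct obj *o)
	{
		if (o && --o->refcnt == 0)
			free(o);
	}

	struct holder { struct obj *dep; };

	static void holder_attach(struct holder *h, struct obj *o)
	{
		h->dep = obj_get(o);	/* consumer pins the object itself */
	}

	static void example(struct holder *h)
	{
		struct obj *o = calloc(1, sizeof(*o));

		if (!o)
			return;
		o->refcnt = 1;		/* our reference */
		holder_attach(h, o);	/* holder now holds its own... */
		obj_put(o);		/* ...so ours is dropped right away */
	}
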
index 8336dd8..d3cf2de 100644 (file)
@@ -162,10 +162,10 @@ static bool contains_event(struct evsel **metric_events, int num_events,
        return false;
 }
 
-static bool evsel_same_pmu(struct evsel *ev1, struct evsel *ev2)
+static bool evsel_same_pmu_or_none(struct evsel *ev1, struct evsel *ev2)
 {
        if (!ev1->pmu_name || !ev2->pmu_name)
-               return false;
+               return true;
 
        return !strcmp(ev1->pmu_name, ev2->pmu_name);
 }
@@ -288,7 +288,7 @@ static struct evsel *find_evsel_group(struct evlist *perf_evlist,
                         */
                        if (!has_constraint &&
                            ev->leader != metric_events[i]->leader &&
-                           evsel_same_pmu(ev->leader, metric_events[i]->leader))
+                           evsel_same_pmu_or_none(ev->leader, metric_events[i]->leader))
                                break;
                        if (!strcmp(metric_events[i]->name, ev->name)) {
                                set_bit(ev->idx, evlist_used);
@@ -1073,16 +1073,18 @@ static int metricgroup__add_metric_sys_event_iter(struct pmu_event *pe,
 
        ret = add_metric(d->metric_list, pe, d->metric_no_group, &m, NULL, d->ids);
        if (ret)
-               return ret;
+               goto out;
 
        ret = resolve_metric(d->metric_no_group,
                                     d->metric_list, NULL, d->ids);
        if (ret)
-               return ret;
+               goto out;
 
        *(d->has_match) = true;
 
-       return *d->ret;
+out:
+       *(d->ret) = ret;
+       return ret;
 }
 
 static int metricgroup__add_metric(const char *metric, bool metric_no_group,
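
The callback now funnels every exit through the 'out' label so the status lands in *d->ret on failure as well as on success; previously a failing add_metric() or resolve_metric() returned early and left *d->ret untouched. A self-contained sketch of the pattern (step_one/step_two are stand-ins):

	struct iter_data {
		int *ret;	/* final status, read by the walk's caller */
	};

	static int step_one(void) { return 0; }	/* stand-in helpers */
	static int step_two(void) { return 0; }

	static int iter_cb(void *arg)
	{
		struct iter_data *d = arg;
		int ret;

		ret = step_one();
		if (ret)
			goto out;

		ret = step_two();
		if (ret)
			goto out;

	out:
		*d->ret = ret;	/* propagated on every path */
		return ret;
	}
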
index 1512092..3a9e332 100644 (file)
@@ -1147,7 +1147,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
                }
        }
 
-       if (test->insn_processed) {
+       if (!unpriv && test->insn_processed) {
                uint32_t insn_processed;
                char *proc;
 
index ca8fdb1..7d7ebee 100644 (file)
@@ -61,6 +61,8 @@
        BPF_MOV64_IMM(BPF_REG_0, 0),
        BPF_EXIT_INSN(),
        },
+       .errstr_unpriv = "R1 !read_ok",
+       .result_unpriv = REJECT,
        .result = ACCEPT,
        .retval = 0
 },
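
This and the following test-table hunks all make the unprivileged verifier outcome explicit instead of letting it default to the privileged one. For reference, a sketch of how such an entry pairs the two expectations, using minimal stand-in types rather than test_verifier's real struct bpf_test:

	enum verdict { ACCEPT, REJECT };

	struct test_case {
		const char *errstr_unpriv;	/* expected verifier log (unpriv) */
		enum verdict result_unpriv;	/* outcome without privileges */
		enum verdict result;		/* outcome when privileged */
		int retval;			/* expected program return value */
	};

	static const struct test_case example = {
		.errstr_unpriv	= "R9 !read_ok",
		.result_unpriv	= REJECT,	/* rejected for unprivileged users */
		.result		= ACCEPT,	/* still accepted when privileged */
		.retval		= 7,
	};
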
index 8a1caf4..e061e87 100644 (file)
        BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, -1),
        BPF_EXIT_INSN(),
        },
+       .errstr_unpriv = "R0 invalid mem access 'inv'",
+       .result_unpriv = REJECT,
        .result = ACCEPT
 },
 {
        BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, -1),
        BPF_EXIT_INSN(),
        },
+       .errstr_unpriv = "R0 invalid mem access 'inv'",
+       .result_unpriv = REJECT,
        .result = ACCEPT
 },
 {
        BPF_MOV64_IMM(BPF_REG_0, 0),
        BPF_EXIT_INSN(),
        },
+       .errstr_unpriv = "R0 min value is outside of the allowed memory range",
+       .result_unpriv = REJECT,
        .fixup_map_hash_8b = { 3 },
        .result = ACCEPT,
 },
        BPF_MOV64_IMM(BPF_REG_0, 0),
        BPF_EXIT_INSN(),
        },
+       .errstr_unpriv = "R0 min value is outside of the allowed memory range",
+       .result_unpriv = REJECT,
        .fixup_map_hash_8b = { 3 },
        .result = ACCEPT,
 },
        BPF_MOV64_IMM(BPF_REG_0, 0),
        BPF_EXIT_INSN(),
        },
+       .errstr_unpriv = "R0 min value is outside of the allowed memory range",
+       .result_unpriv = REJECT,
        .fixup_map_hash_8b = { 3 },
        .result = ACCEPT,
 },
        BPF_MOV64_IMM(BPF_REG_0, 0),
        BPF_EXIT_INSN(),
        },
+       .errstr_unpriv = "R0 min value is outside of the allowed memory range",
+       .result_unpriv = REJECT,
        .fixup_map_hash_8b = { 3 },
        .result = ACCEPT,
 },
        BPF_MOV64_IMM(BPF_REG_0, 0),
        BPF_EXIT_INSN(),
        },
+       .errstr_unpriv = "R0 min value is outside of the allowed memory range",
+       .result_unpriv = REJECT,
        .fixup_map_hash_8b = { 3 },
        .result = ACCEPT,
 },
index 17fe33a..2c8935b 100644 (file)
@@ -8,6 +8,8 @@
        BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 10, -4),
        BPF_EXIT_INSN(),
        },
+       .errstr_unpriv = "R9 !read_ok",
+       .result_unpriv = REJECT,
        .result = ACCEPT,
        .retval = 7,
 },
index bd5cae4..1c857b2 100644 (file)
@@ -87,6 +87,8 @@
        BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
        BPF_EXIT_INSN(),
        },
+       .errstr_unpriv = "R9 !read_ok",
+       .result_unpriv = REJECT,
        .result = ACCEPT,
 },
 {
        BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
        BPF_EXIT_INSN(),
        },
+       .errstr_unpriv = "R9 !read_ok",
+       .result_unpriv = REJECT,
        .result = ACCEPT,
 },
 {
        BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
        BPF_EXIT_INSN(),
        },
+       .errstr_unpriv = "R9 !read_ok",
+       .result_unpriv = REJECT,
        .result = ACCEPT,
 },
 {
        BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
        BPF_EXIT_INSN(),
        },
+       .errstr_unpriv = "R0 invalid mem access 'inv'",
+       .result_unpriv = REJECT,
        .result = ACCEPT,
        .retval = 2,
 },
        BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
        BPF_EXIT_INSN(),
        },
+       .errstr_unpriv = "R0 invalid mem access 'inv'",
+       .result_unpriv = REJECT,
        .result = ACCEPT,
        .retval = 2,
 },
        BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
        BPF_EXIT_INSN(),
        },
+       .errstr_unpriv = "R0 invalid mem access 'inv'",
+       .result_unpriv = REJECT,
        .result = ACCEPT,
        .retval = 2,
 },
        BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
        BPF_EXIT_INSN(),
        },
+       .errstr_unpriv = "R0 invalid mem access 'inv'",
+       .result_unpriv = REJECT,
        .result = ACCEPT,
        .retval = 2,
 },
        BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
        BPF_EXIT_INSN(),
        },
+       .errstr_unpriv = "R0 invalid mem access 'inv'",
+       .result_unpriv = REJECT,
        .result = ACCEPT,
        .retval = 2,
 },
        BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
        BPF_EXIT_INSN(),
        },
+       .errstr_unpriv = "R0 invalid mem access 'inv'",
+       .result_unpriv = REJECT,
        .result = ACCEPT,
        .retval = 2,
 },
        BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
        BPF_EXIT_INSN(),
        },
+       .errstr_unpriv = "R0 invalid mem access 'inv'",
+       .result_unpriv = REJECT,
        .result = ACCEPT,
        .retval = 2,
 },
        BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
        BPF_EXIT_INSN(),
        },
+       .errstr_unpriv = "R0 invalid mem access 'inv'",
+       .result_unpriv = REJECT,
        .result = ACCEPT,
        .retval = 2,
 },
index 8dcd4e0..11fc68d 100644 (file)
@@ -82,8 +82,8 @@
        BPF_EXIT_INSN(),
        },
        .prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
-       .retval_unpriv = 1,
-       .result_unpriv = ACCEPT,
+       .errstr_unpriv = "R9 !read_ok",
+       .result_unpriv = REJECT,
        .retval = 1,
        .result = ACCEPT,
 },
        BPF_EXIT_INSN(),
        },
        .prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
-       .result_unpriv = ACCEPT,
+       .errstr_unpriv = "R9 !read_ok",
+       .result_unpriv = REJECT,
        .result = ACCEPT,
 },
 {
        BPF_EXIT_INSN(),
        },
        .prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
-       .result_unpriv = ACCEPT,
+       .errstr_unpriv = "R9 !read_ok",
+       .result_unpriv = REJECT,
        .result = ACCEPT,
 },
index bd436df..111801a 100644 (file)
        BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
        BPF_EXIT_INSN(),
        },
+       .errstr_unpriv = "R7 invalid mem access 'inv'",
+       .result_unpriv = REJECT,
        .result = ACCEPT,
        .retval = 0,
 },
index 7ae2859..a3e593d 100644 (file)
        .fixup_map_array_48b = { 1 },
        .result = ACCEPT,
        .result_unpriv = REJECT,
-       .errstr_unpriv = "R2 tried to add from different maps, paths or scalars",
+       .errstr_unpriv = "R2 pointer comparison prohibited",
        .retval = 0,
 },
 {
        BPF_MOV64_IMM(BPF_REG_0, 0),
        BPF_EXIT_INSN(),
        // fake-dead code; targeted from branch A to
-       // prevent dead code sanitization
+       // prevent dead code sanitization, rejected
+       // via branch B however
        BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
        BPF_MOV64_IMM(BPF_REG_0, 0),
        BPF_EXIT_INSN(),
        .fixup_map_array_48b = { 1 },
        .result = ACCEPT,
        .result_unpriv = REJECT,
-       .errstr_unpriv = "R2 tried to add from different maps, paths or scalars",
+       .errstr_unpriv = "R0 invalid mem access 'inv'",
        .retval = 0,
 },
 {
index 5c70596..a2b732c 100644 (file)
@@ -82,7 +82,7 @@ int kvm_check_cap(long cap)
 
        kvm_fd = open_kvm_dev_path_or_exit();
        ret = ioctl(kvm_fd, KVM_CHECK_EXTENSION, cap);
-       TEST_ASSERT(ret != -1, "KVM_CHECK_EXTENSION IOCTL failed,\n"
+       TEST_ASSERT(ret >= 0, "KVM_CHECK_EXTENSION IOCTL failed,\n"
                "  rc: %i errno: %i", ret, errno);
 
        close(kvm_fd);
index 6ad6c82..af1031f 100644 (file)
@@ -166,75 +166,75 @@ size_t get_def_hugetlb_pagesz(void)
        return 0;
 }
 
+#define ANON_FLAGS     (MAP_PRIVATE | MAP_ANONYMOUS)
+#define ANON_HUGE_FLAGS        (ANON_FLAGS | MAP_HUGETLB)
+
 const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(uint32_t i)
 {
-       static const int anon_flags = MAP_PRIVATE | MAP_ANONYMOUS;
-       static const int anon_huge_flags = anon_flags | MAP_HUGETLB;
-
        static const struct vm_mem_backing_src_alias aliases[] = {
                [VM_MEM_SRC_ANONYMOUS] = {
                        .name = "anonymous",
-                       .flag = anon_flags,
+                       .flag = ANON_FLAGS,
                },
                [VM_MEM_SRC_ANONYMOUS_THP] = {
                        .name = "anonymous_thp",
-                       .flag = anon_flags,
+                       .flag = ANON_FLAGS,
                },
                [VM_MEM_SRC_ANONYMOUS_HUGETLB] = {
                        .name = "anonymous_hugetlb",
-                       .flag = anon_huge_flags,
+                       .flag = ANON_HUGE_FLAGS,
                },
                [VM_MEM_SRC_ANONYMOUS_HUGETLB_16KB] = {
                        .name = "anonymous_hugetlb_16kb",
-                       .flag = anon_huge_flags | MAP_HUGE_16KB,
+                       .flag = ANON_HUGE_FLAGS | MAP_HUGE_16KB,
                },
                [VM_MEM_SRC_ANONYMOUS_HUGETLB_64KB] = {
                        .name = "anonymous_hugetlb_64kb",
-                       .flag = anon_huge_flags | MAP_HUGE_64KB,
+                       .flag = ANON_HUGE_FLAGS | MAP_HUGE_64KB,
                },
                [VM_MEM_SRC_ANONYMOUS_HUGETLB_512KB] = {
                        .name = "anonymous_hugetlb_512kb",
-                       .flag = anon_huge_flags | MAP_HUGE_512KB,
+                       .flag = ANON_HUGE_FLAGS | MAP_HUGE_512KB,
                },
                [VM_MEM_SRC_ANONYMOUS_HUGETLB_1MB] = {
                        .name = "anonymous_hugetlb_1mb",
-                       .flag = anon_huge_flags | MAP_HUGE_1MB,
+                       .flag = ANON_HUGE_FLAGS | MAP_HUGE_1MB,
                },
                [VM_MEM_SRC_ANONYMOUS_HUGETLB_2MB] = {
                        .name = "anonymous_hugetlb_2mb",
-                       .flag = anon_huge_flags | MAP_HUGE_2MB,
+                       .flag = ANON_HUGE_FLAGS | MAP_HUGE_2MB,
                },
                [VM_MEM_SRC_ANONYMOUS_HUGETLB_8MB] = {
                        .name = "anonymous_hugetlb_8mb",
-                       .flag = anon_huge_flags | MAP_HUGE_8MB,
+                       .flag = ANON_HUGE_FLAGS | MAP_HUGE_8MB,
                },
                [VM_MEM_SRC_ANONYMOUS_HUGETLB_16MB] = {
                        .name = "anonymous_hugetlb_16mb",
-                       .flag = anon_huge_flags | MAP_HUGE_16MB,
+                       .flag = ANON_HUGE_FLAGS | MAP_HUGE_16MB,
                },
                [VM_MEM_SRC_ANONYMOUS_HUGETLB_32MB] = {
                        .name = "anonymous_hugetlb_32mb",
-                       .flag = anon_huge_flags | MAP_HUGE_32MB,
+                       .flag = ANON_HUGE_FLAGS | MAP_HUGE_32MB,
                },
                [VM_MEM_SRC_ANONYMOUS_HUGETLB_256MB] = {
                        .name = "anonymous_hugetlb_256mb",
-                       .flag = anon_huge_flags | MAP_HUGE_256MB,
+                       .flag = ANON_HUGE_FLAGS | MAP_HUGE_256MB,
                },
                [VM_MEM_SRC_ANONYMOUS_HUGETLB_512MB] = {
                        .name = "anonymous_hugetlb_512mb",
-                       .flag = anon_huge_flags | MAP_HUGE_512MB,
+                       .flag = ANON_HUGE_FLAGS | MAP_HUGE_512MB,
                },
                [VM_MEM_SRC_ANONYMOUS_HUGETLB_1GB] = {
                        .name = "anonymous_hugetlb_1gb",
-                       .flag = anon_huge_flags | MAP_HUGE_1GB,
+                       .flag = ANON_HUGE_FLAGS | MAP_HUGE_1GB,
                },
                [VM_MEM_SRC_ANONYMOUS_HUGETLB_2GB] = {
                        .name = "anonymous_hugetlb_2gb",
-                       .flag = anon_huge_flags | MAP_HUGE_2GB,
+                       .flag = ANON_HUGE_FLAGS | MAP_HUGE_2GB,
                },
                [VM_MEM_SRC_ANONYMOUS_HUGETLB_16GB] = {
                        .name = "anonymous_hugetlb_16gb",
-                       .flag = anon_huge_flags | MAP_HUGE_16GB,
+                       .flag = ANON_HUGE_FLAGS | MAP_HUGE_16GB,
                },
                [VM_MEM_SRC_SHMEM] = {
                        .name = "shmem",
index 76d9487..5abe92d 100755 (executable)
@@ -1384,12 +1384,37 @@ ipv4_rt_replace()
        ipv4_rt_replace_mpath
 }
 
+# checks that cached input route on VRF port is deleted
+# when VRF is deleted
+ipv4_local_rt_cache()
+{
+       run_cmd "ip addr add 10.0.0.1/32 dev lo"
+       run_cmd "ip netns add test-ns"
+       run_cmd "ip link add veth-outside type veth peer name veth-inside"
+       run_cmd "ip link add vrf-100 type vrf table 1100"
+       run_cmd "ip link set veth-outside master vrf-100"
+       run_cmd "ip link set veth-inside netns test-ns"
+       run_cmd "ip link set veth-outside up"
+       run_cmd "ip link set vrf-100 up"
+       run_cmd "ip route add 10.1.1.1/32 dev veth-outside table 1100"
+       run_cmd "ip netns exec test-ns ip link set veth-inside up"
+       run_cmd "ip netns exec test-ns ip addr add 10.1.1.1/32 dev veth-inside"
+       run_cmd "ip netns exec test-ns ip route add 10.0.0.1/32 dev veth-inside"
+       run_cmd "ip netns exec test-ns ip route add default via 10.0.0.1"
+       run_cmd "ip netns exec test-ns ping 10.0.0.1 -c 1 -i 1"
+       run_cmd "ip link delete vrf-100"
+
+       # if we do not hang, the test is a success
+       log_test $? 0 "Cached route removed from VRF port device"
+}
+
 ipv4_route_test()
 {
        route_setup
 
        ipv4_rt_add
        ipv4_rt_replace
+       ipv4_local_rt_cache
 
        route_cleanup
 }
diff --git a/tools/testing/selftests/net/icmp.sh b/tools/testing/selftests/net/icmp.sh
new file mode 100755 (executable)
index 0000000..e4b04cd
--- /dev/null
@@ -0,0 +1,74 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for checking ICMP response with dummy address instead of 0.0.0.0.
+# Sets up two namespaces like:
+# +----------------------+                          +--------------------+
+# | ns1                  |    v4-via-v6 routes:     | ns2                |
+# |                      |                          |                    |
+# |             +--------+   -> 172.16.1.0/24 ->    +--------+           |
+# |             | veth0  +--------------------------+  veth0 |           |
+# |             +--------+   <- 172.16.0.0/24 <-    +--------+           |
+# |           172.16.0.1 |                          | 2001:db8:1::2/64   |
+# |     2001:db8:1::1/64 |                          |                    |
+# +----------------------+                          +--------------------+
+#
+# And then tries to ping 172.16.1.1 from ns1. This results in a "net
+# unreachable" message being sent from ns2, but there is no IPv4 address set in
+# that address space, so the kernel should substitute the dummy address
+# 192.0.0.8 defined in RFC7600.
+
+NS1=ns1
+NS2=ns2
+H1_IP=172.16.0.1/32
+H1_IP6=2001:db8:1::1
+RT1=172.16.1.0/24
+PINGADDR=172.16.1.1
+RT2=172.16.0.0/24
+H2_IP6=2001:db8:1::2
+
+TMPFILE=$(mktemp)
+
+cleanup()
+{
+    rm -f "$TMPFILE"
+    ip netns del $NS1
+    ip netns del $NS2
+}
+
+trap cleanup EXIT
+
+# Namespaces
+ip netns add $NS1
+ip netns add $NS2
+
+# Connectivity
+ip -netns $NS1 link add veth0 type veth peer name veth0 netns $NS2
+ip -netns $NS1 link set dev veth0 up
+ip -netns $NS2 link set dev veth0 up
+ip -netns $NS1 addr add $H1_IP dev veth0
+ip -netns $NS1 addr add $H1_IP6/64 dev veth0 nodad
+ip -netns $NS2 addr add $H2_IP6/64 dev veth0 nodad
+ip -netns $NS1 route add $RT1 via inet6 $H2_IP6
+ip -netns $NS2 route add $RT2 via inet6 $H1_IP6
+
+# Make sure ns2 will respond with ICMP unreachable
+ip netns exec $NS2 sysctl -qw net.ipv4.icmp_ratelimit=0 net.ipv4.ip_forward=1
+
+# Run the test - a ping runs in the background, and we capture ICMP responses
+# with tcpdump; -c 1 means it should exit on the first ping, but add a timeout
+# in case something goes wrong
+ip netns exec $NS1 ping -w 3 -i 0.5 $PINGADDR >/dev/null &
+ip netns exec $NS1 timeout 10 tcpdump -tpni veth0 -c 1 'icmp and icmp[icmptype] != icmp-echo' > $TMPFILE 2>/dev/null
+
+# Parse response and check for dummy address
+# tcpdump output looks like:
+# IP 192.0.0.8 > 172.16.0.1: ICMP net 172.16.1.1 unreachable, length 92
+RESP_IP=$(awk '{print $2}' < $TMPFILE)
+if [[ "$RESP_IP" != "192.0.0.8" ]]; then
+    echo "FAIL - got ICMP response from $RESP_IP, should be 192.0.0.8"
+    exit 1
+else
+    echo "OK"
+    exit 0
+fi
index 9ca5f1b..2b495dc 100755 (executable)
@@ -197,9 +197,6 @@ ip -net "$ns4" link set ns4eth3 up
 ip -net "$ns4" route add default via 10.0.3.2
 ip -net "$ns4" route add default via dead:beef:3::2
 
-# use TCP syn cookies, even if no flooding was detected.
-ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=2
-
 set_ethtool_flags() {
        local ns="$1"
        local dev="$2"
@@ -737,6 +734,14 @@ for sender in $ns1 $ns2 $ns3 $ns4;do
                exit $ret
        fi
 
+       # ns1<->ns2 is not subject to reordering/tc delays. Use it to test
+       # mptcp syncookie support.
+       if [ $sender = $ns1 ]; then
+               ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=2
+       else
+               ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=1
+       fi
+
        run_tests "$ns2" $sender 10.0.1.2
        run_tests "$ns2" $sender dead:beef:1::2
        run_tests "$ns2" $sender 10.0.2.1
index a8fa641..7f26591 100755 (executable)
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
 readonly BASE="ns-$(mktemp -u XXXXXX)"
index 2fedc07..11d7cdb 100755 (executable)
@@ -18,7 +18,8 @@ ret=0
 
 cleanup() {
        local ns
-       local -r jobs="$(jobs -p)"
+       local jobs
+       readonly jobs="$(jobs -p)"
        [ -n "${jobs}" ] && kill -1 ${jobs} 2>/dev/null
        rm -f $STATS
 
@@ -108,7 +109,7 @@ chk_gro() {
 
 if [ ! -f ../bpf/xdp_dummy.o ]; then
        echo "Missing xdp_dummy helper. Build bpf selftest first"
-       exit -1
+       exit 1
 fi
 
 create_ns
index 3171069..cd6430b 100644 (file)
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 # Makefile for netfilter selftests
 
-TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \
+TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \
        conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \
        nft_concat_range.sh nft_conntrack_helper.sh \
        nft_queue.sh nft_meta.sh nf_nat_edemux.sh \
diff --git a/tools/testing/selftests/netfilter/nft_fib.sh b/tools/testing/selftests/netfilter/nft_fib.sh
new file mode 100755 (executable)
index 0000000..6caf6ac
--- /dev/null
@@ -0,0 +1,221 @@
+#!/bin/bash
+#
+# This tests the fib expression.
+#
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+
+sfx=$(mktemp -u "XXXXXXXX")
+ns1="ns1-$sfx"
+ns2="ns2-$sfx"
+nsrouter="nsrouter-$sfx"
+timeout=4
+
+log_netns=$(sysctl -n net.netfilter.nf_log_all_netns)
+
+cleanup()
+{
+       ip netns del ${ns1}
+       ip netns del ${ns2}
+       ip netns del ${nsrouter}
+
+       [ $log_netns -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns=$log_netns
+}
+
+nft --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+       echo "SKIP: Could not run test without nft tool"
+       exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+       echo "SKIP: Could not run test without ip tool"
+       exit $ksft_skip
+fi
+
+ip netns add ${nsrouter}
+if [ $? -ne 0 ];then
+       echo "SKIP: Could not create net namespace"
+       exit $ksft_skip
+fi
+
+trap cleanup EXIT
+
+dmesg | grep -q ' nft_rpfilter: '
+if [ $? -eq 0 ]; then
+       dmesg -c | grep ' nft_rpfilter: '
+       echo "WARN: a previous test run has failed" 1>&2
+fi
+
+sysctl -q net.netfilter.nf_log_all_netns=1
+ip netns add ${ns1}
+ip netns add ${ns2}
+
+load_ruleset() {
+       local netns=$1
+
+ip netns exec ${netns} nft -f /dev/stdin <<EOF
+table inet filter {
+       chain prerouting {
+               type filter hook prerouting priority 0; policy accept;
+               fib saddr . iif oif missing counter log prefix "$netns nft_rpfilter: " drop
+       }
+}
+EOF
+}
+
+load_ruleset_count() {
+       local netns=$1
+
+ip netns exec ${netns} nft -f /dev/stdin <<EOF
+table inet filter {
+       chain prerouting {
+               type filter hook prerouting priority 0; policy accept;
+               ip daddr 1.1.1.1 fib saddr . iif oif missing counter drop
+               ip6 daddr 1c3::c01d fib saddr . iif oif missing counter drop
+       }
+}
+EOF
+}
+
+check_drops() {
+       dmesg | grep -q ' nft_rpfilter: '
+       if [ $? -eq 0 ]; then
+               dmesg | grep ' nft_rpfilter: '
+               echo "FAIL: rpfilter did drop packets"
+               return 1
+       fi
+
+       return 0
+}
+
+check_fib_counter() {
+       local want=$1
+       local ns=$2
+       local address=$3
+
+       line=$(ip netns exec ${ns} nft list table inet filter | grep 'fib saddr . iif' | grep $address | grep "packets $want" )
+       ret=$?
+
+       if [ $ret -ne 0 ];then
+               echo "Netns $ns fib counter doesn't match expected packet count of $want for $address" 1>&2
+               ip netns exec ${ns} nft list table inet filter
+               return 1
+       fi
+
+       if [ $want -gt 0 ]; then
+               echo "PASS: fib expression did drop packets for $address"
+       fi
+
+       return 0
+}
+
+load_ruleset ${nsrouter}
+load_ruleset ${ns1}
+load_ruleset ${ns2}
+
+ip link add veth0 netns ${nsrouter} type veth peer name eth0 netns ${ns1} > /dev/null 2>&1
+if [ $? -ne 0 ];then
+    echo "SKIP: No virtual ethernet pair device support in kernel"
+    exit $ksft_skip
+fi
+ip link add veth1 netns ${nsrouter} type veth peer name eth0 netns ${ns2}
+
+ip -net ${nsrouter} link set lo up
+ip -net ${nsrouter} link set veth0 up
+ip -net ${nsrouter} addr add 10.0.1.1/24 dev veth0
+ip -net ${nsrouter} addr add dead:1::1/64 dev veth0
+
+ip -net ${nsrouter} link set veth1 up
+ip -net ${nsrouter} addr add 10.0.2.1/24 dev veth1
+ip -net ${nsrouter} addr add dead:2::1/64 dev veth1
+
+ip -net ${ns1} link set lo up
+ip -net ${ns1} link set eth0 up
+
+ip -net ${ns2} link set lo up
+ip -net ${ns2} link set eth0 up
+
+ip -net ${ns1} addr add 10.0.1.99/24 dev eth0
+ip -net ${ns1} addr add dead:1::99/64 dev eth0
+ip -net ${ns1} route add default via 10.0.1.1
+ip -net ${ns1} route add default via dead:1::1
+
+ip -net ${ns2} addr add 10.0.2.99/24 dev eth0
+ip -net ${ns2} addr add dead:2::99/64 dev eth0
+ip -net ${ns2} route add default via 10.0.2.1
+ip -net ${ns2} route add default via dead:2::1
+
+test_ping() {
+  local daddr4=$1
+  local daddr6=$2
+
+  ip netns exec ${ns1} ping -c 1 -q $daddr4 > /dev/null
+  ret=$?
+  if [ $ret -ne 0 ];then
+       check_drops
+       echo "FAIL: ${ns1} cannot reach $daddr4, ret $ret" 1>&2
+       return 1
+  fi
+
+  ip netns exec ${ns1} ping -c 3 -q $daddr6 > /dev/null
+  ret=$?
+  if [ $ret -ne 0 ];then
+       check_drops
+       echo "FAIL: ${ns1} cannot reach $daddr6, ret $ret" 1>&2
+       return 1
+  fi
+
+  return 0
+}
+
+ip netns exec ${nsrouter} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+
+sleep 3
+
+test_ping 10.0.2.1 dead:2::1 || exit 1
+check_drops || exit 1
+
+test_ping 10.0.2.99 dead:2::99 || exit 1
+check_drops || exit 1
+
+echo "PASS: fib expression did not cause unwanted packet drops"
+
+ip netns exec ${nsrouter} nft flush table inet filter
+
+ip -net ${ns1} route del default
+ip -net ${ns1} -6 route del default
+
+ip -net ${ns1} addr del 10.0.1.99/24 dev eth0
+ip -net ${ns1} addr del dead:1::99/64 dev eth0
+
+ip -net ${ns1} addr add 10.0.2.99/24 dev eth0
+ip -net ${ns1} addr add dead:2::99/64 dev eth0
+
+ip -net ${ns1} route add default via 10.0.2.1
+ip -net ${ns1} -6 route add default via dead:2::1
+
+ip -net ${nsrouter} addr add dead:2::1/64 dev veth0
+
+# switch to a ruleset that doesn't log; this time
+# it is expected that this does drop the packets.
+load_ruleset_count ${nsrouter}
+
+# ns1 has a default route, but nsrouter does not.
+# we must not check the return value; the ping to
+# 1.1.1.1 is expected to fail.
+check_fib_counter 0 ${nsrouter} 1.1.1.1 || exit 1
+check_fib_counter 0 ${nsrouter} 1c3::c01d || exit 1
+
+ip netns exec ${ns1} ping -c 1 -W 1 -q 1.1.1.1 > /dev/null
+check_fib_counter 1 ${nsrouter} 1.1.1.1 || exit 1
+
+sleep 2
+ip netns exec ${ns1} ping -c 3 -q 1c3::c01d > /dev/null
+check_fib_counter 3 ${nsrouter} 1c3::c01d || exit 1
+
+exit 0