Merge tag 'drm-fixes-2019-04-26' of git://anongit.freedesktop.org/drm/drm

author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 26 Apr 2019 17:39:46 +0000 (10:39 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 26 Apr 2019 17:39:46 +0000 (10:39 -0700)

diff --git a/Documentation/devicetree/bindings/net/davinci_emac.txt b/Documentation/devicetree/bindings/net/davinci_emac.txt

index 24c5cda..ca83dcc 100644 (file)
--- a/Documentation/devicetree/bindings/net/davinci_emac.txt
+++ b/Documentation/devicetree/bindings/net/davinci_emac.txt
@@ -20,6 +20,8 @@ Required properties:
  Optional properties:
  - phy-handle: See ethernet.txt file in the same directory.
                If absent, davinci_emac driver defaults to 100/FULL.
+- nvmem-cells: phandle, reference to an nvmem node for the MAC address
+- nvmem-cell-names: string, should be "mac-address" if nvmem is to be used
  - ti,davinci-rmii-en: 1 byte, 1 means use RMII
  - ti,davinci-no-bd-ram: boolean, does EMAC have BD RAM?
  
diff --git a/Documentation/devicetree/bindings/net/ethernet.txt b/Documentation/devicetree/bindings/net/ethernet.txt

index cfc376b..a686215 100644 (file)
--- a/Documentation/devicetree/bindings/net/ethernet.txt
+++ b/Documentation/devicetree/bindings/net/ethernet.txt
@@ -10,15 +10,14 @@ Documentation/devicetree/bindings/phy/phy-bindings.txt.
    the boot program; should be used in cases where the MAC address assigned to
    the device by the boot program is different from the "local-mac-address"
    property;
-- nvmem-cells: phandle, reference to an nvmem node for the MAC address;
-- nvmem-cell-names: string, should be "mac-address" if nvmem is to be used;
  - max-speed: number, specifies maximum speed in Mbit/s supported by the device;
  - max-frame-size: number, maximum transfer unit (IEEE defined MTU), rather than
    the maximum frame size (there's contradiction in the Devicetree
    Specification).
  - phy-mode: string, operation mode of the PHY interface. This is now a de-facto
    standard property; supported values are:
-  * "internal"
+  * "internal" (Internal means there is not a standard bus between the MAC and
+     the PHY, something proprietary is being used to embed the PHY in the MAC.)
    * "mii"
    * "gmii"
    * "sgmii"
diff --git a/Documentation/devicetree/bindings/net/macb.txt b/Documentation/devicetree/bindings/net/macb.txt

index 174f292..8b80515 100644 (file)
--- a/Documentation/devicetree/bindings/net/macb.txt
+++ b/Documentation/devicetree/bindings/net/macb.txt
@@ -26,6 +26,10 @@ Required properties:
         Optional elements: 'tsu_clk'
  - clocks: Phandles to input clocks.
  
+Optional properties:
+- nvmem-cells: phandle, reference to an nvmem node for the MAC address
+- nvmem-cell-names: string, should be "mac-address" if nvmem is to be used
+
  Optional properties for PHY child node:
  - reset-gpios : Should specify the gpio for phy reset
  - magic-packet : If present, indicates that the hardware supports waking
diff --git a/Documentation/networking/decnet.txt b/Documentation/networking/decnet.txt

index e12a490..d192f8b 100644 (file)
--- a/Documentation/networking/decnet.txt
+++ b/Documentation/networking/decnet.txt
@@ -22,8 +22,6 @@ you'll need the following options as well...
      CONFIG_DECNET_ROUTER (to be able to add/delete routes)
      CONFIG_NETFILTER (will be required for the DECnet routing daemon)
  
-    CONFIG_DECNET_ROUTE_FWMARK is optional
-
  Don't turn on SIOCGIFCONF support for DECnet unless you are really sure
  that you need it, in general you won't and it can cause ifconfig to
  malfunction.
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt

index acdfb5d..e2142fe 100644 (file)
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -422,6 +422,7 @@ tcp_min_rtt_wlen - INTEGER
         minimum RTT when it is moved to a longer path (e.g., due to traffic
         engineering). A longer window makes the filter more resistant to RTT
         inflations such as transient congestion. The unit is seconds.
+       Possible values: 0 - 86400 (1 day)
         Default: 300
  
  tcp_moderate_rcvbuf - BOOLEAN
diff --git a/MAINTAINERS b/MAINTAINERS

index 09f43f1..5c38f21 100644 (file)
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3121,6 +3121,7 @@ F:        drivers/cpufreq/bmips-cpufreq.c
  BROADCOM BMIPS MIPS ARCHITECTURE
  M:     Kevin Cernekee <cernekee@gmail.com>
  M:     Florian Fainelli <f.fainelli@gmail.com>
+L:     bcm-kernel-feedback-list@broadcom.com
  L:     linux-mips@vger.kernel.org
  T:     git git://github.com/broadcom/stblinux.git
  S:     Maintained
@@ -8707,6 +8708,7 @@ F:        scripts/leaking_addresses.pl
  LED SUBSYSTEM
  M:     Jacek Anaszewski <jacek.anaszewski@gmail.com>
  M:     Pavel Machek <pavel@ucw.cz>
+R:     Dan Murphy <dmurphy@ti.com>
  L:     linux-leds@vger.kernel.org
  T:     git git://git.kernel.org/pub/scm/linux/kernel/git/j.anaszewski/linux-leds.git
  S:     Maintained
diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl

index 63ed39c..165f268 100644 (file)
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -463,3 +463,7 @@
  532    common  getppid                         sys_getppid
  # all other architectures have common numbers for new syscall, alpha
  # is the exception.
+534    common  pidfd_send_signal               sys_pidfd_send_signal
+535    common  io_uring_setup                  sys_io_uring_setup
+536    common  io_uring_enter                  sys_io_uring_enter
+537    common  io_uring_register               sys_io_uring_register
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl

index 9016f40..0393917 100644 (file)
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -437,3 +437,7 @@
  421    common  rt_sigtimedwait_time64          sys_rt_sigtimedwait
  422    common  futex_time64                    sys_futex
  423    common  sched_rr_get_interval_time64    sys_sched_rr_get_interval
+424    common  pidfd_send_signal               sys_pidfd_send_signal
+425    common  io_uring_setup                  sys_io_uring_setup
+426    common  io_uring_enter                  sys_io_uring_enter
+427    common  io_uring_register               sys_io_uring_register
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h

index d1dd934..f2a83ff 100644 (file)
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -44,7 +44,7 @@
  #define __ARM_NR_compat_set_tls                (__ARM_NR_COMPAT_BASE + 5)
  #define __ARM_NR_COMPAT_END            (__ARM_NR_COMPAT_BASE + 0x800)
  
-#define __NR_compat_syscalls           424
+#define __NR_compat_syscalls           428
  #endif
  
  #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h

index 5590f26..23f1a44 100644 (file)
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -866,6 +866,14 @@ __SYSCALL(__NR_rt_sigtimedwait_time64, compat_sys_rt_sigtimedwait_time64)
  __SYSCALL(__NR_futex_time64, sys_futex)
  #define __NR_sched_rr_get_interval_time64 423
  __SYSCALL(__NR_sched_rr_get_interval_time64, sys_sched_rr_get_interval)
+#define __NR_pidfd_send_signal 424
+__SYSCALL(__NR_pidfd_send_signal, sys_pidfd_send_signal)
+#define __NR_io_uring_setup 425
+__SYSCALL(__NR_io_uring_setup, sys_io_uring_setup)
+#define __NR_io_uring_enter 426
+__SYSCALL(__NR_io_uring_enter, sys_io_uring_enter)
+#define __NR_io_uring_register 427
+__SYSCALL(__NR_io_uring_register, sys_io_uring_register)
  
  /*
   * Please add new compat syscalls above this comment and update
diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl

index ab9cda5..56e3d0b 100644 (file)
--- a/arch/ia64/kernel/syscalls/syscall.tbl
+++ b/arch/ia64/kernel/syscalls/syscall.tbl
@@ -344,3 +344,7 @@
  332    common  pkey_free                       sys_pkey_free
  333    common  rseq                            sys_rseq
  # 334 through 423 are reserved to sync up with other architectures
+424    common  pidfd_send_signal               sys_pidfd_send_signal
+425    common  io_uring_setup                  sys_io_uring_setup
+426    common  io_uring_enter                  sys_io_uring_enter
+427    common  io_uring_register               sys_io_uring_register
diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl

index 125c141..df4ec3e 100644 (file)
--- a/arch/m68k/kernel/syscalls/syscall.tbl
+++ b/arch/m68k/kernel/syscalls/syscall.tbl
@@ -423,3 +423,7 @@
  421    common  rt_sigtimedwait_time64          sys_rt_sigtimedwait
  422    common  futex_time64                    sys_futex
  423    common  sched_rr_get_interval_time64    sys_sched_rr_get_interval
+424    common  pidfd_send_signal               sys_pidfd_send_signal
+425    common  io_uring_setup                  sys_io_uring_setup
+426    common  io_uring_enter                  sys_io_uring_enter
+427    common  io_uring_register               sys_io_uring_register
diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl

index 8ee3a8c..4964947 100644 (file)
--- a/arch/microblaze/kernel/syscalls/syscall.tbl
+++ b/arch/microblaze/kernel/syscalls/syscall.tbl
@@ -429,3 +429,7 @@
  421    common  rt_sigtimedwait_time64          sys_rt_sigtimedwait
  422    common  futex_time64                    sys_futex
  423    common  sched_rr_get_interval_time64    sys_sched_rr_get_interval
+424    common  pidfd_send_signal               sys_pidfd_send_signal
+425    common  io_uring_setup                  sys_io_uring_setup
+426    common  io_uring_enter                  sys_io_uring_enter
+427    common  io_uring_register               sys_io_uring_register
diff --git a/arch/mips/ath79/setup.c b/arch/mips/ath79/setup.c

index 4a70c5d..25a5789 100644 (file)
--- a/arch/mips/ath79/setup.c
+++ b/arch/mips/ath79/setup.c
@@ -210,12 +210,6 @@ const char *get_system_type(void)
         return ath79_sys_type;
  }
  
-int get_c0_perfcount_int(void)
-{
-       return ATH79_MISC_IRQ(5);
-}
-EXPORT_SYMBOL_GPL(get_c0_perfcount_int);
-
  unsigned int get_c0_compare_int(void)
  {
         return CP0_LEGACY_COMPARE_IRQ;
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S

index f158c58..feb2653 100644 (file)
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -125,7 +125,7 @@ trace_a_syscall:
         subu    t1, v0,  __NR_O32_Linux
         move    a1, v0
         bnez    t1, 1f /* __NR_syscall at offset 0 */
-       lw      a1, PT_R4(sp) /* Arg1 for __NR_syscall case */
+       ld      a1, PT_R4(sp) /* Arg1 for __NR_syscall case */
         .set    pop
  
  1:     jal     syscall_trace_enter
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl

index 15f4117..9392dfe 100644 (file)
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -362,3 +362,7 @@
  421    n32     rt_sigtimedwait_time64          compat_sys_rt_sigtimedwait_time64
  422    n32     futex_time64                    sys_futex
  423    n32     sched_rr_get_interval_time64    sys_sched_rr_get_interval
+424    n32     pidfd_send_signal               sys_pidfd_send_signal
+425    n32     io_uring_setup                  sys_io_uring_setup
+426    n32     io_uring_enter                  sys_io_uring_enter
+427    n32     io_uring_register               sys_io_uring_register
diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl

index c85502e..cd0c8aa 100644 (file)
--- a/arch/mips/kernel/syscalls/syscall_n64.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n64.tbl
@@ -338,3 +338,7 @@
  327    n64     rseq                            sys_rseq
  328    n64     io_pgetevents                   sys_io_pgetevents
  # 329 through 423 are reserved to sync up with other architectures
+424    n64     pidfd_send_signal               sys_pidfd_send_signal
+425    n64     io_uring_setup                  sys_io_uring_setup
+426    n64     io_uring_enter                  sys_io_uring_enter
+427    n64     io_uring_register               sys_io_uring_register
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl

index 2e063d0..e849e8f 100644 (file)
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -411,3 +411,7 @@
  421    o32     rt_sigtimedwait_time64          sys_rt_sigtimedwait             compat_sys_rt_sigtimedwait_time64
  422    o32     futex_time64                    sys_futex                       sys_futex
  423    o32     sched_rr_get_interval_time64    sys_sched_rr_get_interval       sys_sched_rr_get_interval
+424    o32     pidfd_send_signal               sys_pidfd_send_signal
+425    o32     io_uring_setup                  sys_io_uring_setup
+426    o32     io_uring_enter                  sys_io_uring_enter
+427    o32     io_uring_register               sys_io_uring_register
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl

index b26766c..fe8ca62 100644 (file)
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -420,3 +420,7 @@
  421    32      rt_sigtimedwait_time64          sys_rt_sigtimedwait             compat_sys_rt_sigtimedwait_time64
  422    32      futex_time64                    sys_futex                       sys_futex
  423    32      sched_rr_get_interval_time64    sys_sched_rr_get_interval       sys_sched_rr_get_interval
+424    common  pidfd_send_signal               sys_pidfd_send_signal
+425    common  io_uring_setup                  sys_io_uring_setup
+426    common  io_uring_enter                  sys_io_uring_enter
+427    common  io_uring_register               sys_io_uring_register
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl

index b18abb0..00f5a63 100644 (file)
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -505,3 +505,7 @@
  421    32      rt_sigtimedwait_time64          sys_rt_sigtimedwait             compat_sys_rt_sigtimedwait_time64
  422    32      futex_time64                    sys_futex                       sys_futex
  423    32      sched_rr_get_interval_time64    sys_sched_rr_get_interval       sys_sched_rr_get_interval
+424    common  pidfd_send_signal               sys_pidfd_send_signal
+425    common  io_uring_setup                  sys_io_uring_setup
+426    common  io_uring_enter                  sys_io_uring_enter
+427    common  io_uring_register               sys_io_uring_register
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl

index 02579f9..061418f 100644 (file)
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -426,3 +426,7 @@
  421    32      rt_sigtimedwait_time64  -                               compat_sys_rt_sigtimedwait_time64
  422    32      futex_time64            -                               sys_futex
  423    32      sched_rr_get_interval_time64    -                       sys_sched_rr_get_interval
+424  common    pidfd_send_signal       sys_pidfd_send_signal           sys_pidfd_send_signal
+425  common    io_uring_setup          sys_io_uring_setup              sys_io_uring_setup
+426  common    io_uring_enter          sys_io_uring_enter              sys_io_uring_enter
+427  common    io_uring_register       sys_io_uring_register           sys_io_uring_register
diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl

index bfda678..480b057 100644 (file)
--- a/arch/sh/kernel/syscalls/syscall.tbl
+++ b/arch/sh/kernel/syscalls/syscall.tbl
@@ -426,3 +426,7 @@
  421    common  rt_sigtimedwait_time64          sys_rt_sigtimedwait
  422    common  futex_time64                    sys_futex
  423    common  sched_rr_get_interval_time64    sys_sched_rr_get_interval
+424    common  pidfd_send_signal               sys_pidfd_send_signal
+425    common  io_uring_setup                  sys_io_uring_setup
+426    common  io_uring_enter                  sys_io_uring_enter
+427    common  io_uring_register               sys_io_uring_register
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl

index b9a5a04..a1dd243 100644 (file)
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -469,3 +469,7 @@
  421    32      rt_sigtimedwait_time64          sys_rt_sigtimedwait             compat_sys_rt_sigtimedwait_time64
  422    32      futex_time64                    sys_futex                       sys_futex
  423    32      sched_rr_get_interval_time64    sys_sched_rr_get_interval       sys_sched_rr_get_interval
+424    common  pidfd_send_signal               sys_pidfd_send_signal
+425    common  io_uring_setup                  sys_io_uring_setup
+426    common  io_uring_enter                  sys_io_uring_enter
+427    common  io_uring_register               sys_io_uring_register
diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl

index 6af4992..30084ea 100644 (file)
--- a/arch/xtensa/kernel/syscalls/syscall.tbl
+++ b/arch/xtensa/kernel/syscalls/syscall.tbl
@@ -394,3 +394,7 @@
  421    common  rt_sigtimedwait_time64          sys_rt_sigtimedwait
  422    common  futex_time64                    sys_futex
  423    common  sched_rr_get_interval_time64    sys_sched_rr_get_interval
+424    common  pidfd_send_signal               sys_pidfd_send_signal
+425    common  io_uring_setup                  sys_io_uring_setup
+426    common  io_uring_enter                  sys_io_uring_enter
+427    common  io_uring_register               sys_io_uring_register
diff --git a/crypto/lrw.c b/crypto/lrw.c

index 0430ccd..08a0e45 100644 (file)
--- a/crypto/lrw.c
+++ b/crypto/lrw.c
@@ -212,8 +212,12 @@ static void crypt_done(struct crypto_async_request *areq, int err)
  {
         struct skcipher_request *req = areq->data;
  
-       if (!err)
+       if (!err) {
+               struct rctx *rctx = skcipher_request_ctx(req);
+
+               rctx->subreq.base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
                 err = xor_tweak_post(req);
+       }
  
         skcipher_request_complete(req, err);
  }
diff --git a/crypto/xts.c b/crypto/xts.c

index 847f54f..2f94832 100644 (file)
--- a/crypto/xts.c
+++ b/crypto/xts.c
@@ -137,8 +137,12 @@ static void crypt_done(struct crypto_async_request *areq, int err)
  {
         struct skcipher_request *req = areq->data;
  
-       if (!err)
+       if (!err) {
+               struct rctx *rctx = skcipher_request_ctx(req);
+
+               rctx->subreq.base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
                 err = xor_tweak_post(req);
+       }
  
         skcipher_request_complete(req, err);
  }
diff --git a/drivers/atm/firestream.c b/drivers/atm/firestream.c

index 11e1663..b2c06da 100644 (file)
--- a/drivers/atm/firestream.c
+++ b/drivers/atm/firestream.c
@@ -1646,7 +1646,7 @@ static irqreturn_t fs_irq (int irq, void *dev_id)
         }
  
         if (status & ISR_TBRQ_W) {
-               fs_dprintk (FS_DEBUG_IRQ, "Data tramsitted!\n");
+               fs_dprintk (FS_DEBUG_IRQ, "Data transmitted!\n");
                 process_txdone_queue (dev, &dev->tx_relq);
         }
  
diff --git a/drivers/irqchip/irq-ath79-misc.c b/drivers/irqchip/irq-ath79-misc.c

index aa72907..0390603 100644 (file)
--- a/drivers/irqchip/irq-ath79-misc.c
+++ b/drivers/irqchip/irq-ath79-misc.c
@@ -22,6 +22,15 @@
  #define AR71XX_RESET_REG_MISC_INT_ENABLE       4
  
  #define ATH79_MISC_IRQ_COUNT                   32
+#define ATH79_MISC_PERF_IRQ                    5
+
+static int ath79_perfcount_irq;
+
+int get_c0_perfcount_int(void)
+{
+       return ath79_perfcount_irq;
+}
+EXPORT_SYMBOL_GPL(get_c0_perfcount_int);
  
  static void ath79_misc_irq_handler(struct irq_desc *desc)
  {
@@ -113,6 +122,8 @@ static void __init ath79_misc_intc_domain_init(
  {
         void __iomem *base = domain->host_data;
  
+       ath79_perfcount_irq = irq_create_mapping(domain, ATH79_MISC_PERF_IRQ);
+
         /* Disable and clear all interrupts */
         __raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_ENABLE);
         __raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_STATUS);
diff --git a/drivers/net/ethernet/atheros/atlx/atl1.c b/drivers/net/ethernet/atheros/atlx/atl1.c

index 9e07b46..156fbc5 100644 (file)
--- a/drivers/net/ethernet/atheros/atlx/atl1.c
+++ b/drivers/net/ethernet/atheros/atlx/atl1.c
@@ -1721,7 +1721,7 @@ static void atl1_inc_smb(struct atl1_adapter *adapter)
         adapter->soft_stats.scc += smb->tx_1_col;
         adapter->soft_stats.mcc += smb->tx_2_col;
         adapter->soft_stats.latecol += smb->tx_late_col;
-       adapter->soft_stats.tx_underun += smb->tx_underrun;
+       adapter->soft_stats.tx_underrun += smb->tx_underrun;
         adapter->soft_stats.tx_trunc += smb->tx_trunc;
         adapter->soft_stats.tx_pause += smb->tx_pause;
  
@@ -3179,7 +3179,7 @@ static struct atl1_stats atl1_gstrings_stats[] = {
         {"tx_deferred_ok", ATL1_STAT(soft_stats.deffer)},
         {"tx_single_coll_ok", ATL1_STAT(soft_stats.scc)},
         {"tx_multi_coll_ok", ATL1_STAT(soft_stats.mcc)},
-       {"tx_underun", ATL1_STAT(soft_stats.tx_underun)},
+       {"tx_underrun", ATL1_STAT(soft_stats.tx_underrun)},
         {"tx_trunc", ATL1_STAT(soft_stats.tx_trunc)},
         {"tx_pause", ATL1_STAT(soft_stats.tx_pause)},
         {"rx_pause", ATL1_STAT(soft_stats.rx_pause)},
diff --git a/drivers/net/ethernet/atheros/atlx/atl1.h b/drivers/net/ethernet/atheros/atlx/atl1.h

index 34a58cd..eacff19 100644 (file)
--- a/drivers/net/ethernet/atheros/atlx/atl1.h
+++ b/drivers/net/ethernet/atheros/atlx/atl1.h
@@ -681,7 +681,7 @@ struct atl1_sft_stats {
         u64 scc;                /* packets TX after a single collision */
         u64 mcc;                /* packets TX after multiple collisions */
         u64 latecol;            /* TX packets w/ late collisions */
-       u64 tx_underun;         /* TX packets aborted due to TX FIFO underrun
+       u64 tx_underrun;        /* TX packets aborted due to TX FIFO underrun
                                  * or TRD FIFO underrun */
         u64 tx_trunc;           /* TX packets truncated due to size > MTU */
         u64 rx_pause;           /* num Pause packets received. */
diff --git a/drivers/net/ethernet/atheros/atlx/atl2.c b/drivers/net/ethernet/atheros/atlx/atl2.c

index d99317b..98da0fa 100644 (file)
--- a/drivers/net/ethernet/atheros/atlx/atl2.c
+++ b/drivers/net/ethernet/atheros/atlx/atl2.c
@@ -553,7 +553,7 @@ static void atl2_intr_tx(struct atl2_adapter *adapter)
                         netdev->stats.tx_aborted_errors++;
                 if (txs->late_col)
                         netdev->stats.tx_window_errors++;
-               if (txs->underun)
+               if (txs->underrun)
                         netdev->stats.tx_fifo_errors++;
         } while (1);
  
diff --git a/drivers/net/ethernet/atheros/atlx/atl2.h b/drivers/net/ethernet/atheros/atlx/atl2.h

index c64a6bd..25ec84c 100644 (file)
--- a/drivers/net/ethernet/atheros/atlx/atl2.h
+++ b/drivers/net/ethernet/atheros/atlx/atl2.h
@@ -260,7 +260,7 @@ struct tx_pkt_status {
         unsigned multi_col:1;
         unsigned late_col:1;
         unsigned abort_col:1;
-       unsigned underun:1;     /* current packet is aborted
+       unsigned underrun:1;    /* current packet is aborted
                                  * due to txram underrun */
         unsigned:3;             /* reserved */
         unsigned update:1;      /* always 1'b1 in tx_status_buf */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c

index 03b2a9f..cad34d6 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -33,6 +33,26 @@
  #include <linux/bpf_trace.h>
  #include "en/xdp.h"
  
+int mlx5e_xdp_max_mtu(struct mlx5e_params *params)
+{
+       int hr = NET_IP_ALIGN + XDP_PACKET_HEADROOM;
+
+       /* Let S := SKB_DATA_ALIGN(sizeof(struct skb_shared_info)).
+        * The condition checked in mlx5e_rx_is_linear_skb is:
+        *   SKB_DATA_ALIGN(sw_mtu + hard_mtu + hr) + S <= PAGE_SIZE         (1)
+        *   (Note that hw_mtu == sw_mtu + hard_mtu.)
+        * What is returned from this function is:
+        *   max_mtu = PAGE_SIZE - S - hr - hard_mtu                         (2)
+        * After assigning sw_mtu := max_mtu, the left side of (1) turns to
+        * SKB_DATA_ALIGN(PAGE_SIZE - S) + S, which is equal to PAGE_SIZE,
+        * because both PAGE_SIZE and S are already aligned. Any number greater
+        * than max_mtu would make the left side of (1) greater than PAGE_SIZE,
+        * so max_mtu is the maximum MTU allowed.
+        */
+
+       return MLX5E_HW2SW_MTU(params, SKB_MAX_HEAD(hr));
+}
+
  static inline bool
  mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_dma_info *di,
                     struct xdp_buff *xdp)
@@ -304,9 +324,9 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq)
                                         mlx5e_xdpi_fifo_pop(xdpi_fifo);
  
                                 if (is_redirect) {
-                                       xdp_return_frame(xdpi.xdpf);
                                         dma_unmap_single(sq->pdev, xdpi.dma_addr,
                                                          xdpi.xdpf->len, DMA_TO_DEVICE);
+                                       xdp_return_frame(xdpi.xdpf);
                                 } else {
                                         /* Recycle RX page */
                                         mlx5e_page_release(rq, &xdpi.di, true);
@@ -345,9 +365,9 @@ void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq)
                                 mlx5e_xdpi_fifo_pop(xdpi_fifo);
  
                         if (is_redirect) {
-                               xdp_return_frame(xdpi.xdpf);
                                 dma_unmap_single(sq->pdev, xdpi.dma_addr,
                                                  xdpi.xdpf->len, DMA_TO_DEVICE);
+                               xdp_return_frame(xdpi.xdpf);
                         } else {
                                 /* Recycle RX page */
                                 mlx5e_page_release(rq, &xdpi.di, false);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h

index ee27a7c..553956c 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
@@ -34,13 +34,12 @@
  
  #include "en.h"
  
-#define MLX5E_XDP_MAX_MTU ((int)(PAGE_SIZE - \
-                                MLX5_SKB_FRAG_SZ(XDP_PACKET_HEADROOM)))
  #define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
  #define MLX5E_XDP_TX_EMPTY_DS_COUNT \
         (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS)
  #define MLX5E_XDP_TX_DS_COUNT (MLX5E_XDP_TX_EMPTY_DS_COUNT + 1 /* SG DS */)
  
+int mlx5e_xdp_max_mtu(struct mlx5e_params *params);
  bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
                       void *va, u16 *rx_headroom, u32 *len);
  bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c

index 76a3d01..78dc8fe 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -1586,7 +1586,7 @@ static int mlx5e_get_module_info(struct net_device *netdev,
                 break;
         case MLX5_MODULE_ID_SFP:
                 modinfo->type       = ETH_MODULE_SFF_8472;
-               modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN;
+               modinfo->eeprom_len = MLX5_EEPROM_PAGE_LENGTH;
                 break;
         default:
                 netdev_err(priv->netdev, "%s: cable type not recognized:0x%x\n",
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c

index f7eb521..46157e2 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3777,7 +3777,7 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu,
         if (params->xdp_prog &&
             !mlx5e_rx_is_linear_skb(priv->mdev, &new_channels.params)) {
                 netdev_err(netdev, "MTU(%d) > %d is not allowed while XDP enabled\n",
-                          new_mtu, MLX5E_XDP_MAX_MTU);
+                          new_mtu, mlx5e_xdp_max_mtu(params));
                 err = -EINVAL;
                 goto out;
         }
@@ -4212,7 +4212,8 @@ static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog)
  
         if (!mlx5e_rx_is_linear_skb(priv->mdev, &new_channels.params)) {
                 netdev_warn(netdev, "XDP is not allowed with MTU(%d) > %d\n",
-                           new_channels.params.sw_mtu, MLX5E_XDP_MAX_MTU);
+                           new_channels.params.sw_mtu,
+                           mlx5e_xdp_max_mtu(&new_channels.params));
                 return -EINVAL;
         }
  
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c

index 21b7f05..361468e 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
@@ -317,10 +317,6 @@ int mlx5_query_module_eeprom(struct mlx5_core_dev *dev,
                 size -= offset + size - MLX5_EEPROM_PAGE_LENGTH;
  
         i2c_addr = MLX5_I2C_ADDR_LOW;
-       if (offset >= MLX5_EEPROM_PAGE_LENGTH) {
-               i2c_addr = MLX5_I2C_ADDR_HIGH;
-               offset -= MLX5_EEPROM_PAGE_LENGTH;
-       }
  
         MLX5_SET(mcia_reg, in, l, 0);
         MLX5_SET(mcia_reg, in, module, module_num);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h

index ffee38e..8648ca1 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
@@ -27,7 +27,7 @@
  
  #define MLXSW_PCI_SW_RESET                     0xF0010
  #define MLXSW_PCI_SW_RESET_RST_BIT             BIT(0)
-#define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS       13000
+#define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS       20000
  #define MLXSW_PCI_SW_RESET_WAIT_MSECS          100
  #define MLXSW_PCI_FW_READY                     0xA1844
  #define MLXSW_PCI_FW_READY_MASK                        0xFFFF
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c

index 9eb6330..6b8aa37 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -3126,11 +3126,11 @@ mlxsw_sp_port_set_link_ksettings(struct net_device *dev,
         if (err)
                 return err;
  
+       mlxsw_sp_port->link.autoneg = autoneg;
+
         if (!netif_running(dev))
                 return 0;
  
-       mlxsw_sp_port->link.autoneg = autoneg;
-
         mlxsw_sp_port_admin_status_set(mlxsw_sp_port, false);
         mlxsw_sp_port_admin_status_set(mlxsw_sp_port, true);
  
@@ -3316,7 +3316,7 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port)
                 err = mlxsw_sp_port_ets_set(mlxsw_sp_port,
                                             MLXSW_REG_QEEC_HIERARCY_TC,
                                             i + 8, i,
-                                           false, 0);
+                                           true, 100);
                 if (err)
                         return err;
         }
diff --git a/drivers/net/ethernet/netronome/nfp/abm/cls.c b/drivers/net/ethernet/netronome/nfp/abm/cls.c

index 9852080..ff39130 100644 (file)
--- a/drivers/net/ethernet/netronome/nfp/abm/cls.c
+++ b/drivers/net/ethernet/netronome/nfp/abm/cls.c
@@ -39,7 +39,7 @@ nfp_abm_u32_check_knode(struct nfp_abm *abm, struct tc_cls_u32_knode *knode,
         }
         if (knode->sel->off || knode->sel->offshift || knode->sel->offmask ||
             knode->sel->offoff || knode->fshift) {
-               NL_SET_ERR_MSG_MOD(extack, "variable offseting not supported");
+               NL_SET_ERR_MSG_MOD(extack, "variable offsetting not supported");
                 return false;
         }
         if (knode->sel->hoff || knode->sel->hmask) {
@@ -78,7 +78,7 @@ nfp_abm_u32_check_knode(struct nfp_abm *abm, struct tc_cls_u32_knode *knode,
  
         k = &knode->sel->keys[0];
         if (k->offmask) {
-               NL_SET_ERR_MSG_MOD(extack, "offset mask - variable offseting not supported");
+               NL_SET_ERR_MSG_MOD(extack, "offset mask - variable offsetting not supported");
                 return false;
         }
         if (k->off) {
diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c

index a181497..cba5881 100644 (file)
--- a/drivers/net/ethernet/socionext/netsec.c
+++ b/drivers/net/ethernet/socionext/netsec.c
@@ -673,7 +673,8 @@ static void netsec_process_tx(struct netsec_priv *priv)
  }
  
  static void *netsec_alloc_rx_data(struct netsec_priv *priv,
-                                 dma_addr_t *dma_handle, u16 *desc_len)
+                                 dma_addr_t *dma_handle, u16 *desc_len,
+                                 bool napi)
  {
         size_t total_len = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
         size_t payload_len = NETSEC_RX_BUF_SZ;
@@ -682,7 +683,7 @@ static void *netsec_alloc_rx_data(struct netsec_priv *priv,
  
         total_len += SKB_DATA_ALIGN(payload_len + NETSEC_SKB_PAD);
  
-       buf = napi_alloc_frag(total_len);
+       buf = napi ? napi_alloc_frag(total_len) : netdev_alloc_frag(total_len);
         if (!buf)
                 return NULL;
  
@@ -765,7 +766,8 @@ static int netsec_process_rx(struct netsec_priv *priv, int budget)
                 /* allocate a fresh buffer and map it to the hardware.
                  * This will eventually replace the old buffer in the hardware
                  */
-               buf_addr = netsec_alloc_rx_data(priv, &dma_handle, &desc_len);
+               buf_addr = netsec_alloc_rx_data(priv, &dma_handle, &desc_len,
+                                               true);
                 if (unlikely(!buf_addr))
                         break;
  
@@ -1069,7 +1071,8 @@ static int netsec_setup_rx_dring(struct netsec_priv *priv)
                 void *buf;
                 u16 len;
  
-               buf = netsec_alloc_rx_data(priv, &dma_handle, &len);
+               buf = netsec_alloc_rx_data(priv, &dma_handle, &len,
+                                          false);
                 if (!buf) {
                         netsec_uninit_pkt_dring(priv, NETSEC_RING_RX);
                         goto err_out;
diff --git a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c

index b7dd4e3..6d69067 100644 (file)
--- a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
@@ -140,7 +140,7 @@ static void ndesc_init_rx_desc(struct dma_desc *p, int disable_rx_ic, int mode,
         p->des0 |= cpu_to_le32(RDES0_OWN);
  
         bfsize1 = min(bfsize, BUF_SIZE_2KiB - 1);
-       p->des1 |= cpu_to_le32(bfsize & RDES1_BUFFER1_SIZE_MASK);
+       p->des1 |= cpu_to_le32(bfsize1 & RDES1_BUFFER1_SIZE_MASK);
  
         if (mode == STMMAC_CHAIN_MODE)
                 ndesc_rx_set_on_chain(p, end);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c

index a26e36d..4871243 100644 (file)
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -2616,8 +2616,6 @@ static int stmmac_open(struct net_device *dev)
         u32 chan;
         int ret;
  
-       stmmac_check_ether_addr(priv);
-
         if (priv->hw->pcs != STMMAC_PCS_RGMII &&
             priv->hw->pcs != STMMAC_PCS_TBI &&
             priv->hw->pcs != STMMAC_PCS_RTBI) {
@@ -4303,6 +4301,8 @@ int stmmac_dvr_probe(struct device *device,
         if (ret)
                 goto error_hw_init;
  
+       stmmac_check_ether_addr(priv);
+
         /* Configure real RX and TX queues */
         netif_set_real_num_rx_queues(ndev, priv->plat->rx_queues_to_use);
         netif_set_real_num_tx_queues(ndev, priv->plat->tx_queues_to_use);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c

index d819e8e..cc1e887 100644 (file)
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
@@ -159,6 +159,12 @@ static const struct dmi_system_id quark_pci_dmi[] = {
                 },
                 .driver_data = (void *)&galileo_stmmac_dmi_data,
         },
+       /*
+        * There are 2 types of SIMATIC IOT2000: IOT2020 and IOT2040.
+        * The asset tag "6ES7647-0AA00-0YA2" is only for IOT2020 which
+        * has only one pci network device while other asset tags are
+        * for IOT2040 which has two.
+        */
         {
                 .matches = {
                         DMI_EXACT_MATCH(DMI_BOARD_NAME, "SIMATIC IOT2000"),
@@ -170,8 +176,6 @@ static const struct dmi_system_id quark_pci_dmi[] = {
         {
                 .matches = {
                         DMI_EXACT_MATCH(DMI_BOARD_NAME, "SIMATIC IOT2000"),
-                       DMI_EXACT_MATCH(DMI_BOARD_ASSET_TAG,
-                                       "6ES7647-0AA00-1YA2"),
                 },
                 .driver_data = (void *)&iot2040_stmmac_dmi_data,
         },
diff --git a/drivers/net/phy/spi_ks8995.c b/drivers/net/phy/spi_ks8995.c

index 92b64e2..7475cef 100644 (file)
--- a/drivers/net/phy/spi_ks8995.c
+++ b/drivers/net/phy/spi_ks8995.c
@@ -159,6 +159,14 @@ static const struct spi_device_id ks8995_id[] = {
  };
  MODULE_DEVICE_TABLE(spi, ks8995_id);
  
+static const struct of_device_id ks8895_spi_of_match[] = {
+        { .compatible = "micrel,ks8995" },
+        { .compatible = "micrel,ksz8864" },
+        { .compatible = "micrel,ksz8795" },
+        { },
+ };
+MODULE_DEVICE_TABLE(of, ks8895_spi_of_match);
+
  static inline u8 get_chip_id(u8 val)
  {
         return (val >> ID1_CHIPID_S) & ID1_CHIPID_M;
@@ -526,6 +534,7 @@ static int ks8995_remove(struct spi_device *spi)
  static struct spi_driver ks8995_driver = {
         .driver = {
                 .name       = "spi-ks8995",
+               .of_match_table = of_match_ptr(ks8895_spi_of_match),
         },
         .probe    = ks8995_probe,
         .remove   = ks8995_remove,
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c

index 9ce61b0..16963f7 100644 (file)
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -1156,6 +1156,13 @@ static int team_port_add(struct team *team, struct net_device *port_dev,
                 return -EINVAL;
         }
  
+       if (netdev_has_upper_dev(dev, port_dev)) {
+               NL_SET_ERR_MSG(extack, "Device is already an upper device of the team interface");
+               netdev_err(dev, "Device %s is already an upper device of the team interface\n",
+                          portname);
+               return -EBUSY;
+       }
+
         if (port_dev->features & NETIF_F_VLAN_CHALLENGED &&
             vlan_uses_dev(dev)) {
                 NL_SET_ERR_MSG(extack, "Device is VLAN challenged and team device has VLAN set up");
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c

index cd15c32..9ee4d74 100644 (file)
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -875,6 +875,7 @@ static const struct net_device_ops vrf_netdev_ops = {
         .ndo_init               = vrf_dev_init,
         .ndo_uninit             = vrf_dev_uninit,
         .ndo_start_xmit         = vrf_xmit,
+       .ndo_set_mac_address    = eth_mac_addr,
         .ndo_get_stats64        = vrf_get_stats64,
         .ndo_add_slave          = vrf_add_slave,
         .ndo_del_slave          = vrf_del_slave,
@@ -1274,6 +1275,7 @@ static void vrf_setup(struct net_device *dev)
         /* default to no qdisc; user can add if desired */
         dev->priv_flags |= IFF_NO_QUEUE;
         dev->priv_flags |= IFF_NO_RX_HANDLER;
+       dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
  
         /* VRF devices do not care about MTU, but if the MTU is set
          * too low then the ipv4 and ipv6 protocols are disabled
diff --git a/drivers/nfc/st95hf/core.c b/drivers/nfc/st95hf/core.c

index 2b26f76..01acb6e 100644 (file)
--- a/drivers/nfc/st95hf/core.c
+++ b/drivers/nfc/st95hf/core.c
@@ -1074,6 +1074,12 @@ static const struct spi_device_id st95hf_id[] = {
  };
  MODULE_DEVICE_TABLE(spi, st95hf_id);
  
+static const struct of_device_id st95hf_spi_of_match[] = {
+        { .compatible = "st,st95hf" },
+        { },
+};
+MODULE_DEVICE_TABLE(of, st95hf_spi_of_match);
+
  static int st95hf_probe(struct spi_device *nfc_spi_dev)
  {
         int ret;
@@ -1260,6 +1266,7 @@ static struct spi_driver st95hf_driver = {
         .driver = {
                 .name = "st95hf",
                 .owner = THIS_MODULE,
+               .of_match_table = of_match_ptr(st95hf_spi_of_match),
         },
         .id_table = st95hf_id,
         .probe = st95hf_probe,
diff --git a/drivers/of/of_net.c b/drivers/of/of_net.c

index 810ab0f..d820f3e 100644 (file)
--- a/drivers/of/of_net.c
+++ b/drivers/of/of_net.c
@@ -7,7 +7,6 @@
   */
  #include <linux/etherdevice.h>
  #include <linux/kernel.h>
-#include <linux/nvmem-consumer.h>
  #include <linux/of_net.h>
  #include <linux/phy.h>
  #include <linux/export.h>
diff --git a/drivers/s390/net/ctcm_main.c b/drivers/s390/net/ctcm_main.c

index 7617d21..f63c5c8 100644 (file)
--- a/drivers/s390/net/ctcm_main.c
+++ b/drivers/s390/net/ctcm_main.c
@@ -1595,6 +1595,7 @@ static int ctcm_new_device(struct ccwgroup_device *cgdev)
                 if (priv->channel[direction] == NULL) {
                         if (direction == CTCM_WRITE)
                                 channel_free(priv->channel[CTCM_READ]);
+                       result = -ENODEV;
                         goto out_dev;
                 }
                 priv->channel[direction]->netdev = dev;
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c

index 920bf3b..cccc75d 100644 (file)
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -7,6 +7,7 @@
  #include <linux/slab.h>
  #include <linux/pagemap.h>
  #include <linux/highmem.h>
+#include <linux/sched/mm.h>
  #include "ctree.h"
  #include "disk-io.h"
  #include "transaction.h"
@@ -427,9 +428,13 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
         unsigned long this_sum_bytes = 0;
         int i;
         u64 offset;
+       unsigned nofs_flag;
+
+       nofs_flag = memalloc_nofs_save();
+       sums = kvzalloc(btrfs_ordered_sum_size(fs_info, bio->bi_iter.bi_size),
+                      GFP_KERNEL);
+       memalloc_nofs_restore(nofs_flag);
  
-       sums = kzalloc(btrfs_ordered_sum_size(fs_info, bio->bi_iter.bi_size),
-                      GFP_NOFS);
         if (!sums)
                 return BLK_STS_RESOURCE;
  
@@ -472,8 +477,10 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
  
                                 bytes_left = bio->bi_iter.bi_size - total_bytes;
  
-                               sums = kzalloc(btrfs_ordered_sum_size(fs_info, bytes_left),
-                                              GFP_NOFS);
+                               nofs_flag = memalloc_nofs_save();
+                               sums = kvzalloc(btrfs_ordered_sum_size(fs_info,
+                                                     bytes_left), GFP_KERNEL);
+                               memalloc_nofs_restore(nofs_flag);
                                 BUG_ON(!sums); /* -ENOMEM */
                                 sums->len = bytes_left;
                                 ordered = btrfs_lookup_ordered_extent(inode,
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c

index 6fde2b2..45e3cfd 100644 (file)
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -6,6 +6,7 @@
  #include <linux/slab.h>
  #include <linux/blkdev.h>
  #include <linux/writeback.h>
+#include <linux/sched/mm.h>
  #include "ctree.h"
  #include "transaction.h"
  #include "btrfs_inode.h"
@@ -442,7 +443,7 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
                         cur = entry->list.next;
                         sum = list_entry(cur, struct btrfs_ordered_sum, list);
                         list_del(&sum->list);
-                       kfree(sum);
+                       kvfree(sum);
                 }
                 kmem_cache_free(btrfs_ordered_extent_cache, entry);
         }
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c

index a8f4298..0637149 100644 (file)
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -1766,6 +1766,7 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
  unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn)
  {
         struct ceph_inode_info *dci = ceph_inode(dir);
+       unsigned hash;
  
         switch (dci->i_dir_layout.dl_dir_hash) {
         case 0: /* for backward compat */
@@ -1773,8 +1774,11 @@ unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn)
                 return dn->d_name.hash;
  
         default:
-               return ceph_str_hash(dci->i_dir_layout.dl_dir_hash,
+               spin_lock(&dn->d_lock);
+               hash = ceph_str_hash(dci->i_dir_layout.dl_dir_hash,
                                      dn->d_name.name, dn->d_name.len);
+               spin_unlock(&dn->d_lock);
+               return hash;
         }
  }
  
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c

index 2d61ddd..c2feb31 100644 (file)
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1163,6 +1163,19 @@ static int splice_dentry(struct dentry **pdn, struct inode *in)
         return 0;
  }
  
+static int d_name_cmp(struct dentry *dentry, const char *name, size_t len)
+{
+       int ret;
+
+       /* take d_lock to ensure dentry->d_name stability */
+       spin_lock(&dentry->d_lock);
+       ret = dentry->d_name.len - len;
+       if (!ret)
+               ret = memcmp(dentry->d_name.name, name, len);
+       spin_unlock(&dentry->d_lock);
+       return ret;
+}
+
  /*
   * Incorporate results into the local cache.  This is either just
   * one inode, or a directory, dentry, and possibly linked-to inode (e.g.,
@@ -1412,7 +1425,8 @@ retry_lookup:
                 err = splice_dentry(&req->r_dentry, in);
                 if (err < 0)
                         goto done;
-       } else if (rinfo->head->is_dentry) {
+       } else if (rinfo->head->is_dentry &&
+                  !d_name_cmp(req->r_dentry, rinfo->dname, rinfo->dname_len)) {
                 struct ceph_vino *ptvino = NULL;
  
                 if ((le32_to_cpu(rinfo->diri.in->cap.caps) & CEPH_CAP_FILE_SHARED) ||
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c

index 21c33ed..9049c2a 100644 (file)
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1414,6 +1414,15 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
                         list_add(&ci->i_prealloc_cap_flush->i_list, &to_remove);
                         ci->i_prealloc_cap_flush = NULL;
                 }
+
+               if (drop &&
+                  ci->i_wrbuffer_ref_head == 0 &&
+                  ci->i_wr_ref == 0 &&
+                  ci->i_dirty_caps == 0 &&
+                  ci->i_flushing_caps == 0) {
+                      ceph_put_snap_context(ci->i_head_snapc);
+                      ci->i_head_snapc = NULL;
+               }
         }
         spin_unlock(&ci->i_ceph_lock);
         while (!list_empty(&to_remove)) {
@@ -2161,10 +2170,39 @@ retry:
         return path;
  }
  
+/* Duplicate the dentry->d_name.name safely */
+static int clone_dentry_name(struct dentry *dentry, const char **ppath,
+                            int *ppathlen)
+{
+       u32 len;
+       char *name;
+
+retry:
+       len = READ_ONCE(dentry->d_name.len);
+       name = kmalloc(len + 1, GFP_NOFS);
+       if (!name)
+               return -ENOMEM;
+
+       spin_lock(&dentry->d_lock);
+       if (dentry->d_name.len != len) {
+               spin_unlock(&dentry->d_lock);
+               kfree(name);
+               goto retry;
+       }
+       memcpy(name, dentry->d_name.name, len);
+       spin_unlock(&dentry->d_lock);
+
+       name[len] = '\0';
+       *ppath = name;
+       *ppathlen = len;
+       return 0;
+}
+
  static int build_dentry_path(struct dentry *dentry, struct inode *dir,
                              const char **ppath, int *ppathlen, u64 *pino,
-                            int *pfreepath)
+                            bool *pfreepath, bool parent_locked)
  {
+       int ret;
         char *path;
  
         rcu_read_lock();
@@ -2173,8 +2211,15 @@ static int build_dentry_path(struct dentry *dentry, struct inode *dir,
         if (dir && ceph_snap(dir) == CEPH_NOSNAP) {
                 *pino = ceph_ino(dir);
                 rcu_read_unlock();
-               *ppath = dentry->d_name.name;
-               *ppathlen = dentry->d_name.len;
+               if (parent_locked) {
+                       *ppath = dentry->d_name.name;
+                       *ppathlen = dentry->d_name.len;
+               } else {
+                       ret = clone_dentry_name(dentry, ppath, ppathlen);
+                       if (ret)
+                               return ret;
+                       *pfreepath = true;
+               }
                 return 0;
         }
         rcu_read_unlock();
@@ -2182,13 +2227,13 @@ static int build_dentry_path(struct dentry *dentry, struct inode *dir,
         if (IS_ERR(path))
                 return PTR_ERR(path);
         *ppath = path;
-       *pfreepath = 1;
+       *pfreepath = true;
         return 0;
  }
  
  static int build_inode_path(struct inode *inode,
                             const char **ppath, int *ppathlen, u64 *pino,
-                           int *pfreepath)
+                           bool *pfreepath)
  {
         struct dentry *dentry;
         char *path;
@@ -2204,7 +2249,7 @@ static int build_inode_path(struct inode *inode,
         if (IS_ERR(path))
                 return PTR_ERR(path);
         *ppath = path;
-       *pfreepath = 1;
+       *pfreepath = true;
         return 0;
  }
  
@@ -2215,7 +2260,7 @@ static int build_inode_path(struct inode *inode,
  static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
                                   struct inode *rdiri, const char *rpath,
                                   u64 rino, const char **ppath, int *pathlen,
-                                 u64 *ino, int *freepath)
+                                 u64 *ino, bool *freepath, bool parent_locked)
  {
         int r = 0;
  
@@ -2225,7 +2270,7 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
                      ceph_snap(rinode));
         } else if (rdentry) {
                 r = build_dentry_path(rdentry, rdiri, ppath, pathlen, ino,
-                                       freepath);
+                                       freepath, parent_locked);
                 dout(" dentry %p %llx/%.*s\n", rdentry, *ino, *pathlen,
                      *ppath);
         } else if (rpath || rino) {
@@ -2251,7 +2296,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
         const char *path2 = NULL;
         u64 ino1 = 0, ino2 = 0;
         int pathlen1 = 0, pathlen2 = 0;
-       int freepath1 = 0, freepath2 = 0;
+       bool freepath1 = false, freepath2 = false;
         int len;
         u16 releases;
         void *p, *end;
@@ -2259,16 +2304,19 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
  
         ret = set_request_path_attr(req->r_inode, req->r_dentry,
                               req->r_parent, req->r_path1, req->r_ino1.ino,
-                             &path1, &pathlen1, &ino1, &freepath1);
+                             &path1, &pathlen1, &ino1, &freepath1,
+                             test_bit(CEPH_MDS_R_PARENT_LOCKED,
+                                       &req->r_req_flags));
         if (ret < 0) {
                 msg = ERR_PTR(ret);
                 goto out;
         }
  
+       /* If r_old_dentry is set, then assume that its parent is locked */
         ret = set_request_path_attr(NULL, req->r_old_dentry,
                               req->r_old_dentry_dir,
                               req->r_path2, req->r_ino2.ino,
-                             &path2, &pathlen2, &ino2, &freepath2);
+                             &path2, &pathlen2, &ino2, &freepath2, true);
         if (ret < 0) {
                 msg = ERR_PTR(ret);
                 goto out_free1;
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c

index 89aa37f..b26e12c 100644 (file)
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -572,7 +572,12 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
         old_snapc = NULL;
  
  update_snapc:
-       if (ci->i_head_snapc) {
+       if (ci->i_wrbuffer_ref_head == 0 &&
+           ci->i_wr_ref == 0 &&
+           ci->i_dirty_caps == 0 &&
+           ci->i_flushing_caps == 0) {
+               ci->i_head_snapc = NULL;
+       } else {
                 ci->i_head_snapc = ceph_get_snap_context(new_snapc);
                 dout(" new snapc is %p\n", new_snapc);
         }
diff --git a/fs/cifs/file.c b/fs/cifs/file.c

index 9c0ccc0..7037a13 100644 (file)
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2877,7 +2877,6 @@ static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
         struct cifs_tcon *tcon;
         struct cifs_sb_info *cifs_sb;
         struct dentry *dentry = ctx->cfile->dentry;
-       unsigned int i;
         int rc;
  
         tcon = tlink_tcon(ctx->cfile->tlink);
@@ -2941,10 +2940,6 @@ restart_loop:
                 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
         }
  
-       if (!ctx->direct_io)
-               for (i = 0; i < ctx->npages; i++)
-                       put_page(ctx->bv[i].bv_page);
-
         cifs_stats_bytes_written(tcon, ctx->total_len);
         set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
  
@@ -3582,7 +3577,6 @@ collect_uncached_read_data(struct cifs_aio_ctx *ctx)
         struct iov_iter *to = &ctx->iter;
         struct cifs_sb_info *cifs_sb;
         struct cifs_tcon *tcon;
-       unsigned int i;
         int rc;
  
         tcon = tlink_tcon(ctx->cfile->tlink);
@@ -3666,15 +3660,8 @@ again:
                 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
         }
  
-       if (!ctx->direct_io) {
-               for (i = 0; i < ctx->npages; i++) {
-                       if (ctx->should_dirty)
-                               set_page_dirty(ctx->bv[i].bv_page);
-                       put_page(ctx->bv[i].bv_page);
-               }
-
+       if (!ctx->direct_io)
                 ctx->total_len = ctx->len - iov_iter_count(to);
-       }
  
         /* mask nodata case */
         if (rc == -ENODATA)
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c

index 53fdb5d..538fd7d 100644 (file)
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1735,6 +1735,10 @@ cifs_do_rename(const unsigned int xid, struct dentry *from_dentry,
         if (rc == 0 || rc != -EBUSY)
                 goto do_rename_exit;
  
+       /* Don't fall back to using SMB on SMB 2+ mount */
+       if (server->vals->protocol_id != 0)
+               goto do_rename_exit;
+
         /* open-file renames don't work across directories */
         if (to_dentry->d_parent != from_dentry->d_parent)
                 goto do_rename_exit;
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c

index 1e1626a..0dc6f08 100644 (file)
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -789,6 +789,11 @@ cifs_aio_ctx_alloc(void)
  {
         struct cifs_aio_ctx *ctx;
  
+       /*
+        * Must use kzalloc to initialize ctx->bv to NULL and ctx->direct_io
+        * to false so that we know when we have to unreference pages within
+        * cifs_aio_ctx_release()
+        */
         ctx = kzalloc(sizeof(struct cifs_aio_ctx), GFP_KERNEL);
         if (!ctx)
                 return NULL;
@@ -807,7 +812,23 @@ cifs_aio_ctx_release(struct kref *refcount)
                                         struct cifs_aio_ctx, refcount);
  
         cifsFileInfo_put(ctx->cfile);
-       kvfree(ctx->bv);
+
+       /*
+        * ctx->bv is only set if setup_aio_ctx_iter() was called successfully
+        * which means that iov_iter_get_pages() succeeded and thus that
+        * we have taken a reference on the pages.
+        */
+       if (ctx->bv) {
+               unsigned i;
+
+               for (i = 0; i < ctx->npages; i++) {
+                       if (ctx->should_dirty)
+                               set_page_dirty(ctx->bv[i].bv_page);
+                       put_page(ctx->bv[i].bv_page);
+               }
+               kvfree(ctx->bv);
+       }
+
         kfree(ctx);
  }
  
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c

index b8f7262..a37774a 100644 (file)
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -3466,6 +3466,7 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms,
                                     io_parms->tcon->tid, ses->Suid,
                                     io_parms->offset, 0);
                 free_rsp_buf(resp_buftype, rsp_iov.iov_base);
+               cifs_small_buf_release(req);
                 return rc == -ENODATA ? 0 : rc;
         } else
                 trace_smb3_read_done(xid, req->PersistentFileId,
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c

index 8f933e8..9bc32af 100644 (file)
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -442,7 +442,9 @@ nfsd3_proc_readdir(struct svc_rqst *rqstp)
         struct nfsd3_readdirargs *argp = rqstp->rq_argp;
         struct nfsd3_readdirres  *resp = rqstp->rq_resp;
         __be32          nfserr;
-       int             count;
+       int             count = 0;
+       struct page     **p;
+       caddr_t         page_addr = NULL;
  
         dprintk("nfsd: READDIR(3)  %s %d bytes at %d\n",
                                 SVCFH_fmt(&argp->fh),
@@ -462,7 +464,18 @@ nfsd3_proc_readdir(struct svc_rqst *rqstp)
         nfserr = nfsd_readdir(rqstp, &resp->fh, (loff_t*) &argp->cookie, 
                                         &resp->common, nfs3svc_encode_entry);
         memcpy(resp->verf, argp->verf, 8);
-       resp->count = resp->buffer - argp->buffer;
+       count = 0;
+       for (p = rqstp->rq_respages + 1; p < rqstp->rq_next_page; p++) {
+               page_addr = page_address(*p);
+
+               if (((caddr_t)resp->buffer >= page_addr) &&
+                   ((caddr_t)resp->buffer < page_addr + PAGE_SIZE)) {
+                       count += (caddr_t)resp->buffer - page_addr;
+                       break;
+               }
+               count += PAGE_SIZE;
+       }
+       resp->count = count >> 2;
         if (resp->offset) {
                 loff_t offset = argp->cookie;
  
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c

index 93fea24..8d78912 100644 (file)
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -573,6 +573,7 @@ int
  nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p)
  {
         struct nfsd3_readdirargs *args = rqstp->rq_argp;
+       int len;
         u32 max_blocksize = svc_max_payload(rqstp);
  
         p = decode_fh(p, &args->fh);
@@ -582,8 +583,14 @@ nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p)
         args->verf   = p; p += 2;
         args->dircount = ~0;
         args->count  = ntohl(*p++);
-       args->count  = min_t(u32, args->count, max_blocksize);
-       args->buffer = page_address(*(rqstp->rq_next_page++));
+       len = args->count  = min_t(u32, args->count, max_blocksize);
+
+       while (len > 0) {
+               struct page *p = *(rqstp->rq_next_page++);
+               if (!args->buffer)
+                       args->buffer = page_address(p);
+               len -= PAGE_SIZE;
+       }
  
         return xdr_argsize_check(rqstp, p);
  }
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c

index d219159..7caa380 100644 (file)
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -1010,8 +1010,9 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
         cb->cb_seq_status = 1;
         cb->cb_status = 0;
         if (minorversion) {
-               if (!nfsd41_cb_get_slot(clp, task))
+               if (!cb->cb_holds_slot && !nfsd41_cb_get_slot(clp, task))
                         return;
+               cb->cb_holds_slot = true;
         }
         rpc_call_start(task);
  }
@@ -1038,6 +1039,9 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback
                 return true;
         }
  
+       if (!cb->cb_holds_slot)
+               goto need_restart;
+
         switch (cb->cb_seq_status) {
         case 0:
                 /*
@@ -1076,6 +1080,7 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback
                         cb->cb_seq_status);
         }
  
+       cb->cb_holds_slot = false;
         clear_bit(0, &clp->cl_cb_slot_busy);
         rpc_wake_up_next(&clp->cl_cb_waitq);
         dprintk("%s: freed slot, new seqid=%d\n", __func__,
@@ -1283,6 +1288,7 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
         cb->cb_seq_status = 1;
         cb->cb_status = 0;
         cb->cb_need_restart = false;
+       cb->cb_holds_slot = false;
  }
  
  void nfsd4_run_cb(struct nfsd4_callback *cb)
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c

index 6a45fb0..f056b1d 100644 (file)
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -265,6 +265,7 @@ find_or_allocate_block(struct nfs4_lockowner *lo, struct knfsd_fh *fh,
  static void
  free_blocked_lock(struct nfsd4_blocked_lock *nbl)
  {
+       locks_delete_block(&nbl->nbl_lock);
         locks_release_private(&nbl->nbl_lock);
         kfree(nbl);
  }
@@ -293,11 +294,18 @@ remove_blocked_locks(struct nfs4_lockowner *lo)
                 nbl = list_first_entry(&reaplist, struct nfsd4_blocked_lock,
                                         nbl_lru);
                 list_del_init(&nbl->nbl_lru);
-               locks_delete_block(&nbl->nbl_lock);
                 free_blocked_lock(nbl);
         }
  }
  
+static void
+nfsd4_cb_notify_lock_prepare(struct nfsd4_callback *cb)
+{
+       struct nfsd4_blocked_lock       *nbl = container_of(cb,
+                                               struct nfsd4_blocked_lock, nbl_cb);
+       locks_delete_block(&nbl->nbl_lock);
+}
+
  static int
  nfsd4_cb_notify_lock_done(struct nfsd4_callback *cb, struct rpc_task *task)
  {
@@ -325,6 +333,7 @@ nfsd4_cb_notify_lock_release(struct nfsd4_callback *cb)
  }
  
  static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops = {
+       .prepare        = nfsd4_cb_notify_lock_prepare,
         .done           = nfsd4_cb_notify_lock_done,
         .release        = nfsd4_cb_notify_lock_release,
  };
@@ -4863,7 +4872,6 @@ nfs4_laundromat(struct nfsd_net *nn)
                 nbl = list_first_entry(&reaplist,
                                         struct nfsd4_blocked_lock, nbl_lru);
                 list_del_init(&nbl->nbl_lru);
-               locks_delete_block(&nbl->nbl_lock);
                 free_blocked_lock(nbl);
         }
  out:
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h

index 396c767..9d6cb24 100644 (file)
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -70,6 +70,7 @@ struct nfsd4_callback {
         int cb_seq_status;
         int cb_status;
         bool cb_need_restart;
+       bool cb_holds_slot;
  };
  
  struct nfsd4_callback_ops {
diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h

index e2f3b21..aa8bfd6 100644 (file)
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -448,6 +448,18 @@ static inline void eth_addr_dec(u8 *addr)
         u64_to_ether_addr(u, addr);
  }
  
+/**
+ * eth_addr_inc() - Increment the given MAC address.
+ * @addr: Pointer to a six-byte array containing Ethernet address to increment.
+ */
+static inline void eth_addr_inc(u8 *addr)
+{
+       u64 u = ether_addr_to_u64(addr);
+
+       u++;
+       u64_to_ether_addr(u, addr);
+}
+
  /**
   * is_etherdev_addr - Tell if given Ethernet address belongs to the device.
   * @dev: Pointer to a device structure
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h

index 5ee7b30..d2bc733 100644 (file)
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -316,6 +316,8 @@ struct nf_conn *nf_ct_tmpl_alloc(struct net *net,
                                  gfp_t flags);
  void nf_ct_tmpl_free(struct nf_conn *tmpl);
  
+u32 nf_ct_get_id(const struct nf_conn *ct);
+
  static inline void
  nf_ct_set(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info info)
  {
diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h

index 7780875..a49edfd 100644 (file)
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -75,6 +75,12 @@ bool nf_conntrack_invert_icmp_tuple(struct nf_conntrack_tuple *tuple,
  bool nf_conntrack_invert_icmpv6_tuple(struct nf_conntrack_tuple *tuple,
                                       const struct nf_conntrack_tuple *orig);
  
+int nf_conntrack_inet_error(struct nf_conn *tmpl, struct sk_buff *skb,
+                           unsigned int dataoff,
+                           const struct nf_hook_state *state,
+                           u8 l4proto,
+                           union nf_inet_addr *outer_daddr);
+
  int nf_conntrack_icmpv4_error(struct nf_conn *tmpl,
                               struct sk_buff *skb,
                               unsigned int dataoff,
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c

index eb15891..3cad01a 100644 (file)
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -2032,7 +2032,8 @@ static int ebt_size_mwt(struct compat_ebt_entry_mwt *match32,
                 if (match_kern)
                         match_kern->match_size = ret;
  
-               if (WARN_ON(type == EBT_COMPAT_TARGET && size_left))
+               /* rule should have no remaining data after target */
+               if (type == EBT_COMPAT_TARGET && size_left)
                         return -EINVAL;
  
                 match32 = (struct compat_ebt_entry_mwt *) buf;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c

index 88ce038..6fdf1c1 100644 (file)
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1183,25 +1183,39 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
         return dst;
  }
  
-static void ipv4_link_failure(struct sk_buff *skb)
+static void ipv4_send_dest_unreach(struct sk_buff *skb)
  {
         struct ip_options opt;
-       struct rtable *rt;
         int res;
  
         /* Recompile ip options since IPCB may not be valid anymore.
+        * Also check we have a reasonable ipv4 header.
          */
-       memset(&opt, 0, sizeof(opt));
-       opt.optlen = ip_hdr(skb)->ihl*4 - sizeof(struct iphdr);
+       if (!pskb_network_may_pull(skb, sizeof(struct iphdr)) ||
+           ip_hdr(skb)->version != 4 || ip_hdr(skb)->ihl < 5)
+               return;
  
-       rcu_read_lock();
-       res = __ip_options_compile(dev_net(skb->dev), &opt, skb, NULL);
-       rcu_read_unlock();
+       memset(&opt, 0, sizeof(opt));
+       if (ip_hdr(skb)->ihl > 5) {
+               if (!pskb_network_may_pull(skb, ip_hdr(skb)->ihl * 4))
+                       return;
+               opt.optlen = ip_hdr(skb)->ihl * 4 - sizeof(struct iphdr);
  
-       if (res)
-               return;
+               rcu_read_lock();
+               res = __ip_options_compile(dev_net(skb->dev), &opt, skb, NULL);
+               rcu_read_unlock();
  
+               if (res)
+                       return;
+       }
         __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, &opt);
+}
+
+static void ipv4_link_failure(struct sk_buff *skb)
+{
+       struct rtable *rt;
+
+       ipv4_send_dest_unreach(skb);
  
         rt = skb_rtable(skb);
         if (rt)
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c

index ba0fc4b..eeb4041 100644 (file)
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -49,6 +49,7 @@ static int ip_ping_group_range_min[] = { 0, 0 };
  static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
  static int comp_sack_nr_max = 255;
  static u32 u32_max_div_HZ = UINT_MAX / HZ;
+static int one_day_secs = 24 * 3600;
  
  /* obsolete */
  static int sysctl_tcp_low_latency __read_mostly;
@@ -1151,7 +1152,9 @@ static struct ctl_table ipv4_net_table[] = {
                 .data           = &init_net.ipv4.sysctl_tcp_min_rtt_wlen,
                 .maxlen         = sizeof(int),
                 .mode           = 0644,
-               .proc_handler   = proc_dointvec
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &zero,
+               .extra2         = &one_day_secs
         },
         {
                 .procname       = "tcp_autocorking",
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c

index d43d076..1766325 100644 (file)
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -476,7 +476,7 @@ static int ip6addrlbl_valid_dump_req(const struct nlmsghdr *nlh,
         }
  
         if (nlmsg_attrlen(nlh, sizeof(*ifal))) {
-               NL_SET_ERR_MSG_MOD(extack, "Invalid data after header for address label dump requewst");
+               NL_SET_ERR_MSG_MOD(extack, "Invalid data after header for address label dump request");
                 return -EINVAL;
         }
  
diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c

index dc07fcc..802db01 100644 (file)
--- a/net/ncsi/ncsi-rsp.c
+++ b/net/ncsi/ncsi-rsp.c
@@ -11,6 +11,7 @@
  #include <linux/kernel.h>
  #include <linux/init.h>
  #include <linux/netdevice.h>
+#include <linux/etherdevice.h>
  #include <linux/skbuff.h>
  
  #include <net/ncsi.h>
@@ -667,7 +668,10 @@ static int ncsi_rsp_handler_oem_bcm_gma(struct ncsi_request *nr)
         ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
         memcpy(saddr.sa_data, &rsp->data[BCM_MAC_ADDR_OFFSET], ETH_ALEN);
         /* Increase mac address by 1 for BMC's address */
-       saddr.sa_data[ETH_ALEN - 1]++;
+       eth_addr_inc((u8 *)saddr.sa_data);
+       if (!is_valid_ether_addr((const u8 *)saddr.sa_data))
+               return -ENXIO;
+
         ret = ops->ndo_set_mac_address(ndev, &saddr);
         if (ret < 0)
                 netdev_warn(ndev, "NCSI: 'Writing mac address to device failed\n");
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c

index 43bbaa3..1445755 100644 (file)
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1678,7 +1678,7 @@ ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related,
         if (!cp) {
                 int v;
  
-               if (!sysctl_schedule_icmp(ipvs))
+               if (ipip || !sysctl_schedule_icmp(ipvs))
                         return NF_ACCEPT;
  
                 if (!ip_vs_try_to_schedule(ipvs, AF_INET, skb, pd, &v, &cp, &ciph))
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c

index 82bfbee..2a71452 100644 (file)
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -25,6 +25,7 @@
  #include <linux/slab.h>
  #include <linux/random.h>
  #include <linux/jhash.h>
+#include <linux/siphash.h>
  #include <linux/err.h>
  #include <linux/percpu.h>
  #include <linux/moduleparam.h>
@@ -449,6 +450,40 @@ nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
  }
  EXPORT_SYMBOL_GPL(nf_ct_invert_tuple);
  
+/* Generate an almost-unique pseudo-id for a given conntrack.
+ *
+ * intentionally doesn't re-use any of the seeds used for hash
+ * table location, we assume id gets exposed to userspace.
+ *
+ * Following nf_conn items do not change throughout lifetime
+ * of the nf_conn after it has been committed to main hash table:
+ *
+ * 1. nf_conn address
+ * 2. nf_conn->ext address
+ * 3. nf_conn->master address (normally NULL)
+ * 4. tuple
+ * 5. the associated net namespace
+ */
+u32 nf_ct_get_id(const struct nf_conn *ct)
+{
+       static __read_mostly siphash_key_t ct_id_seed;
+       unsigned long a, b, c, d;
+
+       net_get_random_once(&ct_id_seed, sizeof(ct_id_seed));
+
+       a = (unsigned long)ct;
+       b = (unsigned long)ct->master ^ net_hash_mix(nf_ct_net(ct));
+       c = (unsigned long)ct->ext;
+       d = (unsigned long)siphash(&ct->tuplehash, sizeof(ct->tuplehash),
+                                  &ct_id_seed);
+#ifdef CONFIG_64BIT
+       return siphash_4u64((u64)a, (u64)b, (u64)c, (u64)d, &ct_id_seed);
+#else
+       return siphash_4u32((u32)a, (u32)b, (u32)c, (u32)d, &ct_id_seed);
+#endif
+}
+EXPORT_SYMBOL_GPL(nf_ct_get_id);
+
  static void
  clean_from_lists(struct nf_conn *ct)
  {
@@ -982,12 +1017,9 @@ __nf_conntrack_confirm(struct sk_buff *skb)
  
         /* set conntrack timestamp, if enabled. */
         tstamp = nf_conn_tstamp_find(ct);
-       if (tstamp) {
-               if (skb->tstamp == 0)
-                       __net_timestamp(skb);
+       if (tstamp)
+               tstamp->start = ktime_get_real_ns();
  
-               tstamp->start = ktime_to_ns(skb->tstamp);
-       }
         /* Since the lookup is lockless, hash insertion must be done after
          * starting the timer and setting the CONFIRMED bit. The RCU barriers
          * guarantee that no other CPU can find the conntrack before the above
@@ -1350,6 +1382,7 @@ __nf_conntrack_alloc(struct net *net,
         /* save hash for reusing when confirming */
         *(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash;
         ct->status = 0;
+       ct->timeout = 0;
         write_pnet(&ct->ct_net, net);
         memset(&ct->__nfct_init_offset[0], 0,
                offsetof(struct nf_conn, proto) -
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c

index 66c596d..d7f61b0 100644 (file)
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -29,6 +29,7 @@
  #include <linux/spinlock.h>
  #include <linux/interrupt.h>
  #include <linux/slab.h>
+#include <linux/siphash.h>
  
  #include <linux/netfilter.h>
  #include <net/netlink.h>
@@ -485,7 +486,9 @@ nla_put_failure:
  
  static int ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct)
  {
-       if (nla_put_be32(skb, CTA_ID, htonl((unsigned long)ct)))
+       __be32 id = (__force __be32)nf_ct_get_id(ct);
+
+       if (nla_put_be32(skb, CTA_ID, id))
                 goto nla_put_failure;
         return 0;
  
@@ -1286,8 +1289,9 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl,
         }
  
         if (cda[CTA_ID]) {
-               u_int32_t id = ntohl(nla_get_be32(cda[CTA_ID]));
-               if (id != (u32)(unsigned long)ct) {
+               __be32 id = nla_get_be32(cda[CTA_ID]);
+
+               if (id != (__force __be32)nf_ct_get_id(ct)) {
                         nf_ct_put(ct);
                         return -ENOENT;
                 }
@@ -2692,6 +2696,25 @@ nla_put_failure:
  
  static const union nf_inet_addr any_addr;
  
+static __be32 nf_expect_get_id(const struct nf_conntrack_expect *exp)
+{
+       static __read_mostly siphash_key_t exp_id_seed;
+       unsigned long a, b, c, d;
+
+       net_get_random_once(&exp_id_seed, sizeof(exp_id_seed));
+
+       a = (unsigned long)exp;
+       b = (unsigned long)exp->helper;
+       c = (unsigned long)exp->master;
+       d = (unsigned long)siphash(&exp->tuple, sizeof(exp->tuple), &exp_id_seed);
+
+#ifdef CONFIG_64BIT
+       return (__force __be32)siphash_4u64((u64)a, (u64)b, (u64)c, (u64)d, &exp_id_seed);
+#else
+       return (__force __be32)siphash_4u32((u32)a, (u32)b, (u32)c, (u32)d, &exp_id_seed);
+#endif
+}
+
  static int
  ctnetlink_exp_dump_expect(struct sk_buff *skb,
                           const struct nf_conntrack_expect *exp)
@@ -2739,7 +2762,7 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb,
         }
  #endif
         if (nla_put_be32(skb, CTA_EXPECT_TIMEOUT, htonl(timeout)) ||
-           nla_put_be32(skb, CTA_EXPECT_ID, htonl((unsigned long)exp)) ||
+           nla_put_be32(skb, CTA_EXPECT_ID, nf_expect_get_id(exp)) ||
             nla_put_be32(skb, CTA_EXPECT_FLAGS, htonl(exp->flags)) ||
             nla_put_be32(skb, CTA_EXPECT_CLASS, htonl(exp->class)))
                 goto nla_put_failure;
@@ -3044,7 +3067,8 @@ static int ctnetlink_get_expect(struct net *net, struct sock *ctnl,
  
         if (cda[CTA_EXPECT_ID]) {
                 __be32 id = nla_get_be32(cda[CTA_EXPECT_ID]);
-               if (ntohl(id) != (u32)(unsigned long)exp) {
+
+               if (id != nf_expect_get_id(exp)) {
                         nf_ct_expect_put(exp);
                         return -ENOENT;
                 }
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c

index b9403a2..37bb530 100644 (file)
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -55,7 +55,7 @@ void nf_l4proto_log_invalid(const struct sk_buff *skb,
         struct va_format vaf;
         va_list args;
  
-       if (net->ct.sysctl_log_invalid != protonum ||
+       if (net->ct.sysctl_log_invalid != protonum &&
             net->ct.sysctl_log_invalid != IPPROTO_RAW)
                 return;
  
diff --git a/net/netfilter/nf_conntrack_proto_icmp.c b/net/netfilter/nf_conntrack_proto_icmp.c

index 7df4779..9becac9 100644 (file)
--- a/net/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/netfilter/nf_conntrack_proto_icmp.c
@@ -103,49 +103,94 @@ int nf_conntrack_icmp_packet(struct nf_conn *ct,
         return NF_ACCEPT;
  }
  
-/* Returns conntrack if it dealt with ICMP, and filled in skb fields */
-static int
-icmp_error_message(struct nf_conn *tmpl, struct sk_buff *skb,
-                  const struct nf_hook_state *state)
+/* Check inner header is related to any of the existing connections */
+int nf_conntrack_inet_error(struct nf_conn *tmpl, struct sk_buff *skb,
+                           unsigned int dataoff,
+                           const struct nf_hook_state *state,
+                           u8 l4proto, union nf_inet_addr *outer_daddr)
  {
         struct nf_conntrack_tuple innertuple, origtuple;
         const struct nf_conntrack_tuple_hash *h;
         const struct nf_conntrack_zone *zone;
         enum ip_conntrack_info ctinfo;
         struct nf_conntrack_zone tmp;
+       union nf_inet_addr *ct_daddr;
+       enum ip_conntrack_dir dir;
+       struct nf_conn *ct;
  
         WARN_ON(skb_nfct(skb));
         zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
  
         /* Are they talking about one of our connections? */
-       if (!nf_ct_get_tuplepr(skb,
-                              skb_network_offset(skb) + ip_hdrlen(skb)
-                                                      + sizeof(struct icmphdr),
-                              PF_INET, state->net, &origtuple)) {
-               pr_debug("icmp_error_message: failed to get tuple\n");
+       if (!nf_ct_get_tuplepr(skb, dataoff,
+                              state->pf, state->net, &origtuple))
                 return -NF_ACCEPT;
-       }
  
         /* Ordinarily, we'd expect the inverted tupleproto, but it's
            been preserved inside the ICMP. */
-       if (!nf_ct_invert_tuple(&innertuple, &origtuple)) {
-               pr_debug("icmp_error_message: no match\n");
+       if (!nf_ct_invert_tuple(&innertuple, &origtuple))
                 return -NF_ACCEPT;
-       }
-
-       ctinfo = IP_CT_RELATED;
  
         h = nf_conntrack_find_get(state->net, zone, &innertuple);
-       if (!h) {
-               pr_debug("icmp_error_message: no match\n");
+       if (!h)
+               return -NF_ACCEPT;
+
+       /* Consider: A -> T (=This machine) -> B
+        *   Conntrack entry will look like this:
+        *      Original:  A->B
+        *      Reply:     B->T (SNAT case) OR A
+        *
+        * When this function runs, we have a packet that looks like this:
+        * iphdr|icmphdr|inner_iphdr|l4header (tcp, udp, ..).
+        *
+        * Above nf_conntrack_find_get() makes lookup based on inner_hdr,
+        * so we should expect that destination of the found connection
+        * matches outer header destination address.
+        *
+        * In above example, we can consider these two cases:
+        *  1. Error coming in reply direction from B or M (middle box) to
+        *     T (SNAT case) or A.
+        *     Inner saddr will be B, dst will be T or A.
+        *     The found conntrack will be reply tuple (B->T/A).
+        *  2. Error coming in original direction from A or M to B.
+        *     Inner saddr will be A, inner daddr will be B.
+        *     The found conntrack will be original tuple (A->B).
+        *
+        * In both cases, conntrack[dir].dst == inner.dst.
+        *
+        * A bogus packet could look like this:
+        *   Inner: B->T
+        *   Outer: B->X (other machine reachable by T).
+        *
+        * In this case, lookup yields connection A->B and will
+        * set packet from B->X as *RELATED*, even though no connection
+        * from X was ever seen.
+        */
+       ct = nf_ct_tuplehash_to_ctrack(h);
+       dir = NF_CT_DIRECTION(h);
+       ct_daddr = &ct->tuplehash[dir].tuple.dst.u3;
+       if (!nf_inet_addr_cmp(outer_daddr, ct_daddr)) {
+               if (state->pf == AF_INET) {
+                       nf_l4proto_log_invalid(skb, state->net, state->pf,
+                                              l4proto,
+                                              "outer daddr %pI4 != inner %pI4",
+                                              &outer_daddr->ip, &ct_daddr->ip);
+               } else if (state->pf == AF_INET6) {
+                       nf_l4proto_log_invalid(skb, state->net, state->pf,
+                                              l4proto,
+                                              "outer daddr %pI6 != inner %pI6",
+                                              &outer_daddr->ip6, &ct_daddr->ip6);
+               }
+               nf_ct_put(ct);
                 return -NF_ACCEPT;
         }
  
-       if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY)
+       ctinfo = IP_CT_RELATED;
+       if (dir == IP_CT_DIR_REPLY)
                 ctinfo += IP_CT_IS_REPLY;
  
         /* Update skb to refer to this connection */
-       nf_ct_set(skb, nf_ct_tuplehash_to_ctrack(h), ctinfo);
+       nf_ct_set(skb, ct, ctinfo);
         return NF_ACCEPT;
  }
  
@@ -162,11 +207,12 @@ int nf_conntrack_icmpv4_error(struct nf_conn *tmpl,
                               struct sk_buff *skb, unsigned int dataoff,
                               const struct nf_hook_state *state)
  {
+       union nf_inet_addr outer_daddr;
         const struct icmphdr *icmph;
         struct icmphdr _ih;
  
         /* Not enough header? */
-       icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih);
+       icmph = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih);
         if (icmph == NULL) {
                 icmp_error_log(skb, state, "short packet");
                 return -NF_ACCEPT;
@@ -199,7 +245,12 @@ int nf_conntrack_icmpv4_error(struct nf_conn *tmpl,
             icmph->type != ICMP_REDIRECT)
                 return NF_ACCEPT;
  
-       return icmp_error_message(tmpl, skb, state);
+       memset(&outer_daddr, 0, sizeof(outer_daddr));
+       outer_daddr.ip = ip_hdr(skb)->daddr;
+
+       dataoff += sizeof(*icmph);
+       return nf_conntrack_inet_error(tmpl, skb, dataoff, state,
+                                      IPPROTO_ICMP, &outer_daddr);
  }
  
  #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
diff --git a/net/netfilter/nf_conntrack_proto_icmpv6.c b/net/netfilter/nf_conntrack_proto_icmpv6.c

index bec4a32..c63ee36 100644 (file)
--- a/net/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/netfilter/nf_conntrack_proto_icmpv6.c
@@ -123,51 +123,6 @@ int nf_conntrack_icmpv6_packet(struct nf_conn *ct,
         return NF_ACCEPT;
  }
  
-static int
-icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
-                    struct sk_buff *skb,
-                    unsigned int icmp6off)
-{
-       struct nf_conntrack_tuple intuple, origtuple;
-       const struct nf_conntrack_tuple_hash *h;
-       enum ip_conntrack_info ctinfo;
-       struct nf_conntrack_zone tmp;
-
-       WARN_ON(skb_nfct(skb));
-
-       /* Are they talking about one of our connections? */
-       if (!nf_ct_get_tuplepr(skb,
-                              skb_network_offset(skb)
-                               + sizeof(struct ipv6hdr)
-                               + sizeof(struct icmp6hdr),
-                              PF_INET6, net, &origtuple)) {
-               pr_debug("icmpv6_error: Can't get tuple\n");
-               return -NF_ACCEPT;
-       }
-
-       /* Ordinarily, we'd expect the inverted tupleproto, but it's
-          been preserved inside the ICMP. */
-       if (!nf_ct_invert_tuple(&intuple, &origtuple)) {
-               pr_debug("icmpv6_error: Can't invert tuple\n");
-               return -NF_ACCEPT;
-       }
-
-       ctinfo = IP_CT_RELATED;
-
-       h = nf_conntrack_find_get(net, nf_ct_zone_tmpl(tmpl, skb, &tmp),
-                                 &intuple);
-       if (!h) {
-               pr_debug("icmpv6_error: no match\n");
-               return -NF_ACCEPT;
-       } else {
-               if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY)
-                       ctinfo += IP_CT_IS_REPLY;
-       }
-
-       /* Update skb to refer to this connection */
-       nf_ct_set(skb, nf_ct_tuplehash_to_ctrack(h), ctinfo);
-       return NF_ACCEPT;
-}
  
  static void icmpv6_error_log(const struct sk_buff *skb,
                              const struct nf_hook_state *state,
@@ -182,6 +137,7 @@ int nf_conntrack_icmpv6_error(struct nf_conn *tmpl,
                               unsigned int dataoff,
                               const struct nf_hook_state *state)
  {
+       union nf_inet_addr outer_daddr;
         const struct icmp6hdr *icmp6h;
         struct icmp6hdr _ih;
         int type;
@@ -210,7 +166,11 @@ int nf_conntrack_icmpv6_error(struct nf_conn *tmpl,
         if (icmp6h->icmp6_type >= 128)
                 return NF_ACCEPT;
  
-       return icmpv6_error_message(state->net, tmpl, skb, dataoff);
+       memcpy(&outer_daddr.ip6, &ipv6_hdr(skb)->daddr,
+              sizeof(outer_daddr.ip6));
+       dataoff += sizeof(*icmp6h);
+       return nf_conntrack_inet_error(tmpl, skb, dataoff, state,
+                                      IPPROTO_ICMPV6, &outer_daddr);
  }
  
  #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c

index af7dc65..0009527 100644 (file)
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -415,9 +415,14 @@ static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple,
         case IPPROTO_ICMPV6:
                 /* id is same for either direction... */
                 keyptr = &tuple->src.u.icmp.id;
-               min = range->min_proto.icmp.id;
-               range_size = ntohs(range->max_proto.icmp.id) -
-                            ntohs(range->min_proto.icmp.id) + 1;
+               if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) {
+                       min = 0;
+                       range_size = 65536;
+               } else {
+                       min = ntohs(range->min_proto.icmp.id);
+                       range_size = ntohs(range->max_proto.icmp.id) -
+                                    ntohs(range->min_proto.icmp.id) + 1;
+               }
                 goto find_free_id;
  #if IS_ENABLED(CONFIG_NF_CT_PROTO_GRE)
         case IPPROTO_GRE:
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c

index ef7772e..1606eaa 100644 (file)
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1545,7 +1545,7 @@ static int nft_chain_parse_hook(struct net *net,
                 if (IS_ERR(type))
                         return PTR_ERR(type);
         }
-       if (!(type->hook_mask & (1 << hook->num)))
+       if (hook->num > NF_MAX_HOOKS || !(type->hook_mask & (1 << hook->num)))
                 return -EOPNOTSUPP;
  
         if (type->type == NFT_CHAIN_T_NAT &&
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c

index b1f9c53..0b33475 100644 (file)
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -540,7 +540,7 @@ __build_packet_message(struct nfnl_log_net *log,
                         goto nla_put_failure;
         }
  
-       if (skb->tstamp) {
+       if (hooknum <= NF_INET_FORWARD && skb->tstamp) {
                 struct nfulnl_msg_packet_timestamp ts;
                 struct timespec64 kts = ktime_to_timespec64(skb->tstamp);
                 ts.sec = cpu_to_be64(kts.tv_sec);
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c

index 0dcc359..e057b29 100644 (file)
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -582,7 +582,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
         if (nfqnl_put_bridge(entry, skb) < 0)
                 goto nla_put_failure;
  
-       if (entskb->tstamp) {
+       if (entry->state.hook <= NF_INET_FORWARD && entskb->tstamp) {
                 struct nfqnl_msg_packet_timestamp ts;
                 struct timespec64 kts = ktime_to_timespec64(entskb->tstamp);
  
diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c

index c13bcd0..8dbb4d4 100644 (file)
--- a/net/netfilter/xt_time.c
+++ b/net/netfilter/xt_time.c
@@ -163,19 +163,24 @@ time_mt(const struct sk_buff *skb, struct xt_action_param *par)
         s64 stamp;
  
         /*
-        * We cannot use get_seconds() instead of __net_timestamp() here.
+        * We need real time here, but we can neither use skb->tstamp
+        * nor __net_timestamp().
+        *
+        * skb->tstamp and skb->skb_mstamp_ns overlap, however, they
+        * use different clock types (real vs monotonic).
+        *
          * Suppose you have two rules:
-        *      1. match before 13:00
-        *      2. match after 13:00
+        *      1. match before 13:00
+        *      2. match after 13:00
+        *
          * If you match against processing time (get_seconds) it
          * may happen that the same packet matches both rules if
-        * it arrived at the right moment before 13:00.
+        * it arrived at the right moment before 13:00, so it would be
+        * better to check skb->tstamp and set it via __net_timestamp()
+        * if needed.  This however breaks outgoing packets tx timestamp,
+        * and causes them to get delayed forever by fq packet scheduler.
          */
-       if (skb->tstamp == 0)
-               __net_timestamp((struct sk_buff *)skb);
-
-       stamp = ktime_to_ns(skb->tstamp);
-       stamp = div_s64(stamp, NSEC_PER_SEC);
+       stamp = get_seconds();
  
         if (info->flags & XT_TIME_LOCAL_TZ)
                 /* Adjust for local timezone */
diff --git a/net/rds/ib_fmr.c b/net/rds/ib_fmr.c

index 31cf37d..93c0437 100644 (file)
--- a/net/rds/ib_fmr.c
+++ b/net/rds/ib_fmr.c
@@ -44,6 +44,17 @@ struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev, int npages)
         else
                 pool = rds_ibdev->mr_1m_pool;
  
+       if (atomic_read(&pool->dirty_count) >= pool->max_items / 10)
+               queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10);
+
+       /* Switch pools if one of the pools is reaching its upper limit */
+       if (atomic_read(&pool->dirty_count) >=  pool->max_items * 9 / 10) {
+               if (pool->pool_type == RDS_IB_MR_8K_POOL)
+                       pool = rds_ibdev->mr_1m_pool;
+               else
+                       pool = rds_ibdev->mr_8k_pool;
+       }
+
         ibmr = rds_ib_try_reuse_ibmr(pool);
         if (ibmr)
                 return ibmr;
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c

index 63c8d10..d664e9a 100644 (file)
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -454,9 +454,6 @@ struct rds_ib_mr *rds_ib_try_reuse_ibmr(struct rds_ib_mr_pool *pool)
         struct rds_ib_mr *ibmr = NULL;
         int iter = 0;
  
-       if (atomic_read(&pool->dirty_count) >= pool->max_items_soft / 10)
-               queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10);
-
         while (1) {
                 ibmr = rds_ib_reuse_mr(pool);
                 if (ibmr)
diff --git a/net/rose/rose_loopback.c b/net/rose/rose_loopback.c

index 7af4f99..094a662 100644 (file)
--- a/net/rose/rose_loopback.c
+++ b/net/rose/rose_loopback.c
@@ -16,6 +16,7 @@
  #include <linux/init.h>
  
  static struct sk_buff_head loopback_queue;
+#define ROSE_LOOPBACK_LIMIT 1000
  static struct timer_list loopback_timer;
  
  static void rose_set_loopback_timer(void);
@@ -35,29 +36,27 @@ static int rose_loopback_running(void)
  
  int rose_loopback_queue(struct sk_buff *skb, struct rose_neigh *neigh)
  {
-       struct sk_buff *skbn;
+       struct sk_buff *skbn = NULL;
  
-       skbn = skb_clone(skb, GFP_ATOMIC);
+       if (skb_queue_len(&loopback_queue) < ROSE_LOOPBACK_LIMIT)
+               skbn = skb_clone(skb, GFP_ATOMIC);
  
-       kfree_skb(skb);
-
-       if (skbn != NULL) {
+       if (skbn) {
+               consume_skb(skb);
                 skb_queue_tail(&loopback_queue, skbn);
  
                 if (!rose_loopback_running())
                         rose_set_loopback_timer();
+       } else {
+               kfree_skb(skb);
         }
  
         return 1;
  }
  
-
  static void rose_set_loopback_timer(void)
  {
-       del_timer(&loopback_timer);
-
-       loopback_timer.expires  = jiffies + 10;
-       add_timer(&loopback_timer);
+       mod_timer(&loopback_timer, jiffies + 10);
  }
  
  static void rose_loopback_timer(struct timer_list *unused)
@@ -68,8 +67,12 @@ static void rose_loopback_timer(struct timer_list *unused)
         struct sock *sk;
         unsigned short frametype;
         unsigned int lci_i, lci_o;
+       int count;
  
-       while ((skb = skb_dequeue(&loopback_queue)) != NULL) {
+       for (count = 0; count < ROSE_LOOPBACK_LIMIT; count++) {
+               skb = skb_dequeue(&loopback_queue);
+               if (!skb)
+                       return;
                 if (skb->len < ROSE_MIN_LEN) {
                         kfree_skb(skb);
                         continue;
@@ -106,6 +109,8 @@ static void rose_loopback_timer(struct timer_list *unused)
                         kfree_skb(skb);
                 }
         }
+       if (!skb_queue_empty(&loopback_queue))
+               mod_timer(&loopback_timer, jiffies + 1);
  }
  
  void __exit rose_loopback_clear(void)
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c

index 4c6f9d0..c2c35cf 100644 (file)
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -1161,19 +1161,19 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb)
   * handle data received on the local endpoint
   * - may be called in interrupt context
   *
- * The socket is locked by the caller and this prevents the socket from being
- * shut down and the local endpoint from going away, thus sk_user_data will not
- * be cleared until this function returns.
+ * [!] Note that as this is called from the encap_rcv hook, the socket is not
+ * held locked by the caller and nothing prevents sk_user_data on the UDP from
+ * being cleared in the middle of processing this function.
   *
   * Called with the RCU read lock held from the IP layer via UDP.
   */
  int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb)
  {
+       struct rxrpc_local *local = rcu_dereference_sk_user_data(udp_sk);
         struct rxrpc_connection *conn;
         struct rxrpc_channel *chan;
         struct rxrpc_call *call = NULL;
         struct rxrpc_skb_priv *sp;
-       struct rxrpc_local *local = udp_sk->sk_user_data;
         struct rxrpc_peer *peer = NULL;
         struct rxrpc_sock *rx = NULL;
         unsigned int channel;
@@ -1181,6 +1181,10 @@ int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb)
  
         _enter("%p", udp_sk);
  
+       if (unlikely(!local)) {
+               kfree_skb(skb);
+               return 0;
+       }
         if (skb->tstamp == 0)
                 skb->tstamp = ktime_get_real();
  
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c

index 15cf42d..01959db 100644 (file)
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -304,7 +304,8 @@ nomem:
         ret = -ENOMEM;
  sock_error:
         mutex_unlock(&rxnet->local_mutex);
-       kfree(local);
+       if (local)
+               call_rcu(&local->rcu, rxrpc_local_rcu);
         _leave(" = %d", ret);
         return ERR_PTR(ret);
  
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c

index 12bb23b..261131d 100644 (file)
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -54,6 +54,7 @@ static void cache_init(struct cache_head *h, struct cache_detail *detail)
         h->last_refresh = now;
  }
  
+static inline int cache_is_valid(struct cache_head *h);
  static void cache_fresh_locked(struct cache_head *head, time_t expiry,
                                 struct cache_detail *detail);
  static void cache_fresh_unlocked(struct cache_head *head,
@@ -105,6 +106,8 @@ static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail,
                         if (cache_is_expired(detail, tmp)) {
                                 hlist_del_init_rcu(&tmp->cache_list);
                                 detail->entries --;
+                               if (cache_is_valid(tmp) == -EAGAIN)
+                                       set_bit(CACHE_NEGATIVE, &tmp->flags);
                                 cache_fresh_locked(tmp, 0, detail);
                                 freeme = tmp;
                                 break;
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c

index 9f3bdbc..cc02569 100644 (file)
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -904,7 +904,9 @@ int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx)
         goto release_netdev;
  
  free_sw_resources:
+       up_read(&device_offload_lock);
         tls_sw_free_resources_rx(sk);
+       down_read(&device_offload_lock);
  release_ctx:
         ctx->priv_ctx_rx = NULL;
  release_netdev:
@@ -939,8 +941,6 @@ void tls_device_offload_cleanup_rx(struct sock *sk)
         }
  out:
         up_read(&device_offload_lock);
-       kfree(tls_ctx->rx.rec_seq);
-       kfree(tls_ctx->rx.iv);
         tls_sw_release_resources_rx(sk);
  }
  
diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c

index 54c3a75..a3ebd4b 100644 (file)
--- a/net/tls/tls_device_fallback.c
+++ b/net/tls/tls_device_fallback.c
@@ -194,6 +194,9 @@ static void update_chksum(struct sk_buff *skb, int headln)
  
  static void complete_skb(struct sk_buff *nskb, struct sk_buff *skb, int headln)
  {
+       struct sock *sk = skb->sk;
+       int delta;
+
         skb_copy_header(nskb, skb);
  
         skb_put(nskb, skb->len);
@@ -201,11 +204,15 @@ static void complete_skb(struct sk_buff *nskb, struct sk_buff *skb, int headln)
         update_chksum(nskb, headln);
  
         nskb->destructor = skb->destructor;
-       nskb->sk = skb->sk;
+       nskb->sk = sk;
         skb->destructor = NULL;
         skb->sk = NULL;
-       refcount_add(nskb->truesize - skb->truesize,
-                    &nskb->sk->sk_wmem_alloc);
+
+       delta = nskb->truesize - skb->truesize;
+       if (likely(delta < 0))
+               WARN_ON_ONCE(refcount_sub_and_test(-delta, &sk->sk_wmem_alloc));
+       else if (delta)
+               refcount_add(delta, &sk->sk_wmem_alloc);
  }
  
  /* This function may be called after the user socket is already
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c

index 9547cea..478603f 100644 (file)
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -293,11 +293,8 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
  #endif
         }
  
-       if (ctx->rx_conf == TLS_SW) {
-               kfree(ctx->rx.rec_seq);
-               kfree(ctx->rx.iv);
+       if (ctx->rx_conf == TLS_SW)
                 tls_sw_free_resources_rx(sk);
-       }
  
  #ifdef CONFIG_TLS_DEVICE
         if (ctx->rx_conf == TLS_HW)
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c

index b50ced8..29d6af4 100644 (file)
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -2078,6 +2078,9 @@ void tls_sw_release_resources_rx(struct sock *sk)
         struct tls_context *tls_ctx = tls_get_ctx(sk);
         struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
  
+       kfree(tls_ctx->rx.rec_seq);
+       kfree(tls_ctx->rx.iv);
+
         if (ctx->aead_recv) {
                 kfree_skb(ctx->recv_pkt);
                 ctx->recv_pkt = NULL;
diff --git a/tools/testing/selftests/net/run_afpackettests b/tools/testing/selftests/net/run_afpackettests

index 2dc95fd..ea5938e 100755 (executable)
--- a/tools/testing/selftests/net/run_afpackettests
+++ b/tools/testing/selftests/net/run_afpackettests
@@ -6,12 +6,14 @@ if [ $(id -u) != 0 ]; then
         exit 0
  fi
  
+ret=0
  echo "--------------------"
  echo "running psock_fanout test"
  echo "--------------------"
  ./in_netns.sh ./psock_fanout
  if [ $? -ne 0 ]; then
         echo "[FAIL]"
+       ret=1
  else
         echo "[PASS]"
  fi
@@ -22,6 +24,7 @@ echo "--------------------"
  ./in_netns.sh ./psock_tpacket
  if [ $? -ne 0 ]; then
         echo "[FAIL]"
+       ret=1
  else
         echo "[PASS]"
  fi
@@ -32,6 +35,8 @@ echo "--------------------"
  ./in_netns.sh ./txring_overwrite
  if [ $? -ne 0 ]; then
         echo "[FAIL]"
+       ret=1
  else
         echo "[PASS]"
  fi
+exit $ret
diff --git a/tools/testing/selftests/net/run_netsocktests b/tools/testing/selftests/net/run_netsocktests

index b093f39..14e41fa 100755 (executable)
--- a/tools/testing/selftests/net/run_netsocktests
+++ b/tools/testing/selftests/net/run_netsocktests
@@ -7,7 +7,7 @@ echo "--------------------"
  ./socket
  if [ $? -ne 0 ]; then
         echo "[FAIL]"
+       exit 1
  else
         echo "[PASS]"
  fi
-
diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile

index c9ff2b4..a37cb11 100644 (file)
--- a/tools/testing/selftests/netfilter/Makefile
+++ b/tools/testing/selftests/netfilter/Makefile
@@ -1,6 +1,6 @@
  # SPDX-License-Identifier: GPL-2.0
  # Makefile for netfilter selftests
  
-TEST_PROGS := nft_trans_stress.sh nft_nat.sh
+TEST_PROGS := nft_trans_stress.sh nft_nat.sh conntrack_icmp_related.sh
  
  include ../lib.mk
diff --git a/tools/testing/selftests/netfilter/conntrack_icmp_related.sh b/tools/testing/selftests/netfilter/conntrack_icmp_related.sh

new file mode 100755 (executable)

index 0000000..b48e183
--- /dev/null
+++ b/tools/testing/selftests/netfilter/conntrack_icmp_related.sh
@@ -0,0 +1,283 @@
+#!/bin/bash
+#
+# check that ICMP df-needed/pkttoobig icmp errors are set as related
+# state
+#
+# Setup is:
+#
+# nsclient1 -> nsrouter1 -> nsrouter2 -> nsclient2
+# MTU 1500, except for nsrouter2 <-> nsclient2 link (1280).
+# ping nsclient2 from nsclient1, checking that conntrack did set RELATED
+# 'fragmentation needed' icmp packet.
+#
+# In addition, nsrouter1 will perform IP masquerading, i.e. also
+# check the icmp errors are propagated to the correct host as per
+# nat of "established" icmp-echo "connection".
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+
+nft --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+       echo "SKIP: Could not run test without nft tool"
+       exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+       echo "SKIP: Could not run test without ip tool"
+       exit $ksft_skip
+fi
+
+cleanup() {
+       for i in 1 2;do ip netns del nsclient$i;done
+       for i in 1 2;do ip netns del nsrouter$i;done
+}
+
+ipv4() {
+    echo -n 192.168.$1.2
+}
+
+ipv6 () {
+    echo -n dead:$1::2
+}
+
+check_counter()
+{
+       ns=$1
+       name=$2
+       expect=$3
+       local lret=0
+
+       cnt=$(ip netns exec $ns nft list counter inet filter "$name" | grep -q "$expect")
+       if [ $? -ne 0 ]; then
+               echo "ERROR: counter $name in $ns has unexpected value (expected $expect)" 1>&2
+               ip netns exec $ns nft list counter inet filter "$name" 1>&2
+               lret=1
+       fi
+
+       return $lret
+}
+
+check_unknown()
+{
+       expect="packets 0 bytes 0"
+       for n in nsclient1 nsclient2 nsrouter1 nsrouter2; do
+               check_counter $n "unknown" "$expect"
+               if [ $? -ne 0 ] ;then
+                       return 1
+               fi
+       done
+
+       return 0
+}
+
+for n in nsclient1 nsclient2 nsrouter1 nsrouter2; do
+  ip netns add $n
+  ip -net $n link set lo up
+done
+
+DEV=veth0
+ip link add $DEV netns nsclient1 type veth peer name eth1 netns nsrouter1
+DEV=veth0
+ip link add $DEV netns nsclient2 type veth peer name eth1 netns nsrouter2
+
+DEV=veth0
+ip link add $DEV netns nsrouter1 type veth peer name eth2 netns nsrouter2
+
+DEV=veth0
+for i in 1 2; do
+    ip -net nsclient$i link set $DEV up
+    ip -net nsclient$i addr add $(ipv4 $i)/24 dev $DEV
+    ip -net nsclient$i addr add $(ipv6 $i)/64 dev $DEV
+done
+
+ip -net nsrouter1 link set eth1 up
+ip -net nsrouter1 link set veth0 up
+
+ip -net nsrouter2 link set eth1 up
+ip -net nsrouter2 link set eth2 up
+
+ip -net nsclient1 route add default via 192.168.1.1
+ip -net nsclient1 -6 route add default via dead:1::1
+
+ip -net nsclient2 route add default via 192.168.2.1
+ip -net nsclient2 route add default via dead:2::1
+
+i=3
+ip -net nsrouter1 addr add 192.168.1.1/24 dev eth1
+ip -net nsrouter1 addr add 192.168.3.1/24 dev veth0
+ip -net nsrouter1 addr add dead:1::1/64 dev eth1
+ip -net nsrouter1 addr add dead:3::1/64 dev veth0
+ip -net nsrouter1 route add default via 192.168.3.10
+ip -net nsrouter1 -6 route add default via dead:3::10
+
+ip -net nsrouter2 addr add 192.168.2.1/24 dev eth1
+ip -net nsrouter2 addr add 192.168.3.10/24 dev eth2
+ip -net nsrouter2 addr add dead:2::1/64 dev eth1
+ip -net nsrouter2 addr add dead:3::10/64 dev eth2
+ip -net nsrouter2 route add default via 192.168.3.1
+ip -net nsrouter2 route add default via dead:3::1
+
+sleep 2
+for i in 4 6; do
+       ip netns exec nsrouter1 sysctl -q net.ipv$i.conf.all.forwarding=1
+       ip netns exec nsrouter2 sysctl -q net.ipv$i.conf.all.forwarding=1
+done
+
+for netns in nsrouter1 nsrouter2; do
+ip netns exec $netns nft -f - <<EOF
+table inet filter {
+       counter unknown { }
+       counter related { }
+       chain forward {
+               type filter hook forward priority 0; policy accept;
+               meta l4proto icmpv6 icmpv6 type "packet-too-big" ct state "related" counter name "related" accept
+               meta l4proto icmp icmp type "destination-unreachable" ct state "related" counter name "related" accept
+               meta l4proto { icmp, icmpv6 } ct state new,established accept
+               counter name "unknown" drop
+       }
+}
+EOF
+done
+
+ip netns exec nsclient1 nft -f - <<EOF
+table inet filter {
+       counter unknown { }
+       counter related { }
+       chain input {
+               type filter hook input priority 0; policy accept;
+               meta l4proto { icmp, icmpv6 } ct state established,untracked accept
+
+               meta l4proto { icmp, icmpv6 } ct state "related" counter name "related" accept
+               counter name "unknown" drop
+       }
+}
+EOF
+
+ip netns exec nsclient2 nft -f - <<EOF
+table inet filter {
+       counter unknown { }
+       counter new { }
+       counter established { }
+
+       chain input {
+               type filter hook input priority 0; policy accept;
+               meta l4proto { icmp, icmpv6 } ct state established,untracked accept
+
+               meta l4proto { icmp, icmpv6 } ct state "new" counter name "new" accept
+               meta l4proto { icmp, icmpv6 } ct state "established" counter name "established" accept
+               counter name "unknown" drop
+       }
+       chain output {
+               type filter hook output priority 0; policy accept;
+               meta l4proto { icmp, icmpv6 } ct state established,untracked accept
+
+               meta l4proto { icmp, icmpv6 } ct state "new" counter name "new"
+               meta l4proto { icmp, icmpv6 } ct state "established" counter name "established"
+               counter name "unknown" drop
+       }
+}
+EOF
+
+
+# make sure NAT core rewrites address of icmp error if nat is used according to
+# conntrack nat information (icmp error will be directed at nsrouter1 address,
+# but it needs to be routed to nsclient1 address).
+ip netns exec nsrouter1 nft -f - <<EOF
+table ip nat {
+       chain postrouting {
+               type nat hook postrouting priority 0; policy accept;
+               ip protocol icmp oifname "veth0" counter masquerade
+       }
+}
+table ip6 nat {
+       chain postrouting {
+               type nat hook postrouting priority 0; policy accept;
+               ip6 nexthdr icmpv6 oifname "veth0" counter masquerade
+       }
+}
+EOF
+
+ip netns exec nsrouter2 ip link set eth1  mtu 1280
+ip netns exec nsclient2 ip link set veth0 mtu 1280
+sleep 1
+
+ip netns exec nsclient1 ping -c 1 -s 1000 -q -M do 192.168.2.2 >/dev/null
+if [ $? -ne 0 ]; then
+       echo "ERROR: netns ip routing/connectivity broken" 1>&2
+       cleanup
+       exit 1
+fi
+ip netns exec nsclient1 ping6 -q -c 1 -s 1000 dead:2::2 >/dev/null
+if [ $? -ne 0 ]; then
+       echo "ERROR: netns ipv6 routing/connectivity broken" 1>&2
+       cleanup
+       exit 1
+fi
+
+check_unknown
+if [ $? -ne 0 ]; then
+       ret=1
+fi
+
+expect="packets 0 bytes 0"
+for netns in nsrouter1 nsrouter2 nsclient1;do
+       check_counter "$netns" "related" "$expect"
+       if [ $? -ne 0 ]; then
+               ret=1
+       fi
+done
+
+expect="packets 2 bytes 2076"
+check_counter nsclient2 "new" "$expect"
+if [ $? -ne 0 ]; then
+       ret=1
+fi
+
+ip netns exec nsclient1 ping -q -c 1 -s 1300 -M do 192.168.2.2 > /dev/null
+if [ $? -eq 0 ]; then
+       echo "ERROR: ping should have failed with PMTU too big error" 1>&2
+       ret=1
+fi
+
+# nsrouter2 should have generated the icmp error, so
+# related counter should be 0 (it's in forward).
+expect="packets 0 bytes 0"
+check_counter "nsrouter2" "related" "$expect"
+if [ $? -ne 0 ]; then
+       ret=1
+fi
+
+# but nsrouter1 should have seen it, same for nsclient1.
+expect="packets 1 bytes 576"
+for netns in nsrouter1 nsclient1;do
+       check_counter "$netns" "related" "$expect"
+       if [ $? -ne 0 ]; then
+               ret=1
+       fi
+done
+
+ip netns exec nsclient1 ping6 -c 1 -s 1300 dead:2::2 > /dev/null
+if [ $? -eq 0 ]; then
+       echo "ERROR: ping6 should have failed with PMTU too big error" 1>&2
+       ret=1
+fi
+
+expect="packets 2 bytes 1856"
+for netns in nsrouter1 nsclient1;do
+       check_counter "$netns" "related" "$expect"
+       if [ $? -ne 0 ]; then
+               ret=1
+       fi
+done
+
+if [ $ret -eq 0 ];then
+       echo "PASS: icmp mtu error had RELATED state"
+else
+       echo "ERROR: icmp error RELATED state test has failed"
+fi
+
+cleanup
+exit $ret
diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh

index 8ec7668..3194007 100755 (executable)
--- a/tools/testing/selftests/netfilter/nft_nat.sh
+++ b/tools/testing/selftests/netfilter/nft_nat.sh
@@ -321,6 +321,7 @@ EOF
  
  test_masquerade6()
  {
+       local natflags=$1
         local lret=0
  
         ip netns exec ns0 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
@@ -354,13 +355,13 @@ ip netns exec ns0 nft -f - <<EOF
  table ip6 nat {
         chain postrouting {
                 type nat hook postrouting priority 0; policy accept;
-               meta oif veth0 masquerade
+               meta oif veth0 masquerade $natflags
         }
  }
  EOF
         ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
         if [ $? -ne 0 ] ; then
-               echo "ERROR: cannot ping ns1 from ns2 with active ipv6 masquerading"
+               echo "ERROR: cannot ping ns1 from ns2 with active ipv6 masquerade $natflags"
                 lret=1
         fi
  
@@ -397,19 +398,26 @@ EOF
                 fi
         done
  
+       ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
+       if [ $? -ne 0 ] ; then
+               echo "ERROR: cannot ping ns1 from ns2 with active ipv6 masquerade $natflags (attempt 2)"
+               lret=1
+       fi
+
         ip netns exec ns0 nft flush chain ip6 nat postrouting
         if [ $? -ne 0 ]; then
                 echo "ERROR: Could not flush ip6 nat postrouting" 1>&2
                 lret=1
         fi
  
-       test $lret -eq 0 && echo "PASS: IPv6 masquerade for ns2"
+       test $lret -eq 0 && echo "PASS: IPv6 masquerade $natflags for ns2"
  
         return $lret
  }
  
  test_masquerade()
  {
+       local natflags=$1
         local lret=0
  
         ip netns exec ns0 sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
@@ -417,7 +425,7 @@ test_masquerade()
  
         ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
         if [ $? -ne 0 ] ; then
-               echo "ERROR: canot ping ns1 from ns2"
+               echo "ERROR: cannot ping ns1 from ns2 $natflags"
                 lret=1
         fi
  
@@ -443,13 +451,13 @@ ip netns exec ns0 nft -f - <<EOF
  table ip nat {
         chain postrouting {
                 type nat hook postrouting priority 0; policy accept;
-               meta oif veth0 masquerade
+               meta oif veth0 masquerade $natflags
         }
  }
  EOF
         ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
         if [ $? -ne 0 ] ; then
-               echo "ERROR: cannot ping ns1 from ns2 with active ip masquerading"
+               echo "ERROR: cannot ping ns1 from ns2 with active ip masquere $natflags"
                 lret=1
         fi
  
@@ -485,13 +493,19 @@ EOF
                 fi
         done
  
+       ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
+       if [ $? -ne 0 ] ; then
+               echo "ERROR: cannot ping ns1 from ns2 with active ip masquerade $natflags (attempt 2)"
+               lret=1
+       fi
+
         ip netns exec ns0 nft flush chain ip nat postrouting
         if [ $? -ne 0 ]; then
                 echo "ERROR: Could not flush nat postrouting" 1>&2
                 lret=1
         fi
  
-       test $lret -eq 0 && echo "PASS: IP masquerade for ns2"
+       test $lret -eq 0 && echo "PASS: IP masquerade $natflags for ns2"
  
         return $lret
  }
@@ -750,8 +764,12 @@ test_local_dnat
  test_local_dnat6
  
  reset_counters
-test_masquerade
-test_masquerade6
+test_masquerade ""
+test_masquerade6 ""
+
+reset_counters
+test_masquerade "fully-random"
+test_masquerade6 "fully-random"
  
  reset_counters
  test_redirect
author	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 26 Apr 2019 17:39:46 +0000 (10:39 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 26 Apr 2019 17:39:46 +0000 (10:39 -0700)
Documentation/devicetree/bindings/net/davinci_emac.txt		patch \| blob \| history
Documentation/devicetree/bindings/net/ethernet.txt		patch \| blob \| history
Documentation/devicetree/bindings/net/macb.txt		patch \| blob \| history
Documentation/networking/decnet.txt		patch \| blob \| history
Documentation/networking/ip-sysctl.txt		patch \| blob \| history
MAINTAINERS		patch \| blob \| history
arch/alpha/kernel/syscalls/syscall.tbl		patch \| blob \| history
arch/arm/tools/syscall.tbl		patch \| blob \| history
arch/arm64/include/asm/unistd.h		patch \| blob \| history
arch/arm64/include/asm/unistd32.h		patch \| blob \| history
arch/ia64/kernel/syscalls/syscall.tbl		patch \| blob \| history
arch/m68k/kernel/syscalls/syscall.tbl		patch \| blob \| history
arch/microblaze/kernel/syscalls/syscall.tbl		patch \| blob \| history
arch/mips/ath79/setup.c		patch \| blob \| history
arch/mips/kernel/scall64-o32.S		patch \| blob \| history
arch/mips/kernel/syscalls/syscall_n32.tbl		patch \| blob \| history
arch/mips/kernel/syscalls/syscall_n64.tbl		patch \| blob \| history
arch/mips/kernel/syscalls/syscall_o32.tbl		patch \| blob \| history
arch/parisc/kernel/syscalls/syscall.tbl		patch \| blob \| history
arch/powerpc/kernel/syscalls/syscall.tbl		patch \| blob \| history
arch/s390/kernel/syscalls/syscall.tbl		patch \| blob \| history
arch/sh/kernel/syscalls/syscall.tbl		patch \| blob \| history
arch/sparc/kernel/syscalls/syscall.tbl		patch \| blob \| history
arch/xtensa/kernel/syscalls/syscall.tbl		patch \| blob \| history
crypto/lrw.c		patch \| blob \| history
crypto/xts.c		patch \| blob \| history
drivers/atm/firestream.c		patch \| blob \| history
drivers/irqchip/irq-ath79-misc.c		patch \| blob \| history
drivers/net/ethernet/atheros/atlx/atl1.c		patch \| blob \| history
drivers/net/ethernet/atheros/atlx/atl1.h		patch \| blob \| history
drivers/net/ethernet/atheros/atlx/atl2.c		patch \| blob \| history
drivers/net/ethernet/atheros/atlx/atl2.h		patch \| blob \| history
drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c		patch \| blob \| history
drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h		patch \| blob \| history
drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c		patch \| blob \| history
drivers/net/ethernet/mellanox/mlx5/core/en_main.c		patch \| blob \| history
drivers/net/ethernet/mellanox/mlx5/core/port.c		patch \| blob \| history
drivers/net/ethernet/mellanox/mlxsw/pci_hw.h		patch \| blob \| history
drivers/net/ethernet/mellanox/mlxsw/spectrum.c		patch \| blob \| history
drivers/net/ethernet/netronome/nfp/abm/cls.c		patch \| blob \| history
drivers/net/ethernet/socionext/netsec.c		patch \| blob \| history
drivers/net/ethernet/stmicro/stmmac/norm_desc.c		patch \| blob \| history
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c		patch \| blob \| history
drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c		patch \| blob \| history
drivers/net/phy/spi_ks8995.c		patch \| blob \| history
drivers/net/team/team.c		patch \| blob \| history
drivers/net/vrf.c		patch \| blob \| history
drivers/nfc/st95hf/core.c		patch \| blob \| history
drivers/of/of_net.c		patch \| blob \| history
drivers/s390/net/ctcm_main.c		patch \| blob \| history
fs/btrfs/file-item.c		patch \| blob \| history
fs/btrfs/ordered-data.c		patch \| blob \| history
fs/ceph/dir.c		patch \| blob \| history
fs/ceph/inode.c		patch \| blob \| history
fs/ceph/mds_client.c		patch \| blob \| history
fs/ceph/snap.c		patch \| blob \| history
fs/cifs/file.c		patch \| blob \| history
fs/cifs/inode.c		patch \| blob \| history
fs/cifs/misc.c		patch \| blob \| history
fs/cifs/smb2pdu.c		patch \| blob \| history
fs/nfsd/nfs3proc.c		patch \| blob \| history
fs/nfsd/nfs3xdr.c		patch \| blob \| history
fs/nfsd/nfs4callback.c		patch \| blob \| history
fs/nfsd/nfs4state.c		patch \| blob \| history
fs/nfsd/state.h		patch \| blob \| history
include/linux/etherdevice.h		patch \| blob \| history
include/net/netfilter/nf_conntrack.h		patch \| blob \| history
include/net/netfilter/nf_conntrack_l4proto.h		patch \| blob \| history
net/bridge/netfilter/ebtables.c		patch \| blob \| history
net/ipv4/route.c		patch \| blob \| history
net/ipv4/sysctl_net_ipv4.c		patch \| blob \| history
net/ipv6/addrlabel.c		patch \| blob \| history
net/ncsi/ncsi-rsp.c		patch \| blob \| history
net/netfilter/ipvs/ip_vs_core.c		patch \| blob \| history
net/netfilter/nf_conntrack_core.c		patch \| blob \| history
net/netfilter/nf_conntrack_netlink.c		patch \| blob \| history
net/netfilter/nf_conntrack_proto.c		patch \| blob \| history
net/netfilter/nf_conntrack_proto_icmp.c		patch \| blob \| history
net/netfilter/nf_conntrack_proto_icmpv6.c		patch \| blob \| history
net/netfilter/nf_nat_core.c		patch \| blob \| history
net/netfilter/nf_tables_api.c		patch \| blob \| history
net/netfilter/nfnetlink_log.c		patch \| blob \| history
net/netfilter/nfnetlink_queue.c		patch \| blob \| history
net/netfilter/xt_time.c		patch \| blob \| history
net/rds/ib_fmr.c		patch \| blob \| history
net/rds/ib_rdma.c		patch \| blob \| history
net/rose/rose_loopback.c		patch \| blob \| history
net/rxrpc/input.c		patch \| blob \| history
net/rxrpc/local_object.c		patch \| blob \| history
net/sunrpc/cache.c		patch \| blob \| history
net/tls/tls_device.c		patch \| blob \| history
net/tls/tls_device_fallback.c		patch \| blob \| history
net/tls/tls_main.c		patch \| blob \| history
net/tls/tls_sw.c		patch \| blob \| history
tools/testing/selftests/net/run_afpackettests		patch \| blob \| history
tools/testing/selftests/net/run_netsocktests		patch \| blob \| history
tools/testing/selftests/netfilter/Makefile		patch \| blob \| history
tools/testing/selftests/netfilter/conntrack_icmp_related.sh	[new file with mode: 0755]	patch \| blob
tools/testing/selftests/netfilter/nft_nat.sh		patch \| blob \| history