Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next
author David S. Miller <davem@davemloft.net>
Wed, 11 Aug 2021 09:22:26 +0000 (10:22 +0100)
committer David S. Miller <davem@davemloft.net>
Wed, 11 Aug 2021 09:22:26 +0000 (10:22 +0100)
Pablo Neira Ayuso says:

====================
Netfilter updates for net-next

The following patchset contains Netfilter updates for net-next:

1) Use nfnetlink_unicast() instead of netlink_unicast() in nft_compat.

2) Remove call to nf_ct_l4proto_find() in flowtable offload timeout
   fixup.

3) CLUSTERIP registers ARP hook on demand, from Florian.

4) Use clusterip_net to store pernet warning, also from Florian.

5) Remove struct netns_xt, from Florian Westphal.

6) Enable ebtables hooks in initns on demand, from Florian.

7) Allow filtering of conntrack netlink dumps by status bits,
   from Florian Westphal.

8) Register x_tables hooks in initns on demand, from Florian.

9) Remove queue_handler from per-netns structure, again from Florian.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
996 files changed:
Documentation/admin-guide/kernel-parameters.txt
Documentation/arm64/tagged-address-abi.rst
Documentation/devicetree/bindings/net/can/bosch,m_can.yaml
Documentation/devicetree/bindings/net/qcom,ipa.yaml
Documentation/devicetree/bindings/sound/renesas,rsnd.yaml
Documentation/networking/bonding.rst
Documentation/networking/filter.rst
Documentation/networking/netdev-FAQ.rst
Documentation/networking/operstates.rst
Documentation/trace/histogram.rst
Documentation/virt/kvm/api.rst
MAINTAINERS
Makefile
arch/alpha/Kconfig
arch/alpha/boot/bootp.c
arch/alpha/boot/bootpz.c
arch/alpha/boot/misc.c
arch/alpha/configs/defconfig
arch/alpha/include/asm/compiler.h
arch/alpha/include/asm/syscall.h
arch/alpha/include/uapi/asm/socket.h
arch/alpha/kernel/osf_sys.c
arch/alpha/kernel/perf_event.c
arch/alpha/kernel/process.c
arch/alpha/kernel/setup.c
arch/alpha/kernel/smp.c
arch/alpha/kernel/sys_nautilus.c
arch/alpha/kernel/traps.c
arch/alpha/math-emu/math.c
arch/arm/Kconfig
arch/arm/mach-davinci/Kconfig
arch/arm/mach-rpc/riscpc.c
arch/arm/net/bpf_jit_32.c
arch/arm64/boot/dts/freescale/imx8mp.dtsi
arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi
arch/arm64/boot/dts/qcom/ipq8074.dtsi
arch/arm64/boot/dts/qcom/msm8996.dtsi
arch/arm64/boot/dts/qcom/msm8998.dtsi
arch/arm64/boot/dts/qcom/qcs404-evb.dtsi
arch/arm64/boot/dts/qcom/qcs404.dtsi
arch/arm64/boot/dts/qcom/sc7180.dtsi
arch/arm64/boot/dts/qcom/sc7280.dtsi
arch/arm64/boot/dts/qcom/sdm845.dtsi
arch/arm64/boot/dts/qcom/sm8150.dtsi
arch/arm64/kvm/mmu.c
arch/arm64/net/bpf_jit_comp.c
arch/h8300/Kconfig.cpu
arch/ia64/Kconfig
arch/m68k/Kconfig
arch/m68k/Kconfig.machine
arch/m68k/coldfire/m525x.c
arch/mips/Kconfig
arch/mips/include/uapi/asm/socket.h
arch/mips/net/ebpf_jit.c
arch/nds32/mm/mmap.c
arch/parisc/Kconfig
arch/parisc/include/uapi/asm/socket.h
arch/powerpc/Kconfig
arch/powerpc/kernel/vdso64/Makefile
arch/powerpc/kvm/book3s_hv.c
arch/powerpc/kvm/book3s_hv_nested.c
arch/powerpc/kvm/book3s_hv_p9_entry.c
arch/powerpc/kvm/book3s_rtas.c
arch/powerpc/kvm/powerpc.c
arch/powerpc/net/bpf_jit_comp32.c
arch/powerpc/net/bpf_jit_comp64.c
arch/powerpc/platforms/pasemi/idle.c
arch/powerpc/platforms/pseries/setup.c
arch/riscv/include/asm/efi.h
arch/riscv/kernel/stacktrace.c
arch/riscv/lib/uaccess.S
arch/riscv/mm/init.c
arch/riscv/net/bpf_jit_comp32.c
arch/riscv/net/bpf_jit_comp64.c
arch/s390/boot/compressed/Makefile
arch/s390/boot/compressed/clz_ctz.c [new file with mode: 0644]
arch/s390/configs/debug_defconfig
arch/s390/configs/defconfig
arch/s390/include/asm/kvm_host.h
arch/s390/kernel/vdso32/vdso32.lds.S
arch/s390/kernel/vdso64/vdso64.lds.S
arch/s390/kvm/diag.c
arch/s390/kvm/kvm-s390.c
arch/s390/net/bpf_jit_comp.c
arch/sh/Kconfig
arch/sparc/Kconfig
arch/sparc/include/uapi/asm/socket.h
arch/sparc/net/bpf_jit_comp_64.c
arch/x86/Kconfig
arch/x86/kernel/jump_label.c
arch/x86/kvm/hyperv.c
arch/x86/kvm/ioapic.c
arch/x86/kvm/ioapic.h
arch/x86/kvm/mmu/mmu.c
arch/x86/kvm/svm/avic.c
arch/x86/kvm/svm/nested.c
arch/x86/kvm/svm/sev.c
arch/x86/kvm/svm/svm.c
arch/x86/kvm/svm/svm.h
arch/x86/kvm/svm/svm_onhyperv.h
arch/x86/kvm/trace.h
arch/x86/kvm/x86.c
arch/x86/net/bpf_jit_comp.c
arch/x86/net/bpf_jit_comp32.c
arch/xtensa/Kconfig
block/blk-iocost.c
block/blk-mq-sched.c
block/genhd.c
drivers/acpi/Kconfig
drivers/acpi/dptf/dptf_pch_fivr.c
drivers/acpi/resource.c
drivers/acpi/utils.c
drivers/acpi/x86/s2idle.c
drivers/ata/libata-sff.c
drivers/atm/horizon.c
drivers/base/auxiliary.c
drivers/base/core.c
drivers/block/loop.c
drivers/block/rbd.c
drivers/bus/fsl-mc/fsl-mc-bus.c
drivers/bus/mhi/core/internal.h
drivers/bus/mhi/core/main.c
drivers/bus/mhi/pci_generic.c
drivers/clk/clk-devres.c
drivers/clk/clk-stm32f4.c
drivers/clk/hisilicon/Kconfig
drivers/clk/qcom/clk-smd-rpm.c
drivers/clk/tegra/clk-sdmmc-mux.c
drivers/firmware/efi/dev-path-parser.c
drivers/firmware/efi/efi.c
drivers/firmware/efi/libstub/efi-stub-helper.c
drivers/firmware/efi/mokvar-table.c
drivers/firmware/efi/tpm.c
drivers/gpio/gpio-mpc8xxx.c
drivers/gpio/gpio-tqmx86.c
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
drivers/gpu/drm/amd/amdgpu/nv.c
drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
drivers/gpu/drm/amd/amdgpu/soc15.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c
drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c
drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c
drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.h
drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
drivers/gpu/drm/amd/display/dc/core/dc_resource.c
drivers/gpu/drm/amd/display/dc/dc.h
drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h
drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c
drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c
drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.h
drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c
drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c
drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.h
drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c
drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c
drivers/gpu/drm/amd/display/dc/inc/hw/transform.h
drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h
drivers/gpu/drm/amd/pm/inc/aldebaran_ppsmc.h
drivers/gpu/drm/amd/pm/inc/smu_types.h
drivers/gpu/drm/amd/pm/inc/smu_v11_0.h
drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
drivers/gpu/drm/drm_ioctl.c
drivers/gpu/drm/i915/display/intel_bios.c
drivers/gpu/drm/i915/display/intel_display.c
drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c
drivers/gpu/drm/i915/gvt/handlers.c
drivers/gpu/drm/i915/i915_cmd_parser.c
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_request.c
drivers/gpu/drm/i915/intel_device_info.c
drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
drivers/gpu/drm/msm/dp/dp_catalog.c
drivers/gpu/drm/msm/dp/dp_ctrl.c
drivers/gpu/drm/msm/dp/dp_display.c
drivers/gpu/drm/msm/msm_iommu.c
drivers/gpu/drm/nouveau/nouveau_bo.c
drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c
drivers/gpu/drm/panel/panel-simple.c
drivers/gpu/drm/ttm/ttm_bo.c
drivers/gpu/drm/ttm/ttm_bo_util.c
drivers/gpu/drm/ttm/ttm_device.c
drivers/gpu/drm/ttm/ttm_module.c
drivers/gpu/drm/vc4/vc4_hdmi.c
drivers/hid/Kconfig
drivers/hid/amd-sfh-hid/amd_sfh_pcie.c
drivers/hid/hid-apple.c
drivers/hid/hid-asus.c
drivers/hid/hid-ft260.c
drivers/hid/intel-ish-hid/ishtp-hid-client.c
drivers/hid/intel-ish-hid/ishtp-hid.h
drivers/hid/intel-ish-hid/ishtp/bus.c
drivers/hid/usbhid/Kconfig
drivers/hid/wacom_wac.c
drivers/i2c/busses/i2c-mpc.c
drivers/infiniband/hw/bnxt_re/main.c
drivers/infiniband/hw/bnxt_re/qplib_res.c
drivers/infiniband/hw/bnxt_re/qplib_res.h
drivers/infiniband/hw/irdma/ctrl.c
drivers/infiniband/hw/irdma/hw.c
drivers/infiniband/hw/irdma/main.c
drivers/infiniband/hw/irdma/type.h
drivers/infiniband/hw/irdma/uk.c
drivers/infiniband/hw/irdma/verbs.c
drivers/infiniband/hw/mlx5/cq.c
drivers/infiniband/hw/mlx5/devx.c
drivers/infiniband/hw/mlx5/ib_rep.c
drivers/infiniband/hw/mlx5/main.c
drivers/infiniband/hw/mlx5/std_types.c
drivers/infiniband/sw/rxe/rxe_mr.c
drivers/media/common/videobuf2/videobuf2-core.c
drivers/media/pci/intel/ipu3/cio2-bridge.c
drivers/media/pci/ngene/ngene-core.c
drivers/media/pci/ngene/ngene.h
drivers/media/platform/atmel/Kconfig
drivers/media/platform/atmel/Makefile
drivers/media/platform/atmel/atmel-isc-base.c
drivers/media/usb/dvb-usb-v2/rtl28xxu.c
drivers/misc/eeprom/at24.c
drivers/net/Kconfig
drivers/net/Makefile
drivers/net/Space.c
drivers/net/appletalk/Kconfig
drivers/net/appletalk/ltpc.c
drivers/net/bonding/bond_3ad.c
drivers/net/bonding/bond_alb.c
drivers/net/bonding/bond_main.c
drivers/net/bonding/bond_netlink.c
drivers/net/bonding/bond_options.c
drivers/net/bonding/bond_procfs.c
drivers/net/bonding/bond_sysfs.c
drivers/net/can/flexcan.c
drivers/net/can/spi/hi311x.c
drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
drivers/net/can/usb/ems_usb.c
drivers/net/can/usb/esd_usb2.c
drivers/net/can/usb/mcba_usb.c
drivers/net/can/usb/peak_usb/pcan_usb.c
drivers/net/can/usb/usb_8dev.c
drivers/net/dsa/b53/b53_common.c
drivers/net/dsa/b53/b53_priv.h
drivers/net/dsa/bcm_sf2.c
drivers/net/dsa/mt7530.c
drivers/net/dsa/mt7530.h
drivers/net/dsa/mv88e6xxx/chip.c
drivers/net/dsa/qca/ar9331.c
drivers/net/dsa/sja1105/sja1105.h
drivers/net/dsa/sja1105/sja1105_dynamic_config.c
drivers/net/dsa/sja1105/sja1105_main.c
drivers/net/ethernet/3com/3c509.c
drivers/net/ethernet/3com/3c515.c
drivers/net/ethernet/3com/Kconfig
drivers/net/ethernet/8390/Kconfig
drivers/net/ethernet/8390/apne.c
drivers/net/ethernet/8390/ax88796.c
drivers/net/ethernet/8390/ne.c
drivers/net/ethernet/8390/smc-ultra.c
drivers/net/ethernet/8390/wd.c
drivers/net/ethernet/8390/xsurf100.c
drivers/net/ethernet/amd/Kconfig
drivers/net/ethernet/amd/atarilance.c
drivers/net/ethernet/amd/lance.c
drivers/net/ethernet/amd/mvme147.c
drivers/net/ethernet/amd/ni65.c
drivers/net/ethernet/amd/sun3lance.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
drivers/net/ethernet/broadcom/bnxt/bnxt.c
drivers/net/ethernet/broadcom/bnxt/bnxt.h
drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h
drivers/net/ethernet/broadcom/genet/bcmgenet.c
drivers/net/ethernet/cavium/liquidio/lio_main.c
drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
drivers/net/ethernet/cavium/thunder/nic_main.c
drivers/net/ethernet/cavium/thunder/nicvf_main.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
drivers/net/ethernet/cirrus/Kconfig
drivers/net/ethernet/cirrus/cs89x0.c
drivers/net/ethernet/dec/tulip/media.c
drivers/net/ethernet/dec/tulip/winbond-840.c
drivers/net/ethernet/freescale/dpaa2/Makefile
drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-devlink.c
drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-ethtool.c
drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c
drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h
drivers/net/ethernet/freescale/dpaa2/dpsw.h
drivers/net/ethernet/freescale/fec.h
drivers/net/ethernet/freescale/fec_main.c
drivers/net/ethernet/hisilicon/Kconfig
drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.h
drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.c
drivers/net/ethernet/huawei/hinic/hinic_devlink.c
drivers/net/ethernet/huawei/hinic/hinic_devlink.h
drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
drivers/net/ethernet/huawei/hinic/hinic_main.c
drivers/net/ethernet/i825xx/82596.c
drivers/net/ethernet/i825xx/sun3_82586.c
drivers/net/ethernet/intel/i40e/i40e_ethtool.c
drivers/net/ethernet/intel/i40e/i40e_main.c
drivers/net/ethernet/intel/i40e/i40e_txrx.c
drivers/net/ethernet/intel/i40e/i40e_txrx.h
drivers/net/ethernet/intel/ice/ice_devlink.c
drivers/net/ethernet/marvell/mvneta.c
drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
drivers/net/ethernet/marvell/octeontx2/af/cgx.c
drivers/net/ethernet/marvell/octeontx2/af/common.h
drivers/net/ethernet/marvell/octeontx2/af/mbox.h
drivers/net/ethernet/marvell/octeontx2/af/npc.h
drivers/net/ethernet/marvell/octeontx2/af/rvu.c
drivers/net/ethernet/marvell/octeontx2/af/rvu.h
drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c
drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h
drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c
drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c
drivers/net/ethernet/marvell/octeontx2/nic/cn10k.h
drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
drivers/net/ethernet/marvell/prestera/prestera_devlink.c
drivers/net/ethernet/marvell/prestera/prestera_devlink.h
drivers/net/ethernet/marvell/prestera/prestera_main.c
drivers/net/ethernet/marvell/sky2.c
drivers/net/ethernet/mellanox/mlx4/main.c
drivers/net/ethernet/mellanox/mlx4/qp.c
drivers/net/ethernet/mellanox/mlx5/core/Makefile
drivers/net/ethernet/mellanox/mlx5/core/cq.c
drivers/net/ethernet/mellanox/mlx5/core/dev.c
drivers/net/ethernet/mellanox/mlx5/core/devlink.c
drivers/net/ethernet/mellanox/mlx5/core/devlink.h
drivers/net/ethernet/mellanox/mlx5/core/en.h
drivers/net/ethernet/mellanox/mlx5/core/en/channels.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/en/channels.h [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c
drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h
drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c
drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h
drivers/net/ethernet/mellanox/mlx5/core/en/params.c
drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c
drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h
drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c
drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h
drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c
drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h
drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c
drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
drivers/net/ethernet/mellanox/mlx5/core/health.c
drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
drivers/net/ethernet/mellanox/mlx5/core/lag.c
drivers/net/ethernet/mellanox/mlx5/core/lag.h
drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/main.c
drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c
drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c
drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
drivers/net/ethernet/mellanox/mlxsw/core.c
drivers/net/ethernet/microchip/sparx5/Kconfig
drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c
drivers/net/ethernet/mscc/ocelot_vsc7514.c
drivers/net/ethernet/natsemi/jazzsonic.c
drivers/net/ethernet/natsemi/natsemi.c
drivers/net/ethernet/natsemi/xtsonic.c
drivers/net/ethernet/neterion/vxge/vxge-main.c
drivers/net/ethernet/netronome/nfp/nfp_main.c
drivers/net/ethernet/netronome/nfp/nfp_net_common.c
drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
drivers/net/ethernet/netronome/nfp/nfp_net_main.c
drivers/net/ethernet/pensando/ionic/ionic_devlink.c
drivers/net/ethernet/pensando/ionic/ionic_lif.c
drivers/net/ethernet/pensando/ionic/ionic_lif.h
drivers/net/ethernet/pensando/ionic/ionic_phc.c
drivers/net/ethernet/pensando/ionic/ionic_txrx.c
drivers/net/ethernet/qlogic/qed/qed_devlink.c
drivers/net/ethernet/qlogic/qed/qed_int.c
drivers/net/ethernet/qlogic/qed/qed_iwarp.c
drivers/net/ethernet/qlogic/qed/qed_main.c
drivers/net/ethernet/qlogic/qed/qed_nvmetcp_fw_funcs.c
drivers/net/ethernet/qlogic/qede/qede.h
drivers/net/ethernet/qlogic/qede/qede_filter.c
drivers/net/ethernet/qlogic/qede/qede_main.c
drivers/net/ethernet/qlogic/qla3xxx.c
drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c
drivers/net/ethernet/sis/sis900.c
drivers/net/ethernet/smsc/Kconfig
drivers/net/ethernet/smsc/smc9194.c
drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
drivers/net/ethernet/sun/niu.c
drivers/net/ethernet/ti/Kconfig
drivers/net/ethernet/ti/am65-cpsw-nuss.c
drivers/net/ethernet/ti/am65-cpsw-nuss.h
drivers/net/ethernet/ti/cpsw.c
drivers/net/ethernet/ti/cpsw_new.c
drivers/net/ethernet/ti/davinci_emac.c
drivers/net/ipa/gsi.c
drivers/net/ipa/gsi.h
drivers/net/ipa/ipa.h
drivers/net/ipa/ipa_clock.c
drivers/net/ipa/ipa_clock.h
drivers/net/ipa/ipa_data-v4.9.c
drivers/net/ipa/ipa_endpoint.c
drivers/net/ipa/ipa_main.c
drivers/net/ipa/ipa_modem.c
drivers/net/mhi/Makefile [deleted file]
drivers/net/mhi/mhi.h [deleted file]
drivers/net/mhi/net.c [deleted file]
drivers/net/mhi/proto_mbim.c [deleted file]
drivers/net/mhi_net.c [new file with mode: 0644]
drivers/net/netdevsim/bus.c
drivers/net/netdevsim/dev.c
drivers/net/netdevsim/fib.c
drivers/net/netdevsim/netdevsim.h
drivers/net/phy/broadcom.c
drivers/net/phy/micrel.c
drivers/net/phy/mscc/mscc_ptp.c
drivers/net/usb/ipheth.c
drivers/net/usb/lan78xx.c
drivers/net/usb/pegasus.c
drivers/net/usb/usbnet.c
drivers/net/virtio_net.c
drivers/net/vrf.c
drivers/net/wan/Kconfig
drivers/net/wan/Makefile
drivers/net/wan/hostess_sv11.c
drivers/net/wan/sbni.c [deleted file]
drivers/net/wan/sbni.h [deleted file]
drivers/net/wireless/virt_wifi.c
drivers/net/wwan/Kconfig
drivers/net/wwan/Makefile
drivers/net/wwan/iosm/iosm_ipc_mmio.h
drivers/net/wwan/iosm/iosm_ipc_mux_codec.c
drivers/net/wwan/iosm/iosm_ipc_mux_codec.h
drivers/net/wwan/iosm/iosm_ipc_protocol_ops.c
drivers/net/wwan/iosm/iosm_ipc_wwan.c
drivers/net/wwan/mhi_wwan_ctrl.c
drivers/net/wwan/mhi_wwan_mbim.c [new file with mode: 0644]
drivers/nfc/nfcsim.c
drivers/nfc/s3fwrn5/firmware.c
drivers/nvme/host/core.c
drivers/nvme/host/multipath.c
drivers/nvme/host/nvme.h
drivers/nvme/host/pci.c
drivers/nvme/host/trace.h
drivers/pcmcia/i82092.c
drivers/platform/x86/amd-pmc.c
drivers/platform/x86/gigabyte-wmi.c
drivers/platform/x86/intel-hid.c
drivers/platform/x86/think-lmi.c
drivers/platform/x86/think-lmi.h
drivers/platform/x86/wireless-hotkey.c
drivers/ptp/Kconfig
drivers/ptp/ptp_ocp.c
drivers/s390/net/Kconfig
drivers/s390/net/qeth_l2_main.c
drivers/scsi/arm/acornscsi.c
drivers/scsi/arm/fas216.c
drivers/scsi/device_handler/scsi_dh_rdac.c
drivers/scsi/ibmvscsi/ibmvfc.c
drivers/scsi/ibmvscsi/ibmvfc.h
drivers/scsi/megaraid/megaraid_mm.c
drivers/scsi/mpt3sas/mpt3sas_base.c
drivers/scsi/mpt3sas/mpt3sas_base.h
drivers/scsi/mpt3sas/mpt3sas_scsih.c
drivers/scsi/pm8001/pm8001_sas.c
drivers/scsi/scsi_scan.c
drivers/scsi/scsi_sysfs.c
drivers/scsi/scsi_transport_iscsi.c
drivers/scsi/sr.c
drivers/staging/qlge/qlge_main.c
drivers/target/target_core_sbc.c
drivers/target/target_core_transport.c
drivers/usb/class/cdc-wdm.c
drivers/usb/core/devio.c
drivers/usb/core/hub.c
drivers/usb/core/quirks.c
drivers/usb/dwc2/core.h
drivers/usb/dwc2/core_intr.c
drivers/usb/dwc2/gadget.c
drivers/usb/dwc2/hcd.c
drivers/usb/dwc2/params.c
drivers/usb/dwc3/core.h
drivers/usb/dwc3/ep0.c
drivers/usb/dwc3/gadget.c
drivers/usb/gadget/function/u_serial.c
drivers/usb/gadget/udc/tegra-xudc.c
drivers/usb/host/ehci-hcd.c
drivers/usb/host/max3421-hcd.c
drivers/usb/host/xhci-hub.c
drivers/usb/host/xhci-pci-renesas.c
drivers/usb/host/xhci-pci.c
drivers/usb/phy/phy.c
drivers/usb/renesas_usbhs/fifo.c
drivers/usb/serial/cp210x.c
drivers/usb/serial/option.c
drivers/usb/storage/unusual_uas.h
drivers/usb/typec/stusb160x.c
drivers/usb/typec/tipd/core.c
drivers/vdpa/mlx5/net/mlx5_vnet.c
fs/Kconfig.binfmt
fs/Makefile
fs/binfmt_em86.c [deleted file]
fs/block_dev.c
fs/btrfs/backref.c
fs/btrfs/backref.h
fs/btrfs/compression.c
fs/btrfs/delayed-ref.c
fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/inode.c
fs/btrfs/ordered-data.c
fs/btrfs/ordered-data.h
fs/btrfs/qgroup.c
fs/btrfs/qgroup.h
fs/btrfs/tests/qgroup-tests.c
fs/btrfs/tree-log.c
fs/btrfs/volumes.c
fs/btrfs/zoned.c
fs/ceph/mds_client.c
fs/cifs/cifssmb.c
fs/cifs/connect.c
fs/cifs/dfs_cache.c
fs/cifs/dfs_cache.h
fs/cifs/file.c
fs/cifs/fs_context.c
fs/cifs/smb2ops.c
fs/ext2/dir.c
fs/ext2/ext2.h
fs/ext2/namei.c
fs/fs-writeback.c
fs/hugetlbfs/inode.c
fs/internal.h
fs/io-wq.c
fs/io_uring.c
fs/ocfs2/file.c
fs/pipe.c
fs/reiserfs/stree.c
fs/reiserfs/super.c
fs/userfaultfd.c
fs/xfs/libxfs/xfs_log_format.h
fs/xfs/xfs_buf_item_recover.c
fs/xfs/xfs_inode_item_recover.c
fs/xfs/xfs_log.c
fs/xfs/xfs_log_cil.c
fs/xfs/xfs_log_priv.h
fs/xfs/xfs_trace.h
include/acpi/acpi_bus.h
include/drm/drm_ioctl.h
include/linux/blkdev.h
include/linux/bpf-cgroup.h
include/linux/bpf.h
include/linux/bpf_types.h
include/linux/bpf_verifier.h
include/linux/filter.h
include/linux/fs_context.h
include/linux/fsl/mc.h
include/linux/highmem.h
include/linux/if_bridge.h
include/linux/igmp.h
include/linux/intel-ish-client-if.h
include/linux/memblock.h
include/linux/mhi.h
include/linux/mlx5/driver.h
include/linux/mlx5/eswitch.h
include/linux/mlx5/fs.h
include/linux/mlx5/mlx5_ifc.h
include/linux/mm_types.h
include/linux/netdevice.h
include/linux/sched.h
include/linux/skbuff.h
include/linux/skmsg.h
include/net/Space.h
include/net/act_api.h
include/net/af_unix.h
include/net/ax88796.h
include/net/bluetooth/hci_core.h
include/net/bond_3ad.h
include/net/bond_options.h
include/net/bonding.h
include/net/compat.h
include/net/devlink.h
include/net/dsa.h
include/net/flow_offload.h
include/net/if_inet6.h
include/net/inet_hashtables.h
include/net/ip6_route.h
include/net/llc_pdu.h
include/net/netns/xfrm.h
include/net/nfc/nci_core.h
include/net/page_pool.h
include/net/pkt_cls.h
include/net/rtnetlink.h
include/net/sch_generic.h
include/net/sctp/structs.h
include/net/sock.h
include/net/switchdev.h
include/net/tcp.h
include/sound/soc.h
include/uapi/asm-generic/socket.h
include/uapi/linux/can/j1939.h
include/uapi/linux/idxd.h
include/uapi/linux/if_link.h
include/uapi/linux/in.h
include/uapi/linux/pkt_cls.h
include/uapi/linux/socket.h
include/uapi/rdma/irdma-abi.h
init/main.c
kernel/bpf/bpf_iter.c
kernel/bpf/btf.c
kernel/bpf/core.c
kernel/bpf/devmap.c
kernel/bpf/disasm.c
kernel/bpf/helpers.c
kernel/bpf/local_storage.c
kernel/bpf/verifier.c
kernel/cgroup/cgroup-v1.c
kernel/dma/ops_helpers.c
kernel/fork.c
kernel/smpboot.c
kernel/time/posix-cpu-timers.c
kernel/time/timer.c
kernel/trace/bpf_trace.c
kernel/trace/ftrace.c
kernel/trace/ring_buffer.c
kernel/trace/trace.c
kernel/trace/trace_events_hist.c
kernel/trace/trace_events_synth.c
kernel/trace/trace_hwlat.c
kernel/trace/trace_synth.h
kernel/tracepoint.c
kernel/ucount.c
kernel/workqueue.c
lib/Kconfig
lib/Kconfig.debug
lib/test_bpf.c
mm/backing-dev.c
mm/kfence/core.c
mm/kfence/kfence_test.c
mm/memblock.c
mm/memcontrol.c
mm/memory.c
mm/migrate.c
mm/mmap_lock.c
mm/page_alloc.c
mm/secretmem.c
mm/slab.h
mm/slub.c
net/ax25/ax25_ip.c
net/ax25/ax25_out.c
net/ax25/ax25_route.c
net/batman-adv/bridge_loop_avoidance.c
net/batman-adv/distributed-arp-table.c
net/batman-adv/gateway_client.c
net/batman-adv/multicast.c
net/batman-adv/originator.c
net/batman-adv/translation-table.c
net/bluetooth/hci_core.c
net/bluetooth/hci_sock.c
net/bluetooth/hci_sysfs.c
net/bpf/test_run.c
net/bridge/br.c
net/bridge/br_fdb.c
net/bridge/br_if.c
net/bridge/br_ioctl.c
net/bridge/br_private.h
net/bridge/br_switchdev.c
net/can/j1939/j1939-priv.h
net/can/j1939/socket.c
net/can/j1939/transport.c
net/can/raw.c
net/core/dev.c
net/core/dev_ioctl.c
net/core/devlink.c
net/core/drop_monitor.c
net/core/dst.c
net/core/filter.c
net/core/flow_dissector.c
net/core/neighbour.c
net/core/page_pool.c
net/core/pktgen.c
net/core/rtnetlink.c
net/core/skbuff.c
net/core/skmsg.c
net/core/sock.c
net/decnet/dn_dev.c
net/decnet/dn_fib.c
net/decnet/dn_route.c
net/dsa/Kconfig
net/dsa/dsa.c
net/dsa/dsa2.c
net/dsa/dsa_priv.h
net/dsa/port.c
net/dsa/slave.c
net/dsa/tag_ar9331.c
net/dsa/tag_brcm.c
net/dsa/tag_dsa.c
net/dsa/tag_gswip.c
net/dsa/tag_hellcreek.c
net/dsa/tag_ksz.c
net/dsa/tag_lan9303.c
net/dsa/tag_mtk.c
net/dsa/tag_ocelot.c
net/dsa/tag_ocelot_8021q.c
net/dsa/tag_qca.c
net/dsa/tag_rtl4_a.c
net/dsa/tag_sja1105.c
net/dsa/tag_trailer.c
net/dsa/tag_xrs700x.c
net/ethernet/eth.c
net/ethtool/ioctl.c
net/ethtool/netlink.c
net/ethtool/netlink.h
net/ieee802154/nl-phy.c
net/ieee802154/nl802154.c
net/ieee802154/socket.c
net/ipv4/devinet.c
net/ipv4/fib_semantics.c
net/ipv4/icmp.c
net/ipv4/igmp.c
net/ipv4/ip_output.c
net/ipv4/ip_sockglue.c
net/ipv4/ip_tunnel.c
net/ipv4/route.c
net/ipv4/tcp_ipv4.c
net/ipv4/tcp_offload.c
net/ipv4/udp_offload.c
net/ipv6/addrconf.c
net/ipv6/exthdrs.c
net/ipv6/ip6_output.c
net/ipv6/ip6mr.c
net/ipv6/ipv6_sockglue.c
net/ipv6/mcast.c
net/ipv6/route.c
net/iucv/af_iucv.c
net/iucv/iucv.c
net/llc/af_llc.c
net/llc/llc_s_ac.c
net/mac80211/cfg.c
net/mac80211/ieee80211_i.h
net/mac80211/mlme.c
net/mac80211/rx.c
net/mac80211/tx.c
net/mctp/af_mctp.c
net/mptcp/pm_netlink.c
net/netfilter/nf_conntrack_core.c
net/netfilter/nf_flow_table_core.c
net/netfilter/nf_flow_table_offload.c
net/netfilter/nf_queue.c
net/netfilter/nf_tables_api.c
net/netfilter/nfnetlink_hook.c
net/netfilter/nft_last.c
net/netfilter/nft_nat.c
net/netlabel/netlabel_unlabeled.c
net/netrom/nr_loopback.c
net/netrom/nr_route.c
net/nfc/nci/core.c
net/nfc/nci/hci.c
net/packet/af_packet.c
net/phonet/af_phonet.c
net/phonet/pn_dev.c
net/phonet/socket.c
net/qrtr/mhi.c
net/qrtr/qrtr.c
net/sched/act_api.c
net/sched/act_bpf.c
net/sched/act_connmark.c
net/sched/act_csum.c
net/sched/act_ct.c
net/sched/act_ctinfo.c
net/sched/act_gact.c
net/sched/act_gate.c
net/sched/act_ife.c
net/sched/act_ipt.c
net/sched/act_mirred.c
net/sched/act_mpls.c
net/sched/act_nat.c
net/sched/act_pedit.c
net/sched/act_police.c
net/sched/act_sample.c
net/sched/act_simple.c
net/sched/act_skbedit.c
net/sched/act_skbmod.c
net/sched/act_tunnel_key.c
net/sched/act_vlan.c
net/sched/cls_api.c
net/sched/cls_basic.c
net/sched/cls_bpf.c
net/sched/cls_cgroup.c
net/sched/cls_flow.c
net/sched/cls_flower.c
net/sched/cls_fw.c
net/sched/cls_matchall.c
net/sched/cls_route.c
net/sched/cls_rsvp.h
net/sched/cls_tcindex.c
net/sched/cls_u32.c
net/sched/sch_generic.c
net/sched/sch_taprio.c
net/sctp/auth.c
net/sctp/input.c
net/sctp/ipv6.c
net/sctp/sm_statefuns.c
net/sctp/transport.c
net/smc/smc_core.c
net/smc/smc_ib.c
net/smc/smc_pnet.c
net/switchdev/switchdev.c
net/tipc/crypto.c
net/tipc/socket.c
net/unix/Kconfig
net/unix/af_unix.c
net/unix/unix_bpf.c
net/vmw_vsock/virtio_transport_common.c
net/wireless/nl80211.c
net/wireless/scan.c
net/xfrm/xfrm_compat.c
net/xfrm/xfrm_ipcomp.c
net/xfrm/xfrm_policy.c
net/xfrm/xfrm_user.c
samples/bpf/.gitignore
samples/bpf/test_override_return.sh
samples/bpf/tracex7_user.c
samples/bpf/xdp1_kern.c
samples/bpf/xdp2_kern.c
samples/bpf/xdp_redirect_cpu_user.c
samples/bpf/xdpsock_user.c
scripts/recordmcount.pl
scripts/tracing/draw_functrace.py
security/selinux/ss/policydb.c
sound/core/pcm_native.c
sound/hda/intel-dsp-config.c
sound/isa/sb/sb16_csp.c
sound/pci/hda/patch_hdmi.c
sound/pci/hda/patch_realtek.c
sound/soc/amd/acp-da7219-max98357a.c
sound/soc/codecs/Kconfig
sound/soc/codecs/rt5631.c
sound/soc/codecs/rt5682.c
sound/soc/codecs/tlv320aic31xx.c
sound/soc/codecs/tlv320aic31xx.h
sound/soc/codecs/tlv320aic32x4.c
sound/soc/codecs/wcd938x.c
sound/soc/codecs/wm_adsp.c
sound/soc/intel/boards/sof_sdw_max98373.c
sound/soc/soc-pcm.c
sound/soc/sof/intel/pci-tgl.c
sound/soc/tegra/tegra_pcm.c
sound/soc/ti/j721e-evm.c
sound/usb/mixer.c
sound/usb/quirks.c
tools/bpf/bpftool/Documentation/bpftool-btf.rst
tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
tools/bpf/bpftool/Documentation/bpftool-feature.rst
tools/bpf/bpftool/Documentation/bpftool-gen.rst
tools/bpf/bpftool/Documentation/bpftool-iter.rst
tools/bpf/bpftool/Documentation/bpftool-link.rst
tools/bpf/bpftool/Documentation/bpftool-map.rst
tools/bpf/bpftool/Documentation/bpftool-net.rst
tools/bpf/bpftool/Documentation/bpftool-perf.rst
tools/bpf/bpftool/Documentation/bpftool-prog.rst
tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst
tools/bpf/bpftool/Documentation/bpftool.rst
tools/bpf/bpftool/bash-completion/bpftool
tools/bpf/bpftool/btf.c
tools/bpf/bpftool/btf_dumper.c
tools/bpf/bpftool/cgroup.c
tools/bpf/bpftool/common.c
tools/bpf/bpftool/feature.c
tools/bpf/bpftool/gen.c
tools/bpf/bpftool/iter.c
tools/bpf/bpftool/link.c
tools/bpf/bpftool/main.c
tools/bpf/bpftool/main.h
tools/bpf/bpftool/map.c
tools/bpf/bpftool/net.c
tools/bpf/bpftool/perf.c
tools/bpf/bpftool/prog.c
tools/bpf/bpftool/struct_ops.c
tools/bpf/resolve_btfids/main.c
tools/include/uapi/linux/if_link.h
tools/lib/bpf/Build
tools/lib/bpf/btf.c
tools/lib/bpf/btf.h
tools/lib/bpf/btf_dump.c
tools/lib/bpf/libbpf.c
tools/lib/bpf/libbpf.h
tools/lib/bpf/libbpf.map
tools/lib/bpf/libbpf_internal.h
tools/lib/bpf/relo_core.c [new file with mode: 0644]
tools/lib/bpf/relo_core.h [new file with mode: 0644]
tools/perf/util/bpf-event.c
tools/perf/util/bpf_counter.c
tools/perf/util/cs-etm.c
tools/perf/util/map.c
tools/perf/util/pmu.c
tools/testing/selftests/Makefile
tools/testing/selftests/bpf/.gitignore
tools/testing/selftests/bpf/Makefile
tools/testing/selftests/bpf/README.rst
tools/testing/selftests/bpf/netcnt_common.h
tools/testing/selftests/bpf/network_helpers.c
tools/testing/selftests/bpf/network_helpers.h
tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/btf.c
tools/testing/selftests/bpf/prog_tests/btf_dump.c
tools/testing/selftests/bpf/prog_tests/core_autosize.c
tools/testing/selftests/bpf/prog_tests/core_reloc.c
tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c
tools/testing/selftests/bpf/prog_tests/netcnt.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/pinning.c
tools/testing/selftests/bpf/prog_tests/reference_tracking.c
tools/testing/selftests/bpf/prog_tests/tc_redirect.c
tools/testing/selftests/bpf/prog_tests/xdp_bonding.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
tools/testing/selftests/bpf/progs/bpf_tracing_net.h
tools/testing/selftests/bpf/progs/get_func_ip_test.c
tools/testing/selftests/bpf/progs/netcnt_prog.c
tools/testing/selftests/bpf/progs/test_map_in_map_invalid.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
tools/testing/selftests/bpf/progs/xdp_tx.c
tools/testing/selftests/bpf/test_bpftool_synctypes.py [new file with mode: 0755]
tools/testing/selftests/bpf/test_maps.c
tools/testing/selftests/bpf/test_netcnt.c [deleted file]
tools/testing/selftests/bpf/test_progs.h
tools/testing/selftests/bpf/test_tc_tunnel.sh
tools/testing/selftests/bpf/test_xdp_veth.sh
tools/testing/selftests/bpf/verifier/value_ptr_arith.c
tools/testing/selftests/kvm/.gitignore
tools/testing/selftests/kvm/Makefile
tools/testing/selftests/kvm/aarch64/get-reg-list.c
tools/testing/selftests/kvm/access_tracking_perf_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/dirty_log_perf_test.c
tools/testing/selftests/kvm/include/x86_64/hyperv.h
tools/testing/selftests/kvm/steal_time.c
tools/testing/selftests/kvm/x86_64/hyperv_clock.c
tools/testing/selftests/kvm/x86_64/hyperv_features.c
tools/testing/selftests/net/Makefile
tools/testing/selftests/net/af_unix/Makefile [new file with mode: 0644]
tools/testing/selftests/net/af_unix/test_unix_oob.c [new file with mode: 0644]
tools/testing/selftests/net/gro.c [new file with mode: 0644]
tools/testing/selftests/net/gro.sh [new file with mode: 0755]
tools/testing/selftests/net/ipsec.c
tools/testing/selftests/net/psock_snd.sh
tools/testing/selftests/net/setup_loopback.sh [new file with mode: 0755]
tools/testing/selftests/net/toeplitz.c [new file with mode: 0644]
tools/testing/selftests/net/toeplitz.sh [new file with mode: 0755]
tools/testing/selftests/net/toeplitz_client.sh [new file with mode: 0755]
tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json [new file with mode: 0644]
tools/testing/selftests/tc-testing/tdc_config.py
tools/testing/selftests/vm/userfaultfd.c
virt/kvm/kvm_main.c

index bdb2200..ee0569a 100644 (file)
        sa1100ir        [NET]
                        See drivers/net/irda/sa1100_ir.c.
 
-       sbni=           [NET] Granch SBNI12 leased line adapter
-
        sched_verbose   [KNL] Enables verbose scheduler debug messages.
 
        schedstats=     [KNL,X86] Enable or disable scheduled statistics.
index 459e6b6..0c9120e 100644 (file)
@@ -45,14 +45,24 @@ how the user addresses are used by the kernel:
 
 1. User addresses not accessed by the kernel but used for address space
    management (e.g. ``mprotect()``, ``madvise()``). The use of valid
-   tagged pointers in this context is allowed with the exception of
-   ``brk()``, ``mmap()`` and the ``new_address`` argument to
-   ``mremap()`` as these have the potential to alias with existing
-   user addresses.
-
-   NOTE: This behaviour changed in v5.6 and so some earlier kernels may
-   incorrectly accept valid tagged pointers for the ``brk()``,
-   ``mmap()`` and ``mremap()`` system calls.
+   tagged pointers in this context is allowed with these exceptions:
+
+   - ``brk()``, ``mmap()`` and the ``new_address`` argument to
+     ``mremap()`` as these have the potential to alias with existing
+      user addresses.
+
+     NOTE: This behaviour changed in v5.6 and so some earlier kernels may
+     incorrectly accept valid tagged pointers for the ``brk()``,
+     ``mmap()`` and ``mremap()`` system calls.
+
+   - The ``range.start``, ``start`` and ``dst`` arguments to the
+     ``UFFDIO_*`` ``ioctl()``s used on a file descriptor obtained from
+     ``userfaultfd()``, as fault addresses subsequently obtained by reading
+     the file descriptor will be untagged, which may otherwise confuse
+     tag-unaware programs.
+
+     NOTE: This behaviour changed in v5.14 and so some earlier kernels may
+     incorrectly accept valid tagged pointers for this system call.
 
 2. User addresses accessed by the kernel (e.g. ``write()``). This ABI
    relaxation is disabled by default and the application thread needs to
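
A hedged C sketch of the registration step this new exception governs
(illustrative, not part of the patch; the caller is assumed to have already
stripped the tag bits from the start address):

  #include <stdio.h>
  #include <sys/ioctl.h>
  #include <linux/userfaultfd.h>

  /* Register a range with userfaultfd.  Per the rule above, the start
   * address handed to UFFDIO_REGISTER must be untagged; fault addresses
   * later read from the fd arrive untagged as well. */
  static int register_range(int uffd, unsigned long untagged_start,
                            unsigned long len)
  {
          struct uffdio_register reg = {
                  .range.start = untagged_start,  /* caller strips tag bits */
                  .range.len   = len,
                  .mode        = UFFDIO_REGISTER_MODE_MISSING,
          };

          if (ioctl(uffd, UFFDIO_REGISTER, &reg) == -1) {
                  perror("UFFDIO_REGISTER");
                  return -1;
          }
          return 0;
  }
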
index a7b5807..fb547e2 100644 (file)
@@ -104,6 +104,12 @@ properties:
           maximum: 32
     maxItems: 1
 
+  power-domains:
+    description:
+      Power domain provider node and an args specifier containing
+      the can device id value.
+    maxItems: 1
+
   can-transceiver:
     $ref: can-transceiver.yaml#
 
index 4853ab7..ed88ba4 100644 (file)
@@ -87,18 +87,16 @@ properties:
       - const: ipa-setup-ready
 
   interconnects:
-    minItems: 2
     items:
-      - description: Path leading to system memory
-      - description: Path between the AP and IPA config space
-      - description: Path leading to internal memory
+      - description: Interconnect path between IPA and main memory
+      - description: Interconnect path between IPA and internal memory
+      - description: Interconnect path between IPA and the AP subsystem
 
   interconnect-names:
-    minItems: 2
     items:
       - const: memory
-      - const: config
       - const: imem
+      - const: config
 
   qcom,smem-states:
     $ref: /schemas/types.yaml#/definitions/phandle-array
@@ -209,11 +207,11 @@ examples:
 
                 interconnects =
                         <&rsc_hlos MASTER_IPA &rsc_hlos SLAVE_EBI1>,
-                        <&rsc_hlos MASTER_APPSS_PROC &rsc_hlos SLAVE_IPA_CFG>,
-                        <&rsc_hlos MASTER_IPA &rsc_hlos SLAVE_IMEM>;
+                        <&rsc_hlos MASTER_IPA &rsc_hlos SLAVE_IMEM>,
+                        <&rsc_hlos MASTER_APPSS_PROC &rsc_hlos SLAVE_IPA_CFG>;
                 interconnect-names = "memory",
-                                     "config",
-                                     "imem";
+                                     "imem",
+                                     "config";
 
                 qcom,smem-states = <&ipa_smp2p_out 0>,
                                    <&ipa_smp2p_out 1>;
index ee936d1..c2930d6 100644 (file)
@@ -114,7 +114,7 @@ properties:
 
   ports:
     $ref: /schemas/graph.yaml#/properties/ports
-    properties:
+    patternProperties:
       port(@[0-9a-f]+)?:
         $ref: audio-graph-port.yaml#
         unevaluatedProperties: false
index 62f2aab..31cfd7d 100644 (file)
@@ -501,6 +501,18 @@ fail_over_mac
        This option was added in bonding version 3.2.0.  The "follow"
        policy was added in bonding version 3.3.0.
 
+lacp_active
+       Option specifying whether to send LACPDU frames periodically.
+
+       off or 0
+               LACPDU frames act as "speak when spoken to".
+
+       on or 1
+               LACPDU frames are sent along the configured links
+               periodically. See lacp_rate for more details.
+
+       The default is on.
+
 lacp_rate
 
        Option specifying the rate in which we'll ask our link partner
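
Since bond_sysfs.c is among the files touched above, the new option is
presumably also visible through the bonding sysfs tree; a hedged C sketch
(the sysfs path is an assumption, verify against your kernel):

  #include <stdio.h>

  /* Print a bond's lacp_active setting; the path below mirrors how other
   * bonding options are exposed and is assumed, not taken from the patch. */
  static int show_lacp_active(const char *bond)
  {
          char path[128], buf[32];
          FILE *f;

          snprintf(path, sizeof(path),
                   "/sys/class/net/%s/bonding/lacp_active", bond);
          f = fopen(path, "r");
          if (!f)
                  return -1;
          if (fgets(buf, sizeof(buf), f))
                  printf("%s: lacp_active = %s", bond, buf);
          fclose(f);
          return 0;
  }
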
index 3e2221f..5f13905 100644 (file)
@@ -320,13 +320,6 @@ Examples for low-level BPF:
   ret #-1
   drop: ret #0
 
-**(Accelerated) VLAN w/ id 10**::
-
-  ld vlan_tci
-  jneq #10, drop
-  ret #-1
-  drop: ret #0
-
 **icmp random packet sampling, 1 in 4**::
 
   ldh [12]
@@ -358,6 +351,22 @@ Examples for low-level BPF:
   bad: ret #0             /* SECCOMP_RET_KILL_THREAD */
   good: ret #0x7fff0000   /* SECCOMP_RET_ALLOW */
 
+Examples for low-level BPF extension:
+
+**Packet for interface index 13**::
+
+  ld ifidx
+  jneq #13, drop
+  ret #-1
+  drop: ret #0
+
+**(Accelerated) VLAN w/ id 10**::
+
+  ld vlan_tci
+  jneq #10, drop
+  ret #-1
+  drop: ret #0
+
 The above example code can be placed into a file (here called "foo"), and
 then be passed to the bpf_asm tool for generating opcodes, output that xt_bpf
 and cls_bpf understands and can directly be loaded with. Example with above
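
To make the loading step concrete, here is a hedged C sketch of attaching
classic BPF opcodes to a socket; the single entry in the array is a
placeholder, not real output of "bpf_asm -c foo":

  #include <stdio.h>
  #include <sys/socket.h>
  #include <linux/filter.h>

  /* Attach a classic BPF program to a socket.  The one-entry program
   * below is a placeholder equivalent to "ret #0" (drop everything). */
  static int attach_cbpf(int fd)
  {
          static struct sock_filter code[] = {
                  { 0x06, 0, 0, 0x00000000 },     /* ret #0 */
          };
          struct sock_fprog prog = {
                  .len    = sizeof(code) / sizeof(code[0]),
                  .filter = code,
          };

          if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER,
                         &prog, sizeof(prog)) == -1) {
                  perror("SO_ATTACH_FILTER");
                  return -1;
          }
          return 0;
  }
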
index 91b2cf7..e26532f 100644 (file)
@@ -228,6 +228,23 @@ before posting to the mailing list. The patchwork build bot instance
 gets overloaded very easily and netdev@vger really doesn't need more
 traffic if we can help it.
 
+netdevsim is great, can I extend it for my out-of-tree tests?
+-------------------------------------------------------------
+
+No, `netdevsim` is a test vehicle solely for upstream tests.
+(Please add your tests under tools/testing/selftests/.)
+
+We also give no guarantees that `netdevsim` won't change in the future
+in a way which would break what would normally be considered uAPI.
+
+Is netdevsim considered a "user" of an API?
+-------------------------------------------
+
+Linux kernel has a long standing rule that no API should be added unless
+it has a real, in-tree user. Mock-ups and tests based on `netdevsim` are
+strongly encouraged when adding new APIs, but `netdevsim` in itself
+is **not** considered a use case/user.
+
 Any other tips to help ensure my net/net-next patch gets OK'd?
 --------------------------------------------------------------
 Attention to detail.  Re-read your own work as if you were the
index 9c918f7..1ee2141 100644 (file)
@@ -73,7 +73,9 @@ IF_OPER_LOWERLAYERDOWN (3):
  state (f.e. VLAN).
 
 IF_OPER_TESTING (4):
- Unused in current kernel.
+ Interface is in testing mode, for example executing driver self-tests
+ or media (cable) test. It can't be used for normal traffic until tests
+ complete.
 
 IF_OPER_DORMANT (5):
  Interface is L1 up, but waiting for an external event, f.e. for a
@@ -111,7 +113,7 @@ it as lower layer.
 
 Note that for certain kind of soft-devices, which are not managing any
 real hardware, it is possible to set this bit from userspace.  One
-should use TVL IFLA_CARRIER to do so.
+should use TLV IFLA_CARRIER to do so.
 
 netif_carrier_ok() can be used to query that bit.
 
index b71e09f..f99be80 100644 (file)
@@ -191,7 +191,7 @@ Documentation written by Tom Zanussi
                                 with the event, in nanoseconds.  May be
                                modified by .usecs to have timestamps
                                interpreted as microseconds.
-    cpu                    int  the cpu on which the event occurred.
+    common_cpu             int  the cpu on which the event occurred.
     ====================== ==== =======================================
 
 Extended error information
index c7b165c..dae68e6 100644 (file)
@@ -855,7 +855,7 @@ in-kernel irqchip (GIC), and for in-kernel irqchip can tell the GIC to
 use PPIs designated for specific cpus.  The irq field is interpreted
 like this::
 
-  bits:  |  31 ... 28  | 27 ... 24 | 23  ... 16 | 15 ... 0 |
+  bits:  |  31 ... 28  | 27 ... 24 | 23  ... 16 | 15 ... 0 |
   field: | vcpu2_index | irq_type  | vcpu_index |  irq_id  |
 
 The irq_type field has the following values:
@@ -2149,10 +2149,10 @@ prior to calling the KVM_RUN ioctl.
 Errors:
 
   ======   ============================================================
-  ENOENT   no such register
-  EINVAL   invalid register ID, or no such register or used with VMs in
+  ENOENT   no such register
+  EINVAL   invalid register ID, or no such register or used with VMs in
            protected virtualization mode on s390
-  EPERM    (arm64) register access not allowed before vcpu finalization
+  EPERM    (arm64) register access not allowed before vcpu finalization
   ======   ============================================================
 
 (These error codes are indicative only: do not rely on a specific error
@@ -2590,10 +2590,10 @@ following id bit patterns::
 Errors include:
 
   ======== ============================================================
-  ENOENT   no such register
-  EINVAL   invalid register ID, or no such register or used with VMs in
+  ENOENT   no such register
+  EINVAL   invalid register ID, or no such register or used with VMs in
            protected virtualization mode on s390
-  EPERM    (arm64) register access not allowed before vcpu finalization
+  EPERM    (arm64) register access not allowed before vcpu finalization
   ======== ============================================================
 
 (These error codes are indicative only: do not rely on a specific error
@@ -3112,13 +3112,13 @@ current state.  "addr" is ignored.
 Errors:
 
   ======     =================================================================
-  EINVAL     the target is unknown, or the combination of features is invalid.
-  ENOENT     a features bit specified is unknown.
+  EINVAL     the target is unknown, or the combination of features is invalid.
+  ENOENT     a features bit specified is unknown.
   ======     =================================================================
 
 This tells KVM what type of CPU to present to the guest, and what
-optional features it should have.  This will cause a reset of the cpu
-registers to their initial values.  If this is not called, KVM_RUN will
+optional features it should have.  This will cause a reset of the cpu
+registers to their initial values.  If this is not called, KVM_RUN will
 return ENOEXEC for that vcpu.
 
 The initial values are defined as:
@@ -3239,8 +3239,8 @@ VCPU matching underlying host.
 Errors:
 
   =====      ==============================================================
-  E2BIG      the reg index list is too big to fit in the array specified by
-             the user (the number required will be written into n).
+  E2BIG      the reg index list is too big to fit in the array specified by
+             the user (the number required will be written into n).
   =====      ==============================================================
 
 ::
@@ -3288,7 +3288,7 @@ specific device.
 ARM/arm64 divides the id field into two parts, a device id and an
 address type id specific to the individual device::
 
-  bits:  | 63        ...       32 | 31    ...    16 | 15    ...    0 |
+  bits:  | 63        ...       32 | 31    ...    16 | 15    ...    0 |
   field: |        0x00000000      |     device id   |  addr type id  |
 
 ARM/arm64 currently only require this when using the in-kernel GIC
@@ -7049,7 +7049,7 @@ In combination with KVM_CAP_X86_USER_SPACE_MSR, this allows user space to
 trap and emulate MSRs that are outside of the scope of KVM as well as
 limit the attack surface on KVM's MSR emulation code.
 
-8.28 KVM_CAP_ENFORCE_PV_CPUID
+8.28 KVM_CAP_ENFORCE_PV_FEATURE_CPUID
 -----------------------------
 
 Architectures: x86
index 73beb91..41fcfdb 100644 (file)
@@ -445,7 +445,7 @@ F:  drivers/platform/x86/wmi.c
 F:     include/uapi/linux/wmi.h
 
 ACRN HYPERVISOR SERVICE MODULE
-M:     Shuo Liu <shuo.a.liu@intel.com>
+M:     Fei Li <fei1.li@intel.com>
 L:     acrn-dev@lists.projectacrn.org (subscribers-only)
 S:     Supported
 W:     https://projectacrn.org
@@ -7859,9 +7859,9 @@ S:        Maintained
 F:     drivers/input/touchscreen/goodix.c
 
 GOOGLE ETHERNET DRIVERS
-M:     Catherine Sullivan <csully@google.com>
-R:     Sagi Shahar <sagis@google.com>
-R:     Jon Olson <jonolson@google.com>
+M:     Jeroen de Borst <jeroendb@google.com>
+R:     Catherine Sullivan <csully@google.com>
+R:     David Awogbemila <awogbemila@google.com>
 L:     netdev@vger.kernel.org
 S:     Supported
 F:     Documentation/networking/device_drivers/ethernet/google/gve.rst
@@ -11347,6 +11347,12 @@ L:     netdev@vger.kernel.org
 S:     Supported
 F:     drivers/net/phy/mxl-gpy.c
 
+MCAB MICROCHIP CAN BUS ANALYZER TOOL DRIVER
+R:     Yasushi SHOJI <yashi@spacecubics.com>
+L:     linux-can@vger.kernel.org
+S:     Maintained
+F:     drivers/net/can/usb/mcba_usb.c
+
 MCAN MMIO DEVICE DRIVER
 M:     Chandrasekar Ramakrishnan <rcsekar@samsung.com>
 L:     linux-can@vger.kernel.org
@@ -15488,6 +15494,8 @@ M:      Pan, Xinhui <Xinhui.Pan@amd.com>
 L:     amd-gfx@lists.freedesktop.org
 S:     Supported
 T:     git https://gitlab.freedesktop.org/agd5f/linux.git
+B:     https://gitlab.freedesktop.org/drm/amd/-/issues
+C:     irc://irc.oftc.net/radeon
 F:     drivers/gpu/drm/amd/
 F:     drivers/gpu/drm/radeon/
 F:     include/uapi/drm/amdgpu_drm.h
@@ -19143,7 +19151,7 @@ M:      Mauro Carvalho Chehab <mchehab@kernel.org>
 L:     linux-usb@vger.kernel.org
 S:     Maintained
 F:     Documentation/devicetree/bindings/phy/hisilicon,hi3670-usb3.yaml
-F:     drivers/phy/hisilicon/phy-kirin970-usb3.c
+F:     drivers/phy/hisilicon/phy-hi3670-usb3.c
 
 USB ISP116X DRIVER
 M:     Olav Kongas <ok@artecdesign.ee>
@@ -19821,6 +19829,14 @@ L:     netdev@vger.kernel.org
 S:     Supported
 F:     drivers/ptp/ptp_vmw.c
 
+VMWARE VMCI DRIVER
+M:     Jorgen Hansen <jhansen@vmware.com>
+M:     Vishnu Dasa <vdasa@vmware.com>
+L:     linux-kernel@vger.kernel.org
+L:     pv-drivers@vmware.com (private)
+S:     Maintained
+F:     drivers/misc/vmw_vmci/
+
 VMWARE VMMOUSE SUBDRIVER
 M:     "VMware Graphics" <linux-graphics-maintainer@vmware.com>
 M:     "VMware, Inc." <pv-drivers@vmware.com>
index e4f5895..b6ee64d 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 5
 PATCHLEVEL = 14
 SUBLEVEL = 0
-EXTRAVERSION = -rc2
+EXTRAVERSION = -rc4
 NAME = Opossums on Parade
 
 # *DOCUMENTATION*
@@ -546,7 +546,6 @@ export RCS_TAR_IGNORE := --exclude SCCS --exclude BitKeeper --exclude .svn \
 PHONY += scripts_basic
 scripts_basic:
        $(Q)$(MAKE) $(build)=scripts/basic
-       $(Q)rm -f .tmp_quiet_recordmcount
 
 PHONY += outputmakefile
 ifdef building_out_of_srctree
index 77d3280..6c50877 100644 (file)
@@ -14,7 +14,6 @@ config ALPHA
        select PCI_SYSCALL if PCI
        select HAVE_AOUT
        select HAVE_ASM_MODVERSIONS
-       select HAVE_IDE
        select HAVE_PCSPKR_PLATFORM
        select HAVE_PERF_EVENTS
        select NEED_DMA_MAP_STATE
@@ -532,7 +531,7 @@ config SMP
          will run faster if you say N here.
 
          See also the SMP-HOWTO available at
-         <http://www.tldp.org/docs.html#howto>.
+         <https://www.tldp.org/docs.html#howto>.
 
          If you don't know what to do here, say N.
 
index 00266e6..b4faba2 100644 (file)
@@ -23,7 +23,7 @@
 #include "ksize.h"
 
 extern unsigned long switch_to_osf_pal(unsigned long nr,
-       struct pcb_struct * pcb_va, struct pcb_struct * pcb_pa,
+       struct pcb_struct *pcb_va, struct pcb_struct *pcb_pa,
        unsigned long *vptb);
 
 extern void move_stack(unsigned long new_stack);
index 43af718..90a2b34 100644 (file)
@@ -200,7 +200,7 @@ extern char _end;
        START_ADDR      KSEG address of the entry point of kernel code.
 
        ZERO_PGE        KSEG address of page full of zeroes, but 
-                       upon entry to kerne cvan be expected
+                       upon entry to kernel, it can be expected
                        to hold the parameter list and possible
                        INTRD information.
 
index d651922..325d4dd 100644 (file)
@@ -30,7 +30,7 @@ extern long srm_printk(const char *, ...)
      __attribute__ ((format (printf, 1, 2)));
 
 /*
- * gzip delarations
+ * gzip declarations
  */
 #define OF(args)  args
 #define STATIC static
index dd2dd9f..7f1ca30 100644 (file)
@@ -70,3 +70,4 @@ CONFIG_DEBUG_INFO=y
 CONFIG_ALPHA_LEGACY_START_ADDRESS=y
 CONFIG_MATHEMU=y
 CONFIG_CRYPTO_HMAC=y
+CONFIG_DEVTMPFS=y
index 5159ba2..ae64595 100644 (file)
@@ -4,15 +4,4 @@
 
 #include <uapi/asm/compiler.h>
 
-/* Some idiots over in <linux/compiler.h> thought inline should imply
-   always_inline.  This breaks stuff.  We'll include this file whenever
-   we run into such problems.  */
-
-#include <linux/compiler.h>
-#undef inline
-#undef __inline__
-#undef __inline
-#undef __always_inline
-#define __always_inline                inline __attribute__((always_inline))
-
 #endif /* __ALPHA_COMPILER_H */
index 11c688c..f21baba 100644 (file)
@@ -9,4 +9,10 @@ static inline int syscall_get_arch(struct task_struct *task)
        return AUDIT_ARCH_ALPHA;
 }
 
+static inline long syscall_get_return_value(struct task_struct *task,
+                                           struct pt_regs *regs)
+{
+       return regs->r0;
+}
+
 #endif /* _ASM_ALPHA_SYSCALL_H */
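
A hedged sketch of how generic code would consume the helper this hunk adds
(the hook below is illustrative, not part of the patch); on Alpha the
syscall result lives in regs->r0:

  #include <linux/sched.h>
  #include <linux/ptrace.h>
  #include <linux/printk.h>
  #include <asm/syscall.h>

  /* Illustrative audit/trace-style exit hook: read the syscall result
   * through the arch helper instead of poking regs->r0 directly. */
  static void report_syscall_exit(struct task_struct *task,
                                  struct pt_regs *regs)
  {
          long ret = syscall_get_return_value(task, regs);

          pr_debug("syscall returned %ld\n", ret);
  }
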
index 6b3daba..1dd9baf 100644 (file)
 
 #define SO_NETNS_COOKIE                71
 
+#define SO_BUF_LOCK            72
+
 #if !defined(__KERNEL__)
 
 #if __BITS_PER_LONG == 64
index d5367a1..d31167e 100644 (file)
@@ -834,7 +834,7 @@ SYSCALL_DEFINE5(osf_setsysinfo, unsigned long, op, void __user *, buffer,
                        return -EFAULT;
                state = &current_thread_info()->ieee_state;
 
-               /* Update softare trap enable bits.  */
+               /* Update software trap enable bits.  */
+               /* Update software trap enable bits.  */
                *state = (*state & ~IEEE_SW_MASK) | (swcr & IEEE_SW_MASK);
 
                /* Update the real fpcr.  */
@@ -854,7 +854,7 @@ SYSCALL_DEFINE5(osf_setsysinfo, unsigned long, op, void __user *, buffer,
                state = &current_thread_info()->ieee_state;
                exc &= IEEE_STATUS_MASK;
 
-               /* Update softare trap enable bits.  */
+               /* Update software trap enable bits.  */
+               /* Update software trap enable bits.  */
                swcr = (*state & IEEE_SW_MASK) | exc;
                *state |= exc;
 
index e7a59d9..efcf732 100644 (file)
@@ -574,7 +574,7 @@ static void alpha_pmu_start(struct perf_event *event, int flags)
  * Check that CPU performance counters are supported.
  * - currently support EV67 and later CPUs.
  * - actually some later revisions of the EV6 have the same PMC model as the
- *     EV67 but we don't do suffiently deep CPU detection to detect them.
+ *     EV67 but we don't do sufficiently deep CPU detection to detect them.
  *     Bad luck to the very few people who might have one, I guess.
  */
 static int supported_cpu(void)
index ef0c08e..a5123ea 100644 (file)
@@ -256,7 +256,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
                childstack->r26 = (unsigned long) ret_from_kernel_thread;
                childstack->r9 = usp;   /* function */
                childstack->r10 = kthread_arg;
-               childregs->hae = alpha_mv.hae_cache,
+               childregs->hae = alpha_mv.hae_cache;
                childti->pcb.usp = 0;
                return 0;
        }
index 7d56c21..b4fbbba 100644 (file)
@@ -319,18 +319,19 @@ setup_memory(void *kernel_end)
                       i, cluster->usage, cluster->start_pfn,
                       cluster->start_pfn + cluster->numpages);
 
-               /* Bit 0 is console/PALcode reserved.  Bit 1 is
-                  non-volatile memory -- we might want to mark
-                  this for later.  */
-               if (cluster->usage & 3)
-                       continue;
-
                end = cluster->start_pfn + cluster->numpages;
                if (end > max_low_pfn)
                        max_low_pfn = end;
 
                memblock_add(PFN_PHYS(cluster->start_pfn),
                             cluster->numpages << PAGE_SHIFT);
+
+               /* Bit 0 is console/PALcode reserved.  Bit 1 is
+                  non-volatile memory -- we might want to mark
+                  this for later.  */
+               if (cluster->usage & 3)
+                       memblock_reserve(PFN_PHYS(cluster->start_pfn),
+                                        cluster->numpages << PAGE_SHIFT);
        }
 
        /*
index 4b2575f..cb64e47 100644 (file)
@@ -582,7 +582,7 @@ void
 smp_send_stop(void)
 {
        cpumask_t to_whom;
-       cpumask_copy(&to_whom, cpu_possible_mask);
+       cpumask_copy(&to_whom, cpu_online_mask);
        cpumask_clear_cpu(smp_processor_id(), &to_whom);
 #ifdef DEBUG_IPI_MSG
        if (hard_smp_processor_id() != boot_cpu_id)
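
The one-line fix narrows the IPI target set from every possible CPU to
those actually online; a hedged sketch of the resulting cpumask pattern
(the function name is illustrative):

  #include <linux/smp.h>
  #include <linux/cpumask.h>

  /* Build "everyone online except me"; cpu_possible_mask would also
   * include CPUs that were never brought up and must not be IPI'd. */
  static void broadcast_targets(cpumask_t *to_whom)
  {
          cpumask_copy(to_whom, cpu_online_mask);
          cpumask_clear_cpu(smp_processor_id(), to_whom);
  }
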
index 53adf43..96fd6ff 100644 (file)
@@ -212,7 +212,7 @@ nautilus_init_pci(void)
 
        /* Use default IO. */
        pci_add_resource(&bridge->windows, &ioport_resource);
-       /* Irongate PCI memory aperture, calculate requred size before
+       /* Irongate PCI memory aperture, calculate required size before
+       /* Irongate PCI memory aperture, calculate required size before
           setting it up. */
        pci_add_resource(&bridge->windows, &irongate_mem);
 
index 921d4b6..5398f98 100644 (file)
@@ -730,7 +730,7 @@ do_entUnaUser(void __user * va, unsigned long opcode,
        long error;
 
        /* Check the UAC bits to decide what the user wants us to do
-          with the unaliged access.  */
+          with the unaligned access.  */
+          with the unaligned access.  */
 
        if (!(current_thread_info()->status & TS_UAC_NOPRINT)) {
                if (__ratelimit(&ratelimit)) {
index d568cd9..f7cef66 100644 (file)
@@ -65,7 +65,7 @@ static long (*save_emul) (unsigned long pc);
 long do_alpha_fp_emul_imprecise(struct pt_regs *, unsigned long);
 long do_alpha_fp_emul(unsigned long);
 
-int init_module(void)
+static int alpha_fp_emul_init_module(void)
 {
        save_emul_imprecise = alpha_fp_emul_imprecise;
        save_emul = alpha_fp_emul;
@@ -73,12 +73,14 @@ int init_module(void)
        alpha_fp_emul = do_alpha_fp_emul;
        return 0;
 }
+module_init(alpha_fp_emul_init_module);
 
-void cleanup_module(void)
+static void alpha_fp_emul_cleanup_module(void)
 {
        alpha_fp_emul_imprecise = save_emul_imprecise;
        alpha_fp_emul = save_emul;
 }
+module_exit(alpha_fp_emul_cleanup_module);
 
 #undef  alpha_fp_emul_imprecise
 #define alpha_fp_emul_imprecise                do_alpha_fp_emul_imprecise
@@ -401,3 +403,5 @@ alpha_fp_emul_imprecise (struct pt_regs *regs, unsigned long write_mask)
 egress:
        return si_code;
 }
+
+EXPORT_SYMBOL(__udiv_qrnnd);
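
Renaming the entry points away from the legacy init_module()/cleanup_module() names and registering them via module_init()/module_exit() is the modern pattern; it also lets the functions be static. A minimal module skeleton in that style (a hypothetical demo module, not the math emulator):

	#include <linux/init.h>
	#include <linux/module.h>

	static int __init demo_init(void)
	{
		pr_info("demo: loaded\n");
		return 0;		/* non-zero aborts the module load */
	}
	module_init(demo_init);

	static void __exit demo_exit(void)
	{
		pr_info("demo: unloaded\n");
	}
	module_exit(demo_exit);

	MODULE_LICENSE("GPL");
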
index 82f908f..2fb7012 100644 (file)
@@ -95,7 +95,6 @@ config ARM
        select HAVE_FUNCTION_TRACER if !XIP_KERNEL
        select HAVE_GCC_PLUGINS
        select HAVE_HW_BREAKPOINT if PERF_EVENTS && (CPU_V6 || CPU_V6K || CPU_V7)
-       select HAVE_IDE if PCI || ISA || PCMCIA
        select HAVE_IRQ_TIME_ACCOUNTING
        select HAVE_KERNEL_GZIP
        select HAVE_KERNEL_LZ4
@@ -361,7 +360,6 @@ config ARCH_FOOTBRIDGE
        bool "FootBridge"
        select CPU_SA110
        select FOOTBRIDGE
-       select HAVE_IDE
        select NEED_MACH_IO_H if !MMU
        select NEED_MACH_MEMORY_H
        help
@@ -430,7 +428,6 @@ config ARCH_PXA
        select GENERIC_IRQ_MULTI_HANDLER
        select GPIO_PXA
        select GPIOLIB
-       select HAVE_IDE
        select IRQ_DOMAIN
        select PLAT_PXA
        select SPARSE_IRQ
@@ -446,7 +443,6 @@ config ARCH_RPC
        select ARM_HAS_SG_CHAIN
        select CPU_SA110
        select FIQ
-       select HAVE_IDE
        select HAVE_PATA_PLATFORM
        select ISA_DMA_API
        select LEGACY_TIMER_TICK
@@ -469,7 +465,6 @@ config ARCH_SA1100
        select CPU_SA1100
        select GENERIC_IRQ_MULTI_HANDLER
        select GPIOLIB
-       select HAVE_IDE
        select IRQ_DOMAIN
        select ISA
        select NEED_MACH_MEMORY_H
@@ -505,7 +500,6 @@ config ARCH_OMAP1
        select GENERIC_IRQ_CHIP
        select GENERIC_IRQ_MULTI_HANDLER
        select GPIOLIB
-       select HAVE_IDE
        select HAVE_LEGACY_CLK
        select IRQ_DOMAIN
        select NEED_MACH_IO_H if PCCARD
index de11030..1d3aef8 100644 (file)
@@ -9,7 +9,6 @@ menuconfig ARCH_DAVINCI
        select PM_GENERIC_DOMAINS_OF if PM && OF
        select REGMAP_MMIO
        select RESET_CONTROLLER
-       select HAVE_IDE
        select PINCTRL_SINGLE
 
 if ARCH_DAVINCI
index d23970b..f70fb9c 100644 (file)
@@ -49,6 +49,7 @@ static int __init parse_tag_acorn(const struct tag *tag)
                fallthrough;    /* ??? */
        case 256:
                vram_size += PAGE_SIZE * 256;
+               break;
        default:
                break;
        }
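
The added break ensures the case 256 arm no longer falls through into default:. Falling through is legal C but almost always accidental, which is why the kernel annotates the intentional cases with fallthrough; and builds with -Wimplicit-fallthrough. The shape of the fixed switch, sketched:

	switch (pages) {
	case 512:
		vram_size += PAGE_SIZE * 256;
		fallthrough;	/* intentional: 512 pages adds both halves */
	case 256:
		vram_size += PAGE_SIZE * 256;
		break;		/* the fix: stop before default: */
	default:
		break;
	}
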
index 897634d..a951276 100644 (file)
@@ -1602,6 +1602,9 @@ exit:
                rn = arm_bpf_get_reg32(src_lo, tmp2[1], ctx);
                emit_ldx_r(dst, rn, off, ctx, BPF_SIZE(code));
                break;
+       /* speculation barrier */
+       case BPF_ST | BPF_NOSPEC:
+               break;
        /* ST: *(size *)(dst + off) = imm */
        case BPF_ST | BPF_MEM | BPF_W:
        case BPF_ST | BPF_MEM | BPF_H:
index ca38d0d..f4eaab3 100644 (file)
                        };
 
                        flexcan1: can@308c0000 {
-                               compatible = "fsl,imx8mp-flexcan", "fsl,imx6q-flexcan";
+                               compatible = "fsl,imx8mp-flexcan";
                                reg = <0x308c0000 0x10000>;
                                interrupts = <GIC_SPI 142 IRQ_TYPE_LEVEL_HIGH>;
                                clocks = <&clk IMX8MP_CLK_IPG_ROOT>,
                        };
 
                        flexcan2: can@308d0000 {
-                               compatible = "fsl,imx8mp-flexcan", "fsl,imx6q-flexcan";
+                               compatible = "fsl,imx8mp-flexcan";
                                reg = <0x308d0000 0x10000>;
                                interrupts = <GIC_SPI 144 IRQ_TYPE_LEVEL_HIGH>;
                                clocks = <&clk IMX8MP_CLK_IPG_ROOT>,
index 0686923..51e1709 100644 (file)
        status = "okay";
        extcon = <&usb2_id>;
 
-       usb@7600000 {
+       dwc3@7600000 {
                extcon = <&usb2_id>;
                dr_mode = "otg";
                maximum-speed = "high-speed";
        status = "okay";
        extcon = <&usb3_id>;
 
-       usb@6a00000 {
+       dwc3@6a00000 {
                extcon = <&usb3_id>;
                dr_mode = "otg";
        };
index 95d6cb8..f39bc10 100644 (file)
                        resets = <&gcc GCC_USB0_BCR>;
                        status = "disabled";
 
-                       dwc_0: usb@8a00000 {
+                       dwc_0: dwc3@8a00000 {
                                compatible = "snps,dwc3";
                                reg = <0x8a00000 0xcd00>;
                                interrupts = <GIC_SPI 140 IRQ_TYPE_LEVEL_HIGH>;
                        resets = <&gcc GCC_USB1_BCR>;
                        status = "disabled";
 
-                       dwc_1: usb@8c00000 {
+                       dwc_1: dwc3@8c00000 {
                                compatible = "snps,dwc3";
                                reg = <0x8c00000 0xcd00>;
                                interrupts = <GIC_SPI 99 IRQ_TYPE_LEVEL_HIGH>;
index 0e1bc46..78c55ca 100644 (file)
                        power-domains = <&gcc USB30_GDSC>;
                        status = "disabled";
 
-                       usb@6a00000 {
+                       dwc3@6a00000 {
                                compatible = "snps,dwc3";
                                reg = <0x06a00000 0xcc00>;
                                interrupts = <0 131 IRQ_TYPE_LEVEL_HIGH>;
                        qcom,select-utmi-as-pipe-clk;
                        status = "disabled";
 
-                       usb@7600000 {
+                       dwc3@7600000 {
                                compatible = "snps,dwc3";
                                reg = <0x07600000 0xcc00>;
                                interrupts = <0 138 IRQ_TYPE_LEVEL_HIGH>;
index 6f294f9..e9d3ce2 100644 (file)
 
                        resets = <&gcc GCC_USB_30_BCR>;
 
-                       usb3_dwc3: usb@a800000 {
+                       usb3_dwc3: dwc3@a800000 {
                                compatible = "snps,dwc3";
                                reg = <0x0a800000 0xcd00>;
                                interrupts = <GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>;
index f8a5530..a80c578 100644 (file)
 &usb3 {
        status = "okay";
 
-       usb@7580000 {
+       dwc3@7580000 {
                dr_mode = "host";
        };
 };
index 9c4be02..339790b 100644 (file)
                        assigned-clock-rates = <19200000>, <200000000>;
                        status = "disabled";
 
-                       usb@7580000 {
+                       dwc3@7580000 {
                                compatible = "snps,dwc3";
                                reg = <0x07580000 0xcd00>;
                                interrupts = <GIC_SPI 26 IRQ_TYPE_LEVEL_HIGH>;
                        assigned-clock-rates = <19200000>, <133333333>;
                        status = "disabled";
 
-                       usb@78c0000 {
+                       dwc3@78c0000 {
                                compatible = "snps,dwc3";
                                reg = <0x078c0000 0xcc00>;
                                interrupts = <GIC_SPI 44 IRQ_TYPE_LEVEL_HIGH>;
index 7af551a..a9a052f 100644 (file)
                        no-map;
                };
 
-               ipa_fw_mem: memory@8b700000 {
-                       reg = <0 0x8b700000 0 0x10000>;
-                       no-map;
-               };
-
                rmtfs_mem: memory@94600000 {
                        compatible = "qcom,rmtfs-mem";
                        reg = <0x0 0x94600000 0x0 0x200000>;
                                        <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_USB3 0>;
                        interconnect-names = "usb-ddr", "apps-usb";
 
-                       usb_1_dwc3: usb@a600000 {
+                       usb_1_dwc3: dwc3@a600000 {
                                compatible = "snps,dwc3";
                                reg = <0 0x0a600000 0 0xe000>;
                                interrupts = <GIC_SPI 133 IRQ_TYPE_LEVEL_HIGH>;
index 5eb2b58..a8c274a 100644 (file)
@@ -7,7 +7,6 @@
 
 #include <dt-bindings/clock/qcom,gcc-sc7280.h>
 #include <dt-bindings/clock/qcom,rpmh.h>
-#include <dt-bindings/interconnect/qcom,sc7280.h>
 #include <dt-bindings/interrupt-controller/arm-gic.h>
 #include <dt-bindings/mailbox/qcom-ipcc.h>
 #include <dt-bindings/power/qcom-aoss-qmp.h>
                        no-map;
                        reg = <0x0 0x80b00000 0x0 0x100000>;
                };
-
-               ipa_fw_mem: memory@8b700000 {
-                       reg = <0 0x8b700000 0 0x10000>;
-                       no-map;
-               };
        };
 
        cpus {
                        qcom,bcm-voters = <&apps_bcm_voter>;
                };
 
-               ipa: ipa@1e40000 {
-                       compatible = "qcom,sc7280-ipa";
-
-                       iommus = <&apps_smmu 0x480 0x0>,
-                                <&apps_smmu 0x482 0x0>;
-                       reg = <0 0x1e40000 0 0x8000>,
-                             <0 0x1e50000 0 0x4ad0>,
-                             <0 0x1e04000 0 0x23000>;
-                       reg-names = "ipa-reg",
-                                   "ipa-shared",
-                                   "gsi";
-
-                       interrupts-extended = <&intc 0 654 IRQ_TYPE_EDGE_RISING>,
-                                             <&intc 0 432 IRQ_TYPE_LEVEL_HIGH>,
-                                             <&ipa_smp2p_in 0 IRQ_TYPE_EDGE_RISING>,
-                                             <&ipa_smp2p_in 1 IRQ_TYPE_EDGE_RISING>;
-                       interrupt-names = "ipa",
-                                         "gsi",
-                                         "ipa-clock-query",
-                                         "ipa-setup-ready";
-
-                       clocks = <&rpmhcc RPMH_IPA_CLK>;
-                       clock-names = "core";
-
-                       interconnects = <&aggre2_noc MASTER_IPA 0 &mc_virt SLAVE_EBI1 0>,
-                                       <&gem_noc MASTER_APPSS_PROC 0 &cnoc2 SLAVE_IPA_CFG 0>;
-                       interconnect-names = "memory",
-                                            "config";
-
-                       qcom,smem-states = <&ipa_smp2p_out 0>,
-                                          <&ipa_smp2p_out 1>;
-                       qcom,smem-state-names = "ipa-clock-enabled-valid",
-                                               "ipa-clock-enabled";
-
-                       status = "disabled";
-               };
-
                tcsr_mutex: hwlock@1f40000 {
                        compatible = "qcom,tcsr-mutex", "syscon";
                        reg = <0 0x01f40000 0 0x40000>;
index 1796ae8..0a86fe7 100644 (file)
                                        <&gladiator_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_USB3_0 0>;
                        interconnect-names = "usb-ddr", "apps-usb";
 
-                       usb_1_dwc3: usb@a600000 {
+                       usb_1_dwc3: dwc3@a600000 {
                                compatible = "snps,dwc3";
                                reg = <0 0x0a600000 0 0xcd00>;
                                interrupts = <GIC_SPI 133 IRQ_TYPE_LEVEL_HIGH>;
                                        <&gladiator_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_USB3_1 0>;
                        interconnect-names = "usb-ddr", "apps-usb";
 
-                       usb_2_dwc3: usb@a800000 {
+                       usb_2_dwc3: dwc3@a800000 {
                                compatible = "snps,dwc3";
                                reg = <0 0x0a800000 0 0xcd00>;
                                interrupts = <GIC_SPI 138 IRQ_TYPE_LEVEL_HIGH>;
index 612dda0..eef9d79 100644 (file)
 
                        resets = <&gcc GCC_USB30_PRIM_BCR>;
 
-                       usb_1_dwc3: usb@a600000 {
+                       usb_1_dwc3: dwc3@a600000 {
                                compatible = "snps,dwc3";
                                reg = <0 0x0a600000 0 0xcd00>;
                                interrupts = <GIC_SPI 133 IRQ_TYPE_LEVEL_HIGH>;
index 3155c9e..0625bf2 100644 (file)
@@ -947,7 +947,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                vma_shift = get_vma_page_shift(vma, hva);
        }
 
-       shared = (vma->vm_flags & VM_PFNMAP);
+       shared = (vma->vm_flags & VM_SHARED);
 
        switch (vma_shift) {
 #ifndef __PAGETABLE_PMD_FOLDED
index dccf98a..41c23f4 100644 (file)
@@ -823,6 +823,19 @@ emit_cond_jmp:
                        return ret;
                break;
 
+       /* speculation barrier */
+       case BPF_ST | BPF_NOSPEC:
+               /*
+                * Nothing required here.
+                *
+                * In case of arm64, we rely on the firmware mitigation of
+                * Speculative Store Bypass as controlled via the ssbd kernel
+                * parameter. Whenever the mitigation is enabled, it works
+                * for all of the kernel code with no need to provide any
+                * additional instructions.
+                */
+               break;
+
        /* ST: *(size *)(dst + off) = imm */
        case BPF_ST | BPF_MEM | BPF_W:
        case BPF_ST | BPF_MEM | BPF_H:
index b5e14d5..c30baa0 100644 (file)
@@ -44,7 +44,6 @@ config H8300_H8MAX
        bool "H8MAX"
        select H83069
        select RAMKERNEL
-       select HAVE_IDE
        help
          H8MAX Evaluation Board Support
          More Information. (Japanese Only)
index cf425c2..4993c7a 100644 (file)
@@ -25,7 +25,6 @@ config IA64
        select HAVE_ASM_MODVERSIONS
        select HAVE_UNSTABLE_SCHED_CLOCK
        select HAVE_EXIT_THREAD
-       select HAVE_IDE
        select HAVE_KPROBES
        select HAVE_KRETPROBES
        select HAVE_FTRACE_MCOUNT_RECORD
index 96989ad..d632a1d 100644 (file)
@@ -23,7 +23,6 @@ config M68K
        select HAVE_DEBUG_BUGVERBOSE
        select HAVE_EFFICIENT_UNALIGNED_ACCESS if !CPU_HAS_NO_UNALIGNED
        select HAVE_FUTEX_CMPXCHG if MMU && FUTEX
-       select HAVE_IDE
        select HAVE_MOD_ARCH_SPECIFIC
        select HAVE_UID16
        select MMU_GATHER_NO_RANGE if MMU
index d964c1f..6a07a68 100644 (file)
@@ -33,6 +33,7 @@ config MAC
        depends on MMU
        select MMU_MOTOROLA if MMU
        select HAVE_ARCH_NVRAM_OPS
+       select HAVE_PATA_PLATFORM
        select LEGACY_TIMER_TICK
        help
          This option enables support for the Apple Macintosh series of
index 2c4d2ca..4853751 100644 (file)
@@ -26,7 +26,7 @@ DEFINE_CLK(pll, "pll.0", MCF_CLK);
 DEFINE_CLK(sys, "sys.0", MCF_BUSCLK);
 
 static struct clk_lookup m525x_clk_lookup[] = {
-       CLKDEV_INIT(NULL, "pll.0", &pll),
+       CLKDEV_INIT(NULL, "pll.0", &clk_pll),
        CLKDEV_INIT(NULL, "sys.0", &clk_sys),
        CLKDEV_INIT("mcftmr.0", NULL, &clk_sys),
        CLKDEV_INIT("mcftmr.1", NULL, &clk_sys),
index cee6087..6dfb27d 100644 (file)
@@ -71,7 +71,6 @@ config MIPS
        select HAVE_FUNCTION_TRACER
        select HAVE_GCC_PLUGINS
        select HAVE_GENERIC_VDSO
-       select HAVE_IDE
        select HAVE_IOREMAP_PROT
        select HAVE_IRQ_EXIT_ON_IRQ_STACK
        select HAVE_IRQ_TIME_ACCOUNTING
index cdf404a..1eaf6a1 100644 (file)
 
 #define SO_NETNS_COOKIE                71
 
+#define SO_BUF_LOCK            72
+
 #if !defined(__KERNEL__)
 
 #if __BITS_PER_LONG == 64
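
These uapi hunks assign per-architecture numbers to the new SO_BUF_LOCK socket option, which lets userspace lock sk_sndbuf/sk_rcvbuf against kernel auto-tuning. A hedged userspace sketch follows; the flag values match what this series adds to include/uapi/linux/socket.h and are defined locally in case the installed libc headers predate the option:

	#include <stdio.h>
	#include <sys/socket.h>

	#ifndef SO_BUF_LOCK
	#define SO_BUF_LOCK 72		/* asm-generic/mips value; parisc and
					 * sparc use 0x4046/0x0051 as above */
	#endif
	#define SOCK_SNDBUF_LOCK 1	/* lock sk_sndbuf */
	#define SOCK_RCVBUF_LOCK 2	/* lock sk_rcvbuf */

	int main(void)
	{
		int fd = socket(AF_INET, SOCK_STREAM, 0);
		int lock = SOCK_SNDBUF_LOCK | SOCK_RCVBUF_LOCK;

		/* Pin both buffer sizes against kernel auto-tuning. */
		if (fd < 0 ||
		    setsockopt(fd, SOL_SOCKET, SO_BUF_LOCK, &lock, sizeof(lock)))
			perror("SO_BUF_LOCK");
		return 0;
	}
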
index 939dd06..3a73e93 100644 (file)
@@ -1355,6 +1355,9 @@ jeq_common:
                }
                break;
 
+       case BPF_ST | BPF_NOSPEC: /* speculation barrier */
+               break;
+
        case BPF_ST | BPF_B | BPF_MEM:
        case BPF_ST | BPF_H | BPF_MEM:
        case BPF_ST | BPF_W | BPF_MEM:
index c206b31..1bdf5e7 100644 (file)
@@ -59,7 +59,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 
                vma = find_vma(mm, addr);
                if (TASK_SIZE - len >= addr &&
-                   (!vma || addr + len <= vma->vm_start))
+                   (!vma || addr + len <= vm_start_gap(vma)))
                        return addr;
        }
 
index bde9907..4f8c1fb 100644 (file)
@@ -3,7 +3,6 @@ config PARISC
        def_bool y
        select ARCH_32BIT_OFF_T if !64BIT
        select ARCH_MIGHT_HAVE_PC_PARPORT
-       select HAVE_IDE
        select HAVE_FUNCTION_TRACER
        select HAVE_FUNCTION_GRAPH_TRACER
        select HAVE_SYSCALL_TRACEPOINTS
index 5b5351c..8baaad5 100644 (file)
 
 #define SO_NETNS_COOKIE                0x4045
 
+#define SO_BUF_LOCK            0x4046
+
 #if !defined(__KERNEL__)
 
 #if __BITS_PER_LONG == 64
index d01e340..663766f 100644 (file)
@@ -220,7 +220,6 @@ config PPC
        select HAVE_HARDLOCKUP_DETECTOR_ARCH    if PPC_BOOK3S_64 && SMP
        select HAVE_HARDLOCKUP_DETECTOR_PERF    if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH
        select HAVE_HW_BREAKPOINT               if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx)
-       select HAVE_IDE
        select HAVE_IOREMAP_PROT
        select HAVE_IRQ_EXIT_ON_IRQ_STACK
        select HAVE_IRQ_TIME_ACCOUNTING
index 2813e3f..3c5baaa 100644 (file)
@@ -27,6 +27,13 @@ KASAN_SANITIZE := n
 
 ccflags-y := -shared -fno-common -fno-builtin -nostdlib \
        -Wl,-soname=linux-vdso64.so.1 -Wl,--hash-style=both
+
+# Go prior to 1.16.x assumes r30 is not clobbered by any VDSO code. That used to be true
+# by accident when the VDSO was hand-written asm code, but may not be now that the VDSO is
+# compiler generated. To avoid breaking Go, tell GCC not to use r30. Impact on code
+# generation is minimal; it will just use r29 instead.
+ccflags-y += $(call cc-option, -ffixed-r30)
+
 asflags-y := -D__VDSO64__ -s
 
 targets += vdso64.lds

index 1d1fcc2..085fb8e 100644 (file)
@@ -2697,8 +2697,10 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu)
                HFSCR_DSCR | HFSCR_VECVSX | HFSCR_FP | HFSCR_PREFIX;
        if (cpu_has_feature(CPU_FTR_HVMODE)) {
                vcpu->arch.hfscr &= mfspr(SPRN_HFSCR);
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
                if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
                        vcpu->arch.hfscr |= HFSCR_TM;
+#endif
        }
        if (cpu_has_feature(CPU_FTR_TM_COMP))
                vcpu->arch.hfscr |= HFSCR_TM;
index 8543ad5..898f942 100644 (file)
@@ -302,6 +302,9 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
        if (vcpu->kvm->arch.l1_ptcr == 0)
                return H_NOT_AVAILABLE;
 
+       if (MSR_TM_TRANSACTIONAL(vcpu->arch.shregs.msr))
+               return H_BAD_MODE;
+
        /* copy parameters in */
        hv_ptr = kvmppc_get_gpr(vcpu, 4);
        regs_ptr = kvmppc_get_gpr(vcpu, 5);
@@ -322,6 +325,23 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
        if (l2_hv.vcpu_token >= NR_CPUS)
                return H_PARAMETER;
 
+       /*
+        * L1 must have set up a suspended state to enter the L2 in a
+        * transactional state, and only in that case. These have to be
+        * filtered out here to prevent causing a TM Bad Thing in the
+        * host HRFID. We could synthesize a TM Bad Thing back to the L1
+        * here, but there doesn't seem to be much point.
+        */
+       if (MSR_TM_SUSPENDED(vcpu->arch.shregs.msr)) {
+               if (!MSR_TM_ACTIVE(l2_regs.msr))
+                       return H_BAD_MODE;
+       } else {
+               if (l2_regs.msr & MSR_TS_MASK)
+                       return H_BAD_MODE;
+               if (WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_TS_MASK))
+                       return H_BAD_MODE;
+       }
+
        /* translate lpid */
        l2 = kvmhv_get_nested(vcpu->kvm, l2_hv.lpid, true);
        if (!l2)
index 83f592e..961b3d7 100644 (file)
@@ -317,6 +317,9 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
         */
        mtspr(SPRN_HDEC, hdec);
 
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+tm_return_to_guest:
+#endif
        mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
        mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
        mtspr(SPRN_SRR0, vcpu->arch.shregs.srr0);
@@ -415,11 +418,23 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
                 * is in real suspend mode and is trying to transition to
                 * transactional mode.
                 */
-               if (local_paca->kvm_hstate.fake_suspend &&
+               if (!local_paca->kvm_hstate.fake_suspend &&
                                (vcpu->arch.shregs.msr & MSR_TS_S)) {
                        if (kvmhv_p9_tm_emulation_early(vcpu)) {
-                               /* Prevent it being handled again. */
-                               trap = 0;
+                               /*
+                                * Go straight back into the guest with the
+                                * new NIP/MSR as set by TM emulation.
+                                */
+                               mtspr(SPRN_HSRR0, vcpu->arch.regs.nip);
+                               mtspr(SPRN_HSRR1, vcpu->arch.shregs.msr);
+
+                               /*
+                                * tm_return_to_guest re-loads SRR0/1, DAR,
+                                * DSISR after RI is cleared, in case they had
+                                * been clobbered by a MCE.
+                                */
+                               __mtmsrd(0, 1); /* clear RI */
+                               goto tm_return_to_guest;
                        }
                }
 #endif
@@ -499,6 +514,10 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
         * If we are in real mode, only switch MMU on after the MMU is
         * switched to host, to avoid the P9_RADIX_PREFETCH_BUG.
         */
+       if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
+           vcpu->arch.shregs.msr & MSR_TS_MASK)
+               msr |= MSR_TS_S;
+
        __mtmsrd(msr, 0);
 
        end_timing(vcpu);
index c5e6775..0f847f1 100644 (file)
@@ -242,6 +242,17 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu)
         * value so we can restore it on the way out.
         */
        orig_rets = args.rets;
+       if (be32_to_cpu(args.nargs) >= ARRAY_SIZE(args.args)) {
+               /*
+                * Don't overflow our args array: ensure there is room for
+                * at least rets[0] (even if the call specifies 0 nret).
+                *
+                * Each handler must then check for the correct nargs and nret
+                * values, but they may always return failure in rets[0].
+                */
+               rc = -EINVAL;
+               goto fail;
+       }
        args.rets = &args.args[be32_to_cpu(args.nargs)];
 
        mutex_lock(&vcpu->kvm->arch.rtas_token_lock);
@@ -269,9 +280,17 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu)
 fail:
        /*
         * We only get here if the guest has called RTAS with a bogus
-        * args pointer. That means we can't get to the args, and so we
-        * can't fail the RTAS call. So fail right out to userspace,
-        * which should kill the guest.
+        * args pointer or nargs/nret values that would overflow the
+        * array. That means we can't get to the args, and so we can't
+        * fail the RTAS call. So fail right out to userspace, which
+        * should kill the guest.
+        *
+        * SLOF should actually pass the hcall return value from the
+        * rtas handler call in r3, so enter_rtas could be modified to
+        * return a failure indication in r3 and we could return such
+        * errors to the guest rather than failing to host userspace.
+        * However old guests that don't test for failure could then
+        * continue silently after errors, so for now we won't do this.
         */
        return rc;
 }
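
The added check rejects guest-supplied nargs values before they are used to index the fixed args[] array; without it, args.rets could point past the end of the structure. The underlying pattern, validating an untrusted count against the array size before forming a pointer from it, sketched with a hypothetical helper and size:

	#define RTAS_ARGS_MAX 16		/* hypothetical array size */

	u32 args[RTAS_ARGS_MAX];
	u32 nargs = read_untrusted_u32();	/* guest/user controlled */
	u32 *rets;

	if (nargs >= ARRAY_SIZE(args))		/* >= keeps room for rets[0] */
		return -EINVAL;
	rets = &args[nargs];			/* provably inside args[] */
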
index be33b53..b4e6f70 100644 (file)
@@ -2048,9 +2048,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
        {
                struct kvm_enable_cap cap;
                r = -EFAULT;
-               vcpu_load(vcpu);
                if (copy_from_user(&cap, argp, sizeof(cap)))
                        goto out;
+               vcpu_load(vcpu);
                r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
                vcpu_put(vcpu);
                break;
@@ -2074,9 +2074,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
        case KVM_DIRTY_TLB: {
                struct kvm_dirty_tlb dirty;
                r = -EFAULT;
-               vcpu_load(vcpu);
                if (copy_from_user(&dirty, argp, sizeof(dirty)))
                        goto out;
+               vcpu_load(vcpu);
                r = kvm_vcpu_ioctl_dirty_tlb(vcpu, &dirty);
                vcpu_put(vcpu);
                break;
index 34bb158..beb12cb 100644 (file)
@@ -737,6 +737,12 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
                        }
                        break;
 
+               /*
+                * BPF_ST NOSPEC (speculation barrier)
+                */
+               case BPF_ST | BPF_NOSPEC:
+                       break;
+
                /*
                 * BPF_ST(X)
                 */
index de85958..b87a63d 100644 (file)
@@ -627,6 +627,12 @@ emit_clear:
                        }
                        break;
 
+               /*
+                * BPF_ST NOSPEC (speculation barrier)
+                */
+               case BPF_ST | BPF_NOSPEC:
+                       break;
+
                /*
                 * BPF_ST(X)
                 */
index 9b88e3c..534b031 100644 (file)
@@ -42,6 +42,7 @@ static int pasemi_system_reset_exception(struct pt_regs *regs)
        switch (regs->msr & SRR1_WAKEMASK) {
        case SRR1_WAKEDEC:
                set_dec(1);
+               break;
        case SRR1_WAKEEE:
                /*
                 * Handle these when interrupts get re-enabled and we take
index 631a0d5..6b08866 100644 (file)
@@ -77,7 +77,7 @@
 #include "../../../../drivers/pci/pci.h"
 
 DEFINE_STATIC_KEY_FALSE(shared_processor);
-EXPORT_SYMBOL_GPL(shared_processor);
+EXPORT_SYMBOL(shared_processor);
 
 int CMO_PrPSP = -1;
 int CMO_SecPSP = -1;
index 6d98cd9..7b3483b 100644 (file)
@@ -27,10 +27,10 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md);
 
 #define ARCH_EFI_IRQ_FLAGS_MASK (SR_IE | SR_SPIE)
 
-/* Load initrd at enough distance from DRAM start */
+/* Load initrd anywhere in system RAM */
 static inline unsigned long efi_get_max_initrd_addr(unsigned long image_addr)
 {
-       return image_addr + SZ_256M;
+       return ULONG_MAX;
 }
 
 #define alloc_screen_info(x...)                (&screen_info)
index ff467b9..ac75936 100644 (file)
@@ -132,8 +132,12 @@ unsigned long get_wchan(struct task_struct *task)
 {
        unsigned long pc = 0;
 
-       if (likely(task && task != current && !task_is_running(task)))
+       if (likely(task && task != current && !task_is_running(task))) {
+               if (!try_get_task_stack(task))
+                       return 0;
                walk_stackframe(task, NULL, save_wchan, &pc);
+               put_task_stack(task);
+       }
        return pc;
 }
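
A task's stack can be freed concurrently once the task exits, so walking it requires pinning. try_get_task_stack() takes a reference, failing soft if the stack is already gone, and put_task_stack() drops it. The general shape:

	if (!try_get_task_stack(task))
		return 0;		/* stack already freed: bail out */

	/* ... safe to walk the task's stack frames here ... */

	put_task_stack(task);		/* drop reference; stack may be freed */
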
 
index bceb062..63bc691 100644 (file)
@@ -30,23 +30,23 @@ ENTRY(__asm_copy_from_user)
         * t0 - end of uncopied dst
         */
        add     t0, a0, a2
-       bgtu    a0, t0, 5f
 
        /*
         * Use byte copy only if too small.
+        * SZREG holds 4 for RV32 and 8 for RV64
         */
-       li      a3, 8*SZREG /* size must be larger than size in word_copy */
+       li      a3, 9*SZREG /* size must be larger than size in word_copy */
        bltu    a2, a3, .Lbyte_copy_tail
 
        /*
-        * Copy first bytes until dst is align to word boundary.
+        * Copy first bytes until dst is aligned to word boundary.
         * a0 - start of dst
         * t1 - start of aligned dst
         */
        addi    t1, a0, SZREG-1
        andi    t1, t1, ~(SZREG-1)
        /* dst is already aligned, skip */
-       beq     a0, t1, .Lskip_first_bytes
+       beq     a0, t1, .Lskip_align_dst
 1:
        /* a5 - one byte for copying data */
        fixup lb      a5, 0(a1), 10f
@@ -55,7 +55,7 @@ ENTRY(__asm_copy_from_user)
        addi    a0, a0, 1       /* dst */
        bltu    a0, t1, 1b      /* t1 - start of aligned dst */
 
-.Lskip_first_bytes:
+.Lskip_align_dst:
        /*
         * Now dst is aligned.
         * Use shift-copy if src is misaligned.
@@ -72,10 +72,9 @@ ENTRY(__asm_copy_from_user)
         *
         * a0 - start of aligned dst
         * a1 - start of aligned src
-        * a3 - a1 & mask:(SZREG-1)
         * t0 - end of aligned dst
         */
-       addi    t0, t0, -(8*SZREG-1) /* not to over run */
+       addi    t0, t0, -(8*SZREG) /* not to overrun */
 2:
        fixup REG_L   a4,        0(a1), 10f
        fixup REG_L   a5,    SZREG(a1), 10f
@@ -97,7 +96,7 @@ ENTRY(__asm_copy_from_user)
        addi    a1, a1, 8*SZREG
        bltu    a0, t0, 2b
 
-       addi    t0, t0, 8*SZREG-1 /* revert to original value */
+       addi    t0, t0, 8*SZREG /* revert to original value */
        j       .Lbyte_copy_tail
 
 .Lshift_copy:
@@ -107,7 +106,7 @@ ENTRY(__asm_copy_from_user)
         * For misaligned copy we still perform aligned word copy, but
         * we need to use the value fetched from the previous iteration and
         * do some shifts.
-        * This is safe because reading less than a word size.
+        * This is safe because reading is less than a word size.
         *
         * a0 - start of aligned dst
         * a1 - start of src
@@ -117,7 +116,7 @@ ENTRY(__asm_copy_from_user)
         */
        /* calculating aligned word boundary for dst */
        andi    t1, t0, ~(SZREG-1)
-       /* Converting unaligned src to aligned arc */
+       /* Converting unaligned src to aligned src */
        andi    a1, a1, ~(SZREG-1)
 
        /*
@@ -125,11 +124,11 @@ ENTRY(__asm_copy_from_user)
         * t3 - prev shift
         * t4 - current shift
         */
-       slli    t3, a3, LGREG
+       slli    t3, a3, 3 /* converting bytes in a3 to bits */
        li      a5, SZREG*8
        sub     t4, a5, t3
 
-       /* Load the first word to combine with seceond word */
+       /* Load the first word to combine with second word */
        fixup REG_L   a5, 0(a1), 10f
 
 3:
@@ -161,7 +160,7 @@ ENTRY(__asm_copy_from_user)
         * a1 - start of remaining src
         * t0 - end of remaining dst
         */
-       bgeu    a0, t0, 5f
+       bgeu    a0, t0, .Lout_copy_user  /* check if end of copy */
 4:
        fixup lb      a5, 0(a1), 10f
        addi    a1, a1, 1       /* src */
@@ -169,7 +168,7 @@ ENTRY(__asm_copy_from_user)
        addi    a0, a0, 1       /* dst */
        bltu    a0, t0, 4b      /* t0 - end of dst */
 
-5:
+.Lout_copy_user:
        /* Disable access to user memory */
        csrc CSR_STATUS, t6
        li      a0, 0
index 269fc64..a14bf39 100644 (file)
@@ -127,10 +127,17 @@ void __init mem_init(void)
 }
 
 /*
- * The default maximal physical memory size is -PAGE_OFFSET,
- * limit the memory size via mem.
+ * The default maximal physical memory size is -PAGE_OFFSET for 32-bit kernels,
+ * whereas for 64-bit kernels, the end of the virtual address space is occupied
+ * by the modules/BPF/kernel mappings which reduces the available size of the
+ * linear mapping.
+ * Limit the memory size via mem.
  */
+#ifdef CONFIG_64BIT
+static phys_addr_t memory_limit = -PAGE_OFFSET - SZ_4G;
+#else
 static phys_addr_t memory_limit = -PAGE_OFFSET;
+#endif
 
 static int __init early_mem(char *p)
 {
@@ -152,7 +159,7 @@ static void __init setup_bootmem(void)
 {
        phys_addr_t vmlinux_end = __pa_symbol(&_end);
        phys_addr_t vmlinux_start = __pa_symbol(&_start);
-       phys_addr_t max_mapped_addr = __pa(~(ulong)0);
+       phys_addr_t __maybe_unused max_mapped_addr;
        phys_addr_t dram_end;
 
 #ifdef CONFIG_XIP_KERNEL
@@ -175,14 +182,21 @@ static void __init setup_bootmem(void)
        memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start);
 
        dram_end = memblock_end_of_DRAM();
+
+#ifndef CONFIG_64BIT
        /*
         * memblock allocator is not aware of the fact that last 4K bytes of
         * the addressable memory can not be mapped because of IS_ERR_VALUE
         * macro. Make sure that last 4k bytes are not usable by memblock
-        * if end of dram is equal to maximum addressable memory.
+        * if end of dram is equal to maximum addressable memory.  For 64-bit
+        * kernel, this problem can't happen here as the end of the virtual
+        * address space is occupied by the kernel mapping then this check must
+        * be done in create_kernel_page_table.
         */
+       max_mapped_addr = __pa(~(ulong)0);
        if (max_mapped_addr == (dram_end - 1))
                memblock_set_current_limit(max_mapped_addr - 4096);
+#endif
 
        min_low_pfn = PFN_UP(memblock_start_of_DRAM());
        max_low_pfn = max_pfn = PFN_DOWN(dram_end);
@@ -570,6 +584,14 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
        BUG_ON((PAGE_OFFSET % PGDIR_SIZE) != 0);
        BUG_ON((kernel_map.phys_addr % map_size) != 0);
 
+#ifdef CONFIG_64BIT
+       /*
+        * The last 4K bytes of the addressable memory can not be mapped because
+        * of IS_ERR_VALUE macro.
+        */
+       BUG_ON((kernel_map.virt_addr + kernel_map.size) > ADDRESS_SPACE_END - SZ_4K);
+#endif
+
        pt_ops.alloc_pte = alloc_pte_early;
        pt_ops.get_pte_virt = get_pte_virt_early;
 #ifndef __PAGETABLE_PMD_FOLDED
@@ -709,6 +731,8 @@ static void __init setup_vm_final(void)
                if (start <= __pa(PAGE_OFFSET) &&
                    __pa(PAGE_OFFSET) < end)
                        start = __pa(PAGE_OFFSET);
+               if (end >= __pa(PAGE_OFFSET) + memory_limit)
+                       end = __pa(PAGE_OFFSET) + memory_limit;
 
                map_size = best_map_size(start, end - start);
                for (pa = start; pa < end; pa += map_size) {
index 81de865..e649742 100644 (file)
@@ -1251,6 +1251,10 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
                        return -1;
                break;
 
+       /* speculation barrier */
+       case BPF_ST | BPF_NOSPEC:
+               break;
+
        case BPF_ST | BPF_MEM | BPF_B:
        case BPF_ST | BPF_MEM | BPF_H:
        case BPF_ST | BPF_MEM | BPF_W:
index 87e3bf5..3af4131 100644 (file)
@@ -939,6 +939,10 @@ out_be:
                emit_ld(rd, 0, RV_REG_T1, ctx);
                break;
 
+       /* speculation barrier */
+       case BPF_ST | BPF_NOSPEC:
+               break;
+
        /* ST: *(size *)(dst + off) = imm */
        case BPF_ST | BPF_MEM | BPF_B:
                emit_imm(RV_REG_T1, imm, ctx);
index 660c799..e30d3fd 100644 (file)
@@ -11,6 +11,7 @@ UBSAN_SANITIZE := n
 KASAN_SANITIZE := n
 
 obj-y  := $(if $(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) info.o
+obj-$(CONFIG_KERNEL_ZSTD) += clz_ctz.o
 obj-all := $(obj-y) piggy.o syms.o
 targets        := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2
 targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4
diff --git a/arch/s390/boot/compressed/clz_ctz.c b/arch/s390/boot/compressed/clz_ctz.c
new file mode 100644 (file)
index 0000000..c3ebf24
--- /dev/null
@@ -0,0 +1,2 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "../../../../lib/clz_ctz.c"
index 7de253f..b881840 100644 (file)
@@ -335,7 +335,7 @@ CONFIG_L2TP_DEBUGFS=m
 CONFIG_L2TP_V3=y
 CONFIG_L2TP_IP=m
 CONFIG_L2TP_ETH=m
-CONFIG_BRIDGE=m
+CONFIG_BRIDGE=y
 CONFIG_BRIDGE_MRP=y
 CONFIG_VLAN_8021Q=m
 CONFIG_VLAN_8021Q_GVRP=y
index b671642..1667a3c 100644 (file)
@@ -325,7 +325,7 @@ CONFIG_L2TP_DEBUGFS=m
 CONFIG_L2TP_V3=y
 CONFIG_L2TP_IP=m
 CONFIG_L2TP_ETH=m
-CONFIG_BRIDGE=m
+CONFIG_BRIDGE=y
 CONFIG_BRIDGE_MRP=y
 CONFIG_VLAN_8021Q=m
 CONFIG_VLAN_8021Q_GVRP=y
index 9b4473f..161a9e1 100644 (file)
@@ -445,15 +445,15 @@ struct kvm_vcpu_stat {
        u64 instruction_sigp_init_cpu_reset;
        u64 instruction_sigp_cpu_reset;
        u64 instruction_sigp_unknown;
-       u64 diagnose_10;
-       u64 diagnose_44;
-       u64 diagnose_9c;
-       u64 diagnose_9c_ignored;
-       u64 diagnose_9c_forward;
-       u64 diagnose_258;
-       u64 diagnose_308;
-       u64 diagnose_500;
-       u64 diagnose_other;
+       u64 instruction_diagnose_10;
+       u64 instruction_diagnose_44;
+       u64 instruction_diagnose_9c;
+       u64 diag_9c_ignored;
+       u64 diag_9c_forward;
+       u64 instruction_diagnose_258;
+       u64 instruction_diagnose_308;
+       u64 instruction_diagnose_500;
+       u64 instruction_diagnose_other;
        u64 pfault_sync;
 };
 
index bff50b6..edf5ff1 100644 (file)
@@ -51,6 +51,7 @@ SECTIONS
 
        .rela.dyn ALIGN(8) : { *(.rela.dyn) }
        .got ALIGN(8)   : { *(.got .toc) }
+       .got.plt ALIGN(8) : { *(.got.plt) }
 
        _end = .;
        PROVIDE(end = .);
index d4fb336..4461ea1 100644 (file)
@@ -51,6 +51,7 @@ SECTIONS
 
        .rela.dyn ALIGN(8) : { *(.rela.dyn) }
        .got ALIGN(8)   : { *(.got .toc) }
+       .got.plt ALIGN(8) : { *(.got.plt) }
 
        _end = .;
        PROVIDE(end = .);
index 02c146f..807fa9d 100644 (file)
@@ -24,7 +24,7 @@ static int diag_release_pages(struct kvm_vcpu *vcpu)
 
        start = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4];
        end = vcpu->run->s.regs.gprs[vcpu->arch.sie_block->ipa & 0xf] + PAGE_SIZE;
-       vcpu->stat.diagnose_10++;
+       vcpu->stat.instruction_diagnose_10++;
 
        if (start & ~PAGE_MASK || end & ~PAGE_MASK || start >= end
            || start < 2 * PAGE_SIZE)
@@ -74,7 +74,7 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
 
        VCPU_EVENT(vcpu, 3, "diag page reference parameter block at 0x%llx",
                   vcpu->run->s.regs.gprs[rx]);
-       vcpu->stat.diagnose_258++;
+       vcpu->stat.instruction_diagnose_258++;
        if (vcpu->run->s.regs.gprs[rx] & 7)
                return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
        rc = read_guest(vcpu, vcpu->run->s.regs.gprs[rx], rx, &parm, sizeof(parm));
@@ -145,7 +145,7 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
 static int __diag_time_slice_end(struct kvm_vcpu *vcpu)
 {
        VCPU_EVENT(vcpu, 5, "%s", "diag time slice end");
-       vcpu->stat.diagnose_44++;
+       vcpu->stat.instruction_diagnose_44++;
        kvm_vcpu_on_spin(vcpu, true);
        return 0;
 }
@@ -169,7 +169,7 @@ static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu)
        int tid;
 
        tid = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4];
-       vcpu->stat.diagnose_9c++;
+       vcpu->stat.instruction_diagnose_9c++;
 
        /* yield to self */
        if (tid == vcpu->vcpu_id)
@@ -192,7 +192,7 @@ static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu)
                VCPU_EVENT(vcpu, 5,
                           "diag time slice end directed to %d: yield forwarded",
                           tid);
-               vcpu->stat.diagnose_9c_forward++;
+               vcpu->stat.diag_9c_forward++;
                return 0;
        }
 
@@ -203,7 +203,7 @@ static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu)
        return 0;
 no_yield:
        VCPU_EVENT(vcpu, 5, "diag time slice end directed to %d: ignored", tid);
-       vcpu->stat.diagnose_9c_ignored++;
+       vcpu->stat.diag_9c_ignored++;
        return 0;
 }
 
@@ -213,7 +213,7 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu)
        unsigned long subcode = vcpu->run->s.regs.gprs[reg] & 0xffff;
 
        VCPU_EVENT(vcpu, 3, "diag ipl functions, subcode %lx", subcode);
-       vcpu->stat.diagnose_308++;
+       vcpu->stat.instruction_diagnose_308++;
        switch (subcode) {
        case 3:
                vcpu->run->s390_reset_flags = KVM_S390_RESET_CLEAR;
@@ -245,7 +245,7 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu)
 {
        int ret;
 
-       vcpu->stat.diagnose_500++;
+       vcpu->stat.instruction_diagnose_500++;
        /* No virtio-ccw notification? Get out quickly. */
        if (!vcpu->kvm->arch.css_support ||
            (vcpu->run->s.regs.gprs[1] != KVM_S390_VIRTIO_CCW_NOTIFY))
@@ -299,7 +299,7 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu)
        case 0x500:
                return __diag_virtio_hypercall(vcpu);
        default:
-               vcpu->stat.diagnose_other++;
+               vcpu->stat.instruction_diagnose_other++;
                return -EOPNOTSUPP;
        }
 }
index b655a7d..4527ac7 100644 (file)
@@ -163,15 +163,15 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
        STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
-       STATS_DESC_COUNTER(VCPU, diagnose_10),
-       STATS_DESC_COUNTER(VCPU, diagnose_44),
-       STATS_DESC_COUNTER(VCPU, diagnose_9c),
-       STATS_DESC_COUNTER(VCPU, diagnose_9c_ignored),
-       STATS_DESC_COUNTER(VCPU, diagnose_9c_forward),
-       STATS_DESC_COUNTER(VCPU, diagnose_258),
-       STATS_DESC_COUNTER(VCPU, diagnose_308),
-       STATS_DESC_COUNTER(VCPU, diagnose_500),
-       STATS_DESC_COUNTER(VCPU, diagnose_other),
+       STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
+       STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
+       STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
+       STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
+       STATS_DESC_COUNTER(VCPU, diag_9c_forward),
+       STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
+       STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
+       STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
+       STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
        STATS_DESC_COUNTER(VCPU, pfault_sync)
 };
 static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) ==
index 2ae419f..8841926 100644 (file)
@@ -1153,6 +1153,11 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
                        break;
                }
                break;
+       /*
+        * BPF_NOSPEC (speculation barrier)
+        */
+       case BPF_ST | BPF_NOSPEC:
+               break;
        /*
         * BPF_ST(X)
         */
index 45a0549..b683b69 100644 (file)
@@ -39,7 +39,6 @@ config SUPERH
        select HAVE_FUTEX_CMPXCHG if FUTEX
        select HAVE_FTRACE_MCOUNT_RECORD
        select HAVE_HW_BREAKPOINT
-       select HAVE_IDE if HAS_IOPORT_MAP
        select HAVE_IOREMAP_PROT if MMU && !X2TLB
        select HAVE_KERNEL_BZIP2
        select HAVE_KERNEL_GZIP
index c5fa793..f0c0f95 100644 (file)
@@ -19,7 +19,6 @@ config SPARC
        select OF
        select OF_PROMTREE
        select HAVE_ASM_MODVERSIONS
-       select HAVE_IDE
        select HAVE_ARCH_KGDB if !SMP || SPARC64
        select HAVE_ARCH_TRACEHOOK
        select HAVE_ARCH_SECCOMP if SPARC64
index 92675dc..e80ee86 100644 (file)
 
 #define SO_NETNS_COOKIE          0x0050
 
+#define SO_BUF_LOCK              0x0051
+
 #if !defined(__KERNEL__)
 
 
index 4b8d3c6..9a2f20c 100644 (file)
@@ -1287,6 +1287,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
                        return 1;
                break;
        }
+       /* speculation barrier */
+       case BPF_ST | BPF_NOSPEC:
+               break;
        /* ST: *(size *)(dst + off) = imm */
        case BPF_ST | BPF_MEM | BPF_W:
        case BPF_ST | BPF_MEM | BPF_H:
index 4927065..88fb922 100644 (file)
@@ -202,7 +202,6 @@ config X86
        select HAVE_FUNCTION_TRACER
        select HAVE_GCC_PLUGINS
        select HAVE_HW_BREAKPOINT
-       select HAVE_IDE
        select HAVE_IOREMAP_PROT
        select HAVE_IRQ_EXIT_ON_IRQ_STACK       if X86_64
        select HAVE_IRQ_TIME_ACCOUNTING
index 674906f..68f091b 100644 (file)
@@ -79,9 +79,10 @@ __jump_label_patch(struct jump_entry *entry, enum jump_label_type type)
        return (struct jump_label_patch){.code = code, .size = size};
 }
 
-static inline void __jump_label_transform(struct jump_entry *entry,
-                                         enum jump_label_type type,
-                                         int init)
+static __always_inline void
+__jump_label_transform(struct jump_entry *entry,
+                      enum jump_label_type type,
+                      int init)
 {
        const struct jump_label_patch jlp = __jump_label_patch(entry, type);
 
index b07592c..0b38f94 100644 (file)
@@ -2016,6 +2016,7 @@ static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
 
 static int kvm_hv_hypercall_complete(struct kvm_vcpu *vcpu, u64 result)
 {
+       trace_kvm_hv_hypercall_done(result);
        kvm_hv_hypercall_set_result(vcpu, result);
        ++vcpu->stat.hypercalls;
        return kvm_skip_emulated_instruction(vcpu);
@@ -2139,6 +2140,7 @@ static bool hv_check_hypercall_access(struct kvm_vcpu_hv *hv_vcpu, u16 code)
 
 int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
 {
+       struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
        struct kvm_hv_hcall hc;
        u64 ret = HV_STATUS_SUCCESS;
 
@@ -2173,17 +2175,25 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
        hc.rep_idx = (hc.param >> HV_HYPERCALL_REP_START_OFFSET) & 0xfff;
        hc.rep = !!(hc.rep_cnt || hc.rep_idx);
 
-       if (hc.fast && is_xmm_fast_hypercall(&hc))
-               kvm_hv_hypercall_read_xmm(&hc);
-
        trace_kvm_hv_hypercall(hc.code, hc.fast, hc.rep_cnt, hc.rep_idx,
                               hc.ingpa, hc.outgpa);
 
-       if (unlikely(!hv_check_hypercall_access(to_hv_vcpu(vcpu), hc.code))) {
+       if (unlikely(!hv_check_hypercall_access(hv_vcpu, hc.code))) {
                ret = HV_STATUS_ACCESS_DENIED;
                goto hypercall_complete;
        }
 
+       if (hc.fast && is_xmm_fast_hypercall(&hc)) {
+               if (unlikely(hv_vcpu->enforce_cpuid &&
+                            !(hv_vcpu->cpuid_cache.features_edx &
+                              HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE))) {
+                       kvm_queue_exception(vcpu, UD_VECTOR);
+                       return 1;
+               }
+
+               kvm_hv_hypercall_read_xmm(&hc);
+       }
+
        switch (hc.code) {
        case HVCALL_NOTIFY_LONG_SPIN_WAIT:
                if (unlikely(hc.rep)) {
index 698969e..ff005fe 100644 (file)
@@ -96,7 +96,7 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
 static void rtc_irq_eoi_tracking_reset(struct kvm_ioapic *ioapic)
 {
        ioapic->rtc_status.pending_eoi = 0;
-       bitmap_zero(ioapic->rtc_status.dest_map.map, KVM_MAX_VCPU_ID);
+       bitmap_zero(ioapic->rtc_status.dest_map.map, KVM_MAX_VCPU_ID + 1);
 }
 
 static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic);
index 6604017..11e4065 100644 (file)
@@ -43,13 +43,13 @@ struct kvm_vcpu;
 
 struct dest_map {
        /* vcpu bitmap where IRQ has been sent */
-       DECLARE_BITMAP(map, KVM_MAX_VCPU_ID);
+       DECLARE_BITMAP(map, KVM_MAX_VCPU_ID + 1);
 
        /*
         * Vector sent to a given vcpu, only valid when
         * the vcpu's bit in map is set
         */
-       u8 vectors[KVM_MAX_VCPU_ID];
+       u8 vectors[KVM_MAX_VCPU_ID + 1];
 };
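
Both hunks fix the same off-by-one: vcpu IDs run from 0 to KVM_MAX_VCPU_ID inclusive, so a bitmap or array indexed by ID needs KVM_MAX_VCPU_ID + 1 slots. Sizing by an inclusive maximum, sketched with a hypothetical constant:

	#define MAX_ID 1023			/* highest VALID id, inclusive */

	static DECLARE_BITMAP(sent, MAX_ID + 1);/* ids 0..MAX_ID need 1024 bits */
	static u8 vectors[MAX_ID + 1];

	static void record(unsigned int id, u8 vec)
	{
		if (id > MAX_ID)		/* note: >, not >= */
			return;
		__set_bit(id, sent);
		vectors[id] = vec;
	}
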
 
 
index 66f7f5b..c4f4fa2 100644 (file)
@@ -1644,7 +1644,7 @@ static int is_empty_shadow_page(u64 *spt)
  * aggregate version in order to make the slab shrinker
  * faster
  */
-static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, unsigned long nr)
+static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, long nr)
 {
        kvm->arch.n_used_mmu_pages += nr;
        percpu_counter_add(&kvm_total_used_mmu_pages, nr);
index 1d01da6..a8ad78a 100644 (file)
@@ -646,7 +646,7 @@ out:
 void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
-       struct vmcb *vmcb = svm->vmcb;
+       struct vmcb *vmcb = svm->vmcb01.ptr;
        bool activated = kvm_vcpu_apicv_active(vcpu);
 
        if (!enable_apicv)
index 3bd09c5..61738ff 100644 (file)
@@ -515,7 +515,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
         * Also covers avic_vapic_bar, avic_backing_page, avic_logical_id,
         * avic_physical_id.
         */
-       WARN_ON(svm->vmcb01.ptr->control.int_ctl & AVIC_ENABLE_MASK);
+       WARN_ON(kvm_apicv_activated(svm->vcpu.kvm));
 
        /* Copied from vmcb01.  msrpm_base can be overwritten later.  */
        svm->vmcb->control.nested_ctl = svm->vmcb01.ptr->control.nested_ctl;
@@ -702,8 +702,8 @@ out:
 }
 
 /* Copy state save area fields which are handled by VMRUN */
-void svm_copy_vmrun_state(struct vmcb_save_area *from_save,
-                         struct vmcb_save_area *to_save)
+void svm_copy_vmrun_state(struct vmcb_save_area *to_save,
+                         struct vmcb_save_area *from_save)
 {
        to_save->es = from_save->es;
        to_save->cs = from_save->cs;
@@ -722,7 +722,7 @@ void svm_copy_vmrun_state(struct vmcb_save_area *from_save,
        to_save->cpl = 0;
 }
 
-void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
+void svm_copy_vmloadsave_state(struct vmcb *to_vmcb, struct vmcb *from_vmcb)
 {
        to_vmcb->save.fs = from_vmcb->save.fs;
        to_vmcb->save.gs = from_vmcb->save.gs;
@@ -1385,7 +1385,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
 
        svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa;
 
-       svm_copy_vmrun_state(save, &svm->vmcb01.ptr->save);
+       svm_copy_vmrun_state(&svm->vmcb01.ptr->save, save);
        nested_load_control_from_vmcb12(svm, ctl);
 
        svm_switch_vmcb(svm, &svm->nested.vmcb02);
index 6710d9e..7fbce34 100644 (file)
@@ -64,6 +64,7 @@ static DEFINE_MUTEX(sev_bitmap_lock);
 unsigned int max_sev_asid;
 static unsigned int min_sev_asid;
 static unsigned long sev_me_mask;
+static unsigned int nr_asids;
 static unsigned long *sev_asid_bitmap;
 static unsigned long *sev_reclaim_asid_bitmap;
 
@@ -78,11 +79,11 @@ struct enc_region {
 /* Called with the sev_bitmap_lock held, or on shutdown  */
 static int sev_flush_asids(int min_asid, int max_asid)
 {
-       int ret, pos, error = 0;
+       int ret, asid, error = 0;
 
        /* Check if there are any ASIDs to reclaim before performing a flush */
-       pos = find_next_bit(sev_reclaim_asid_bitmap, max_asid, min_asid);
-       if (pos >= max_asid)
+       asid = find_next_bit(sev_reclaim_asid_bitmap, nr_asids, min_asid);
+       if (asid > max_asid)
                return -EBUSY;
 
        /*
@@ -115,15 +116,15 @@ static bool __sev_recycle_asids(int min_asid, int max_asid)
 
        /* The flush process will flush all reclaimable SEV and SEV-ES ASIDs */
        bitmap_xor(sev_asid_bitmap, sev_asid_bitmap, sev_reclaim_asid_bitmap,
-                  max_sev_asid);
-       bitmap_zero(sev_reclaim_asid_bitmap, max_sev_asid);
+                  nr_asids);
+       bitmap_zero(sev_reclaim_asid_bitmap, nr_asids);
 
        return true;
 }
 
 static int sev_asid_new(struct kvm_sev_info *sev)
 {
-       int pos, min_asid, max_asid, ret;
+       int asid, min_asid, max_asid, ret;
        bool retry = true;
        enum misc_res_type type;
 
@@ -143,11 +144,11 @@ static int sev_asid_new(struct kvm_sev_info *sev)
         * SEV-enabled guests must use asid from min_sev_asid to max_sev_asid.
         * SEV-ES-enabled guest can use from 1 to min_sev_asid - 1.
         */
-       min_asid = sev->es_active ? 0 : min_sev_asid - 1;
+       min_asid = sev->es_active ? 1 : min_sev_asid;
        max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid;
 again:
-       pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_asid);
-       if (pos >= max_asid) {
+       asid = find_next_zero_bit(sev_asid_bitmap, max_asid + 1, min_asid);
+       if (asid > max_asid) {
                if (retry && __sev_recycle_asids(min_asid, max_asid)) {
                        retry = false;
                        goto again;
@@ -157,11 +158,11 @@ again:
                goto e_uncharge;
        }
 
-       __set_bit(pos, sev_asid_bitmap);
+       __set_bit(asid, sev_asid_bitmap);
 
        mutex_unlock(&sev_bitmap_lock);
 
-       return pos + 1;
+       return asid;
 e_uncharge:
        misc_cg_uncharge(type, sev->misc_cg, 1);
        put_misc_cg(sev->misc_cg);
@@ -179,17 +180,16 @@ static int sev_get_asid(struct kvm *kvm)
 static void sev_asid_free(struct kvm_sev_info *sev)
 {
        struct svm_cpu_data *sd;
-       int cpu, pos;
+       int cpu;
        enum misc_res_type type;
 
        mutex_lock(&sev_bitmap_lock);
 
-       pos = sev->asid - 1;
-       __set_bit(pos, sev_reclaim_asid_bitmap);
+       __set_bit(sev->asid, sev_reclaim_asid_bitmap);
 
        for_each_possible_cpu(cpu) {
                sd = per_cpu(svm_data, cpu);
-               sd->sev_vmcbs[pos] = NULL;
+               sd->sev_vmcbs[sev->asid] = NULL;
        }
 
        mutex_unlock(&sev_bitmap_lock);
@@ -1857,12 +1857,17 @@ void __init sev_hardware_setup(void)
        min_sev_asid = edx;
        sev_me_mask = 1UL << (ebx & 0x3f);
 
-       /* Initialize SEV ASID bitmaps */
-       sev_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
+       /*
+        * Initialize SEV ASID bitmaps. Allocate space for ASID 0 in the bitmap,
+        * even though it's never used, so that the bitmap is indexed by the
+        * actual ASID.
+        */
+       nr_asids = max_sev_asid + 1;
+       sev_asid_bitmap = bitmap_zalloc(nr_asids, GFP_KERNEL);
        if (!sev_asid_bitmap)
                goto out;
 
-       sev_reclaim_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
+       sev_reclaim_asid_bitmap = bitmap_zalloc(nr_asids, GFP_KERNEL);
        if (!sev_reclaim_asid_bitmap) {
                bitmap_free(sev_asid_bitmap);
                sev_asid_bitmap = NULL;
@@ -1907,7 +1912,7 @@ void sev_hardware_teardown(void)
                return;
 
        /* No need to take sev_bitmap_lock, all VMs have been destroyed. */
-       sev_flush_asids(0, max_sev_asid);
+       sev_flush_asids(1, max_sev_asid);
 
        bitmap_free(sev_asid_bitmap);
        bitmap_free(sev_reclaim_asid_bitmap);
@@ -1921,7 +1926,7 @@ int sev_cpu_init(struct svm_cpu_data *sd)
        if (!sev_enabled)
                return 0;
 
-       sd->sev_vmcbs = kcalloc(max_sev_asid + 1, sizeof(void *), GFP_KERNEL);
+       sd->sev_vmcbs = kcalloc(nr_asids, sizeof(void *), GFP_KERNEL);
        if (!sd->sev_vmcbs)
                return -ENOMEM;
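
The SEV rework above drops the 0-based "pos = asid - 1" translation and indexes the bitmaps by the ASID itself, spending one unused bit on ASID 0. With nr_asids = max_sev_asid + 1, the searches read naturally; note that find_next_zero_bit() takes an exclusive size, so scanning the valid range min_asid..max_asid becomes:

	/* size argument is exclusive, hence max_asid + 1 */
	asid = find_next_zero_bit(sev_asid_bitmap, max_asid + 1, min_asid);
	if (asid > max_asid)			/* valid range exhausted */
		goto busy;

	__set_bit(asid, sev_asid_bitmap);	/* no +1/-1 conversions anywhere */
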
 
index 664d20f..e8ccab5 100644 (file)
@@ -1406,8 +1406,6 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
                goto error_free_vmsa_page;
        }
 
-       svm_vcpu_init_msrpm(vcpu, svm->msrpm);
-
        svm->vmcb01.ptr = page_address(vmcb01_page);
        svm->vmcb01.pa = __sme_set(page_to_pfn(vmcb01_page) << PAGE_SHIFT);
 
@@ -1419,6 +1417,8 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
        svm_switch_vmcb(svm, &svm->vmcb01);
        init_vmcb(vcpu);
 
+       svm_vcpu_init_msrpm(vcpu, svm->msrpm);
+
        svm_init_osvw(vcpu);
        vcpu->arch.microcode_version = 0x01000065;
 
@@ -1568,8 +1568,11 @@ static void svm_set_vintr(struct vcpu_svm *svm)
 {
        struct vmcb_control_area *control;
 
-       /* The following fields are ignored when AVIC is enabled */
-       WARN_ON(kvm_vcpu_apicv_active(&svm->vcpu));
+       /*
+        * The following fields are ignored when AVIC is enabled
+        */
+       WARN_ON(kvm_apicv_activated(svm->vcpu.kvm));
+
        svm_set_intercept(svm, INTERCEPT_VINTR);
 
        /*
@@ -2147,11 +2150,12 @@ static int vmload_vmsave_interception(struct kvm_vcpu *vcpu, bool vmload)
        ret = kvm_skip_emulated_instruction(vcpu);
 
        if (vmload) {
-               nested_svm_vmloadsave(vmcb12, svm->vmcb);
+               svm_copy_vmloadsave_state(svm->vmcb, vmcb12);
                svm->sysenter_eip_hi = 0;
                svm->sysenter_esp_hi = 0;
-       } else
-               nested_svm_vmloadsave(svm->vmcb, vmcb12);
+       } else {
+               svm_copy_vmloadsave_state(vmcb12, svm->vmcb);
+       }
 
        kvm_vcpu_unmap(vcpu, &map, true);
 
@@ -4344,8 +4348,8 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
 
                BUILD_BUG_ON(offsetof(struct vmcb, save) != 0x400);
 
-               svm_copy_vmrun_state(&svm->vmcb01.ptr->save,
-                                    map_save.hva + 0x400);
+               svm_copy_vmrun_state(map_save.hva + 0x400,
+                                    &svm->vmcb01.ptr->save);
 
                kvm_vcpu_unmap(vcpu, &map_save, true);
        }
@@ -4393,8 +4397,8 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
                                         &map_save) == -EINVAL)
                                return 1;
 
-                       svm_copy_vmrun_state(map_save.hva + 0x400,
-                                            &svm->vmcb01.ptr->save);
+                       svm_copy_vmrun_state(&svm->vmcb01.ptr->save,
+                                            map_save.hva + 0x400);
 
                        kvm_vcpu_unmap(vcpu, &map_save, true);
                }
index 7e20907..bd0fe94 100644 (file)
@@ -464,9 +464,9 @@ void svm_leave_nested(struct vcpu_svm *svm);
 void svm_free_nested(struct vcpu_svm *svm);
 int svm_allocate_nested(struct vcpu_svm *svm);
 int nested_svm_vmrun(struct kvm_vcpu *vcpu);
-void svm_copy_vmrun_state(struct vmcb_save_area *from_save,
-                         struct vmcb_save_area *to_save);
-void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb);
+void svm_copy_vmrun_state(struct vmcb_save_area *to_save,
+                         struct vmcb_save_area *from_save);
+void svm_copy_vmloadsave_state(struct vmcb *to_vmcb, struct vmcb *from_vmcb);
 int nested_svm_vmexit(struct vcpu_svm *svm);
 
 static inline int nested_svm_simple_vmexit(struct vcpu_svm *svm, u32 exit_code)
index 9b9a55a..c53b8bf 100644 (file)
@@ -89,7 +89,7 @@ static inline void svm_hv_vmcb_dirty_nested_enlightenments(
         * as we mark it dirty unconditionally towards end of vcpu
         * init phase.
         */
-       if (vmcb && vmcb_is_clean(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS) &&
+       if (vmcb_is_clean(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS) &&
            hve->hv_enlightenments_control.msr_bitmap)
                vmcb_mark_dirty(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS);
 }
index b484141..03ebe36 100644 (file)
@@ -92,6 +92,21 @@ TRACE_EVENT(kvm_hv_hypercall,
                  __entry->outgpa)
 );
 
+TRACE_EVENT(kvm_hv_hypercall_done,
+       TP_PROTO(u64 result),
+       TP_ARGS(result),
+
+       TP_STRUCT__entry(
+               __field(__u64, result)
+       ),
+
+       TP_fast_assign(
+               __entry->result = result;
+       ),
+
+       TP_printk("result 0x%llx", __entry->result)
+);
+
 /*
  * Tracepoint for Xen hypercall.
  */
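
Each TRACE_EVENT(name, ...) definition generates a trace_<name>() call site; for the event added above, a usage sketch (the actual call is wired up at the hypercall return path elsewhere in this series):

        /* fires "result 0x%llx" when the kvm_hv_hypercall_done event is enabled */
        trace_kvm_hv_hypercall_done(result);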
index a4fd106..e5d5c5e 100644 (file)
@@ -3407,7 +3407,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                        return 1;
                break;
        case MSR_KVM_ASYNC_PF_ACK:
-               if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF))
+               if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT))
                        return 1;
                if (data & 0x1) {
                        vcpu->arch.apf.pageready_pending = false;
@@ -3746,7 +3746,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                msr_info->data = vcpu->arch.apf.msr_int_val;
                break;
        case MSR_KVM_ASYNC_PF_ACK:
-               if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF))
+               if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT))
                        return 1;
 
                msr_info->data = 0;
@@ -4358,8 +4358,17 @@ static int kvm_cpu_accept_dm_intr(struct kvm_vcpu *vcpu)
 
 static int kvm_vcpu_ready_for_interrupt_injection(struct kvm_vcpu *vcpu)
 {
-       return kvm_arch_interrupt_allowed(vcpu) &&
-               kvm_cpu_accept_dm_intr(vcpu);
+       /*
+        * Do not cause an interrupt window exit if an exception
+        * is pending or an event needs reinjection; userspace
+        * might want to inject the interrupt manually using KVM_SET_REGS
+        * or KVM_SET_SREGS.  For that to work, we must be at an
+        * instruction boundary with no events half-injected.
+        */
+       return (kvm_arch_interrupt_allowed(vcpu) &&
+               kvm_cpu_accept_dm_intr(vcpu) &&
+               !kvm_event_needs_reinjection(vcpu) &&
+               !vcpu->arch.exception.pending);
 }
 
 static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
index 333650b..0fe6aac 100644 (file)
@@ -1219,6 +1219,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
                        }
                        break;
 
+                       /* speculation barrier */
+               case BPF_ST | BPF_NOSPEC:
+                       if (boot_cpu_has(X86_FEATURE_XMM2))
+                               /* Emit 'lfence' */
+                               EMIT3(0x0F, 0xAE, 0xE8);
+                       break;
+
                        /* ST: *(u8*)(dst_reg + off) = imm */
                case BPF_ST | BPF_MEM | BPF_B:
                        if (is_ereg(dst_reg))
index 3da88de..3bfda5f 100644 (file)
@@ -1886,6 +1886,12 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
                        i++;
                        break;
                }
+               /* speculation barrier */
+               case BPF_ST | BPF_NOSPEC:
+                       if (boot_cpu_has(X86_FEATURE_XMM2))
+                               /* Emit 'lfence' */
+                               EMIT3(0x0F, 0xAE, 0xE8);
+                       break;
                /* ST: *(u8*)(dst_reg + off) = imm */
                case BPF_ST | BPF_MEM | BPF_H:
                case BPF_ST | BPF_MEM | BPF_B:
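
In both JIT hunks above, the emitted bytes 0x0F, 0xAE, 0xE8 are the machine encoding of lfence; the X86_FEATURE_XMM2 (SSE2) check gates the emission because lfence was introduced with SSE2. The equivalent at the C level, as a sketch:

        /* what the JIT-emitted bytes execute, expressed as inline asm */
        asm volatile("lfence" ::: "memory");    /* serialize speculative loads */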
index 2332b21..3878880 100644 (file)
@@ -327,7 +327,6 @@ config XTENSA_PLATFORM_ISS
 
 config XTENSA_PLATFORM_XT2000
        bool "XT2000"
-       select HAVE_IDE
        help
          XT2000 is the name of Tensilica's feature-rich emulation platform.
          This hardware is capable of running a full Linux distribution.
index c2d6bc8..5fac375 100644 (file)
@@ -1440,16 +1440,17 @@ static int iocg_wake_fn(struct wait_queue_entry *wq_entry, unsigned mode,
                return -1;
 
        iocg_commit_bio(ctx->iocg, wait->bio, wait->abs_cost, cost);
+       wait->committed = true;
 
        /*
         * autoremove_wake_function() removes the wait entry only when it
-        * actually changed the task state.  We want the wait always
-        * removed.  Remove explicitly and use default_wake_function().
+        * actually changed the task state. We want the wait always removed.
+        * Remove explicitly and use default_wake_function(). Note that the
+        * order of operations is important as finish_wait() tests whether
+        * @wq_entry is removed without grabbing the lock.
         */
-       list_del_init(&wq_entry->entry);
-       wait->committed = true;
-
        default_wake_function(wq_entry, mode, flags, key);
+       list_del_init_careful(&wq_entry->entry);
        return 0;
 }
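
The reordering above pairs with the lock-free check in finish_wait(); a simplified waiter-side sketch of the generic pattern (condensed, not part of this patch):

        for (;;) {
                prepare_to_wait(&wq_head, &wq_entry, TASK_UNINTERRUPTIBLE);
                if (condition)          /* e.g. wait->committed set by the waker */
                        break;
                schedule();
        }
        /*
         * finish_wait() checks list_empty_careful(&wq_entry.entry) without
         * taking the lock; that is what list_del_init_careful() pairs with.
         */
        finish_wait(&wq_head, &wq_entry);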
 
index c838d81..0f006ca 100644 (file)
@@ -515,17 +515,6 @@ void blk_mq_sched_insert_requests(struct blk_mq_hw_ctx *hctx,
        percpu_ref_put(&q->q_usage_counter);
 }
 
-static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
-                                  struct blk_mq_hw_ctx *hctx,
-                                  unsigned int hctx_idx)
-{
-       if (hctx->sched_tags) {
-               blk_mq_free_rqs(set, hctx->sched_tags, hctx_idx);
-               blk_mq_free_rq_map(hctx->sched_tags, set->flags);
-               hctx->sched_tags = NULL;
-       }
-}
-
 static int blk_mq_sched_alloc_tags(struct request_queue *q,
                                   struct blk_mq_hw_ctx *hctx,
                                   unsigned int hctx_idx)
@@ -539,8 +528,10 @@ static int blk_mq_sched_alloc_tags(struct request_queue *q,
                return -ENOMEM;
 
        ret = blk_mq_alloc_rqs(set, hctx->sched_tags, hctx_idx, q->nr_requests);
-       if (ret)
-               blk_mq_sched_free_tags(set, hctx, hctx_idx);
+       if (ret) {
+               blk_mq_free_rq_map(hctx->sched_tags, set->flags);
+               hctx->sched_tags = NULL;
+       }
 
        return ret;
 }
index af4d2ab..298ee78 100644 (file)
@@ -1079,10 +1079,9 @@ static void disk_release(struct device *dev)
        disk_release_events(disk);
        kfree(disk->random);
        xa_destroy(&disk->part_tbl);
-       bdput(disk->part0);
        if (test_bit(GD_QUEUE_REF, &disk->state) && disk->queue)
                blk_put_queue(disk->queue);
-       kfree(disk);
+       bdput(disk->part0);     /* frees the disk */
 }
 struct class block_class = {
        .name           = "block",
index 9d872ea..8f9940f 100644 (file)
@@ -370,7 +370,7 @@ config ACPI_TABLE_UPGRADE
 config ACPI_TABLE_OVERRIDE_VIA_BUILTIN_INITRD
        bool "Override ACPI tables from built-in initrd"
        depends on ACPI_TABLE_UPGRADE
-       depends on INITRAMFS_SOURCE!="" && INITRAMFS_COMPRESSION=""
+       depends on INITRAMFS_SOURCE!="" && INITRAMFS_COMPRESSION_NONE
        help
          This option provides functionality to override arbitrary ACPI tables
          from built-in uncompressed initrd.
index 5fca182..550b908 100644 (file)
@@ -9,6 +9,42 @@
 #include <linux/module.h>
 #include <linux/platform_device.h>
 
+struct pch_fivr_resp {
+       u64 status;
+       u64 result;
+};
+
+static int pch_fivr_read(acpi_handle handle, char *method, struct pch_fivr_resp *fivr_resp)
+{
+       struct acpi_buffer resp = { sizeof(struct pch_fivr_resp), fivr_resp};
+       struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+       struct acpi_buffer format = { sizeof("NN"), "NN" };
+       union acpi_object *obj;
+       acpi_status status;
+       int ret = -EFAULT;
+
+       status = acpi_evaluate_object(handle, method, NULL, &buffer);
+       if (ACPI_FAILURE(status))
+               return ret;
+
+       obj = buffer.pointer;
+       if (!obj || obj->type != ACPI_TYPE_PACKAGE)
+               goto release_buffer;
+
+       status = acpi_extract_package(obj, &format, &resp);
+       if (ACPI_FAILURE(status))
+               goto release_buffer;
+
+       if (fivr_resp->status)
+               goto release_buffer;
+
+       ret = 0;
+
+release_buffer:
+       kfree(buffer.pointer);
+       return ret;
+}
+
 /*
  * Presentation of attributes which are defined for INT1045
  * They are:
@@ -23,15 +59,14 @@ static ssize_t name##_show(struct device *dev,\
                           char *buf)\
 {\
        struct acpi_device *acpi_dev = dev_get_drvdata(dev);\
-       unsigned long long val;\
-       acpi_status status;\
+       struct pch_fivr_resp fivr_resp;\
+       int status;\
 \
-       status = acpi_evaluate_integer(acpi_dev->handle, #method,\
-                                      NULL, &val);\
-       if (ACPI_SUCCESS(status))\
-               return sprintf(buf, "%d\n", (int)val);\
-       else\
-               return -EINVAL;\
+       status = pch_fivr_read(acpi_dev->handle, #method, &fivr_resp);\
+       if (status)\
+               return status;\
+\
+       return sprintf(buf, "%llu\n", fivr_resp.result);\
 }
 
 #define PCH_FIVR_STORE(name, method) \
index dc01fb5..ee78a21 100644 (file)
@@ -423,13 +423,6 @@ static void acpi_dev_get_irqresource(struct resource *res, u32 gsi,
        }
 }
 
-static bool irq_is_legacy(struct acpi_resource_irq *irq)
-{
-       return irq->triggering == ACPI_EDGE_SENSITIVE &&
-               irq->polarity == ACPI_ACTIVE_HIGH &&
-               irq->shareable == ACPI_EXCLUSIVE;
-}
-
 /**
  * acpi_dev_resource_interrupt - Extract ACPI interrupt resource information.
  * @ares: Input ACPI resource object.
@@ -468,7 +461,7 @@ bool acpi_dev_resource_interrupt(struct acpi_resource *ares, int index,
                }
                acpi_dev_get_irqresource(res, irq->interrupts[index],
                                         irq->triggering, irq->polarity,
-                                        irq->shareable, irq_is_legacy(irq));
+                                        irq->shareable, true);
                break;
        case ACPI_RESOURCE_TYPE_EXTENDED_IRQ:
                ext_irq = &ares->data.extended_irq;
index e7ddd28..d5cedff 100644 (file)
@@ -860,11 +860,9 @@ EXPORT_SYMBOL(acpi_dev_present);
  * Return the next match of ACPI device if another matching device was present
  * at the moment of invocation, or NULL otherwise.
  *
- * FIXME: The function does not tolerate the sudden disappearance of @adev, e.g.
- * in the case of a hotplug event. That said, the caller should ensure that
- * this will never happen.
- *
  * The caller is responsible for invoking acpi_dev_put() on the returned device.
+ * On the other hand, the function invokes acpi_dev_put() on the given @adev,
+ * assuming that its reference counter had been increased beforehand.
  *
  * See additional information in acpi_dev_present() as well.
  */
@@ -880,6 +878,7 @@ acpi_dev_get_next_match_dev(struct acpi_device *adev, const char *hid, const cha
        match.hrv = hrv;
 
        dev = bus_find_device(&acpi_bus_type, start, &match, acpi_dev_match_cb);
+       acpi_dev_put(adev);
        return dev ? to_acpi_device(dev) : NULL;
 }
 EXPORT_SYMBOL(acpi_dev_get_next_match_dev);
index 1c50780..fbdbef0 100644 (file)
@@ -378,19 +378,25 @@ static int lps0_device_attach(struct acpi_device *adev,
                 * AMDI0006:
                 * - should use rev_id 0x0
                 * - function mask = 0x3: Should use Microsoft method
+                * AMDI0007:
+                * - Should use rev_id 0x2
+                * - Should only use AMD method
                 */
                const char *hid = acpi_device_hid(adev);
-               rev_id = 0;
+               rev_id = strcmp(hid, "AMDI0007") ? 0 : 2;
                lps0_dsm_func_mask = validate_dsm(adev->handle,
                                        ACPI_LPS0_DSM_UUID_AMD, rev_id, &lps0_dsm_guid);
                lps0_dsm_func_mask_microsoft = validate_dsm(adev->handle,
-                                       ACPI_LPS0_DSM_UUID_MICROSOFT, rev_id,
+                                       ACPI_LPS0_DSM_UUID_MICROSOFT, 0,
                                        &lps0_dsm_guid_microsoft);
                if (lps0_dsm_func_mask > 0x3 && (!strcmp(hid, "AMD0004") ||
                                                 !strcmp(hid, "AMDI0005"))) {
                        lps0_dsm_func_mask = (lps0_dsm_func_mask << 1) | 0x1;
                        acpi_handle_debug(adev->handle, "_DSM UUID %s: Adjusted function mask: 0x%x\n",
                                          ACPI_LPS0_DSM_UUID_AMD, lps0_dsm_func_mask);
+               } else if (lps0_dsm_func_mask_microsoft > 0 && !strcmp(hid, "AMDI0007")) {
+                       lps0_dsm_func_mask_microsoft = -EINVAL;
+                       acpi_handle_debug(adev->handle, "_DSM Using AMD method\n");
                }
        } else {
                rev_id = 1;
index ae7189d..b71ea4a 100644 (file)
@@ -637,6 +637,20 @@ unsigned int ata_sff_data_xfer32(struct ata_queued_cmd *qc, unsigned char *buf,
 }
 EXPORT_SYMBOL_GPL(ata_sff_data_xfer32);
 
+static void ata_pio_xfer(struct ata_queued_cmd *qc, struct page *page,
+               unsigned int offset, size_t xfer_size)
+{
+       bool do_write = (qc->tf.flags & ATA_TFLAG_WRITE);
+       unsigned char *buf;
+
+       buf = kmap_atomic(page);
+       qc->ap->ops->sff_data_xfer(qc, buf + offset, xfer_size, do_write);
+       kunmap_atomic(buf);
+
+       if (!do_write && !PageSlab(page))
+               flush_dcache_page(page);
+}
+
 /**
  *     ata_pio_sector - Transfer a sector of data.
  *     @qc: Command on going
@@ -648,11 +662,9 @@ EXPORT_SYMBOL_GPL(ata_sff_data_xfer32);
  */
 static void ata_pio_sector(struct ata_queued_cmd *qc)
 {
-       int do_write = (qc->tf.flags & ATA_TFLAG_WRITE);
        struct ata_port *ap = qc->ap;
        struct page *page;
        unsigned int offset;
-       unsigned char *buf;
 
        if (!qc->cursg) {
                qc->curbytes = qc->nbytes;
@@ -670,13 +682,20 @@ static void ata_pio_sector(struct ata_queued_cmd *qc)
 
        DPRINTK("data %s\n", qc->tf.flags & ATA_TFLAG_WRITE ? "write" : "read");
 
-       /* do the actual data transfer */
-       buf = kmap_atomic(page);
-       ap->ops->sff_data_xfer(qc, buf + offset, qc->sect_size, do_write);
-       kunmap_atomic(buf);
+       /*
+        * Split the transfer when it crosses a page boundary.  Note that the
+        * split still has to be dword aligned like all ATA data transfers.
+        */
+       WARN_ON_ONCE(offset % 4);
+       if (offset + qc->sect_size > PAGE_SIZE) {
+               unsigned int split_len = PAGE_SIZE - offset;
 
-       if (!do_write && !PageSlab(page))
-               flush_dcache_page(page);
+               ata_pio_xfer(qc, page, offset, split_len);
+               ata_pio_xfer(qc, nth_page(page, 1), 0,
+                            qc->sect_size - split_len);
+       } else {
+               ata_pio_xfer(qc, page, offset, qc->sect_size);
+       }
 
        qc->curbytes += qc->sect_size;
        qc->cursg_ofs += qc->sect_size;
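
A worked example of the split, assuming PAGE_SIZE == 4096 and the usual 512-byte sector:

        /*
         * offset = 3840, sect_size = 512:
         *   offset + sect_size = 4352 > 4096, so the transfer is split into
         *   split_len = 4096 - 3840 = 256 bytes from this page, plus
         *   sect_size - split_len = 256 bytes from nth_page(page, 1).
         * The starting offset is dword aligned, as WARN_ON_ONCE(offset % 4)
         * asserts.
         */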
index 4f2951c..d0e67ec 100644 (file)
@@ -2167,10 +2167,10 @@ static int hrz_open (struct atm_vcc *atm_vcc)
   
   // Part of the job is done by atm_pcr_goal which gives us a PCR
   // specification which says: EITHER grab the maximum available PCR
-  // (and perhaps a lower bound which we musn't pass), OR grab this
+  // (and perhaps a lower bound which we must not pass), OR grab this
   // amount, rounding down if you have to (and perhaps a lower bound
-  // which we musn't pass) OR grab this amount, rounding up if you
-  // have to (and perhaps an upper bound which we musn't pass). If any
+  // which we must not pass) OR grab this amount, rounding up if you
+  // have to (and perhaps an upper bound which we must not pass). If any
   // bounds ARE passed we fail. Note that rounding is only rounding to
   // match device limitations, we do not round down to satisfy
   // bandwidth availability even if this would not violate any given
index adc199d..6a30264 100644 (file)
@@ -231,6 +231,8 @@ EXPORT_SYMBOL_GPL(auxiliary_find_device);
 int __auxiliary_driver_register(struct auxiliary_driver *auxdrv,
                                struct module *owner, const char *modname)
 {
+       int ret;
+
        if (WARN_ON(!auxdrv->probe) || WARN_ON(!auxdrv->id_table))
                return -EINVAL;
 
@@ -246,7 +248,11 @@ int __auxiliary_driver_register(struct auxiliary_driver *auxdrv,
        auxdrv->driver.bus = &auxiliary_bus_type;
        auxdrv->driver.mod_name = modname;
 
-       return driver_register(&auxdrv->driver);
+       ret = driver_register(&auxdrv->driver);
+       if (ret)
+               kfree(auxdrv->driver.name);
+
+       return ret;
 }
 EXPORT_SYMBOL_GPL(__auxiliary_driver_register);
 
index cadcade..f636049 100644 (file)
@@ -574,8 +574,10 @@ static void devlink_remove_symlinks(struct device *dev,
                return;
        }
 
-       snprintf(buf, len, "supplier:%s:%s", dev_bus_name(sup), dev_name(sup));
-       sysfs_remove_link(&con->kobj, buf);
+       if (device_is_registered(con)) {
+               snprintf(buf, len, "supplier:%s:%s", dev_bus_name(sup), dev_name(sup));
+               sysfs_remove_link(&con->kobj, buf);
+       }
        snprintf(buf, len, "consumer:%s:%s", dev_bus_name(con), dev_name(con));
        sysfs_remove_link(&sup->kobj, buf);
        kfree(buf);
index f37b9e3..f0cdff0 100644 (file)
 
 static DEFINE_IDR(loop_index_idr);
 static DEFINE_MUTEX(loop_ctl_mutex);
+static DEFINE_MUTEX(loop_validate_mutex);
+
+/**
+ * loop_global_lock_killable() - take locks for safe loop_validate_file() test
+ *
+ * @lo: struct loop_device
+ * @global: true if @lo is about to bind another "struct loop_device", false otherwise
+ *
+ * Returns 0 on success, -EINTR otherwise.
+ *
+ * Since loop_validate_file() traverses other "struct loop_device" instances
+ * when is_loop_device() is true, we need a global lock for serializing
+ * concurrent loop_configure()/loop_change_fd()/__loop_clr_fd() calls.
+ */
+static int loop_global_lock_killable(struct loop_device *lo, bool global)
+{
+       int err;
+
+       if (global) {
+               err = mutex_lock_killable(&loop_validate_mutex);
+               if (err)
+                       return err;
+       }
+       err = mutex_lock_killable(&lo->lo_mutex);
+       if (err && global)
+               mutex_unlock(&loop_validate_mutex);
+       return err;
+}
+
+/**
+ * loop_global_unlock() - release locks taken by loop_global_lock_killable()
+ *
+ * @lo: struct loop_device
+ * @global: true if @lo was about to bind another "struct loop_device", false otherwise
+ */
+static void loop_global_unlock(struct loop_device *lo, bool global)
+{
+       mutex_unlock(&lo->lo_mutex);
+       if (global)
+               mutex_unlock(&loop_validate_mutex);
+}
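
Usage pattern for the pair introduced above, as a sketch (note the error path: on failure, loop_global_lock_killable() has already released the global mutex itself):

        is_loop = is_loop_device(file);
        err = loop_global_lock_killable(lo, is_loop);
        if (err)
                return err;             /* no locks held here */
        /* ... validate and reconfigure under lo->lo_mutex ... */
        loop_global_unlock(lo, is_loop);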
 
 static int max_part;
 static int part_shift;
@@ -672,13 +713,15 @@ static int loop_validate_file(struct file *file, struct block_device *bdev)
        while (is_loop_device(f)) {
                struct loop_device *l;
 
+               lockdep_assert_held(&loop_validate_mutex);
                if (f->f_mapping->host->i_rdev == bdev->bd_dev)
                        return -EBADF;
 
                l = I_BDEV(f->f_mapping->host)->bd_disk->private_data;
-               if (l->lo_state != Lo_bound) {
+               if (l->lo_state != Lo_bound)
                        return -EINVAL;
-               }
+               /* Order wrt setting lo->lo_backing_file in loop_configure(). */
+               rmb();
                f = l->lo_backing_file;
        }
        if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
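
The rmb() above pairs with the wmb() that loop_configure() gains later in this patch; sketched side by side:

        /*
         *   loop_configure()                  loop_validate_file()
         *   lo->lo_backing_file = file;       if (l->lo_state != Lo_bound)
         *   ...                                       return -EINVAL;
         *   wmb();                            rmb();
         *   lo->lo_state = Lo_bound;          f = l->lo_backing_file;
         *
         * A reader that observes Lo_bound is thus guaranteed to also observe
         * the backing file published before the wmb().
         */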
@@ -697,13 +740,18 @@ static int loop_validate_file(struct file *file, struct block_device *bdev)
 static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
                          unsigned int arg)
 {
-       struct file     *file = NULL, *old_file;
-       int             error;
-       bool            partscan;
+       struct file *file = fget(arg);
+       struct file *old_file;
+       int error;
+       bool partscan;
+       bool is_loop;
 
-       error = mutex_lock_killable(&lo->lo_mutex);
+       if (!file)
+               return -EBADF;
+       is_loop = is_loop_device(file);
+       error = loop_global_lock_killable(lo, is_loop);
        if (error)
-               return error;
+               goto out_putf;
        error = -ENXIO;
        if (lo->lo_state != Lo_bound)
                goto out_err;
@@ -713,11 +761,6 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
        if (!(lo->lo_flags & LO_FLAGS_READ_ONLY))
                goto out_err;
 
-       error = -EBADF;
-       file = fget(arg);
-       if (!file)
-               goto out_err;
-
        error = loop_validate_file(file, bdev);
        if (error)
                goto out_err;
@@ -740,7 +783,16 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
        loop_update_dio(lo);
        blk_mq_unfreeze_queue(lo->lo_queue);
        partscan = lo->lo_flags & LO_FLAGS_PARTSCAN;
-       mutex_unlock(&lo->lo_mutex);
+       loop_global_unlock(lo, is_loop);
+
+       /*
+        * Flush loop_validate_file() before fput(), because l->lo_backing_file
+        * might still point at old_file, which might hold the last reference.
+        */
+       if (!is_loop) {
+               mutex_lock(&loop_validate_mutex);
+               mutex_unlock(&loop_validate_mutex);
+       }
        /*
         * We must drop file reference outside of lo_mutex as dropping
         * the file ref can take open_mutex which creates circular locking
@@ -752,9 +804,9 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
        return 0;
 
 out_err:
-       mutex_unlock(&lo->lo_mutex);
-       if (file)
-               fput(file);
+       loop_global_unlock(lo, is_loop);
+out_putf:
+       fput(file);
        return error;
 }
 
@@ -1136,22 +1188,22 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
                          struct block_device *bdev,
                          const struct loop_config *config)
 {
-       struct file     *file;
-       struct inode    *inode;
+       struct file *file = fget(config->fd);
+       struct inode *inode;
        struct address_space *mapping;
-       int             error;
-       loff_t          size;
-       bool            partscan;
-       unsigned short  bsize;
+       int error;
+       loff_t size;
+       bool partscan;
+       unsigned short bsize;
+       bool is_loop;
+
+       if (!file)
+               return -EBADF;
+       is_loop = is_loop_device(file);
 
        /* This is safe, since we have a reference from open(). */
        __module_get(THIS_MODULE);
 
-       error = -EBADF;
-       file = fget(config->fd);
-       if (!file)
-               goto out;
-
        /*
         * If we don't hold exclusive handle for the device, upgrade to it
         * here to avoid changing device under exclusive owner.
@@ -1162,7 +1214,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
                        goto out_putf;
        }
 
-       error = mutex_lock_killable(&lo->lo_mutex);
+       error = loop_global_lock_killable(lo, is_loop);
        if (error)
                goto out_bdev;
 
@@ -1242,6 +1294,9 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
        size = get_loop_size(lo, file);
        loop_set_size(lo, size);
 
+       /* Order wrt reading lo_state in loop_validate_file(). */
+       wmb();
+
        lo->lo_state = Lo_bound;
        if (part_shift)
                lo->lo_flags |= LO_FLAGS_PARTSCAN;
@@ -1253,7 +1308,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
         * put /dev/loopXX inode. Later in __loop_clr_fd() we bdput(bdev).
         */
        bdgrab(bdev);
-       mutex_unlock(&lo->lo_mutex);
+       loop_global_unlock(lo, is_loop);
        if (partscan)
                loop_reread_partitions(lo);
        if (!(mode & FMODE_EXCL))
@@ -1261,13 +1316,12 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
        return 0;
 
 out_unlock:
-       mutex_unlock(&lo->lo_mutex);
+       loop_global_unlock(lo, is_loop);
 out_bdev:
        if (!(mode & FMODE_EXCL))
                bd_abort_claiming(bdev, loop_configure);
 out_putf:
        fput(file);
-out:
        /* This is safe: open() is still holding a reference. */
        module_put(THIS_MODULE);
        return error;
@@ -1283,6 +1337,18 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
        int lo_number;
        struct loop_worker *pos, *worker;
 
+       /*
+        * Flush loop_configure() and loop_change_fd(). It is acceptable for
+        * loop_validate_file() to succeed, because the actual clear operation
+        * has not started yet.
+        */
+       mutex_lock(&loop_validate_mutex);
+       mutex_unlock(&loop_validate_mutex);
+       /*
+        * loop_validate_file() now fails because l->lo_state != Lo_bound
+        * became visible.
+        */
+
        mutex_lock(&lo->lo_mutex);
        if (WARN_ON_ONCE(lo->lo_state != Lo_rundown)) {
                err = -ENXIO;
index 531d390..90b947c 100644 (file)
@@ -4100,8 +4100,6 @@ again:
 
 static bool rbd_quiesce_lock(struct rbd_device *rbd_dev)
 {
-       bool need_wait;
-
        dout("%s rbd_dev %p\n", __func__, rbd_dev);
        lockdep_assert_held_write(&rbd_dev->lock_rwsem);
 
@@ -4113,11 +4111,11 @@ static bool rbd_quiesce_lock(struct rbd_device *rbd_dev)
         */
        rbd_dev->lock_state = RBD_LOCK_STATE_RELEASING;
        rbd_assert(!completion_done(&rbd_dev->releasing_wait));
-       need_wait = !list_empty(&rbd_dev->running_list);
-       downgrade_write(&rbd_dev->lock_rwsem);
-       if (need_wait)
-               wait_for_completion(&rbd_dev->releasing_wait);
-       up_read(&rbd_dev->lock_rwsem);
+       if (list_empty(&rbd_dev->running_list))
+               return true;
+
+       up_write(&rbd_dev->lock_rwsem);
+       wait_for_completion(&rbd_dev->releasing_wait);
 
        down_write(&rbd_dev->lock_rwsem);
        if (rbd_dev->lock_state != RBD_LOCK_STATE_RELEASING)
@@ -4203,15 +4201,11 @@ static void rbd_handle_acquired_lock(struct rbd_device *rbd_dev, u8 struct_v,
        if (!rbd_cid_equal(&cid, &rbd_empty_cid)) {
                down_write(&rbd_dev->lock_rwsem);
                if (rbd_cid_equal(&cid, &rbd_dev->owner_cid)) {
-                       /*
-                        * we already know that the remote client is
-                        * the owner
-                        */
-                       up_write(&rbd_dev->lock_rwsem);
-                       return;
+                       dout("%s rbd_dev %p cid %llu-%llu == owner_cid\n",
+                            __func__, rbd_dev, cid.gid, cid.handle);
+               } else {
+                       rbd_set_owner_cid(rbd_dev, &cid);
                }
-
-               rbd_set_owner_cid(rbd_dev, &cid);
                downgrade_write(&rbd_dev->lock_rwsem);
        } else {
                down_read(&rbd_dev->lock_rwsem);
@@ -4236,14 +4230,12 @@ static void rbd_handle_released_lock(struct rbd_device *rbd_dev, u8 struct_v,
        if (!rbd_cid_equal(&cid, &rbd_empty_cid)) {
                down_write(&rbd_dev->lock_rwsem);
                if (!rbd_cid_equal(&cid, &rbd_dev->owner_cid)) {
-                       dout("%s rbd_dev %p unexpected owner, cid %llu-%llu != owner_cid %llu-%llu\n",
+                       dout("%s rbd_dev %p cid %llu-%llu != owner_cid %llu-%llu\n",
                             __func__, rbd_dev, cid.gid, cid.handle,
                             rbd_dev->owner_cid.gid, rbd_dev->owner_cid.handle);
-                       up_write(&rbd_dev->lock_rwsem);
-                       return;
+               } else {
+                       rbd_set_owner_cid(rbd_dev, &rbd_empty_cid);
                }
-
-               rbd_set_owner_cid(rbd_dev, &rbd_empty_cid);
                downgrade_write(&rbd_dev->lock_rwsem);
        } else {
                down_read(&rbd_dev->lock_rwsem);
@@ -4951,6 +4943,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
                disk->minors = RBD_MINORS_PER_MAJOR;
        }
        disk->fops = &rbd_bd_ops;
+       disk->private_data = rbd_dev;
 
        blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
        /* QUEUE_FLAG_ADD_RANDOM is off by default for blk-mq */
index 09c8ab5..b3691de 100644 (file)
@@ -914,7 +914,8 @@ void fsl_mc_device_remove(struct fsl_mc_device *mc_dev)
 }
 EXPORT_SYMBOL_GPL(fsl_mc_device_remove);
 
-struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev)
+struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev,
+                                         u16 if_id)
 {
        struct fsl_mc_device *mc_bus_dev, *endpoint;
        struct fsl_mc_obj_desc endpoint_desc = {{ 0 }};
@@ -925,6 +926,7 @@ struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev)
        mc_bus_dev = to_fsl_mc_device(mc_dev->dev.parent);
        strcpy(endpoint1.type, mc_dev->obj_desc.type);
        endpoint1.id = mc_dev->obj_desc.id;
+       endpoint1.if_id = if_id;
 
        err = dprc_get_connection(mc_bus_dev->mc_io, 0,
                                  mc_bus_dev->mc_handle,
index 5b9ea66..bc239a1 100644 (file)
@@ -682,7 +682,7 @@ void mhi_rddm_prepare(struct mhi_controller *mhi_cntrl,
                      struct image_info *img_info);
 void mhi_fw_load_handler(struct mhi_controller *mhi_cntrl);
 int mhi_prepare_channel(struct mhi_controller *mhi_cntrl,
-                       struct mhi_chan *mhi_chan);
+                       struct mhi_chan *mhi_chan, unsigned int flags);
 int mhi_init_chan_ctxt(struct mhi_controller *mhi_cntrl,
                       struct mhi_chan *mhi_chan);
 void mhi_deinit_chan_ctxt(struct mhi_controller *mhi_cntrl,
index 22acde1..8444823 100644 (file)
@@ -773,11 +773,18 @@ static void mhi_process_cmd_completion(struct mhi_controller *mhi_cntrl,
        cmd_pkt = mhi_to_virtual(mhi_ring, ptr);
 
        chan = MHI_TRE_GET_CMD_CHID(cmd_pkt);
-       mhi_chan = &mhi_cntrl->mhi_chan[chan];
-       write_lock_bh(&mhi_chan->lock);
-       mhi_chan->ccs = MHI_TRE_GET_EV_CODE(tre);
-       complete(&mhi_chan->completion);
-       write_unlock_bh(&mhi_chan->lock);
+
+       if (chan < mhi_cntrl->max_chan &&
+           mhi_cntrl->mhi_chan[chan].configured) {
+               mhi_chan = &mhi_cntrl->mhi_chan[chan];
+               write_lock_bh(&mhi_chan->lock);
+               mhi_chan->ccs = MHI_TRE_GET_EV_CODE(tre);
+               complete(&mhi_chan->completion);
+               write_unlock_bh(&mhi_chan->lock);
+       } else {
+               dev_err(&mhi_cntrl->mhi_dev->dev,
+                       "Completion packet for invalid channel ID: %d\n", chan);
+       }
 
        mhi_del_ring_element(mhi_cntrl, mhi_ring);
 }
@@ -1423,7 +1430,7 @@ exit_unprepare_channel:
 }
 
 int mhi_prepare_channel(struct mhi_controller *mhi_cntrl,
-                       struct mhi_chan *mhi_chan)
+                       struct mhi_chan *mhi_chan, unsigned int flags)
 {
        int ret = 0;
        struct device *dev = &mhi_chan->mhi_dev->dev;
@@ -1448,6 +1455,9 @@ int mhi_prepare_channel(struct mhi_controller *mhi_cntrl,
        if (ret)
                goto error_pm_state;
 
+       if (mhi_chan->dir == DMA_FROM_DEVICE)
+               mhi_chan->pre_alloc = !!(flags & MHI_CH_INBOUND_ALLOC_BUFS);
+
        /* Pre-allocate buffer for xfer ring */
        if (mhi_chan->pre_alloc) {
                int nr_el = get_nr_avail_ring_elements(mhi_cntrl,
@@ -1603,7 +1613,7 @@ void mhi_reset_chan(struct mhi_controller *mhi_cntrl, struct mhi_chan *mhi_chan)
 }
 
 /* Move channel to start state */
-int mhi_prepare_for_transfer(struct mhi_device *mhi_dev)
+int mhi_prepare_for_transfer(struct mhi_device *mhi_dev, unsigned int flags)
 {
        int ret, dir;
        struct mhi_controller *mhi_cntrl = mhi_dev->mhi_cntrl;
@@ -1614,7 +1624,7 @@ int mhi_prepare_for_transfer(struct mhi_device *mhi_dev)
                if (!mhi_chan)
                        continue;
 
-               ret = mhi_prepare_channel(mhi_cntrl, mhi_chan);
+               ret = mhi_prepare_channel(mhi_cntrl, mhi_chan, flags);
                if (ret)
                        goto error_open_chan;
        }
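
With the new flags argument, a client that wants the MHI core to pre-allocate its inbound buffers passes MHI_CH_INBOUND_ALLOC_BUFS (the flag introduced by this series); a caller-side sketch:

        /* pre-allocate RX buffers for every DMA_FROM_DEVICE channel */
        ret = mhi_prepare_for_transfer(mhi_dev, MHI_CH_INBOUND_ALLOC_BUFS);
        if (ret)
                return ret;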
index 19413da..b33b9d7 100644 (file)
@@ -33,6 +33,8 @@
  * @bar_num: PCI base address register to use for MHI MMIO register space
  * @dma_data_width: DMA transfer word size (32 or 64 bits)
  * @mru_default: default MRU size for MBIM network packets
+ * @sideband_wake: set for devices that use a dedicated sideband GPIO for
+ *                wakeup instead of inband wake support (such as sdx24)
  */
 struct mhi_pci_dev_info {
        const struct mhi_controller_config *config;
@@ -42,6 +44,7 @@ struct mhi_pci_dev_info {
        unsigned int bar_num;
        unsigned int dma_data_width;
        unsigned int mru_default;
+       bool sideband_wake;
 };
 
 #define MHI_CHANNEL_CONFIG_UL(ch_num, ch_name, el_count, ev_ring) \
@@ -74,6 +77,22 @@ struct mhi_pci_dev_info {
                .doorbell_mode_switch = false,          \
        }
 
+#define MHI_CHANNEL_CONFIG_DL_AUTOQUEUE(ch_num, ch_name, el_count, ev_ring) \
+       {                                               \
+               .num = ch_num,                          \
+               .name = ch_name,                        \
+               .num_elements = el_count,               \
+               .event_ring = ev_ring,                  \
+               .dir = DMA_FROM_DEVICE,                 \
+               .ee_mask = BIT(MHI_EE_AMSS),            \
+               .pollcfg = 0,                           \
+               .doorbell = MHI_DB_BRST_DISABLE,        \
+               .lpm_notify = false,                    \
+               .offload_channel = false,               \
+               .doorbell_mode_switch = false,          \
+               .auto_queue = true,                     \
+       }
+
 #define MHI_EVENT_CONFIG_CTRL(ev_ring, el_count) \
        {                                       \
                .num_elements = el_count,       \
@@ -212,7 +231,7 @@ static const struct mhi_channel_config modem_qcom_v1_mhi_channels[] = {
        MHI_CHANNEL_CONFIG_UL(14, "QMI", 4, 0),
        MHI_CHANNEL_CONFIG_DL(15, "QMI", 4, 0),
        MHI_CHANNEL_CONFIG_UL(20, "IPCR", 8, 0),
-       MHI_CHANNEL_CONFIG_DL(21, "IPCR", 8, 0),
+       MHI_CHANNEL_CONFIG_DL_AUTOQUEUE(21, "IPCR", 8, 0),
        MHI_CHANNEL_CONFIG_UL_FP(34, "FIREHOSE", 32, 0),
        MHI_CHANNEL_CONFIG_DL_FP(35, "FIREHOSE", 32, 0),
        MHI_CHANNEL_CONFIG_HW_UL(100, "IP_HW0", 128, 2),
@@ -244,7 +263,8 @@ static const struct mhi_pci_dev_info mhi_qcom_sdx65_info = {
        .edl = "qcom/sdx65m/edl.mbn",
        .config = &modem_qcom_v1_mhiv_config,
        .bar_num = MHI_PCI_DEFAULT_BAR_NUM,
-       .dma_data_width = 32
+       .dma_data_width = 32,
+       .sideband_wake = false,
 };
 
 static const struct mhi_pci_dev_info mhi_qcom_sdx55_info = {
@@ -254,7 +274,8 @@ static const struct mhi_pci_dev_info mhi_qcom_sdx55_info = {
        .config = &modem_qcom_v1_mhiv_config,
        .bar_num = MHI_PCI_DEFAULT_BAR_NUM,
        .dma_data_width = 32,
-       .mru_default = 32768
+       .mru_default = 32768,
+       .sideband_wake = false,
 };
 
 static const struct mhi_pci_dev_info mhi_qcom_sdx24_info = {
@@ -262,7 +283,8 @@ static const struct mhi_pci_dev_info mhi_qcom_sdx24_info = {
        .edl = "qcom/prog_firehose_sdx24.mbn",
        .config = &modem_qcom_v1_mhiv_config,
        .bar_num = MHI_PCI_DEFAULT_BAR_NUM,
-       .dma_data_width = 32
+       .dma_data_width = 32,
+       .sideband_wake = true,
 };
 
 static const struct mhi_channel_config mhi_quectel_em1xx_channels[] = {
@@ -304,7 +326,8 @@ static const struct mhi_pci_dev_info mhi_quectel_em1xx_info = {
        .edl = "qcom/prog_firehose_sdx24.mbn",
        .config = &modem_quectel_em1xx_config,
        .bar_num = MHI_PCI_DEFAULT_BAR_NUM,
-       .dma_data_width = 32
+       .dma_data_width = 32,
+       .sideband_wake = true,
 };
 
 static const struct mhi_channel_config mhi_foxconn_sdx55_channels[] = {
@@ -342,7 +365,8 @@ static const struct mhi_pci_dev_info mhi_foxconn_sdx55_info = {
        .edl = "qcom/sdx55m/edl.mbn",
        .config = &modem_foxconn_sdx55_config,
        .bar_num = MHI_PCI_DEFAULT_BAR_NUM,
-       .dma_data_width = 32
+       .dma_data_width = 32,
+       .sideband_wake = false,
 };
 
 static const struct pci_device_id mhi_pci_id_table[] = {
@@ -643,11 +667,14 @@ static int mhi_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        mhi_cntrl->status_cb = mhi_pci_status_cb;
        mhi_cntrl->runtime_get = mhi_pci_runtime_get;
        mhi_cntrl->runtime_put = mhi_pci_runtime_put;
-       mhi_cntrl->wake_get = mhi_pci_wake_get_nop;
-       mhi_cntrl->wake_put = mhi_pci_wake_put_nop;
-       mhi_cntrl->wake_toggle = mhi_pci_wake_toggle_nop;
        mhi_cntrl->mru = info->mru_default;
 
+       if (info->sideband_wake) {
+               mhi_cntrl->wake_get = mhi_pci_wake_get_nop;
+               mhi_cntrl->wake_put = mhi_pci_wake_put_nop;
+               mhi_cntrl->wake_toggle = mhi_pci_wake_toggle_nop;
+       }
+
        err = mhi_pci_claim(mhi_cntrl, info->bar_num, DMA_BIT_MASK(info->dma_data_width));
        if (err)
                return err;
index be16076..f9d5b73 100644 (file)
@@ -92,13 +92,20 @@ int __must_check devm_clk_bulk_get_optional(struct device *dev, int num_clks,
 }
 EXPORT_SYMBOL_GPL(devm_clk_bulk_get_optional);
 
+static void devm_clk_bulk_release_all(struct device *dev, void *res)
+{
+       struct clk_bulk_devres *devres = res;
+
+       clk_bulk_put_all(devres->num_clks, devres->clks);
+}
+
 int __must_check devm_clk_bulk_get_all(struct device *dev,
                                       struct clk_bulk_data **clks)
 {
        struct clk_bulk_devres *devres;
        int ret;
 
-       devres = devres_alloc(devm_clk_bulk_release,
+       devres = devres_alloc(devm_clk_bulk_release_all,
                              sizeof(*devres), GFP_KERNEL);
        if (!devres)
                return -ENOMEM;
index 18117ce..5c75e3d 100644 (file)
@@ -526,7 +526,7 @@ struct stm32f4_pll {
 
 struct stm32f4_pll_post_div_data {
        int idx;
-       u8 pll_num;
+       int pll_idx;
        const char *name;
        const char *parent;
        u8 flag;
@@ -557,13 +557,13 @@ static const struct clk_div_table post_divr_table[] = {
 
 #define MAX_POST_DIV 3
 static const struct stm32f4_pll_post_div_data  post_div_data[MAX_POST_DIV] = {
-       { CLK_I2SQ_PDIV, PLL_I2S, "plli2s-q-div", "plli2s-q",
+       { CLK_I2SQ_PDIV, PLL_VCO_I2S, "plli2s-q-div", "plli2s-q",
                CLK_SET_RATE_PARENT, STM32F4_RCC_DCKCFGR, 0, 5, 0, NULL},
 
-       { CLK_SAIQ_PDIV, PLL_SAI, "pllsai-q-div", "pllsai-q",
+       { CLK_SAIQ_PDIV, PLL_VCO_SAI, "pllsai-q-div", "pllsai-q",
                CLK_SET_RATE_PARENT, STM32F4_RCC_DCKCFGR, 8, 5, 0, NULL },
 
-       { NO_IDX, PLL_SAI, "pllsai-r-div", "pllsai-r", CLK_SET_RATE_PARENT,
+       { NO_IDX, PLL_VCO_SAI, "pllsai-r-div", "pllsai-r", CLK_SET_RATE_PARENT,
                STM32F4_RCC_DCKCFGR, 16, 2, 0, post_divr_table },
 };
 
@@ -1774,7 +1774,7 @@ static void __init stm32f4_rcc_init(struct device_node *np)
                                post_div->width,
                                post_div->flag_div,
                                post_div->div_table,
-                               clks[post_div->pll_num],
+                               clks[post_div->pll_idx],
                                &stm32f4_clk_lock);
 
                if (post_div->idx != NO_IDX)
index 5ecc37a..c1ec75a 100644 (file)
@@ -18,6 +18,7 @@ config COMMON_CLK_HI3519
 config COMMON_CLK_HI3559A
        bool "Hi3559A Clock Driver"
        depends on ARCH_HISI || COMPILE_TEST
+       select RESET_HISI
        default ARCH_HISI
        help
          Build the clock driver for hi3559a.
index 800b2fe..b2c142f 100644 (file)
@@ -467,7 +467,7 @@ DEFINE_CLK_SMD_RPM(msm8936, sysmmnoc_clk, sysmmnoc_a_clk, QCOM_SMD_RPM_BUS_CLK,
 
 static struct clk_smd_rpm *msm8936_clks[] = {
        [RPM_SMD_PCNOC_CLK]             = &msm8916_pcnoc_clk,
-       [RPM_SMD_PCNOC_A_CLK]           = &msm8916_pcnoc_clk,
+       [RPM_SMD_PCNOC_A_CLK]           = &msm8916_pcnoc_a_clk,
        [RPM_SMD_SNOC_CLK]              = &msm8916_snoc_clk,
        [RPM_SMD_SNOC_A_CLK]            = &msm8916_snoc_a_clk,
        [RPM_SMD_BIMC_CLK]              = &msm8916_bimc_clk,
index 316912d..4f2c330 100644 (file)
@@ -194,6 +194,15 @@ static void clk_sdmmc_mux_disable(struct clk_hw *hw)
        gate_ops->disable(gate_hw);
 }
 
+static void clk_sdmmc_mux_disable_unused(struct clk_hw *hw)
+{
+       struct tegra_sdmmc_mux *sdmmc_mux = to_clk_sdmmc_mux(hw);
+       const struct clk_ops *gate_ops = sdmmc_mux->gate_ops;
+       struct clk_hw *gate_hw = &sdmmc_mux->gate.hw;
+
+       gate_ops->disable_unused(gate_hw);
+}
+
 static void clk_sdmmc_mux_restore_context(struct clk_hw *hw)
 {
        struct clk_hw *parent = clk_hw_get_parent(hw);
@@ -218,6 +227,7 @@ static const struct clk_ops tegra_clk_sdmmc_mux_ops = {
        .is_enabled = clk_sdmmc_mux_is_enabled,
        .enable = clk_sdmmc_mux_enable,
        .disable = clk_sdmmc_mux_disable,
+       .disable_unused = clk_sdmmc_mux_disable_unused,
        .restore_context = clk_sdmmc_mux_restore_context,
 };
 
index 10d4457..eb9c65f 100644 (file)
@@ -34,7 +34,6 @@ static long __init parse_acpi_path(const struct efi_dev_path *node,
                        break;
                if (!adev->pnp.unique_id && node->acpi.uid == 0)
                        break;
-               acpi_dev_put(adev);
        }
        if (!adev)
                return -ENODEV;
index 4b7ee3f..847f33f 100644 (file)
@@ -896,6 +896,7 @@ static int __init efi_memreserve_map_root(void)
 static int efi_mem_reserve_iomem(phys_addr_t addr, u64 size)
 {
        struct resource *res, *parent;
+       int ret;
 
        res = kzalloc(sizeof(struct resource), GFP_ATOMIC);
        if (!res)
@@ -908,7 +909,17 @@ static int efi_mem_reserve_iomem(phys_addr_t addr, u64 size)
 
        /* we expect a conflict with a 'System RAM' region */
        parent = request_resource_conflict(&iomem_resource, res);
-       return parent ? request_resource(parent, res) : 0;
+       ret = parent ? request_resource(parent, res) : 0;
+
+       /*
+        * Given that efi_mem_reserve_iomem() can be called at any
+        * time, only call memblock_reserve() if the architecture
+        * keeps the infrastructure around.
+        */
+       if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK) && !ret)
+               memblock_reserve(addr, size);
+
+       return ret;
 }
 
 int __ref efi_mem_reserve_persistent(phys_addr_t addr, u64 size)
index aa8da0a..ae87dde 100644 (file)
@@ -630,8 +630,8 @@ efi_status_t efi_load_initrd_cmdline(efi_loaded_image_t *image,
  * @image:     EFI loaded image protocol
  * @load_addr: pointer to loaded initrd
  * @load_size: size of loaded initrd
- * @soft_limit:        preferred size of allocated memory for loading the initrd
- * @hard_limit:        minimum size of allocated memory
+ * @soft_limit:        preferred address for loading the initrd
+ * @hard_limit:        upper limit address for loading the initrd
  *
  * Return:     status code
  */
index d8bc013..38722d2 100644 (file)
@@ -180,7 +180,10 @@ void __init efi_mokvar_table_init(void)
                pr_err("EFI MOKvar config table is not valid\n");
                return;
        }
-       efi_mem_reserve(efi.mokvar_table, map_size_needed);
+
+       if (md.type == EFI_BOOT_SERVICES_DATA)
+               efi_mem_reserve(efi.mokvar_table, map_size_needed);
+
        efi_mokvar_table_size = map_size_needed;
 }
 
index c1955d3..8f66567 100644 (file)
@@ -62,9 +62,11 @@ int __init efi_tpm_eventlog_init(void)
        tbl_size = sizeof(*log_tbl) + log_tbl->size;
        memblock_reserve(efi.tpm_log, tbl_size);
 
-       if (efi.tpm_final_log == EFI_INVALID_TABLE_ADDR ||
-           log_tbl->version != EFI_TCG2_EVENT_LOG_FORMAT_TCG_2) {
-               pr_warn(FW_BUG "TPM Final Events table missing or invalid\n");
+       if (efi.tpm_final_log == EFI_INVALID_TABLE_ADDR) {
+               pr_info("TPM Final Events table not present\n");
+               goto out;
+       } else if (log_tbl->version != EFI_TCG2_EVENT_LOG_FORMAT_TCG_2) {
+               pr_warn(FW_BUG "TPM Final Events table invalid\n");
                goto out;
        }
 
index 4b9157a..50b321a 100644 (file)
@@ -405,7 +405,7 @@ static int mpc8xxx_probe(struct platform_device *pdev)
 
        ret = devm_request_irq(&pdev->dev, mpc8xxx_gc->irqn,
                               mpc8xxx_gpio_irq_cascade,
-                              IRQF_SHARED, "gpio-cascade",
+                              IRQF_NO_THREAD | IRQF_SHARED, "gpio-cascade",
                               mpc8xxx_gc);
        if (ret) {
                dev_err(&pdev->dev,
index 5022e0a..0f5d17f 100644 (file)
@@ -238,8 +238,8 @@ static int tqmx86_gpio_probe(struct platform_device *pdev)
        struct resource *res;
        int ret, irq;
 
-       irq = platform_get_irq(pdev, 0);
-       if (irq < 0)
+       irq = platform_get_irq_optional(pdev, 0);
+       if (irq < 0 && irq != -ENXIO)
                return irq;
 
        res = platform_get_resource(pdev, IORESOURCE_IO, 0);
@@ -278,7 +278,7 @@ static int tqmx86_gpio_probe(struct platform_device *pdev)
 
        pm_runtime_enable(&pdev->dev);
 
-       if (irq) {
+       if (irq > 0) {
                struct irq_chip *irq_chip = &gpio->irq_chip;
                u8 irq_status;
 
index c0316ea..8ac6eb9 100644 (file)
@@ -619,6 +619,13 @@ struct amdgpu_video_codec_info {
        u32 max_level;
 };
 
+#define codec_info_build(type, width, height, level) \
+                        .codec_type = type,\
+                        .max_width = width,\
+                        .max_height = height,\
+                        .max_pixels_per_frame = height * width,\
+                        .max_level = level,
+
 struct amdgpu_video_codecs {
        const u32 codec_count;
        const struct amdgpu_video_codec_info *codec_array;
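
Expanded, one array entry built with the new macro is equivalent to the designated-initializer block it replaces in nv.c below (codec index abbreviated here for brevity):

        /* {codec_info_build(IDX_HEVC, 4096, 2304, 0)} expands to: */
        {
                .codec_type = IDX_HEVC,
                .max_width = 4096,
                .max_height = 2304,
                .max_pixels_per_frame = 2304 * 4096,
                .max_level = 0,
        },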
index 84a1b4b..6cc0d4f 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/slab.h>
 #include <linux/power_supply.h>
 #include <linux/pm_runtime.h>
+#include <linux/suspend.h>
 #include <acpi/video.h>
 #include <acpi/actbl.h>
 
@@ -1042,7 +1043,7 @@ bool amdgpu_acpi_is_s0ix_supported(struct amdgpu_device *adev)
 #if defined(CONFIG_AMD_PMC) || defined(CONFIG_AMD_PMC_MODULE)
        if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0) {
                if (adev->flags & AMD_IS_APU)
-                       return true;
+                       return pm_suspend_target_state == PM_SUSPEND_TO_IDLE;
        }
 #endif
        return false;
index d303e88..f3fd5ec 100644 (file)
@@ -3504,13 +3504,13 @@ int amdgpu_device_init(struct amdgpu_device *adev,
        r = amdgpu_device_get_job_timeout_settings(adev);
        if (r) {
                dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
-               goto failed_unmap;
+               return r;
        }
 
        /* early init functions */
        r = amdgpu_device_ip_early_init(adev);
        if (r)
-               goto failed_unmap;
+               return r;
 
        /* doorbell bar mapping and doorbell index init*/
        amdgpu_device_doorbell_init(adev);
@@ -3736,10 +3736,6 @@ release_ras_con:
 failed:
        amdgpu_vf_error_trans_all(adev);
 
-failed_unmap:
-       iounmap(adev->rmmio);
-       adev->rmmio = NULL;
-
        return r;
 }
 
index abb9288..361b86b 100644 (file)
@@ -1190,6 +1190,10 @@ static const struct pci_device_id pciidlist[] = {
        /* Van Gogh */
        {0x1002, 0x163F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VANGOGH|AMD_IS_APU},
 
+       /* Yellow Carp */
+       {0x1002, 0x164D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_YELLOW_CARP|AMD_IS_APU},
+       {0x1002, 0x1681, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_YELLOW_CARP|AMD_IS_APU},
+
        /* Navy_Flounder */
        {0x1002, 0x73C0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER},
        {0x1002, 0x73C1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER},
index d0d9bc4..854fc49 100644 (file)
@@ -255,6 +255,15 @@ static int amdgpu_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_str
        if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
                return -EPERM;
 
+       /* Workaround for Thunk bug creating PROT_NONE,MAP_PRIVATE mappings
+        * for debugger access to invisible VRAM. Should have used MAP_SHARED
+        * instead. Clearing VM_MAYWRITE prevents the mapping from ever
+        * becoming writable and makes is_cow_mapping(vm_flags) false.
+        */
+       if (is_cow_mapping(vma->vm_flags) &&
+           !(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)))
+               vma->vm_flags &= ~VM_MAYWRITE;
+
        return drm_gem_ttm_mmap(obj, vma);
 }
 
index f5e9c02..a64b2c7 100644 (file)
@@ -3300,6 +3300,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3[] =
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER7_SELECT, 0xf0f001ff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER8_SELECT, 0xf0f001ff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER9_SELECT, 0xf0f001ff, 0x00000000),
+       SOC15_REG_GOLDEN_VALUE(GC, 0, mmSX_DEBUG_1, 0x00010000, 0x00010020),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffbfffff, 0x00a00000)
 };
@@ -3379,6 +3380,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_vangogh[] =
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0xffffffbf, 0x00000020),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1_Vangogh, 0xffffffff, 0x00070103),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQG_CONFIG, 0x000017ff, 0x00001000),
+       SOC15_REG_GOLDEN_VALUE(GC, 0, mmSX_DEBUG_1, 0x00010000, 0x00010020),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00400000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
@@ -3445,6 +3447,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_4[] =
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER7_SELECT, 0xf0f001ff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER8_SELECT, 0xf0f001ff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER9_SELECT, 0xf0f001ff, 0x00000000),
+       SOC15_REG_GOLDEN_VALUE(GC, 0, mmSX_DEBUG_1, 0x00010000, 0x00010020),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x01030000, 0x01030000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x03a00000, 0x00a00000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG,  0x00000020, 0x00000020)
index 94a2c07..94d029d 100644 (file)
 #include "smuio_v11_0.h"
 #include "smuio_v11_0_6.h"
 
-#define codec_info_build(type, width, height, level) \
-                        .codec_type = type,\
-                        .max_width = width,\
-                        .max_height = height,\
-                        .max_pixels_per_frame = height * width,\
-                        .max_level = level,
-
 static const struct amd_ip_funcs nv_common_ip_funcs;
 
 /* Navi */
 static const struct amdgpu_video_codec_info nv_video_codecs_encode_array[] =
 {
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC,
-               .max_width = 4096,
-               .max_height = 2304,
-               .max_pixels_per_frame = 4096 * 2304,
-               .max_level = 0,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC,
-               .max_width = 4096,
-               .max_height = 2304,
-               .max_pixels_per_frame = 4096 * 2304,
-               .max_level = 0,
-       },
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)},
 };
 
 static const struct amdgpu_video_codecs nv_video_codecs_encode =
@@ -101,55 +82,13 @@ static const struct amdgpu_video_codecs nv_video_codecs_encode =
 /* Navi1x */
 static const struct amdgpu_video_codec_info nv_video_codecs_decode_array[] =
 {
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 3,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 5,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 52,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 4,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC,
-               .max_width = 8192,
-               .max_height = 4352,
-               .max_pixels_per_frame = 8192 * 4352,
-               .max_level = 186,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 0,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9,
-               .max_width = 8192,
-               .max_height = 4352,
-               .max_pixels_per_frame = 8192 * 4352,
-               .max_level = 0,
-       },
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
 };
 
 static const struct amdgpu_video_codecs nv_video_codecs_decode =
@@ -161,62 +100,14 @@ static const struct amdgpu_video_codecs nv_video_codecs_decode =
 /* Sienna Cichlid */
 static const struct amdgpu_video_codec_info sc_video_codecs_decode_array[] =
 {
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 3,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 5,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 52,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 4,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC,
-               .max_width = 8192,
-               .max_height = 4352,
-               .max_pixels_per_frame = 8192 * 4352,
-               .max_level = 186,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 0,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9,
-               .max_width = 8192,
-               .max_height = 4352,
-               .max_pixels_per_frame = 8192 * 4352,
-               .max_level = 0,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1,
-               .max_width = 8192,
-               .max_height = 4352,
-               .max_pixels_per_frame = 8192 * 4352,
-               .max_level = 0,
-       },
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
 };
 
 static const struct amdgpu_video_codecs sc_video_codecs_decode =
@@ -228,80 +119,20 @@ static const struct amdgpu_video_codecs sc_video_codecs_decode =
 /* SRIOV Sienna Cichlid, not const since data is controlled by host */
 static struct amdgpu_video_codec_info sriov_sc_video_codecs_encode_array[] =
 {
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC,
-               .max_width = 4096,
-               .max_height = 2304,
-               .max_pixels_per_frame = 4096 * 2304,
-               .max_level = 0,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC,
-               .max_width = 4096,
-               .max_height = 2304,
-               .max_pixels_per_frame = 4096 * 2304,
-               .max_level = 0,
-       },
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)},
 };
 
 static struct amdgpu_video_codec_info sriov_sc_video_codecs_decode_array[] =
 {
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 3,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 5,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 52,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 4,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC,
-               .max_width = 8192,
-               .max_height = 4352,
-               .max_pixels_per_frame = 8192 * 4352,
-               .max_level = 186,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 0,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9,
-               .max_width = 8192,
-               .max_height = 4352,
-               .max_pixels_per_frame = 8192 * 4352,
-               .max_level = 0,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1,
-               .max_width = 8192,
-               .max_height = 4352,
-               .max_pixels_per_frame = 8192 * 4352,
-               .max_level = 0,
-       },
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
 };
 
 static struct amdgpu_video_codecs sriov_sc_video_codecs_encode =
@@ -333,6 +164,19 @@ static const struct amdgpu_video_codecs bg_video_codecs_encode = {
        .codec_array = NULL,
 };
 
+/* Yellow Carp */
+static const struct amdgpu_video_codec_info yc_video_codecs_decode_array[] = {
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+};
+
+static const struct amdgpu_video_codecs yc_video_codecs_decode = {
+       .codec_count = ARRAY_SIZE(yc_video_codecs_decode_array),
+       .codec_array = yc_video_codecs_decode_array,
+};
+
 static int nv_query_video_codecs(struct amdgpu_device *adev, bool encode,
                                 const struct amdgpu_video_codecs **codecs)
 {
@@ -353,12 +197,17 @@ static int nv_query_video_codecs(struct amdgpu_device *adev, bool encode,
        case CHIP_NAVY_FLOUNDER:
        case CHIP_DIMGREY_CAVEFISH:
        case CHIP_VANGOGH:
-       case CHIP_YELLOW_CARP:
                if (encode)
                        *codecs = &nv_video_codecs_encode;
                else
                        *codecs = &sc_video_codecs_decode;
                return 0;
+       case CHIP_YELLOW_CARP:
+               if (encode)
+                       *codecs = &nv_video_codecs_encode;
+               else
+                       *codecs = &yc_video_codecs_decode;
+               return 0;
        case CHIP_BEIGE_GOBY:
                if (encode)
                        *codecs = &bg_video_codecs_encode;
@@ -1387,7 +1236,10 @@ static int nv_common_early_init(void *handle)
                        AMD_PG_SUPPORT_VCN |
                        AMD_PG_SUPPORT_VCN_DPG |
                        AMD_PG_SUPPORT_JPEG;
-               adev->external_rev_id = adev->rev_id + 0x01;
+               if (adev->pdev->device == 0x1681)
+                       adev->external_rev_id = adev->rev_id + 0x19;
+               else
+                       adev->external_rev_id = adev->rev_id + 0x01;
                break;
        default:
                /* FIXME: not supported yet */
index 618e5b6..536d41f 100644 (file)
@@ -67,7 +67,7 @@ static int psp_v12_0_init_microcode(struct psp_context *psp)
 
        err = psp_init_asd_microcode(psp, chip_name);
        if (err)
-               goto out;
+               return err;
 
        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name);
        err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev);
@@ -80,7 +80,7 @@ static int psp_v12_0_init_microcode(struct psp_context *psp)
        } else {
                err = amdgpu_ucode_validate(adev->psp.ta_fw);
                if (err)
-                       goto out2;
+                       goto out;
 
                ta_hdr = (const struct ta_firmware_header_v1_0 *)
                                 adev->psp.ta_fw->data;
@@ -105,10 +105,9 @@ static int psp_v12_0_init_microcode(struct psp_context *psp)
 
        return 0;
 
-out2:
+out:
        release_firmware(adev->psp.ta_fw);
        adev->psp.ta_fw = NULL;
-out:
        if (err) {
                dev_err(adev->dev,
                        "psp v12.0: Failed to load firmware \"%s\"\n",
index b024364..b7d350b 100644 (file)
 /* Vega, Raven, Arcturus */
 static const struct amdgpu_video_codec_info vega_video_codecs_encode_array[] =
 {
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC,
-               .max_width = 4096,
-               .max_height = 2304,
-               .max_pixels_per_frame = 4096 * 2304,
-               .max_level = 0,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC,
-               .max_width = 4096,
-               .max_height = 2304,
-               .max_pixels_per_frame = 4096 * 2304,
-               .max_level = 0,
-       },
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)},
 };
 
 static const struct amdgpu_video_codecs vega_video_codecs_encode =
@@ -113,48 +101,12 @@ static const struct amdgpu_video_codecs vega_video_codecs_encode =
 /* Vega */
 static const struct amdgpu_video_codec_info vega_video_codecs_decode_array[] =
 {
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 3,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 5,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 52,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 4,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 186,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 0,
-       },
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 4096, 186)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
 };
 
 static const struct amdgpu_video_codecs vega_video_codecs_decode =
@@ -166,55 +118,13 @@ static const struct amdgpu_video_codecs vega_video_codecs_decode =
 /* Raven */
 static const struct amdgpu_video_codec_info rv_video_codecs_decode_array[] =
 {
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 3,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 5,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 52,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 4,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 186,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 0,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 0,
-       },
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 4096, 186)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 4096, 4096, 0)},
 };
 
 static const struct amdgpu_video_codecs rv_video_codecs_decode =
@@ -226,55 +136,13 @@ static const struct amdgpu_video_codecs rv_video_codecs_decode =
 /* Renoir, Arcturus */
 static const struct amdgpu_video_codec_info rn_video_codecs_decode_array[] =
 {
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 3,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 5,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 52,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 4,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC,
-               .max_width = 8192,
-               .max_height = 4352,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 186,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG,
-               .max_width = 4096,
-               .max_height = 4096,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 0,
-       },
-       {
-               .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9,
-               .max_width = 8192,
-               .max_height = 4352,
-               .max_pixels_per_frame = 4096 * 4096,
-               .max_level = 0,
-       },
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+       {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
 };
 
 static const struct amdgpu_video_codecs rn_video_codecs_decode =
index d3a2a5f..b53f49a 100644 (file)
@@ -2429,9 +2429,9 @@ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector)
        max_cll = conn_base->hdr_sink_metadata.hdmi_type1.max_cll;
        min_cll = conn_base->hdr_sink_metadata.hdmi_type1.min_cll;
 
-       if (caps->ext_caps->bits.oled == 1 ||
+       if (caps->ext_caps->bits.oled == 1 /*||
            caps->ext_caps->bits.sdr_aux_backlight_control == 1 ||
-           caps->ext_caps->bits.hdr_aux_backlight_control == 1)
+           caps->ext_caps->bits.hdr_aux_backlight_control == 1*/)
                caps->aux_support = true;
 
        if (amdgpu_backlight == 0)
index 6e0c5c6..a5331b9 100644 (file)
@@ -197,7 +197,7 @@ void dcn20_update_clocks_update_dentist(struct clk_mgr_internal *clk_mgr, struct
 
        REG_UPDATE(DENTIST_DISPCLK_CNTL,
                        DENTIST_DISPCLK_WDIVIDER, dispclk_wdivider);
-//     REG_WAIT(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_CHG_DONE, 1, 5, 100);
+       REG_WAIT(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_CHG_DONE, 1, 50, 1000);
        REG_UPDATE(DENTIST_DISPCLK_CNTL,
                        DENTIST_DPPCLK_WDIVIDER, dppclk_wdivider);
        REG_WAIT(DENTIST_DISPCLK_CNTL, DENTIST_DPPCLK_CHG_DONE, 1, 5, 100);
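
For context: in dc's register helpers, REG_WAIT(reg, field, value, delay_us,
tries) polls until the field reads the expected value, so re-enabling the
DISPCLK wait with (50, 1000) allows up to ~50 ms instead of the ~0.5 ms the
commented-out call would have allowed. A generic sketch of that polling shape
(illustrative only; userspace usleep() stands in for the kernel delay):

    #include <stdbool.h>
    #include <unistd.h>

    /* Poll until read_field() returns want, sleeping delay_us between
     * attempts, and give up after max_tries polls. */
    static bool poll_field(unsigned int (*read_field)(void), unsigned int want,
                           unsigned int delay_us, unsigned int max_tries)
    {
            for (unsigned int i = 0; i < max_tries; i++) {
                    if (read_field() == want)
                            return true;
                    usleep(delay_us);
            }
            return false;
    }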
index 513676a..af7004b 100644 (file)
@@ -190,6 +190,10 @@ void dcn3_init_clocks(struct clk_mgr *clk_mgr_base)
                        &clk_mgr_base->bw_params->clk_table.entries[0].dtbclk_mhz,
                        &num_levels);
 
+       /* SOCCLK */
+       dcn3_init_single_clock(clk_mgr, PPCLK_SOCCLK,
+                                       &clk_mgr_base->bw_params->clk_table.entries[0].socclk_mhz,
+                                       &num_levels);
        // DPREFCLK ???
 
        /* DISPCLK */
index 7b7d884..4a4894e 100644 (file)
 
 #include "dc_dmub_srv.h"
 
+#include "yellow_carp_offset.h"
+
+#define regCLK1_CLK_PLL_REQ                    0x0237
+#define regCLK1_CLK_PLL_REQ_BASE_IDX           0
+
+#define CLK1_CLK_PLL_REQ__FbMult_int__SHIFT    0x0
+#define CLK1_CLK_PLL_REQ__PllSpineDiv__SHIFT   0xc
+#define CLK1_CLK_PLL_REQ__FbMult_frac__SHIFT   0x10
+#define CLK1_CLK_PLL_REQ__FbMult_int_MASK      0x000001FFL
+#define CLK1_CLK_PLL_REQ__PllSpineDiv_MASK     0x0000F000L
+#define CLK1_CLK_PLL_REQ__FbMult_frac_MASK     0xFFFF0000L
+
+#define REG(reg_name) \
+       (CLK_BASE.instance[0].segment[reg ## reg_name ## _BASE_IDX] + reg ## reg_name)
+
 #define TO_CLK_MGR_DCN31(clk_mgr)\
        container_of(clk_mgr, struct clk_mgr_dcn31, base)
 
@@ -124,10 +139,10 @@ static void dcn31_update_clocks(struct clk_mgr *clk_mgr_base,
         * also if safe to lower is false, we just go in the higher state
         */
        if (safe_to_lower) {
-               if (new_clocks->z9_support == DCN_Z9_SUPPORT_ALLOW &&
-                               new_clocks->z9_support != clk_mgr_base->clks.z9_support) {
+               if (new_clocks->zstate_support == DCN_ZSTATE_SUPPORT_ALLOW &&
+                               new_clocks->zstate_support != clk_mgr_base->clks.zstate_support) {
                        dcn31_smu_set_Z9_support(clk_mgr, true);
-                       clk_mgr_base->clks.z9_support = new_clocks->z9_support;
+                       clk_mgr_base->clks.zstate_support = new_clocks->zstate_support;
                }
 
                if (clk_mgr_base->clks.dtbclk_en && !new_clocks->dtbclk_en) {
@@ -148,10 +163,10 @@ static void dcn31_update_clocks(struct clk_mgr *clk_mgr_base,
                        }
                }
        } else {
-               if (new_clocks->z9_support == DCN_Z9_SUPPORT_DISALLOW &&
-                               new_clocks->z9_support != clk_mgr_base->clks.z9_support) {
+               if (new_clocks->zstate_support == DCN_ZSTATE_SUPPORT_DISALLOW &&
+                               new_clocks->zstate_support != clk_mgr_base->clks.zstate_support) {
                        dcn31_smu_set_Z9_support(clk_mgr, false);
-                       clk_mgr_base->clks.z9_support = new_clocks->z9_support;
+                       clk_mgr_base->clks.zstate_support = new_clocks->zstate_support;
                }
 
                if (!clk_mgr_base->clks.dtbclk_en && new_clocks->dtbclk_en) {
@@ -229,7 +244,32 @@ static void dcn31_update_clocks(struct clk_mgr *clk_mgr_base,
 
 static int get_vco_frequency_from_reg(struct clk_mgr_internal *clk_mgr)
 {
-       return 0;
+       /* get FbMult value */
+       struct fixed31_32 pll_req;
+       unsigned int fbmult_frac_val = 0;
+       unsigned int fbmult_int_val = 0;
+
+       /*
+        * The register value of fbmult is in 8.16 format; convert it to 31.32
+        * to leverage the fixed-point operations available in the driver.
+        */
+
+       REG_GET(CLK1_CLK_PLL_REQ, FbMult_frac, &fbmult_frac_val); /* 16 bit fractional part */
+       REG_GET(CLK1_CLK_PLL_REQ, FbMult_int, &fbmult_int_val); /* 8 bit integer part */
+
+       pll_req = dc_fixpt_from_int(fbmult_int_val);
+
+       /*
+        * since the fractional part is only 16 bits in the register definition but 32 bits
+        * in our fixed-point definition, we need to shift left by 16 to obtain the correct value
+        */
+       pll_req.value |= fbmult_frac_val << 16;
+
+       /* multiply by REFCLK period */
+       pll_req = dc_fixpt_mul_int(pll_req, clk_mgr->dfs_ref_freq_khz);
+
+       /* integer part is now VCO frequency in kHz */
+       return dc_fixpt_floor(pll_req);
 }
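
The conversion above can be sanity-checked with plain integer math. A
standalone sketch that mirrors the 8.16 fixed-point arithmetic without the
dc_fixpt helpers, using the 48000 kHz DFS reference this patch sets in
dcn31_clk_mgr_construct (FbMult = 100.5 is an arbitrary example value):

    #include <stdint.h>
    #include <stdio.h>

    /* fbmult arrives as an 8-bit integer part and a 16-bit fraction;
     * combine them in 16.16, multiply by the reference clock, and floor
     * by dropping the fractional bits. */
    static uint32_t vco_freq_khz(uint32_t fb_int, uint32_t fb_frac,
                                 uint32_t ref_khz)
    {
            uint64_t fbmult_16_16 = ((uint64_t)fb_int << 16) | fb_frac;

            return (uint32_t)((fbmult_16_16 * ref_khz) >> 16);
    }

    int main(void)
    {
            /* 100.5 * 48000 kHz = 4824000 kHz */
            printf("%u\n", vco_freq_khz(100, 0x8000, 48000));
            return 0;
    }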
 
 static void dcn31_enable_pme_wa(struct clk_mgr *clk_mgr_base)
@@ -246,7 +286,7 @@ static void dcn31_init_clocks(struct clk_mgr *clk_mgr)
        clk_mgr->clks.p_state_change_support = true;
        clk_mgr->clks.prev_p_state_change_support = true;
        clk_mgr->clks.pwr_state = DCN_PWR_STATE_UNKNOWN;
-       clk_mgr->clks.z9_support = DCN_Z9_SUPPORT_UNKNOWN;
+       clk_mgr->clks.zstate_support = DCN_ZSTATE_SUPPORT_UNKNOWN;
 }
 
 static bool dcn31_are_clock_states_equal(struct dc_clocks *a,
@@ -260,7 +300,7 @@ static bool dcn31_are_clock_states_equal(struct dc_clocks *a,
                return false;
        else if (a->dcfclk_deep_sleep_khz != b->dcfclk_deep_sleep_khz)
                return false;
-       else if (a->z9_support != b->z9_support)
+       else if (a->zstate_support != b->zstate_support)
                return false;
        else if (a->dtbclk_en != b->dtbclk_en)
                return false;
@@ -592,6 +632,7 @@ void dcn31_clk_mgr_construct(
        clk_mgr->base.dprefclk_ss_percentage = 0;
        clk_mgr->base.dprefclk_ss_divider = 1000;
        clk_mgr->base.ss_on_dprefclk = false;
+       clk_mgr->base.dfs_ref_freq_khz = 48000;
 
        clk_mgr->smu_wm_set.wm_set = (struct dcn31_watermarks *)dm_helpers_allocate_gpu_mem(
                                clk_mgr->base.base.ctx,
index cc21cf7..f8f1005 100644 (file)
 #define __DCN31_CLK_MGR_H__
 #include "clk_mgr_internal.h"
 
-//CLK1_CLK_PLL_REQ
-#ifndef CLK11_CLK1_CLK_PLL_REQ__FbMult_int__SHIFT
-#define CLK11_CLK1_CLK_PLL_REQ__FbMult_int__SHIFT                                                                   0x0
-#define CLK11_CLK1_CLK_PLL_REQ__PllSpineDiv__SHIFT                                                                  0xc
-#define CLK11_CLK1_CLK_PLL_REQ__FbMult_frac__SHIFT                                                                  0x10
-#define CLK11_CLK1_CLK_PLL_REQ__FbMult_int_MASK                                                                     0x000001FFL
-#define CLK11_CLK1_CLK_PLL_REQ__PllSpineDiv_MASK                                                                    0x0000F000L
-#define CLK11_CLK1_CLK_PLL_REQ__FbMult_frac_MASK                                                                    0xFFFF0000L
-//CLK1_CLK0_DFS_CNTL
-#define CLK11_CLK1_CLK0_DFS_CNTL__CLK0_DIVIDER__SHIFT                                                               0x0
-#define CLK11_CLK1_CLK0_DFS_CNTL__CLK0_DIVIDER_MASK                                                                 0x0000007FL
-/*DPREF clock related*/
-#define CLK0_CLK3_DFS_CNTL__CLK3_DIVIDER__SHIFT                                                               0x0
-#define CLK0_CLK3_DFS_CNTL__CLK3_DIVIDER_MASK                                                                 0x0000007FL
-#define CLK1_CLK3_DFS_CNTL__CLK3_DIVIDER__SHIFT                                                               0x0
-#define CLK1_CLK3_DFS_CNTL__CLK3_DIVIDER_MASK                                                                 0x0000007FL
-#define CLK2_CLK3_DFS_CNTL__CLK3_DIVIDER__SHIFT                                                               0x0
-#define CLK2_CLK3_DFS_CNTL__CLK3_DIVIDER_MASK                                                                 0x0000007FL
-#define CLK3_CLK3_DFS_CNTL__CLK3_DIVIDER__SHIFT                                                               0x0
-#define CLK3_CLK3_DFS_CNTL__CLK3_DIVIDER_MASK                                                                 0x0000007FL
-
-//CLK3_0_CLK3_CLK_PLL_REQ
-#define CLK3_0_CLK3_CLK_PLL_REQ__FbMult_int__SHIFT                                                            0x0
-#define CLK3_0_CLK3_CLK_PLL_REQ__PllSpineDiv__SHIFT                                                           0xc
-#define CLK3_0_CLK3_CLK_PLL_REQ__FbMult_frac__SHIFT                                                           0x10
-#define CLK3_0_CLK3_CLK_PLL_REQ__FbMult_int_MASK                                                              0x000001FFL
-#define CLK3_0_CLK3_CLK_PLL_REQ__PllSpineDiv_MASK                                                             0x0000F000L
-#define CLK3_0_CLK3_CLK_PLL_REQ__FbMult_frac_MASK                                                             0xFFFF0000L
-
-#define mmCLK0_CLK3_DFS_CNTL                            0x16C60
-#define mmCLK00_CLK0_CLK3_DFS_CNTL                      0x16C60
-#define mmCLK01_CLK0_CLK3_DFS_CNTL                      0x16E60
-#define mmCLK02_CLK0_CLK3_DFS_CNTL                      0x17060
-#define mmCLK03_CLK0_CLK3_DFS_CNTL                      0x17260
-
-#define mmCLK0_CLK_PLL_REQ                              0x16C10
-#define mmCLK00_CLK0_CLK_PLL_REQ                        0x16C10
-#define mmCLK01_CLK0_CLK_PLL_REQ                        0x16E10
-#define mmCLK02_CLK0_CLK_PLL_REQ                        0x17010
-#define mmCLK03_CLK0_CLK_PLL_REQ                        0x17210
-
-#define mmCLK1_CLK_PLL_REQ                              0x1B00D
-#define mmCLK10_CLK1_CLK_PLL_REQ                        0x1B00D
-#define mmCLK11_CLK1_CLK_PLL_REQ                        0x1B20D
-#define mmCLK12_CLK1_CLK_PLL_REQ                        0x1B40D
-#define mmCLK13_CLK1_CLK_PLL_REQ                        0x1B60D
-
-#define mmCLK2_CLK_PLL_REQ                              0x17E0D
-
-/*AMCLK*/
-#define mmCLK11_CLK1_CLK0_DFS_CNTL                      0x1B23F
-#define mmCLK11_CLK1_CLK_PLL_REQ                        0x1B20D
-#endif
-
 struct dcn31_watermarks;
 
 struct dcn31_smu_watermark_set {
index 6da226b..9fb8c46 100644 (file)
@@ -1820,8 +1820,7 @@ bool perform_link_training_with_retries(
                                         */
                                        panel_mode = DP_PANEL_MODE_DEFAULT;
                                }
-                       } else
-                               panel_mode = DP_PANEL_MODE_DEFAULT;
+                       }
                }
 #endif
 
@@ -4650,7 +4649,10 @@ enum dp_panel_mode dp_get_panel_mode(struct dc_link *link)
                }
        }
 
-       if (link->dpcd_caps.panel_mode_edp) {
+       if (link->dpcd_caps.panel_mode_edp &&
+               (link->connector_signal == SIGNAL_TYPE_EDP ||
+                (link->connector_signal == SIGNAL_TYPE_DISPLAY_PORT &&
+                 link->is_internal_display))) {
                return DP_PANEL_MODE_EDP;
        }
 
@@ -4914,9 +4916,7 @@ bool dc_link_set_default_brightness_aux(struct dc_link *link)
 {
        uint32_t default_backlight;
 
-       if (link &&
-               (link->dpcd_sink_ext_caps.bits.hdr_aux_backlight_control == 1 ||
-               link->dpcd_sink_ext_caps.bits.sdr_aux_backlight_control == 1)) {
+       if (link && link->dpcd_sink_ext_caps.bits.oled == 1) {
                if (!dc_link_read_default_bl_aux(link, &default_backlight))
                        default_backlight = 150000;
                // if < 5 nits or > 5000, it might be wrong readback
index a6a6724..1596f6b 100644 (file)
@@ -1062,7 +1062,7 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx)
         * so use only 30 bpp on DCE_VERSION_11_0. Testing with DCE 11.2 and 8.3
         * did not show such problems, so this seems to be the exception.
         */
-       if (plane_state->ctx->dce_version != DCE_VERSION_11_0)
+       if (plane_state->ctx->dce_version > DCE_VERSION_11_0)
                pipe_ctx->plane_res.scl_data.lb_params.depth = LB_PIXEL_DEPTH_36BPP;
        else
                pipe_ctx->plane_res.scl_data.lb_params.depth = LB_PIXEL_DEPTH_30BPP;
index 45640f1..8dcea8f 100644 (file)
@@ -354,10 +354,10 @@ enum dcn_pwr_state {
 };
 
 #if defined(CONFIG_DRM_AMD_DC_DCN)
-enum dcn_z9_support_state {
-       DCN_Z9_SUPPORT_UNKNOWN,
-       DCN_Z9_SUPPORT_ALLOW,
-       DCN_Z9_SUPPORT_DISALLOW,
+enum dcn_zstate_support_state {
+       DCN_ZSTATE_SUPPORT_UNKNOWN,
+       DCN_ZSTATE_SUPPORT_ALLOW,
+       DCN_ZSTATE_SUPPORT_DISALLOW,
 };
 #endif
 /*
@@ -378,7 +378,7 @@ struct dc_clocks {
        int dramclk_khz;
        bool p_state_change_support;
 #if defined(CONFIG_DRM_AMD_DC_DCN)
-       enum dcn_z9_support_state z9_support;
+       enum dcn_zstate_support_state zstate_support;
        bool dtbclk_en;
 #endif
        enum dcn_pwr_state pwr_state;
index df6539e..0464a8f 100644 (file)
@@ -636,6 +636,7 @@ struct dce_hwseq_registers {
        uint32_t ODM_MEM_PWR_CTRL3;
        uint32_t DMU_MEM_PWR_CNTL;
        uint32_t MMHUBBUB_MEM_PWR_CNTL;
+       uint32_t DCHUBBUB_ARB_HOSTVM_CNTL;
 };
  /* set field name */
 #define HWS_SF(blk_name, reg_name, field_name, post_fix)\
@@ -1110,7 +1111,8 @@ struct dce_hwseq_registers {
        type DOMAIN_POWER_FORCEON;\
        type DOMAIN_POWER_GATE;\
        type DOMAIN_PGFSM_PWR_STATUS;\
-       type HPO_HDMISTREAMCLK_G_GATE_DIS;
+       type HPO_HDMISTREAMCLK_G_GATE_DIS;\
+       type DISABLE_HOSTVM_FORCE_ALLOW_PSTATE;
 
 struct dce_hwseq_shift {
        HWSEQ_REG_FIELD_LIST(uint8_t)
index 673b93f..cb9767d 100644 (file)
@@ -217,6 +217,8 @@ static void dpp1_dscl_set_lb(
        const struct line_buffer_params *lb_params,
        enum lb_memory_config mem_size_config)
 {
+       uint32_t max_partitions = 63; /* Currently hardcoded on all ASICs before DCN 3.2 */
+
        /* LB */
        if (dpp->base.caps->dscl_data_proc_format == DSCL_DATA_PRCESSING_FIXED_FORMAT) {
                /* DSCL caps: pixel data processed in fixed format */
@@ -239,9 +241,12 @@ static void dpp1_dscl_set_lb(
                        LB_DATA_FORMAT__ALPHA_EN, lb_params->alpha_en); /* Alpha enable */
        }
 
+       if (dpp->base.caps->max_lb_partitions == 31)
+               max_partitions = 31;
+
        REG_SET_2(LB_MEMORY_CTRL, 0,
                MEMORY_CONFIG, mem_size_config,
-               LB_MAX_PARTITIONS, 63);
+               LB_MAX_PARTITIONS, max_partitions);
 }
 
 static const uint16_t *dpp1_dscl_get_filter_coeffs_64p(int taps, struct fixed31_32 ratio)
index 1b05a37..b173fa3 100644 (file)
@@ -2093,8 +2093,10 @@ int dcn20_populate_dml_pipes_from_context(
                                - timing->v_border_bottom;
                pipes[pipe_cnt].pipe.dest.htotal = timing->h_total;
                pipes[pipe_cnt].pipe.dest.vtotal = v_total;
-               pipes[pipe_cnt].pipe.dest.hactive = timing->h_addressable;
-               pipes[pipe_cnt].pipe.dest.vactive = timing->v_addressable;
+               pipes[pipe_cnt].pipe.dest.hactive =
+                       timing->h_addressable + timing->h_border_left + timing->h_border_right;
+               pipes[pipe_cnt].pipe.dest.vactive =
+                       timing->v_addressable + timing->v_border_top + timing->v_border_bottom;
                pipes[pipe_cnt].pipe.dest.interlaced = timing->flags.INTERLACE;
                pipes[pipe_cnt].pipe.dest.pixel_rate_mhz = timing->pix_clk_100hz/10000.0;
                if (timing->timing_3d_format == TIMING_3D_FORMAT_HW_FRAME_PACKING)
@@ -3079,6 +3081,37 @@ static bool is_dtbclk_required(struct dc *dc, struct dc_state *context)
        return false;
 }
 
+static enum dcn_zstate_support_state  decide_zstate_support(struct dc *dc, struct dc_state *context)
+{
+       int plane_count;
+       int i;
+
+       plane_count = 0;
+       for (i = 0; i < dc->res_pool->pipe_count; i++) {
+               if (context->res_ctx.pipe_ctx[i].plane_state)
+                       plane_count++;
+       }
+
+       /*
+        * Zstate is allowed in the following scenarios:
+        *      1. Single eDP with PSR enabled
+        *      2. 0 planes (no memory requests)
+        *      3. Single eDP without PSR but with a stutter period > 5 ms
+        */
+       if (plane_count == 0)
+               return DCN_ZSTATE_SUPPORT_ALLOW;
+       else if (context->stream_count == 1 && context->streams[0]->signal == SIGNAL_TYPE_EDP) {
+               struct dc_link *link = context->streams[0]->sink->link;
+
+               if ((link->link_index == 0 && link->psr_settings.psr_feature_enabled)
+                               || context->bw_ctx.dml.vba.StutterPeriod > 5000.0)
+                       return DCN_ZSTATE_SUPPORT_ALLOW;
+               else
+                       return DCN_ZSTATE_SUPPORT_DISALLOW;
+       } else
+               return DCN_ZSTATE_SUPPORT_DISALLOW;
+}
+
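
A standalone restatement of the three rules above (parameter names are
hypothetical, not the driver's types):

    #include <stdbool.h>
    #include <stdio.h>

    static bool zstate_allowed(int plane_count, bool single_edp_stream,
                               bool psr_on_link0, double stutter_period_us)
    {
            if (plane_count == 0)
                    return true;    /* no memory requests */
            if (single_edp_stream &&
                (psr_on_link0 || stutter_period_us > 5000.0))
                    return true;    /* PSR enabled, or stutter period > 5 ms */
            return false;
    }

    int main(void)
    {
            printf("%d\n", zstate_allowed(1, true, false, 6000.0)); /* 1 */
            printf("%d\n", zstate_allowed(2, false, false, 0.0));   /* 0 */
            return 0;
    }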
 void dcn20_calculate_dlg_params(
                struct dc *dc, struct dc_state *context,
                display_e2e_pipe_params_st *pipes,
@@ -3086,7 +3119,6 @@ void dcn20_calculate_dlg_params(
                int vlevel)
 {
        int i, pipe_idx;
-       int plane_count;
 
        /* Writeback MCIF_WB arbitration parameters */
        dc->res_pool->funcs->set_mcif_arb_params(dc, context, pipes, pipe_cnt);
@@ -3102,17 +3134,7 @@ void dcn20_calculate_dlg_params(
                                                        != dm_dram_clock_change_unsupported;
        context->bw_ctx.bw.dcn.clk.dppclk_khz = 0;
 
-       context->bw_ctx.bw.dcn.clk.z9_support = (context->bw_ctx.dml.vba.StutterPeriod > 5000.0) ?
-                       DCN_Z9_SUPPORT_ALLOW : DCN_Z9_SUPPORT_DISALLOW;
-
-       plane_count = 0;
-       for (i = 0; i < dc->res_pool->pipe_count; i++) {
-               if (context->res_ctx.pipe_ctx[i].plane_state)
-                       plane_count++;
-       }
-
-       if (plane_count == 0)
-               context->bw_ctx.bw.dcn.clk.z9_support = DCN_Z9_SUPPORT_ALLOW;
+       context->bw_ctx.bw.dcn.clk.zstate_support = decide_zstate_support(dc, context);
 
        context->bw_ctx.bw.dcn.clk.dtbclk_en = is_dtbclk_required(dc, context);
 
index f3d98e3..bf0a198 100644 (file)
@@ -109,6 +109,7 @@ struct _vcs_dpi_ip_params_st dcn2_1_ip = {
        .max_page_table_levels = 4,
        .pte_chunk_size_kbytes = 2,
        .meta_chunk_size_kbytes = 2,
+       .min_meta_chunk_size_bytes = 256,
        .writeback_chunk_size_kbytes = 2,
        .line_buffer_size_bits = 789504,
        .is_line_buffer_bpp_fixed = 0,
index 2140b75..23a52d4 100644 (file)
@@ -383,13 +383,6 @@ bool dpp3_get_optimal_number_of_taps(
        int min_taps_y, min_taps_c;
        enum lb_memory_config lb_config;
 
-       /* Some ASICs does not support  FP16 scaling, so we reject modes require this*/
-       if (scl_data->viewport.width  != scl_data->h_active &&
-               scl_data->viewport.height != scl_data->v_active &&
-               dpp->caps->dscl_data_proc_format == DSCL_DATA_PRCESSING_FIXED_FORMAT &&
-               scl_data->format == PIXEL_FORMAT_FP16)
-               return false;
-
        if (scl_data->viewport.width > scl_data->h_active &&
                dpp->ctx->dc->debug.max_downscale_src_width != 0 &&
                scl_data->viewport.width > dpp->ctx->dc->debug.max_downscale_src_width)
@@ -1440,15 +1433,6 @@ bool dpp3_construct(
        dpp->tf_shift = tf_shift;
        dpp->tf_mask = tf_mask;
 
-       dpp->lb_pixel_depth_supported =
-               LB_PIXEL_DEPTH_18BPP |
-               LB_PIXEL_DEPTH_24BPP |
-               LB_PIXEL_DEPTH_30BPP |
-               LB_PIXEL_DEPTH_36BPP;
-
-       dpp->lb_bits_per_entry = LB_BITS_PER_ENTRY;
-       dpp->lb_memory_size = LB_TOTAL_NUMBER_OF_ENTRIES; /*0x1404*/
-
        return true;
 }
 
index 3fa86cd..ac644ae 100644 (file)
        SRI(COLOR_KEYER_BLUE, CNVC_CFG, id), \
        SRI(CURSOR_CONTROL, CURSOR0_, id),\
        SRI(OBUF_MEM_PWR_CTRL, DSCL, id),\
+       SRI(DSCL_MEM_PWR_STATUS, DSCL, id), \
        SRI(DSCL_MEM_PWR_CTRL, DSCL, id)
 
 #define DPP_REG_LIST_DCN30(id)\
        SRI(CM_SHAPER_LUT_DATA, CM, id),\
        SRI(CM_MEM_PWR_CTRL2, CM, id), \
        SRI(CM_MEM_PWR_STATUS2, CM, id), \
-       SRI(DSCL_MEM_PWR_STATUS, DSCL, id), \
-       SRI(DSCL_MEM_PWR_CTRL, DSCL, id), \
        SRI(CM_BLNDGAM_RAMA_START_SLOPE_CNTL_B, CM, id),\
        SRI(CM_BLNDGAM_RAMA_START_SLOPE_CNTL_G, CM, id),\
        SRI(CM_BLNDGAM_RAMA_START_SLOPE_CNTL_R, CM, id),\
index 16a75ba..7d3ff5d 100644 (file)
@@ -1398,11 +1398,18 @@ void dcn302_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param
                        dcn3_02_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz;
                        dcn3_02_soc.clock_limits[i].dppclk_mhz  = max_dppclk_mhz;
                        dcn3_02_soc.clock_limits[i].phyclk_mhz  = max_phyclk_mhz;
-                       dcn3_02_soc.clock_limits[i].dtbclk_mhz = dcn3_02_soc.clock_limits[0].dtbclk_mhz;
+                       /* Populate from bw_params for DTBCLK, SOCCLK */
+                       if (!bw_params->clk_table.entries[i].dtbclk_mhz && i > 0)
+                               dcn3_02_soc.clock_limits[i].dtbclk_mhz  = dcn3_02_soc.clock_limits[i-1].dtbclk_mhz;
+                       else
+                               dcn3_02_soc.clock_limits[i].dtbclk_mhz  = bw_params->clk_table.entries[i].dtbclk_mhz;
+                       if (!bw_params->clk_table.entries[i].socclk_mhz && i > 0)
+                               dcn3_02_soc.clock_limits[i].socclk_mhz = dcn3_02_soc.clock_limits[i-1].socclk_mhz;
+                       else
+                               dcn3_02_soc.clock_limits[i].socclk_mhz = bw_params->clk_table.entries[i].socclk_mhz;
                        /* These clocks cannot come from bw_params, always fill from dcn3_02_soc[1] */
-                       /* FCLK, PHYCLK_D18, SOCCLK, DSCCLK */
+                       /* FCLK, PHYCLK_D18, DSCCLK */
                        dcn3_02_soc.clock_limits[i].phyclk_d18_mhz = dcn3_02_soc.clock_limits[0].phyclk_d18_mhz;
-                       dcn3_02_soc.clock_limits[i].socclk_mhz = dcn3_02_soc.clock_limits[0].socclk_mhz;
                        dcn3_02_soc.clock_limits[i].dscclk_mhz = dcn3_02_soc.clock_limits[0].dscclk_mhz;
                }
                /* re-init DML with updated bb */
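
Both this hunk and the dcn303 one below use the same fill-forward rule: a
zero (unpopulated) DTBCLK or SOCCLK entry inherits the previous level's
value. A minimal sketch of that pattern:

    #include <stdio.h>

    int main(void)
    {
            /* clock table with unpopulated (zero) middle levels */
            unsigned int entries[4] = { 600, 0, 0, 1200 };
            unsigned int clk[4];

            for (int i = 0; i < 4; i++)
                    clk[i] = (!entries[i] && i > 0) ? clk[i - 1] : entries[i];

            for (int i = 0; i < 4; i++)
                    printf("%u ", clk[i]);  /* prints: 600 600 600 1200 */
            printf("\n");
            return 0;
    }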
index 34b8946..833ab13 100644 (file)
@@ -1326,11 +1326,18 @@ void dcn303_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param
                        dcn3_03_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz;
                        dcn3_03_soc.clock_limits[i].dppclk_mhz  = max_dppclk_mhz;
                        dcn3_03_soc.clock_limits[i].phyclk_mhz  = max_phyclk_mhz;
-                       dcn3_03_soc.clock_limits[i].dtbclk_mhz = dcn3_03_soc.clock_limits[0].dtbclk_mhz;
+                       /* Populate from bw_params for DTBCLK, SOCCLK */
+                       if (!bw_params->clk_table.entries[i].dtbclk_mhz && i > 0)
+                               dcn3_03_soc.clock_limits[i].dtbclk_mhz = dcn3_03_soc.clock_limits[i-1].dtbclk_mhz;
+                       else
+                               dcn3_03_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz;
+                       if (!bw_params->clk_table.entries[i].socclk_mhz && i > 0)
+                               dcn3_03_soc.clock_limits[i].socclk_mhz = dcn3_03_soc.clock_limits[i-1].socclk_mhz;
+                       else
+                               dcn3_03_soc.clock_limits[i].socclk_mhz = bw_params->clk_table.entries[i].socclk_mhz;
                        /* These clocks cannot come from bw_params, always fill from dcn3_03_soc[1] */
-                       /* FCLK, PHYCLK_D18, SOCCLK, DSCCLK */
+                       /* FCLK, PHYCLK_D18, DSCCLK */
                        dcn3_03_soc.clock_limits[i].phyclk_d18_mhz = dcn3_03_soc.clock_limits[0].phyclk_d18_mhz;
-                       dcn3_03_soc.clock_limits[i].socclk_mhz = dcn3_03_soc.clock_limits[0].socclk_mhz;
                        dcn3_03_soc.clock_limits[i].dscclk_mhz = dcn3_03_soc.clock_limits[0].dscclk_mhz;
                }
                /* re-init DML with updated bb */
index 836864a..6ac6faf 100644 (file)
@@ -47,6 +47,7 @@
 #include "dce/dmub_outbox.h"
 #include "dc_link_dp.h"
 #include "inc/link_dpcd.h"
+#include "dcn10/dcn10_hw_sequencer.h"
 
 #define DC_LOGGER_INIT(logger)
 
@@ -594,3 +595,20 @@ bool dcn31_is_abm_supported(struct dc *dc,
        }
        return false;
 }
+
+static void apply_riommu_invalidation_wa(struct dc *dc)
+{
+       struct dce_hwseq *hws = dc->hwseq;
+
+       if (!hws->wa.early_riommu_invalidation)
+               return;
+
+       REG_UPDATE(DCHUBBUB_ARB_HOSTVM_CNTL, DISABLE_HOSTVM_FORCE_ALLOW_PSTATE, 0);
+}
+
+void dcn31_init_pipes(struct dc *dc, struct dc_state *context)
+{
+       dcn10_init_pipes(dc, context);
+       apply_riommu_invalidation_wa(dc);
+
+}
index ff72f0f..40dfebe 100644 (file)
@@ -52,5 +52,6 @@ void dcn31_reset_hw_ctx_wrap(
                struct dc_state *context);
 bool dcn31_is_abm_supported(struct dc *dc,
                struct dc_state *context, struct dc_stream_state *stream);
+void dcn31_init_pipes(struct dc *dc, struct dc_state *context);
 
 #endif /* __DC_HWSS_DCN31_H__ */
index e3048f8..aaf2dbd 100644 (file)
@@ -93,7 +93,6 @@ static const struct hw_sequencer_funcs dcn31_funcs = {
        .set_flip_control_gsl = dcn20_set_flip_control_gsl,
        .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync,
        .calc_vupdate_position = dcn10_calc_vupdate_position,
-       .apply_idle_power_optimizations = dcn30_apply_idle_power_optimizations,
        .set_backlight_level = dcn21_set_backlight_level,
        .set_abm_immediate_disable = dcn21_set_abm_immediate_disable,
        .set_pipe = dcn21_set_pipe,
@@ -104,7 +103,7 @@ static const struct hw_sequencer_funcs dcn31_funcs = {
 };
 
 static const struct hwseq_private_funcs dcn31_private_funcs = {
-       .init_pipes = dcn10_init_pipes,
+       .init_pipes = dcn31_init_pipes,
        .update_plane_addr = dcn20_update_plane_addr,
        .plane_atomic_disconnect = dcn10_plane_atomic_disconnect,
        .update_mpcc = dcn20_update_mpcc,
index c67bc95..38c010a 100644 (file)
@@ -220,6 +220,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_1_soc = {
        .sr_exit_z8_time_us = 402.0,
        .sr_enter_plus_exit_z8_time_us = 520.0,
        .writeback_latency_us = 12.0,
+       .dram_channel_width_bytes = 4,
        .round_trip_ping_latency_dcfclk_cycles = 106,
        .urgent_latency_pixel_data_only_us = 4.0,
        .urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
@@ -741,6 +742,7 @@ static const struct dccg_mask dccg_mask = {
 
 #define HWSEQ_DCN31_REG_LIST()\
        SR(DCHUBBUB_GLOBAL_TIMER_CNTL), \
+       SR(DCHUBBUB_ARB_HOSTVM_CNTL), \
        SR(DIO_MEM_PWR_CTRL), \
        SR(ODM_MEM_PWR_CTRL3), \
        SR(DMU_MEM_PWR_CNTL), \
@@ -801,6 +803,7 @@ static const struct dce_hwseq_registers hwseq_reg = {
 #define HWSEQ_DCN31_MASK_SH_LIST(mask_sh)\
        HWSEQ_DCN_MASK_SH_LIST(mask_sh), \
        HWS_SF(, DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_REFDIV, mask_sh), \
+       HWS_SF(, DCHUBBUB_ARB_HOSTVM_CNTL, DISABLE_HOSTVM_FORCE_ALLOW_PSTATE, mask_sh), \
        HWS_SF(, DOMAIN0_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
        HWS_SF(, DOMAIN0_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \
        HWS_SF(, DOMAIN1_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \
@@ -1299,6 +1302,7 @@ static struct dce_hwseq *dcn31_hwseq_create(
                hws->regs = &hwseq_reg;
                hws->shifts = &hwseq_shift;
                hws->masks = &hwseq_mask;
+               hws->wa.early_riommu_invalidation = true;
        }
        return hws;
 }
index c26e742..6655bb9 100644 (file)
@@ -841,6 +841,9 @@ static bool CalculatePrefetchSchedule(
        else
                *DestinationLinesForPrefetch = dst_y_prefetch_equ;
 
+       // Limit to prevent overflow in DST_Y_PREFETCH register
+       *DestinationLinesForPrefetch = dml_min(*DestinationLinesForPrefetch, 63.75);
+
        dml_print("DML: VStartup: %d\n", VStartup);
        dml_print("DML: TCalc: %f\n", TCalc);
        dml_print("DML: TWait: %f\n", TWait);
@@ -4889,7 +4892,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
                                }
                        } while ((locals->PrefetchSupported[i][j] != true || locals->VRatioInPrefetchSupported[i][j] != true)
                                        && (mode_lib->vba.NextMaxVStartup != mode_lib->vba.MaxMaxVStartup[0][0]
-                                               || mode_lib->vba.NextPrefetchMode < mode_lib->vba.MaxPrefetchMode));
+                                               || mode_lib->vba.NextPrefetchMode <= mode_lib->vba.MaxPrefetchMode));
 
                        if (locals->PrefetchSupported[i][j] == true && locals->VRatioInPrefetchSupported[i][j] == true) {
                                mode_lib->vba.BandwidthAvailableForImmediateFlip = locals->ReturnBWPerState[i][0];
index 2a0db2b..9ac9d5e 100644 (file)
@@ -289,6 +289,9 @@ struct dpp_caps {
        /* DSCL processing pixel data in fixed or float format */
        enum dscl_data_processing_format dscl_data_proc_format;
 
+       /* max LB partitions */
+       unsigned int max_lb_partitions;
+
        /* Calculates the number of partitions in the line buffer.
         * The implementation of this function is overloaded for
         * different versions of DSCL LB.
index f7f7e4f..082549f 100644 (file)
@@ -41,6 +41,7 @@ struct dce_hwseq_wa {
        bool DEGVIDCN10_254;
        bool DEGVIDCN21;
        bool disallow_self_refresh_during_multi_plane_transition;
+       bool early_riommu_invalidation;
 };
 
 struct hwseq_wa_state {
index 6102660..35fa0d8 100644 (file)
 #define PPSMC_MSG_SetSystemVirtualSTBtoDramAddrLow  0x41
 
 #define PPSMC_MSG_GfxDriverResetRecovery       0x42
-#define PPSMC_Message_Count                    0x43
+#define PPSMC_MSG_BoardPowerCalibration        0x43
+#define PPSMC_Message_Count                    0x44
 
 //PPSMC Reset Types
 #define PPSMC_RESET_TYPE_WARM_RESET              0x00
index 89a16dc..1d3765b 100644 (file)
        __SMU_DUMMY_MAP(DisableDeterminism),            \
        __SMU_DUMMY_MAP(SetUclkDpmMode),                \
        __SMU_DUMMY_MAP(LightSBR),                      \
-       __SMU_DUMMY_MAP(GfxDriverResetRecovery),
+       __SMU_DUMMY_MAP(GfxDriverResetRecovery),        \
+       __SMU_DUMMY_MAP(BoardPowerCalibration),
 
 #undef __SMU_DUMMY_MAP
 #define __SMU_DUMMY_MAP(type)  SMU_MSG_##type
index 1962a58..f61b5c9 100644 (file)
@@ -34,7 +34,7 @@
 #define SMU11_DRIVER_IF_VERSION_Navy_Flounder 0xE
 #define SMU11_DRIVER_IF_VERSION_VANGOGH 0x03
 #define SMU11_DRIVER_IF_VERSION_Dimgrey_Cavefish 0xF
-#define SMU11_DRIVER_IF_VERSION_Beige_Goby 0x9
+#define SMU11_DRIVER_IF_VERSION_Beige_Goby 0xD
 
 /* MP Apertures */
 #define MP0_Public                     0x03800000
index 9316a72..cb5485c 100644 (file)
@@ -134,6 +134,7 @@ static const struct cmn2asic_msg_mapping aldebaran_message_map[SMU_MSG_MAX_COUNT
        MSG_MAP(DisableDeterminism,                  PPSMC_MSG_DisableDeterminism,              0),
        MSG_MAP(SetUclkDpmMode,                      PPSMC_MSG_SetUclkDpmMode,                  0),
        MSG_MAP(GfxDriverResetRecovery,              PPSMC_MSG_GfxDriverResetRecovery,          0),
+       MSG_MAP(BoardPowerCalibration,               PPSMC_MSG_BoardPowerCalibration,           0),
 };
 
 static const struct cmn2asic_mapping aldebaran_clk_map[SMU_CLK_COUNT] = {
@@ -440,6 +441,39 @@ static int aldebaran_setup_pptable(struct smu_context *smu)
        return ret;
 }
 
+static bool aldebaran_is_primary(struct smu_context *smu)
+{
+       struct amdgpu_device *adev = smu->adev;
+
+       if (adev->smuio.funcs && adev->smuio.funcs->get_die_id)
+               return adev->smuio.funcs->get_die_id(adev) == 0;
+
+       return true;
+}
+
+static int aldebaran_run_board_btc(struct smu_context *smu)
+{
+       u32 smu_version;
+       int ret;
+
+       if (!aldebaran_is_primary(smu))
+               return 0;
+
+       ret = smu_cmn_get_smc_version(smu, NULL, &smu_version);
+       if (ret) {
+               dev_err(smu->adev->dev, "Failed to get smu version!\n");
+               return ret;
+       }
+       if (smu_version <= 0x00441d00)
+               return 0;
+
+       ret = smu_cmn_send_smc_msg(smu, SMU_MSG_BoardPowerCalibration, NULL);
+       if (ret)
+               dev_err(smu->adev->dev, "Board power calibration failed!\n");
+
+       return ret;
+}
+
 static int aldebaran_run_btc(struct smu_context *smu)
 {
        int ret;
@@ -447,6 +481,8 @@ static int aldebaran_run_btc(struct smu_context *smu)
        ret = smu_cmn_send_smc_msg(smu, SMU_MSG_RunDcBtc, NULL);
        if (ret)
                dev_err(smu->adev->dev, "RunDcBtc failed!\n");
+       else
+               ret = aldebaran_run_board_btc(smu);
 
        return ret;
 }
@@ -524,16 +560,6 @@ static int aldebaran_freqs_in_same_level(int32_t frequency1,
        return (abs(frequency1 - frequency2) <= EPSILON);
 }
 
-static bool aldebaran_is_primary(struct smu_context *smu)
-{
-       struct amdgpu_device *adev = smu->adev;
-
-       if (adev->smuio.funcs && adev->smuio.funcs->get_die_id)
-               return adev->smuio.funcs->get_die_id(adev) == 0;
-
-       return true;
-}
-
 static int aldebaran_get_smu_metrics_data(struct smu_context *smu,
                                          MetricsMember_t member,
                                          uint32_t *value)
index 98ae006..f454e04 100644 (file)
@@ -834,6 +834,9 @@ long drm_ioctl(struct file *filp,
        if (drm_dev_is_unplugged(dev))
                return -ENODEV;
 
+       if (DRM_IOCTL_TYPE(cmd) != DRM_IOCTL_BASE)
+               return -ENOTTY;
+
        is_driver_ioctl = nr >= DRM_COMMAND_BASE && nr < DRM_COMMAND_END;
 
        if (is_driver_ioctl) {
index 5b6922e..aa667fa 100644 (file)
@@ -2166,7 +2166,8 @@ static void
 init_vbt_missing_defaults(struct drm_i915_private *i915)
 {
        enum port port;
-       int ports = PORT_A | PORT_B | PORT_C | PORT_D | PORT_E | PORT_F;
+       int ports = BIT(PORT_A) | BIT(PORT_B) | BIT(PORT_C) |
+                   BIT(PORT_D) | BIT(PORT_E) | BIT(PORT_F);
 
        if (!HAS_DDI(i915) && !IS_CHERRYVIEW(i915))
                return;
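
The fix matters because PORT_A..PORT_F are small consecutive enum values, not
bit flags: OR-ing the raw values collapses the set. Assuming the ports
enumerate 0..5 (the usual i915 convention), a quick check:

    #include <stdio.h>

    #define BIT(n) (1u << (n))

    enum { PORT_A, PORT_B, PORT_C, PORT_D, PORT_E, PORT_F };

    int main(void)
    {
            /* old, buggy: 0|1|2|3|4|5 == 0x7, most ports are lost */
            printf("0x%x\n", PORT_A | PORT_B | PORT_C |
                             PORT_D | PORT_E | PORT_F);
            /* new: one bit per port, yields 0x3f */
            printf("0x%x\n", BIT(PORT_A) | BIT(PORT_B) | BIT(PORT_C) |
                             BIT(PORT_D) | BIT(PORT_E) | BIT(PORT_F));
            return 0;
    }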
index 3bad4e0..2d5d217 100644 (file)
@@ -11361,13 +11361,19 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv)
                intel_ddi_init(dev_priv, PORT_B);
                intel_ddi_init(dev_priv, PORT_C);
                vlv_dsi_init(dev_priv);
-       } else if (DISPLAY_VER(dev_priv) >= 9) {
+       } else if (DISPLAY_VER(dev_priv) == 10) {
                intel_ddi_init(dev_priv, PORT_A);
                intel_ddi_init(dev_priv, PORT_B);
                intel_ddi_init(dev_priv, PORT_C);
                intel_ddi_init(dev_priv, PORT_D);
                intel_ddi_init(dev_priv, PORT_E);
                intel_ddi_init(dev_priv, PORT_F);
+       } else if (DISPLAY_VER(dev_priv) >= 9) {
+               intel_ddi_init(dev_priv, PORT_A);
+               intel_ddi_init(dev_priv, PORT_B);
+               intel_ddi_init(dev_priv, PORT_C);
+               intel_ddi_init(dev_priv, PORT_D);
+               intel_ddi_init(dev_priv, PORT_E);
        } else if (HAS_DDI(dev_priv)) {
                u32 found;
 
index a8abc9a..4a6419d 100644 (file)
 #include "i915_gem_clflush.h"
 #include "i915_gem_context.h"
 #include "i915_gem_ioctls.h"
-#include "i915_sw_fence_work.h"
 #include "i915_trace.h"
 #include "i915_user_extensions.h"
-#include "i915_memcpy.h"
 
 struct eb_vma {
        struct i915_vma *vma;
@@ -1456,6 +1454,10 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb,
                int err;
                struct intel_engine_cs *engine = eb->engine;
 
+               /* If we need to copy for the cmdparser, we will stall anyway */
+               if (eb_use_cmdparser(eb))
+                       return ERR_PTR(-EWOULDBLOCK);
+
                if (!reloc_can_use_engine(engine)) {
                        engine = engine->gt->engine_class[COPY_ENGINE_CLASS][0];
                        if (!engine)
@@ -2372,217 +2374,6 @@ shadow_batch_pin(struct i915_execbuffer *eb,
        return vma;
 }
 
-struct eb_parse_work {
-       struct dma_fence_work base;
-       struct intel_engine_cs *engine;
-       struct i915_vma *batch;
-       struct i915_vma *shadow;
-       struct i915_vma *trampoline;
-       unsigned long batch_offset;
-       unsigned long batch_length;
-       unsigned long *jump_whitelist;
-       const void *batch_map;
-       void *shadow_map;
-};
-
-static int __eb_parse(struct dma_fence_work *work)
-{
-       struct eb_parse_work *pw = container_of(work, typeof(*pw), base);
-       int ret;
-       bool cookie;
-
-       cookie = dma_fence_begin_signalling();
-       ret = intel_engine_cmd_parser(pw->engine,
-                                     pw->batch,
-                                     pw->batch_offset,
-                                     pw->batch_length,
-                                     pw->shadow,
-                                     pw->jump_whitelist,
-                                     pw->shadow_map,
-                                     pw->batch_map);
-       dma_fence_end_signalling(cookie);
-
-       return ret;
-}
-
-static void __eb_parse_release(struct dma_fence_work *work)
-{
-       struct eb_parse_work *pw = container_of(work, typeof(*pw), base);
-
-       if (!IS_ERR_OR_NULL(pw->jump_whitelist))
-               kfree(pw->jump_whitelist);
-
-       if (pw->batch_map)
-               i915_gem_object_unpin_map(pw->batch->obj);
-       else
-               i915_gem_object_unpin_pages(pw->batch->obj);
-
-       i915_gem_object_unpin_map(pw->shadow->obj);
-
-       if (pw->trampoline)
-               i915_active_release(&pw->trampoline->active);
-       i915_active_release(&pw->shadow->active);
-       i915_active_release(&pw->batch->active);
-}
-
-static const struct dma_fence_work_ops eb_parse_ops = {
-       .name = "eb_parse",
-       .work = __eb_parse,
-       .release = __eb_parse_release,
-};
-
-static inline int
-__parser_mark_active(struct i915_vma *vma,
-                    struct intel_timeline *tl,
-                    struct dma_fence *fence)
-{
-       struct intel_gt_buffer_pool_node *node = vma->private;
-
-       return i915_active_ref(&node->active, tl->fence_context, fence);
-}
-
-static int
-parser_mark_active(struct eb_parse_work *pw, struct intel_timeline *tl)
-{
-       int err;
-
-       mutex_lock(&tl->mutex);
-
-       err = __parser_mark_active(pw->shadow, tl, &pw->base.dma);
-       if (err)
-               goto unlock;
-
-       if (pw->trampoline) {
-               err = __parser_mark_active(pw->trampoline, tl, &pw->base.dma);
-               if (err)
-                       goto unlock;
-       }
-
-unlock:
-       mutex_unlock(&tl->mutex);
-       return err;
-}
-
-static int eb_parse_pipeline(struct i915_execbuffer *eb,
-                            struct i915_vma *shadow,
-                            struct i915_vma *trampoline)
-{
-       struct eb_parse_work *pw;
-       struct drm_i915_gem_object *batch = eb->batch->vma->obj;
-       bool needs_clflush;
-       int err;
-
-       GEM_BUG_ON(overflows_type(eb->batch_start_offset, pw->batch_offset));
-       GEM_BUG_ON(overflows_type(eb->batch_len, pw->batch_length));
-
-       pw = kzalloc(sizeof(*pw), GFP_KERNEL);
-       if (!pw)
-               return -ENOMEM;
-
-       err = i915_active_acquire(&eb->batch->vma->active);
-       if (err)
-               goto err_free;
-
-       err = i915_active_acquire(&shadow->active);
-       if (err)
-               goto err_batch;
-
-       if (trampoline) {
-               err = i915_active_acquire(&trampoline->active);
-               if (err)
-                       goto err_shadow;
-       }
-
-       pw->shadow_map = i915_gem_object_pin_map(shadow->obj, I915_MAP_WB);
-       if (IS_ERR(pw->shadow_map)) {
-               err = PTR_ERR(pw->shadow_map);
-               goto err_trampoline;
-       }
-
-       needs_clflush =
-               !(batch->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ);
-
-       pw->batch_map = ERR_PTR(-ENODEV);
-       if (needs_clflush && i915_has_memcpy_from_wc())
-               pw->batch_map = i915_gem_object_pin_map(batch, I915_MAP_WC);
-
-       if (IS_ERR(pw->batch_map)) {
-               err = i915_gem_object_pin_pages(batch);
-               if (err)
-                       goto err_unmap_shadow;
-               pw->batch_map = NULL;
-       }
-
-       pw->jump_whitelist =
-               intel_engine_cmd_parser_alloc_jump_whitelist(eb->batch_len,
-                                                            trampoline);
-       if (IS_ERR(pw->jump_whitelist)) {
-               err = PTR_ERR(pw->jump_whitelist);
-               goto err_unmap_batch;
-       }
-
-       dma_fence_work_init(&pw->base, &eb_parse_ops);
-
-       pw->engine = eb->engine;
-       pw->batch = eb->batch->vma;
-       pw->batch_offset = eb->batch_start_offset;
-       pw->batch_length = eb->batch_len;
-       pw->shadow = shadow;
-       pw->trampoline = trampoline;
-
-       /* Mark active refs early for this worker, in case we get interrupted */
-       err = parser_mark_active(pw, eb->context->timeline);
-       if (err)
-               goto err_commit;
-
-       err = dma_resv_reserve_shared(pw->batch->resv, 1);
-       if (err)
-               goto err_commit;
-
-       err = dma_resv_reserve_shared(shadow->resv, 1);
-       if (err)
-               goto err_commit;
-
-       /* Wait for all writes (and relocs) into the batch to complete */
-       err = i915_sw_fence_await_reservation(&pw->base.chain,
-                                             pw->batch->resv, NULL, false,
-                                             0, I915_FENCE_GFP);
-       if (err < 0)
-               goto err_commit;
-
-       /* Keep the batch alive and unwritten as we parse */
-       dma_resv_add_shared_fence(pw->batch->resv, &pw->base.dma);
-
-       /* Force execution to wait for completion of the parser */
-       dma_resv_add_excl_fence(shadow->resv, &pw->base.dma);
-
-       dma_fence_work_commit_imm(&pw->base);
-       return 0;
-
-err_commit:
-       i915_sw_fence_set_error_once(&pw->base.chain, err);
-       dma_fence_work_commit_imm(&pw->base);
-       return err;
-
-err_unmap_batch:
-       if (pw->batch_map)
-               i915_gem_object_unpin_map(batch);
-       else
-               i915_gem_object_unpin_pages(batch);
-err_unmap_shadow:
-       i915_gem_object_unpin_map(shadow->obj);
-err_trampoline:
-       if (trampoline)
-               i915_active_release(&trampoline->active);
-err_shadow:
-       i915_active_release(&shadow->active);
-err_batch:
-       i915_active_release(&eb->batch->vma->active);
-err_free:
-       kfree(pw);
-       return err;
-}
-
 static struct i915_vma *eb_dispatch_secure(struct i915_execbuffer *eb, struct i915_vma *vma)
 {
        /*
@@ -2672,7 +2463,15 @@ static int eb_parse(struct i915_execbuffer *eb)
                goto err_trampoline;
        }
 
-       err = eb_parse_pipeline(eb, shadow, trampoline);
+       err = dma_resv_reserve_shared(shadow->resv, 1);
+       if (err)
+               goto err_trampoline;
+
+       err = intel_engine_cmd_parser(eb->engine,
+                                     eb->batch->vma,
+                                     eb->batch_start_offset,
+                                     eb->batch_len,
+                                     shadow, trampoline);
        if (err)
                goto err_unpin_batch;
 
index 4df505e..16162fc 100644
@@ -125,6 +125,10 @@ static int igt_gpu_reloc(void *arg)
        intel_gt_pm_get(&eb.i915->gt);
 
        for_each_uabi_engine(eb.engine, eb.i915) {
+               if (intel_engine_requires_cmd_parser(eb.engine) ||
+                   intel_engine_using_cmd_parser(eb.engine))
+                       continue;
+
                reloc_cache_init(&eb.reloc_cache, eb.i915);
                memset(map, POISON_INUSE, 4096);
 
index 98eb48c..06024d3 100644
@@ -1977,6 +1977,21 @@ static int elsp_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
        if (drm_WARN_ON(&i915->drm, !engine))
                return -EINVAL;
 
+       /*
+        * d3_entered indicates that PPGTT invalidation should be skipped on
+        * vGPU reset: it is set on the D0->D3 PCI config write and cleared
+        * after a vGPU reset while resuming.
+        * On S0ix exit the device power state also transitions from D3 to D0,
+        * as in S3 resume, but without a vGPU reset (which is triggered by the
+        * QEMU device model). After S0ix exit all engines continue to work,
+        * but d3_entered remains set, which breaks the next vGPU reset logic
+        * (the expected PPGTT invalidation is missed).
+        * Engines can only work in D0, so the first elsp write gives GVT a
+        * chance to clear d3_entered.
+        */
+       if (vgpu->d3_entered)
+               vgpu->d3_entered = false;
+
        execlist = &vgpu->submission.execlist[engine->id];
 
        execlist->elsp_dwords.data[3 - execlist->elsp_dwords.index] = data;
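
For reference, the flag lifecycle described above can be modeled as a tiny userspace state machine; the names and events below are illustrative stand-ins, not GVT's real API:

#include <stdbool.h>
#include <stdio.h>

/* d3_entered is set on D0->D3 and normally consumed by the next vGPU
 * reset; an S0ix exit skips that reset, so the first ELSP write (engines
 * only run in D0) is the point where a stale flag gets dropped. */
struct vgpu_model {
        bool d3_entered;
};

static void pci_d0_to_d3(struct vgpu_model *v)
{
        v->d3_entered = true;
}

static void elsp_write(struct vgpu_model *v)
{
        v->d3_entered = false;  /* device is necessarily in D0 here */
}

static void vgpu_reset(struct vgpu_model *v)
{
        puts(v->d3_entered ? "skip PPGTT invalidation"
                           : "invalidate PPGTT");
        v->d3_entered = false;
}

int main(void)
{
        struct vgpu_model v = { false };

        pci_d0_to_d3(&v);       /* S0ix entry: D0 -> D3 */
        /* S0ix exit: D3 -> D0, but no vGPU reset is triggered */
        elsp_write(&v);         /* stale flag cleared here */
        vgpu_reset(&v);         /* a later reset now invalidates PPGTT */
        return 0;
}
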
index 3992c25..a3b4d99 100644
@@ -1145,19 +1145,41 @@ find_reg(const struct intel_engine_cs *engine, u32 addr)
 static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
                       struct drm_i915_gem_object *src_obj,
                       unsigned long offset, unsigned long length,
-                      void *dst, const void *src)
+                      bool *needs_clflush_after)
 {
-       bool needs_clflush =
-               !(src_obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ);
-
-       if (src) {
-               GEM_BUG_ON(!needs_clflush);
-               i915_unaligned_memcpy_from_wc(dst, src + offset, length);
-       } else {
-               struct scatterlist *sg;
+       unsigned int src_needs_clflush;
+       unsigned int dst_needs_clflush;
+       void *dst, *src;
+       int ret;
+
+       ret = i915_gem_object_prepare_write(dst_obj, &dst_needs_clflush);
+       if (ret)
+               return ERR_PTR(ret);
+
+       dst = i915_gem_object_pin_map(dst_obj, I915_MAP_WB);
+       i915_gem_object_finish_access(dst_obj);
+       if (IS_ERR(dst))
+               return dst;
+
+       ret = i915_gem_object_prepare_read(src_obj, &src_needs_clflush);
+       if (ret) {
+               i915_gem_object_unpin_map(dst_obj);
+               return ERR_PTR(ret);
+       }
+
+       src = ERR_PTR(-ENODEV);
+       if (src_needs_clflush && i915_has_memcpy_from_wc()) {
+               src = i915_gem_object_pin_map(src_obj, I915_MAP_WC);
+               if (!IS_ERR(src)) {
+                       i915_unaligned_memcpy_from_wc(dst,
+                                                     src + offset,
+                                                     length);
+                       i915_gem_object_unpin_map(src_obj);
+               }
+       }
+       if (IS_ERR(src)) {
+               unsigned long x, n, remain;
                void *ptr;
-               unsigned int x, sg_ofs;
-               unsigned long remain;
 
                /*
                 * We can avoid clflushing partial cachelines before the write
@@ -1168,40 +1190,34 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
                 * validate up to the end of the batch.
                 */
                remain = length;
-               if (!(dst_obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
+               if (dst_needs_clflush & CLFLUSH_BEFORE)
                        remain = round_up(remain,
                                          boot_cpu_data.x86_clflush_size);
 
                ptr = dst;
                x = offset_in_page(offset);
-               sg = i915_gem_object_get_sg(src_obj, offset >> PAGE_SHIFT, &sg_ofs, false);
-
-               while (remain) {
-                       unsigned long sg_max = sg->length >> PAGE_SHIFT;
-
-                       for (; remain && sg_ofs < sg_max; sg_ofs++) {
-                               unsigned long len = min(remain, PAGE_SIZE - x);
-                               void *map;
-
-                               map = kmap_atomic(nth_page(sg_page(sg), sg_ofs));
-                               if (needs_clflush)
-                                       drm_clflush_virt_range(map + x, len);
-                               memcpy(ptr, map + x, len);
-                               kunmap_atomic(map);
-
-                               ptr += len;
-                               remain -= len;
-                               x = 0;
-                       }
-
-                       sg_ofs = 0;
-                       sg = sg_next(sg);
+               for (n = offset >> PAGE_SHIFT; remain; n++) {
+                       int len = min(remain, PAGE_SIZE - x);
+
+                       src = kmap_atomic(i915_gem_object_get_page(src_obj, n));
+                       if (src_needs_clflush)
+                               drm_clflush_virt_range(src + x, len);
+                       memcpy(ptr, src + x, len);
+                       kunmap_atomic(src);
+
+                       ptr += len;
+                       remain -= len;
+                       x = 0;
                }
        }
 
+       i915_gem_object_finish_access(src_obj);
+
        memset32(dst + length, 0, (dst_obj->base.size - length) / sizeof(u32));
 
        /* dst_obj is returned with vmap pinned */
+       *needs_clflush_after = dst_needs_clflush & CLFLUSH_AFTER;
+
        return dst;
 }
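
As an aside, the round_up() a few lines above pads the validated length to whole cachelines so the trailing partial line needs no clflush before the write; a minimal sketch, assuming a typical 64-byte x86 clflush line and a simplified round_up() (the kernel macro requires a power-of-two alignment):

#include <stdio.h>

#define round_up(x, y)  ((((x) + (y) - 1) / (y)) * (y))

int main(void)
{
        unsigned long remain = 100;     /* bytes left to validate */
        unsigned long clflush = 64;     /* assumed x86_clflush_size */

        /* 100 -> 128: copying whole cachelines means the trailing
         * partial line never needs flushing before the shadow write. */
        printf("%lu -> %lu\n", remain, round_up(remain, clflush));
        return 0;
}
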
 
@@ -1360,6 +1376,9 @@ static int check_bbstart(u32 *cmd, u32 offset, u32 length,
        if (target_cmd_index == offset)
                return 0;
 
+       if (IS_ERR(jump_whitelist))
+               return PTR_ERR(jump_whitelist);
+
        if (!test_bit(target_cmd_index, jump_whitelist)) {
                DRM_DEBUG("CMD: BB_START to 0x%llx not a previously executed cmd\n",
                          jump_target);
@@ -1369,28 +1388,10 @@ static int check_bbstart(u32 *cmd, u32 offset, u32 length,
        return 0;
 }
 
-/**
- * intel_engine_cmd_parser_alloc_jump_whitelist() - preallocate jump whitelist for intel_engine_cmd_parser()
- * @batch_length: length of the commands in batch_obj
- * @trampoline: Whether jump trampolines are used.
- *
- * Preallocates a jump whitelist for parsing the cmd buffer in intel_engine_cmd_parser().
- * This has to be preallocated, because the command parser runs in signaling context,
- * and may not allocate any memory.
- *
- * Return: NULL or pointer to a jump whitelist, or ERR_PTR() on failure. Use
- * IS_ERR() to check for errors. Must bre freed() with kfree().
- *
- * NULL is a valid value, meaning no allocation was required.
- */
-unsigned long *intel_engine_cmd_parser_alloc_jump_whitelist(u32 batch_length,
-                                                           bool trampoline)
+static unsigned long *alloc_whitelist(u32 batch_length)
 {
        unsigned long *jmp;
 
-       if (trampoline)
-               return NULL;
-
        /*
         * We expect batch_length to be less than 256KiB for known users,
         * i.e. we need at most an 8KiB bitmap allocation which should be
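
The 256KiB-batch/8KiB-bitmap sizing in the comment above follows from one whitelist bit per u32 command word; a back-of-envelope check with illustrative numbers:

#include <stdio.h>

int main(void)
{
        unsigned long batch_length = 256 * 1024; /* bytes, expected upper bound */
        unsigned long words = batch_length / 4;  /* u32 command words */
        unsigned long bitmap_bytes = words / 8;  /* one bit per word */

        /* Prints "256 KiB batch -> 8 KiB bitmap". */
        printf("%lu KiB batch -> %lu KiB bitmap\n",
               batch_length >> 10, bitmap_bytes >> 10);
        return 0;
}
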
@@ -1415,9 +1416,7 @@ unsigned long *intel_engine_cmd_parser_alloc_jump_whitelist(u32 batch_length,
  * @batch_offset: byte offset in the batch at which execution starts
  * @batch_length: length of the commands in batch_obj
  * @shadow: validated copy of the batch buffer in question
- * @jump_whitelist: buffer preallocated with intel_engine_cmd_parser_alloc_jump_whitelist()
- * @shadow_map: mapping to @shadow vma
- * @batch_map: mapping to @batch vma
+ * @trampoline: true if we need to trampoline into privileged execution
  *
  * Parses the specified batch buffer looking for privilege violations as
  * described in the overview.
@@ -1425,21 +1424,21 @@ unsigned long *intel_engine_cmd_parser_alloc_jump_whitelist(u32 batch_length,
  * Return: non-zero if the parser finds violations or otherwise fails; -EACCES
  * if the batch appears legal but should use hardware parsing
  */
+
 int intel_engine_cmd_parser(struct intel_engine_cs *engine,
                            struct i915_vma *batch,
                            unsigned long batch_offset,
                            unsigned long batch_length,
                            struct i915_vma *shadow,
-                           unsigned long *jump_whitelist,
-                           void *shadow_map,
-                           const void *batch_map)
+                           bool trampoline)
 {
        u32 *cmd, *batch_end, offset = 0;
        struct drm_i915_cmd_descriptor default_desc = noop_desc;
        const struct drm_i915_cmd_descriptor *desc = &default_desc;
+       bool needs_clflush_after = false;
+       unsigned long *jump_whitelist;
        u64 batch_addr, shadow_addr;
        int ret = 0;
-       bool trampoline = !jump_whitelist;
 
        GEM_BUG_ON(!IS_ALIGNED(batch_offset, sizeof(*cmd)));
        GEM_BUG_ON(!IS_ALIGNED(batch_length, sizeof(*cmd)));
@@ -1447,8 +1446,18 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
                                     batch->size));
        GEM_BUG_ON(!batch_length);
 
-       cmd = copy_batch(shadow->obj, batch->obj, batch_offset, batch_length,
-                        shadow_map, batch_map);
+       cmd = copy_batch(shadow->obj, batch->obj,
+                        batch_offset, batch_length,
+                        &needs_clflush_after);
+       if (IS_ERR(cmd)) {
+               DRM_DEBUG("CMD: Failed to copy batch\n");
+               return PTR_ERR(cmd);
+       }
+
+       jump_whitelist = NULL;
+       if (!trampoline)
+               /* Defer failure until attempted use */
+               jump_whitelist = alloc_whitelist(batch_length);
 
        shadow_addr = gen8_canonical_addr(shadow->node.start);
        batch_addr = gen8_canonical_addr(batch->node.start + batch_offset);
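
The "defer failure until attempted use" pattern above keeps a possibly-failed allocation around and only reports the error if a jump actually consults the whitelist (compare the check_bbstart hunk earlier); a minimal userspace sketch with stand-ins for the kernel's error-pointer helpers:

#include <errno.h>
#include <stdio.h>

#define ERR_PTR(err)    ((void *)(long)(err))
#define IS_ERR(ptr)     ((unsigned long)(ptr) >= (unsigned long)-4095)
#define PTR_ERR(ptr)    ((long)(ptr))

static void *alloc_whitelist(void)
{
        return ERR_PTR(-ENOMEM);        /* pretend the allocation failed */
}

static int check_jump(void *whitelist)
{
        /* Only batches containing a jump reach this point, so the
         * allocation failure surfaces here, not at parse start. */
        if (IS_ERR(whitelist))
                return PTR_ERR(whitelist);
        return 0;
}

int main(void)
{
        void *wl = alloc_whitelist();   /* failure tolerated for now */
        printf("check_jump -> %d\n", check_jump(wl));   /* -12 (ENOMEM) */
        return 0;
}
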
@@ -1549,6 +1558,9 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
 
        i915_gem_object_flush_map(shadow->obj);
 
+       if (!IS_ERR_OR_NULL(jump_whitelist))
+               kfree(jump_whitelist);
+       i915_gem_object_unpin_map(shadow->obj);
        return ret;
 }
 
index 38ff2fb..b30397b 100644
@@ -1906,17 +1906,12 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type);
 int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv);
 int intel_engine_init_cmd_parser(struct intel_engine_cs *engine);
 void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine);
-unsigned long *intel_engine_cmd_parser_alloc_jump_whitelist(u32 batch_length,
-                                                           bool trampoline);
-
 int intel_engine_cmd_parser(struct intel_engine_cs *engine,
                            struct i915_vma *batch,
                            unsigned long batch_offset,
                            unsigned long batch_length,
                            struct i915_vma *shadow,
-                           unsigned long *jump_whitelist,
-                           void *shadow_map,
-                           const void *batch_map);
+                           bool trampoline);
 #define I915_CMD_PARSER_TRAMPOLINE_SIZE 8
 
 /* intel_device_info.c */
index 1014c71..37aef13 100644
@@ -1426,10 +1426,8 @@ i915_request_await_execution(struct i915_request *rq,
 
        do {
                fence = *child++;
-               if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
-                       i915_sw_fence_set_error_once(&rq->submit, fence->error);
+               if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
                        continue;
-               }
 
                if (fence->context == rq->fence.context)
                        continue;
@@ -1527,10 +1525,8 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
 
        do {
                fence = *child++;
-               if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
-                       i915_sw_fence_set_error_once(&rq->submit, fence->error);
+               if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
                        continue;
-               }
 
                /*
                 * Requests on the same timeline are explicitly ordered, along
index 7eaa92f..e0a10f3 100644
@@ -325,7 +325,7 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
                        info->pipe_mask &= ~BIT(PIPE_C);
                        info->cpu_transcoder_mask &= ~BIT(TRANSCODER_C);
                }
-       } else if (HAS_DISPLAY(dev_priv) && GRAPHICS_VER(dev_priv) >= 9) {
+       } else if (HAS_DISPLAY(dev_priv) && DISPLAY_VER(dev_priv) >= 9) {
                u32 dfsm = intel_de_read(dev_priv, SKL_DFSM);
 
                if (dfsm & SKL_DFSM_PIPE_A_DISABLE) {
@@ -340,7 +340,8 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
                        info->pipe_mask &= ~BIT(PIPE_C);
                        info->cpu_transcoder_mask &= ~BIT(TRANSCODER_C);
                }
-               if (GRAPHICS_VER(dev_priv) >= 12 &&
+
+               if (DISPLAY_VER(dev_priv) >= 12 &&
                    (dfsm & TGL_DFSM_PIPE_D_DISABLE)) {
                        info->pipe_mask &= ~BIT(PIPE_D);
                        info->cpu_transcoder_mask &= ~BIT(TRANSCODER_D);
@@ -352,10 +353,10 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
                if (dfsm & SKL_DFSM_DISPLAY_PM_DISABLE)
                        info->display.has_fbc = 0;
 
-               if (GRAPHICS_VER(dev_priv) >= 11 && (dfsm & ICL_DFSM_DMC_DISABLE))
+               if (DISPLAY_VER(dev_priv) >= 11 && (dfsm & ICL_DFSM_DMC_DISABLE))
                        info->display.has_dmc = 0;
 
-               if (GRAPHICS_VER(dev_priv) >= 10 &&
+               if (DISPLAY_VER(dev_priv) >= 10 &&
                    (dfsm & CNL_DFSM_DISPLAY_DSC_DISABLE))
                        info->display.has_dsc = 0;
        }
index d01c4c9..704dace 100644
@@ -296,7 +296,7 @@ static const struct dpu_mdp_cfg sc7180_mdp[] = {
 static const struct dpu_mdp_cfg sm8250_mdp[] = {
        {
        .name = "top_0", .id = MDP_TOP,
-       .base = 0x0, .len = 0x45C,
+       .base = 0x0, .len = 0x494,
        .features = 0,
        .highest_bank_bit = 0x3, /* TODO: 2 for LP_DDR4 */
        .clk_ctrls[DPU_CLK_CTRL_VIG0] = {
index ca96e35..c0423e7 100644
@@ -771,6 +771,7 @@ int dp_catalog_panel_timing_cfg(struct dp_catalog *dp_catalog)
        dp_write_link(catalog, REG_DP_HSYNC_VSYNC_WIDTH_POLARITY,
                                dp_catalog->width_blanking);
        dp_write_link(catalog, REG_DP_ACTIVE_HOR_VER, dp_catalog->dp_active);
+       dp_write_p0(catalog, MMSS_DP_INTF_CONFIG, 0);
        return 0;
 }
 
index ee221d8..eaddfd7 100644
@@ -1526,7 +1526,7 @@ static int dp_ctrl_process_phy_test_request(struct dp_ctrl_private *ctrl)
         * running. Add the global reset just before disabling the
         * link clocks and core clocks.
         */
-       ret = dp_ctrl_off(&ctrl->dp_ctrl);
+       ret = dp_ctrl_off_link_stream(&ctrl->dp_ctrl);
        if (ret) {
                DRM_ERROR("failed to disable DP controller\n");
                return ret;
index 051c1be..867388a 100644
@@ -219,6 +219,7 @@ static int dp_display_bind(struct device *dev, struct device *master,
                goto end;
        }
 
+       dp->aux->drm_dev = drm;
        rc = dp_aux_register(dp->aux);
        if (rc) {
                DRM_ERROR("DRM DP AUX register failed\n");
@@ -1311,6 +1312,10 @@ static int dp_pm_resume(struct device *dev)
        else
                dp->dp_display.is_connected = false;
 
+       dp_display_handle_plugged_change(g_dp_display,
+                               dp->dp_display.is_connected);
+
+
        mutex_unlock(&dp->event_mutex);
 
        return 0;
index eed2a76..bcaddbb 100644
@@ -142,6 +142,9 @@ static const struct iommu_flush_ops null_tlb_ops = {
        .tlb_add_page = msm_iommu_tlb_add_page,
 };
 
+static int msm_fault_handler(struct iommu_domain *domain, struct device *dev,
+               unsigned long iova, int flags, void *arg);
+
 struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent)
 {
        struct adreno_smmu_priv *adreno_smmu = dev_get_drvdata(parent->dev);
@@ -157,6 +160,13 @@ struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent)
        if (!ttbr1_cfg)
                return ERR_PTR(-ENODEV);
 
+       /*
+        * Defer setting the fault handler until we have a valid adreno_smmu
+        * to avoid accidentally installing a GPU-specific fault handler for
+        * the display's iommu.
+        */
+       iommu_set_fault_handler(iommu->domain, msm_fault_handler, iommu);
+
        pagetable = kzalloc(sizeof(*pagetable), GFP_KERNEL);
        if (!pagetable)
                return ERR_PTR(-ENOMEM);
@@ -300,7 +310,6 @@ struct msm_mmu *msm_iommu_new(struct device *dev, struct iommu_domain *domain)
 
        iommu->domain = domain;
        msm_mmu_init(&iommu->base, dev, &funcs, MSM_MMU_IOMMU);
-       iommu_set_fault_handler(domain, msm_fault_handler, iommu);
 
        atomic_set(&iommu->pagetables, 0);
 
index 4f3a535..6d07e65 100644
@@ -149,6 +149,8 @@ nouveau_bo_del_ttm(struct ttm_buffer_object *bo)
         */
        if (bo->base.dev)
                drm_gem_object_release(&bo->base);
+       else
+               dma_resv_fini(&bo->base._resv);
 
        kfree(nvbo);
 }
@@ -330,6 +332,10 @@ nouveau_bo_new(struct nouveau_cli *cli, u64 size, int align,
        if (IS_ERR(nvbo))
                return PTR_ERR(nvbo);
 
+       nvbo->bo.base.size = size;
+       dma_resv_init(&nvbo->bo.base._resv);
+       drm_vma_node_reset(&nvbo->bo.base.vma_node);
+
        ret = nouveau_bo_init(nvbo, size, align, domain, sg, robj);
        if (ret)
                return ret;
index 2229f1a..46029c5 100644
@@ -447,7 +447,6 @@ static int rpi_touchscreen_remove(struct i2c_client *i2c)
        drm_panel_remove(&ts->base);
 
        mipi_dsi_device_unregister(ts->dsi);
-       kfree(ts->dsi);
 
        return 0;
 }
index 21939d4..1b80290 100644
@@ -4166,7 +4166,7 @@ static const struct drm_display_mode yes_optoelectronics_ytc700tlag_05_201c_mode
 static const struct panel_desc yes_optoelectronics_ytc700tlag_05_201c = {
        .modes = &yes_optoelectronics_ytc700tlag_05_201c_mode,
        .num_modes = 1,
-       .bpc = 6,
+       .bpc = 8,
        .size = {
                .width = 154,
                .height = 90,
index 1b950b4..8d7fd65 100644
@@ -102,6 +102,9 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
                return;
        }
 
+       if (!mem)
+               return;
+
        man = ttm_manager_type(bdev, mem->mem_type);
        list_move_tail(&bo->lru, &man->lru[bo->priority]);
 
index 2f57f82..763fa6f 100644
@@ -63,6 +63,9 @@ int ttm_mem_io_reserve(struct ttm_device *bdev,
 void ttm_mem_io_free(struct ttm_device *bdev,
                     struct ttm_resource *mem)
 {
+       if (!mem)
+               return;
+
        if (!mem->bus.offset && !mem->bus.addr)
                return;
 
index 5f31ace..74e3b46 100644
@@ -44,6 +44,8 @@ static unsigned ttm_glob_use_count;
 struct ttm_global ttm_glob;
 EXPORT_SYMBOL(ttm_glob);
 
+struct dentry *ttm_debugfs_root;
+
 static void ttm_global_release(void)
 {
        struct ttm_global *glob = &ttm_glob;
@@ -53,6 +55,7 @@ static void ttm_global_release(void)
                goto out;
 
        ttm_pool_mgr_fini();
+       debugfs_remove(ttm_debugfs_root);
 
        __free_page(glob->dummy_read_page);
        memset(glob, 0, sizeof(*glob));
@@ -73,6 +76,13 @@ static int ttm_global_init(void)
 
        si_meminfo(&si);
 
+       ttm_debugfs_root = debugfs_create_dir("ttm", NULL);
+       if (IS_ERR(ttm_debugfs_root)) {
+               ret = PTR_ERR(ttm_debugfs_root);
+               ttm_debugfs_root = NULL;
+               goto out;
+       }
+
        /* Limit the number of pages in the pool to about 50% of the total
         * system memory.
         */
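
For scale, the 50% cap mentioned in that comment works out as below; the RAM size is an assumed example, and the kernel takes the page count from si_meminfo():

#include <stdio.h>

int main(void)
{
        /* Assume 16 GiB of RAM in 4 KiB pages. */
        unsigned long totalram_pages = 16UL * 1024 * 1024 * 1024 / 4096;
        unsigned long pool_limit = totalram_pages * 50 / 100;

        printf("pool limit: %lu pages (~%lu GiB)\n", pool_limit,
               pool_limit * 4096 / (1024UL * 1024 * 1024));
        return 0;
}
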
@@ -100,6 +110,10 @@ static int ttm_global_init(void)
        debugfs_create_atomic_t("buffer_objects", 0444, ttm_debugfs_root,
                                &glob->bo_count);
 out:
+       if (ret && ttm_debugfs_root)
+               debugfs_remove(ttm_debugfs_root);
+       if (ret)
+               --ttm_glob_use_count;
        mutex_unlock(&ttm_global_mutex);
        return ret;
 }
index 997c458..7fcdef2 100644
@@ -72,22 +72,6 @@ pgprot_t ttm_prot_from_caching(enum ttm_caching caching, pgprot_t tmp)
        return tmp;
 }
 
-struct dentry *ttm_debugfs_root;
-
-static int __init ttm_init(void)
-{
-       ttm_debugfs_root = debugfs_create_dir("ttm", NULL);
-       return 0;
-}
-
-static void __exit ttm_exit(void)
-{
-       debugfs_remove(ttm_debugfs_root);
-}
-
-module_init(ttm_init);
-module_exit(ttm_exit);
-
 MODULE_AUTHOR("Thomas Hellstrom, Jerome Glisse");
 MODULE_DESCRIPTION("TTM memory manager subsystem (for DRM device)");
 MODULE_LICENSE("GPL and additional rights");
index aab1b36..c287673 100644
@@ -1857,38 +1857,46 @@ static int vc4_hdmi_cec_init(struct vc4_hdmi *vc4_hdmi)
        vc4_hdmi_cec_update_clk_div(vc4_hdmi);
 
        if (vc4_hdmi->variant->external_irq_controller) {
-               ret = devm_request_threaded_irq(&pdev->dev,
-                                               platform_get_irq_byname(pdev, "cec-rx"),
-                                               vc4_cec_irq_handler_rx_bare,
-                                               vc4_cec_irq_handler_rx_thread, 0,
-                                               "vc4 hdmi cec rx", vc4_hdmi);
+               ret = request_threaded_irq(platform_get_irq_byname(pdev, "cec-rx"),
+                                          vc4_cec_irq_handler_rx_bare,
+                                          vc4_cec_irq_handler_rx_thread, 0,
+                                          "vc4 hdmi cec rx", vc4_hdmi);
                if (ret)
                        goto err_delete_cec_adap;
 
-               ret = devm_request_threaded_irq(&pdev->dev,
-                                               platform_get_irq_byname(pdev, "cec-tx"),
-                                               vc4_cec_irq_handler_tx_bare,
-                                               vc4_cec_irq_handler_tx_thread, 0,
-                                               "vc4 hdmi cec tx", vc4_hdmi);
+               ret = request_threaded_irq(platform_get_irq_byname(pdev, "cec-tx"),
+                                          vc4_cec_irq_handler_tx_bare,
+                                          vc4_cec_irq_handler_tx_thread, 0,
+                                          "vc4 hdmi cec tx", vc4_hdmi);
                if (ret)
-                       goto err_delete_cec_adap;
+                       goto err_remove_cec_rx_handler;
        } else {
                HDMI_WRITE(HDMI_CEC_CPU_MASK_SET, 0xffffffff);
 
-               ret = devm_request_threaded_irq(&pdev->dev, platform_get_irq(pdev, 0),
-                                               vc4_cec_irq_handler,
-                                               vc4_cec_irq_handler_thread, 0,
-                                               "vc4 hdmi cec", vc4_hdmi);
+               ret = request_threaded_irq(platform_get_irq(pdev, 0),
+                                          vc4_cec_irq_handler,
+                                          vc4_cec_irq_handler_thread, 0,
+                                          "vc4 hdmi cec", vc4_hdmi);
                if (ret)
                        goto err_delete_cec_adap;
        }
 
        ret = cec_register_adapter(vc4_hdmi->cec_adap, &pdev->dev);
        if (ret < 0)
-               goto err_delete_cec_adap;
+               goto err_remove_handlers;
 
        return 0;
 
+err_remove_handlers:
+       if (vc4_hdmi->variant->external_irq_controller)
+               free_irq(platform_get_irq_byname(pdev, "cec-tx"), vc4_hdmi);
+       else
+               free_irq(platform_get_irq(pdev, 0), vc4_hdmi);
+
+err_remove_cec_rx_handler:
+       if (vc4_hdmi->variant->external_irq_controller)
+               free_irq(platform_get_irq_byname(pdev, "cec-rx"), vc4_hdmi);
+
 err_delete_cec_adap:
        cec_delete_adapter(vc4_hdmi->cec_adap);
 
@@ -1897,6 +1905,15 @@ err_delete_cec_adap:
 
 static void vc4_hdmi_cec_exit(struct vc4_hdmi *vc4_hdmi)
 {
+       struct platform_device *pdev = vc4_hdmi->pdev;
+
+       if (vc4_hdmi->variant->external_irq_controller) {
+               free_irq(platform_get_irq_byname(pdev, "cec-rx"), vc4_hdmi);
+               free_irq(platform_get_irq_byname(pdev, "cec-tx"), vc4_hdmi);
+       } else {
+               free_irq(platform_get_irq(pdev, 0), vc4_hdmi);
+       }
+
        cec_unregister_adapter(vc4_hdmi->cec_adap);
 }
 #else
index 1605549..76937f7 100644
@@ -576,7 +576,7 @@ config HID_LOGITECH_HIDPP
        depends on HID_LOGITECH
        select POWER_SUPPLY
        help
-       Support for Logitech devices relyingon the HID++ Logitech specification
+       Support for Logitech devices relying on the HID++ Logitech specification
 
        Say Y if you want support for Logitech devices relying on the HID++
        specification. Such devices are the various Logitech Touchpads (T650,
index 96e2577..8d68796 100644
@@ -58,7 +58,7 @@ static void amd_stop_sensor_v2(struct amd_mp2_dev *privdata, u16 sensor_idx)
        cmd_base.cmd_v2.sensor_id = sensor_idx;
        cmd_base.cmd_v2.length  = 16;
 
-       writeq(0x0, privdata->mmio + AMD_C2P_MSG2);
+       writeq(0x0, privdata->mmio + AMD_C2P_MSG1);
        writel(cmd_base.ul, privdata->mmio + AMD_C2P_MSG0);
 }
 
index 6b8f0d0..dc6bd42 100644
@@ -501,6 +501,8 @@ static const struct hid_device_id apple_devices[] = {
                        APPLE_RDESC_JIS },
        { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ANSI),
                .driver_data = APPLE_HAS_FN },
+       { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ANSI),
+               .driver_data = APPLE_HAS_FN },
        { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ISO),
                .driver_data = APPLE_HAS_FN },
        { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ISO),
index fca8fc7..fb807c8 100644
@@ -485,9 +485,6 @@ static void asus_kbd_backlight_set(struct led_classdev *led_cdev,
 {
        struct asus_kbd_leds *led = container_of(led_cdev, struct asus_kbd_leds,
                                                 cdev);
-       if (led->brightness == brightness)
-               return;
-
        led->brightness = brightness;
        schedule_work(&led->work);
 }
index f43a840..4ef1c3b 100644
@@ -742,7 +742,7 @@ static int ft260_is_interface_enabled(struct hid_device *hdev)
        int ret;
 
        ret = ft260_get_system_config(hdev, &cfg);
-       if (ret)
+       if (ret < 0)
                return ret;
 
        ft260_dbg("interface:  0x%02x\n", interface);
@@ -754,23 +754,16 @@ static int ft260_is_interface_enabled(struct hid_device *hdev)
        switch (cfg.chip_mode) {
        case FT260_MODE_ALL:
        case FT260_MODE_BOTH:
-               if (interface == 1) {
+               if (interface == 1)
                        hid_info(hdev, "uart interface is not supported\n");
-                       return 0;
-               }
-               ret = 1;
+               else
+                       ret = 1;
                break;
        case FT260_MODE_UART:
-               if (interface == 0) {
-                       hid_info(hdev, "uart is unsupported on interface 0\n");
-                       ret = 0;
-               }
+               hid_info(hdev, "uart interface is not supported\n");
                break;
        case FT260_MODE_I2C:
-               if (interface == 1) {
-                       hid_info(hdev, "i2c is unsupported on interface 1\n");
-                       ret = 0;
-               }
+               ret = 1;
                break;
        }
        return ret;
@@ -785,7 +778,7 @@ static int ft260_byte_show(struct hid_device *hdev, int id, u8 *cfg, int len,
        if (ret < 0)
                return ret;
 
-       return scnprintf(buf, PAGE_SIZE, "%hi\n", *field);
+       return scnprintf(buf, PAGE_SIZE, "%d\n", *field);
 }
 
 static int ft260_word_show(struct hid_device *hdev, int id, u8 *cfg, int len,
@@ -797,7 +790,7 @@ static int ft260_word_show(struct hid_device *hdev, int id, u8 *cfg, int len,
        if (ret < 0)
                return ret;
 
-       return scnprintf(buf, PAGE_SIZE, "%hi\n", le16_to_cpu(*field));
+       return scnprintf(buf, PAGE_SIZE, "%d\n", le16_to_cpu(*field));
 }
 
 #define FT260_ATTR_SHOW(name, reptype, id, type, func)                        \
@@ -1004,11 +997,9 @@ err_hid_stop:
 
 static void ft260_remove(struct hid_device *hdev)
 {
-       int ret;
        struct ft260_device *dev = hid_get_drvdata(hdev);
 
-       ret = ft260_is_interface_enabled(hdev);
-       if (ret <= 0)
+       if (!dev)
                return;
 
        sysfs_remove_group(&hdev->dev.kobj, &ft260_attr_group);
index 6b1fa97..91bf4d0 100644
@@ -784,6 +784,17 @@ static void hid_ishtp_cl_reset_handler(struct work_struct *work)
        }
 }
 
+static void hid_ishtp_cl_resume_handler(struct work_struct *work)
+{
+       struct ishtp_cl_data *client_data = container_of(work, struct ishtp_cl_data, resume_work);
+       struct ishtp_cl *hid_ishtp_cl = client_data->hid_ishtp_cl;
+
+       if (ishtp_wait_resume(ishtp_get_ishtp_device(hid_ishtp_cl))) {
+               client_data->suspended = false;
+               wake_up_interruptible(&client_data->ishtp_resume_wait);
+       }
+}
+
 ishtp_print_log ishtp_hid_print_trace;
 
 /**
@@ -822,6 +833,8 @@ static int hid_ishtp_cl_probe(struct ishtp_cl_device *cl_device)
        init_waitqueue_head(&client_data->ishtp_resume_wait);
 
        INIT_WORK(&client_data->work, hid_ishtp_cl_reset_handler);
+       INIT_WORK(&client_data->resume_work, hid_ishtp_cl_resume_handler);
+
 
        ishtp_hid_print_trace = ishtp_trace_callback(cl_device);
 
@@ -921,7 +934,7 @@ static int hid_ishtp_cl_resume(struct device *device)
 
        hid_ishtp_trace(client_data, "%s hid_ishtp_cl %p\n", __func__,
                        hid_ishtp_cl);
-       client_data->suspended = false;
+       schedule_work(&client_data->resume_work);
        return 0;
 }
 
index f88443a..6a5cc11 100644
@@ -135,6 +135,7 @@ struct ishtp_cl_data {
        int multi_packet_cnt;
 
        struct work_struct work;
+       struct work_struct resume_work;
        struct ishtp_cl_device *cl_device;
 };
 
index f0802b0..aa2c516 100644
@@ -314,13 +314,6 @@ static int ishtp_cl_device_resume(struct device *dev)
        if (!device)
                return 0;
 
-       /*
-        * When ISH needs hard reset, it is done asynchrnously, hence bus
-        * resume will  be called before full ISH resume
-        */
-       if (device->ishtp_dev->resume_flag)
-               return 0;
-
        driver = to_ishtp_cl_driver(dev->driver);
        if (driver && driver->driver.pm) {
                if (driver->driver.pm->resume)
@@ -849,6 +842,28 @@ struct device *ishtp_device(struct ishtp_cl_device *device)
 }
 EXPORT_SYMBOL(ishtp_device);
 
+/**
+ * ishtp_wait_resume() - Wait for IPC resume
+ * @dev: ishtp device
+ *
+ * Wait up to 50ms for the IPC resume response.
+ *
+ * Return: true if resume completed, false otherwise
+ */
+bool ishtp_wait_resume(struct ishtp_device *dev)
+{
+       /* 50ms to get resume response */
+       #define WAIT_FOR_RESUME_ACK_MS          50
+
+       /* Wait for the resume response */
+       if (dev->resume_flag)
+               wait_event_interruptible_timeout(dev->resume_wait,
+                                                !dev->resume_flag,
+                                                msecs_to_jiffies(WAIT_FOR_RESUME_ACK_MS));
+
+       return (!dev->resume_flag);
+}
+EXPORT_SYMBOL_GPL(ishtp_wait_resume);
+
 /**
  * ishtp_get_pci_device() - Return PCI device dev pointer
  * This interface is used to return PCI device pointer
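
The bounded wait in ishtp_wait_resume() above has the usual condition-variable-with-timeout shape; a userspace analog (all names illustrative, and the real code uses the kernel's wait_event_interruptible_timeout()):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static bool resume_flag = true;         /* set while resume is pending */

/* Wait up to 50ms for resume_flag to clear; true means resume completed. */
static bool wait_resume(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_REALTIME, &ts);
        ts.tv_nsec += 50 * 1000 * 1000;
        if (ts.tv_nsec >= 1000000000L) {
                ts.tv_sec++;
                ts.tv_nsec -= 1000000000L;
        }

        pthread_mutex_lock(&lock);
        while (resume_flag &&
               pthread_cond_timedwait(&cond, &lock, &ts) == 0)
                ;       /* woken up: re-check the flag */
        pthread_mutex_unlock(&lock);

        return !resume_flag;
}

int main(void)
{
        resume_flag = false;    /* pretend the response already arrived */
        printf("resumed: %d\n", wait_resume());
        return 0;
}
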
index dcf3a23..7c2032f 100644
@@ -38,7 +38,7 @@ config USB_HIDDEV
        help
          Say Y here if you want to support HID devices (from the USB
          specification standpoint) that aren't strictly user interface
-         devices, like monitor controls and Uninterruptable Power Supplies.
+         devices, like monitor controls and Uninterruptible Power Supplies.
 
          This module supports these devices separately using a separate
          event interface on /dev/usb/hiddevX (char 180:96 to 180:111).
index 81d7d12..81ba642 100644
@@ -2548,6 +2548,9 @@ static void wacom_wac_finger_slot(struct wacom_wac *wacom_wac,
                int slot;
 
                slot = input_mt_get_slot_by_key(input, hid_data->id);
+               if (slot < 0)
+                       return;
+
                input_mt_slot(input, slot);
                input_mt_report_slot_state(input, MT_TOOL_FINGER, prox);
        }
@@ -3831,7 +3834,7 @@ int wacom_setup_touch_input_capabilities(struct input_dev *input_dev,
                    wacom_wac->shared->touch->product == 0xF6) {
                        input_dev->evbit[0] |= BIT_MASK(EV_SW);
                        __set_bit(SW_MUTE_DEVICE, input_dev->swbit);
-                       wacom_wac->shared->has_mute_touch_switch = true;
+                       wacom_wac->has_mute_touch_switch = true;
                }
                fallthrough;
 
index 6d5014e..a6ea1eb 100644
@@ -635,8 +635,8 @@ static irqreturn_t mpc_i2c_isr(int irq, void *dev_id)
 
        status = readb(i2c->base + MPC_I2C_SR);
        if (status & CSR_MIF) {
-               /* Read again to allow register to stabilise */
-               status = readb(i2c->base + MPC_I2C_SR);
+               /* Wait up to 100us for transfer to properly complete */
+               readb_poll_timeout(i2c->base + MPC_I2C_SR, status, !(status & CSR_MCF), 0, 100);
                writeb(0, i2c->base + MPC_I2C_SR);
                mpc_i2c_do_intr(i2c, status);
                return IRQ_HANDLED;
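
The readb_poll_timeout() used above re-reads the status register until a condition holds or the time budget runs out; a rough, simplified shape of such a helper (the kernel version can also sleep between reads and takes the condition as a macro argument):

#include <errno.h>

static int poll_timeout(unsigned char (*read_reg)(void),
                        int (*cond)(unsigned char),
                        unsigned char *status, int budget)
{
        while (budget-- >= 0) {
                *status = read_reg();
                if (cond(*status))
                        return 0;       /* condition met within budget */
        }
        return -ETIMEDOUT;              /* *status holds the last read */
}

/* Illustrative stubs: a status register whose busy-style bit is clear. */
static unsigned char fake_sr(void) { return 0x00; }
static int bit_clear(unsigned char s) { return !(s & 0x02); }

int main(void)
{
        unsigned char status;

        return poll_timeout(fake_sr, bit_clear, &status, 100);
}
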
index d567402..a8688a9 100644
@@ -120,6 +120,7 @@ static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev, u8 wqe_mode)
        if (!chip_ctx)
                return -ENOMEM;
        chip_ctx->chip_num = bp->chip_num;
+       chip_ctx->hw_stats_size = bp->hw_ring_stats_size;
 
        rdev->chip_ctx = chip_ctx;
        /* rest members to follow eventually */
@@ -550,6 +551,7 @@ static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev,
                                       dma_addr_t dma_map,
                                       u32 *fw_stats_ctx_id)
 {
+       struct bnxt_qplib_chip_ctx *chip_ctx = rdev->chip_ctx;
        struct hwrm_stat_ctx_alloc_output resp = {0};
        struct hwrm_stat_ctx_alloc_input req = {0};
        struct bnxt_en_dev *en_dev = rdev->en_dev;
@@ -566,7 +568,7 @@ static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev,
        bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_STAT_CTX_ALLOC, -1, -1);
        req.update_period_ms = cpu_to_le32(1000);
        req.stats_dma_addr = cpu_to_le64(dma_map);
-       req.stats_dma_length = cpu_to_le16(sizeof(struct ctx_hw_stats_ext));
+       req.stats_dma_length = cpu_to_le16(chip_ctx->hw_stats_size);
        req.stat_ctx_flags = STAT_CTX_ALLOC_REQ_STAT_CTX_FLAGS_ROCE;
        bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
                            sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
index 17f0701..44282a8 100644
@@ -56,6 +56,7 @@
 static void bnxt_qplib_free_stats_ctx(struct pci_dev *pdev,
                                      struct bnxt_qplib_stats *stats);
 static int bnxt_qplib_alloc_stats_ctx(struct pci_dev *pdev,
+                                     struct bnxt_qplib_chip_ctx *cctx,
                                      struct bnxt_qplib_stats *stats);
 
 /* PBL */
@@ -559,7 +560,7 @@ int bnxt_qplib_alloc_ctx(struct bnxt_qplib_res *res,
                goto fail;
 stats_alloc:
        /* Stats */
-       rc = bnxt_qplib_alloc_stats_ctx(res->pdev, &ctx->stats);
+       rc = bnxt_qplib_alloc_stats_ctx(res->pdev, res->cctx, &ctx->stats);
        if (rc)
                goto fail;
 
@@ -889,15 +890,12 @@ static void bnxt_qplib_free_stats_ctx(struct pci_dev *pdev,
 }
 
 static int bnxt_qplib_alloc_stats_ctx(struct pci_dev *pdev,
+                                     struct bnxt_qplib_chip_ctx *cctx,
                                      struct bnxt_qplib_stats *stats)
 {
        memset(stats, 0, sizeof(*stats));
        stats->fw_id = -1;
-       /* 128 byte aligned context memory is required only for 57500.
-        * However making this unconditional, it does not harm previous
-        * generation.
-        */
-       stats->size = ALIGN(sizeof(struct ctx_hw_stats), 128);
+       stats->size = cctx->hw_stats_size;
        stats->dma = dma_alloc_coherent(&pdev->dev, stats->size,
                                        &stats->dma_map, GFP_KERNEL);
        if (!stats->dma) {
index c291f49..9103150 100644
@@ -54,6 +54,7 @@ struct bnxt_qplib_chip_ctx {
        u16     chip_num;
        u8      chip_rev;
        u8      chip_metal;
+       u16     hw_stats_size;
        struct bnxt_qplib_drv_modes modes;
 };
 
index b1023a7..f1e5515 100644
@@ -2845,7 +2845,7 @@ static u64 irdma_sc_decode_fpm_commit(struct irdma_sc_dev *dev, __le64 *buf,
  * parses fpm commit info and copy base value
  * of hmc objects in hmc_info
  */
-static enum irdma_status_code
+static void
 irdma_sc_parse_fpm_commit_buf(struct irdma_sc_dev *dev, __le64 *buf,
                              struct irdma_hmc_obj_info *info, u32 *sd)
 {
@@ -2915,7 +2915,6 @@ irdma_sc_parse_fpm_commit_buf(struct irdma_sc_dev *dev, __le64 *buf,
        else
                *sd = (u32)(size >> 21);
 
-       return 0;
 }
 
 /**
@@ -4187,11 +4186,9 @@ enum irdma_status_code irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq,
  * @dev: sc device struct
  * @count: allocate count
  */
-enum irdma_status_code irdma_sc_repost_aeq_entries(struct irdma_sc_dev *dev, u32 count)
+void irdma_sc_repost_aeq_entries(struct irdma_sc_dev *dev, u32 count)
 {
        writel(count, dev->hw_regs[IRDMA_AEQALLOC]);
-
-       return 0;
 }
 
 /**
@@ -4434,9 +4431,9 @@ static enum irdma_status_code irdma_sc_cfg_iw_fpm(struct irdma_sc_dev *dev,
        ret_code = irdma_sc_commit_fpm_val(dev->cqp, 0, hmc_info->hmc_fn_id,
                                           &commit_fpm_mem, true, wait_type);
        if (!ret_code)
-               ret_code = irdma_sc_parse_fpm_commit_buf(dev, dev->fpm_commit_buf,
-                                                        hmc_info->hmc_obj,
-                                                        &hmc_info->sd_table.sd_cnt);
+               irdma_sc_parse_fpm_commit_buf(dev, dev->fpm_commit_buf,
+                                             hmc_info->hmc_obj,
+                                             &hmc_info->sd_table.sd_cnt);
        print_hex_dump_debug("HMC: COMMIT FPM BUFFER", DUMP_PREFIX_OFFSET, 16,
                             8, commit_fpm_mem.va, IRDMA_COMMIT_FPM_BUF_SIZE,
                             false);
index 7afb8a6..00de5ee 100644
@@ -1920,7 +1920,7 @@ enum irdma_status_code irdma_ctrl_init_hw(struct irdma_pci_f *rf)
  * irdma_set_hw_rsrc - set hw memory resources.
  * @rf: RDMA PCI function
  */
-static u32 irdma_set_hw_rsrc(struct irdma_pci_f *rf)
+static void irdma_set_hw_rsrc(struct irdma_pci_f *rf)
 {
        rf->allocated_qps = (void *)(rf->mem_rsrc +
                   (sizeof(struct irdma_arp_entry) * rf->arp_table_size));
@@ -1937,8 +1937,6 @@ static u32 irdma_set_hw_rsrc(struct irdma_pci_f *rf)
        spin_lock_init(&rf->arp_lock);
        spin_lock_init(&rf->qptable_lock);
        spin_lock_init(&rf->qh_list_lock);
-
-       return 0;
 }
 
 /**
@@ -2000,9 +1998,7 @@ u32 irdma_initialize_hw_rsrc(struct irdma_pci_f *rf)
 
        rf->arp_table = (struct irdma_arp_entry *)rf->mem_rsrc;
 
-       ret = irdma_set_hw_rsrc(rf);
-       if (ret)
-               goto set_hw_rsrc_fail;
+       irdma_set_hw_rsrc(rf);
 
        set_bit(0, rf->allocated_mrs);
        set_bit(0, rf->allocated_qps);
@@ -2025,9 +2021,6 @@ u32 irdma_initialize_hw_rsrc(struct irdma_pci_f *rf)
 
        return 0;
 
-set_hw_rsrc_fail:
-       kfree(rf->mem_rsrc);
-       rf->mem_rsrc = NULL;
 mem_rsrc_kzalloc_fail:
        kfree(rf->allocated_ws_nodes);
        rf->allocated_ws_nodes = NULL;
index ea59432..51a4135 100644
@@ -215,10 +215,10 @@ static void irdma_remove(struct auxiliary_device *aux_dev)
        pr_debug("INIT: Gen2 PF[%d] device remove success\n", PCI_FUNC(pf->pdev->devfn));
 }
 
-static void irdma_fill_device_info(struct irdma_device *iwdev, struct ice_pf *pf)
+static void irdma_fill_device_info(struct irdma_device *iwdev, struct ice_pf *pf,
+                                  struct ice_vsi *vsi)
 {
        struct irdma_pci_f *rf = iwdev->rf;
-       struct ice_vsi *vsi = ice_get_main_vsi(pf);
 
        rf->cdev = pf;
        rf->gen_ops.register_qset = irdma_lan_register_qset;
@@ -253,12 +253,15 @@ static int irdma_probe(struct auxiliary_device *aux_dev, const struct auxiliary_
                                                            struct iidc_auxiliary_dev,
                                                            adev);
        struct ice_pf *pf = iidc_adev->pf;
+       struct ice_vsi *vsi = ice_get_main_vsi(pf);
        struct iidc_qos_params qos_info = {};
        struct irdma_device *iwdev;
        struct irdma_pci_f *rf;
        struct irdma_l2params l2params = {};
        int err;
 
+       if (!vsi)
+               return -EIO;
        iwdev = ib_alloc_device(irdma_device, ibdev);
        if (!iwdev)
                return -ENOMEM;
@@ -268,7 +271,7 @@ static int irdma_probe(struct auxiliary_device *aux_dev, const struct auxiliary_
                return -ENOMEM;
        }
 
-       irdma_fill_device_info(iwdev, pf);
+       irdma_fill_device_info(iwdev, pf, vsi);
        rf = iwdev->rf;
 
        if (irdma_ctrl_init_hw(rf)) {
index 7387b83..874bc25 100644
@@ -1222,8 +1222,7 @@ enum irdma_status_code irdma_sc_aeq_init(struct irdma_sc_aeq *aeq,
                                         struct irdma_aeq_init_info *info);
 enum irdma_status_code irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq,
                                              struct irdma_aeqe_info *info);
-enum irdma_status_code irdma_sc_repost_aeq_entries(struct irdma_sc_dev *dev,
-                                                  u32 count);
+void irdma_sc_repost_aeq_entries(struct irdma_sc_dev *dev, u32 count);
 
 void irdma_sc_pd_init(struct irdma_sc_dev *dev, struct irdma_sc_pd *pd, u32 pd_id,
                      int abi_ver);
index a6d52c2..5fb92de 100644
@@ -931,7 +931,7 @@ enum irdma_status_code irdma_uk_mw_bind(struct irdma_qp_uk *qp,
 enum irdma_status_code irdma_uk_post_receive(struct irdma_qp_uk *qp,
                                             struct irdma_post_rq_info *info)
 {
-       u32 total_size = 0, wqe_idx, i, byte_off;
+       u32 wqe_idx, i, byte_off;
        u32 addl_frag_cnt;
        __le64 *wqe;
        u64 hdr;
@@ -939,9 +939,6 @@ enum irdma_status_code irdma_uk_post_receive(struct irdma_qp_uk *qp,
        if (qp->max_rq_frag_cnt < info->num_sges)
                return IRDMA_ERR_INVALID_FRAG_COUNT;
 
-       for (i = 0; i < info->num_sges; i++)
-               total_size += info->sg_list[i].len;
-
        wqe = irdma_qp_get_next_recv_wqe(qp, &wqe_idx);
        if (!wqe)
                return IRDMA_ERR_QP_TOOMANY_WRS_POSTED;
index 9712f69..717147e 100644
@@ -557,7 +557,7 @@ static int irdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
  * @iwqp: qp ptr
  * @init_info: initialize info to return
  */
-static int irdma_setup_virt_qp(struct irdma_device *iwdev,
+static void irdma_setup_virt_qp(struct irdma_device *iwdev,
                               struct irdma_qp *iwqp,
                               struct irdma_qp_init_info *init_info)
 {
@@ -574,8 +574,6 @@ static int irdma_setup_virt_qp(struct irdma_device *iwdev,
                init_info->sq_pa = qpmr->sq_pbl.addr;
                init_info->rq_pa = qpmr->rq_pbl.addr;
        }
-
-       return 0;
 }
 
 /**
@@ -914,7 +912,7 @@ static struct ib_qp *irdma_create_qp(struct ib_pd *ibpd,
                        }
                }
                init_info.qp_uk_init_info.abi_ver = iwpd->sc_pd.abi_ver;
-               err_code = irdma_setup_virt_qp(iwdev, iwqp, &init_info);
+               irdma_setup_virt_qp(iwdev, iwqp, &init_info);
        } else {
                init_info.qp_uk_init_info.abi_ver = IRDMA_ABI_VER;
                err_code = irdma_setup_kmode_qp(iwdev, iwqp, &init_info, init_attr);
index 7abeb57..18b55d2 100644
@@ -997,7 +997,7 @@ int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
                                  MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD));
        MLX5_SET(cqc, cqc, log_cq_size, ilog2(entries));
        MLX5_SET(cqc, cqc, uar_page, index);
-       MLX5_SET(cqc, cqc, c_eqn, eqn);
+       MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
        MLX5_SET64(cqc, cqc, dbr_addr, cq->db.dma);
        if (cq->create_flags & IB_UVERBS_CQ_FLAGS_IGNORE_OVERRUN)
                MLX5_SET(cqc, cqc, oi, 1);
index eb9b0a2..e994aef 100644
@@ -1437,11 +1437,10 @@ out:
        rcu_read_unlock();
 }
 
-static bool is_apu_thread_cq(struct mlx5_ib_dev *dev, const void *in)
+static bool is_apu_cq(struct mlx5_ib_dev *dev, const void *in)
 {
        if (!MLX5_CAP_GEN(dev->mdev, apu) ||
-           !MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context),
-                     apu_thread_cq))
+           !MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context), apu_cq))
                return false;
 
        return true;
@@ -1501,7 +1500,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
                err = mlx5_core_create_dct(dev, &obj->core_dct, cmd_in,
                                           cmd_in_len, cmd_out, cmd_out_len);
        } else if (opcode == MLX5_CMD_OP_CREATE_CQ &&
-                  !is_apu_thread_cq(dev, cmd_in)) {
+                  !is_apu_cq(dev, cmd_in)) {
                obj->flags |= DEVX_OBJ_FLAGS_CQ;
                obj->core_cq.comp = devx_cq_comp;
                err = mlx5_core_create_cq(dev->mdev, &obj->core_cq,
index b25e0b3..5282148 100644
@@ -8,13 +8,15 @@
 #include "srq.h"
 
 static int
-mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
+mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev,
+                     struct mlx5_eswitch_rep *rep,
+                     int vport_index)
 {
        struct mlx5_ib_dev *ibdev;
-       int vport_index;
 
        ibdev = mlx5_eswitch_uplink_get_proto_dev(dev->priv.eswitch, REP_IB);
-       vport_index = rep->vport_index;
+       if (!ibdev)
+               return -EINVAL;
 
        ibdev->port[vport_index].rep = rep;
        rep->rep_data[REP_IB].priv = ibdev;
@@ -26,19 +28,39 @@ mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
        return 0;
 }
 
+static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev);
+
 static int
 mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 {
        u32 num_ports = mlx5_eswitch_get_total_vports(dev);
        const struct mlx5_ib_profile *profile;
+       struct mlx5_core_dev *peer_dev;
        struct mlx5_ib_dev *ibdev;
+       u32 peer_num_ports;
        int vport_index;
        int ret;
 
+       vport_index = rep->vport_index;
+
+       if (mlx5_lag_is_shared_fdb(dev)) {
+               peer_dev = mlx5_lag_get_peer_mdev(dev);
+               peer_num_ports = mlx5_eswitch_get_total_vports(peer_dev);
+               if (mlx5_lag_is_master(dev)) {
+                       /* Only 1 ib port is the representor for both uplinks */
+                       num_ports += peer_num_ports - 1;
+               } else {
+                       if (rep->vport == MLX5_VPORT_UPLINK)
+                               return 0;
+                       vport_index += peer_num_ports;
+                       dev = peer_dev;
+               }
+       }
+
        if (rep->vport == MLX5_VPORT_UPLINK)
                profile = &raw_eth_profile;
        else
-               return mlx5_ib_set_vport_rep(dev, rep);
+               return mlx5_ib_set_vport_rep(dev, rep, vport_index);
 
        ibdev = ib_alloc_device(mlx5_ib_dev, ib_dev);
        if (!ibdev)
@@ -64,6 +86,8 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
                goto fail_add;
 
        rep->rep_data[REP_IB].priv = ibdev;
+       if (mlx5_lag_is_shared_fdb(dev))
+               mlx5_ib_register_peer_vport_reps(dev);
 
        return 0;
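
The port arithmetic above can be sanity-checked with assumed counts: on the master, one IB port stands in for both uplinks, while on the non-master device a rep's index is shifted past the peer's ports (illustrative values only):

#include <stdio.h>

int main(void)
{
        unsigned int num_ports = 4;             /* local eswitch vports */
        unsigned int peer_num_ports = 4;        /* peer eswitch vports */
        unsigned int rep_vport_index = 2;       /* some VF representor */

        /* Master: both uplinks share a single IB port. */
        unsigned int master_total = num_ports + peer_num_ports - 1;    /* 7 */

        /* Non-master: local reps land after the peer's ports. */
        unsigned int shifted = rep_vport_index + peer_num_ports;       /* 6 */

        printf("master ports %u, shifted rep index %u\n",
               master_total, shifted);
        return 0;
}
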
 
@@ -82,18 +106,45 @@ static void *mlx5_ib_rep_to_dev(struct mlx5_eswitch_rep *rep)
 static void
 mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
 {
+       struct mlx5_core_dev *mdev = mlx5_eswitch_get_core_dev(rep->esw);
        struct mlx5_ib_dev *dev = mlx5_ib_rep_to_dev(rep);
+       int vport_index = rep->vport_index;
        struct mlx5_ib_port *port;
 
-       port = &dev->port[rep->vport_index];
+       if (WARN_ON(!mdev))
+               return;
+
+       if (mlx5_lag_is_shared_fdb(mdev) &&
+           !mlx5_lag_is_master(mdev)) {
+               struct mlx5_core_dev *peer_mdev;
+
+               if (rep->vport == MLX5_VPORT_UPLINK)
+                       return;
+               peer_mdev = mlx5_lag_get_peer_mdev(mdev);
+               vport_index += mlx5_eswitch_get_total_vports(peer_mdev);
+       }
+
+       if (!dev)
+               return;
+
+       port = &dev->port[vport_index];
        write_lock(&port->roce.netdev_lock);
        port->roce.netdev = NULL;
        write_unlock(&port->roce.netdev_lock);
        rep->rep_data[REP_IB].priv = NULL;
        port->rep = NULL;
 
-       if (rep->vport == MLX5_VPORT_UPLINK)
+       if (rep->vport == MLX5_VPORT_UPLINK) {
+               struct mlx5_core_dev *peer_mdev;
+               struct mlx5_eswitch *esw;
+
+               if (mlx5_lag_is_shared_fdb(mdev)) {
+                       peer_mdev = mlx5_lag_get_peer_mdev(mdev);
+                       esw = peer_mdev->priv.eswitch;
+                       mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
+               }
                __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
+       }
 }
 
 static const struct mlx5_eswitch_rep_ops rep_ops = {
@@ -102,6 +153,18 @@ static const struct mlx5_eswitch_rep_ops rep_ops = {
        .get_proto_dev = mlx5_ib_rep_to_dev,
 };
 
+static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev)
+{
+       struct mlx5_core_dev *peer_mdev = mlx5_lag_get_peer_mdev(mdev);
+       struct mlx5_eswitch *esw;
+
+       if (!peer_mdev)
+               return;
+
+       esw = peer_mdev->priv.eswitch;
+       mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB);
+}
+
 struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
                                          u16 vport_num)
 {
@@ -123,7 +186,7 @@ struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
 
        rep = dev->port[port - 1].rep;
 
-       return mlx5_eswitch_add_send_to_vport_rule(esw, rep, sq->base.mqp.qpn);
+       return mlx5_eswitch_add_send_to_vport_rule(esw, esw, rep, sq->base.mqp.qpn);
 }
 
 static int mlx5r_rep_probe(struct auxiliary_device *adev,
index 094c976..ae05e14 100644 (file)
@@ -126,6 +126,7 @@ static int get_port_state(struct ib_device *ibdev,
 
 static struct mlx5_roce *mlx5_get_rep_roce(struct mlx5_ib_dev *dev,
                                           struct net_device *ndev,
+                                          struct net_device *upper,
                                           u32 *port_num)
 {
        struct net_device *rep_ndev;
@@ -137,6 +138,14 @@ static struct mlx5_roce *mlx5_get_rep_roce(struct mlx5_ib_dev *dev,
                if (!port->rep)
                        continue;
 
+               if (upper == ndev && port->rep->vport == MLX5_VPORT_UPLINK) {
+                       *port_num = i + 1;
+                       return &port->roce;
+               }
+
+               if (upper && port->rep->vport == MLX5_VPORT_UPLINK)
+                       continue;
+
                read_lock(&port->roce.netdev_lock);
                rep_ndev = mlx5_ib_get_rep_netdev(port->rep->esw,
                                                  port->rep->vport);
@@ -196,11 +205,12 @@ static int mlx5_netdev_event(struct notifier_block *this,
                }
 
                if (ibdev->is_rep)
-                       roce = mlx5_get_rep_roce(ibdev, ndev, &port_num);
+                       roce = mlx5_get_rep_roce(ibdev, ndev, upper, &port_num);
                if (!roce)
                        return NOTIFY_DONE;
-               if ((upper == ndev || (!upper && ndev == roce->netdev))
-                   && ibdev->ib_active) {
+               if ((upper == ndev ||
+                    ((!upper || ibdev->is_rep) && ndev == roce->netdev)) &&
+                   ibdev->ib_active) {
                        struct ib_event ibev = { };
                        enum ib_port_state port_state;
 
@@ -3012,7 +3022,7 @@ static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev)
        struct mlx5_flow_table *ft;
        int err;
 
-       if (!ns || !mlx5_lag_is_roce(mdev))
+       if (!ns || !mlx5_lag_is_active(mdev))
                return 0;
 
        err = mlx5_cmd_create_vport_lag(mdev);
@@ -3074,9 +3084,11 @@ static int mlx5_enable_eth(struct mlx5_ib_dev *dev)
 {
        int err;
 
-       err = mlx5_nic_vport_enable_roce(dev->mdev);
-       if (err)
-               return err;
+       if (!dev->is_rep && dev->profile != &raw_eth_profile) {
+               err = mlx5_nic_vport_enable_roce(dev->mdev);
+               if (err)
+                       return err;
+       }
 
        err = mlx5_eth_lag_init(dev);
        if (err)
@@ -3085,7 +3097,8 @@ static int mlx5_enable_eth(struct mlx5_ib_dev *dev)
        return 0;
 
 err_disable_roce:
-       mlx5_nic_vport_disable_roce(dev->mdev);
+       if (!dev->is_rep && dev->profile != &raw_eth_profile)
+               mlx5_nic_vport_disable_roce(dev->mdev);
 
        return err;
 }
@@ -3093,7 +3106,8 @@ err_disable_roce:
 static void mlx5_disable_eth(struct mlx5_ib_dev *dev)
 {
        mlx5_eth_lag_cleanup(dev);
-       mlx5_nic_vport_disable_roce(dev->mdev);
+       if (!dev->is_rep && dev->profile != &raw_eth_profile)
+               mlx5_nic_vport_disable_roce(dev->mdev);
 }
 
 static int mlx5_ib_rn_get_params(struct ib_device *device, u32 port_num,
@@ -3950,12 +3964,7 @@ static int mlx5_ib_roce_init(struct mlx5_ib_dev *dev)
 
                /* Register only for native ports */
                err = mlx5_add_netdev_notifier(dev, port_num);
-               if (err || dev->is_rep || !mlx5_is_roce_init_enabled(mdev))
-                       /*
-                        * We don't enable ETH interface for
-                        * 1. IB representors
-                        * 2. User disabled ROCE through devlink interface
-                        */
+               if (err)
                        return err;
 
                err = mlx5_enable_eth(dev);
@@ -3980,8 +3989,7 @@ static void mlx5_ib_roce_cleanup(struct mlx5_ib_dev *dev)
        ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
 
        if (ll == IB_LINK_LAYER_ETHERNET) {
-               if (!dev->is_rep)
-                       mlx5_disable_eth(dev);
+               mlx5_disable_eth(dev);
 
                port_num = mlx5_core_native_port_num(dev->mdev) - 1;
                mlx5_remove_netdev_notifier(dev, port_num);
@@ -4037,7 +4045,7 @@ static int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
 {
        const char *name;
 
-       if (!mlx5_lag_is_roce(dev->mdev))
+       if (!mlx5_lag_is_active(dev->mdev))
                name = "mlx5_%d";
        else
                name = "mlx5_bond_%d";
index c0ddf7b..bbfcce3 100644 (file)
@@ -114,14 +114,18 @@ out:
 static int fill_switchdev_info(struct mlx5_ib_dev *dev, u32 port_num,
                               struct mlx5_ib_uapi_query_port *info)
 {
-       struct mlx5_core_dev *mdev = dev->mdev;
        struct mlx5_eswitch_rep *rep;
+       struct mlx5_core_dev *mdev;
        int err;
 
        rep = dev->port[port_num - 1].rep;
        if (!rep)
                return -EOPNOTSUPP;
 
+       mdev = mlx5_eswitch_get_core_dev(rep->esw);
+       if (!mdev)
+               return -EINVAL;
+
        info->vport = rep->vport;
        info->flags |= MLX5_IB_UAPI_QUERY_PORT_VPORT;
 
@@ -138,9 +142,9 @@ static int fill_switchdev_info(struct mlx5_ib_dev *dev, u32 port_num,
        if (err)
                return err;
 
-       if (mlx5_eswitch_vport_match_metadata_enabled(mdev->priv.eswitch)) {
+       if (mlx5_eswitch_vport_match_metadata_enabled(rep->esw)) {
                info->reg_c0.value = mlx5_eswitch_get_vport_metadata_for_match(
-                       mdev->priv.eswitch, rep->vport);
+                       rep->esw, rep->vport);
                info->reg_c0.mask = mlx5_eswitch_get_vport_metadata_mask();
                info->flags |= MLX5_IB_UAPI_QUERY_PORT_VPORT_REG_C0;
        }
index 6aabcb4..be4bcb4 100644 (file)
@@ -113,13 +113,14 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
        int                     num_buf;
        void                    *vaddr;
        int err;
+       int i;
 
        umem = ib_umem_get(pd->ibpd.device, start, length, access);
        if (IS_ERR(umem)) {
-               pr_warn("err %d from rxe_umem_get\n",
-                       (int)PTR_ERR(umem));
+               pr_warn("%s: Unable to pin memory region err = %d\n",
+                       __func__, (int)PTR_ERR(umem));
                err = PTR_ERR(umem);
-               goto err1;
+               goto err_out;
        }
 
        mr->umem = umem;
@@ -129,9 +130,9 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
 
        err = rxe_mr_alloc(mr, num_buf);
        if (err) {
-               pr_warn("err %d from rxe_mr_alloc\n", err);
-               ib_umem_release(umem);
-               goto err1;
+               pr_warn("%s: Unable to allocate memory for map\n",
+                               __func__);
+               goto err_release_umem;
        }
 
        mr->page_shift = PAGE_SHIFT;
@@ -151,10 +152,10 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
 
                        vaddr = page_address(sg_page_iter_page(&sg_iter));
                        if (!vaddr) {
-                               pr_warn("null vaddr\n");
-                               ib_umem_release(umem);
+                               pr_warn("%s: Unable to get virtual address\n",
+                                               __func__);
                                err = -ENOMEM;
-                               goto err1;
+                               goto err_cleanup_map;
                        }
 
                        buf->addr = (uintptr_t)vaddr;
@@ -177,7 +178,13 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
 
        return 0;
 
-err1:
+err_cleanup_map:
+       for (i = 0; i < mr->num_map; i++)
+               kfree(mr->map[i]);
+       kfree(mr->map);
+err_release_umem:
+       ib_umem_release(umem);
+err_out:
        return err;
 }
 
index 02281d1..508ac29 100644 (file)
@@ -1573,6 +1573,7 @@ int vb2_core_qbuf(struct vb2_queue *q, unsigned int index, void *pb,
                  struct media_request *req)
 {
        struct vb2_buffer *vb;
+       enum vb2_buffer_state orig_state;
        int ret;
 
        if (q->error) {
@@ -1673,6 +1674,7 @@ int vb2_core_qbuf(struct vb2_queue *q, unsigned int index, void *pb,
         * Add to the queued buffers list; a buffer will stay on it until
         * dequeued in dqbuf.
         */
+       orig_state = vb->state;
        list_add_tail(&vb->queued_entry, &q->queued_list);
        q->queued_count++;
        q->waiting_for_buffers = false;
@@ -1703,8 +1705,17 @@ int vb2_core_qbuf(struct vb2_queue *q, unsigned int index, void *pb,
        if (q->streaming && !q->start_streaming_called &&
            q->queued_count >= q->min_buffers_needed) {
                ret = vb2_start_streaming(q);
-               if (ret)
+               if (ret) {
+                       /*
+                        * Since vb2_core_qbuf will return with an error,
+                        * return the buffer to the DEQUEUED state, as the
+                        * error indicates that it was never queued.
+                        */
+                       list_del(&vb->queued_entry);
+                       q->queued_count--;
+                       vb->state = orig_state;
                        return ret;
+               }
        }
 
        dprintk(q, 2, "qbuf of buffer %d succeeded\n", vb->index);
index 4657e99..59a36f9 100644 (file)
@@ -173,10 +173,8 @@ static int cio2_bridge_connect_sensor(const struct cio2_sensor_config *cfg,
        int ret;
 
        for_each_acpi_dev_match(adev, cfg->hid, NULL, -1) {
-               if (!adev->status.enabled) {
-                       acpi_dev_put(adev);
+               if (!adev->status.enabled)
                        continue;
-               }
 
                if (bridge->n_sensors >= CIO2_NUM_PORTS) {
                        acpi_dev_put(adev);
@@ -185,7 +183,6 @@ static int cio2_bridge_connect_sensor(const struct cio2_sensor_config *cfg,
                }
 
                sensor = &bridge->sensors[bridge->n_sensors];
-               sensor->adev = adev;
                strscpy(sensor->name, cfg->hid, sizeof(sensor->name));
 
                ret = cio2_bridge_read_acpi_buffer(adev, "SSDB",
@@ -215,6 +212,7 @@ static int cio2_bridge_connect_sensor(const struct cio2_sensor_config *cfg,
                        goto err_free_swnodes;
                }
 
+               sensor->adev = acpi_dev_get(adev);
                adev->fwnode.secondary = fwnode;
 
                dev_info(&cio2->dev, "Found supported sensor %s\n",
index 07f342d..7481f55 100644 (file)
@@ -385,7 +385,7 @@ static int ngene_command_config_free_buf(struct ngene *dev, u8 *config)
 
        com.cmd.hdr.Opcode = CMD_CONFIGURE_FREE_BUFFER;
        com.cmd.hdr.Length = 6;
-       memcpy(&com.cmd.ConfigureBuffers.config, config, 6);
+       memcpy(&com.cmd.ConfigureFreeBuffers.config, config, 6);
        com.in_len = 6;
        com.out_len = 0;
 
index 84f04e0..3d296f1 100644 (file)
@@ -407,12 +407,14 @@ enum _BUFFER_CONFIGS {
 
 struct FW_CONFIGURE_FREE_BUFFERS {
        struct FW_HEADER hdr;
-       u8   UVI1_BufferLength;
-       u8   UVI2_BufferLength;
-       u8   TVO_BufferLength;
-       u8   AUD1_BufferLength;
-       u8   AUD2_BufferLength;
-       u8   TVA_BufferLength;
+       struct {
+               u8   UVI1_BufferLength;
+               u8   UVI2_BufferLength;
+               u8   TVO_BufferLength;
+               u8   AUD1_BufferLength;
+               u8   AUD2_BufferLength;
+               u8   TVA_BufferLength;
+       } __packed config;
 } __attribute__ ((__packed__));
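
One way to see what the nested struct buys: the 6-byte memcpy() in
ngene_command_config_free_buf() now has a destination whose size the
compiler can check. A hypothetical compile-time assertion (not part of
this patch) could read:

	/* Hypothetical: the nested config block must stay exactly 6 bytes. */
	BUILD_BUG_ON(sizeof(((struct FW_CONFIGURE_FREE_BUFFERS *)0)->config) != 6);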
 
 struct FW_CONFIGURE_UART {
index 99b5121..dda2f27 100644 (file)
@@ -8,6 +8,7 @@ config VIDEO_ATMEL_ISC
        select VIDEOBUF2_DMA_CONTIG
        select REGMAP_MMIO
        select V4L2_FWNODE
+       select VIDEO_ATMEL_ISC_BASE
        help
           This module makes the ATMEL Image Sensor Controller available
           as a v4l2 device.
@@ -19,10 +20,17 @@ config VIDEO_ATMEL_XISC
        select VIDEOBUF2_DMA_CONTIG
        select REGMAP_MMIO
        select V4L2_FWNODE
+       select VIDEO_ATMEL_ISC_BASE
        help
           This module makes the ATMEL eXtended Image Sensor Controller
           available as a v4l2 device.
 
+config VIDEO_ATMEL_ISC_BASE
+       tristate
+       default n
+       help
+         ATMEL ISC and XISC common code base.
+
 config VIDEO_ATMEL_ISI
        tristate "ATMEL Image Sensor Interface (ISI) support"
        depends on VIDEO_V4L2 && OF
index c5c0155..46d264a 100644 (file)
@@ -1,7 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0-only
-atmel-isc-objs = atmel-sama5d2-isc.o atmel-isc-base.o
-atmel-xisc-objs = atmel-sama7g5-isc.o atmel-isc-base.o
+atmel-isc-objs = atmel-sama5d2-isc.o
+atmel-xisc-objs = atmel-sama7g5-isc.o
 
 obj-$(CONFIG_VIDEO_ATMEL_ISI) += atmel-isi.o
+obj-$(CONFIG_VIDEO_ATMEL_ISC_BASE) += atmel-isc-base.o
 obj-$(CONFIG_VIDEO_ATMEL_ISC) += atmel-isc.o
 obj-$(CONFIG_VIDEO_ATMEL_XISC) += atmel-xisc.o
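
Both front ends now select VIDEO_ATMEL_ISC_BASE and resolve the exported
isc_* symbols at load time. A rough module-build load sequence, assuming
the object names above with dashes turned into underscores (modprobe would
also resolve the dependency on its own):

	modprobe atmel_isc_base   # common base, exports isc_clk_init() and friends
	modprobe atmel_isc        # sama5d2 front end, links against the base module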
index 19daa49..136ab7c 100644 (file)
@@ -378,6 +378,7 @@ int isc_clk_init(struct isc_device *isc)
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(isc_clk_init);
 
 void isc_clk_cleanup(struct isc_device *isc)
 {
@@ -392,6 +393,7 @@ void isc_clk_cleanup(struct isc_device *isc)
                        clk_unregister(isc_clk->clk);
        }
 }
+EXPORT_SYMBOL_GPL(isc_clk_cleanup);
 
 static int isc_queue_setup(struct vb2_queue *vq,
                            unsigned int *nbuffers, unsigned int *nplanes,
@@ -1578,6 +1580,7 @@ irqreturn_t isc_interrupt(int irq, void *dev_id)
 
        return ret;
 }
+EXPORT_SYMBOL_GPL(isc_interrupt);
 
 static void isc_hist_count(struct isc_device *isc, u32 *min, u32 *max)
 {
@@ -2212,6 +2215,7 @@ const struct v4l2_async_notifier_operations isc_async_ops = {
        .unbind = isc_async_unbind,
        .complete = isc_async_complete,
 };
+EXPORT_SYMBOL_GPL(isc_async_ops);
 
 void isc_subdev_cleanup(struct isc_device *isc)
 {
@@ -2224,6 +2228,7 @@ void isc_subdev_cleanup(struct isc_device *isc)
 
        INIT_LIST_HEAD(&isc->subdev_entities);
 }
+EXPORT_SYMBOL_GPL(isc_subdev_cleanup);
 
 int isc_pipeline_init(struct isc_device *isc)
 {
@@ -2264,6 +2269,7 @@ int isc_pipeline_init(struct isc_device *isc)
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(isc_pipeline_init);
 
 /* regmap configuration */
 #define ATMEL_ISC_REG_MAX    0xd5c
@@ -2273,4 +2279,9 @@ const struct regmap_config isc_regmap_config = {
        .val_bits       = 32,
        .max_register   = ATMEL_ISC_REG_MAX,
 };
+EXPORT_SYMBOL_GPL(isc_regmap_config);
 
+MODULE_AUTHOR("Songjun Wu");
+MODULE_AUTHOR("Eugen Hristev");
+MODULE_DESCRIPTION("Atmel ISC common code base");
+MODULE_LICENSE("GPL v2");
index 8370573..795a012 100644 (file)
@@ -37,7 +37,16 @@ static int rtl28xxu_ctrl_msg(struct dvb_usb_device *d, struct rtl28xxu_req *req)
        } else {
                /* read */
                requesttype = (USB_TYPE_VENDOR | USB_DIR_IN);
-               pipe = usb_rcvctrlpipe(d->udev, 0);
+
+               /*
+                * Zero-length transfers must use usb_sndctrlpipe(), and
+                * rtl28xxu_identify_state() uses a zero-length i2c read
+                * command to determine the chip type.
+                */
+               if (req->size)
+                       pipe = usb_rcvctrlpipe(d->udev, 0);
+               else
+                       pipe = usb_sndctrlpipe(d->udev, 0);
        }
 
        ret = usb_control_msg(d->udev, pipe, 0, requesttype, req->value,
@@ -612,9 +621,8 @@ static int rtl28xxu_read_config(struct dvb_usb_device *d)
 static int rtl28xxu_identify_state(struct dvb_usb_device *d, const char **name)
 {
        struct rtl28xxu_dev *dev = d_to_priv(d);
-       u8 buf[1];
        int ret;
-       struct rtl28xxu_req req_demod_i2c = {0x0020, CMD_I2C_DA_RD, 1, buf};
+       struct rtl28xxu_req req_demod_i2c = {0x0020, CMD_I2C_DA_RD, 0, NULL};
 
        dev_dbg(&d->intf->dev, "\n");
 
index 7a6f01a..305ffad 100644 (file)
@@ -714,23 +714,20 @@ static int at24_probe(struct i2c_client *client)
        }
 
        /*
-        * If the 'label' property is not present for the AT24 EEPROM,
-        * then nvmem_config.id is initialised to NVMEM_DEVID_AUTO,
-        * and this will append the 'devid' to the name of the NVMEM
-        * device. This is purely legacy and the AT24 driver has always
-        * defaulted to this. However, if the 'label' property is
-        * present then this means that the name is specified by the
-        * firmware and this name should be used verbatim and so it is
-        * not necessary to append the 'devid'.
+        * We initialize nvmem_config.id to NVMEM_DEVID_AUTO even if the
+        * label property is set, as some platforms can have multiple EEPROMs
+        * with the same label and we cannot register each of those with the
+        * same name. Failing to register those EEPROMs triggers a cascade
+        * failure on such platforms.
         */
+       nvmem_config.id = NVMEM_DEVID_AUTO;
+
        if (device_property_present(dev, "label")) {
-               nvmem_config.id = NVMEM_DEVID_NONE;
                err = device_property_read_string(dev, "label",
                                                  &nvmem_config.name);
                if (err)
                        return err;
        } else {
-               nvmem_config.id = NVMEM_DEVID_AUTO;
                nvmem_config.name = dev_name(dev);
        }
 
index 56213a8..995c613 100644 (file)
@@ -431,10 +431,10 @@ config VSOCKMON
 config MHI_NET
        tristate "MHI network driver"
        depends on MHI_BUS
-       select WWAN
        help
          This is the network driver for MHI bus.  It can be used with
-         QCOM based WWAN modems (like SDX55).  Say Y or M.
+         QCOM based WWAN modems for IP or QMAP/rmnet protocol (like SDX55).
+         Say Y or M.
 
 endif # NET_CORE
 
@@ -606,4 +606,11 @@ config NET_FAILOVER
          a VM with direct attached VF by failing over to the paravirtual
          datapath when the VF is unplugged.
 
+config NETDEV_LEGACY_INIT
+       bool
+       depends on ISA
+       help
+         Drivers that call netdev_boot_setup_check() should select this
+         symbol; everything else no longer needs it.
+
 endif # NETDEVICES
index a48a664..7398386 100644 (file)
@@ -18,7 +18,8 @@ obj-$(CONFIG_MACVLAN) += macvlan.o
 obj-$(CONFIG_MACVTAP) += macvtap.o
 obj-$(CONFIG_MII) += mii.o
 obj-$(CONFIG_MDIO) += mdio.o
-obj-$(CONFIG_NET) += Space.o loopback.o
+obj-$(CONFIG_NET) += loopback.o
+obj-$(CONFIG_NETDEV_LEGACY_INIT) += Space.o
 obj-$(CONFIG_NETCONSOLE) += netconsole.o
 obj-y += phy/
 obj-y += mdio/
@@ -36,7 +37,7 @@ obj-$(CONFIG_GTP) += gtp.o
 obj-$(CONFIG_NLMON) += nlmon.o
 obj-$(CONFIG_NET_VRF) += vrf.o
 obj-$(CONFIG_VSOCKMON) += vsockmon.o
-obj-$(CONFIG_MHI_NET) += mhi/
+obj-$(CONFIG_MHI_NET) += mhi_net.o
 
 #
 # Networking Drivers
index df79e73..49e67c9 100644 (file)
 #include <linux/netlink.h>
 #include <net/Space.h>
 
+/*
+ * This structure holds boot-time configured netdevice settings. They
+ * are then used during device probing.
+ */
+struct netdev_boot_setup {
+       char name[IFNAMSIZ];
+       struct ifmap map;
+};
+#define NETDEV_BOOT_SETUP_MAX 8
+
+
+/******************************************************************************
+ *
+ *                   Device Boot-time Settings Routines
+ *
+ ******************************************************************************/
+
+/* Boot time configuration table */
+static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
+
+/**
+ *     netdev_boot_setup_add   - add new setup entry
+ *     @name: name of the device
+ *     @map: configured settings for the device
+ *
+ *     Adds a new setup entry to the dev_boot_setup list.  The function
+ *     returns 0 on error and 1 on success.  This is a generic routine for
+ *     all netdevices.
+ */
+static int netdev_boot_setup_add(char *name, struct ifmap *map)
+{
+       struct netdev_boot_setup *s;
+       int i;
+
+       s = dev_boot_setup;
+       for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
+               if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
+                       memset(s[i].name, 0, sizeof(s[i].name));
+                       strlcpy(s[i].name, name, IFNAMSIZ);
+                       memcpy(&s[i].map, map, sizeof(s[i].map));
+                       break;
+               }
+       }
+
+       return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
+}
+
+/**
+ * netdev_boot_setup_check     - check boot time settings
+ * @dev: the netdevice
+ *
+ * Check boot time settings for the device.
+ * Any settings found are applied to the device for use
+ * later in device probing.
+ * Returns 0 if no settings are found, 1 if they are.
+ */
+int netdev_boot_setup_check(struct net_device *dev)
+{
+       struct netdev_boot_setup *s = dev_boot_setup;
+       int i;
+
+       for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
+               if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
+                   !strcmp(dev->name, s[i].name)) {
+                       dev->irq = s[i].map.irq;
+                       dev->base_addr = s[i].map.base_addr;
+                       dev->mem_start = s[i].map.mem_start;
+                       dev->mem_end = s[i].map.mem_end;
+                       return 1;
+               }
+       }
+       return 0;
+}
+EXPORT_SYMBOL(netdev_boot_setup_check);
+
+/**
+ * netdev_boot_base    - get address from boot time settings
+ * @prefix: prefix for network device
+ * @unit: id for network device
+ *
+ * Check boot time settings for the base address of the device,
+ * for use later in device probing.
+ * Returns 0 if no settings are found.
+ */
+static unsigned long netdev_boot_base(const char *prefix, int unit)
+{
+       const struct netdev_boot_setup *s = dev_boot_setup;
+       char name[IFNAMSIZ];
+       int i;
+
+       sprintf(name, "%s%d", prefix, unit);
+
+       /*
+        * If the device is already registered, return a base of 1
+        * to indicate that this interface should not be probed
+        */
+       if (__dev_get_by_name(&init_net, name))
+               return 1;
+
+       for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
+               if (!strcmp(name, s[i].name))
+                       return s[i].map.base_addr;
+       return 0;
+}
+
+/*
+ * Saves the settings configured at boot time for any netdevice.
+ */
+static int __init netdev_boot_setup(char *str)
+{
+       int ints[5];
+       struct ifmap map;
+
+       str = get_options(str, ARRAY_SIZE(ints), ints);
+       if (!str || !*str)
+               return 0;
+
+       /* Save settings */
+       memset(&map, 0, sizeof(map));
+       if (ints[0] > 0)
+               map.irq = ints[1];
+       if (ints[0] > 1)
+               map.base_addr = ints[2];
+       if (ints[0] > 2)
+               map.mem_start = ints[3];
+       if (ints[0] > 3)
+               map.mem_end = ints[4];
+
+       /* Add new entry to the list */
+       return netdev_boot_setup_add(str, &map);
+}
+
+__setup("netdev=", netdev_boot_setup);
+
+static int __init ether_boot_setup(char *str)
+{
+       return netdev_boot_setup(str);
+}
+__setup("ether=", ether_boot_setup);
+
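
For reference, the boot-time syntax these handlers accept follows the
ints[] unpacking in netdev_boot_setup() above; the values below are purely
illustrative:

	# up to four integers (irq, base_addr, mem_start, mem_end), then the name
	netdev=5,0x340,0,0,eth0
	ether=10,0x300,eth1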
+
 /* A unified ethernet device probe.  This is the easiest way to have every
  * ethernet adaptor have the name "eth[0123...]".
  */
@@ -77,39 +219,15 @@ static struct devprobe2 isa_probes[] __initdata = {
 #ifdef CONFIG_SMC9194
        {smc_init, 0},
 #endif
-#ifdef CONFIG_CS89x0
-#ifndef CONFIG_CS89x0_PLATFORM
+#ifdef CONFIG_CS89x0_ISA
        {cs89x0_probe, 0},
 #endif
-#endif
-#if defined(CONFIG_MVME16x_NET) || defined(CONFIG_BVME6000_NET)        /* Intel */
-       {i82596_probe, 0},                                      /* I82596 */
-#endif
 #ifdef CONFIG_NI65
        {ni65_probe, 0},
 #endif
        {NULL, 0},
 };
 
-static struct devprobe2 m68k_probes[] __initdata = {
-#ifdef CONFIG_ATARILANCE       /* Lance-based Atari ethernet boards */
-       {atarilance_probe, 0},
-#endif
-#ifdef CONFIG_SUN3LANCE         /* sun3 onboard Lance chip */
-       {sun3lance_probe, 0},
-#endif
-#ifdef CONFIG_SUN3_82586        /* sun3 onboard Intel 82586 chip */
-       {sun3_82586_probe, 0},
-#endif
-#ifdef CONFIG_APNE             /* A1200 PCMCIA NE2000 */
-       {apne_probe, 0},
-#endif
-#ifdef CONFIG_MVME147_NET      /* MVME147 internal Ethernet */
-       {mvme147lance_probe, 0},
-#endif
-       {NULL, 0},
-};
-
 /* Unified ethernet device probe, segmented per architecture and
  * per bus interface. This drives the legacy devices only for now.
  */
@@ -121,8 +239,7 @@ static void __init ethif_probe2(int unit)
        if (base_addr == 1)
                return;
 
-       (void)(probe_list2(unit, m68k_probes, base_addr == 0) &&
-               probe_list2(unit, isa_probes, base_addr == 0));
+       probe_list2(unit, isa_probes, base_addr == 0);
 }
 
 /*  Statically configured drivers -- order matters here. */
@@ -130,10 +247,6 @@ static int __init net_olddevs_init(void)
 {
        int num;
 
-#ifdef CONFIG_SBNI
-       for (num = 0; num < 8; ++num)
-               sbni_probe(num);
-#endif
        for (num = 0; num < 8; ++num)
                ethif_probe2(num);
 
@@ -142,9 +255,6 @@ static int __init net_olddevs_init(void)
        cops_probe(1);
        cops_probe(2);
 #endif
-#ifdef CONFIG_LTPC
-       ltpc_probe();
-#endif
 
        return 0;
 }
index 4391839..90b9f1d 100644 (file)
@@ -52,7 +52,9 @@ config LTPC
 
 config COPS
        tristate "COPS LocalTalk PC support"
-       depends on DEV_APPLETALK && (ISA || EISA)
+       depends on DEV_APPLETALK && ISA
+       depends on NETDEVICES
+       select NETDEV_LEGACY_INIT
        help
          This allows you to use COPS AppleTalk cards to connect to LocalTalk
          networks. You also need version 1.3.3 or later of the netatalk
index 69c2708..1f8925e 100644 (file)
@@ -1015,7 +1015,7 @@ static const struct net_device_ops ltpc_netdev = {
        .ndo_set_rx_mode        = set_multicast_list,
 };
 
-struct net_device * __init ltpc_probe(void)
+static struct net_device * __init ltpc_probe(void)
 {
        struct net_device *dev;
        int err = -ENOMEM;
@@ -1221,12 +1221,10 @@ static int __init ltpc_setup(char *str)
 }
 
 __setup("ltpc=", ltpc_setup);
-#endif /* MODULE */
+#endif
 
 static struct net_device *dev_ltpc;
 
-#ifdef MODULE
-
 MODULE_LICENSE("GPL");
 module_param(debug, int, 0);
 module_param_hw(io, int, ioport, 0);
@@ -1244,7 +1242,6 @@ static int __init ltpc_module_init(void)
        return PTR_ERR_OR_ZERO(dev_ltpc);
 }
 module_init(ltpc_module_init);
-#endif
 
 static void __exit ltpc_cleanup(void)
 {
index 6908822..a4a202b 100644 (file)
@@ -96,7 +96,7 @@ static int ad_marker_send(struct port *port, struct bond_marker *marker);
 static void ad_mux_machine(struct port *port, bool *update_slave_arr);
 static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port);
 static void ad_tx_machine(struct port *port);
-static void ad_periodic_machine(struct port *port);
+static void ad_periodic_machine(struct port *port, struct bond_params bond_params);
 static void ad_port_selection_logic(struct port *port, bool *update_slave_arr);
 static void ad_agg_selection_logic(struct aggregator *aggregator,
                                   bool *update_slave_arr);
@@ -1294,10 +1294,11 @@ static void ad_tx_machine(struct port *port)
 /**
  * ad_periodic_machine - handle a port's periodic state machine
  * @port: the port we're looking at
+ * @bond_params: bond parameters we will use
  *
 * Turn the ntt flag on periodically to perform periodic transmission of LACPDUs.
  */
-static void ad_periodic_machine(struct port *port)
+static void ad_periodic_machine(struct port *port, struct bond_params bond_params)
 {
        periodic_states_t last_state;
 
@@ -1306,8 +1307,8 @@ static void ad_periodic_machine(struct port *port)
 
        /* check if port was reinitialized */
        if (((port->sm_vars & AD_PORT_BEGIN) || !(port->sm_vars & AD_PORT_LACP_ENABLED) || !port->is_enabled) ||
-           (!(port->actor_oper_port_state & LACP_STATE_LACP_ACTIVITY) && !(port->partner_oper.port_state & LACP_STATE_LACP_ACTIVITY))
-          ) {
+           (!(port->actor_oper_port_state & LACP_STATE_LACP_ACTIVITY) && !(port->partner_oper.port_state & LACP_STATE_LACP_ACTIVITY)) ||
+           !bond_params.lacp_active) {
                port->sm_periodic_state = AD_NO_PERIODIC;
        }
        /* check if state machine should change state */
@@ -2341,7 +2342,7 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
                }
 
                ad_rx_machine(NULL, port);
-               ad_periodic_machine(port);
+               ad_periodic_machine(port, bond->params);
                ad_port_selection_logic(port, &update_slave_arr);
                ad_mux_machine(port, &update_slave_arr);
                ad_tx_machine(port);
index 22e5632..7d3752c 100644 (file)
@@ -17,7 +17,6 @@
 #include <linux/if_bonding.h>
 #include <linux/if_vlan.h>
 #include <linux/in.h>
-#include <net/ipx.h>
 #include <net/arp.h>
 #include <net/ipv6.h>
 #include <asm/byteorder.h>
@@ -1351,8 +1350,6 @@ struct slave *bond_xmit_tlb_slave_get(struct bonding *bond,
        if (!is_multicast_ether_addr(eth_data->h_dest)) {
                switch (skb->protocol) {
                case htons(ETH_P_IP):
-               case htons(ETH_P_IPX):
-                   /* In case of IPX, it will falback to L2 hash */
                case htons(ETH_P_IPV6):
                        hash_index = bond_xmit_hash(bond, skb);
                        if (bond->params.tlb_dynamic_lb) {
@@ -1454,35 +1451,6 @@ struct slave *bond_xmit_alb_slave_get(struct bonding *bond,
                hash_size = sizeof(ip6hdr->daddr);
                break;
        }
-       case ETH_P_IPX: {
-               const struct ipxhdr *ipxhdr;
-
-               if (pskb_network_may_pull(skb, sizeof(*ipxhdr))) {
-                       do_tx_balance = false;
-                       break;
-               }
-               ipxhdr = (struct ipxhdr *)skb_network_header(skb);
-
-               if (ipxhdr->ipx_checksum != IPX_NO_CHECKSUM) {
-                       /* something is wrong with this packet */
-                       do_tx_balance = false;
-                       break;
-               }
-
-               if (ipxhdr->ipx_type != IPX_TYPE_NCP) {
-                       /* The only protocol worth balancing in
-                        * this family since it has an "ARP" like
-                        * mechanism
-                        */
-                       do_tx_balance = false;
-                       break;
-               }
-
-               eth_data = eth_hdr(skb);
-               hash_start = (char *)eth_data->h_dest;
-               hash_size = ETH_ALEN;
-               break;
-       }
        case ETH_P_ARP:
                do_tx_balance = false;
                if (bond_info->rlb_enabled)
index bec8cea..365953e 100644 (file)
@@ -317,6 +317,19 @@ bool bond_sk_check(struct bonding *bond)
        }
 }
 
+static bool bond_xdp_check(struct bonding *bond)
+{
+       switch (BOND_MODE(bond)) {
+       case BOND_MODE_ROUNDROBIN:
+       case BOND_MODE_ACTIVEBACKUP:
+       case BOND_MODE_8023AD:
+       case BOND_MODE_XOR:
+               return true;
+       default:
+               return false;
+       }
+}
+
 /*---------------------------------- VLAN -----------------------------------*/
 
 /* In the following 2 functions, bond_vlan_rx_add_vid and bond_vlan_rx_kill_vid,
@@ -2133,6 +2146,41 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
                bond_update_slave_arr(bond, NULL);
 
 
+       if (!slave_dev->netdev_ops->ndo_bpf ||
+           !slave_dev->netdev_ops->ndo_xdp_xmit) {
+               if (bond->xdp_prog) {
+                       NL_SET_ERR_MSG(extack, "Slave does not support XDP");
+                       slave_err(bond_dev, slave_dev, "Slave does not support XDP\n");
+                       res = -EOPNOTSUPP;
+                       goto err_sysfs_del;
+               }
+       } else {
+               struct netdev_bpf xdp = {
+                       .command = XDP_SETUP_PROG,
+                       .flags   = 0,
+                       .prog    = bond->xdp_prog,
+                       .extack  = extack,
+               };
+
+               if (dev_xdp_prog_count(slave_dev) > 0) {
+                       NL_SET_ERR_MSG(extack,
+                                      "Slave has XDP program loaded, please unload before enslaving");
+                       slave_err(bond_dev, slave_dev,
+                                 "Slave has XDP program loaded, please unload before enslaving\n");
+                       res = -EOPNOTSUPP;
+                       goto err_sysfs_del;
+               }
+
+               res = slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp);
+               if (res < 0) {
+                       /* ndo_bpf() sets extack error message */
+                       slave_dbg(bond_dev, slave_dev, "Error %d calling ndo_bpf\n", res);
+                       goto err_sysfs_del;
+               }
+               if (bond->xdp_prog)
+                       bpf_prog_inc(bond->xdp_prog);
+       }
+
        slave_info(bond_dev, slave_dev, "Enslaving as %s interface with %s link\n",
                   bond_is_active_slave(new_slave) ? "an active" : "a backup",
                   new_slave->link != BOND_LINK_DOWN ? "an up" : "a down");
@@ -2252,7 +2300,17 @@ static int __bond_release_one(struct net_device *bond_dev,
        /* recompute stats just before removing the slave */
        bond_get_stats(bond->dev, &bond->bond_stats);
 
-       bond_upper_dev_unlink(bond, slave);
+       if (bond->xdp_prog) {
+               struct netdev_bpf xdp = {
+                       .command = XDP_SETUP_PROG,
+                       .flags   = 0,
+                       .prog    = NULL,
+                       .extack  = NULL,
+               };
+               if (slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp))
+                       slave_warn(bond_dev, slave_dev, "failed to unload XDP program\n");
+       }
+
        /* unregister rx_handler early so bond_handle_frame wouldn't be called
         * for this slave anymore.
         */
@@ -2261,6 +2319,8 @@ static int __bond_release_one(struct net_device *bond_dev,
        if (BOND_MODE(bond) == BOND_MODE_8023AD)
                bond_3ad_unbind_slave(slave);
 
+       bond_upper_dev_unlink(bond, slave);
+
        if (bond_mode_can_use_xmit_hash(bond))
                bond_update_slave_arr(bond, slave);
 
@@ -3613,55 +3673,80 @@ static struct notifier_block bond_netdev_notifier = {
 
 /*---------------------------- Hashing Policies -----------------------------*/
 
+/* Helper to access data in a packet, with or without a backing skb.
+ * If an skb is given, the data is linearized if necessary via pskb_may_pull().
+ */
+static inline const void *bond_pull_data(struct sk_buff *skb,
+                                        const void *data, int hlen, int n)
+{
+       if (likely(n <= hlen))
+               return data;
+       else if (skb && likely(pskb_may_pull(skb, n)))
+               return skb->head;
+
+       return NULL;
+}
+
 /* L2 hash helper */
-static inline u32 bond_eth_hash(struct sk_buff *skb)
+static inline u32 bond_eth_hash(struct sk_buff *skb, const void *data, int mhoff, int hlen)
 {
-       struct ethhdr *ep, hdr_tmp;
+       struct ethhdr *ep;
 
-       ep = skb_header_pointer(skb, 0, sizeof(hdr_tmp), &hdr_tmp);
-       if (ep)
-               return ep->h_dest[5] ^ ep->h_source[5] ^ ep->h_proto;
-       return 0;
+       data = bond_pull_data(skb, data, hlen, mhoff + sizeof(struct ethhdr));
+       if (!data)
+               return 0;
+
+       ep = (struct ethhdr *)(data + mhoff);
+       return ep->h_dest[5] ^ ep->h_source[5] ^ be16_to_cpu(ep->h_proto);
 }
 
-static bool bond_flow_ip(struct sk_buff *skb, struct flow_keys *fk,
-                        int *noff, int *proto, bool l34)
+static bool bond_flow_ip(struct sk_buff *skb, struct flow_keys *fk, const void *data,
+                        int hlen, __be16 l2_proto, int *nhoff, int *ip_proto, bool l34)
 {
        const struct ipv6hdr *iph6;
        const struct iphdr *iph;
 
-       if (skb->protocol == htons(ETH_P_IP)) {
-               if (unlikely(!pskb_may_pull(skb, *noff + sizeof(*iph))))
+       if (l2_proto == htons(ETH_P_IP)) {
+               data = bond_pull_data(skb, data, hlen, *nhoff + sizeof(*iph));
+               if (!data)
                        return false;
-               iph = (const struct iphdr *)(skb->data + *noff);
+
+               iph = (const struct iphdr *)(data + *nhoff);
                iph_to_flow_copy_v4addrs(fk, iph);
-               *noff += iph->ihl << 2;
+               *nhoff += iph->ihl << 2;
                if (!ip_is_fragment(iph))
-                       *proto = iph->protocol;
-       } else if (skb->protocol == htons(ETH_P_IPV6)) {
-               if (unlikely(!pskb_may_pull(skb, *noff + sizeof(*iph6))))
+                       *ip_proto = iph->protocol;
+       } else if (l2_proto == htons(ETH_P_IPV6)) {
+               data = bond_pull_data(skb, data, hlen, *nhoff + sizeof(*iph6));
+               if (!data)
                        return false;
-               iph6 = (const struct ipv6hdr *)(skb->data + *noff);
+
+               iph6 = (const struct ipv6hdr *)(data + *nhoff);
                iph_to_flow_copy_v6addrs(fk, iph6);
-               *noff += sizeof(*iph6);
-               *proto = iph6->nexthdr;
+               *nhoff += sizeof(*iph6);
+               *ip_proto = iph6->nexthdr;
        } else {
                return false;
        }
 
-       if (l34 && *proto >= 0)
-               fk->ports.ports = skb_flow_get_ports(skb, *noff, *proto);
+       if (l34 && *ip_proto >= 0)
+               fk->ports.ports = __skb_flow_get_ports(skb, *nhoff, *ip_proto, data, hlen);
 
        return true;
 }
 
-static u32 bond_vlan_srcmac_hash(struct sk_buff *skb)
+static u32 bond_vlan_srcmac_hash(struct sk_buff *skb, const void *data, int mhoff, int hlen)
 {
-       struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
+       struct ethhdr *mac_hdr;
        u32 srcmac_vendor = 0, srcmac_dev = 0;
        u16 vlan;
        int i;
 
+       data = bond_pull_data(skb, data, hlen, mhoff + sizeof(struct ethhdr));
+       if (!data)
+               return 0;
+       mac_hdr = (struct ethhdr *)(data + mhoff);
+
        for (i = 0; i < 3; i++)
                srcmac_vendor = (srcmac_vendor << 8) | mac_hdr->h_source[i];
 
@@ -3677,26 +3762,25 @@ static u32 bond_vlan_srcmac_hash(struct sk_buff *skb)
 }
 
 /* Extract the appropriate headers based on bond's xmit policy */
-static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb,
-                             struct flow_keys *fk)
+static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, const void *data,
+                             __be16 l2_proto, int nhoff, int hlen, struct flow_keys *fk)
 {
        bool l34 = bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34;
-       int noff, proto = -1;
+       int ip_proto = -1;
 
        switch (bond->params.xmit_policy) {
        case BOND_XMIT_POLICY_ENCAP23:
        case BOND_XMIT_POLICY_ENCAP34:
                memset(fk, 0, sizeof(*fk));
                return __skb_flow_dissect(NULL, skb, &flow_keys_bonding,
-                                         fk, NULL, 0, 0, 0, 0);
+                                         fk, data, l2_proto, nhoff, hlen, 0);
        default:
                break;
        }
 
        fk->ports.ports = 0;
        memset(&fk->icmp, 0, sizeof(fk->icmp));
-       noff = skb_network_offset(skb);
-       if (!bond_flow_ip(skb, fk, &noff, &proto, l34))
+       if (!bond_flow_ip(skb, fk, data, hlen, l2_proto, &nhoff, &ip_proto, l34))
                return false;
 
        /* ICMP error packets contains at least 8 bytes of the header
@@ -3704,22 +3788,20 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb,
         * to correlate ICMP error packets within the same flow which
         * generated the error.
         */
-       if (proto == IPPROTO_ICMP || proto == IPPROTO_ICMPV6) {
-               skb_flow_get_icmp_tci(skb, &fk->icmp, skb->data,
-                                     skb_transport_offset(skb),
-                                     skb_headlen(skb));
-               if (proto == IPPROTO_ICMP) {
+       if (ip_proto == IPPROTO_ICMP || ip_proto == IPPROTO_ICMPV6) {
+               skb_flow_get_icmp_tci(skb, &fk->icmp, data, nhoff, hlen);
+               if (ip_proto == IPPROTO_ICMP) {
                        if (!icmp_is_err(fk->icmp.type))
                                return true;
 
-                       noff += sizeof(struct icmphdr);
-               } else if (proto == IPPROTO_ICMPV6) {
+                       nhoff += sizeof(struct icmphdr);
+               } else if (ip_proto == IPPROTO_ICMPV6) {
                        if (!icmpv6_is_err(fk->icmp.type))
                                return true;
 
-                       noff += sizeof(struct icmp6hdr);
+                       nhoff += sizeof(struct icmp6hdr);
                }
-               return bond_flow_ip(skb, fk, &noff, &proto, l34);
+               return bond_flow_ip(skb, fk, data, hlen, l2_proto, &nhoff, &ip_proto, l34);
        }
 
        return true;
@@ -3735,33 +3817,26 @@ static u32 bond_ip_hash(u32 hash, struct flow_keys *flow)
        return hash >> 1;
 }
 
-/**
- * bond_xmit_hash - generate a hash value based on the xmit policy
- * @bond: bonding device
- * @skb: buffer to use for headers
- *
- * This function will extract the necessary headers from the skb buffer and use
- * them to generate a hash based on the xmit_policy set in the bonding device
+/* Generate hash based on xmit policy. If @skb is given, it is used to linearize
+ * the data as required, but this function can be used without it if the data is
+ * known to be linear (e.g. with xdp_buff).
  */
-u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb)
+static u32 __bond_xmit_hash(struct bonding *bond, struct sk_buff *skb, const void *data,
+                           __be16 l2_proto, int mhoff, int nhoff, int hlen)
 {
        struct flow_keys flow;
        u32 hash;
 
-       if (bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP34 &&
-           skb->l4_hash)
-               return skb->hash;
-
        if (bond->params.xmit_policy == BOND_XMIT_POLICY_VLAN_SRCMAC)
-               return bond_vlan_srcmac_hash(skb);
+               return bond_vlan_srcmac_hash(skb, data, mhoff, hlen);
 
        if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER2 ||
-           !bond_flow_dissect(bond, skb, &flow))
-               return bond_eth_hash(skb);
+           !bond_flow_dissect(bond, skb, data, l2_proto, nhoff, hlen, &flow))
+               return bond_eth_hash(skb, data, mhoff, hlen);
 
        if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER23 ||
            bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23) {
-               hash = bond_eth_hash(skb);
+               hash = bond_eth_hash(skb, data, mhoff, hlen);
        } else {
                if (flow.icmp.id)
                        memcpy(&hash, &flow.icmp, sizeof(hash));
@@ -3772,6 +3847,45 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb)
        return bond_ip_hash(hash, &flow);
 }
 
+/**
+ * bond_xmit_hash - generate a hash value based on the xmit policy
+ * @bond: bonding device
+ * @skb: buffer to use for headers
+ *
+ * This function will extract the necessary headers from the skb buffer and use
+ * them to generate a hash based on the xmit_policy set in the bonding device
+ */
+u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb)
+{
+       if (bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP34 &&
+           skb->l4_hash)
+               return skb->hash;
+
+       return __bond_xmit_hash(bond, skb, skb->head, skb->protocol,
+                               skb->mac_header, skb->network_header,
+                               skb_headlen(skb));
+}
+
+/**
+ * bond_xmit_hash_xdp - generate a hash value based on the xmit policy
+ * @bond: bonding device
+ * @xdp: buffer to use for headers
+ *
+ * The XDP variant of bond_xmit_hash.
+ */
+static u32 bond_xmit_hash_xdp(struct bonding *bond, struct xdp_buff *xdp)
+{
+       struct ethhdr *eth;
+
+       if (xdp->data + sizeof(struct ethhdr) > xdp->data_end)
+               return 0;
+
+       eth = (struct ethhdr *)xdp->data;
+
+       return __bond_xmit_hash(bond, NULL, xdp->data, eth->h_proto, 0,
+                               sizeof(struct ethhdr), xdp->data_end - xdp->data);
+}
+
 /*-------------------------- Device entry points ----------------------------*/
 
 void bond_work_init_all(struct bonding *bond)
@@ -4420,6 +4534,47 @@ non_igmp:
        return NULL;
 }
 
+static struct slave *bond_xdp_xmit_roundrobin_slave_get(struct bonding *bond,
+                                                       struct xdp_buff *xdp)
+{
+       struct slave *slave;
+       int slave_cnt;
+       u32 slave_id;
+       const struct ethhdr *eth;
+       void *data = xdp->data;
+
+       if (data + sizeof(struct ethhdr) > xdp->data_end)
+               goto non_igmp;
+
+       eth = (struct ethhdr *)data;
+       data += sizeof(struct ethhdr);
+
+       /* See comment on IGMP in bond_xmit_roundrobin_slave_get() */
+       if (eth->h_proto == htons(ETH_P_IP)) {
+               const struct iphdr *iph;
+
+               if (data + sizeof(struct iphdr) > xdp->data_end)
+                       goto non_igmp;
+
+               iph = (struct iphdr *)data;
+
+               if (iph->protocol == IPPROTO_IGMP) {
+                       slave = rcu_dereference(bond->curr_active_slave);
+                       if (slave)
+                               return slave;
+                       return bond_get_slave_by_id(bond, 0);
+               }
+       }
+
+non_igmp:
+       slave_cnt = READ_ONCE(bond->slave_cnt);
+       if (likely(slave_cnt)) {
+               slave_id = bond_rr_gen_slave_id(bond) % slave_cnt;
+               return bond_get_slave_by_id(bond, slave_id);
+       }
+       return NULL;
+}
+
 static netdev_tx_t bond_xmit_roundrobin(struct sk_buff *skb,
                                        struct net_device *bond_dev)
 {
@@ -4433,8 +4588,7 @@ static netdev_tx_t bond_xmit_roundrobin(struct sk_buff *skb,
        return bond_tx_drop(bond_dev, skb);
 }
 
-static struct slave *bond_xmit_activebackup_slave_get(struct bonding *bond,
-                                                     struct sk_buff *skb)
+static struct slave *bond_xmit_activebackup_slave_get(struct bonding *bond)
 {
        return rcu_dereference(bond->curr_active_slave);
 }
@@ -4448,7 +4602,7 @@ static netdev_tx_t bond_xmit_activebackup(struct sk_buff *skb,
        struct bonding *bond = netdev_priv(bond_dev);
        struct slave *slave;
 
-       slave = bond_xmit_activebackup_slave_get(bond, skb);
+       slave = bond_xmit_activebackup_slave_get(bond);
        if (slave)
                return bond_dev_queue_xmit(bond, skb, slave->dev);
 
@@ -4636,6 +4790,22 @@ static struct slave *bond_xmit_3ad_xor_slave_get(struct bonding *bond,
        return slave;
 }
 
+static struct slave *bond_xdp_xmit_3ad_xor_slave_get(struct bonding *bond,
+                                                    struct xdp_buff *xdp)
+{
+       struct bond_up_slave *slaves;
+       unsigned int count;
+       u32 hash;
+
+       hash = bond_xmit_hash_xdp(bond, xdp);
+       slaves = rcu_dereference(bond->usable_slaves);
+       count = slaves ? READ_ONCE(slaves->count) : 0;
+       if (unlikely(!count))
+               return NULL;
+
+       return slaves->arr[hash % count];
+}
+
 /* Use this Xmit function for 3AD as well as XOR modes. The current
  * usable slave array is formed in the control path. The xmit function
  * just calculates hash and sends the packet out.
@@ -4746,7 +4916,7 @@ static struct net_device *bond_xmit_get_slave(struct net_device *master_dev,
                slave = bond_xmit_roundrobin_slave_get(bond, skb);
                break;
        case BOND_MODE_ACTIVEBACKUP:
-               slave = bond_xmit_activebackup_slave_get(bond, skb);
+               slave = bond_xmit_activebackup_slave_get(bond);
                break;
        case BOND_MODE_8023AD:
        case BOND_MODE_XOR:
@@ -4920,6 +5090,174 @@ static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev)
        return ret;
 }
 
+static struct net_device *
+bond_xdp_get_xmit_slave(struct net_device *bond_dev, struct xdp_buff *xdp)
+{
+       struct bonding *bond = netdev_priv(bond_dev);
+       struct slave *slave;
+
+       /* Caller needs to hold rcu_read_lock() */
+
+       switch (BOND_MODE(bond)) {
+       case BOND_MODE_ROUNDROBIN:
+               slave = bond_xdp_xmit_roundrobin_slave_get(bond, xdp);
+               break;
+
+       case BOND_MODE_ACTIVEBACKUP:
+               slave = bond_xmit_activebackup_slave_get(bond);
+               break;
+
+       case BOND_MODE_8023AD:
+       case BOND_MODE_XOR:
+               slave = bond_xdp_xmit_3ad_xor_slave_get(bond, xdp);
+               break;
+
+       default:
+               /* Should never happen. Mode guarded by bond_xdp_check() */
+               netdev_err(bond_dev, "Unknown bonding mode %d for xdp xmit\n", BOND_MODE(bond));
+               WARN_ON_ONCE(1);
+               return NULL;
+       }
+
+       if (slave)
+               return slave->dev;
+
+       return NULL;
+}
+
+static int bond_xdp_xmit(struct net_device *bond_dev,
+                        int n, struct xdp_frame **frames, u32 flags)
+{
+       int nxmit, err = -ENXIO;
+
+       rcu_read_lock();
+
+       for (nxmit = 0; nxmit < n; nxmit++) {
+               struct xdp_frame *frame = frames[nxmit];
+               struct xdp_frame *frames1[] = {frame};
+               struct net_device *slave_dev;
+               struct xdp_buff xdp;
+
+               xdp_convert_frame_to_buff(frame, &xdp);
+
+               slave_dev = bond_xdp_get_xmit_slave(bond_dev, &xdp);
+               if (!slave_dev) {
+                       err = -ENXIO;
+                       break;
+               }
+
+               err = slave_dev->netdev_ops->ndo_xdp_xmit(slave_dev, 1, frames1, flags);
+               if (err < 1)
+                       break;
+       }
+
+       rcu_read_unlock();
+
+       /* If an error happened on the first frame, pass the error up;
+        * otherwise report the number of frames that were transmitted.
+        */
+       if (err < 0)
+               return (nxmit == 0 ? err : nxmit);
+
+       return nxmit;
+}
+
+static int bond_xdp_set(struct net_device *dev, struct bpf_prog *prog,
+                       struct netlink_ext_ack *extack)
+{
+       struct bonding *bond = netdev_priv(dev);
+       struct list_head *iter;
+       struct slave *slave, *rollback_slave;
+       struct bpf_prog *old_prog;
+       struct netdev_bpf xdp = {
+               .command = XDP_SETUP_PROG,
+               .flags   = 0,
+               .prog    = prog,
+               .extack  = extack,
+       };
+       int err;
+
+       ASSERT_RTNL();
+
+       if (!bond_xdp_check(bond))
+               return -EOPNOTSUPP;
+
+       old_prog = bond->xdp_prog;
+       bond->xdp_prog = prog;
+
+       bond_for_each_slave(bond, slave, iter) {
+               struct net_device *slave_dev = slave->dev;
+
+               if (!slave_dev->netdev_ops->ndo_bpf ||
+                   !slave_dev->netdev_ops->ndo_xdp_xmit) {
+                       NL_SET_ERR_MSG(extack, "Slave device does not support XDP");
+                       slave_err(dev, slave_dev, "Slave does not support XDP\n");
+                       err = -EOPNOTSUPP;
+                       goto err;
+               }
+
+               if (dev_xdp_prog_count(slave_dev) > 0) {
+                       NL_SET_ERR_MSG(extack,
+                                      "Slave has XDP program loaded, please unload before enslaving");
+                       slave_err(dev, slave_dev,
+                                 "Slave has XDP program loaded, please unload before enslaving\n");
+                       err = -EOPNOTSUPP;
+                       goto err;
+               }
+
+               err = slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp);
+               if (err < 0) {
+                       /* ndo_bpf() sets extack error message */
+                       slave_err(dev, slave_dev, "Error %d calling ndo_bpf\n", err);
+                       goto err;
+               }
+               if (prog)
+                       bpf_prog_inc(prog);
+       }
+
+       if (old_prog)
+               bpf_prog_put(old_prog);
+
+       if (prog)
+               static_branch_inc(&bpf_master_redirect_enabled_key);
+       else
+               static_branch_dec(&bpf_master_redirect_enabled_key);
+
+       return 0;
+
+err:
+       /* unwind the program changes */
+       bond->xdp_prog = old_prog;
+       xdp.prog = old_prog;
+       xdp.extack = NULL; /* do not overwrite original error */
+
+       bond_for_each_slave(bond, rollback_slave, iter) {
+               struct net_device *slave_dev = rollback_slave->dev;
+               int err_unwind;
+
+               if (slave == rollback_slave)
+                       break;
+
+               err_unwind = slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp);
+               if (err_unwind < 0)
+                       slave_err(dev, slave_dev,
+                                 "Error %d when unwinding XDP program change\n", err_unwind);
+               else if (xdp.prog)
+                       bpf_prog_inc(xdp.prog);
+       }
+       return err;
+}
+
+static int bond_xdp(struct net_device *dev, struct netdev_bpf *xdp)
+{
+       switch (xdp->command) {
+       case XDP_SETUP_PROG:
+               return bond_xdp_set(dev, xdp->prog, xdp->extack);
+       default:
+               return -EINVAL;
+       }
+}
+
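
A plausible userspace sequence for exercising these hooks, assuming an XDP
object compiled into prog.o with a program in section "xdp" (file and
section names are placeholders):

	ip link add bond0 type bond mode 802.3ad
	ip link set eth0 master bond0
	ip link set eth1 master bond0
	# attaching to the bond propagates the program to every slave via ndo_bpf()
	ip link set bond0 xdp obj prog.o sec xdp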
 static u32 bond_mode_bcast_speed(struct slave *slave, u32 speed)
 {
        if (speed == 0 || speed == SPEED_UNKNOWN)
@@ -5008,6 +5346,9 @@ static const struct net_device_ops bond_netdev_ops = {
        .ndo_features_check     = passthru_features_check,
        .ndo_get_xmit_slave     = bond_xmit_get_slave,
        .ndo_sk_get_lower_dev   = bond_sk_get_lower_dev,
+       .ndo_bpf                = bond_xdp,
+       .ndo_xdp_xmit           = bond_xdp_xmit,
+       .ndo_xdp_get_xmit_slave = bond_xdp_get_xmit_slave,
 };
 
 static const struct device_type bond_type = {
@@ -5477,6 +5818,7 @@ static int bond_check_params(struct bond_params *params)
        params->downdelay = downdelay;
        params->peer_notif_delay = 0;
        params->use_carrier = use_carrier;
+       params->lacp_active = 1;
        params->lacp_fast = lacp_fast;
        params->primary[0] = 0;
        params->primary_reselect = primary_reselect_value;
index 0561ece..5d54e11 100644 (file)
@@ -100,6 +100,7 @@ static const struct nla_policy bond_policy[IFLA_BOND_MAX + 1] = {
        [IFLA_BOND_MIN_LINKS]           = { .type = NLA_U32 },
        [IFLA_BOND_LP_INTERVAL]         = { .type = NLA_U32 },
        [IFLA_BOND_PACKETS_PER_SLAVE]   = { .type = NLA_U32 },
+       [IFLA_BOND_AD_LACP_ACTIVE]      = { .type = NLA_U8 },
        [IFLA_BOND_AD_LACP_RATE]        = { .type = NLA_U8 },
        [IFLA_BOND_AD_SELECT]           = { .type = NLA_U8 },
        [IFLA_BOND_AD_INFO]             = { .type = NLA_NESTED },
@@ -387,6 +388,16 @@ static int bond_changelink(struct net_device *bond_dev, struct nlattr *tb[],
                if (err)
                        return err;
        }
+
+       if (data[IFLA_BOND_AD_LACP_ACTIVE]) {
+               int lacp_active = nla_get_u8(data[IFLA_BOND_AD_LACP_ACTIVE]);
+
+               bond_opt_initval(&newval, lacp_active);
+               err = __bond_opt_set(bond, BOND_OPT_LACP_ACTIVE, &newval);
+               if (err)
+                       return err;
+       }
+
        if (data[IFLA_BOND_AD_LACP_RATE]) {
                int lacp_rate =
                        nla_get_u8(data[IFLA_BOND_AD_LACP_RATE]);
@@ -490,6 +501,7 @@ static size_t bond_get_size(const struct net_device *bond_dev)
                nla_total_size(sizeof(u32)) +   /* IFLA_BOND_MIN_LINKS */
                nla_total_size(sizeof(u32)) +   /* IFLA_BOND_LP_INTERVAL */
                nla_total_size(sizeof(u32)) +  /* IFLA_BOND_PACKETS_PER_SLAVE */
+               nla_total_size(sizeof(u8)) +    /* IFLA_BOND_AD_LACP_ACTIVE */
                nla_total_size(sizeof(u8)) +    /* IFLA_BOND_AD_LACP_RATE */
                nla_total_size(sizeof(u8)) +    /* IFLA_BOND_AD_SELECT */
                nla_total_size(sizeof(struct nlattr)) + /* IFLA_BOND_AD_INFO */
@@ -622,6 +634,10 @@ static int bond_fill_info(struct sk_buff *skb,
                        packets_per_slave))
                goto nla_put_failure;
 
+       if (nla_put_u8(skb, IFLA_BOND_AD_LACP_ACTIVE,
+                      bond->params.lacp_active))
+               goto nla_put_failure;
+
        if (nla_put_u8(skb, IFLA_BOND_AD_LACP_RATE,
                       bond->params.lacp_fast))
                goto nla_put_failure;
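For reference, a minimal sketch of driving the new attribute over rtnetlink, mirroring the nesting bond_changelink() parses (IFLA_LINKINFO -> IFLA_INFO_DATA -> IFLA_BOND_AD_LACP_ACTIVE). It assumes kernel headers that already carry the new attribute; ACK processing and bounds checks are elided, and since BOND_OPT_LACP_ACTIVE is flagged BOND_OPTFLAG_IFDOWN the bond must be down when this runs:

#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/if_link.h>
#include <net/if.h>

/* Append one attribute to the message (no bounds checking). */
static struct rtattr *nla_put(struct nlmsghdr *n, unsigned short type,
			      const void *data, int len)
{
	struct rtattr *rta = (struct rtattr *)((char *)n + NLMSG_ALIGN(n->nlmsg_len));

	rta->rta_type = type;
	rta->rta_len = RTA_LENGTH(len);
	if (len)
		memcpy(RTA_DATA(rta), data, len);
	n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(rta->rta_len);
	return rta;
}

int bond_set_lacp_active(const char *ifname, __u8 active)
{
	struct {
		struct nlmsghdr n;
		struct ifinfomsg i;
		char buf[128];
	} req = {
		.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
		.n.nlmsg_type = RTM_NEWLINK,
		.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
		.i.ifi_family = AF_UNSPEC,
		.i.ifi_index = (int)if_nametoindex(ifname),
	};
	struct sockaddr_nl sa = { .nl_family = AF_NETLINK };
	struct rtattr *linkinfo, *data;
	int fd, rc;

	linkinfo = nla_put(&req.n, IFLA_LINKINFO, NULL, 0);
	nla_put(&req.n, IFLA_INFO_KIND, "bond", 5);
	data = nla_put(&req.n, IFLA_INFO_DATA, NULL, 0);
	nla_put(&req.n, IFLA_BOND_AD_LACP_ACTIVE, &active, sizeof(active));
	/* Close the nests now that their payload sizes are known */
	data->rta_len = (char *)&req.n + req.n.nlmsg_len - (char *)data;
	linkinfo->rta_len = (char *)&req.n + req.n.nlmsg_len - (char *)linkinfo;

	fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
	rc = sendto(fd, &req, req.n.nlmsg_len, 0,
		    (struct sockaddr *)&sa, sizeof(sa));
	close(fd);	/* ACK handling elided for brevity */
	return rc < 0 ? -1 : 0;
}

iproute2 gained a matching "lacp_active" keyword for ip link around the same time, which wraps exactly this attribute.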
index 0cf25de..a8fde3b 100644 (file)
@@ -58,6 +58,8 @@ static int bond_option_lp_interval_set(struct bonding *bond,
                                       const struct bond_opt_value *newval);
 static int bond_option_pps_set(struct bonding *bond,
                               const struct bond_opt_value *newval);
+static int bond_option_lacp_active_set(struct bonding *bond,
+                                      const struct bond_opt_value *newval);
 static int bond_option_lacp_rate_set(struct bonding *bond,
                                     const struct bond_opt_value *newval);
 static int bond_option_ad_select_set(struct bonding *bond,
@@ -135,6 +137,12 @@ static const struct bond_opt_value bond_intmax_tbl[] = {
        { NULL,      -1,      0}
 };
 
+static const struct bond_opt_value bond_lacp_active[] = {
+       { "off", 0,  0},
+       { "on",  1,  BOND_VALFLAG_DEFAULT},
+       { NULL,  -1, 0}
+};
+
 static const struct bond_opt_value bond_lacp_rate_tbl[] = {
        { "slow", AD_LACP_SLOW, 0},
        { "fast", AD_LACP_FAST, 0},
@@ -283,6 +291,15 @@ static const struct bond_option bond_opts[BOND_OPT_LAST] = {
                .values = bond_intmax_tbl,
                .set = bond_option_updelay_set
        },
+       [BOND_OPT_LACP_ACTIVE] = {
+               .id = BOND_OPT_LACP_ACTIVE,
+               .name = "lacp_active",
+               .desc = "Send LACPDU frames with configured lacp rate, or act as 'speak when spoken to'",
+               .flags = BOND_OPTFLAG_IFDOWN,
+               .unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_8023AD)),
+               .values = bond_lacp_active,
+               .set = bond_option_lacp_active_set
+       },
        [BOND_OPT_LACP_RATE] = {
                .id = BOND_OPT_LACP_RATE,
                .name = "lacp_rate",
@@ -1333,6 +1350,16 @@ static int bond_option_pps_set(struct bonding *bond,
        return 0;
 }
 
+static int bond_option_lacp_active_set(struct bonding *bond,
+                                      const struct bond_opt_value *newval)
+{
+       netdev_dbg(bond->dev, "Setting LACP active to %s (%llu)\n",
+                  newval->string, newval->value);
+       bond->params.lacp_active = newval->value;
+
+       return 0;
+}
+
 static int bond_option_lacp_rate_set(struct bonding *bond,
                                     const struct bond_opt_value *newval)
 {
index 0fb1da3..f3e3bfd 100644 (file)
@@ -133,6 +133,8 @@ static void bond_info_show_master(struct seq_file *seq)
                struct ad_info ad_info;
 
                seq_puts(seq, "\n802.3ad info\n");
+               seq_printf(seq, "LACP active: %s\n",
+                          (bond->params.lacp_active) ? "on" : "off");
                seq_printf(seq, "LACP rate: %s\n",
                           (bond->params.lacp_fast) ? "fast" : "slow");
                seq_printf(seq, "Min links: %d\n", bond->params.min_links);
index 5f9e9a2..b9e9842 100644 (file)
@@ -339,10 +339,24 @@ static ssize_t bonding_show_peer_notif_delay(struct device *d,
 static DEVICE_ATTR(peer_notif_delay, 0644,
                   bonding_show_peer_notif_delay, bonding_sysfs_store_option);
 
-/* Show the LACP interval. */
-static ssize_t bonding_show_lacp(struct device *d,
-                                struct device_attribute *attr,
-                                char *buf)
+/* Show the LACP activity and interval. */
+static ssize_t bonding_show_lacp_active(struct device *d,
+                                       struct device_attribute *attr,
+                                       char *buf)
+{
+       struct bonding *bond = to_bond(d);
+       const struct bond_opt_value *val;
+
+       val = bond_opt_get_val(BOND_OPT_LACP_ACTIVE, bond->params.lacp_active);
+
+       return sprintf(buf, "%s %d\n", val->string, bond->params.lacp_active);
+}
+static DEVICE_ATTR(lacp_active, 0644,
+                  bonding_show_lacp_active, bonding_sysfs_store_option);
+
+static ssize_t bonding_show_lacp_rate(struct device *d,
+                                     struct device_attribute *attr,
+                                     char *buf)
 {
        struct bonding *bond = to_bond(d);
        const struct bond_opt_value *val;
@@ -352,7 +366,7 @@ static ssize_t bonding_show_lacp(struct device *d,
        return sprintf(buf, "%s %d\n", val->string, bond->params.lacp_fast);
 }
 static DEVICE_ATTR(lacp_rate, 0644,
-                  bonding_show_lacp, bonding_sysfs_store_option);
+                  bonding_show_lacp_rate, bonding_sysfs_store_option);
 
 static ssize_t bonding_show_min_links(struct device *d,
                                      struct device_attribute *attr,
@@ -738,6 +752,7 @@ static struct attribute *per_bond_attrs[] = {
        &dev_attr_downdelay.attr,
        &dev_attr_updelay.attr,
        &dev_attr_peer_notif_delay.attr,
+       &dev_attr_lacp_active.attr,
        &dev_attr_lacp_rate.attr,
        &dev_attr_ad_select.attr,
        &dev_attr_xmit_hash_policy.attr,
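With the attribute wired into per_bond_attrs, the option is also reachable at /sys/class/net/<bond>/bonding/lacp_active; reads yield the "<name> <value>" pair produced by the sprintf() above (e.g. "on 1"), and writes funnel through bonding_sysfs_store_option() into the same __bond_opt_set() path as netlink.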
index 54ffb79..7734229 100644 (file)
@@ -649,7 +649,7 @@ static inline void flexcan_error_irq_disable(const struct flexcan_priv *priv)
 
 static int flexcan_clks_enable(const struct flexcan_priv *priv)
 {
-       int err;
+       int err = 0;
 
        if (priv->clk_ipg) {
                err = clk_prepare_enable(priv->clk_ipg);
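Both clocks are optional on this hardware; if neither is present, the function previously returned whatever happened to be in the uninitialized err. Starting it at 0 makes the no-clock case a successful no-op.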
index dd17b8c..89d9c98 100644 (file)
@@ -218,7 +218,7 @@ static int hi3110_spi_trans(struct spi_device *spi, int len)
        return ret;
 }
 
-static u8 hi3110_cmd(struct spi_device *spi, u8 command)
+static int hi3110_cmd(struct spi_device *spi, u8 command)
 {
        struct hi3110_priv *priv = spi_get_drvdata(spi);
 
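The widened return type matters because hi3110_cmd() hands back the result of the SPI transfer: as a u8, negative error codes were truncated into meaningless small positive values that callers could not detect.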
index 2b1e575..6c369a3 100644 (file)
@@ -2304,6 +2304,7 @@ static irqreturn_t mcp251xfd_irq(int irq, void *dev_id)
                   err, priv->regs_status.intf);
        mcp251xfd_dump(priv);
        mcp251xfd_chip_interrupts_disable(priv);
+       mcp251xfd_timestamp_stop(priv);
 
        return handled;
 }
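Stopping the timestamping worker on this fatal path keeps it from polling the chip after interrupts have been disabled.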
index 0a37af4..2b5302e 100644 (file)
@@ -255,6 +255,8 @@ struct ems_usb {
        unsigned int free_slots; /* remember number of available slots */
 
        struct ems_cpc_msg active_params; /* active controller parameters */
+       void *rxbuf[MAX_RX_URBS];
+       dma_addr_t rxbuf_dma[MAX_RX_URBS];
 };
 
 static void ems_usb_read_interrupt_callback(struct urb *urb)
@@ -587,6 +589,7 @@ static int ems_usb_start(struct ems_usb *dev)
        for (i = 0; i < MAX_RX_URBS; i++) {
                struct urb *urb = NULL;
                u8 *buf = NULL;
+               dma_addr_t buf_dma;
 
                /* create a URB, and a buffer for it */
                urb = usb_alloc_urb(0, GFP_KERNEL);
@@ -596,7 +599,7 @@ static int ems_usb_start(struct ems_usb *dev)
                }
 
                buf = usb_alloc_coherent(dev->udev, RX_BUFFER_SIZE, GFP_KERNEL,
-                                        &urb->transfer_dma);
+                                        &buf_dma);
                if (!buf) {
                        netdev_err(netdev, "No memory left for USB buffer\n");
                        usb_free_urb(urb);
@@ -604,6 +607,8 @@ static int ems_usb_start(struct ems_usb *dev)
                        break;
                }
 
+               urb->transfer_dma = buf_dma;
+
                usb_fill_bulk_urb(urb, dev->udev, usb_rcvbulkpipe(dev->udev, 2),
                                  buf, RX_BUFFER_SIZE,
                                  ems_usb_read_bulk_callback, dev);
@@ -619,6 +624,9 @@ static int ems_usb_start(struct ems_usb *dev)
                        break;
                }
 
+               dev->rxbuf[i] = buf;
+               dev->rxbuf_dma[i] = buf_dma;
+
                /* Drop reference, USB core will take care of freeing it */
                usb_free_urb(urb);
        }
@@ -684,6 +692,10 @@ static void unlink_all_urbs(struct ems_usb *dev)
 
        usb_kill_anchored_urbs(&dev->rx_submitted);
 
+       for (i = 0; i < MAX_RX_URBS; ++i)
+               usb_free_coherent(dev->udev, RX_BUFFER_SIZE,
+                                 dev->rxbuf[i], dev->rxbuf_dma[i]);
+
        usb_kill_anchored_urbs(&dev->tx_submitted);
        atomic_set(&dev->active_tx_urbs, 0);
 
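The same leak is fixed identically in esd_usb2, mcba_usb and usb_8dev below: usb_kill_anchored_urbs() only cancels the URBs, so each coherent RX buffer and its DMA handle must be remembered at allocation time (the new rxbuf[]/rxbuf_dma[] arrays) and handed back to usb_free_coherent() at teardown, since the URBs that once carried transfer_dma were already freed right after submission.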
index 60f3e0c..7370981 100644 (file)
@@ -195,6 +195,8 @@ struct esd_usb2 {
        int net_count;
        u32 version;
        int rxinitdone;
+       void *rxbuf[MAX_RX_URBS];
+       dma_addr_t rxbuf_dma[MAX_RX_URBS];
 };
 
 struct esd_usb2_net_priv {
@@ -545,6 +547,7 @@ static int esd_usb2_setup_rx_urbs(struct esd_usb2 *dev)
        for (i = 0; i < MAX_RX_URBS; i++) {
                struct urb *urb = NULL;
                u8 *buf = NULL;
+               dma_addr_t buf_dma;
 
                /* create a URB, and a buffer for it */
                urb = usb_alloc_urb(0, GFP_KERNEL);
@@ -554,7 +557,7 @@ static int esd_usb2_setup_rx_urbs(struct esd_usb2 *dev)
                }
 
                buf = usb_alloc_coherent(dev->udev, RX_BUFFER_SIZE, GFP_KERNEL,
-                                        &urb->transfer_dma);
+                                        &buf_dma);
                if (!buf) {
                        dev_warn(dev->udev->dev.parent,
                                 "No memory left for USB buffer\n");
@@ -562,6 +565,8 @@ static int esd_usb2_setup_rx_urbs(struct esd_usb2 *dev)
                        goto freeurb;
                }
 
+               urb->transfer_dma = buf_dma;
+
                usb_fill_bulk_urb(urb, dev->udev,
                                  usb_rcvbulkpipe(dev->udev, 1),
                                  buf, RX_BUFFER_SIZE,
@@ -574,8 +579,12 @@ static int esd_usb2_setup_rx_urbs(struct esd_usb2 *dev)
                        usb_unanchor_urb(urb);
                        usb_free_coherent(dev->udev, RX_BUFFER_SIZE, buf,
                                          urb->transfer_dma);
+                       goto freeurb;
                }
 
+               dev->rxbuf[i] = buf;
+               dev->rxbuf_dma[i] = buf_dma;
+
 freeurb:
                /* Drop reference, USB core will take care of freeing it */
                usb_free_urb(urb);
@@ -663,6 +672,11 @@ static void unlink_all_urbs(struct esd_usb2 *dev)
        int i, j;
 
        usb_kill_anchored_urbs(&dev->rx_submitted);
+
+       for (i = 0; i < MAX_RX_URBS; ++i)
+               usb_free_coherent(dev->udev, RX_BUFFER_SIZE,
+                                 dev->rxbuf[i], dev->rxbuf_dma[i]);
+
        for (i = 0; i < dev->net_count; i++) {
                priv = dev->nets[i];
                if (priv) {
index a45865b..a1a154c 100644 (file)
@@ -653,6 +653,8 @@ static int mcba_usb_start(struct mcba_priv *priv)
                        break;
                }
 
+               urb->transfer_dma = buf_dma;
+
                usb_fill_bulk_urb(urb, priv->udev,
                                  usb_rcvbulkpipe(priv->udev, MCBA_USB_EP_IN),
                                  buf, MCBA_USB_RX_BUFF_SIZE,
index e36e60c..837b3fe 100644 (file)
 #define PCAN_USB_BERR_MASK     (PCAN_USB_ERR_RXERR | PCAN_USB_ERR_TXERR)
 
 /* identify bus event packets with rx/tx error counters */
-#define PCAN_USB_ERR_CNT               0x80
+#define PCAN_USB_ERR_CNT_DEC           0x00    /* counters are decreasing */
+#define PCAN_USB_ERR_CNT_INC           0x80    /* counters are increasing */
 
 /* private to PCAN-USB adapter */
 struct pcan_usb {
@@ -535,11 +536,12 @@ static int pcan_usb_handle_bus_evt(struct pcan_usb_msg_context *mc, u8 ir)
 
        /* according to the content of the packet */
        switch (ir) {
-       case PCAN_USB_ERR_CNT:
+       case PCAN_USB_ERR_CNT_DEC:
+       case PCAN_USB_ERR_CNT_INC:
 
                /* save rx/tx error counters in the device context */
-               pdev->bec.rxerr = mc->ptr[0];
-               pdev->bec.txerr = mc->ptr[1];
+               pdev->bec.rxerr = mc->ptr[1];
+               pdev->bec.txerr = mc->ptr[2];
                break;
 
        default:
index b6e7ef0..d1b83bd 100644 (file)
@@ -137,7 +137,8 @@ struct usb_8dev_priv {
        u8 *cmd_msg_buffer;
 
        struct mutex usb_8dev_cmd_lock;
-
+       void *rxbuf[MAX_RX_URBS];
+       dma_addr_t rxbuf_dma[MAX_RX_URBS];
 };
 
 /* tx frame */
@@ -733,6 +734,7 @@ static int usb_8dev_start(struct usb_8dev_priv *priv)
        for (i = 0; i < MAX_RX_URBS; i++) {
                struct urb *urb = NULL;
                u8 *buf;
+               dma_addr_t buf_dma;
 
                /* create a URB, and a buffer for it */
                urb = usb_alloc_urb(0, GFP_KERNEL);
@@ -742,7 +744,7 @@ static int usb_8dev_start(struct usb_8dev_priv *priv)
                }
 
                buf = usb_alloc_coherent(priv->udev, RX_BUFFER_SIZE, GFP_KERNEL,
-                                        &urb->transfer_dma);
+                                        &buf_dma);
                if (!buf) {
                        netdev_err(netdev, "No memory left for USB buffer\n");
                        usb_free_urb(urb);
@@ -750,6 +752,8 @@ static int usb_8dev_start(struct usb_8dev_priv *priv)
                        break;
                }
 
+               urb->transfer_dma = buf_dma;
+
                usb_fill_bulk_urb(urb, priv->udev,
                                  usb_rcvbulkpipe(priv->udev,
                                                  USB_8DEV_ENDP_DATA_RX),
@@ -767,6 +771,9 @@ static int usb_8dev_start(struct usb_8dev_priv *priv)
                        break;
                }
 
+               priv->rxbuf[i] = buf;
+               priv->rxbuf_dma[i] = buf_dma;
+
                /* Drop reference, USB core will take care of freeing it */
                usb_free_urb(urb);
        }
@@ -836,6 +843,10 @@ static void unlink_all_urbs(struct usb_8dev_priv *priv)
 
        usb_kill_anchored_urbs(&priv->rx_submitted);
 
+       for (i = 0; i < MAX_RX_URBS; ++i)
+               usb_free_coherent(priv->udev, RX_BUFFER_SIZE,
+                                 priv->rxbuf[i], priv->rxbuf_dma[i]);
+
        usb_kill_anchored_urbs(&priv->tx_submitted);
        atomic_set(&priv->active_tx_urbs, 0);
 
index b23e348..bd1417a 100644 (file)
@@ -2016,15 +2016,6 @@ int b53_br_flags(struct dsa_switch *ds, int port,
 }
 EXPORT_SYMBOL(b53_br_flags);
 
-int b53_set_mrouter(struct dsa_switch *ds, int port, bool mrouter,
-                   struct netlink_ext_ack *extack)
-{
-       b53_port_set_mcast_flood(ds->priv, port, mrouter);
-
-       return 0;
-}
-EXPORT_SYMBOL(b53_set_mrouter);
-
 static bool b53_possible_cpu_port(struct dsa_switch *ds, int port)
 {
        /* Broadcom switches will accept enabling Broadcom tags on the
@@ -2268,7 +2259,6 @@ static const struct dsa_switch_ops b53_switch_ops = {
        .port_bridge_leave      = b53_br_leave,
        .port_pre_bridge_flags  = b53_br_flags_pre,
        .port_bridge_flags      = b53_br_flags,
-       .port_set_mrouter       = b53_set_mrouter,
        .port_stp_state_set     = b53_br_set_stp_state,
        .port_fast_age          = b53_br_fast_age,
        .port_vlan_filtering    = b53_vlan_filtering,
index 82700a5..9bf8319 100644 (file)
@@ -328,8 +328,6 @@ int b53_br_flags_pre(struct dsa_switch *ds, int port,
 int b53_br_flags(struct dsa_switch *ds, int port,
                 struct switchdev_brport_flags flags,
                 struct netlink_ext_ack *extack);
-int b53_set_mrouter(struct dsa_switch *ds, int port, bool mrouter,
-                   struct netlink_ext_ack *extack);
 int b53_setup_devlink_resources(struct dsa_switch *ds);
 void b53_port_event(struct dsa_switch *ds, int port);
 void b53_phylink_validate(struct dsa_switch *ds, int port,
index 3b018fc..6ce9ec1 100644 (file)
@@ -1199,7 +1199,6 @@ static const struct dsa_switch_ops bcm_sf2_ops = {
        .port_pre_bridge_flags  = b53_br_flags_pre,
        .port_bridge_flags      = b53_br_flags,
        .port_stp_state_set     = b53_br_set_stp_state,
-       .port_set_mrouter       = b53_set_mrouter,
        .port_fast_age          = b53_br_fast_age,
        .port_vlan_filtering    = b53_vlan_filtering,
        .port_vlan_add          = b53_vlan_add,
index 69f21b7..53e6150 100644 (file)
@@ -366,8 +366,8 @@ mt7530_fdb_write(struct mt7530_priv *priv, u16 vid,
        int i;
 
        reg[1] |= vid & CVID_MASK;
-       if (vid > 1)
-               reg[1] |= ATA2_IVL;
+       reg[1] |= ATA2_IVL;
+       reg[1] |= ATA2_FID(FID_BRIDGED);
        reg[2] |= (aging & AGE_TIMER_MASK) << AGE_TIMER;
        reg[2] |= (port_mask & PORT_MAP_MASK) << PORT_MAP;
        /* STATIC_ENT indicates that the entry is static and wouldn't
@@ -1021,6 +1021,10 @@ mt753x_cpu_port_enable(struct dsa_switch *ds, int port)
        mt7530_write(priv, MT7530_PCR_P(port),
                     PCR_MATRIX(dsa_user_ports(priv->ds)));
 
+       /* Set to fallback mode for independent VLAN learning */
+       mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK,
+                  MT7530_PORT_FALLBACK_MODE);
+
        return 0;
 }
 
@@ -1143,7 +1147,8 @@ mt7530_stp_state_set(struct dsa_switch *ds, int port, u8 state)
                break;
        }
 
-       mt7530_rmw(priv, MT7530_SSP_P(port), FID_PST_MASK, stp_state);
+       mt7530_rmw(priv, MT7530_SSP_P(port), FID_PST_MASK(FID_BRIDGED),
+                  FID_PST(FID_BRIDGED, stp_state));
 }
 
 static int
@@ -1184,18 +1189,6 @@ mt7530_port_bridge_flags(struct dsa_switch *ds, int port,
        return 0;
 }
 
-static int
-mt7530_port_set_mrouter(struct dsa_switch *ds, int port, bool mrouter,
-                       struct netlink_ext_ack *extack)
-{
-       struct mt7530_priv *priv = ds->priv;
-
-       mt7530_rmw(priv, MT7530_MFC, UNM_FFP(BIT(port)),
-                  mrouter ? UNM_FFP(BIT(port)) : 0);
-
-       return 0;
-}
-
 static int
 mt7530_port_bridge_join(struct dsa_switch *ds, int port,
                        struct net_device *bridge)
@@ -1229,6 +1222,10 @@ mt7530_port_bridge_join(struct dsa_switch *ds, int port,
                           PCR_MATRIX_MASK, PCR_MATRIX(port_bitmap));
        priv->ports[port].pm |= PCR_MATRIX(port_bitmap);
 
+       /* Set to fallback mode for independent VLAN learning */
+       mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK,
+                  MT7530_PORT_FALLBACK_MODE);
+
        mutex_unlock(&priv->reg_mutex);
 
        return 0;
@@ -1241,15 +1238,22 @@ mt7530_port_set_vlan_unaware(struct dsa_switch *ds, int port)
        bool all_user_ports_removed = true;
        int i;
 
-       /* When a port is removed from the bridge, the port would be set up
-        * back to the default as is at initial boot which is a VLAN-unaware
-        * port.
+       /* This is called after .port_bridge_leave when leaving a VLAN-aware
+        * bridge. Don't set standalone ports to fallback mode.
         */
-       mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK,
-                  MT7530_PORT_MATRIX_MODE);
-       mt7530_rmw(priv, MT7530_PVC_P(port), VLAN_ATTR_MASK | PVC_EG_TAG_MASK,
+       if (dsa_to_port(ds, port)->bridge_dev)
+               mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK,
+                          MT7530_PORT_FALLBACK_MODE);
+
+       mt7530_rmw(priv, MT7530_PVC_P(port),
+                  VLAN_ATTR_MASK | PVC_EG_TAG_MASK | ACC_FRM_MASK,
                   VLAN_ATTR(MT7530_VLAN_TRANSPARENT) |
-                  PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT));
+                  PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT) |
+                  MT7530_VLAN_ACC_ALL);
+
+       /* Set PVID to 0 */
+       mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK,
+                  G0_PORT_VID_DEF);
 
        for (i = 0; i < MT7530_NUM_PORTS; i++) {
                if (dsa_is_user_port(ds, i) &&
@@ -1276,15 +1280,19 @@ mt7530_port_set_vlan_aware(struct dsa_switch *ds, int port)
        struct mt7530_priv *priv = ds->priv;
 
        /* Trapped into security mode allows packet forwarding through VLAN
-        * table lookup. CPU port is set to fallback mode to let untagged
-        * frames pass through.
+        * table lookup.
         */
-       if (dsa_is_cpu_port(ds, port))
-               mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK,
-                          MT7530_PORT_FALLBACK_MODE);
-       else
+       if (dsa_is_user_port(ds, port)) {
                mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK,
                           MT7530_PORT_SECURITY_MODE);
+               mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK,
+                          G0_PORT_VID(priv->ports[port].pvid));
+
+               /* Only accept tagged frames if PVID is not set */
+               if (!priv->ports[port].pvid)
+                       mt7530_rmw(priv, MT7530_PVC_P(port), ACC_FRM_MASK,
+                                  MT7530_VLAN_ACC_TAGGED);
+       }
 
        /* Set the port as a user port which is to be able to recognize VID
         * from incoming packets before fetching entry within the VLAN table.
@@ -1329,6 +1337,13 @@ mt7530_port_bridge_leave(struct dsa_switch *ds, int port,
                           PCR_MATRIX(BIT(MT7530_CPU_PORT)));
        priv->ports[port].pm = PCR_MATRIX(BIT(MT7530_CPU_PORT));
 
+       /* When a port is removed from the bridge, the port is set back
+        * to the default state it had at initial boot, which is a
+        * VLAN-unaware port.
+        */
+       mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK,
+                  MT7530_PORT_MATRIX_MODE);
+
        mutex_unlock(&priv->reg_mutex);
 }
 
@@ -1511,7 +1526,8 @@ mt7530_hw_vlan_add(struct mt7530_priv *priv,
        /* Validate the entry with independent learning, create egress tag per
         * VLAN and joining the port as one of the port members.
         */
-       val = IVL_MAC | VTAG_EN | PORT_MEM(new_members) | VLAN_VALID;
+       val = IVL_MAC | VTAG_EN | PORT_MEM(new_members) | FID(FID_BRIDGED) |
+             VLAN_VALID;
        mt7530_write(priv, MT7530_VAWD1, val);
 
        /* Decide whether adding tag or not for those outgoing packets from the
@@ -1601,9 +1617,28 @@ mt7530_port_vlan_add(struct dsa_switch *ds, int port,
        mt7530_hw_vlan_update(priv, vlan->vid, &new_entry, mt7530_hw_vlan_add);
 
        if (pvid) {
-               mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK,
-                          G0_PORT_VID(vlan->vid));
                priv->ports[port].pvid = vlan->vid;
+
+               /* Accept all frames if PVID is set */
+               mt7530_rmw(priv, MT7530_PVC_P(port), ACC_FRM_MASK,
+                          MT7530_VLAN_ACC_ALL);
+
+               /* Only configure PVID if VLAN filtering is enabled */
+               if (dsa_port_is_vlan_filtering(dsa_to_port(ds, port)))
+                       mt7530_rmw(priv, MT7530_PPBV1_P(port),
+                                  G0_PORT_VID_MASK,
+                                  G0_PORT_VID(vlan->vid));
+       } else if (vlan->vid && priv->ports[port].pvid == vlan->vid) {
+               /* This VLAN is overwritten without PVID, so unset it */
+               priv->ports[port].pvid = G0_PORT_VID_DEF;
+
+               /* Only accept tagged frames if the port is VLAN-aware */
+               if (dsa_port_is_vlan_filtering(dsa_to_port(ds, port)))
+                       mt7530_rmw(priv, MT7530_PVC_P(port), ACC_FRM_MASK,
+                                  MT7530_VLAN_ACC_TAGGED);
+
+               mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK,
+                          G0_PORT_VID_DEF);
        }
 
        mutex_unlock(&priv->reg_mutex);
@@ -1617,11 +1652,9 @@ mt7530_port_vlan_del(struct dsa_switch *ds, int port,
 {
        struct mt7530_hw_vlan_entry target_entry;
        struct mt7530_priv *priv = ds->priv;
-       u16 pvid;
 
        mutex_lock(&priv->reg_mutex);
 
-       pvid = priv->ports[port].pvid;
        mt7530_hw_vlan_entry_init(&target_entry, port, 0);
        mt7530_hw_vlan_update(priv, vlan->vid, &target_entry,
                              mt7530_hw_vlan_del);
@@ -1629,11 +1662,18 @@ mt7530_port_vlan_del(struct dsa_switch *ds, int port,
        /* PVID is being restored to the default whenever the PVID port
         * is being removed from the VLAN.
         */
-       if (pvid == vlan->vid)
-               pvid = G0_PORT_VID_DEF;
+       if (priv->ports[port].pvid == vlan->vid) {
+               priv->ports[port].pvid = G0_PORT_VID_DEF;
+
+               /* Only accept tagged frames if the port is VLAN-aware */
+               if (dsa_port_is_vlan_filtering(dsa_to_port(ds, port)))
+                       mt7530_rmw(priv, MT7530_PVC_P(port), ACC_FRM_MASK,
+                                  MT7530_VLAN_ACC_TAGGED);
+
+               mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK,
+                          G0_PORT_VID_DEF);
+       }
 
-       mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK, pvid);
-       priv->ports[port].pvid = pvid;
 
        mutex_unlock(&priv->reg_mutex);
 
@@ -1717,15 +1757,7 @@ static enum dsa_tag_protocol
 mtk_get_tag_protocol(struct dsa_switch *ds, int port,
                     enum dsa_tag_protocol mp)
 {
-       struct mt7530_priv *priv = ds->priv;
-
-       if (port != MT7530_CPU_PORT) {
-               dev_warn(priv->dev,
-                        "port not matched with tagging CPU port\n");
-               return DSA_TAG_PROTO_NONE;
-       } else {
-               return DSA_TAG_PROTO_MTK;
-       }
+       return DSA_TAG_PROTO_MTK;
 }
 
 #ifdef CONFIG_GPIOLIB
@@ -2054,6 +2086,7 @@ mt7530_setup(struct dsa_switch *ds)
         * as two netdev instances.
         */
        dn = dsa_to_port(ds, MT7530_CPU_PORT)->master->dev.of_node->parent;
+       ds->assisted_learning_on_cpu_port = true;
        ds->mtu_enforcement_ingress = true;
 
        if (priv->id == ID_MT7530) {
@@ -2124,6 +2157,9 @@ mt7530_setup(struct dsa_switch *ds)
                mt7530_rmw(priv, MT7530_PCR_P(i), PCR_MATRIX_MASK,
                           PCR_MATRIX_CLR);
 
+               /* Disable learning by default on all ports */
+               mt7530_set(priv, MT7530_PSC_P(i), SA_DIS);
+
                if (dsa_is_cpu_port(ds, i)) {
                        ret = mt753x_cpu_port_enable(ds, i);
                        if (ret)
@@ -2131,8 +2167,9 @@ mt7530_setup(struct dsa_switch *ds)
                } else {
                        mt7530_port_disable(ds, i);
 
-                       /* Disable learning by default on all user ports */
-                       mt7530_set(priv, MT7530_PSC_P(i), SA_DIS);
+                       /* Set default PVID to 0 on all user ports */
+                       mt7530_rmw(priv, MT7530_PPBV1_P(i), G0_PORT_VID_MASK,
+                                  G0_PORT_VID_DEF);
                }
                /* Enable consistent egress tag */
                mt7530_rmw(priv, MT7530_PVC_P(i), PVC_EG_TAG_MASK,
@@ -2289,6 +2326,9 @@ mt7531_setup(struct dsa_switch *ds)
                mt7530_rmw(priv, MT7530_PCR_P(i), PCR_MATRIX_MASK,
                           PCR_MATRIX_CLR);
 
+               /* Disable learning by default on all ports */
+               mt7530_set(priv, MT7530_PSC_P(i), SA_DIS);
+
                mt7530_set(priv, MT7531_DBG_CNT(i), MT7531_DIS_CLR);
 
                if (dsa_is_cpu_port(ds, i)) {
@@ -2298,8 +2338,9 @@ mt7531_setup(struct dsa_switch *ds)
                } else {
                        mt7530_port_disable(ds, i);
 
-                       /* Disable learning by default on all user ports */
-                       mt7530_set(priv, MT7530_PSC_P(i), SA_DIS);
+                       /* Set default PVID to 0 on all user ports */
+                       mt7530_rmw(priv, MT7530_PPBV1_P(i), G0_PORT_VID_MASK,
+                                  G0_PORT_VID_DEF);
                }
 
                /* Enable consistent egress tag */
@@ -2307,6 +2348,7 @@ mt7531_setup(struct dsa_switch *ds)
                           PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT));
        }
 
+       ds->assisted_learning_on_cpu_port = true;
        ds->mtu_enforcement_ingress = true;
 
        /* Flush the FDB table */
@@ -3060,7 +3102,6 @@ static const struct dsa_switch_ops mt7530_switch_ops = {
        .port_stp_state_set     = mt7530_stp_state_set,
        .port_pre_bridge_flags  = mt7530_port_pre_bridge_flags,
        .port_bridge_flags      = mt7530_port_bridge_flags,
-       .port_set_mrouter       = mt7530_port_set_mrouter,
        .port_bridge_join       = mt7530_port_bridge_join,
        .port_bridge_leave      = mt7530_port_bridge_leave,
        .port_fdb_add           = mt7530_port_fdb_add,
index b19b389..fe4cd2a 100644 (file)
@@ -80,6 +80,7 @@ enum mt753x_bpdu_port_fw {
 #define  STATIC_ENT                    3
 #define MT7530_ATA2                    0x78
 #define  ATA2_IVL                      BIT(15)
+#define  ATA2_FID(x)                   (((x) & 0x7) << 12)
 
 /* Register for address table write data */
 #define MT7530_ATWD                    0x7c
@@ -148,11 +149,18 @@ enum mt7530_vlan_cmd {
 #define  VTAG_EN                       BIT(28)
 /* VLAN Member Control */
 #define  PORT_MEM(x)                   (((x) & 0xff) << 16)
+/* Filter ID */
+#define  FID(x)                                (((x) & 0x7) << 1)
 /* VLAN Entry Valid */
 #define  VLAN_VALID                    BIT(0)
 #define  PORT_MEM_SHFT                 16
 #define  PORT_MEM_MASK                 0xff
 
+enum mt7530_fid {
+       FID_STANDALONE = 0,
+       FID_BRIDGED = 1,
+};
+
 #define MT7530_VAWD2                   0x98
 /* Egress Tag Control */
 #define  ETAG_CTRL_P(p, x)             (((x) & 0x3) << ((p) << 1))
@@ -179,8 +187,8 @@ enum mt7530_vlan_egress_attr {
 
 /* Register for port STP state control */
 #define MT7530_SSP_P(x)                        (0x2000 + ((x) * 0x100))
-#define  FID_PST(x)                    ((x) & 0x3)
-#define  FID_PST_MASK                  FID_PST(0x3)
+#define  FID_PST(fid, state)           (((state) & 0x3) << ((fid) * 2))
+#define  FID_PST_MASK(fid)             FID_PST(fid, 0x3)
 
 enum mt7530_stp_state {
        MT7530_STP_DISABLED = 0,
@@ -230,6 +238,7 @@ enum mt7530_port_mode {
 #define  PVC_EG_TAG_MASK               PVC_EG_TAG(7)
 #define  VLAN_ATTR(x)                  (((x) & 0x3) << 6)
 #define  VLAN_ATTR_MASK                        VLAN_ATTR(3)
+#define  ACC_FRM_MASK                  GENMASK(1, 0)
 
 enum mt7530_vlan_port_eg_tag {
        MT7530_VLAN_EG_DISABLED = 0,
@@ -241,13 +250,19 @@ enum mt7530_vlan_port_attr {
        MT7530_VLAN_TRANSPARENT = 3,
 };
 
+enum mt7530_vlan_port_acc_frm {
+       MT7530_VLAN_ACC_ALL = 0,
+       MT7530_VLAN_ACC_TAGGED = 1,
+       MT7530_VLAN_ACC_UNTAGGED = 2,
+};
+
 #define  STAG_VPID                     (((x) & 0xffff) << 16)
 
 /* Register for port port-and-protocol based vlan 1 control */
 #define MT7530_PPBV1_P(x)              (0x2014 + ((x) * 0x100))
 #define  G0_PORT_VID(x)                        (((x) & 0xfff) << 0)
 #define  G0_PORT_VID_MASK              G0_PORT_VID(0xfff)
-#define  G0_PORT_VID_DEF               G0_PORT_VID(1)
+#define  G0_PORT_VID_DEF               G0_PORT_VID(0)
 
 /* Register for port MAC control register */
 #define MT7530_PMCR_P(x)               (0x3000 + ((x) * 0x100))
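The reworked FID_PST()/FID_PST_MASK() pack one 2-bit STP state per filter ID into the SSP register, so the bridged FID can hold a different state than the standalone one. A self-contained illustration of the packing, using the definitions above (the state value 3 is just an example):

#include <stdio.h>

#define FID_PST(fid, state)	(((state) & 0x3) << ((fid) * 2))
#define FID_PST_MASK(fid)	FID_PST(fid, 0x3)

enum { FID_STANDALONE = 0, FID_BRIDGED = 1 };

int main(void)
{
	unsigned int val = FID_PST(FID_BRIDGED, 3);
	unsigned int mask = FID_PST_MASK(FID_BRIDGED);

	/* Prints val=0xc mask=0xc: bits 3:2 carry the bridged FID's
	 * state, while bits 1:0 (the standalone FID) are untouched by
	 * the read-modify-write.
	 */
	printf("val=%#x mask=%#x\n", val, mask);
	return 0;
}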
index af764b8..c45ca24 100644 (file)
@@ -2176,7 +2176,7 @@ static int mv88e6xxx_port_vlan_leave(struct mv88e6xxx_chip *chip,
        int i, err;
 
        if (!vid)
-               return -EOPNOTSUPP;
+               return 0;
 
        err = mv88e6xxx_vtu_get(chip, vid, &vlan);
        if (err)
@@ -5797,7 +5797,6 @@ static int mv88e6xxx_port_bridge_flags(struct dsa_switch *ds, int port,
                                       struct netlink_ext_ack *extack)
 {
        struct mv88e6xxx_chip *chip = ds->priv;
-       bool do_fast_age = false;
        int err = -EOPNOTSUPP;
 
        mv88e6xxx_reg_lock(chip);
@@ -5809,9 +5808,6 @@ static int mv88e6xxx_port_bridge_flags(struct dsa_switch *ds, int port,
                err = mv88e6xxx_port_set_assoc_vector(chip, port, pav);
                if (err)
                        goto out;
-
-               if (!learning)
-                       do_fast_age = true;
        }
 
        if (flags.mask & BR_FLOOD) {
@@ -5843,26 +5839,6 @@ static int mv88e6xxx_port_bridge_flags(struct dsa_switch *ds, int port,
 out:
        mv88e6xxx_reg_unlock(chip);
 
-       if (do_fast_age)
-               mv88e6xxx_port_fast_age(ds, port);
-
-       return err;
-}
-
-static int mv88e6xxx_port_set_mrouter(struct dsa_switch *ds, int port,
-                                     bool mrouter,
-                                     struct netlink_ext_ack *extack)
-{
-       struct mv88e6xxx_chip *chip = ds->priv;
-       int err;
-
-       if (!chip->info->ops->port_set_mcast_flood)
-               return -EOPNOTSUPP;
-
-       mv88e6xxx_reg_lock(chip);
-       err = chip->info->ops->port_set_mcast_flood(chip, port, mrouter);
-       mv88e6xxx_reg_unlock(chip);
-
        return err;
 }
 
@@ -6167,7 +6143,6 @@ static const struct dsa_switch_ops mv88e6xxx_switch_ops = {
        .port_bridge_leave      = mv88e6xxx_port_bridge_leave,
        .port_pre_bridge_flags  = mv88e6xxx_port_pre_bridge_flags,
        .port_bridge_flags      = mv88e6xxx_port_bridge_flags,
-       .port_set_mrouter       = mv88e6xxx_port_set_mrouter,
        .port_stp_state_set     = mv88e6xxx_port_stp_state_set,
        .port_fast_age          = mv88e6xxx_port_fast_age,
        .port_vlan_filtering    = mv88e6xxx_port_vlan_filtering,
index ca2ad77..6686192 100644 (file)
@@ -837,16 +837,24 @@ static int ar9331_mdio_write(void *ctx, u32 reg, u32 val)
                return 0;
        }
 
-       ret = __ar9331_mdio_write(sbus, AR9331_SW_MDIO_PHY_MODE_REG, reg, val);
+       /* In case of this switch we work with 32bit registers on top of 16bit
+        * bus. Some registers (for example access to forwarding database) have
+        * trigger bit on the first 16bit half of request, the result and
+        * configuration of request in the second half.
+        * To make it work properly, we should do the second part of transfer
+        * before the first one is done.
+        */
+       ret = __ar9331_mdio_write(sbus, AR9331_SW_MDIO_PHY_MODE_REG, reg + 2,
+                                 val >> 16);
        if (ret < 0)
                goto error;
 
-       ret = __ar9331_mdio_write(sbus, AR9331_SW_MDIO_PHY_MODE_REG, reg + 2,
-                                 val >> 16);
+       ret = __ar9331_mdio_write(sbus, AR9331_SW_MDIO_PHY_MODE_REG, reg, val);
        if (ret < 0)
                goto error;
 
        return 0;
+
 error:
        dev_err_ratelimited(&sbus->dev, "Bus error. Failed to write register.\n");
        return ret;
index 9cd7dbd..2e899c9 100644 (file)
@@ -233,7 +233,6 @@ struct sja1105_private {
        phy_interface_t phy_mode[SJA1105_MAX_NUM_PORTS];
        bool fixed_link[SJA1105_MAX_NUM_PORTS];
        bool vlan_aware;
-       unsigned long learn_ena;
        unsigned long ucast_egress_floods;
        unsigned long bcast_egress_floods;
        const struct sja1105_info *info;
index bd3ad18..f2049f5 100644 (file)
@@ -304,6 +304,15 @@ sja1105pqrs_common_l2_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
                        hostcmd = SJA1105_HOSTCMD_INVALIDATE;
        }
        sja1105_packing(p, &hostcmd, 25, 23, size, op);
+}
+
+static void
+sja1105pqrs_l2_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
+                                 enum packing_op op)
+{
+       int entry_size = SJA1105PQRS_SIZE_L2_LOOKUP_ENTRY;
+
+       sja1105pqrs_common_l2_lookup_cmd_packing(buf, cmd, op, entry_size);
 
        /* Hack - The hardware takes the 'index' field within
         * struct sja1105_l2_lookup_entry as the index on which this command
@@ -313,26 +322,18 @@ sja1105pqrs_common_l2_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
         * such that our API doesn't need to ask for a full-blown entry
         * structure when e.g. a delete is requested.
         */
-       sja1105_packing(buf, &cmd->index, 15, 6,
-                       SJA1105PQRS_SIZE_L2_LOOKUP_ENTRY, op);
-}
-
-static void
-sja1105pqrs_l2_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
-                                 enum packing_op op)
-{
-       int size = SJA1105PQRS_SIZE_L2_LOOKUP_ENTRY;
-
-       return sja1105pqrs_common_l2_lookup_cmd_packing(buf, cmd, op, size);
+       sja1105_packing(buf, &cmd->index, 15, 6, entry_size, op);
 }
 
 static void
 sja1110_l2_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
                              enum packing_op op)
 {
-       int size = SJA1110_SIZE_L2_LOOKUP_ENTRY;
+       int entry_size = SJA1110_SIZE_L2_LOOKUP_ENTRY;
+
+       sja1105pqrs_common_l2_lookup_cmd_packing(buf, cmd, op, entry_size);
 
-       return sja1105pqrs_common_l2_lookup_cmd_packing(buf, cmd, op, size);
+       sja1105_packing(buf, &cmd->index, 10, 1, entry_size, op);
 }
 
 /* The switch is so quirky that it makes our command/entry abstraction
index 5ab1676..6a52db1 100644 (file)
@@ -176,7 +176,7 @@ static int sja1105_init_mac_settings(struct sja1105_private *priv)
        struct sja1105_mac_config_entry *mac;
        struct dsa_switch *ds = priv->ds;
        struct sja1105_table *table;
-       int i;
+       struct dsa_port *dp;
 
        table = &priv->static_config.tables[BLK_IDX_MAC_CONFIG];
 
@@ -195,14 +195,21 @@ static int sja1105_init_mac_settings(struct sja1105_private *priv)
 
        mac = table->entries;
 
-       for (i = 0; i < ds->num_ports; i++) {
-               mac[i] = default_mac;
+       list_for_each_entry(dp, &ds->dst->ports, list) {
+               if (dp->ds != ds)
+                       continue;
+
+               mac[dp->index] = default_mac;
 
                /* Let sja1105_bridge_stp_state_set() keep address learning
-                * enabled for the CPU port.
+                * enabled for the DSA ports. CPU ports use software-assisted
+                * learning to ensure that only FDB entries belonging to the
+                * bridge are learned, and that they are learned towards all
+                * CPU ports in a cross-chip topology if multiple CPU ports
+                * exist.
                 */
-               if (dsa_is_cpu_port(ds, i))
-                       priv->learn_ena |= BIT(i);
+               if (dsa_port_is_dsa(dp))
+                       dp->learning = true;
        }
 
        return 0;
@@ -460,7 +467,7 @@ static int sja1105_init_static_vlan(struct sja1105_private *priv)
                pvid.vlan_bc |= BIT(port);
                pvid.tag_port &= ~BIT(port);
 
-               if (dsa_is_cpu_port(ds, port)) {
+               if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) {
                        priv->tag_8021q_pvid[port] = SJA1105_DEFAULT_VLAN;
                        priv->bridge_pvid[port] = SJA1105_DEFAULT_VLAN;
                }
@@ -474,8 +481,11 @@ static int sja1105_init_l2_forwarding(struct sja1105_private *priv)
 {
        struct sja1105_l2_forwarding_entry *l2fwd;
        struct dsa_switch *ds = priv->ds;
+       struct dsa_switch_tree *dst;
        struct sja1105_table *table;
-       int i, j;
+       struct dsa_link *dl;
+       int port, tc;
+       int from, to;
 
        table = &priv->static_config.tables[BLK_IDX_L2_FORWARDING];
 
@@ -493,47 +503,109 @@ static int sja1105_init_l2_forwarding(struct sja1105_private *priv)
 
        l2fwd = table->entries;
 
-       /* First 5 entries define the forwarding rules */
-       for (i = 0; i < ds->num_ports; i++) {
-               unsigned int upstream = dsa_upstream_port(priv->ds, i);
+       /* First 5 entries in the L2 Forwarding Table define the forwarding
+        * rules and the VLAN PCP to ingress queue mapping.
+        * Set up the ingress queue mapping first.
+        */
+       for (port = 0; port < ds->num_ports; port++) {
+               if (dsa_is_unused_port(ds, port))
+                       continue;
+
+               for (tc = 0; tc < SJA1105_NUM_TC; tc++)
+                       l2fwd[port].vlan_pmap[tc] = tc;
+       }
 
-               if (dsa_is_unused_port(ds, i))
+       /* Then manage the forwarding domain for user ports. These can forward
+        * only to the always-on domain (CPU port and DSA links)
+        */
+       for (from = 0; from < ds->num_ports; from++) {
+               if (!dsa_is_user_port(ds, from))
                        continue;
 
-               for (j = 0; j < SJA1105_NUM_TC; j++)
-                       l2fwd[i].vlan_pmap[j] = j;
+               for (to = 0; to < ds->num_ports; to++) {
+                       if (!dsa_is_cpu_port(ds, to) &&
+                           !dsa_is_dsa_port(ds, to))
+                               continue;
 
-               /* All ports start up with egress flooding enabled,
-                * including the CPU port.
-                */
-               priv->ucast_egress_floods |= BIT(i);
-               priv->bcast_egress_floods |= BIT(i);
+                       l2fwd[from].bc_domain |= BIT(to);
+                       l2fwd[from].fl_domain |= BIT(to);
 
-               if (i == upstream)
+                       sja1105_port_allow_traffic(l2fwd, from, to, true);
+               }
+       }
+
+       /* Then manage the forwarding domain for DSA links and CPU ports (the
+        * always-on domain). These can send packets to any enabled port except
+        * themselves.
+        */
+       for (from = 0; from < ds->num_ports; from++) {
+               if (!dsa_is_cpu_port(ds, from) && !dsa_is_dsa_port(ds, from))
                        continue;
 
-               sja1105_port_allow_traffic(l2fwd, i, upstream, true);
-               sja1105_port_allow_traffic(l2fwd, upstream, i, true);
+               for (to = 0; to < ds->num_ports; to++) {
+                       if (dsa_is_unused_port(ds, to))
+                               continue;
 
-               l2fwd[i].bc_domain = BIT(upstream);
-               l2fwd[i].fl_domain = BIT(upstream);
+                       if (from == to)
+                               continue;
+
+                       l2fwd[from].bc_domain |= BIT(to);
+                       l2fwd[from].fl_domain |= BIT(to);
+
+                       sja1105_port_allow_traffic(l2fwd, from, to, true);
+               }
+       }
+
+       /* In odd topologies ("H" connections where there is a DSA link to
+        * another switch which also has its own CPU port), TX packets can loop
+        * back into the system (they are flooded from CPU port 1 to the DSA
+        * link, and from there to CPU port 2). Prevent this from happening by
+        * cutting RX from DSA links towards our CPU port, if the remote switch
+        * has its own CPU port and therefore doesn't need ours for network
+        * stack termination.
+        */
+       dst = ds->dst;
+
+       list_for_each_entry(dl, &dst->rtable, list) {
+               if (dl->dp->ds != ds || dl->link_dp->cpu_dp == dl->dp->cpu_dp)
+                       continue;
+
+               from = dl->dp->index;
+               to = dsa_upstream_port(ds, from);
+
+               dev_warn(ds->dev,
+                        "H topology detected, cutting RX from DSA link %d to CPU port %d to prevent TX packet loops\n",
+                        from, to);
+
+               sja1105_port_allow_traffic(l2fwd, from, to, false);
 
-               l2fwd[upstream].bc_domain |= BIT(i);
-               l2fwd[upstream].fl_domain |= BIT(i);
+               l2fwd[from].bc_domain &= ~BIT(to);
+               l2fwd[from].fl_domain &= ~BIT(to);
+       }
+
+       /* Finally, manage the egress flooding domain. All ports start up with
+        * flooding enabled, including the CPU port and DSA links.
+        */
+       for (port = 0; port < ds->num_ports; port++) {
+               if (dsa_is_unused_port(ds, port))
+                       continue;
+
+               priv->ucast_egress_floods |= BIT(port);
+               priv->bcast_egress_floods |= BIT(port);
        }
 
        /* Next 8 entries define VLAN PCP mapping from ingress to egress.
         * Create a one-to-one mapping.
         */
-       for (i = 0; i < SJA1105_NUM_TC; i++) {
-               for (j = 0; j < ds->num_ports; j++) {
-                       if (dsa_is_unused_port(ds, j))
+       for (tc = 0; tc < SJA1105_NUM_TC; tc++) {
+               for (port = 0; port < ds->num_ports; port++) {
+                       if (dsa_is_unused_port(ds, port))
                                continue;
 
-                       l2fwd[ds->num_ports + i].vlan_pmap[j] = i;
+                       l2fwd[ds->num_ports + tc].vlan_pmap[port] = tc;
                }
 
-               l2fwd[ds->num_ports + i].type_egrpcp2outputq = true;
+               l2fwd[ds->num_ports + tc].type_egrpcp2outputq = true;
        }
 
        return 0;
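A toy, self-contained rendering of the first two loops for the simplest case, a single switch with no DSA links, assuming five ports with port 4 as the CPU port (hypothetical numbers):

#include <stdio.h>

#define NUM_PORTS	5
#define CPU_PORT	4

int main(void)
{
	unsigned int bc_domain[NUM_PORTS] = { 0 };
	int from, to;

	/* User ports may only flood towards the always-on domain (here
	 * just the CPU port); the CPU port may flood to everyone but
	 * itself.
	 */
	for (from = 0; from < NUM_PORTS; from++)
		for (to = 0; to < NUM_PORTS; to++) {
			if (from == to)
				continue;
			if (from == CPU_PORT || to == CPU_PORT)
				bc_domain[from] |= 1u << to;
		}

	/* Prints 0x10 for ports 0-3 and 0xf for port 4 */
	for (from = 0; from < NUM_PORTS; from++)
		printf("port %d: bc_domain %#x\n", from, bc_domain[from]);

	return 0;
}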
@@ -688,6 +760,72 @@ static void sja1110_select_tdmaconfigidx(struct sja1105_private *priv)
        general_params->tdmaconfigidx = tdmaconfigidx;
 }
 
+static int sja1105_init_topology(struct sja1105_private *priv,
+                                struct sja1105_general_params_entry *general_params)
+{
+       struct dsa_switch *ds = priv->ds;
+       int port;
+
+       /* The host port is the destination for traffic matching mac_fltres1
+        * and mac_fltres0 on all ports except itself. Default to an invalid
+        * value.
+        */
+       general_params->host_port = ds->num_ports;
+
+       /* Link-local traffic received on casc_port will be forwarded
+        * to host_port without embedding the source port and device ID
+        * info in the destination MAC address, and no RX timestamps will be
+        * taken either (presumably because it is a cascaded port and a
+        * downstream SJA switch already did that).
+        * To disable the feature, we need to do different things depending on
+        * switch generation. On SJA1105 we need to set an invalid port, while
+        * on SJA1110 which supports multiple cascaded ports, this field is a
+        * bitmask so it must be left zero.
+        */
+       if (!priv->info->multiple_cascade_ports)
+               general_params->casc_port = ds->num_ports;
+
+       for (port = 0; port < ds->num_ports; port++) {
+               bool is_upstream = dsa_is_upstream_port(ds, port);
+               bool is_dsa_link = dsa_is_dsa_port(ds, port);
+
+               /* Upstream ports can be dedicated CPU ports or
+                * upstream-facing DSA links
+                */
+               if (is_upstream) {
+                       if (general_params->host_port == ds->num_ports) {
+                               general_params->host_port = port;
+                       } else {
+                               dev_err(ds->dev,
+                                       "Port %llu is already a host port, configuring %d as one too is not supported\n",
+                                       general_params->host_port, port);
+                               return -EINVAL;
+                       }
+               }
+
+               /* Cascade ports are downstream-facing DSA links */
+               if (is_dsa_link && !is_upstream) {
+                       if (priv->info->multiple_cascade_ports) {
+                               general_params->casc_port |= BIT(port);
+                       } else if (general_params->casc_port == ds->num_ports) {
+                               general_params->casc_port = port;
+                       } else {
+                               dev_err(ds->dev,
+                                       "Port %llu is already a cascade port, configuring %d as one too is not supported\n",
+                                       general_params->casc_port, port);
+                               return -EINVAL;
+                       }
+               }
+       }
+
+       if (general_params->host_port == ds->num_ports) {
+               dev_err(ds->dev, "No host port configured\n");
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
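Concretely, in a two-switch daisy chain where only sw0 has a wired CPU port: on sw0 the CPU port is the lone upstream port and becomes host_port, while the DSA link towards sw1 faces downstream and becomes (or, on SJA1110, joins) casc_port; on sw1 the DSA link towards sw0 is the upstream port and therefore its host_port, with no cascade port at all.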
 static int sja1105_init_general_params(struct sja1105_private *priv)
 {
        struct sja1105_general_params_entry default_general_params = {
@@ -706,12 +844,6 @@ static int sja1105_init_general_params(struct sja1105_private *priv)
                .mac_flt0    = SJA1105_LINKLOCAL_FILTER_B_MASK,
                .incl_srcpt0 = false,
                .send_meta0  = false,
-               /* The destination for traffic matching mac_fltres1 and
-                * mac_fltres0 on all ports except host_port. Such traffic
-                * receieved on host_port itself would be dropped, except
-                * by installing a temporary 'management route'
-                */
-               .host_port = priv->ds->num_ports,
                /* Default to an invalid value */
                .mirr_port = priv->ds->num_ports,
                /* No TTEthernet */
@@ -731,16 +863,12 @@ static int sja1105_init_general_params(struct sja1105_private *priv)
                .header_type = ETH_P_SJA1110,
        };
        struct sja1105_general_params_entry *general_params;
-       struct dsa_switch *ds = priv->ds;
        struct sja1105_table *table;
-       int port;
+       int rc;
 
-       for (port = 0; port < ds->num_ports; port++) {
-               if (dsa_is_cpu_port(ds, port)) {
-                       default_general_params.host_port = port;
-                       break;
-               }
-       }
+       rc = sja1105_init_topology(priv, &default_general_params);
+       if (rc)
+               return rc;
 
        table = &priv->static_config.tables[BLK_IDX_GENERAL_PARAMS];
 
@@ -763,19 +891,6 @@ static int sja1105_init_general_params(struct sja1105_private *priv)
 
        sja1110_select_tdmaconfigidx(priv);
 
-       /* Link-local traffic received on casc_port will be forwarded
-        * to host_port without embedding the source port and device ID
-        * info in the destination MAC address, and no RX timestamps will be
-        * taken either (presumably because it is a cascaded port and a
-        * downstream SJA switch already did that).
-        * To disable the feature, we need to do different things depending on
-        * switch generation. On SJA1105 we need to set an invalid port, while
-        * on SJA1110 which support multiple cascaded ports, this field is a
-        * bitmask so it must be left zero.
-        */
-       if (!priv->info->multiple_cascade_ports)
-               general_params->casc_port = ds->num_ports;
-
        return 0;
 }
 
@@ -903,7 +1018,7 @@ static int sja1105_init_l2_policing(struct sja1105_private *priv)
        for (port = 0; port < ds->num_ports; port++) {
                int mtu = VLAN_ETH_FRAME_LEN + ETH_FCS_LEN;
 
-               if (dsa_is_cpu_port(priv->ds, port))
+               if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))
                        mtu += VLAN_HLEN;
 
                policing[port].smax = 65535; /* Burst size in bytes */
@@ -1372,10 +1487,11 @@ static int sja1105et_is_fdb_entry_in_bin(struct sja1105_private *priv, int bin,
 int sja1105et_fdb_add(struct dsa_switch *ds, int port,
                      const unsigned char *addr, u16 vid)
 {
-       struct sja1105_l2_lookup_entry l2_lookup = {0};
+       struct sja1105_l2_lookup_entry l2_lookup = {0}, tmp;
        struct sja1105_private *priv = ds->priv;
        struct device *dev = ds->dev;
        int last_unused = -1;
+       int start, end, i;
        int bin, way, rc;
 
        bin = sja1105et_fdb_hash(priv, addr, vid);
@@ -1387,7 +1503,7 @@ int sja1105et_fdb_add(struct dsa_switch *ds, int port,
                 * mask? If yes, we need to do nothing. If not, we need
                 * to rewrite the entry by adding this port to it.
                 */
-               if (l2_lookup.destports & BIT(port))
+               if ((l2_lookup.destports & BIT(port)) && l2_lookup.lockeds)
                        return 0;
                l2_lookup.destports |= BIT(port);
        } else {
@@ -1418,6 +1534,7 @@ int sja1105et_fdb_add(struct dsa_switch *ds, int port,
                                                     index, NULL, false);
                }
        }
+       l2_lookup.lockeds = true;
        l2_lookup.index = sja1105et_fdb_index(bin, way);
 
        rc = sja1105_dynamic_config_write(priv, BLK_IDX_L2_LOOKUP,
@@ -1426,6 +1543,29 @@ int sja1105et_fdb_add(struct dsa_switch *ds, int port,
        if (rc < 0)
                return rc;
 
+       /* Invalidate a dynamically learned entry if that exists */
+       start = sja1105et_fdb_index(bin, 0);
+       end = sja1105et_fdb_index(bin, way);
+
+       for (i = start; i < end; i++) {
+               rc = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP,
+                                                i, &tmp);
+               if (rc == -ENOENT)
+                       continue;
+               if (rc)
+                       return rc;
+
+               if (tmp.macaddr != ether_addr_to_u64(addr) || tmp.vlanid != vid)
+                       continue;
+
+               rc = sja1105_dynamic_config_write(priv, BLK_IDX_L2_LOOKUP,
+                                                 i, NULL, false);
+               if (rc)
+                       return rc;
+
+               break;
+       }
+
        return sja1105_static_fdb_change(priv, port, &l2_lookup, true);
 }
 
@@ -1467,32 +1607,30 @@ int sja1105et_fdb_del(struct dsa_switch *ds, int port,
 int sja1105pqrs_fdb_add(struct dsa_switch *ds, int port,
                        const unsigned char *addr, u16 vid)
 {
-       struct sja1105_l2_lookup_entry l2_lookup = {0};
+       struct sja1105_l2_lookup_entry l2_lookup = {0}, tmp;
        struct sja1105_private *priv = ds->priv;
        int rc, i;
 
        /* Search for an existing entry in the FDB table */
        l2_lookup.macaddr = ether_addr_to_u64(addr);
        l2_lookup.vlanid = vid;
-       l2_lookup.iotag = SJA1105_S_TAG;
        l2_lookup.mask_macaddr = GENMASK_ULL(ETH_ALEN * 8 - 1, 0);
-       if (priv->vlan_aware) {
-               l2_lookup.mask_vlanid = VLAN_VID_MASK;
-               l2_lookup.mask_iotag = BIT(0);
-       } else {
-               l2_lookup.mask_vlanid = 0;
-               l2_lookup.mask_iotag = 0;
-       }
+       l2_lookup.mask_vlanid = VLAN_VID_MASK;
        l2_lookup.destports = BIT(port);
 
+       tmp = l2_lookup;
+
        rc = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP,
-                                        SJA1105_SEARCH, &l2_lookup);
-       if (rc == 0) {
-               /* Found and this port is already in the entry's
+                                        SJA1105_SEARCH, &tmp);
+       if (rc == 0 && tmp.index != SJA1105_MAX_L2_LOOKUP_COUNT - 1) {
+               /* Found a static entry and this port is already in the entry's
                 * port mask => job done
                 */
-               if (l2_lookup.destports & BIT(port))
+               if ((tmp.destports & BIT(port)) && tmp.lockeds)
                        return 0;
+
+               l2_lookup = tmp;
+
                /* l2_lookup.index is populated by the switch in case it
                 * found something.
                 */
@@ -1514,16 +1652,46 @@ int sja1105pqrs_fdb_add(struct dsa_switch *ds, int port,
                dev_err(ds->dev, "FDB is full, cannot add entry.\n");
                return -EINVAL;
        }
-       l2_lookup.lockeds = true;
        l2_lookup.index = i;
 
 skip_finding_an_index:
+       l2_lookup.lockeds = true;
+
        rc = sja1105_dynamic_config_write(priv, BLK_IDX_L2_LOOKUP,
                                          l2_lookup.index, &l2_lookup,
                                          true);
        if (rc < 0)
                return rc;
 
+       /* The switch learns dynamic entries and looks up the FDB left to
+        * right. It is possible that our addition was concurrent with the
+        * dynamic learning of the same address, so now that the static entry
+        * has been installed, we are certain that address learning for this
+        * particular address has been turned off, so the dynamic entry either
+        * is in the FDB at an index smaller than the static one, or isn't (it
+        * can also be at a larger index, but in that case it is inactive
+        * because the static FDB entry will match first, and the dynamic one
+        * will eventually age out). Search for a dynamically learned address
+        * prior to our static one and invalidate it.
+        */
+       tmp = l2_lookup;
+
+       rc = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP,
+                                        SJA1105_SEARCH, &tmp);
+       if (rc < 0) {
+               dev_err(ds->dev,
+                       "port %d failed to read back entry for %pM vid %d: %pe\n",
+                       port, addr, vid, ERR_PTR(rc));
+               return rc;
+       }
+
+       if (tmp.index < l2_lookup.index) {
+               rc = sja1105_dynamic_config_write(priv, BLK_IDX_L2_LOOKUP,
+                                                 tmp.index, NULL, false);
+               if (rc < 0)
+                       return rc;
+       }
+
        return sja1105_static_fdb_change(priv, port, &l2_lookup, true);
 }
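
A minimal model of the lookup semantics that the comment in the hunk above depends on may help; fdb_entry and fdb_lookup below are illustrative names, not driver structures. The switch scans the table from index 0 and the first match wins, so a dynamic entry that landed at a lower index keeps shadowing the freshly written static entry until it is invalidated:

    struct fdb_entry {
            bool valid;
            bool lockeds;           /* true for static (locked) entries */
            u64  macaddr;
    };

    /* First hit wins, mirroring the left-to-right hardware search. */
    static const struct fdb_entry *fdb_lookup(const struct fdb_entry *tbl,
                                              int n, u64 mac)
    {
            int i;

            for (i = 0; i < n; i++)
                    if (tbl[i].valid && tbl[i].macaddr == mac)
                            return &tbl[i];
            return NULL;
    }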
 
@@ -1537,15 +1705,8 @@ int sja1105pqrs_fdb_del(struct dsa_switch *ds, int port,
 
        l2_lookup.macaddr = ether_addr_to_u64(addr);
        l2_lookup.vlanid = vid;
-       l2_lookup.iotag = SJA1105_S_TAG;
        l2_lookup.mask_macaddr = GENMASK_ULL(ETH_ALEN * 8 - 1, 0);
-       if (priv->vlan_aware) {
-               l2_lookup.mask_vlanid = VLAN_VID_MASK;
-               l2_lookup.mask_iotag = BIT(0);
-       } else {
-               l2_lookup.mask_vlanid = 0;
-               l2_lookup.mask_iotag = 0;
-       }
+       l2_lookup.mask_vlanid = VLAN_VID_MASK;
        l2_lookup.destports = BIT(port);
 
        rc = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP,
@@ -1633,6 +1794,46 @@ static int sja1105_fdb_dump(struct dsa_switch *ds, int port,
        return 0;
 }
 
+static void sja1105_fast_age(struct dsa_switch *ds, int port)
+{
+       struct sja1105_private *priv = ds->priv;
+       int i;
+
+       for (i = 0; i < SJA1105_MAX_L2_LOOKUP_COUNT; i++) {
+               struct sja1105_l2_lookup_entry l2_lookup = {0};
+               u8 macaddr[ETH_ALEN];
+               int rc;
+
+               rc = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP,
+                                                i, &l2_lookup);
+               /* No FDB entry at i, not an issue */
+               if (rc == -ENOENT)
+                       continue;
+               if (rc) {
+                       dev_err(ds->dev, "Failed to read FDB: %pe\n",
+                               ERR_PTR(rc));
+                       return;
+               }
+
+               if (!(l2_lookup.destports & BIT(port)))
+                       continue;
+
+               /* Don't delete static FDB entries */
+               if (l2_lookup.lockeds)
+                       continue;
+
+               u64_to_ether_addr(l2_lookup.macaddr, macaddr);
+
+               rc = sja1105_fdb_del(ds, port, macaddr, l2_lookup.vlanid);
+               if (rc) {
+                       dev_err(ds->dev,
+                               "Failed to delete FDB entry %pM vid %lld: %pe\n",
+                               macaddr, l2_lookup.vlanid, ERR_PTR(rc));
+                       return;
+               }
+       }
+}
+
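+
+/* For context, a hedged sketch of the DSA-core side that invokes the new
+ * hook (modeled on net/dsa/port.c; exact call sites differ between kernel
+ * versions). The core forwards bridge-triggered flushes, e.g. on STP
+ * transitions or when learning is switched off, straight to the driver:
+ *
+ *     static void dsa_port_fast_age(const struct dsa_port *dp)
+ *     {
+ *             struct dsa_switch *ds = dp->ds;
+ *
+ *             if (!ds->ops->port_fast_age)
+ *                     return;
+ *
+ *             // sja1105_fast_age() ends up here, with dp->index as port
+ *             ds->ops->port_fast_age(ds, dp->index);
+ *     }
+ */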
 static int sja1105_mdb_add(struct dsa_switch *ds, int port,
                           const struct switchdev_obj_port_mdb *mdb)
 {
@@ -1741,6 +1942,7 @@ static int sja1105_bridge_member(struct dsa_switch *ds, int port,
 static void sja1105_bridge_stp_state_set(struct dsa_switch *ds, int port,
                                         u8 state)
 {
+       struct dsa_port *dp = dsa_to_port(ds, port);
        struct sja1105_private *priv = ds->priv;
        struct sja1105_mac_config_entry *mac;
 
@@ -1766,12 +1968,12 @@ static void sja1105_bridge_stp_state_set(struct dsa_switch *ds, int port,
        case BR_STATE_LEARNING:
                mac[port].ingress   = true;
                mac[port].egress    = false;
-               mac[port].dyn_learn = !!(priv->learn_ena & BIT(port));
+               mac[port].dyn_learn = dp->learning;
                break;
        case BR_STATE_FORWARDING:
                mac[port].ingress   = true;
                mac[port].egress    = true;
-               mac[port].dyn_learn = !!(priv->learn_ena & BIT(port));
+               mac[port].dyn_learn = dp->learning;
                break;
        default:
                dev_err(ds->dev, "invalid STP state: %d\n", state);
@@ -2231,8 +2433,8 @@ static int sja1105_bridge_vlan_add(struct dsa_switch *ds, int port,
                return -EBUSY;
        }
 
-       /* Always install bridge VLANs as egress-tagged on the CPU port. */
-       if (dsa_is_cpu_port(ds, port))
+       /* Always install bridge VLANs as egress-tagged on CPU and DSA ports */
+       if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))
                flags = 0;
 
        rc = sja1105_vlan_add(priv, port, vlan->vid, flags);
@@ -2401,6 +2603,7 @@ static int sja1105_setup(struct dsa_switch *ds)
        ds->num_tx_queues = SJA1105_NUM_TC;
 
        ds->mtu_enforcement_ingress = true;
+       ds->assisted_learning_on_cpu_port = true;
 
        rc = sja1105_devlink_setup(ds);
        if (rc < 0)
@@ -2585,7 +2788,7 @@ static int sja1105_change_mtu(struct dsa_switch *ds, int port, int new_mtu)
 
        new_mtu += VLAN_ETH_HLEN + ETH_FCS_LEN;
 
-       if (dsa_is_cpu_port(ds, port))
+       if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))
                new_mtu += VLAN_HLEN;
 
        policing = priv->static_config.tables[BLK_IDX_L2_POLICING].entries;
@@ -2732,23 +2935,13 @@ static int sja1105_port_set_learning(struct sja1105_private *priv, int port,
                                     bool enabled)
 {
        struct sja1105_mac_config_entry *mac;
-       int rc;
 
        mac = priv->static_config.tables[BLK_IDX_MAC_CONFIG].entries;
 
        mac[port].dyn_learn = enabled;
 
-       rc = sja1105_dynamic_config_write(priv, BLK_IDX_MAC_CONFIG, port,
-                                         &mac[port], true);
-       if (rc)
-               return rc;
-
-       if (enabled)
-               priv->learn_ena |= BIT(port);
-       else
-               priv->learn_ena &= ~BIT(port);
-
-       return 0;
+       return sja1105_dynamic_config_write(priv, BLK_IDX_MAC_CONFIG, port,
+                                           &mac[port], true);
 }
 
 static int sja1105_port_ucast_bcast_flood(struct sja1105_private *priv, int to,
@@ -2883,6 +3076,7 @@ static const struct dsa_switch_ops sja1105_switch_ops = {
        .port_fdb_dump          = sja1105_fdb_dump,
        .port_fdb_add           = sja1105_fdb_add,
        .port_fdb_del           = sja1105_fdb_del,
+       .port_fast_age          = sja1105_fast_age,
        .port_bridge_join       = sja1105_bridge_join,
        .port_bridge_leave      = sja1105_bridge_leave,
        .port_pre_bridge_flags  = sja1105_port_pre_bridge_flags,
index 96cc5fc..87c906e 100644 (file)
@@ -302,7 +302,6 @@ static int el3_isa_match(struct device *pdev, unsigned int ndev)
                return -ENOMEM;
 
        SET_NETDEV_DEV(dev, pdev);
-       netdev_boot_setup_check(dev);
 
        if (!request_region(ioaddr, EL3_IO_EXTENT, "3c509-isa")) {
                free_netdev(dev);
@@ -421,7 +420,6 @@ static int el3_pnp_probe(struct pnp_dev *pdev, const struct pnp_device_id *id)
                return -ENOMEM;
        }
        SET_NETDEV_DEV(dev, &pdev->dev);
-       netdev_boot_setup_check(dev);
 
        el3_dev_fill(dev, phys_addr, ioaddr, irq, if_port, EL3_PNP);
        pnp_set_drvdata(pdev, dev);
@@ -514,7 +512,9 @@ static int el3_common_init(struct net_device *dev)
 {
        struct el3_private *lp = netdev_priv(dev);
        int err;
-       const char *if_names[] = {"10baseT", "AUI", "undefined", "BNC"};
+       static const char * const if_names[] = {
+               "10baseT", "AUI", "undefined", "BNC"
+       };
 
        spin_lock_init(&lp->lock);
 
@@ -588,7 +588,6 @@ static int el3_eisa_probe(struct device *device)
        }
 
        SET_NETDEV_DEV(dev, device);
-       netdev_boot_setup_check(dev);
 
        el3_dev_fill(dev, phys_addr, ioaddr, irq, if_port, EL3_EISA);
        eisa_set_drvdata (edev, dev);
index 47b4215..8d90fed 100644 (file)
@@ -407,7 +407,7 @@ MODULE_PARM_DESC(max_interrupt_work, "3c515 maximum events handled per interrupt
 /* we will need locking (and refcounting) if we ever use it for more */
 static LIST_HEAD(root_corkscrew_dev);
 
-int init_module(void)
+static int corkscrew_init_module(void)
 {
        int found = 0;
        if (debug >= 0)
@@ -416,6 +416,7 @@ int init_module(void)
                found++;
        return found ? 0 : -ENODEV;
 }
+module_init(corkscrew_init_module);
 
 #else
 struct net_device *tc515_probe(int unit)
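
The 3c515 hunk above is one instance of a conversion repeated across this series: the unnamed init_module()/cleanup_module() pair becomes a pair of static, driver-prefixed functions registered through module_init()/module_exit(). A generic sketch, with foo_* as placeholder names:

    static int __init foo_init_module(void)
    {
            /* probe and register devices here */
            return 0;
    }
    module_init(foo_init_module);

    static void __exit foo_cleanup_module(void)
    {
            /* unregister and free devices here */
    }
    module_exit(foo_cleanup_module);

Besides the namespacing, the named initcalls work for both built-in and modular builds, which is what lets later hunks in this series drop their #ifdef MODULE fencing.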
index a52a374..706bd59 100644 (file)
@@ -34,6 +34,7 @@ config EL3
 config 3C515
        tristate "3c515 ISA \"Fast EtherLink\""
        depends on ISA && ISA_DMA_API && !PPC32
+       select NETDEV_LEGACY_INIT
        help
          If you have a 3Com ISA EtherLink XL "Corkscrew" 3c515 Fast Ethernet
          network card, say Y here.
index 9f4b302..a4130e6 100644 (file)
@@ -102,6 +102,7 @@ config MCF8390
 config NE2000
        tristate "NE2000/NE1000 support"
        depends on (ISA || (Q40 && m) || MACH_TX49XX || ATARI_ETHERNEC)
+       select NETDEV_LEGACY_INIT if ISA
        select CRC32
        help
          If you have a network (Ethernet) card of this type, say Y here.
@@ -169,6 +170,7 @@ config STNIC
 config ULTRA
        tristate "SMC Ultra support"
        depends on ISA
+       select NETDEV_LEGACY_INIT
        select CRC32
        help
          If you have a network (Ethernet) card of this type, say Y here.
@@ -186,6 +188,7 @@ config ULTRA
 config WD80x3
        tristate "WD80*3 support"
        depends on ISA
+       select NETDEV_LEGACY_INIT
        select CRC32
        help
          If you have a network (Ethernet) card of this type, say Y here.
index fe6c834..da1ae37 100644 (file)
@@ -75,7 +75,6 @@
 #define NESM_STOP_PG   0x80    /* Last page +1 of RX ring */
 
 
-struct net_device * __init apne_probe(int unit);
 static int apne_probe1(struct net_device *dev, int ioaddr);
 
 static void apne_reset_8390(struct net_device *dev);
@@ -120,7 +119,7 @@ static u32 apne_msg_enable;
 module_param_named(msg_enable, apne_msg_enable, uint, 0444);
 MODULE_PARM_DESC(msg_enable, "Debug message level (see linux/netdevice.h for bitmap)");
 
-struct net_device * __init apne_probe(int unit)
+static struct net_device * __init apne_probe(void)
 {
        struct net_device *dev;
        struct ei_device *ei_local;
@@ -150,10 +149,6 @@ struct net_device * __init apne_probe(int unit)
        dev = alloc_ei_netdev();
        if (!dev)
                return ERR_PTR(-ENOMEM);
-       if (unit >= 0) {
-               sprintf(dev->name, "eth%d", unit);
-               netdev_boot_setup_check(dev);
-       }
        ei_local = netdev_priv(dev);
        ei_local->msg_enable = apne_msg_enable;
 
@@ -554,12 +549,11 @@ static irqreturn_t apne_interrupt(int irq, void *dev_id)
     return IRQ_HANDLED;
 }
 
-#ifdef MODULE
 static struct net_device *apne_dev;
 
 static int __init apne_module_init(void)
 {
-       apne_dev = apne_probe(-1);
+       apne_dev = apne_probe();
        return PTR_ERR_OR_ZERO(apne_dev);
 }
 
@@ -579,7 +573,6 @@ static void __exit apne_module_exit(void)
 }
 module_init(apne_module_init);
 module_exit(apne_module_exit);
-#endif
 
 static int init_pcmcia(void)
 {
index 9595dd1..6c6bdd5 100644 (file)
@@ -101,6 +101,12 @@ static inline struct ax_device *to_ax_dev(struct net_device *dev)
        return (struct ax_device *)(ei_local + 1);
 }
 
+void ax_NS8390_reinit(struct net_device *dev)
+{
+       ax_NS8390_init(dev, 1);
+}
+EXPORT_SYMBOL_GPL(ax_NS8390_reinit);
+
 /*
  * ax_initial_check
  *
index e9756d0..53660bc 100644 (file)
@@ -923,7 +923,7 @@ static void __init ne_add_devices(void)
 }
 
 #ifdef MODULE
-int __init init_module(void)
+static int __init ne_init(void)
 {
        int retval;
        ne_add_devices();
@@ -940,6 +940,7 @@ int __init init_module(void)
        ne_loop_rm_unreg(0);
        return retval;
 }
+module_init(ne_init);
 #else /* MODULE */
 static int __init ne_init(void)
 {
@@ -951,6 +952,7 @@ static int __init ne_init(void)
 }
 module_init(ne_init);
 
+#ifdef CONFIG_NETDEV_LEGACY_INIT
 struct net_device * __init ne_probe(int unit)
 {
        int this_dev;
@@ -991,6 +993,7 @@ struct net_device * __init ne_probe(int unit)
 
        return ERR_PTR(-ENODEV);
 }
+#endif
 #endif /* MODULE */
 
 static void __exit ne_exit(void)
index 1d8ed73..0890fa4 100644 (file)
@@ -522,7 +522,6 @@ static void ultra_pio_input(struct net_device *dev, int count,
        /* We know skbuffs are padded to at least word alignment. */
        insw(ioaddr + IOPD, buf, (count+1)>>1);
 }
-
 static void ultra_pio_output(struct net_device *dev, int count,
                                                        const unsigned char *buf, const int start_page)
 {
@@ -572,8 +571,7 @@ MODULE_LICENSE("GPL");
 
 /* This is set up so that only a single autoprobe takes place per call.
 ISA device autoprobes on a running machine are not recommended. */
-int __init
-init_module(void)
+static int __init ultra_init_module(void)
 {
        struct net_device *dev;
        int this_dev, found = 0;
@@ -600,6 +598,7 @@ init_module(void)
                return 0;
        return -ENXIO;
 }
+module_init(ultra_init_module);
 
 static void cleanup_card(struct net_device *dev)
 {
@@ -613,8 +612,7 @@ static void cleanup_card(struct net_device *dev)
        iounmap(ei_status.mem);
 }
 
-void __exit
-cleanup_module(void)
+static void __exit ultra_cleanup_module(void)
 {
        int this_dev;
 
@@ -627,4 +625,5 @@ cleanup_module(void)
                }
        }
 }
+module_exit(ultra_cleanup_module);
 #endif /* MODULE */
index c834123..263a942 100644 (file)
@@ -519,7 +519,7 @@ MODULE_LICENSE("GPL");
 /* This is set up so that only a single autoprobe takes place per call.
 ISA device autoprobes on a running machine are not recommended. */
 
-int __init init_module(void)
+static int __init wd_init_module(void)
 {
        struct net_device *dev;
        int this_dev, found = 0;
@@ -548,6 +548,7 @@ int __init init_module(void)
                return 0;
        return -ENXIO;
 }
+module_init(wd_init_module);
 
 static void cleanup_card(struct net_device *dev)
 {
@@ -556,8 +557,7 @@ static void cleanup_card(struct net_device *dev)
        iounmap(ei_status.mem);
 }
 
-void __exit
-cleanup_module(void)
+static void __exit wd_cleanup_module(void)
 {
        int this_dev;
 
@@ -570,4 +570,5 @@ cleanup_module(void)
                }
        }
 }
+module_exit(wd_cleanup_module);
 #endif /* MODULE */
index e2c9638..fe7a747 100644 (file)
@@ -22,8 +22,6 @@
 #define XS100_8390_DATA_WRITE32_BASE 0x0C80
 #define XS100_8390_DATA_AREA_SIZE 0x80
 
-#define __NS8390_init ax_NS8390_init
-
 /* force unsigned long back to 'void __iomem *' */
 #define ax_convert_addr(_a) ((void __force __iomem *)(_a))
 
 /* Ensure we have our RCR base value */
 #define AX88796_PLATFORM
 
-static unsigned char version[] =
-               "ax88796.c: Copyright 2005,2007 Simtec Electronics\n";
-
-#include "lib8390.c"
+#include "8390.h"
 
 /* from ne.c */
 #define NE_CMD         EI_SHIFT(0x00)
@@ -232,7 +227,7 @@ static void xs100_block_output(struct net_device *dev, int count,
                if (jiffies - dma_start > 2 * HZ / 100) {       /* 20ms */
                        netdev_warn(dev, "timeout waiting for Tx RDC.\n");
                        ei_local->reset_8390(dev);
-                       ax_NS8390_init(dev, 1);
+                       ax_NS8390_reinit(dev);
                        break;
                }
        }
index d0b0609..c6a3abe 100644 (file)
@@ -46,6 +46,7 @@ config AMD8111_ETH
 config LANCE
        tristate "AMD LANCE and PCnet (AT1500 and NE2100) support"
        depends on ISA && ISA_DMA_API && !ARM && !PPC32
+       select NETDEV_LEGACY_INIT
        help
          If you have a network (Ethernet) card of this type, say Y here.
          Some LinkSys cards are of this type.
@@ -132,6 +133,7 @@ config PCMCIA_NMCLAN
 config NI65
        tristate "NI6510 support"
        depends on ISA && ISA_DMA_API && !ARM && !PPC32
+       select NETDEV_LEGACY_INIT
        help
          If you have a network (Ethernet) card of this type, say Y here.
 
index 36f54d1..9d2f49f 100644 (file)
@@ -367,7 +367,7 @@ static void *slow_memcpy( void *dst, const void *src, size_t len )
 }
 
 
-struct net_device * __init atarilance_probe(int unit)
+struct net_device * __init atarilance_probe(void)
 {
        int i;
        static int found;
@@ -382,10 +382,6 @@ struct net_device * __init atarilance_probe(int unit)
        dev = alloc_etherdev(sizeof(struct lance_private));
        if (!dev)
                return ERR_PTR(-ENOMEM);
-       if (unit >= 0) {
-               sprintf(dev->name, "eth%d", unit);
-               netdev_boot_setup_check(dev);
-       }
 
        for( i = 0; i < N_LANCE_ADDR; ++i ) {
                if (lance_probe1( dev, &lance_addr_list[i] )) {
@@ -1137,13 +1133,11 @@ static int lance_set_mac_address( struct net_device *dev, void *addr )
        return 0;
 }
 
-
-#ifdef MODULE
 static struct net_device *atarilance_dev;
 
 static int __init atarilance_module_init(void)
 {
-       atarilance_dev = atarilance_probe(-1);
+       atarilance_dev = atarilance_probe();
        return PTR_ERR_OR_ZERO(atarilance_dev);
 }
 
@@ -1155,4 +1149,3 @@ static void __exit atarilance_module_exit(void)
 }
 module_init(atarilance_module_init);
 module_exit(atarilance_module_exit);
-#endif /* MODULE */
index 2178e6b..945bf1d 100644 (file)
@@ -327,7 +327,7 @@ MODULE_PARM_DESC(dma, "LANCE/PCnet ISA DMA channel (ignored for some devices)");
 MODULE_PARM_DESC(irq, "LANCE/PCnet IRQ number (ignored for some devices)");
 MODULE_PARM_DESC(lance_debug, "LANCE/PCnet debug level (0-7)");
 
-int __init init_module(void)
+static int __init lance_init_module(void)
 {
        struct net_device *dev;
        int this_dev, found = 0;
@@ -356,6 +356,7 @@ int __init init_module(void)
                return 0;
        return -ENXIO;
 }
+module_init(lance_init_module);
 
 static void cleanup_card(struct net_device *dev)
 {
@@ -368,7 +369,7 @@ static void cleanup_card(struct net_device *dev)
        kfree(lp);
 }
 
-void __exit cleanup_module(void)
+static void __exit lance_cleanup_module(void)
 {
        int this_dev;
 
@@ -381,6 +382,7 @@ void __exit cleanup_module(void)
                }
        }
 }
+module_exit(lance_cleanup_module);
 #endif /* MODULE */
 MODULE_LICENSE("GPL");
 
index 3f2e4cd..da97fcc 100644 (file)
@@ -68,7 +68,7 @@ static const struct net_device_ops lance_netdev_ops = {
 };
 
 /* Initialise the one and only on-board 7990 */
-struct net_device * __init mvme147lance_probe(int unit)
+static struct net_device * __init mvme147lance_probe(void)
 {
        struct net_device *dev;
        static int called;
@@ -86,9 +86,6 @@ struct net_device * __init mvme147lance_probe(int unit)
        if (!dev)
                return ERR_PTR(-ENOMEM);
 
-       if (unit >= 0)
-               sprintf(dev->name, "eth%d", unit);
-
        /* Fill the dev fields */
        dev->base_addr = (unsigned long)MVME147_LANCE_BASE;
        dev->netdev_ops = &lance_netdev_ops;
@@ -179,22 +176,21 @@ static int m147lance_close(struct net_device *dev)
        return 0;
 }
 
-#ifdef MODULE
 MODULE_LICENSE("GPL");
 
 static struct net_device *dev_mvme147_lance;
-int __init init_module(void)
+static int __init m147lance_init(void)
 {
-       dev_mvme147_lance = mvme147lance_probe(-1);
+       dev_mvme147_lance = mvme147lance_probe();
        return PTR_ERR_OR_ZERO(dev_mvme147_lance);
 }
+module_init(m147lance_init);
 
-void __exit cleanup_module(void)
+static void __exit m147lance_exit(void)
 {
        struct m147lance_private *lp = netdev_priv(dev_mvme147_lance);
        unregister_netdev(dev_mvme147_lance);
        free_pages(lp->ram, 3);
        free_netdev(dev_mvme147_lance);
 }
-
-#endif /* MODULE */
+module_exit(m147lance_exit);
index 5c1cfb0..b5df7ad 100644 (file)
@@ -1230,18 +1230,20 @@ MODULE_PARM_DESC(irq, "ni6510 IRQ number (ignored for some cards)");
 MODULE_PARM_DESC(io, "ni6510 I/O base address");
 MODULE_PARM_DESC(dma, "ni6510 ISA DMA channel (ignored for some cards)");
 
-int __init init_module(void)
+static int __init ni65_init_module(void)
 {
        dev_ni65 = ni65_probe(-1);
        return PTR_ERR_OR_ZERO(dev_ni65);
 }
+module_init(ni65_init_module);
 
-void __exit cleanup_module(void)
+static void __exit ni65_cleanup_module(void)
 {
        unregister_netdev(dev_ni65);
        cleanup_card(dev_ni65);
        free_netdev(dev_ni65);
 }
+module_exit(ni65_cleanup_module);
 #endif /* MODULE */
 
 MODULE_LICENSE("GPL");
index f8d7a93..4a845bc 100644 (file)
@@ -245,7 +245,7 @@ static void set_multicast_list( struct net_device *dev );
 
 /************************* End of Prototypes **************************/
 
-struct net_device * __init sun3lance_probe(int unit)
+static struct net_device * __init sun3lance_probe(void)
 {
        struct net_device *dev;
        static int found;
@@ -272,10 +272,6 @@ struct net_device * __init sun3lance_probe(int unit)
        dev = alloc_etherdev(sizeof(struct lance_private));
        if (!dev)
                return ERR_PTR(-ENOMEM);
-       if (unit >= 0) {
-               sprintf(dev->name, "eth%d", unit);
-               netdev_boot_setup_check(dev);
-       }
 
        if (!lance_probe(dev))
                goto out;
@@ -924,17 +920,16 @@ static void set_multicast_list( struct net_device *dev )
 }
 
 
-#ifdef MODULE
-
 static struct net_device *sun3lance_dev;
 
-int __init init_module(void)
+static int __init sun3lance_init(void)
 {
-       sun3lance_dev = sun3lance_probe(-1);
+       sun3lance_dev = sun3lance_probe();
        return PTR_ERR_OR_ZERO(sun3lance_dev);
 }
+module_init(sun3lance_init);
 
-void __exit cleanup_module(void)
+static void __exit sun3lance_cleanup(void)
 {
        unregister_netdev(sun3lance_dev);
 #ifdef CONFIG_SUN3
@@ -942,6 +937,4 @@ void __exit cleanup_module(void)
 #endif
        free_netdev(sun3lance_dev);
 }
-
-#endif /* MODULE */
-
+module_exit(sun3lance_cleanup);
index 1a6ec1a..b5d954c 100644 (file)
@@ -2669,7 +2669,8 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
        }
 
        /* Allocated memory for FW statistics  */
-       if (bnx2x_alloc_fw_stats_mem(bp))
+       rc = bnx2x_alloc_fw_stats_mem(bp);
+       if (rc)
                LOAD_ERROR_EXIT(bp, load_error0);
 
        /* request pf to initialize status blocks */
index 4a92ea7..865fcb8 100644 (file)
@@ -3163,6 +3163,58 @@ static int bnxt_alloc_tx_rings(struct bnxt *bp)
        return 0;
 }
 
+static void bnxt_free_cp_arrays(struct bnxt_cp_ring_info *cpr)
+{
+       kfree(cpr->cp_desc_ring);
+       cpr->cp_desc_ring = NULL;
+       kfree(cpr->cp_desc_mapping);
+       cpr->cp_desc_mapping = NULL;
+}
+
+static int bnxt_alloc_cp_arrays(struct bnxt_cp_ring_info *cpr, int n)
+{
+       cpr->cp_desc_ring = kcalloc(n, sizeof(*cpr->cp_desc_ring), GFP_KERNEL);
+       if (!cpr->cp_desc_ring)
+               return -ENOMEM;
+       cpr->cp_desc_mapping = kcalloc(n, sizeof(*cpr->cp_desc_mapping),
+                                      GFP_KERNEL);
+       if (!cpr->cp_desc_mapping)
+               return -ENOMEM;
+       return 0;
+}
+
+static void bnxt_free_all_cp_arrays(struct bnxt *bp)
+{
+       int i;
+
+       if (!bp->bnapi)
+               return;
+       for (i = 0; i < bp->cp_nr_rings; i++) {
+               struct bnxt_napi *bnapi = bp->bnapi[i];
+
+               if (!bnapi)
+                       continue;
+               bnxt_free_cp_arrays(&bnapi->cp_ring);
+       }
+}
+
+static int bnxt_alloc_all_cp_arrays(struct bnxt *bp)
+{
+       int i, n = bp->cp_nr_pages;
+
+       for (i = 0; i < bp->cp_nr_rings; i++) {
+               struct bnxt_napi *bnapi = bp->bnapi[i];
+               int rc;
+
+               if (!bnapi)
+                       continue;
+               rc = bnxt_alloc_cp_arrays(&bnapi->cp_ring, n);
+               if (rc)
+                       return rc;
+       }
+       return 0;
+}
+
 static void bnxt_free_cp_rings(struct bnxt *bp)
 {
        int i;
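
Note how the two new helpers are meant to be paired: bnxt_alloc_cp_arrays() intentionally returns -ENOMEM with any partial allocation left in place, and the caller is responsible for unwinding with bnxt_free_cp_arrays(), which frees whichever arrays were allocated and NULLs the pointers. Condensed from the sub-ring caller below:

    rc = bnxt_alloc_cp_arrays(cpr, bp->cp_nr_pages);
    if (rc) {
            bnxt_free_cp_arrays(cpr);       /* safe after partial failure */
            kfree(cpr);
            return NULL;
    }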
@@ -3190,6 +3242,7 @@ static void bnxt_free_cp_rings(struct bnxt *bp)
                        if (cpr2) {
                                ring = &cpr2->cp_ring_struct;
                                bnxt_free_ring(bp, &ring->ring_mem);
+                               bnxt_free_cp_arrays(cpr2);
                                kfree(cpr2);
                                cpr->cp_ring_arr[j] = NULL;
                        }
@@ -3208,6 +3261,12 @@ static struct bnxt_cp_ring_info *bnxt_alloc_cp_sub_ring(struct bnxt *bp)
        if (!cpr)
                return NULL;
 
+       rc = bnxt_alloc_cp_arrays(cpr, bp->cp_nr_pages);
+       if (rc) {
+               bnxt_free_cp_arrays(cpr);
+               kfree(cpr);
+               return NULL;
+       }
        ring = &cpr->cp_ring_struct;
        rmem = &ring->ring_mem;
        rmem->nr_pages = bp->cp_nr_pages;
@@ -3218,6 +3277,7 @@ static struct bnxt_cp_ring_info *bnxt_alloc_cp_sub_ring(struct bnxt *bp)
        rc = bnxt_alloc_ring(bp, rmem);
        if (rc) {
                bnxt_free_ring(bp, rmem);
+               bnxt_free_cp_arrays(cpr);
                kfree(cpr);
                cpr = NULL;
        }
@@ -3650,9 +3710,15 @@ void bnxt_set_ring_params(struct bnxt *bp)
                if (jumbo_factor > agg_factor)
                        agg_factor = jumbo_factor;
        }
-       agg_ring_size = ring_size * agg_factor;
+       if (agg_factor) {
+               if (ring_size > BNXT_MAX_RX_DESC_CNT_JUM_ENA) {
+                       ring_size = BNXT_MAX_RX_DESC_CNT_JUM_ENA;
+                       netdev_warn(bp->dev, "RX ring size reduced from %d to %d because the jumbo ring is now enabled\n",
+                                   bp->rx_ring_size, ring_size);
+                       bp->rx_ring_size = ring_size;
+               }
+               agg_ring_size = ring_size * agg_factor;
 
-       if (agg_ring_size) {
                bp->rx_agg_nr_pages = bnxt_calc_nr_ring_pages(agg_ring_size,
                                                        RX_DESC_CNT);
                if (bp->rx_agg_nr_pages > MAX_RX_AGG_PAGES) {
@@ -4253,6 +4319,7 @@ static void bnxt_free_mem(struct bnxt *bp, bool irq_re_init)
        bnxt_free_tx_rings(bp);
        bnxt_free_rx_rings(bp);
        bnxt_free_cp_rings(bp);
+       bnxt_free_all_cp_arrays(bp);
        bnxt_free_ntp_fltrs(bp, irq_re_init);
        if (irq_re_init) {
                bnxt_free_ring_stats(bp);
@@ -4373,6 +4440,10 @@ static int bnxt_alloc_mem(struct bnxt *bp, bool irq_re_init)
                        goto alloc_mem_err;
        }
 
+       rc = bnxt_alloc_all_cp_arrays(bp);
+       if (rc)
+               goto alloc_mem_err;
+
        bnxt_init_ring_struct(bp);
 
        rc = bnxt_alloc_rx_rings(bp);
@@ -12168,9 +12239,8 @@ static void bnxt_fw_reset_task(struct work_struct *work)
                /* Make sure fw_reset_state is 0 before clearing the flag */
                smp_mb__before_atomic();
                clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
-               bnxt_ulp_start(bp, rc);
-               if (!rc)
-                       bnxt_reenable_sriov(bp);
+               bnxt_ulp_start(bp, 0);
+               bnxt_reenable_sriov(bp);
                bnxt_vf_reps_alloc(bp);
                bnxt_vf_reps_open(bp);
                bnxt_ptp_reapply_pps(bp);
index e379c48..9c3324e 100644 (file)
@@ -596,15 +596,17 @@ struct nqe_cn {
 #define MAX_TPA_SEGS_P5        0x3f
 
 #if (BNXT_PAGE_SHIFT == 16)
-#define MAX_RX_PAGES   1
+#define MAX_RX_PAGES_AGG_ENA   1
+#define MAX_RX_PAGES   4
 #define MAX_RX_AGG_PAGES       4
 #define MAX_TX_PAGES   1
-#define MAX_CP_PAGES   8
+#define MAX_CP_PAGES   16
 #else
-#define MAX_RX_PAGES   8
+#define MAX_RX_PAGES_AGG_ENA   8
+#define MAX_RX_PAGES   32
 #define MAX_RX_AGG_PAGES       32
 #define MAX_TX_PAGES   8
-#define MAX_CP_PAGES   64
+#define MAX_CP_PAGES   128
 #endif
 
 #define RX_DESC_CNT (BNXT_PAGE_SIZE / sizeof(struct rx_bd))
@@ -622,6 +624,7 @@ struct nqe_cn {
 #define HW_CMPD_RING_SIZE (sizeof(struct tx_cmp) * CP_DESC_CNT)
 
 #define BNXT_MAX_RX_DESC_CNT           (RX_DESC_CNT * MAX_RX_PAGES - 1)
+#define BNXT_MAX_RX_DESC_CNT_JUM_ENA   (RX_DESC_CNT * MAX_RX_PAGES_AGG_ENA - 1)
 #define BNXT_MAX_RX_JUM_DESC_CNT       (RX_DESC_CNT * MAX_RX_AGG_PAGES - 1)
 #define BNXT_MAX_TX_DESC_CNT           (TX_DESC_CNT * MAX_TX_PAGES - 1)
 
@@ -972,11 +975,11 @@ struct bnxt_cp_ring_info {
        struct dim              dim;
 
        union {
-               struct tx_cmp   *cp_desc_ring[MAX_CP_PAGES];
-               struct nqe_cn   *nq_desc_ring[MAX_CP_PAGES];
+               struct tx_cmp   **cp_desc_ring;
+               struct nqe_cn   **nq_desc_ring;
        };
 
-       dma_addr_t              cp_desc_mapping[MAX_CP_PAGES];
+       dma_addr_t              *cp_desc_mapping;
 
        struct bnxt_stats_mem   stats;
        u32                     hw_stats_ctx_id;
index 64381be..2cd8bb3 100644 (file)
@@ -743,14 +743,17 @@ static void bnxt_dl_params_unregister(struct bnxt *bp)
 
 int bnxt_dl_register(struct bnxt *bp)
 {
+       const struct devlink_ops *devlink_ops;
        struct devlink_port_attrs attrs = {};
        struct devlink *dl;
        int rc;
 
        if (BNXT_PF(bp))
-               dl = devlink_alloc(&bnxt_dl_ops, sizeof(struct bnxt_dl));
+               devlink_ops = &bnxt_dl_ops;
        else
-               dl = devlink_alloc(&bnxt_vf_dl_ops, sizeof(struct bnxt_dl));
+               devlink_ops = &bnxt_vf_dl_ops;
+
+       dl = devlink_alloc(devlink_ops, sizeof(struct bnxt_dl), &bp->pdev->dev);
        if (!dl) {
                netdev_warn(bp->dev, "devlink_alloc failed\n");
                return -ENOMEM;
@@ -763,7 +766,7 @@ int bnxt_dl_register(struct bnxt *bp)
            bp->hwrm_spec_code > 0x10803)
                bp->eswitch_mode = DEVLINK_ESWITCH_MODE_LEGACY;
 
-       rc = devlink_register(dl, &bp->pdev->dev);
+       rc = devlink_register(dl);
        if (rc) {
                netdev_warn(bp->dev, "devlink_register failed. rc=%d\n", rc);
                goto err_dl_free;
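
This hunk reflects the devlink API change that runs through the whole tree here: the struct device moves from devlink_register() to devlink_alloc(). A sketch of the new calling sequence, with foo_* as placeholder names:

    dl = devlink_alloc(&foo_devlink_ops, sizeof(struct foo_priv),
                       &pdev->dev);
    if (!dl)
            return -ENOMEM;

    rc = devlink_register(dl);      /* no device argument anymore */
    if (rc)
            devlink_free(dl);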
index 786ca51..485252d 100644 (file)
@@ -768,8 +768,13 @@ static void bnxt_get_ringparam(struct net_device *dev,
 {
        struct bnxt *bp = netdev_priv(dev);
 
-       ering->rx_max_pending = BNXT_MAX_RX_DESC_CNT;
-       ering->rx_jumbo_max_pending = BNXT_MAX_RX_JUM_DESC_CNT;
+       if (bp->flags & BNXT_FLAG_AGG_RINGS) {
+               ering->rx_max_pending = BNXT_MAX_RX_DESC_CNT_JUM_ENA;
+               ering->rx_jumbo_max_pending = BNXT_MAX_RX_JUM_DESC_CNT;
+       } else {
+               ering->rx_max_pending = BNXT_MAX_RX_DESC_CNT;
+               ering->rx_jumbo_max_pending = 0;
+       }
        ering->tx_max_pending = BNXT_MAX_TX_DESC_CNT;
 
        ering->rx_pending = bp->rx_ring_size;
index e33e311..7f55ebb 100644 (file)
@@ -560,6 +560,12 @@ static long bnxt_ptp_ts_aux_work(struct ptp_clock_info *ptp_info)
 
        bnxt_ptp_get_current_time(bp);
        ptp->next_period = now + HZ;
+       if (time_after_eq(now, ptp->next_overflow_check)) {
+               spin_lock_bh(&ptp->ptp_lock);
+               timecounter_read(&ptp->tc);
+               spin_unlock_bh(&ptp->ptp_lock);
+               ptp->next_overflow_check = now + BNXT_PHC_OVERFLOW_PERIOD;
+       }
        return HZ;
 }
 
@@ -713,6 +719,7 @@ int bnxt_ptp_init(struct bnxt *bp)
        ptp->cc.shift = 0;
        ptp->cc.mult = 1;
 
+       ptp->next_overflow_check = jiffies + BNXT_PHC_OVERFLOW_PERIOD;
        timecounter_init(&ptp->tc, &ptp->cc, ktime_to_ns(ktime_get_real()));
 
        ptp->ptp_info = bnxt_ptp_caps;
index 8892334..cc3cdba 100644 (file)
@@ -83,6 +83,10 @@ struct bnxt_ptp_cfg {
        u64                     current_time;
        u64                     old_time;
        unsigned long           next_period;
+       unsigned long           next_overflow_check;
+       /* 48-bit PHC overflows in 78 hours.  Check overflow every 19 hours. */
+       #define BNXT_PHC_OVERFLOW_PERIOD        (19 * 3600 * HZ)
+
        u16                     tx_seqid;
        struct bnxt             *bp;
        atomic_t                tx_avail;
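
The 19-hour period follows from the counter width. With the 1 ns cyclecounter set up in bnxt_ptp_init() (cc.shift = 0, cc.mult = 1), roughly:

    /*
     * 2^48 ns ~= 281,475,000,000,000 ns ~= 281,475 s ~= 78.2 h to wrap.
     * timecounter_read() has to run well before the counter wraps
     * relative to the previous read (conservatively once per half
     * wrap, ~39 h); a 19 h period leaves about a 2x safety margin.
     */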
index 63e2237..8507198 100644 (file)
@@ -3972,8 +3972,6 @@ static int bcmgenet_probe(struct platform_device *pdev)
         */
        dev->needed_headroom += 64;
 
-       netdev_boot_setup_check(dev);
-
        priv->dev = dev;
        priv->pdev = pdev;
 
index a4a5209..2907e13 100644 (file)
@@ -1457,7 +1457,7 @@ static void free_netsgbuf(void *buf)
        while (frags--) {
                skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
 
-               pci_unmap_page((lio->oct_dev)->pci_dev,
+               dma_unmap_page(&lio->oct_dev->pci_dev->dev,
                               g->sg[(i >> 2)].ptr[(i & 3)],
                               skb_frag_size(frag), DMA_TO_DEVICE);
                i++;
@@ -1500,7 +1500,7 @@ static void free_netsgbuf_with_resp(void *buf)
        while (frags--) {
                skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
 
-               pci_unmap_page((lio->oct_dev)->pci_dev,
+               dma_unmap_page(&lio->oct_dev->pci_dev->dev,
                               g->sg[(i >> 2)].ptr[(i & 3)],
                               skb_frag_size(frag), DMA_TO_DEVICE);
                i++;
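
These hunks apply the generic DMA API conversion: the legacy pci_* wrappers took a struct pci_dev *, while the dma_* functions take the underlying struct device *. (The removed lines here already passed DMA_TO_DEVICE, which the compat wrappers accepted.) Schematically:

    /* before: PCI compat wrapper */
    pci_unmap_page(pdev, addr, size, PCI_DMA_TODEVICE);

    /* after: same unmap through the generic DMA API */
    dma_unmap_page(&pdev->dev, addr, size, DMA_TO_DEVICE);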
@@ -3750,7 +3750,8 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
        }
 
        devlink = devlink_alloc(&liquidio_devlink_ops,
-                               sizeof(struct lio_devlink_priv));
+                               sizeof(struct lio_devlink_priv),
+                               &octeon_dev->pci_dev->dev);
        if (!devlink) {
                dev_err(&octeon_dev->pci_dev->dev, "devlink alloc failed\n");
                goto setup_nic_dev_free;
@@ -3759,7 +3760,7 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
        lio_devlink = devlink_priv(devlink);
        lio_devlink->oct = octeon_dev;
 
-       if (devlink_register(devlink, &octeon_dev->pci_dev->dev)) {
+       if (devlink_register(devlink)) {
                devlink_free(devlink);
                dev_err(&octeon_dev->pci_dev->dev,
                        "devlink registration failed\n");
index 3085dd4..c6fe0f2 100644 (file)
@@ -843,7 +843,7 @@ static void free_netsgbuf(void *buf)
        while (frags--) {
                skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
 
-               pci_unmap_page((lio->oct_dev)->pci_dev,
+               dma_unmap_page(&lio->oct_dev->pci_dev->dev,
                               g->sg[(i >> 2)].ptr[(i & 3)],
                               skb_frag_size(frag), DMA_TO_DEVICE);
                i++;
@@ -887,7 +887,7 @@ static void free_netsgbuf_with_resp(void *buf)
        while (frags--) {
                skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
 
-               pci_unmap_page((lio->oct_dev)->pci_dev,
+               dma_unmap_page(&lio->oct_dev->pci_dev->dev,
                               g->sg[(i >> 2)].ptr[(i & 3)],
                               skb_frag_size(frag), DMA_TO_DEVICE);
                i++;
index 9361f96..691e147 100644 (file)
@@ -1322,18 +1322,12 @@ static int nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
                goto err_disable_device;
        }
 
-       err = pci_set_dma_mask(pdev, DMA_BIT_MASK(48));
+       err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48));
        if (err) {
                dev_err(dev, "Unable to get usable DMA configuration\n");
                goto err_release_regions;
        }
 
-       err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(48));
-       if (err) {
-               dev_err(dev, "Unable to get 48-bit DMA for consistent allocations\n");
-               goto err_release_regions;
-       }
-
        /* MAP PF's configuration registers */
        nic->reg_base = pcim_iomap(pdev, PCI_CFG_REG_BAR_NUM, 0);
        if (!nic->reg_base) {
index efaaa57..d1667b7 100644 (file)
@@ -2130,18 +2130,12 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
                goto err_disable_device;
        }
 
-       err = pci_set_dma_mask(pdev, DMA_BIT_MASK(48));
+       err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48));
        if (err) {
                dev_err(dev, "Unable to get usable DMA configuration\n");
                goto err_release_regions;
        }
 
-       err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(48));
-       if (err) {
-               dev_err(dev, "unable to get 48-bit DMA for consistent allocations\n");
-               goto err_release_regions;
-       }
-
        qcount = netif_get_num_default_rss_queues();
 
        /* Restrict multiqset support only for host bound VFs */
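
Same conversion theme for the DMA masks: a single dma_set_mask_and_coherent() call replaces the pci_set_dma_mask()/pci_set_consistent_dma_mask() pair, so a failure can no longer leave the streaming mask configured while the coherent one is not. The resulting probe pattern:

    err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48));
    if (err) {
            dev_err(&pdev->dev, "Unable to get usable DMA configuration\n");
            goto err_release_regions;
    }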
index 6260b3b..786ceae 100644 (file)
@@ -1441,7 +1441,7 @@ static int cxgb4_set_hash_filter(struct net_device *dev,
        } else if (iconf & USE_ENC_IDX_F) {
                if (f->fs.val.encap_vld) {
                        struct port_info *pi = netdev_priv(f->dev);
-                       u8 match_all_mac[] = { 0, 0, 0, 0, 0, 0 };
+                       static const u8 match_all_mac[] = { 0, 0, 0, 0, 0, 0 };
 
                        /* allocate MPS TCAM entry */
                        ret = t4_alloc_encap_mac_filt(adapter, pi->viid,
@@ -1688,7 +1688,7 @@ int __cxgb4_set_filter(struct net_device *dev, int ftid,
        } else if (iconf & USE_ENC_IDX_F) {
                if (f->fs.val.encap_vld) {
                        struct port_info *pi = netdev_priv(f->dev);
-                       u8 match_all_mac[] = { 0, 0, 0, 0, 0, 0 };
+                       static const u8 match_all_mac[] = { 0, 0, 0, 0, 0, 0 };
 
                        /* allocate MPS TCAM entry */
                        ret = t4_alloc_encap_mac_filt(adapter, pi->viid,
index d8af9e6..dac1764 100644 (file)
@@ -6,7 +6,7 @@
 config NET_VENDOR_CIRRUS
        bool "Cirrus devices"
        default y
-       depends on ISA || EISA || ARM || MAC
+       depends on ISA || EISA || ARM || MAC || COMPILE_TEST
        help
          If you have a network (Ethernet) card belonging to this class, say Y.
 
@@ -18,9 +18,16 @@ config NET_VENDOR_CIRRUS
 if NET_VENDOR_CIRRUS
 
 config CS89x0
-       tristate "CS89x0 support"
-       depends on ISA || EISA || ARM
+       tristate
+
+config CS89x0_ISA
+       tristate "CS89x0 ISA driver support"
+       depends on HAS_IOPORT_MAP
+       depends on ISA
        depends on !PPC32
+       depends on CS89x0_PLATFORM=n
+       select NETDEV_LEGACY_INIT
+       select CS89x0
        help
          Support for CS89x0 chipset based Ethernet cards. If you have a
          network (Ethernet) card of this type, say Y and read the file
@@ -30,15 +37,15 @@ config CS89x0
          will be called cs89x0.
 
 config CS89x0_PLATFORM
-       bool "CS89x0 platform driver support" if HAS_IOPORT_MAP
-       default !HAS_IOPORT_MAP
-       depends on CS89x0
+       tristate "CS89x0 platform driver support"
+       depends on ARM || COMPILE_TEST
+       select CS89x0
        help
-         Say Y to compile the cs89x0 driver as a platform driver. This
-         makes this driver suitable for use on certain evaluation boards
-         such as the iMX21ADS.
+         Say Y to compile the cs89x0 platform driver. This makes this driver
+         suitable for use on certain evaluation boards such as the iMX21ADS.
 
-         If you are unsure, say N.
+         To compile this driver as a module, choose M here. The module
+         will be called cs89x0.
 
 config EP93XX_ETH
        tristate "EP93xx Ethernet support"
index 33ace33..d0c4c8b 100644 (file)
@@ -104,7 +104,7 @@ static char version[] __initdata =
  * them to system IRQ numbers. This mapping is card specific and is set to
  * the configuration of the Cirrus Eval board for this chip.
  */
-#ifndef CONFIG_CS89x0_PLATFORM
+#if IS_ENABLED(CONFIG_CS89x0_ISA)
 static unsigned int netcard_portlist[] __used __initdata = {
        0x300, 0x320, 0x340, 0x360, 0x200, 0x220, 0x240,
        0x260, 0x280, 0x2a0, 0x2c0, 0x2e0, 0
@@ -292,7 +292,7 @@ write_irq(struct net_device *dev, int chip_type, int irq)
        int i;
 
        if (chip_type == CS8900) {
-#ifndef CONFIG_CS89x0_PLATFORM
+#if IS_ENABLED(CONFIG_CS89x0_ISA)
                /* Search the mapping table for the corresponding IRQ pin. */
                for (i = 0; i != ARRAY_SIZE(cs8900_irq_map); i++)
                        if (cs8900_irq_map[i] == irq)
@@ -859,7 +859,7 @@ net_open(struct net_device *dev)
                        goto bad_out;
                }
        } else {
-#if !defined(CONFIG_CS89x0_PLATFORM)
+#if IS_ENABLED(CONFIG_CS89x0_ISA)
                if (((1 << dev->irq) & lp->irq_map) == 0) {
                        pr_err("%s: IRQ %d is not in our map of allowable IRQs, which is %x\n",
                               dev->name, dev->irq, lp->irq_map);
@@ -1523,7 +1523,7 @@ cs89x0_probe1(struct net_device *dev, void __iomem *ioaddr, int modular)
                        dev->irq = i;
        } else {
                i = lp->isa_config & INT_NO_MASK;
-#ifndef CONFIG_CS89x0_PLATFORM
+#if IS_ENABLED(CONFIG_CS89x0_ISA)
                if (lp->chip_type == CS8900) {
                        /* Translate the IRQ using the IRQ mapping table. */
                        if (i >= ARRAY_SIZE(cs8900_irq_map))
@@ -1576,7 +1576,7 @@ out1:
        return retval;
 }
 
-#ifndef CONFIG_CS89x0_PLATFORM
+#if IS_ENABLED(CONFIG_CS89x0_ISA)
 /*
  * This function converts the I/O port address used by the cs89x0_probe() and
  * init_module() functions to the I/O memory address used by the
@@ -1682,11 +1682,7 @@ out:
        pr_warn("no cs8900 or cs8920 detected.  Be sure to disable PnP with SETUP\n");
        return ERR_PTR(err);
 }
-#endif
-#endif
-
-#if defined(MODULE) && !defined(CONFIG_CS89x0_PLATFORM)
-
+#else
 static struct net_device *dev_cs89x0;
 
 /* Support the 'debug' module parm even if we're compiled for non-debug to
@@ -1757,9 +1753,9 @@ MODULE_LICENSE("GPL");
  * (hw or software util)
  */
 
-int __init init_module(void)
+static int __init cs89x0_isa_init_module(void)
 {
-       struct net_device *dev = alloc_etherdev(sizeof(struct net_local));
+       struct net_device *dev;
        struct net_local *lp;
        int ret = 0;
 
@@ -1768,6 +1764,7 @@ int __init init_module(void)
 #else
        debug = 0;
 #endif
+       dev = alloc_etherdev(sizeof(struct net_local));
        if (!dev)
                return -ENOMEM;
 
@@ -1826,9 +1823,9 @@ out:
        free_netdev(dev);
        return ret;
 }
+module_init(cs89x0_isa_init_module);
 
-void __exit
-cleanup_module(void)
+static void __exit cs89x0_isa_cleanup_module(void)
 {
        struct net_local *lp = netdev_priv(dev_cs89x0);
 
@@ -1838,9 +1835,11 @@ cleanup_module(void)
        release_region(dev_cs89x0->base_addr, NETCARD_IO_EXTENT);
        free_netdev(dev_cs89x0);
 }
-#endif /* MODULE && !CONFIG_CS89x0_PLATFORM */
+module_exit(cs89x0_isa_cleanup_module);
+#endif /* MODULE */
+#endif /* CONFIG_CS89x0_ISA */
 
-#ifdef CONFIG_CS89x0_PLATFORM
+#if IS_ENABLED(CONFIG_CS89x0_PLATFORM)
 static int __init cs89x0_platform_probe(struct platform_device *pdev)
 {
        struct net_device *dev = alloc_etherdev(sizeof(struct net_local));
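
The #ifndef CONFIG_CS89x0_PLATFORM tests become IS_ENABLED(CONFIG_CS89x0_ISA) because the ISA variant can now be modular: IS_ENABLED() evaluates true for both =y and =m, whereas a bare #ifdef only sees the built-in symbol. A minimal illustration:

    #include <linux/kconfig.h>

    #if IS_ENABLED(CONFIG_CS89x0_ISA)       /* true for =y and for =m */
    static void cs89x0_isa_only_setup(void)
    {
            /* ISA-specific code lives here */
    }
    #endif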
index 0116047..55d6fc9 100644 (file)
@@ -362,7 +362,7 @@ void tulip_select_media(struct net_device *dev, int startup)
                        iowrite32(0x33, ioaddr + CSR12);
                        new_csr6 = 0x01860000;
                        /* Trigger autonegotiation. */
-                       iowrite32(startup ? 0x0201F868 : 0x0001F868, ioaddr + 0xB8);
+                       iowrite32(0x0001F868, ioaddr + 0xB8);
                } else {
                        iowrite32(0x32, ioaddr + CSR12);
                        new_csr6 = 0x00420000;
index 07a48f6..85b9909 100644 (file)
@@ -357,7 +357,7 @@ static int w840_probe1(struct pci_dev *pdev, const struct pci_device_id *ent)
        int i, option = find_cnt < MAX_UNITS ? options[find_cnt] : 0;
        void __iomem *ioaddr;
 
-       i = pci_enable_device(pdev);
+       i = pcim_enable_device(pdev);
        if (i) return i;
 
        pci_set_master(pdev);
@@ -379,7 +379,7 @@ static int w840_probe1(struct pci_dev *pdev, const struct pci_device_id *ent)
 
        ioaddr = pci_iomap(pdev, TULIP_BAR, netdev_res_size);
        if (!ioaddr)
-               goto err_out_free_res;
+               goto err_out_netdev;
 
        for (i = 0; i < 3; i++)
                ((__le16 *)dev->dev_addr)[i] = cpu_to_le16(eeprom_read(ioaddr, i));
@@ -458,8 +458,6 @@ static int w840_probe1(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 err_out_cleardev:
        pci_iounmap(pdev, ioaddr);
-err_out_free_res:
-       pci_release_regions(pdev);
 err_out_netdev:
        free_netdev (dev);
        return -ENODEV;
@@ -1526,7 +1524,6 @@ static void w840_remove1(struct pci_dev *pdev)
        if (dev) {
                struct netdev_private *np = netdev_priv(dev);
                unregister_netdev(dev);
-               pci_release_regions(pdev);
                pci_iounmap(pdev, np->base_addr);
                free_netdev(dev);
        }
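
The winbond-840 hunks switch the probe to managed PCI: once pcim_enable_device() has run, the device enable and any regions requested afterwards are torn down automatically by devres, which is why the explicit pci_release_regions() calls disappear from both the error path and w840_remove1(). Sketch of the resulting shape:

    i = pcim_enable_device(pdev);   /* device is now devres-managed */
    if (i)
            return i;

    pci_set_master(pdev);
    /* on any later failure, free_netdev() and return; devres undoes
     * the enable and region requests when probe fails */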
index c2ef740..3d9842a 100644 (file)
@@ -11,7 +11,7 @@ fsl-dpaa2-eth-objs    := dpaa2-eth.o dpaa2-ethtool.o dpni.o dpaa2-mac.o dpmac.o dpa
 fsl-dpaa2-eth-${CONFIG_FSL_DPAA2_ETH_DCB} += dpaa2-eth-dcb.o
 fsl-dpaa2-eth-${CONFIG_DEBUG_FS} += dpaa2-eth-debugfs.o
 fsl-dpaa2-ptp-objs     := dpaa2-ptp.o dprtc.o
-fsl-dpaa2-switch-objs  := dpaa2-switch.o dpaa2-switch-ethtool.o dpsw.o dpaa2-switch-flower.o
+fsl-dpaa2-switch-objs  := dpaa2-switch.o dpaa2-switch-ethtool.o dpsw.o dpaa2-switch-flower.o dpaa2-mac.o dpmac.o
 
 # Needed by the tracing framework
 CFLAGS_dpaa2-eth.o := -I$(src)
index 8336962..605a39f 100644 (file)
@@ -68,7 +68,7 @@ dpaa2_eth_dl_trap_item_lookup(struct dpaa2_eth_priv *priv, u16 trap_id)
 struct dpaa2_eth_trap_item *dpaa2_eth_dl_get_trap(struct dpaa2_eth_priv *priv,
                                                  struct dpaa2_fapr *fapr)
 {
-       struct dpaa2_faf_error_bit {
+       static const struct dpaa2_faf_error_bit {
                int position;
                enum devlink_trap_generic_id trap_id;
        } faf_bits[] = {
@@ -196,7 +196,8 @@ int dpaa2_eth_dl_register(struct dpaa2_eth_priv *priv)
        struct dpaa2_eth_devlink_priv *dl_priv;
        int err;
 
-       priv->devlink = devlink_alloc(&dpaa2_eth_devlink_ops, sizeof(*dl_priv));
+       priv->devlink =
+               devlink_alloc(&dpaa2_eth_devlink_ops, sizeof(*dl_priv), dev);
        if (!priv->devlink) {
                dev_err(dev, "devlink_alloc failed\n");
                return -ENOMEM;
@@ -204,7 +205,7 @@ int dpaa2_eth_dl_register(struct dpaa2_eth_priv *priv)
        dl_priv = devlink_priv(priv->devlink);
        dl_priv->dpaa2_priv = priv;
 
-       err = devlink_register(priv->devlink, dev);
+       err = devlink_register(priv->devlink);
        if (err) {
                dev_err(dev, "devlink_register() = %d\n", err);
                goto devlink_free;
index f664021..7065c71 100644 (file)
@@ -4138,7 +4138,7 @@ static int dpaa2_eth_connect_mac(struct dpaa2_eth_priv *priv)
        int err;
 
        dpni_dev = to_fsl_mc_device(priv->net_dev->dev.parent);
-       dpmac_dev = fsl_mc_get_endpoint(dpni_dev);
+       dpmac_dev = fsl_mc_get_endpoint(dpni_dev, 0);
 
        if (PTR_ERR(dpmac_dev) == -EPROBE_DEFER)
                return PTR_ERR(dpmac_dev);
index 70e0432..720c923 100644 (file)
@@ -15,18 +15,18 @@ static struct {
        enum dpsw_counter id;
        char name[ETH_GSTRING_LEN];
 } dpaa2_switch_ethtool_counters[] =  {
-       {DPSW_CNT_ING_FRAME,            "rx frames"},
-       {DPSW_CNT_ING_BYTE,             "rx bytes"},
-       {DPSW_CNT_ING_FLTR_FRAME,       "rx filtered frames"},
-       {DPSW_CNT_ING_FRAME_DISCARD,    "rx discarded frames"},
-       {DPSW_CNT_ING_BCAST_FRAME,      "rx b-cast frames"},
-       {DPSW_CNT_ING_BCAST_BYTES,      "rx b-cast bytes"},
-       {DPSW_CNT_ING_MCAST_FRAME,      "rx m-cast frames"},
-       {DPSW_CNT_ING_MCAST_BYTE,       "rx m-cast bytes"},
-       {DPSW_CNT_EGR_FRAME,            "tx frames"},
-       {DPSW_CNT_EGR_BYTE,             "tx bytes"},
-       {DPSW_CNT_EGR_FRAME_DISCARD,    "tx discarded frames"},
-       {DPSW_CNT_ING_NO_BUFF_DISCARD,  "rx discarded no buffer frames"},
+       {DPSW_CNT_ING_FRAME,            "[hw] rx frames"},
+       {DPSW_CNT_ING_BYTE,             "[hw] rx bytes"},
+       {DPSW_CNT_ING_FLTR_FRAME,       "[hw] rx filtered frames"},
+       {DPSW_CNT_ING_FRAME_DISCARD,    "[hw] rx discarded frames"},
+       {DPSW_CNT_ING_BCAST_FRAME,      "[hw] rx bcast frames"},
+       {DPSW_CNT_ING_BCAST_BYTES,      "[hw] rx bcast bytes"},
+       {DPSW_CNT_ING_MCAST_FRAME,      "[hw] rx mcast frames"},
+       {DPSW_CNT_ING_MCAST_BYTE,       "[hw] rx mcast bytes"},
+       {DPSW_CNT_EGR_FRAME,            "[hw] tx frames"},
+       {DPSW_CNT_EGR_BYTE,             "[hw] tx bytes"},
+       {DPSW_CNT_EGR_FRAME_DISCARD,    "[hw] tx discarded frames"},
+       {DPSW_CNT_ING_NO_BUFF_DISCARD,  "[hw] rx nobuffer discards"},
 };
 
 #define DPAA2_SWITCH_NUM_COUNTERS      ARRAY_SIZE(dpaa2_switch_ethtool_counters)
@@ -62,6 +62,10 @@ dpaa2_switch_get_link_ksettings(struct net_device *netdev,
        struct dpsw_link_state state = {0};
        int err = 0;
 
+       if (dpaa2_switch_port_is_type_phy(port_priv))
+               return phylink_ethtool_ksettings_get(port_priv->mac->phylink,
+                                                    link_ksettings);
+
        err = dpsw_if_get_link_state(port_priv->ethsw_data->mc_io, 0,
                                     port_priv->ethsw_data->dpsw_handle,
                                     port_priv->idx,
@@ -95,6 +99,10 @@ dpaa2_switch_set_link_ksettings(struct net_device *netdev,
        bool if_running;
        int err = 0, ret;
 
+       if (dpaa2_switch_port_is_type_phy(port_priv))
+               return phylink_ethtool_ksettings_set(port_priv->mac->phylink,
+                                                    link_ksettings);
+
        /* Interface needs to be down to change link settings */
        if_running = netif_running(netdev);
        if (if_running) {
@@ -134,11 +142,17 @@ dpaa2_switch_set_link_ksettings(struct net_device *netdev,
        return err;
 }
 
-static int dpaa2_switch_ethtool_get_sset_count(struct net_device *dev, int sset)
+static int
+dpaa2_switch_ethtool_get_sset_count(struct net_device *netdev, int sset)
 {
+       struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+       int num_ss_stats = DPAA2_SWITCH_NUM_COUNTERS;
+
        switch (sset) {
        case ETH_SS_STATS:
-               return DPAA2_SWITCH_NUM_COUNTERS;
+               if (port_priv->mac)
+                       num_ss_stats += dpaa2_mac_get_sset_count();
+               return num_ss_stats;
        default:
                return -EOPNOTSUPP;
        }
@@ -147,14 +161,19 @@ static int dpaa2_switch_ethtool_get_sset_count(struct net_device *dev, int sset)
 static void dpaa2_switch_ethtool_get_strings(struct net_device *netdev,
                                             u32 stringset, u8 *data)
 {
+       struct ethsw_port_priv *port_priv = netdev_priv(netdev);
+       u8 *p = data;
        int i;
 
        switch (stringset) {
        case ETH_SS_STATS:
-               for (i = 0; i < DPAA2_SWITCH_NUM_COUNTERS; i++)
-                       memcpy(data + i * ETH_GSTRING_LEN,
-                              dpaa2_switch_ethtool_counters[i].name,
+               for (i = 0; i < DPAA2_SWITCH_NUM_COUNTERS; i++) {
+                       memcpy(p, dpaa2_switch_ethtool_counters[i].name,
                               ETH_GSTRING_LEN);
+                       p += ETH_GSTRING_LEN;
+               }
+               if (port_priv->mac)
+                       dpaa2_mac_get_strings(p);
                break;
        }
 }
@@ -176,6 +195,9 @@ static void dpaa2_switch_ethtool_get_stats(struct net_device *netdev,
                        netdev_err(netdev, "dpsw_if_get_counter[%s] err %d\n",
                                   dpaa2_switch_ethtool_counters[i].name, err);
        }
+
+       if (port_priv->mac)
+               dpaa2_mac_get_ethtool_stats(port_priv->mac, data + i);
 }
 
 const struct ethtool_ops dpaa2_switch_port_ethtool_ops = {
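
The ethtool hunks above keep the usual three-way contract intact: get_sset_count(), get_strings() and get_ethtool_stats() must agree on the number of entries, so each side appends the MAC block only when port_priv->mac is set. In condensed form:

    n = DPAA2_SWITCH_NUM_COUNTERS;
    if (port_priv->mac)                     /* port connected to a DPMAC */
            n += dpaa2_mac_get_sset_count();
    /* get_strings() appends dpaa2_mac_get_strings(p) and
     * get_ethtool_stats() appends dpaa2_mac_get_ethtool_stats()
     * under exactly the same condition */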
index 7112972..d260993 100644 (file)
@@ -594,12 +594,18 @@ static int dpaa2_switch_port_change_mtu(struct net_device *netdev, int mtu)
        return 0;
 }
 
-static int dpaa2_switch_port_carrier_state_sync(struct net_device *netdev)
+static int dpaa2_switch_port_link_state_update(struct net_device *netdev)
 {
        struct ethsw_port_priv *port_priv = netdev_priv(netdev);
        struct dpsw_link_state state;
        int err;
 
+       /* When the MAC/PHY is managed through phylink there is no need
+        * to update the netif carrier state manually.
+        */
+       if (dpaa2_switch_port_is_type_phy(port_priv))
+               return 0;
+
        /* Interrupts are received even though no one issued an 'ifconfig up'
         * on the switch interface. Ignore these link state update interrupts
         */
@@ -677,12 +683,14 @@ static int dpaa2_switch_port_open(struct net_device *netdev)
        struct ethsw_core *ethsw = port_priv->ethsw_data;
        int err;
 
-       /* Explicitly set carrier off, otherwise
-        * netif_carrier_ok() will return true and cause 'ip link show'
-        * to report the LOWER_UP flag, even though the link
-        * notification wasn't even received.
-        */
-       netif_carrier_off(netdev);
+       if (!dpaa2_switch_port_is_type_phy(port_priv)) {
+               /* Explicitly set carrier off, otherwise
+                * netif_carrier_ok() will return true and cause 'ip link show'
+                * to report the LOWER_UP flag, even though the link
+                * notification wasn't even received.
+                */
+               netif_carrier_off(netdev);
+       }
 
        err = dpsw_if_enable(port_priv->ethsw_data->mc_io, 0,
                             port_priv->ethsw_data->dpsw_handle,
@@ -692,23 +700,12 @@ static int dpaa2_switch_port_open(struct net_device *netdev)
                return err;
        }
 
-       /* sync carrier state */
-       err = dpaa2_switch_port_carrier_state_sync(netdev);
-       if (err) {
-               netdev_err(netdev,
-                          "dpaa2_switch_port_carrier_state_sync err %d\n", err);
-               goto err_carrier_sync;
-       }
-
        dpaa2_switch_enable_ctrl_if_napi(ethsw);
 
-       return 0;
+       if (dpaa2_switch_port_is_type_phy(port_priv))
+               phylink_start(port_priv->mac->phylink);
 
-err_carrier_sync:
-       dpsw_if_disable(port_priv->ethsw_data->mc_io, 0,
-                       port_priv->ethsw_data->dpsw_handle,
-                       port_priv->idx);
-       return err;
+       return 0;
 }
 
 static int dpaa2_switch_port_stop(struct net_device *netdev)
@@ -717,6 +714,13 @@ static int dpaa2_switch_port_stop(struct net_device *netdev)
        struct ethsw_core *ethsw = port_priv->ethsw_data;
        int err;
 
+       if (dpaa2_switch_port_is_type_phy(port_priv)) {
+               phylink_stop(port_priv->mac->phylink);
+       } else {
+               netif_tx_stop_all_queues(netdev);
+               netif_carrier_off(netdev);
+       }
+
        err = dpsw_if_disable(port_priv->ethsw_data->mc_io, 0,
                              port_priv->ethsw_data->dpsw_handle,
                              port_priv->idx);
@@ -1419,41 +1423,103 @@ bool dpaa2_switch_port_dev_check(const struct net_device *netdev)
        return netdev->netdev_ops == &dpaa2_switch_port_ops;
 }
 
-static void dpaa2_switch_links_state_update(struct ethsw_core *ethsw)
+static int dpaa2_switch_port_connect_mac(struct ethsw_port_priv *port_priv)
 {
-       int i;
+       struct fsl_mc_device *dpsw_port_dev, *dpmac_dev;
+       struct dpaa2_mac *mac;
+       int err;
 
-       for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
-               dpaa2_switch_port_carrier_state_sync(ethsw->ports[i]->netdev);
-               dpaa2_switch_port_set_mac_addr(ethsw->ports[i]);
+       dpsw_port_dev = to_fsl_mc_device(port_priv->netdev->dev.parent);
+       dpmac_dev = fsl_mc_get_endpoint(dpsw_port_dev, port_priv->idx);
+
+       if (PTR_ERR(dpmac_dev) == -EPROBE_DEFER)
+               return PTR_ERR(dpmac_dev);
+
+       if (IS_ERR(dpmac_dev) || dpmac_dev->dev.type != &fsl_mc_bus_dpmac_type)
+               return 0;
+
+       mac = kzalloc(sizeof(*mac), GFP_KERNEL);
+       if (!mac)
+               return -ENOMEM;
+
+       mac->mc_dev = dpmac_dev;
+       mac->mc_io = port_priv->ethsw_data->mc_io;
+       mac->net_dev = port_priv->netdev;
+
+       err = dpaa2_mac_open(mac);
+       if (err)
+               goto err_free_mac;
+       port_priv->mac = mac;
+
+       if (dpaa2_switch_port_is_type_phy(port_priv)) {
+               err = dpaa2_mac_connect(mac);
+               if (err) {
+                       netdev_err(port_priv->netdev,
+                                  "Error connecting to the MAC endpoint %pe\n",
+                                  ERR_PTR(err));
+                       goto err_close_mac;
+               }
        }
+
+       return 0;
+
+err_close_mac:
+       dpaa2_mac_close(mac);
+       port_priv->mac = NULL;
+err_free_mac:
+       kfree(mac);
+       return err;
+}
+
+static void dpaa2_switch_port_disconnect_mac(struct ethsw_port_priv *port_priv)
+{
+       if (dpaa2_switch_port_is_type_phy(port_priv))
+               dpaa2_mac_disconnect(port_priv->mac);
+
+       if (!dpaa2_switch_port_has_mac(port_priv))
+               return;
+
+       dpaa2_mac_close(port_priv->mac);
+       kfree(port_priv->mac);
+       port_priv->mac = NULL;
 }
 
 static irqreturn_t dpaa2_switch_irq0_handler_thread(int irq_num, void *arg)
 {
        struct device *dev = (struct device *)arg;
        struct ethsw_core *ethsw = dev_get_drvdata(dev);
-
-       /* Mask the events and the if_id reserved bits to be cleared on read */
-       u32 status = DPSW_IRQ_EVENT_LINK_CHANGED | 0xFFFF0000;
-       int err;
+       struct ethsw_port_priv *port_priv;
+       u32 status = ~0;
+       int err, if_id;
 
        err = dpsw_get_irq_status(ethsw->mc_io, 0, ethsw->dpsw_handle,
                                  DPSW_IRQ_INDEX_IF, &status);
        if (err) {
                dev_err(dev, "Can't get irq status (err %d)\n", err);
-
-               err = dpsw_clear_irq_status(ethsw->mc_io, 0, ethsw->dpsw_handle,
-                                           DPSW_IRQ_INDEX_IF, 0xFFFFFFFF);
-               if (err)
-                       dev_err(dev, "Can't clear irq status (err %d)\n", err);
                goto out;
        }
 
-       if (status & DPSW_IRQ_EVENT_LINK_CHANGED)
-               dpaa2_switch_links_state_update(ethsw);
+       if_id = (status & 0xFFFF0000) >> 16;
+       port_priv = ethsw->ports[if_id];
+
+       if (status & DPSW_IRQ_EVENT_LINK_CHANGED) {
+               dpaa2_switch_port_link_state_update(port_priv->netdev);
+               dpaa2_switch_port_set_mac_addr(port_priv);
+       }
+
+       if (status & DPSW_IRQ_EVENT_ENDPOINT_CHANGED) {
+               if (dpaa2_switch_port_has_mac(port_priv))
+                       dpaa2_switch_port_disconnect_mac(port_priv);
+               else
+                       dpaa2_switch_port_connect_mac(port_priv);
+       }
 
 out:
+       err = dpsw_clear_irq_status(ethsw->mc_io, 0, ethsw->dpsw_handle,
+                                   DPSW_IRQ_INDEX_IF, status);
+       if (err)
+               dev_err(dev, "Can't clear irq status (err %d)\n", err);
+
        return IRQ_HANDLED;
 }
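
The reworked handler no longer masks the status word up front; it reads all bits and recovers the interface index from status[31:16], so a single IRQ read identifies both the event and the affected port. A small user-space sketch of the same unpacking, with the status value assumed purely for illustration:

#include <stdio.h>
#include <stdint.h>

#define DPSW_IRQ_EVENT_LINK_CHANGED     0x0001
#define DPSW_IRQ_EVENT_ENDPOINT_CHANGED 0x0002

int main(void)
{
	/* Hypothetical status: link-changed event reported on interface 3 */
	uint32_t status = (3u << 16) | DPSW_IRQ_EVENT_LINK_CHANGED;
	int if_id = (status & 0xFFFF0000) >> 16;

	printf("if_id=%d link=%d endpoint=%d\n", if_id,
	       !!(status & DPSW_IRQ_EVENT_LINK_CHANGED),
	       !!(status & DPSW_IRQ_EVENT_ENDPOINT_CHANGED));
	return 0;
}
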
 
@@ -3133,6 +3199,7 @@ static int dpaa2_switch_remove(struct fsl_mc_device *sw_dev)
        for (i = 0; i < ethsw->sw_attr.num_ifs; i++) {
                port_priv = ethsw->ports[i];
                unregister_netdev(port_priv->netdev);
+               dpaa2_switch_port_disconnect_mac(port_priv);
                free_netdev(port_priv->netdev);
        }
 
@@ -3212,6 +3279,10 @@ static int dpaa2_switch_probe_port(struct ethsw_core *ethsw,
                goto err_port_probe;
        port_priv->learn_ena = false;
 
+       err = dpaa2_switch_port_connect_mac(port_priv);
+       if (err)
+               goto err_port_probe;
+
        return 0;
 
 err_port_probe:
@@ -3288,12 +3359,6 @@ static int dpaa2_switch_probe(struct fsl_mc_device *sw_dev)
                               &ethsw->fq[i].napi, dpaa2_switch_poll,
                               NAPI_POLL_WEIGHT);
 
-       err = dpsw_enable(ethsw->mc_io, 0, ethsw->dpsw_handle);
-       if (err) {
-               dev_err(ethsw->dev, "dpsw_enable err %d\n", err);
-               goto err_free_netdev;
-       }
-
        /* Setup IRQs */
        err = dpaa2_switch_setup_irqs(sw_dev);
        if (err)
index f69d940..0002dca 100644 (file)
@@ -21,6 +21,7 @@
 #include <net/pkt_cls.h>
 #include <soc/fsl/dpaa2-io.h>
 
+#include "dpaa2-mac.h"
 #include "dpsw.h"
 
 /* Number of IRQs supported */
@@ -159,6 +160,7 @@ struct ethsw_port_priv {
        bool                    learn_ena;
 
        struct dpaa2_switch_filter_block *filter_block;
+       struct dpaa2_mac        *mac;
 };
 
 /* Switch data */
@@ -225,6 +227,22 @@ static inline bool dpaa2_switch_supports_cpu_traffic(struct ethsw_core *ethsw)
        return true;
 }
 
+static inline bool
+dpaa2_switch_port_is_type_phy(struct ethsw_port_priv *port_priv)
+{
+       if (port_priv->mac &&
+           (port_priv->mac->attr.link_type == DPMAC_LINK_TYPE_PHY ||
+            port_priv->mac->attr.link_type == DPMAC_LINK_TYPE_BACKPLANE))
+               return true;
+
+       return false;
+}
+
+static inline bool dpaa2_switch_port_has_mac(struct ethsw_port_priv *port_priv)
+{
+       return port_priv->mac ? true : false;
+}
+
 bool dpaa2_switch_port_dev_check(const struct net_device *netdev);
 
 int dpaa2_switch_port_vlans_add(struct net_device *netdev,
index 892df90..b90bd36 100644 (file)
@@ -98,6 +98,11 @@ int dpsw_reset(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token);
  */
 #define DPSW_IRQ_EVENT_LINK_CHANGED    0x0001
 
+/**
+ * DPSW_IRQ_EVENT_ENDPOINT_CHANGED - Indicates a change in endpoint
+ */
+#define DPSW_IRQ_EVENT_ENDPOINT_CHANGED        0x0002
+
 /**
  * struct dpsw_irq_cfg - IRQ configuration
  * @addr:      Address that must be written to signal a message-based interrupt
index ae32591..d2e9a6c 100644 (file)
 #define FEC_RXIC0              0xfff
 #define FEC_RXIC1              0xfff
 #define FEC_RXIC2              0xfff
+#define FEC_LPI_SLEEP          0xfff
+#define FEC_LPI_WAKE           0xfff
 #endif /* CONFIG_M5272 */
 
 
index 40ea318..fdff37b 100644 (file)
@@ -2042,6 +2042,34 @@ failed_clk_ptp:
        return ret;
 }
 
+static int fec_enet_parse_rgmii_delay(struct fec_enet_private *fep,
+                                     struct device_node *np)
+{
+       u32 rgmii_tx_delay, rgmii_rx_delay;
+
+       /* For rgmii tx internal delay, valid values are 0ps and 2000ps */
+       if (!of_property_read_u32(np, "tx-internal-delay-ps", &rgmii_tx_delay)) {
+               if (rgmii_tx_delay != 0 && rgmii_tx_delay != 2000) {
+                       dev_err(&fep->pdev->dev, "The only allowed RGMII TX delay values are: 0ps, 2000ps");
+                       return -EINVAL;
+               } else if (rgmii_tx_delay == 2000) {
+                       fep->rgmii_txc_dly = true;
+               }
+       }
+
+       /* For rgmii rx internal delay, valid values are 0ps and 2000ps */
+       if (!of_property_read_u32(np, "rx-internal-delay-ps", &rgmii_rx_delay)) {
+               if (rgmii_rx_delay != 0 && rgmii_rx_delay != 2000) {
+                       dev_err(&fep->pdev->dev, "The only allowed RGMII RX delay values are: 0ps, 2000ps");
+                       return -EINVAL;
+               } else if (rgmii_rx_delay == 2000) {
+                       fep->rgmii_rxc_dly = true;
+               }
+       }
+
+       return 0;
+}
+
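
The old probe-time checks set a delay flag whenever of_property_read_u32() returned non-zero, i.e. when the property was absent, inverting the intent; the new helper sets a flag only for an explicit 2000 ps value. A minimal kernel-style sketch of the corrected pattern, with a hypothetical helper name and the error handling for unsupported values omitted:

#include <linux/of.h>

/* Illustrative only: the flag is set solely for an explicit 2000 ps value */
static bool rgmii_delay_is_2000ps(struct device_node *np, const char *prop)
{
	u32 ps;

	/* of_property_read_u32() returns 0 only when the property exists,
	 * so a missing property leaves the delay disabled -- the opposite
	 * of what the old probe-time check did.
	 */
	if (of_property_read_u32(np, prop, &ps))
		return false;

	return ps == 2000;
}
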
 static int fec_enet_mii_probe(struct net_device *ndev)
 {
        struct fec_enet_private *fep = netdev_priv(ndev);
@@ -3719,7 +3747,6 @@ fec_probe(struct platform_device *pdev)
        char irq_name[8];
        int irq_cnt;
        struct fec_devinfo *dev_info;
-       u32 rgmii_delay;
 
        fec_enet_get_queue_num(pdev, &num_tx_qs, &num_rx_qs);
 
@@ -3777,12 +3804,6 @@ fec_probe(struct platform_device *pdev)
        if (ret)
                goto failed_stop_mode;
 
-       /* For rgmii internal delay, valid values are 0ps and 2000ps */
-       if (of_property_read_u32(np, "tx-internal-delay-ps", &rgmii_delay))
-               fep->rgmii_txc_dly = true;
-       if (of_property_read_u32(np, "rx-internal-delay-ps", &rgmii_delay))
-               fep->rgmii_rxc_dly = true;
-
        phy_node = of_parse_phandle(np, "phy-handle", 0);
        if (!phy_node && of_phy_is_fixed_link(np)) {
                ret = of_phy_register_fixed_link(np);
@@ -3806,6 +3827,10 @@ fec_probe(struct platform_device *pdev)
                fep->phy_interface = interface;
        }
 
+       ret = fec_enet_parse_rgmii_delay(fep, np);
+       if (ret)
+               goto failed_rgmii_delay;
+
        fep->clk_ipg = devm_clk_get(&pdev->dev, "ipg");
        if (IS_ERR(fep->clk_ipg)) {
                ret = PTR_ERR(fep->clk_ipg);
@@ -3835,9 +3860,11 @@ fec_probe(struct platform_device *pdev)
        fep->clk_ref_rate = clk_get_rate(fep->clk_ref);
 
        /* clk_2x_txclk is optional, depends on board */
-       fep->clk_2x_txclk = devm_clk_get(&pdev->dev, "enet_2x_txclk");
-       if (IS_ERR(fep->clk_2x_txclk))
-               fep->clk_2x_txclk = NULL;
+       if (fep->rgmii_txc_dly || fep->rgmii_rxc_dly) {
+               fep->clk_2x_txclk = devm_clk_get(&pdev->dev, "enet_2x_txclk");
+               if (IS_ERR(fep->clk_2x_txclk))
+                       fep->clk_2x_txclk = NULL;
+       }
 
        fep->bufdesc_ex = fep->quirks & FEC_QUIRK_HAS_BUFDESC_EX;
        fep->clk_ptp = devm_clk_get(&pdev->dev, "ptp");
@@ -3955,6 +3982,7 @@ failed_clk_ahb:
 failed_clk_ipg:
        fec_enet_clk_enable(ndev, false);
 failed_clk:
+failed_rgmii_delay:
        if (of_phy_is_fixed_link(np))
                of_phy_deregister_fixed_link(np);
        of_node_put(phy_node);
@@ -3989,13 +4017,13 @@ fec_drv_remove(struct platform_device *pdev)
        if (of_phy_is_fixed_link(np))
                of_phy_deregister_fixed_link(np);
        of_node_put(fep->phy_node);
-       free_netdev(ndev);
 
        clk_disable_unprepare(fep->clk_ahb);
        clk_disable_unprepare(fep->clk_ipg);
        pm_runtime_put_noidle(&pdev->dev);
        pm_runtime_disable(&pdev->dev);
 
+       free_netdev(ndev);
        return 0;
 }
 
index 094e4a3..2ba0e7b 100644 (file)
@@ -91,6 +91,7 @@ config HNS3
        tristate "Hisilicon Network Subsystem Support HNS3 (Framework)"
        depends on PCI
        select NET_DEVLINK
+       select PAGE_POOL
        help
          This selects the framework support for Hisilicon Network Subsystem 3.
          This layer facilitates clients like ENET, RoCE and user-space ethernet
index cb8d5da..fcbeb1f 100644 (file)
@@ -3205,6 +3205,21 @@ static int hns3_alloc_buffer(struct hns3_enet_ring *ring,
        unsigned int order = hns3_page_order(ring);
        struct page *p;
 
+       if (ring->page_pool) {
+               p = page_pool_dev_alloc_frag(ring->page_pool,
+                                            &cb->page_offset,
+                                            hns3_buf_size(ring));
+               if (unlikely(!p))
+                       return -ENOMEM;
+
+               cb->priv = p;
+               cb->buf = page_address(p);
+               cb->dma = page_pool_get_dma_addr(p);
+               cb->type = DESC_TYPE_PP_FRAG;
+               cb->reuse_flag = 0;
+               return 0;
+       }
+
        p = dev_alloc_pages(order);
        if (!p)
                return -ENOMEM;
@@ -3227,8 +3242,13 @@ static void hns3_free_buffer(struct hns3_enet_ring *ring,
        if (cb->type & (DESC_TYPE_SKB | DESC_TYPE_BOUNCE_HEAD |
                        DESC_TYPE_BOUNCE_ALL | DESC_TYPE_SGL_SKB))
                napi_consume_skb(cb->priv, budget);
-       else if (!HNAE3_IS_TX_RING(ring) && cb->pagecnt_bias)
-               __page_frag_cache_drain(cb->priv, cb->pagecnt_bias);
+       else if (!HNAE3_IS_TX_RING(ring)) {
+               if (cb->type & DESC_TYPE_PAGE && cb->pagecnt_bias)
+                       __page_frag_cache_drain(cb->priv, cb->pagecnt_bias);
+               else if (cb->type & DESC_TYPE_PP_FRAG)
+                       page_pool_put_full_page(ring->page_pool, cb->priv,
+                                               false);
+       }
        memset(cb, 0, sizeof(*cb));
 }
 
@@ -3315,7 +3335,7 @@ static int hns3_alloc_and_map_buffer(struct hns3_enet_ring *ring,
        int ret;
 
        ret = hns3_alloc_buffer(ring, cb);
-       if (ret)
+       if (ret || ring->page_pool)
                goto out;
 
        ret = hns3_map_buffer(ring, cb);
@@ -3337,7 +3357,8 @@ static int hns3_alloc_and_attach_buffer(struct hns3_enet_ring *ring, int i)
        if (ret)
                return ret;
 
-       ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma);
+       ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma +
+                                        ring->desc_cb[i].page_offset);
 
        return 0;
 }
@@ -3367,7 +3388,8 @@ static void hns3_replace_buffer(struct hns3_enet_ring *ring, int i,
 {
        hns3_unmap_buffer(ring, &ring->desc_cb[i]);
        ring->desc_cb[i] = *res_cb;
-       ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma);
+       ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma +
+                                        ring->desc_cb[i].page_offset);
        ring->desc[i].rx.bd_base_info = 0;
 }
 
@@ -3539,6 +3561,12 @@ static void hns3_nic_reuse_page(struct sk_buff *skb, int i,
        u32 frag_size = size - pull_len;
        bool reused;
 
+       if (ring->page_pool) {
+               skb_add_rx_frag(skb, i, desc_cb->priv, frag_offset,
+                               frag_size, truesize);
+               return;
+       }
+
        /* Avoid re-using remote or pfmem page */
        if (unlikely(!dev_page_is_reusable(desc_cb->priv)))
                goto out;
@@ -3856,6 +3884,9 @@ static int hns3_alloc_skb(struct hns3_enet_ring *ring, unsigned int length,
                /* We can reuse buffer as-is, just make sure it is reusable */
                if (dev_page_is_reusable(desc_cb->priv))
                        desc_cb->reuse_flag = 1;
+               else if (desc_cb->type & DESC_TYPE_PP_FRAG)
+                       page_pool_put_full_page(ring->page_pool, desc_cb->priv,
+                                               false);
                else /* This page cannot be reused so discard it */
                        __page_frag_cache_drain(desc_cb->priv,
                                                desc_cb->pagecnt_bias);
@@ -3863,6 +3894,10 @@ static int hns3_alloc_skb(struct hns3_enet_ring *ring, unsigned int length,
                hns3_rx_ring_move_fw(ring);
                return 0;
        }
+
+       if (ring->page_pool)
+               skb_mark_for_recycle(skb);
+
        u64_stats_update_begin(&ring->syncp);
        ring->stats.seg_pkt_cnt++;
        u64_stats_update_end(&ring->syncp);
@@ -3901,6 +3936,10 @@ static int hns3_add_frag(struct hns3_enet_ring *ring)
                                            "alloc rx fraglist skb fail\n");
                                return -ENXIO;
                        }
+
+                       if (ring->page_pool)
+                               skb_mark_for_recycle(new_skb);
+
                        ring->frag_num = 0;
 
                        if (ring->tail_skb) {
@@ -4705,6 +4744,29 @@ static void hns3_put_ring_config(struct hns3_nic_priv *priv)
        priv->ring = NULL;
 }
 
+static void hns3_alloc_page_pool(struct hns3_enet_ring *ring)
+{
+       struct page_pool_params pp_params = {
+               .flags = PP_FLAG_DMA_MAP | PP_FLAG_PAGE_FRAG |
+                               PP_FLAG_DMA_SYNC_DEV,
+               .order = hns3_page_order(ring),
+               .pool_size = ring->desc_num * hns3_buf_size(ring) /
+                               (PAGE_SIZE << hns3_page_order(ring)),
+               .nid = dev_to_node(ring_to_dev(ring)),
+               .dev = ring_to_dev(ring),
+               .dma_dir = DMA_FROM_DEVICE,
+               .offset = 0,
+               .max_len = PAGE_SIZE << hns3_page_order(ring),
+       };
+
+       ring->page_pool = page_pool_create(&pp_params);
+       if (IS_ERR(ring->page_pool)) {
+               dev_warn(ring_to_dev(ring), "page pool creation failed: %ld\n",
+                        PTR_ERR(ring->page_pool));
+               ring->page_pool = NULL;
+       }
+}
+
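
pool_size here is the number of pages needed to back every descriptor at the ring's buffer size. A quick user-space check of the arithmetic, with the ring parameters assumed for illustration:

#include <stdio.h>

int main(void)
{
	/* Hypothetical ring: 1024 descriptors, 2 KB buffers, 4 KB pages, order 0 */
	unsigned int desc_num = 1024, buf_size = 2048;
	unsigned int page_size = 4096, order = 0;
	unsigned int pool_size = desc_num * buf_size / (page_size << order);

	printf("pool_size = %u pages\n", pool_size);	/* 512 */
	return 0;
}
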
 static int hns3_alloc_ring_memory(struct hns3_enet_ring *ring)
 {
        int ret;
@@ -4724,6 +4786,8 @@ static int hns3_alloc_ring_memory(struct hns3_enet_ring *ring)
                goto out_with_desc_cb;
 
        if (!HNAE3_IS_TX_RING(ring)) {
+               hns3_alloc_page_pool(ring);
+
                ret = hns3_alloc_ring_buffers(ring);
                if (ret)
                        goto out_with_desc;
@@ -4764,6 +4828,11 @@ void hns3_fini_ring(struct hns3_enet_ring *ring)
                devm_kfree(ring_to_dev(ring), tx_spare);
                ring->tx_spare = NULL;
        }
+
+       if (!HNAE3_IS_TX_RING(ring) && ring->page_pool) {
+               page_pool_destroy(ring->page_pool);
+               ring->page_pool = NULL;
+       }
 }
 
 static int hns3_buf_size2type(u32 buf_size)
index 15af3d9..27809d6 100644 (file)
@@ -6,6 +6,7 @@
 
 #include <linux/dim.h>
 #include <linux/if_vlan.h>
+#include <net/page_pool.h>
 
 #include "hnae3.h"
 
@@ -307,6 +308,7 @@ enum hns3_desc_type {
        DESC_TYPE_BOUNCE_ALL            = 1 << 3,
        DESC_TYPE_BOUNCE_HEAD           = 1 << 4,
        DESC_TYPE_SGL_SKB               = 1 << 5,
+       DESC_TYPE_PP_FRAG               = 1 << 6,
 };
 
 struct hns3_desc_cb {
@@ -451,6 +453,7 @@ struct hns3_enet_ring {
        struct hnae3_queue *tqp;
        int queue_index;
        struct device *dev; /* will be used for DMA mapping of descriptors */
+       struct page_pool *page_pool;
 
        /* statistic */
        struct ring_stats stats;
index 06d2994..448f29a 100644 (file)
@@ -112,14 +112,14 @@ int hclge_devlink_init(struct hclge_dev *hdev)
        int ret;
 
        devlink = devlink_alloc(&hclge_devlink_ops,
-                               sizeof(struct hclge_devlink_priv));
+                               sizeof(struct hclge_devlink_priv), &pdev->dev);
        if (!devlink)
                return -ENOMEM;
 
        priv = devlink_priv(devlink);
        priv->hdev = hdev;
 
-       ret = devlink_register(devlink, &pdev->dev);
+       ret = devlink_register(devlink);
        if (ret) {
                dev_err(&pdev->dev, "failed to register devlink, ret = %d\n",
                        ret);
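
These hunks track the devlink core change in which the struct device moves from devlink_register() into devlink_alloc(), so registration takes only the devlink instance. A minimal sketch of the updated pattern, assuming the API as it appears in this series; my_devlink_ops and struct my_priv are placeholders, not a real driver's symbols:

#include <linux/pci.h>
#include <net/devlink.h>

struct my_priv { int dummy; };			/* placeholder priv */
static const struct devlink_ops my_devlink_ops;	/* placeholder ops */

static int my_devlink_init(struct pci_dev *pdev)
{
	struct devlink *devlink;
	int err;

	/* The device is now bound at allocation time... */
	devlink = devlink_alloc(&my_devlink_ops, sizeof(struct my_priv),
				&pdev->dev);
	if (!devlink)
		return -ENOMEM;

	/* ...so registration no longer takes a struct device */
	err = devlink_register(devlink);
	if (err)
		devlink_free(devlink);
	return err;
}
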
index 3b1f845..befa9bc 100644 (file)
@@ -5,9 +5,27 @@
 #include "hclge_main.h"
 #include "hnae3.h"
 
+static int hclge_ptp_get_cycle(struct hclge_dev *hdev)
+{
+       struct hclge_ptp *ptp = hdev->ptp;
+
+       ptp->cycle.quo = readl(hdev->ptp->io_base + HCLGE_PTP_CYCLE_QUO_REG) &
+                        HCLGE_PTP_CYCLE_QUO_MASK;
+       ptp->cycle.numer = readl(hdev->ptp->io_base + HCLGE_PTP_CYCLE_NUM_REG);
+       ptp->cycle.den = readl(hdev->ptp->io_base + HCLGE_PTP_CYCLE_DEN_REG);
+
+       if (ptp->cycle.den == 0) {
+               dev_err(&hdev->pdev->dev, "invalid ptp cycle denominator!\n");
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
 static int hclge_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
 {
        struct hclge_dev *hdev = hclge_ptp_get_hdev(ptp);
+       struct hclge_ptp_cycle *cycle = &hdev->ptp->cycle;
        u64 adj_val, adj_base, diff;
        unsigned long flags;
        bool is_neg = false;
@@ -18,7 +36,7 @@ static int hclge_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
                is_neg = true;
        }
 
-       adj_base = HCLGE_PTP_CYCLE_ADJ_BASE * HCLGE_PTP_CYCLE_ADJ_UNIT;
+       adj_base = (u64)cycle->quo * (u64)cycle->den + (u64)cycle->numer;
        adj_val = adj_base * ppb;
        diff = div_u64(adj_val, 1000000000ULL);
 
@@ -29,16 +47,16 @@ static int hclge_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
 
        /* This clock cycle is defined by three parts: quotient, numerator
         * and denominator. For example, for a 2.5 ns cycle the quotient is 2,
-        * denominator is fixed to HCLGE_PTP_CYCLE_ADJ_UNIT, and numerator
-        * is 0.5 * HCLGE_PTP_CYCLE_ADJ_UNIT.
+        * denominator is fixed to ptp->cycle.den, and numerator
+        * is 0.5 * ptp->cycle.den.
         */
-       quo = div_u64_rem(adj_val, HCLGE_PTP_CYCLE_ADJ_UNIT, &numerator);
+       quo = div_u64_rem(adj_val, cycle->den, &numerator);
 
        spin_lock_irqsave(&hdev->ptp->lock, flags);
-       writel(quo, hdev->ptp->io_base + HCLGE_PTP_CYCLE_QUO_REG);
+       writel(quo & HCLGE_PTP_CYCLE_QUO_MASK,
+              hdev->ptp->io_base + HCLGE_PTP_CYCLE_QUO_REG);
        writel(numerator, hdev->ptp->io_base + HCLGE_PTP_CYCLE_NUM_REG);
-       writel(HCLGE_PTP_CYCLE_ADJ_UNIT,
-              hdev->ptp->io_base + HCLGE_PTP_CYCLE_DEN_REG);
+       writel(cycle->den, hdev->ptp->io_base + HCLGE_PTP_CYCLE_DEN_REG);
        writel(HCLGE_PTP_CYCLE_ADJ_EN,
               hdev->ptp->io_base + HCLGE_PTP_CYCLE_CFG_REG);
        spin_unlock_irqrestore(&hdev->ptp->lock, flags);
@@ -475,6 +493,10 @@ int hclge_ptp_init(struct hclge_dev *hdev)
                ret = hclge_ptp_create_clock(hdev);
                if (ret)
                        return ret;
+
+               ret = hclge_ptp_get_cycle(hdev);
+               if (ret)
+                       return ret;
        }
 
        ret = hclge_ptp_int_en(hdev, true);
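
With the cycle now read from the hardware, the adjustment base becomes quo * den + numer, i.e. the nominal cycle expressed in denominator units, against which the ppb offset is scaled. A runnable check of the arithmetic, with register values assumed for a hypothetical 2.5 ns cycle:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Hypothetical 2.5 ns cycle: quo = 2, numer = 0.5 * den */
	uint64_t quo = 2, den = 100000000, numer = 50000000;
	int32_t ppb = 100;	/* example frequency adjustment */

	uint64_t adj_base = quo * den + numer;	/* 250000000 */
	uint64_t diff = adj_base * (uint64_t)ppb / 1000000000ULL;

	printf("adj_base=%llu diff=%llu\n",
	       (unsigned long long)adj_base, (unsigned long long)diff);
	return 0;
}
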
index 5a202b7..dbf5f4c 100644 (file)
@@ -29,6 +29,7 @@
 #define HCLGE_PTP_TIME_ADJ_REG         0x60
 #define HCLGE_PTP_TIME_ADJ_EN          BIT(0)
 #define HCLGE_PTP_CYCLE_QUO_REG                0x64
+#define HCLGE_PTP_CYCLE_QUO_MASK       GENMASK(7, 0)
 #define HCLGE_PTP_CYCLE_DEN_REG                0x68
 #define HCLGE_PTP_CYCLE_NUM_REG                0x6C
 #define HCLGE_PTP_CYCLE_CFG_REG                0x70
@@ -37,9 +38,7 @@
 #define HCLGE_PTP_CUR_TIME_SEC_L_REG   0x78
 #define HCLGE_PTP_CUR_TIME_NSEC_REG    0x7C
 
-#define HCLGE_PTP_CYCLE_ADJ_BASE       2
 #define HCLGE_PTP_CYCLE_ADJ_MAX                500000000
-#define HCLGE_PTP_CYCLE_ADJ_UNIT       100000000
 #define HCLGE_PTP_SEC_H_OFFSET         32u
 #define HCLGE_PTP_SEC_L_MASK           GENMASK(31, 0)
 
 #define HCLGE_PTP_FLAG_TX_EN           1
 #define HCLGE_PTP_FLAG_RX_EN           2
 
+struct hclge_ptp_cycle {
+       u32 quo;
+       u32 numer;
+       u32 den;
+};
+
 struct hclge_ptp {
        struct hclge_dev *hdev;
        struct ptp_clock *clock;
@@ -58,6 +63,7 @@ struct hclge_ptp {
        spinlock_t lock;        /* protects ptp registers */
        u32 ptp_cfg;
        u32 last_tx_seqid;
+       struct hclge_ptp_cycle cycle;
        unsigned long tx_start;
        unsigned long tx_cnt;
        unsigned long tx_skipped;
index 21a4527..1e6061f 100644 (file)
@@ -112,15 +112,16 @@ int hclgevf_devlink_init(struct hclgevf_dev *hdev)
        struct devlink *devlink;
        int ret;
 
-       devlink = devlink_alloc(&hclgevf_devlink_ops,
-                               sizeof(struct hclgevf_devlink_priv));
+       devlink = devlink_alloc(&hclgevf_devlink_ops,
+                               sizeof(struct hclgevf_devlink_priv),
+                               &pdev->dev);
        if (!devlink)
                return -ENOMEM;
 
        priv = devlink_priv(devlink);
        priv->hdev = hdev;
 
-       ret = devlink_register(devlink, &pdev->dev);
+       ret = devlink_register(devlink);
        if (ret) {
                dev_err(&pdev->dev, "failed to register devlink, ret = %d\n",
                        ret);
index 58d5646..6e11ee3 100644 (file)
@@ -293,9 +293,9 @@ static const struct devlink_ops hinic_devlink_ops = {
        .flash_update = hinic_devlink_flash_update,
 };
 
-struct devlink *hinic_devlink_alloc(void)
+struct devlink *hinic_devlink_alloc(struct device *dev)
 {
-       return devlink_alloc(&hinic_devlink_ops, sizeof(struct hinic_dev));
+       return devlink_alloc(&hinic_devlink_ops, sizeof(struct hinic_dev), dev);
 }
 
 void hinic_devlink_free(struct devlink *devlink)
@@ -303,11 +303,11 @@ void hinic_devlink_free(struct devlink *devlink)
        devlink_free(devlink);
 }
 
-int hinic_devlink_register(struct hinic_devlink_priv *priv, struct device *dev)
+int hinic_devlink_register(struct hinic_devlink_priv *priv)
 {
        struct devlink *devlink = priv_to_devlink(priv);
 
-       return devlink_register(devlink, dev);
+       return devlink_register(devlink);
 }
 
 void hinic_devlink_unregister(struct hinic_devlink_priv *priv)
index a090ebc..9e31501 100644 (file)
@@ -108,9 +108,9 @@ struct host_image_st {
        u32 device_id;
 };
 
-struct devlink *hinic_devlink_alloc(void);
+struct devlink *hinic_devlink_alloc(struct device *dev);
 void hinic_devlink_free(struct devlink *devlink);
-int hinic_devlink_register(struct hinic_devlink_priv *priv, struct device *dev);
+int hinic_devlink_register(struct hinic_devlink_priv *priv);
 void hinic_devlink_unregister(struct hinic_devlink_priv *priv);
 
 int hinic_health_reporters_create(struct hinic_devlink_priv *priv);
index 428108e..56b6b04 100644 (file)
@@ -754,7 +754,7 @@ static int init_pfhwdev(struct hinic_pfhwdev *pfhwdev)
                return err;
        }
 
-       err = hinic_devlink_register(hwdev->devlink_dev, &pdev->dev);
+       err = hinic_devlink_register(hwdev->devlink_dev);
        if (err) {
                dev_err(&hwif->pdev->dev, "Failed to register devlink\n");
                hinic_pf_to_mgmt_free(&pfhwdev->pf_to_mgmt);
index 405ee4d..881d0b2 100644 (file)
@@ -1183,7 +1183,7 @@ static int nic_dev_init(struct pci_dev *pdev)
        struct devlink *devlink;
        int err, num_qps;
 
-       devlink = hinic_devlink_alloc();
+       devlink = hinic_devlink_alloc(&pdev->dev);
        if (!devlink) {
                dev_err(&pdev->dev, "Hinic devlink alloc failed\n");
                return -ENOMEM;
index fc8c7cd..b8a4014 100644 (file)
@@ -1110,9 +1110,6 @@ static void print_eth(unsigned char *add, char *str)
               add, add + 6, add, add[12], add[13], str);
 }
 
-static int io = 0x300;
-static int irq = 10;
-
 static const struct net_device_ops i596_netdev_ops = {
        .ndo_open               = i596_open,
        .ndo_stop               = i596_close,
@@ -1123,7 +1120,7 @@ static const struct net_device_ops i596_netdev_ops = {
        .ndo_validate_addr      = eth_validate_addr,
 };
 
-struct net_device * __init i82596_probe(int unit)
+static struct net_device * __init i82596_probe(void)
 {
        struct net_device *dev;
        int i;
@@ -1140,14 +1137,6 @@ struct net_device * __init i82596_probe(int unit)
        if (!dev)
                return ERR_PTR(-ENOMEM);
 
-       if (unit >= 0) {
-               sprintf(dev->name, "eth%d", unit);
-               netdev_boot_setup_check(dev);
-       } else {
-               dev->base_addr = io;
-               dev->irq = irq;
-       }
-
 #ifdef ENABLE_MVME16x_NET
        if (MACH_IS_MVME16x) {
                if (mvme16x_config & MVME16x_CONFIG_NO_ETHERNET) {
@@ -1515,22 +1504,22 @@ static void set_multicast_list(struct net_device *dev)
        }
 }
 
-#ifdef MODULE
 static struct net_device *dev_82596;
 
 static int debug = -1;
 module_param(debug, int, 0);
 MODULE_PARM_DESC(debug, "i82596 debug mask");
 
-int __init init_module(void)
+static int __init i82596_init(void)
 {
        if (debug >= 0)
                i596_debug = debug;
-       dev_82596 = i82596_probe(-1);
+       dev_82596 = i82596_probe();
        return PTR_ERR_OR_ZERO(dev_82596);
 }
+module_init(i82596_init);
 
-void __exit cleanup_module(void)
+static void __exit i82596_cleanup(void)
 {
        unregister_netdev(dev_82596);
 #ifdef __mc68000__
@@ -1544,5 +1533,4 @@ void __exit cleanup_module(void)
        free_page ((u32)(dev_82596->mem_start));
        free_netdev(dev_82596);
 }
-
-#endif                         /* MODULE */
+module_exit(i82596_cleanup);
index 4564ee0..893e0dd 100644 (file)
@@ -29,6 +29,7 @@ static int rfdadd = 0; /* rfdadd=1 may be better for 8K MEM cards */
 static int fifo=0x8;   /* don't change */
 
 #include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/string.h>
 #include <linux/errno.h>
 #include <linux/ioport.h>
@@ -276,7 +277,7 @@ static void alloc586(struct net_device *dev)
        memset((char *)p->scb,0,sizeof(struct scb_struct));
 }
 
-struct net_device * __init sun3_82586_probe(int unit)
+static int __init sun3_82586_probe(void)
 {
        struct net_device *dev;
        unsigned long ioaddr;
@@ -291,25 +292,20 @@ struct net_device * __init sun3_82586_probe(int unit)
                break;
 
        default:
-               return ERR_PTR(-ENODEV);
+               return -ENODEV;
        }
 
        if (found)
-               return ERR_PTR(-ENODEV);
+               return -ENODEV;
 
        ioaddr = (unsigned long)ioremap(IE_OBIO, SUN3_82586_TOTAL_SIZE);
        if (!ioaddr)
-               return ERR_PTR(-ENOMEM);
+               return -ENOMEM;
        found = 1;
 
        dev = alloc_etherdev(sizeof(struct priv));
        if (!dev)
                goto out;
-       if (unit >= 0) {
-               sprintf(dev->name, "eth%d", unit);
-               netdev_boot_setup_check(dev);
-       }
-
        dev->irq = IE_IRQ;
        dev->base_addr = ioaddr;
        err = sun3_82586_probe1(dev, ioaddr);
@@ -326,8 +322,9 @@ out1:
        free_netdev(dev);
 out:
        iounmap((void __iomem *)ioaddr);
-       return ERR_PTR(err);
+       return err;
 }
+module_init(sun3_82586_probe);
 
 static const struct net_device_ops sun3_82586_netdev_ops = {
        .ndo_open               = sun3_82586_open,
index 3e822ba..2c9e4ee 100644 (file)
@@ -980,7 +980,7 @@ static void i40e_get_settings_link_up(struct i40e_hw *hw,
        default:
                /* if we got here and link is up something bad is afoot */
                netdev_info(netdev,
-                           "WARNING: Link is up but PHY type 0x%x is not recognized.\n",
+                           "WARNING: Link is up but PHY type 0x%x is not recognized, or incorrect cable is in use\n",
                            hw_link_info->phy_type);
        }
 
@@ -5294,6 +5294,10 @@ flags_complete:
                                        dev_warn(&pf->pdev->dev,
                                                 "Device configuration forbids SW from starting the LLDP agent.\n");
                                        return -EINVAL;
+                               case I40E_AQ_RC_EAGAIN:
+                                       dev_warn(&pf->pdev->dev,
+                                                "Stop FW LLDP agent command is still being processed, please try again in a second.\n");
+                                       return -EBUSY;
                                default:
                                        dev_warn(&pf->pdev->dev,
                                                 "Starting FW LLDP agent failed: error: %s, %s\n",
index 5b4012a..97c7855 100644 (file)
@@ -4457,11 +4457,10 @@ int i40e_control_wait_tx_q(int seid, struct i40e_pf *pf, int pf_q,
 }
 
 /**
- * i40e_vsi_control_tx - Start or stop a VSI's rings
+ * i40e_vsi_enable_tx - Start a VSI's rings
  * @vsi: the VSI being configured
- * @enable: start or stop the rings
  **/
-static int i40e_vsi_control_tx(struct i40e_vsi *vsi, bool enable)
+static int i40e_vsi_enable_tx(struct i40e_vsi *vsi)
 {
        struct i40e_pf *pf = vsi->back;
        int i, pf_q, ret = 0;
@@ -4470,7 +4469,7 @@ static int i40e_vsi_control_tx(struct i40e_vsi *vsi, bool enable)
        for (i = 0; i < vsi->num_queue_pairs; i++, pf_q++) {
                ret = i40e_control_wait_tx_q(vsi->seid, pf,
                                             pf_q,
-                                            false /*is xdp*/, enable);
+                                            false /*is xdp*/, true);
                if (ret)
                        break;
 
@@ -4479,7 +4478,7 @@ static int i40e_vsi_control_tx(struct i40e_vsi *vsi, bool enable)
 
                ret = i40e_control_wait_tx_q(vsi->seid, pf,
                                             pf_q + vsi->alloc_queue_pairs,
-                                            true /*is xdp*/, enable);
+                                            true /*is xdp*/, true);
                if (ret)
                        break;
        }
@@ -4577,32 +4576,25 @@ int i40e_control_wait_rx_q(struct i40e_pf *pf, int pf_q, bool enable)
 }
 
 /**
- * i40e_vsi_control_rx - Start or stop a VSI's rings
+ * i40e_vsi_enable_rx - Start a VSI's rings
  * @vsi: the VSI being configured
- * @enable: start or stop the rings
  **/
-static int i40e_vsi_control_rx(struct i40e_vsi *vsi, bool enable)
+static int i40e_vsi_enable_rx(struct i40e_vsi *vsi)
 {
        struct i40e_pf *pf = vsi->back;
        int i, pf_q, ret = 0;
 
        pf_q = vsi->base_queue;
        for (i = 0; i < vsi->num_queue_pairs; i++, pf_q++) {
-               ret = i40e_control_wait_rx_q(pf, pf_q, enable);
+               ret = i40e_control_wait_rx_q(pf, pf_q, true);
                if (ret) {
                        dev_info(&pf->pdev->dev,
-                                "VSI seid %d Rx ring %d %sable timeout\n",
-                                vsi->seid, pf_q, (enable ? "en" : "dis"));
+                                "VSI seid %d Rx ring %d enable timeout\n",
+                                vsi->seid, pf_q);
                        break;
                }
        }
 
-       /* Due to HW errata, on Rx disable only, the register can indicate done
-        * before it really is. Needs 50ms to be sure
-        */
-       if (!enable)
-               mdelay(50);
-
        return ret;
 }
 
@@ -4615,29 +4607,47 @@ int i40e_vsi_start_rings(struct i40e_vsi *vsi)
        int ret = 0;
 
        /* do rx first for enable and last for disable */
-       ret = i40e_vsi_control_rx(vsi, true);
+       ret = i40e_vsi_enable_rx(vsi);
        if (ret)
                return ret;
-       ret = i40e_vsi_control_tx(vsi, true);
+       ret = i40e_vsi_enable_tx(vsi);
 
        return ret;
 }
 
+#define I40E_DISABLE_TX_GAP_MSEC       50
+
 /**
  * i40e_vsi_stop_rings - Stop a VSI's rings
  * @vsi: the VSI being configured
  **/
 void i40e_vsi_stop_rings(struct i40e_vsi *vsi)
 {
+       struct i40e_pf *pf = vsi->back;
+       int pf_q, err, q_end;
+
        /* When port TX is suspended, don't wait */
        if (test_bit(__I40E_PORT_SUSPENDED, vsi->back->state))
                return i40e_vsi_stop_rings_no_wait(vsi);
 
-       /* do rx first for enable and last for disable
-        * Ignore return value, we need to shutdown whatever we can
-        */
-       i40e_vsi_control_tx(vsi, false);
-       i40e_vsi_control_rx(vsi, false);
+       q_end = vsi->base_queue + vsi->num_queue_pairs;
+       for (pf_q = vsi->base_queue; pf_q < q_end; pf_q++)
+               i40e_pre_tx_queue_cfg(&pf->hw, (u32)pf_q, false);
+
+       for (pf_q = vsi->base_queue; pf_q < q_end; pf_q++) {
+               err = i40e_control_wait_rx_q(pf, pf_q, false);
+               if (err)
+                       dev_info(&pf->pdev->dev,
+                                "VSI seid %d Rx ring %d disable timeout\n",
+                                vsi->seid, pf_q);
+       }
+
+       msleep(I40E_DISABLE_TX_GAP_MSEC);
+       pf_q = vsi->base_queue;
+       for (pf_q = vsi->base_queue; pf_q < q_end; pf_q++)
+               wr32(&pf->hw, I40E_QTX_ENA(pf_q), 0);
+
+       i40e_vsi_wait_queues_disabled(vsi);
 }
 
 /**
@@ -7283,6 +7293,8 @@ static int i40e_validate_mqprio_qopt(struct i40e_vsi *vsi,
        }
        if (vsi->num_queue_pairs <
            (mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i])) {
+               dev_err(&vsi->back->pdev->dev,
+                       "Failed to create traffic channel, insufficient number of queues.\n");
                return -EINVAL;
        }
        if (sum_max_rate > i40e_get_link_speed(vsi)) {
@@ -13264,6 +13276,7 @@ static const struct net_device_ops i40e_netdev_ops = {
        .ndo_poll_controller    = i40e_netpoll,
 #endif
        .ndo_setup_tc           = __i40e_setup_tc,
+       .ndo_select_queue       = i40e_lan_select_queue,
        .ndo_set_features       = i40e_set_features,
        .ndo_set_vf_mac         = i40e_ndo_set_vf_mac,
        .ndo_set_vf_vlan        = i40e_ndo_set_vf_port_vlan,
index 38eb815..3f25bd8 100644 (file)
@@ -3631,6 +3631,56 @@ dma_error:
        return -1;
 }
 
+static u16 i40e_swdcb_skb_tx_hash(struct net_device *dev,
+                                 const struct sk_buff *skb,
+                                 u16 num_tx_queues)
+{
+       u32 jhash_initval_salt = 0xd631614b;
+       u32 hash;
+
+       if (skb->sk && skb->sk->sk_hash)
+               hash = skb->sk->sk_hash;
+       else
+               hash = (__force u16)skb->protocol ^ skb->hash;
+
+       hash = jhash_1word(hash, jhash_initval_salt);
+
+       return (u16)(((u64)hash * num_tx_queues) >> 32);
+}
+
+u16 i40e_lan_select_queue(struct net_device *netdev,
+                         struct sk_buff *skb,
+                         struct net_device __always_unused *sb_dev)
+{
+       struct i40e_netdev_priv *np = netdev_priv(netdev);
+       struct i40e_vsi *vsi = np->vsi;
+       struct i40e_hw *hw;
+       u16 qoffset;
+       u16 qcount;
+       u8 tclass;
+       u16 hash;
+       u8 prio;
+
+       /* is DCB enabled at all? */
+       if (vsi->tc_config.numtc == 1)
+               return i40e_swdcb_skb_tx_hash(netdev, skb,
+                                             netdev->real_num_tx_queues);
+
+       prio = skb->priority;
+       hw = &vsi->back->hw;
+       tclass = hw->local_dcbx_config.etscfg.prioritytable[prio];
+       /* sanity check */
+       if (unlikely(!(vsi->tc_config.enabled_tc & BIT(tclass))))
+               tclass = 0;
+
+       /* select a queue assigned for the given TC */
+       qcount = vsi->tc_config.tc_info[tclass].qcount;
+       hash = i40e_swdcb_skb_tx_hash(netdev, skb, qcount);
+
+       qoffset = vsi->tc_config.tc_info[tclass].qoffset;
+       return qoffset + hash;
+}
+
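
The software-DCB path reduces the salted jhash onto the queue count with a multiply-and-shift, ((u64)hash * n) >> 32, which maps a 32-bit hash uniformly into [0, n) without a modulo. A runnable sketch, with the hash and queue count assumed for illustration:

#include <stdio.h>
#include <stdint.h>

static uint16_t hash_to_queue(uint32_t hash, uint16_t num_tx_queues)
{
	/* Scales hash/2^32 into [0, num_tx_queues) without a division */
	return (uint16_t)(((uint64_t)hash * num_tx_queues) >> 32);
}

int main(void)
{
	/* Arbitrary example hash; result is always in 0..7 for 8 queues */
	printf("%u\n", (unsigned)hash_to_queue(0xd631614bu, 8));
	return 0;
}
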
 /**
  * i40e_xmit_xdp_ring - transmits an XDP buffer to an XDP Tx ring
  * @xdpf: data to transmit
index 86fed05..bfc2845 100644 (file)
@@ -451,6 +451,8 @@ static inline unsigned int i40e_rx_pg_order(struct i40e_ring *ring)
 
 bool i40e_alloc_rx_buffers(struct i40e_ring *rxr, u16 cleaned_count);
 netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev);
+u16 i40e_lan_select_queue(struct net_device *netdev, struct sk_buff *skb,
+                         struct net_device *sb_dev);
 void i40e_clean_tx_ring(struct i40e_ring *tx_ring);
 void i40e_clean_rx_ring(struct i40e_ring *rx_ring);
 int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring);
index 91b545a..8c863d6 100644 (file)
@@ -475,7 +475,7 @@ struct ice_pf *ice_allocate_pf(struct device *dev)
 {
        struct devlink *devlink;
 
-       devlink = devlink_alloc(&ice_devlink_ops, sizeof(struct ice_pf));
+       devlink = devlink_alloc(&ice_devlink_ops, sizeof(struct ice_pf), dev);
        if (!devlink)
                return NULL;
 
@@ -502,7 +502,7 @@ int ice_devlink_register(struct ice_pf *pf)
        struct device *dev = ice_pf_to_dev(pf);
        int err;
 
-       err = devlink_register(devlink, dev);
+       err = devlink_register(devlink);
        if (err) {
                dev_err(dev, "devlink registration failed: %d\n", err);
                return err;
index ff8db31..5d1007e 100644 (file)
@@ -2327,7 +2327,7 @@ mvneta_swbm_build_skb(struct mvneta_port *pp, struct page_pool *pool,
        if (!skb)
                return ERR_PTR(-ENOMEM);
 
-       skb_mark_for_recycle(skb, virt_to_page(xdp->data), pool);
+       skb_mark_for_recycle(skb);
 
        skb_reserve(skb, xdp->data - xdp->data_hard_start);
        skb_put(skb, xdp->data_end - xdp->data);
@@ -2339,10 +2339,6 @@ mvneta_swbm_build_skb(struct mvneta_port *pp, struct page_pool *pool,
                skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
                                skb_frag_page(frag), skb_frag_off(frag),
                                skb_frag_size(frag), PAGE_SIZE);
-               /* We don't need to reset pp_recycle here. It's already set, so
-                * just mark fragments for recycling.
-                */
-               page_pool_store_mem_info(skb_frag_page(frag), pool);
        }
 
        return skb;
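
Both mvneta and mvpp2 adapt here to the page_pool recycling rework: skb_mark_for_recycle() now takes only the skb, and the per-fragment page_pool_store_mem_info() calls disappear because the recycling metadata travels with the page itself. A minimal sketch of the updated rx-path usage, assuming a kernel context where the skb's buffers came from a page pool:

#include <linux/netdevice.h>
#include <linux/skbuff.h>

/* Sketch only: one call per skb is enough; fragments attached with
 * skb_add_rx_frag() are recycled along with the skb.
 */
static void rx_deliver(struct napi_struct *napi, struct sk_buff *skb)
{
	skb_mark_for_recycle(skb);
	napi_gro_receive(napi, skb);
}
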
index 99bd8b8..744f58f 100644 (file)
@@ -3995,7 +3995,7 @@ static int mvpp2_rx(struct mvpp2_port *port, struct napi_struct *napi,
                }
 
                if (pp)
-                       skb_mark_for_recycle(skb, page, pp);
+                       skb_mark_for_recycle(skb);
                else
                        dma_unmap_single_attrs(dev->dev.parent, dma_addr,
                                               bm_pool->buf_size, DMA_FROM_DEVICE,
index 9169849..544c96c 100644 (file)
@@ -1504,8 +1504,8 @@ static int cgx_lmac_init(struct cgx *cgx)
 
                /* Add reference */
                cgx->lmac_idmap[lmac->lmac_id] = lmac;
-               cgx->mac_ops->mac_pause_frm_config(cgx, lmac->lmac_id, true);
                set_bit(lmac->lmac_id, &cgx->lmac_bmap);
+               cgx->mac_ops->mac_pause_frm_config(cgx, lmac->lmac_id, true);
        }
 
        return cgx_lmac_verify_fwi_version(cgx);
index 47f5ed0..752ba6b 100644 (file)
@@ -146,10 +146,7 @@ enum nix_scheduler {
 #define TXSCH_RR_QTM_MAX               ((1 << 24) - 1)
 #define TXSCH_TL1_DFLT_RR_QTM          TXSCH_RR_QTM_MAX
 #define TXSCH_TL1_DFLT_RR_PRIO         (0x1ull)
-#define MAX_SCHED_WEIGHT               0xFF
-#define DFLT_RR_WEIGHT                 71
-#define DFLT_RR_QTM    ((DFLT_RR_WEIGHT * TXSCH_RR_QTM_MAX) \
-                        / MAX_SCHED_WEIGHT)
+#define CN10K_MAX_DWRR_WEIGHT          16384 /* Weight is 14bit on CN10K */
 
 /* Min/Max packet sizes, excluding FCS */
 #define        NIC_HW_MIN_FRS                  40
index f5ec39d..4470933 100644 (file)
@@ -1032,8 +1032,12 @@ struct nix_bp_cfg_rsp {
 
 struct nix_hw_info {
        struct mbox_msghdr hdr;
+       u16 rsvs16;
        u16 max_mtu;
        u16 min_mtu;
+       u32 rpm_dwrr_mtu;
+       u32 sdp_dwrr_mtu;
+       u64 rsvd[16]; /* Reserved fields for future expansion */
 };
 
 struct nix_bandprof_alloc_req {
index 19bad9a..243cf80 100644 (file)
@@ -151,7 +151,10 @@ enum npc_kpu_lh_ltype {
  * Software assigns pkind for each incoming port such as CGX
  * Ethernet interfaces, LBK interfaces, etc.
  */
+#define NPC_UNRESERVED_PKIND_COUNT NPC_RX_VLAN_EXDSA_PKIND
+
 enum npc_pkind_type {
+       NPC_RX_LBK_PKIND = 0ULL,
        NPC_RX_VLAN_EXDSA_PKIND = 56ULL,
        NPC_RX_CHLEN24B_PKIND = 57ULL,
        NPC_RX_CPT_HDR_PKIND,
index 017163f..5fe277e 100644 (file)
@@ -391,8 +391,10 @@ void rvu_get_pf_numvfs(struct rvu *rvu, int pf, int *numvfs, int *hwvf)
 
        /* Get numVFs attached to this PF and first HWVF */
        cfg = rvu_read64(rvu, BLKADDR_RVUM, RVU_PRIV_PFX_CFG(pf));
-       *numvfs = (cfg >> 12) & 0xFF;
-       *hwvf = cfg & 0xFFF;
+       if (numvfs)
+               *numvfs = (cfg >> 12) & 0xFF;
+       if (hwvf)
+               *hwvf = cfg & 0xFFF;
 }
 
 static int rvu_get_hwvf(struct rvu *rvu, int pcifunc)
index 91503fb..95591e7 100644 (file)
@@ -329,6 +329,7 @@ struct hw_cap {
        bool    nix_shaping;             /* Is shaping and coloring supported */
        bool    nix_tx_link_bp;          /* Can link backpressure TL queues ? */
        bool    nix_rx_multicast;        /* Rx packet replication support */
+       bool    nix_common_dwrr_mtu;     /* Common DWRR MTU for quantum config */
        bool    per_pf_mbox_regs; /* PF mbox specified in per PF registers ? */
        bool    programmable_chans; /* Channels programmable ? */
        bool    ipolicer;
@@ -706,6 +707,8 @@ int nix_aq_context_read(struct rvu *rvu, struct nix_hw *nix_hw,
                        struct nix_cn10k_aq_enq_rsp *aq_rsp,
                        u16 pcifunc, u8 ctype, u32 qidx);
 int rvu_get_nix_blkaddr(struct rvu *rvu, u16 pcifunc);
+u32 convert_dwrr_mtu_to_bytes(u8 dwrr_mtu);
+u32 convert_bytes_to_dwrr_mtu(u32 bytes);
 
 /* NPC APIs */
 int rvu_npc_init(struct rvu *rvu);
index 2688186..a55b46a 100644 (file)
@@ -1364,6 +1364,89 @@ static void rvu_health_reporters_destroy(struct rvu *rvu)
        rvu_nix_health_reporters_destroy(rvu_dl);
 }
 
+/* Devlink Params APIs */
+static int rvu_af_dl_dwrr_mtu_validate(struct devlink *devlink, u32 id,
+                                      union devlink_param_value val,
+                                      struct netlink_ext_ack *extack)
+{
+       struct rvu_devlink *rvu_dl = devlink_priv(devlink);
+       struct rvu *rvu = rvu_dl->rvu;
+       int dwrr_mtu = val.vu32;
+       struct nix_txsch *txsch;
+       struct nix_hw *nix_hw;
+
+       if (!rvu->hw->cap.nix_common_dwrr_mtu) {
+               NL_SET_ERR_MSG_MOD(extack,
+                                  "Setting DWRR_MTU is not supported on this silicon");
+               return -EOPNOTSUPP;
+       }
+
+       if ((dwrr_mtu > 65536 || !is_power_of_2(dwrr_mtu)) &&
+           (dwrr_mtu != 9728 && dwrr_mtu != 10240)) {
+               NL_SET_ERR_MSG_MOD(extack,
+                                  "Invalid, supported MTUs are 0,2,4,8,16,32,64,...,4K,8K,32K,64K and 9728, 10240");
+               return -EINVAL;
+       }
+
+       nix_hw = get_nix_hw(rvu->hw, BLKADDR_NIX0);
+       if (!nix_hw)
+               return -ENODEV;
+
+       txsch = &nix_hw->txsch[NIX_TXSCH_LVL_SMQ];
+       if (rvu_rsrc_free_count(&txsch->schq) != txsch->schq.max) {
+               NL_SET_ERR_MSG_MOD(extack,
+                                  "Changing DWRR MTU is not supported when there are active NIXLFs");
+               NL_SET_ERR_MSG_MOD(extack,
+                                  "Make sure none of the PF/VF interfaces are initialized and retry");
+               return -EOPNOTSUPP;
+       }
+
+       return 0;
+}
+
+static int rvu_af_dl_dwrr_mtu_set(struct devlink *devlink, u32 id,
+                                 struct devlink_param_gset_ctx *ctx)
+{
+       struct rvu_devlink *rvu_dl = devlink_priv(devlink);
+       struct rvu *rvu = rvu_dl->rvu;
+       u64 dwrr_mtu;
+
+       dwrr_mtu = convert_bytes_to_dwrr_mtu(ctx->val.vu32);
+       rvu_write64(rvu, BLKADDR_NIX0, NIX_AF_DWRR_RPM_MTU, dwrr_mtu);
+
+       return 0;
+}
+
+static int rvu_af_dl_dwrr_mtu_get(struct devlink *devlink, u32 id,
+                                 struct devlink_param_gset_ctx *ctx)
+{
+       struct rvu_devlink *rvu_dl = devlink_priv(devlink);
+       struct rvu *rvu = rvu_dl->rvu;
+       u64 dwrr_mtu;
+
+       if (!rvu->hw->cap.nix_common_dwrr_mtu)
+               return -EOPNOTSUPP;
+
+       dwrr_mtu = rvu_read64(rvu, BLKADDR_NIX0, NIX_AF_DWRR_RPM_MTU);
+       ctx->val.vu32 = convert_dwrr_mtu_to_bytes(dwrr_mtu);
+
+       return 0;
+}
+
+enum rvu_af_dl_param_id {
+       RVU_AF_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX,
+       RVU_AF_DEVLINK_PARAM_ID_DWRR_MTU,
+};
+
+static const struct devlink_param rvu_af_dl_params[] = {
+       DEVLINK_PARAM_DRIVER(RVU_AF_DEVLINK_PARAM_ID_DWRR_MTU,
+                            "dwrr_mtu", DEVLINK_PARAM_TYPE_U32,
+                            BIT(DEVLINK_PARAM_CMODE_RUNTIME),
+                            rvu_af_dl_dwrr_mtu_get, rvu_af_dl_dwrr_mtu_set,
+                            rvu_af_dl_dwrr_mtu_validate),
+};
+
+/* Devlink switch mode */
 static int rvu_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode)
 {
        struct rvu_devlink *rvu_dl = devlink_priv(devlink);
@@ -1420,13 +1503,14 @@ int rvu_register_dl(struct rvu *rvu)
        struct devlink *dl;
        int err;
 
-       dl = devlink_alloc(&rvu_devlink_ops, sizeof(struct rvu_devlink));
+       dl = devlink_alloc(&rvu_devlink_ops, sizeof(struct rvu_devlink),
+                          rvu->dev);
        if (!dl) {
                dev_warn(rvu->dev, "devlink_alloc failed\n");
                return -ENOMEM;
        }
 
-       err = devlink_register(dl, rvu->dev);
+       err = devlink_register(dl);
        if (err) {
                dev_err(rvu->dev, "devlink register failed with error %d\n", err);
                devlink_free(dl);
@@ -1438,7 +1522,30 @@ int rvu_register_dl(struct rvu *rvu)
        rvu_dl->rvu = rvu;
        rvu->rvu_dl = rvu_dl;
 
-       return rvu_health_reporters_create(rvu);
+       err = rvu_health_reporters_create(rvu);
+       if (err) {
+               dev_err(rvu->dev,
+                       "devlink health reporter creation failed with error %d\n", err);
+               goto err_dl_health;
+       }
+
+       err = devlink_params_register(dl, rvu_af_dl_params,
+                                     ARRAY_SIZE(rvu_af_dl_params));
+       if (err) {
+               dev_err(rvu->dev,
+                       "devlink params register failed with error %d", err);
+               goto err_dl_health;
+       }
+
+       devlink_params_publish(dl);
+
+       return 0;
+
+err_dl_health:
+       rvu_health_reporters_destroy(rvu);
+       devlink_unregister(dl);
+       devlink_free(dl);
+       return err;
 }
 
 void rvu_unregister_dl(struct rvu *rvu)
@@ -1449,6 +1556,8 @@ void rvu_unregister_dl(struct rvu *rvu)
        if (!dl)
                return;
 
+       devlink_params_unregister(dl, rvu_af_dl_params,
+                                 ARRAY_SIZE(rvu_af_dl_params));
        rvu_health_reporters_destroy(rvu);
        devlink_unregister(dl);
        devlink_free(dl);
index 0933699..53db8eb 100644 (file)
@@ -192,15 +192,67 @@ struct nix_hw *get_nix_hw(struct rvu_hwinfo *hw, int blkaddr)
        return NULL;
 }
 
+u32 convert_dwrr_mtu_to_bytes(u8 dwrr_mtu)
+{
+       dwrr_mtu &= 0x1FULL;
+
+       /* The MTU used for DWRR calculation is a power of 2 up to 64K bytes.
+        * Value of 4 is reserved for MTU value of 9728 bytes.
+        * Value of 5 is reserved for MTU value of 10240 bytes.
+        */
+       switch (dwrr_mtu) {
+       case 4:
+               return 9728;
+       case 5:
+               return 10240;
+       default:
+               return BIT_ULL(dwrr_mtu);
+       }
+
+       return 0;
+}
+
+u32 convert_bytes_to_dwrr_mtu(u32 bytes)
+{
+       /* The MTU used for DWRR calculation is a power of 2 up to 64K bytes.
+        * Value of 4 is reserved for MTU value of 9728 bytes.
+        * Value of 5 is reserved for MTU value of 10240 bytes.
+        */
+       if (bytes > BIT_ULL(16))
+               return 0;
+
+       switch (bytes) {
+       case 9728:
+               return 4;
+       case 10240:
+               return 5;
+       default:
+               return ilog2(bytes);
+       }
+
+       return 0;
+}
+
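
The pair of helpers implements a 5-bit register encoding: most codes are simply log2 of the DWRR MTU, with codes 4 and 5 reserved for the jumbo sizes 9728 and 10240 bytes. A user-space round-trip of that encoding, reimplemented here only for illustration:

#include <stdio.h>
#include <stdint.h>

static uint32_t dwrr_mtu_to_bytes(uint8_t dwrr_mtu)
{
	switch (dwrr_mtu & 0x1F) {
	case 4:  return 9728;
	case 5:  return 10240;
	default: return 1u << (dwrr_mtu & 0x1F);
	}
}

static uint32_t bytes_to_dwrr_mtu(uint32_t bytes)
{
	unsigned int log = 0;

	if (bytes == 9728)
		return 4;
	if (bytes == 10240)
		return 5;
	while ((1u << (log + 1)) <= bytes)	/* ilog2(bytes) */
		log++;
	return log;
}

int main(void)
{
	printf("%u %u\n", dwrr_mtu_to_bytes(bytes_to_dwrr_mtu(8192)),
	       dwrr_mtu_to_bytes(bytes_to_dwrr_mtu(9728)));	/* 8192 9728 */
	return 0;
}
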
 static void nix_rx_sync(struct rvu *rvu, int blkaddr)
 {
        int err;
 
-       /*Sync all in flight RX packets to LLC/DRAM */
+       /* Sync all in flight RX packets to LLC/DRAM */
+       rvu_write64(rvu, blkaddr, NIX_AF_RX_SW_SYNC, BIT_ULL(0));
+       err = rvu_poll_reg(rvu, blkaddr, NIX_AF_RX_SW_SYNC, BIT_ULL(0), true);
+       if (err)
+               dev_err(rvu->dev, "SYNC1: NIX RX software sync failed\n");
+
+       /* SW_SYNC ensures all existing transactions are finished and pkts
+        * are written to LLC/DRAM; queues should be torn down only after a
+        * successful SW_SYNC. Due to a HW errata, in some rare scenarios an
+        * existing transaction might still complete after the SW_SYNC
+        * operation. To ensure the operation is fully done, do SW_SYNC twice.
+        */
        rvu_write64(rvu, blkaddr, NIX_AF_RX_SW_SYNC, BIT_ULL(0));
        err = rvu_poll_reg(rvu, blkaddr, NIX_AF_RX_SW_SYNC, BIT_ULL(0), true);
        if (err)
-               dev_err(rvu->dev, "NIX RX software sync failed\n");
+               dev_err(rvu->dev, "SYNC2: NIX RX software sync failed\n");
 }
 
 static bool is_valid_txschq(struct rvu *rvu, int blkaddr,
@@ -298,6 +350,7 @@ static int nix_interface_init(struct rvu *rvu, u16 pcifunc, int type, int nixlf)
                                        rvu_nix_chan_lbk(rvu, lbkid, vf + 1);
                pfvf->rx_chan_cnt = 1;
                pfvf->tx_chan_cnt = 1;
+               rvu_npc_set_pkind(rvu, NPC_RX_LBK_PKIND, pfvf);
                rvu_npc_install_promisc_entry(rvu, pcifunc, nixlf,
                                              pfvf->rx_chan_base,
                                              pfvf->rx_chan_cnt);
@@ -1946,8 +1999,17 @@ static void nix_tl1_default_cfg(struct rvu *rvu, struct nix_hw *nix_hw,
                return;
        rvu_write64(rvu, blkaddr, NIX_AF_TL1X_TOPOLOGY(schq),
                    (TXSCH_TL1_DFLT_RR_PRIO << 1));
-       rvu_write64(rvu, blkaddr, NIX_AF_TL1X_SCHEDULE(schq),
-                   TXSCH_TL1_DFLT_RR_QTM);
+
+       /* On OcteonTx2 the config was in bytes; on newer silicons
+        * it is in terms of weight.
+        */
+       if (!rvu->hw->cap.nix_common_dwrr_mtu)
+               rvu_write64(rvu, blkaddr, NIX_AF_TL1X_SCHEDULE(schq),
+                           TXSCH_TL1_DFLT_RR_QTM);
+       else
+               rvu_write64(rvu, blkaddr, NIX_AF_TL1X_SCHEDULE(schq),
+                           CN10K_MAX_DWRR_WEIGHT);
+
        rvu_write64(rvu, blkaddr, NIX_AF_TL1X_CIR(schq), 0x00);
        pfvf_map[schq] = TXSCH_SET_FLAG(pfvf_map[schq], NIX_TXSCHQ_CFG_DONE);
 }
@@ -2655,6 +2717,15 @@ static int nix_setup_txschq(struct rvu *rvu, struct nix_hw *nix_hw, int blkaddr)
                for (schq = 0; schq < txsch->schq.max; schq++)
                        txsch->pfvf_map[schq] = TXSCH_MAP(0, NIX_TXSCHQ_FREE);
        }
+
+       /* Setup a default value of 8192 as DWRR MTU */
+       if (rvu->hw->cap.nix_common_dwrr_mtu) {
+               rvu_write64(rvu, blkaddr, NIX_AF_DWRR_RPM_MTU,
+                           convert_bytes_to_dwrr_mtu(8192));
+               rvu_write64(rvu, blkaddr, NIX_AF_DWRR_SDP_MTU,
+                           convert_bytes_to_dwrr_mtu(8192));
+       }
+
        return 0;
 }
 
@@ -2731,6 +2802,7 @@ int rvu_mbox_handler_nix_get_hw_info(struct rvu *rvu, struct msg_req *req,
                                     struct nix_hw_info *rsp)
 {
        u16 pcifunc = req->hdr.pcifunc;
+       u64 dwrr_mtu;
        int blkaddr;
 
        blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
@@ -2743,6 +2815,20 @@ int rvu_mbox_handler_nix_get_hw_info(struct rvu *rvu, struct msg_req *req,
                rvu_get_lmac_link_max_frs(rvu, &rsp->max_mtu);
 
        rsp->min_mtu = NIC_HW_MIN_FRS;
+
+       if (!rvu->hw->cap.nix_common_dwrr_mtu) {
+               /* Return '1' on OTx2 */
+               rsp->rpm_dwrr_mtu = 1;
+               rsp->sdp_dwrr_mtu = 1;
+               return 0;
+       }
+
+       dwrr_mtu = rvu_read64(rvu, BLKADDR_NIX0, NIX_AF_DWRR_RPM_MTU);
+       rsp->rpm_dwrr_mtu = convert_dwrr_mtu_to_bytes(dwrr_mtu);
+
+       dwrr_mtu = rvu_read64(rvu, BLKADDR_NIX0, NIX_AF_DWRR_SDP_MTU);
+       rsp->sdp_dwrr_mtu = convert_dwrr_mtu_to_bytes(dwrr_mtu);
+
        return 0;
 }
 
@@ -3635,6 +3721,28 @@ static int nix_aq_init(struct rvu *rvu, struct rvu_block *block)
        return 0;
 }
 
+static void rvu_nix_setup_capabilities(struct rvu *rvu, int blkaddr)
+{
+       struct rvu_hwinfo *hw = rvu->hw;
+       u64 hw_const;
+
+       hw_const = rvu_read64(rvu, blkaddr, NIX_AF_CONST1);
+
+       /* On OcteonTx2 DWRR quantum is directly configured into each of
+        * the transmit scheduler queues, and PF/VF drivers were free to
+        * configure any value up to 2^24.
+        * On CN10K the HW is modified: the quantum configuration at scheduler
+        * queues is in terms of weight, and SW needs to set up a base DWRR MTU
+        * at NIX_AF_DWRR_RPM_MTU / NIX_AF_DWRR_SDP_MTU. HW will do
+        * 'DWRR MTU * weight' to get the quantum.
+        *
+        * Check if HW uses a common MTU for all DWRR quantum configs.
+        * On OcteonTx2 this register field is '0'.
+        */
+       if (((hw_const >> 56) & 0x10) == 0x10)
+               hw->cap.nix_common_dwrr_mtu = true;
+}
+
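
As the comment above notes, on CN10K the hardware derives the scheduler quantum as DWRR MTU times the per-queue weight rather than taking the quantum directly. A quick check of that product, with the base MTU and maximum weight taken from the values used elsewhere in this series:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Hypothetical CN10K setup: base DWRR MTU 8192, 14-bit max weight */
	uint64_t dwrr_mtu = 8192, weight = 16384;

	printf("quantum = %llu bytes\n",
	       (unsigned long long)(dwrr_mtu * weight));
	return 0;
}
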
 static int rvu_nix_block_init(struct rvu *rvu, struct nix_hw *nix_hw)
 {
        const struct npc_lt_def_cfg *ltdefs;
@@ -3672,6 +3780,9 @@ static int rvu_nix_block_init(struct rvu *rvu, struct nix_hw *nix_hw)
        if (err)
                return err;
 
+       /* Setup capabilities of the NIX block */
+       rvu_nix_setup_capabilities(rvu, blkaddr);
+
        /* Initialize admin queue */
        err = nix_aq_init(rvu, block);
        if (err)
@@ -3842,7 +3953,6 @@ static void rvu_nix_block_freemem(struct rvu *rvu, int blkaddr,
                vlan = &nix_hw->txvlan;
                kfree(vlan->rsrc.bmap);
                mutex_destroy(&vlan->rsrc_lock);
-               devm_kfree(rvu->dev, vlan->entry2pfvf_map);
 
                mcast = &nix_hw->mcast;
                qmem_free(rvu->dev, mcast->mce_ctx);
index 1097291..52b2554 100644 (file)
@@ -1721,7 +1721,6 @@ static void npc_parser_profile_init(struct rvu *rvu, int blkaddr)
 {
        struct rvu_hwinfo *hw = rvu->hw;
        int num_pkinds, num_kpus, idx;
-       struct npc_pkind *pkind;
 
        /* Disable all KPUs and their entries */
        for (idx = 0; idx < hw->npc_kpus; idx++) {
@@ -1739,9 +1738,8 @@ static void npc_parser_profile_init(struct rvu *rvu, int blkaddr)
         * Check HW max count to avoid configuring junk or
         * writing to unsupported CSR addresses.
         */
-       pkind = &hw->pkind;
        num_pkinds = rvu->kpu.pkinds;
-       num_pkinds = min_t(int, pkind->rsrc.max, num_pkinds);
+       num_pkinds = min_t(int, hw->npc_pkinds, num_pkinds);
 
        for (idx = 0; idx < num_pkinds; idx++)
                npc_config_kpuaction(rvu, blkaddr, &rvu->kpu.ikpu[idx], 0, idx, true);
@@ -1891,7 +1889,8 @@ static void rvu_npc_hw_init(struct rvu *rvu, int blkaddr)
        if (npc_const1 & BIT_ULL(63))
                npc_const2 = rvu_read64(rvu, blkaddr, NPC_AF_CONST2);
 
-       pkind->rsrc.max = (npc_const1 >> 12) & 0xFFULL;
+       pkind->rsrc.max = NPC_UNRESERVED_PKIND_COUNT;
+       hw->npc_pkinds = (npc_const1 >> 12) & 0xFFULL;
        hw->npc_kpu_entries = npc_const1 & 0xFFFULL;
        hw->npc_kpus = (npc_const >> 8) & 0x1FULL;
        hw->npc_intfs = npc_const & 0xFULL;
@@ -2002,6 +2001,10 @@ int rvu_npc_init(struct rvu *rvu)
        err = rvu_alloc_bitmap(&pkind->rsrc);
        if (err)
                return err;
+       /* Reserve PKIND#0 for LBKs. The power-on reset value of LBK_CH_PKIND
+        * is '0', so there is no need to configure a PKIND for each LBK
+        * separately.
+        */
+       rvu_alloc_rsrc(&pkind->rsrc);
 
        /* Allocate mem for pkind to PF and channel mapping info */
        pkind->pfchan_map = devm_kcalloc(rvu->dev, pkind->rsrc.max,
index 8b01ef6..6efcf3a 100644 (file)
 #define NIX_AF_DEBUG_NPC_RESP_DATAX(a)          (0x680 | (a) << 3)
 #define NIX_AF_SMQX_CFG(a)                      (0x700 | (a) << 16)
 #define NIX_AF_SQM_DBG_CTL_STATUS               (0x750)
+#define NIX_AF_DWRR_SDP_MTU                     (0x790)
+#define NIX_AF_DWRR_RPM_MTU                     (0x7A0)
 #define NIX_AF_PSE_CHANNEL_LEVEL                (0x800)
 #define NIX_AF_PSE_SHAPER_CFG                   (0x810)
 #define NIX_AF_TX_EXPR_CREDIT                  (0x830)
index 2e53797..820adf3 100644 (file)
@@ -71,8 +71,8 @@ static int rvu_switch_install_rules(struct rvu *rvu)
        struct rvu_switch *rswitch = &rvu->rswitch;
        u16 start = rswitch->start_entry;
        struct rvu_hwinfo *hw = rvu->hw;
-       int pf, vf, numvfs, hwvf;
        u16 pcifunc, entry = 0;
+       int pf, vf, numvfs;
        int err;
 
        for (pf = 1; pf < hw->total_pfs; pf++) {
@@ -110,8 +110,8 @@ static int rvu_switch_install_rules(struct rvu *rvu)
 
                rswitch->entry2pcifunc[entry++] = pcifunc;
 
-               rvu_get_pf_numvfs(rvu, pf, &numvfs, &hwvf);
-               for (vf = 0; vf < numvfs; vf++, hwvf++) {
+               rvu_get_pf_numvfs(rvu, pf, &numvfs, NULL);
+               for (vf = 0; vf < numvfs; vf++) {
                        pcifunc = pf << 10 | ((vf + 1) & 0x3FF);
                        rvu_get_nix_blkaddr(rvu, pcifunc);
 
@@ -198,7 +198,7 @@ void rvu_switch_disable(struct rvu *rvu)
        struct npc_mcam_free_entry_req free_req = { 0 };
        struct rvu_switch *rswitch = &rvu->rswitch;
        struct rvu_hwinfo *hw = rvu->hw;
-       int pf, vf, numvfs, hwvf;
+       int pf, vf, numvfs;
        struct msg_rsp rsp;
        u16 pcifunc;
        int err;
@@ -217,7 +217,8 @@ void rvu_switch_disable(struct rvu *rvu)
                                "Reverting RX rule for PF%d failed(%d)\n",
                                pf, err);
 
-               for (vf = 0; vf < numvfs; vf++, hwvf++) {
+               rvu_get_pf_numvfs(rvu, pf, &numvfs, NULL);
+               for (vf = 0; vf < numvfs; vf++) {
                        pcifunc = pf << 10 | ((vf + 1) & 0x3FF);
                        err = rvu_switch_install_rx_rule(rvu, pcifunc, 0xFFF);
                        if (err)
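
Both loops above pack the target pcifunc the same way; a hedged sketch of that
encoding, assuming (as the code suggests) that the low 10 bits carry 'VF index
+ 1' with zero selecting the PF itself, and the PF number sits above bit 10:

	#define EXAMPLE_FUNC_MASK 0x3FF		/* mirrors the 0x3FF above */

	static inline u16 example_make_pcifunc(int pf, int vf)
	{
		/* vf < 0 addresses the PF itself */
		return (u16)(pf << 10 | (vf < 0 ? 0 : ((vf + 1) & EXAMPLE_FUNC_MASK)));
	}

	static inline int example_pcifunc_to_vf(u16 pcifunc)
	{
		int func = pcifunc & EXAMPLE_FUNC_MASK;

		return func ? func - 1 : -1;	/* -1: the pcifunc names a PF */
	}
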
index 184de94..ccffdda 100644 (file)
@@ -92,8 +92,7 @@ int cn10k_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura)
        aq->sq.ena = 1;
        /* Only one SMQ is allocated, map all SQ's to that SMQ  */
        aq->sq.smq = pfvf->hw.txschq_list[NIX_TXSCH_LVL_SMQ][0];
-       /* FIXME: set based on NIX_AF_DWRR_RPM_MTU*/
-       aq->sq.smq_rr_weight = pfvf->netdev->mtu;
+       aq->sq.smq_rr_weight = mtu_to_dwrr_weight(pfvf, pfvf->max_frs);
        aq->sq.default_chan = pfvf->hw.tx_chan_base;
        aq->sq.sqe_stype = NIX_STYPE_STF; /* Cache SQB */
        aq->sq.sqb_aura = sqb_aura;
index 1a1ae33..e07723d 100644 (file)
@@ -9,6 +9,20 @@
 
 #include "otx2_common.h"
 
+static inline int mtu_to_dwrr_weight(struct otx2_nic *pfvf, int mtu)
+{
+       u32 weight;
+
+       /* On OTx2, since AF returns DWRR_MTU as '1', this logic
+        * will work on those silicons as well.
+        */
+       weight = mtu / pfvf->hw.dwrr_mtu;
+       if (mtu % pfvf->hw.dwrr_mtu)
+               weight += 1;
+
+       return weight;
+}
+
 void cn10k_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq);
 void cn10k_sqe_flush(void *dev, struct otx2_snd_queue *sq, int size, int qidx);
 int cn10k_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura);
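
A few worked values for mtu_to_dwrr_weight() as defined above (the dwrr_mtu
figures are illustrative):

	/* dwrr_mtu = 1    (OTx2, AF returns '1'):  mtu = 1500 -> weight = 1500,
	 *                                          identical to the old
	 *                                          direct-quantum behaviour.
	 * dwrr_mtu = 1500 (CN10K, example value):  mtu = 1500 -> weight = 1
	 *                                          mtu = 9212 -> weight = 7
	 */
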
index 7cccd80..ce799b7 100644 (file)
@@ -596,6 +596,9 @@ int otx2_txschq_config(struct otx2_nic *pfvf, int lvl)
        struct otx2_hw *hw = &pfvf->hw;
        struct nix_txschq_config *req;
        u64 schq, parent;
+       u64 dwrr_val;
+
+       dwrr_val = mtu_to_dwrr_weight(pfvf, pfvf->max_frs);
 
        req = otx2_mbox_alloc_msg_nix_txschq_cfg(&pfvf->mbox);
        if (!req)
@@ -621,21 +624,21 @@ int otx2_txschq_config(struct otx2_nic *pfvf, int lvl)
                req->num_regs++;
                /* Set DWRR quantum */
                req->reg[2] = NIX_AF_MDQX_SCHEDULE(schq);
-               req->regval[2] =  DFLT_RR_QTM;
+               req->regval[2] =  dwrr_val;
        } else if (lvl == NIX_TXSCH_LVL_TL4) {
                parent =  hw->txschq_list[NIX_TXSCH_LVL_TL3][0];
                req->reg[0] = NIX_AF_TL4X_PARENT(schq);
                req->regval[0] = parent << 16;
                req->num_regs++;
                req->reg[1] = NIX_AF_TL4X_SCHEDULE(schq);
-               req->regval[1] = DFLT_RR_QTM;
+               req->regval[1] = dwrr_val;
        } else if (lvl == NIX_TXSCH_LVL_TL3) {
                parent = hw->txschq_list[NIX_TXSCH_LVL_TL2][0];
                req->reg[0] = NIX_AF_TL3X_PARENT(schq);
                req->regval[0] = parent << 16;
                req->num_regs++;
                req->reg[1] = NIX_AF_TL3X_SCHEDULE(schq);
-               req->regval[1] = DFLT_RR_QTM;
+               req->regval[1] = dwrr_val;
        } else if (lvl == NIX_TXSCH_LVL_TL2) {
                parent =  hw->txschq_list[NIX_TXSCH_LVL_TL1][0];
                req->reg[0] = NIX_AF_TL2X_PARENT(schq);
@@ -643,7 +646,7 @@ int otx2_txschq_config(struct otx2_nic *pfvf, int lvl)
 
                req->num_regs++;
                req->reg[1] = NIX_AF_TL2X_SCHEDULE(schq);
-               req->regval[1] = TXSCH_TL1_DFLT_RR_PRIO << 24 | DFLT_RR_QTM;
+               req->regval[1] = TXSCH_TL1_DFLT_RR_PRIO << 24 | dwrr_val;
 
                req->num_regs++;
                req->reg[2] = NIX_AF_TL3_TL2X_LINKX_CFG(schq,
@@ -656,7 +659,10 @@ int otx2_txschq_config(struct otx2_nic *pfvf, int lvl)
                 * For VF this is always ignored.
                 */
 
-               /* Set DWRR quantum */
+               /* On CN10K, if RR_WEIGHT is greater than 16384, HW will
+                * clip it to 16384, so configuring the 24-bit max value
+                * will work on both OTx2 and CN10K.
+                */
                req->reg[0] = NIX_AF_TL1X_SCHEDULE(schq);
                req->regval[0] = TXSCH_TL1_DFLT_RR_QTM;
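
A short note on why the same constant is safe on both silicon generations,
assuming the usual definition TXSCH_TL1_DFLT_RR_QTM == ((1 << 24) - 1):

	/* OTx2 consumes 0xFFFFFF directly as the DWRR quantum, while CN10K
	 * treats the field as a weight and clips anything above 16384, so
	 * one 24-bit max constant serves both.
	 */
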
 
@@ -803,7 +809,7 @@ int otx2_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura)
        aq->sq.ena = 1;
        /* Only one SMQ is allocated, map all SQ's to that SMQ  */
        aq->sq.smq = pfvf->hw.txschq_list[NIX_TXSCH_LVL_SMQ][0];
-       aq->sq.smq_rr_quantum = DFLT_RR_QTM;
+       aq->sq.smq_rr_quantum = mtu_to_dwrr_weight(pfvf, pfvf->max_frs);
        aq->sq.default_chan = pfvf->hw.tx_chan_base;
        aq->sq.sqe_stype = NIX_STYPE_STF; /* Cache SQB */
        aq->sq.sqb_aura = sqb_aura;
@@ -924,12 +930,14 @@ static int otx2_cq_init(struct otx2_nic *pfvf, u16 qidx)
                aq->cq.drop = RQ_DROP_LVL_CQ(pfvf->hw.rq_skid, cq->cqe_cnt);
                aq->cq.drop_ena = 1;
 
-               /* Enable receive CQ backpressure */
-               aq->cq.bp_ena = 1;
-               aq->cq.bpid = pfvf->bpid[0];
+               if (!is_otx2_lbkvf(pfvf->pdev)) {
+                       /* Enable receive CQ backpressure */
+                       aq->cq.bp_ena = 1;
+                       aq->cq.bpid = pfvf->bpid[0];
 
-               /* Set backpressure level to be same as cq pass level */
-               aq->cq.bp = RQ_PASS_LVL_CQ(pfvf->hw.rq_skid, qset->rqe_cnt);
+                       /* Set backpressure level to be same as cq pass level */
+                       aq->cq.bp = RQ_PASS_LVL_CQ(pfvf->hw.rq_skid, qset->rqe_cnt);
+               }
        }
 
        /* Fill AQ info */
@@ -1186,7 +1194,7 @@ static int otx2_aura_init(struct otx2_nic *pfvf, int aura_id,
        aq->aura.fc_hyst_bits = 0; /* Store count on all updates */
 
        /* Enable backpressure for RQ aura */
-       if (aura_id < pfvf->hw.rqpool_cnt) {
+       if (aura_id < pfvf->hw.rqpool_cnt && !is_otx2_lbkvf(pfvf->pdev)) {
                aq->aura.bp_ena = 0;
                aq->aura.nix0_bpid = pfvf->bpid[0];
                /* Set backpressure level for RQ's Aura */
@@ -1666,6 +1674,11 @@ u16 otx2_get_max_mtu(struct otx2_nic *pfvf)
                 * SMQ errors
                 */
                max_mtu = rsp->max_mtu - 8 - OTX2_ETH_HLEN;
+
+               /* Also save DWRR MTU, needed for DWRR weight calculation */
+               pfvf->hw.dwrr_mtu = rsp->rpm_dwrr_mtu;
+               if (!pfvf->hw.dwrr_mtu)
+                       pfvf->hw.dwrr_mtu = 1;
        }
 
 out:
index 8fd58cd..2a80cdc 100644 (file)
@@ -181,6 +181,7 @@ struct otx2_hw {
        /* NIX */
        u16             txschq_list[NIX_TXSCH_LVL_CNT][MAX_TXSCHQ_PER_FUNC];
        u16                     matchall_ipolicer;
+       u32                     dwrr_mtu;
 
        /* HW settings, coalescing etc */
        u16                     rx_chan_base;
index 8df748e..b906a0e 100644 (file)
@@ -298,15 +298,14 @@ static int otx2_set_channels(struct net_device *dev,
        err = otx2_set_real_num_queues(dev, channel->tx_count,
                                       channel->rx_count);
        if (err)
-               goto fail;
+               return err;
 
        pfvf->hw.rx_queues = channel->rx_count;
        pfvf->hw.tx_queues = channel->tx_count;
        pfvf->qset.cq_cnt = pfvf->hw.tx_queues +  pfvf->hw.rx_queues;
 
-fail:
        if (if_up)
-               dev->netdev_ops->ndo_open(dev);
+               err = dev->netdev_ops->ndo_open(dev);
 
        netdev_info(dev, "Setting num Tx rings to %d, Rx rings to %d success\n",
                    pfvf->hw.tx_queues, pfvf->hw.rx_queues);
@@ -410,7 +409,7 @@ static int otx2_set_ringparam(struct net_device *netdev,
        qs->rqe_cnt = rx_count;
 
        if (if_up)
-               netdev->netdev_ops->ndo_open(netdev);
+               return netdev->netdev_ops->ndo_open(netdev);
 
        return 0;
 }
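
Both ethtool handlers above follow the same close/modify/reopen shape; a
minimal sketch of the corrected pattern (hypothetical helper, kernel-style),
showing why the ndo_open() return value must now be propagated:

	static int example_set_queues(struct net_device *dev, int txq, int rxq)
	{
		bool if_up = netif_running(dev);
		int err = 0;

		if (if_up)
			dev->netdev_ops->ndo_stop(dev);

		/* ... apply the new Tx/Rx queue counts here ... */

		if (if_up)
			err = dev->netdev_ops->ndo_open(dev);

		/* a reopen failure is now reported instead of silently ignored */
		return err;
	}
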
index 3f03bbd..22b7af0 100644 (file)
@@ -1662,6 +1662,7 @@ int otx2_open(struct net_device *netdev)
 err_tx_stop_queues:
        netif_tx_stop_all_queues(netdev);
        netif_carrier_off(netdev);
+       pf->flags |= OTX2_FLAG_INTF_DOWN;
 err_free_cints:
        otx2_free_cints(pf, qidx);
        vec = pci_irq_vector(pf->pdev,
@@ -1689,6 +1690,10 @@ int otx2_stop(struct net_device *netdev)
        struct otx2_rss_info *rss;
        int qidx, vec, wrk;
 
+       /* If the DOWN flag is set, resources are already freed */
+       if (pf->flags & OTX2_FLAG_INTF_DOWN)
+               return 0;
+
        netif_carrier_off(netdev);
        netif_tx_stop_all_queues(netdev);
 
index d12e21d..68b442e 100644 (file)
@@ -390,11 +390,12 @@ static const struct devlink_ops prestera_dl_ops = {
        .trap_drop_counter_get = prestera_drop_counter_get,
 };
 
-struct prestera_switch *prestera_devlink_alloc(void)
+struct prestera_switch *prestera_devlink_alloc(struct prestera_device *dev)
 {
        struct devlink *dl;
 
-       dl = devlink_alloc(&prestera_dl_ops, sizeof(struct prestera_switch));
+       dl = devlink_alloc(&prestera_dl_ops, sizeof(struct prestera_switch),
+                          dev->dev);
 
        return devlink_priv(dl);
 }
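
These hunks reflect the tree-wide devlink API rework: the struct device binding
moves from devlink_register() to devlink_alloc(). A minimal probe-time sketch
of the new shape; my_probe and my_priv are hypothetical names:

	#include <net/devlink.h>

	struct my_priv { int dummy; };

	static int my_probe(struct device *dev, const struct devlink_ops *ops)
	{
		struct devlink *dl;
		int err;

		/* device is now bound at allocation time */
		dl = devlink_alloc(ops, sizeof(struct my_priv), dev);
		if (!dl)
			return -ENOMEM;

		err = devlink_register(dl);	/* no device argument anymore */
		if (err)
			devlink_free(dl);
		return err;
	}
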
@@ -411,7 +412,7 @@ int prestera_devlink_register(struct prestera_switch *sw)
        struct devlink *dl = priv_to_devlink(sw);
        int err;
 
-       err = devlink_register(dl, sw->dev->dev);
+       err = devlink_register(dl);
        if (err) {
                dev_err(prestera_dev(sw), "devlink_register failed: %d\n", err);
                return err;
@@ -530,6 +531,8 @@ err_trap_register:
                prestera_trap = &prestera_trap_items_arr[i];
                devlink_traps_unregister(devlink, &prestera_trap->trap, 1);
        }
+       devlink_trap_groups_unregister(devlink, prestera_trap_groups_arr,
+                                      groups_count);
 err_groups_register:
        kfree(trap_data->trap_items_arr);
 err_trap_items_alloc:
index 5d73aa9..cc34c3d 100644 (file)
@@ -6,7 +6,7 @@
 
 #include "prestera.h"
 
-struct prestera_switch *prestera_devlink_alloc(void);
+struct prestera_switch *prestera_devlink_alloc(struct prestera_device *dev);
 void prestera_devlink_free(struct prestera_switch *sw);
 
 int prestera_devlink_register(struct prestera_switch *sw);
index 7c569c1..44c6708 100644 (file)
@@ -905,7 +905,7 @@ int prestera_device_register(struct prestera_device *dev)
        struct prestera_switch *sw;
        int err;
 
-       sw = prestera_devlink_alloc();
+       sw = prestera_devlink_alloc(dev);
        if (!sw)
                return -ENOMEM;
 
index 743ca96..dc9dd77 100644 (file)
@@ -4884,7 +4884,7 @@ static int sky2_test_msi(struct sky2_hw *hw)
 /* This driver supports yukon2 chipset only */
 static const char *sky2_name(u8 chipid, char *buf, int sz)
 {
-       const char *name[] = {
+       static const char *const name[] = {
                "XL",           /* 0xb3 */
                "EC Ultra",     /* 0xb4 */
                "Extreme",      /* 0xb5 */
index 00c8465..7267c6c 100644 (file)
@@ -3535,6 +3535,7 @@ slave_start:
 
                if (!SRIOV_VALID_STATE(dev->flags)) {
                        mlx4_err(dev, "Invalid SRIOV state\n");
+                       err = -EINVAL;
                        goto err_close;
                }
        }
@@ -4004,7 +4005,7 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 
        printk_once(KERN_INFO "%s", mlx4_version);
 
-       devlink = devlink_alloc(&mlx4_devlink_ops, sizeof(*priv));
+       devlink = devlink_alloc(&mlx4_devlink_ops, sizeof(*priv), &pdev->dev);
        if (!devlink)
                return -ENOMEM;
        priv = devlink_priv(devlink);
@@ -4023,7 +4024,7 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
        mutex_init(&dev->persist->interface_state_mutex);
        mutex_init(&dev->persist->pci_status_mutex);
 
-       ret = devlink_register(devlink, &pdev->dev);
+       ret = devlink_register(devlink);
        if (ret)
                goto err_persist_free;
        ret = devlink_params_register(devlink, mlx4_devlink_params,
index 427e7a3..2584bc0 100644 (file)
@@ -917,7 +917,7 @@ int mlx4_qp_to_ready(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
 {
        int err;
        int i;
-       enum mlx4_qp_state states[] = {
+       static const enum mlx4_qp_state states[] = {
                MLX4_QP_STATE_RST,
                MLX4_QP_STATE_INIT,
                MLX4_QP_STATE_RTR,
index 6378dc8..33e550d 100644 (file)
@@ -15,7 +15,7 @@ mlx5_core-y :=        main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
                health.o mcg.o cq.o alloc.o port.o mr.o pd.o \
                transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \
                fs_counters.o fs_ft_pool.o rl.o lag.o dev.o events.o wq.o lib/gid.o \
-               lib/devcom.o lib/pci_vsc.o lib/dm.o diag/fs_tracepoint.o \
+               lib/devcom.o lib/pci_vsc.o lib/dm.o lib/fs_ttc.o diag/fs_tracepoint.o \
                diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o \
                fw_reset.o qos.o
 
@@ -28,7 +28,7 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
                en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/pool.o \
                en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o en/devlink.o en/ptp.o \
                en/qos.o en/trap.o en/fs_tt_redirect.o en/rqt.o en/tir.o \
-               en/rx_res.o
+               en/rx_res.o en/channels.o
 
 #
 # Netdev extra
index df3e493..99ec278 100644 (file)
@@ -89,7 +89,8 @@ static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq,
 int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
                        u32 *in, int inlen, u32 *out, int outlen)
 {
-       int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context), c_eqn);
+       int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context),
+                          c_eqn_or_apu_element);
        u32 din[MLX5_ST_SZ_DW(destroy_cq_in)] = {};
        struct mlx5_eq_comp *eq;
        int err;
index ceebfc2..def2156 100644 (file)
@@ -500,10 +500,7 @@ static int next_phys_dev(struct device *dev, const void *data)
        return 1;
 }
 
-/* This function is called with two flows:
- * 1. During initialization of mlx5_core_dev and we don't need to lock it.
- * 2. During LAG configure stage and caller holds &mlx5_intf_mutex.
- */
+/* Must be called with intf_mutex held */
 struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev)
 {
        struct auxiliary_device *adev;
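
The tightened comment is the kind of locking contract usually enforced with
lockdep; a sketch of how that could look here, assuming the mutex symbol is
visible in this translation unit:

	struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev)
	{
		lockdep_assert_held(&mlx5_intf_mutex);
		/* ... walk the auxiliary devices as before ... */
	}
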
index d791d35..f38553f 100644 (file)
@@ -359,9 +359,10 @@ int mlx5_devlink_traps_get_action(struct mlx5_core_dev *dev, int trap_id,
        return 0;
 }
 
-struct devlink *mlx5_devlink_alloc(void)
+struct devlink *mlx5_devlink_alloc(struct device *dev)
 {
-       return devlink_alloc(&mlx5_devlink_ops, sizeof(struct mlx5_core_dev));
+       return devlink_alloc(&mlx5_devlink_ops, sizeof(struct mlx5_core_dev),
+                            dev);
 }
 
 void mlx5_devlink_free(struct devlink *devlink)
@@ -638,11 +639,11 @@ static void mlx5_devlink_traps_unregister(struct devlink *devlink)
                                       ARRAY_SIZE(mlx5_trap_groups_arr));
 }
 
-int mlx5_devlink_register(struct devlink *devlink, struct device *dev)
+int mlx5_devlink_register(struct devlink *devlink)
 {
        int err;
 
-       err = devlink_register(devlink, dev);
+       err = devlink_register(devlink);
        if (err)
                return err;
 
index 7318d44..30bf488 100644 (file)
@@ -31,9 +31,9 @@ int mlx5_devlink_trap_get_num_active(struct mlx5_core_dev *dev);
 int mlx5_devlink_traps_get_action(struct mlx5_core_dev *dev, int trap_id,
                                  enum devlink_trap_action *action);
 
-struct devlink *mlx5_devlink_alloc(void);
+struct devlink *mlx5_devlink_alloc(struct device *dev);
 void mlx5_devlink_free(struct devlink *devlink);
-int mlx5_devlink_register(struct devlink *devlink, struct device *dev);
+int mlx5_devlink_register(struct devlink *devlink);
 void mlx5_devlink_unregister(struct devlink *devlink);
 
 #endif /* __MLX5_DEVLINK_H__ */
index 3566898..4f6897c 100644 (file)
@@ -66,8 +66,6 @@ struct page_pool;
 #define MLX5E_METADATA_ETHER_TYPE (0x8CE4)
 #define MLX5E_METADATA_ETHER_LEN 8
 
-#define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v)
-
 #define MLX5E_ETH_HARD_MTU (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)
 
 #define MLX5E_HW2SW_MTU(params, hwmtu) ((hwmtu) - ((params)->hard_mtu))
@@ -140,6 +138,7 @@ struct page_pool;
 #define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES_MPW            0x2
 
 #define MLX5E_MIN_NUM_CHANNELS         0x1
+#define MLX5E_MAX_NUM_CHANNELS         (MLX5E_INDIR_RQT_SIZE / 2)
 #define MLX5E_MAX_NUM_SQS              (MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC)
 #define MLX5E_TX_CQ_POLL_BUDGET        128
 #define MLX5E_TX_XSK_POLL_BUDGET       64
@@ -921,8 +920,6 @@ int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto,
                           u16 vid);
 void mlx5e_timestamp_init(struct mlx5e_priv *priv);
 
-int mlx5e_modify_tirs_hash(struct mlx5e_priv *priv);
-
 struct mlx5e_xsk_param;
 
 struct mlx5e_rq_param;
@@ -984,9 +981,6 @@ void mlx5e_activate_priv_channels(struct mlx5e_priv *priv);
 void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv);
 int mlx5e_ptp_rx_manage_fs_ctx(struct mlx5e_priv *priv, void *ctx);
 
-void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len,
-                                  int num_channels);
-
 int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state);
 void mlx5e_activate_rq(struct mlx5e_rq *rq);
 void mlx5e_deactivate_rq(struct mlx5e_rq *rq);
@@ -1036,16 +1030,6 @@ void mlx5e_close_drop_rq(struct mlx5e_rq *drop_rq);
 int mlx5e_init_di_list(struct mlx5e_rq *rq, int wq_sz, int node);
 void mlx5e_free_di_list(struct mlx5e_rq *rq);
 
-int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv);
-
-int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc);
-void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv);
-
-int mlx5e_create_direct_rqts(struct mlx5e_priv *priv);
-void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv);
-int mlx5e_create_direct_tirs(struct mlx5e_priv *priv);
-void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv);
-
 int mlx5e_create_tis(struct mlx5_core_dev *mdev, void *in, u32 *tisn);
 void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn);
 
@@ -1133,8 +1117,6 @@ int mlx5e_netdev_change_profile(struct mlx5e_priv *priv,
 void mlx5e_netdev_attach_nic_profile(struct mlx5e_priv *priv);
 void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv);
 void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 mtu);
-void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params,
-                           u16 num_channels);
 void mlx5e_rx_dim_work(struct work_struct *work);
 void mlx5e_tx_dim_work(struct work_struct *work);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/channels.c b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.c
new file mode 100644 (file)
index 0000000..e7c14c0
--- /dev/null
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */
+
+#include "channels.h"
+#include "en.h"
+#include "en/ptp.h"
+
+unsigned int mlx5e_channels_get_num(struct mlx5e_channels *chs)
+{
+       return chs->num;
+}
+
+void mlx5e_channels_get_regular_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn)
+{
+       struct mlx5e_channel *c;
+
+       WARN_ON(ix >= mlx5e_channels_get_num(chs));
+       c = chs->c[ix];
+
+       *rqn = c->rq.rqn;
+}
+
+bool mlx5e_channels_get_xsk_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn)
+{
+       struct mlx5e_channel *c;
+
+       WARN_ON(ix >= mlx5e_channels_get_num(chs));
+       c = chs->c[ix];
+
+       if (!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
+               return false;
+
+       *rqn = c->xskrq.rqn;
+       return true;
+}
+
+bool mlx5e_channels_get_ptp_rqn(struct mlx5e_channels *chs, u32 *rqn)
+{
+       struct mlx5e_ptp *c = chs->ptp;
+
+       if (!c || !test_bit(MLX5E_PTP_STATE_RX, c->state))
+               return false;
+
+       *rqn = c->rq.rqn;
+       return true;
+}
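
A hedged sketch of a consumer of the new getters above (the function is
hypothetical; 'drop_rqn' is whatever RQ the caller parks traffic on): gather
the regular RQ number of every channel and substitute the drop RQ for the PTP
RQ when PTP RX is not active, mirroring how the RX resources code uses this
API later in the series.

	#include "channels.h"

	static void example_collect_rqns(struct mlx5e_channels *chs,
					 u32 *rqns, u32 drop_rqn, u32 *ptp_rqn)
	{
		unsigned int ix, nch = mlx5e_channels_get_num(chs);

		for (ix = 0; ix < nch; ix++)
			mlx5e_channels_get_regular_rqn(chs, ix, &rqns[ix]);

		if (!mlx5e_channels_get_ptp_rqn(chs, ptp_rqn))
			*ptp_rqn = drop_rqn;	/* no active PTP RX queue */
	}
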
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/channels.h b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.h
new file mode 100644 (file)
index 0000000..ca00cbc
--- /dev/null
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */
+
+#ifndef __MLX5_EN_CHANNELS_H__
+#define __MLX5_EN_CHANNELS_H__
+
+#include <linux/kernel.h>
+
+struct mlx5e_channels;
+
+unsigned int mlx5e_channels_get_num(struct mlx5e_channels *chs);
+void mlx5e_channels_get_regular_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn);
+bool mlx5e_channels_get_xsk_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn);
+bool mlx5e_channels_get_ptp_rqn(struct mlx5e_channels *chs, u32 *rqn);
+
+#endif /* __MLX5_EN_CHANNELS_H__ */
index 0e053aa..e348c27 100644 (file)
@@ -5,6 +5,7 @@
 #define __MLX5E_FLOW_STEER_H__
 
 #include "mod_hdr.h"
+#include "lib/fs_ttc.h"
 
 enum {
        MLX5E_TC_FT_LEVEL = 0,
@@ -67,21 +68,7 @@ struct mlx5e_l2_table {
        bool                       promisc_enabled;
 };
 
-enum mlx5e_traffic_types {
-       MLX5E_TT_IPV4_TCP,
-       MLX5E_TT_IPV6_TCP,
-       MLX5E_TT_IPV4_UDP,
-       MLX5E_TT_IPV6_UDP,
-       MLX5E_TT_IPV4_IPSEC_AH,
-       MLX5E_TT_IPV6_IPSEC_AH,
-       MLX5E_TT_IPV4_IPSEC_ESP,
-       MLX5E_TT_IPV6_IPSEC_ESP,
-       MLX5E_TT_IPV4,
-       MLX5E_TT_IPV6,
-       MLX5E_TT_ANY,
-       MLX5E_NUM_TT,
-       MLX5E_NUM_INDIR_TIRS = MLX5E_TT_ANY,
-};
+#define MLX5E_NUM_INDIR_TIRS (MLX5_NUM_TT - 1)
 
 #define MLX5_HASH_IP           (MLX5_HASH_FIELD_SEL_SRC_IP   |\
                                 MLX5_HASH_FIELD_SEL_DST_IP)
@@ -93,30 +80,6 @@ enum mlx5e_traffic_types {
                                 MLX5_HASH_FIELD_SEL_DST_IP   |\
                                 MLX5_HASH_FIELD_SEL_IPSEC_SPI)
 
-enum mlx5e_tunnel_types {
-       MLX5E_TT_IPV4_GRE,
-       MLX5E_TT_IPV6_GRE,
-       MLX5E_TT_IPV4_IPIP,
-       MLX5E_TT_IPV6_IPIP,
-       MLX5E_TT_IPV4_IPV6,
-       MLX5E_TT_IPV6_IPV6,
-       MLX5E_NUM_TUNNEL_TT,
-};
-
-bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev);
-
-struct mlx5e_ttc_rule {
-       struct mlx5_flow_handle *rule;
-       struct mlx5_flow_destination default_dest;
-};
-
-/* L3/L4 traffic type classifier */
-struct mlx5e_ttc_table {
-       struct mlx5e_flow_table ft;
-       struct mlx5e_ttc_rule rules[MLX5E_NUM_TT];
-       struct mlx5_flow_handle *tunnel_rules[MLX5E_NUM_TUNNEL_TT];
-};
-
 /* NIC prio FTS */
 enum {
        MLX5E_PROMISC_FT_LEVEL,
@@ -138,22 +101,6 @@ enum {
 #endif
 };
 
-#define MLX5E_TTC_NUM_GROUPS   3
-#define MLX5E_TTC_GROUP1_SIZE  (BIT(3) + MLX5E_NUM_TUNNEL_TT)
-#define MLX5E_TTC_GROUP2_SIZE   BIT(1)
-#define MLX5E_TTC_GROUP3_SIZE   BIT(0)
-#define MLX5E_TTC_TABLE_SIZE   (MLX5E_TTC_GROUP1_SIZE +\
-                                MLX5E_TTC_GROUP2_SIZE +\
-                                MLX5E_TTC_GROUP3_SIZE)
-
-#define MLX5E_INNER_TTC_NUM_GROUPS     3
-#define MLX5E_INNER_TTC_GROUP1_SIZE    BIT(3)
-#define MLX5E_INNER_TTC_GROUP2_SIZE    BIT(1)
-#define MLX5E_INNER_TTC_GROUP3_SIZE    BIT(0)
-#define MLX5E_INNER_TTC_TABLE_SIZE     (MLX5E_INNER_TTC_GROUP1_SIZE +\
-                                        MLX5E_INNER_TTC_GROUP2_SIZE +\
-                                        MLX5E_INNER_TTC_GROUP3_SIZE)
-
 struct mlx5e_priv;
 
 #ifdef CONFIG_MLX5_EN_RXNFC
@@ -222,8 +169,8 @@ struct mlx5e_flow_steering {
        struct mlx5e_promisc_table      promisc;
        struct mlx5e_vlan_table         *vlan;
        struct mlx5e_l2_table           l2;
-       struct mlx5e_ttc_table          ttc;
-       struct mlx5e_ttc_table          inner_ttc;
+       struct mlx5_ttc_table           *ttc;
+       struct mlx5_ttc_table           *inner_ttc;
 #ifdef CONFIG_MLX5_EN_ARFS
        struct mlx5e_arfs_tables       *arfs;
 #endif
@@ -235,27 +182,13 @@ struct mlx5e_flow_steering {
        struct mlx5e_ptp_fs            *ptp_fs;
 };
 
-struct ttc_params {
-       struct mlx5_flow_table_attr ft_attr;
-       u32 any_tt_tirn;
-       u32 indir_tirn[MLX5E_NUM_INDIR_TIRS];
-       struct mlx5e_ttc_table *inner_ttc;
-};
-
-void mlx5e_set_ttc_basic_params(struct mlx5e_priv *priv, struct ttc_params *ttc_params);
-void mlx5e_set_ttc_ft_params(struct ttc_params *ttc_params);
+void mlx5e_set_ttc_params(struct mlx5e_priv *priv,
+                         struct ttc_params *ttc_params, bool tunnel);
 
-int mlx5e_create_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params,
-                          struct mlx5e_ttc_table *ttc);
-void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv,
-                            struct mlx5e_ttc_table *ttc);
+void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv);
+int mlx5e_create_ttc_table(struct mlx5e_priv *priv);
 
 void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft);
-int mlx5e_ttc_fwd_dest(struct mlx5e_priv *priv, enum mlx5e_traffic_types type,
-                      struct mlx5_flow_destination *new_dest);
-struct mlx5_flow_destination
-mlx5e_ttc_get_default_dest(struct mlx5e_priv *priv, enum mlx5e_traffic_types type);
-int mlx5e_ttc_fwd_default_dest(struct mlx5e_priv *priv, enum mlx5e_traffic_types type);
 
 void mlx5e_enable_cvlan_filter(struct mlx5e_priv *priv);
 void mlx5e_disable_cvlan_filter(struct mlx5e_priv *priv);
@@ -263,7 +196,6 @@ void mlx5e_disable_cvlan_filter(struct mlx5e_priv *priv);
 int mlx5e_create_flow_steering(struct mlx5e_priv *priv);
 void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv);
 
-u8 mlx5e_get_proto_by_tunnel_type(enum mlx5e_tunnel_types tt);
 int mlx5e_add_vlan_trap(struct mlx5e_priv *priv, int  trap_id, int tir_num);
 void mlx5e_remove_vlan_trap(struct mlx5e_priv *priv);
 int mlx5e_add_mac_trap(struct mlx5e_priv *priv, int  trap_id, int tir_num);
index 909faa6..7aa25a5 100644 (file)
@@ -33,22 +33,22 @@ static char *fs_udp_type2str(enum fs_udp_type i)
        }
 }
 
-static enum mlx5e_traffic_types fs_udp2tt(enum fs_udp_type i)
+static enum mlx5_traffic_types fs_udp2tt(enum fs_udp_type i)
 {
        switch (i) {
        case FS_IPV4_UDP:
-               return MLX5E_TT_IPV4_UDP;
+               return MLX5_TT_IPV4_UDP;
        default: /* FS_IPV6_UDP */
-               return MLX5E_TT_IPV6_UDP;
+               return MLX5_TT_IPV6_UDP;
        }
 }
 
-static enum fs_udp_type tt2fs_udp(enum mlx5e_traffic_types i)
+static enum fs_udp_type tt2fs_udp(enum mlx5_traffic_types i)
 {
        switch (i) {
-       case MLX5E_TT_IPV4_UDP:
+       case MLX5_TT_IPV4_UDP:
                return FS_IPV4_UDP;
-       case MLX5E_TT_IPV6_UDP:
+       case MLX5_TT_IPV6_UDP:
                return FS_IPV6_UDP;
        default:
                return FS_UDP_NUM_TYPES;
@@ -75,7 +75,7 @@ static void fs_udp_set_dport_flow(struct mlx5_flow_spec *spec, enum fs_udp_type
 
 struct mlx5_flow_handle *
 mlx5e_fs_tt_redirect_udp_add_rule(struct mlx5e_priv *priv,
-                                 enum mlx5e_traffic_types ttc_type,
+                                 enum mlx5_traffic_types ttc_type,
                                  u32 tir_num, u16 d_port)
 {
        enum fs_udp_type type = tt2fs_udp(ttc_type);
@@ -124,7 +124,7 @@ static int fs_udp_add_default_rule(struct mlx5e_priv *priv, enum fs_udp_type typ
        fs_udp = priv->fs.udp;
        fs_udp_t = &fs_udp->tables[type];
 
-       dest = mlx5e_ttc_get_default_dest(priv, fs_udp2tt(type));
+       dest = mlx5_ttc_get_default_dest(priv->fs.ttc, fs_udp2tt(type));
        rule = mlx5_add_flow_rules(fs_udp_t->t, NULL, &flow_act, &dest, 1);
        if (IS_ERR(rule)) {
                err = PTR_ERR(rule);
@@ -259,7 +259,7 @@ static int fs_udp_disable(struct mlx5e_priv *priv)
 
        for (i = 0; i < FS_UDP_NUM_TYPES; i++) {
                /* Modify ttc rules destination to point back to the indir TIRs */
-               err = mlx5e_ttc_fwd_default_dest(priv, fs_udp2tt(i));
+               err = mlx5_ttc_fwd_default_dest(priv->fs.ttc, fs_udp2tt(i));
                if (err) {
                        netdev_err(priv->netdev,
                                   "%s: modify ttc[%d] default destination failed, err(%d)\n",
@@ -281,7 +281,7 @@ static int fs_udp_enable(struct mlx5e_priv *priv)
                dest.ft = priv->fs.udp->tables[i].t;
 
                /* Modify ttc rules destination to point on the accel_fs FTs */
-               err = mlx5e_ttc_fwd_dest(priv, fs_udp2tt(i), &dest);
+               err = mlx5_ttc_fwd_dest(priv->fs.ttc, fs_udp2tt(i), &dest);
                if (err) {
                        netdev_err(priv->netdev,
                                   "%s: modify ttc[%d] destination to accel failed, err(%d)\n",
@@ -401,7 +401,7 @@ static int fs_any_add_default_rule(struct mlx5e_priv *priv)
        fs_any = priv->fs.any;
        fs_any_t = &fs_any->table;
 
-       dest = mlx5e_ttc_get_default_dest(priv, MLX5E_TT_ANY);
+       dest = mlx5_ttc_get_default_dest(priv->fs.ttc, MLX5_TT_ANY);
        rule = mlx5_add_flow_rules(fs_any_t->t, NULL, &flow_act, &dest, 1);
        if (IS_ERR(rule)) {
                err = PTR_ERR(rule);
@@ -514,11 +514,11 @@ static int fs_any_disable(struct mlx5e_priv *priv)
        int err;
 
        /* Modify ttc rules destination to point back to the indir TIRs */
-       err = mlx5e_ttc_fwd_default_dest(priv, MLX5E_TT_ANY);
+       err = mlx5_ttc_fwd_default_dest(priv->fs.ttc, MLX5_TT_ANY);
        if (err) {
                netdev_err(priv->netdev,
                           "%s: modify ttc[%d] default destination failed, err(%d)\n",
-                          __func__, MLX5E_TT_ANY, err);
+                          __func__, MLX5_TT_ANY, err);
                return err;
        }
        return 0;
@@ -533,11 +533,11 @@ static int fs_any_enable(struct mlx5e_priv *priv)
        dest.ft = priv->fs.any->table.t;
 
        /* Modify ttc rules destination to point on the accel_fs FTs */
-       err = mlx5e_ttc_fwd_dest(priv, MLX5E_TT_ANY, &dest);
+       err = mlx5_ttc_fwd_dest(priv->fs.ttc, MLX5_TT_ANY, &dest);
        if (err) {
                netdev_err(priv->netdev,
                           "%s: modify ttc[%d] destination to accel failed, err(%d)\n",
-                          __func__, MLX5E_TT_ANY, err);
+                          __func__, MLX5_TT_ANY, err);
                return err;
        }
        return 0;
index 8385df2..7a70c4f 100644 (file)
@@ -12,7 +12,7 @@ void mlx5e_fs_tt_redirect_del_rule(struct mlx5_flow_handle *rule);
 /* UDP traffic type redirect */
 struct mlx5_flow_handle *
 mlx5e_fs_tt_redirect_udp_add_rule(struct mlx5e_priv *priv,
-                                 enum mlx5e_traffic_types ttc_type,
+                                 enum mlx5_traffic_types ttc_type,
                                  u32 tir_num, u16 d_port);
 void mlx5e_fs_tt_redirect_udp_destroy(struct mlx5e_priv *priv);
 int mlx5e_fs_tt_redirect_udp_create(struct mlx5e_priv *priv);
index ea321e5..4e72ca8 100644 (file)
@@ -5,11 +5,15 @@
 #include <linux/slab.h>
 #include <linux/xarray.h>
 #include <linux/hashtable.h>
+#include <linux/refcount.h>
 
 #include "mapping.h"
 
 #define MAPPING_GRACE_PERIOD 2000
 
+static LIST_HEAD(shared_ctx_list);
+static DEFINE_MUTEX(shared_ctx_lock);
+
 struct mapping_ctx {
        struct xarray xarray;
        DECLARE_HASHTABLE(ht, 8);
@@ -20,6 +24,10 @@ struct mapping_ctx {
        struct delayed_work dwork;
        struct list_head pending_list;
        spinlock_t pending_list_lock; /* Guards pending list */
+       u64 id;
+       u8 type;
+       struct list_head list;
+       refcount_t refcount;
 };
 
 struct mapping_item {
@@ -205,11 +213,48 @@ mapping_create(size_t data_size, u32 max_id, bool delayed_removal)
        mutex_init(&ctx->lock);
        xa_init_flags(&ctx->xarray, XA_FLAGS_ALLOC1);
 
+       refcount_set(&ctx->refcount, 1);
+       INIT_LIST_HEAD(&ctx->list);
+
+       return ctx;
+}
+
+struct mapping_ctx *
+mapping_create_for_id(u64 id, u8 type, size_t data_size, u32 max_id, bool delayed_removal)
+{
+       struct mapping_ctx *ctx;
+
+       mutex_lock(&shared_ctx_lock);
+       list_for_each_entry(ctx, &shared_ctx_list, list) {
+               if (ctx->id == id && ctx->type == type) {
+                       if (refcount_inc_not_zero(&ctx->refcount))
+                               goto unlock;
+                       break;
+               }
+       }
+
+       ctx = mapping_create(data_size, max_id, delayed_removal);
+       if (IS_ERR(ctx))
+               goto unlock;
+
+       ctx->id = id;
+       ctx->type = type;
+       list_add(&ctx->list, &shared_ctx_list);
+
+unlock:
+       mutex_unlock(&shared_ctx_lock);
        return ctx;
 }
 
 void mapping_destroy(struct mapping_ctx *ctx)
 {
+       if (!refcount_dec_and_test(&ctx->refcount))
+               return;
+
+       mutex_lock(&shared_ctx_lock);
+       list_del(&ctx->list);
+       mutex_unlock(&shared_ctx_lock);
+
        mapping_flush_work(ctx);
        xa_destroy(&ctx->xarray);
        mutex_destroy(&ctx->lock);
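
An illustrative lifecycle of the shared contexts added above; the type value
and sizes are made up:

	static int example_shared_mapping(u64 devid)
	{
		struct mapping_ctx *a, *b;

		a = mapping_create_for_id(devid, 1, sizeof(u32), 128, true);
		if (IS_ERR(a))
			return PTR_ERR(a);

		b = mapping_create_for_id(devid, 1, sizeof(u32), 128, true);
		/* b == a: the second call matched (id, type) and took a reference */

		mapping_destroy(b);	/* refcount 2 -> 1: context stays alive */
		mapping_destroy(a);	/* last put: unlinked from the list and freed */
		return 0;
	}
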
index 285525c..4e2119f 100644 (file)
@@ -24,4 +24,9 @@ struct mapping_ctx *mapping_create(size_t data_size, u32 max_id,
                                   bool delayed_removal);
 void mapping_destroy(struct mapping_ctx *ctx);
 
+/* Adds a mapping with the given id, or gets an existing mapping with the
+ * same id.
+ */
+struct mapping_ctx *
+mapping_create_for_id(u64 id, u8 type, size_t data_size, u32 max_id, bool delayed_removal);
+
 #endif /* __MLX5_MAPPING_H__ */
index fc602d8..3cbb596 100644 (file)
@@ -483,6 +483,15 @@ static void mlx5e_build_rx_cq_param(struct mlx5_core_dev *mdev,
        param->cq_period_mode = params->rx_cq_moderation.cq_period_mode;
 }
 
+static u8 rq_end_pad_mode(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
+{
+       bool ro = pcie_relaxed_ordering_enabled(mdev->pdev) &&
+               MLX5_CAP_GEN(mdev, relaxed_ordering_write);
+
+       return ro && params->lro_en ?
+               MLX5_WQ_END_PAD_MODE_NONE : MLX5_WQ_END_PAD_MODE_ALIGN;
+}
+
 int mlx5e_build_rq_param(struct mlx5_core_dev *mdev,
                         struct mlx5e_params *params,
                         struct mlx5e_xsk_param *xsk,
@@ -520,7 +529,7 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev,
        }
 
        MLX5_SET(wq, wq, wq_type,          params->rq_wq_type);
-       MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
+       MLX5_SET(wq, wq, end_padding_mode, rq_end_pad_mode(mdev, params));
        MLX5_SET(wq, wq, log_wq_stride,
                 mlx5e_get_rqwq_log_stride(params->rq_wq_type, ndsegs));
        MLX5_SET(wq, wq, pd,               mdev->mlx5e_res.hw_objs.pdn);
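
The new rq_end_pad_mode() helper above encodes a small decision table; spelled
out for clarity:

	/* relaxed ordering usable? | lro_en | end_padding_mode
	 * --------------------------------------------------
	 *          no             |  any   | ALIGN
	 *          yes            |  off   | ALIGN
	 *          yes            |  on    | NONE
	 */
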
index 849ee3e..f479ef3 100644 (file)
@@ -482,8 +482,11 @@ static void mlx5e_ptp_build_params(struct mlx5e_ptp *c,
                params->log_sq_size = orig->log_sq_size;
                mlx5e_ptp_build_sq_param(c->mdev, params, &cparams->txq_sq_param);
        }
-       if (test_bit(MLX5E_PTP_STATE_RX, c->state))
+       /* RQ */
+       if (test_bit(MLX5E_PTP_STATE_RX, c->state)) {
+               params->vlan_strip_disable = orig->vlan_strip_disable;
                mlx5e_ptp_build_rq_param(c->mdev, c->netdev, c->priv->q_counter, cparams);
+       }
 }
 
 static int mlx5e_init_ptp_rq(struct mlx5e_ptp *c, struct mlx5e_params *params,
@@ -494,7 +497,7 @@ static int mlx5e_init_ptp_rq(struct mlx5e_ptp *c, struct mlx5e_params *params,
        int err;
 
        rq->wq_type      = params->rq_wq_type;
-       rq->pdev         = mdev->device;
+       rq->pdev         = c->pdev;
        rq->netdev       = priv->netdev;
        rq->priv         = priv;
        rq->clock        = &mdev->clock;
@@ -602,8 +605,8 @@ static void mlx5e_ptp_rx_unset_fs(struct mlx5e_priv *priv)
 
 static int mlx5e_ptp_rx_set_fs(struct mlx5e_priv *priv)
 {
+       u32 tirn = mlx5e_rx_res_get_tirn_ptp(priv->rx_res);
        struct mlx5e_ptp_fs *ptp_fs = priv->fs.ptp_fs;
-       u32 tirn = priv->rx_res->ptp.tir.tirn;
        struct mlx5_flow_handle *rule;
        int err;
 
@@ -614,7 +617,7 @@ static int mlx5e_ptp_rx_set_fs(struct mlx5e_priv *priv)
        if (err)
                goto out_free;
 
-       rule = mlx5e_fs_tt_redirect_udp_add_rule(priv, MLX5E_TT_IPV4_UDP,
+       rule = mlx5e_fs_tt_redirect_udp_add_rule(priv, MLX5_TT_IPV4_UDP,
                                                 tirn, PTP_EV_PORT);
        if (IS_ERR(rule)) {
                err = PTR_ERR(rule);
@@ -622,7 +625,7 @@ static int mlx5e_ptp_rx_set_fs(struct mlx5e_priv *priv)
        }
        ptp_fs->udp_v4_rule = rule;
 
-       rule = mlx5e_fs_tt_redirect_udp_add_rule(priv, MLX5E_TT_IPV6_UDP,
+       rule = mlx5e_fs_tt_redirect_udp_add_rule(priv, MLX5_TT_IPV6_UDP,
                                                 tirn, PTP_EV_PORT);
        if (IS_ERR(rule)) {
                err = PTR_ERR(rule);
index 38d0e9d..b915fb2 100644 (file)
@@ -4,6 +4,15 @@
 #include "rqt.h"
 #include <linux/mlx5/transobj.h>
 
+void mlx5e_rss_params_indir_init_uniform(struct mlx5e_rss_params_indir *indir,
+                                        unsigned int num_channels)
+{
+       unsigned int i;
+
+       for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++)
+               indir->table[i] = i % num_channels;
+}
+
 static int mlx5e_rqt_init(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev,
                          u16 max_size, u32 *init_rqns, u16 init_size)
 {
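
A worked example for mlx5e_rss_params_indir_init_uniform() above, using an
illustrative table size of 8 (the real MLX5E_INDIR_RQT_SIZE is larger) and
num_channels = 3:

	/* table[] = { 0, 1, 2, 0, 1, 2, 0, 1 }
	 * i.e. RSS hash buckets are dealt round-robin across the channels.
	 */
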
index d2c7664..60c985a 100644 (file)
@@ -14,6 +14,9 @@ struct mlx5e_rss_params_indir {
        u32 table[MLX5E_INDIR_RQT_SIZE];
 };
 
+void mlx5e_rss_params_indir_init_uniform(struct mlx5e_rss_params_indir *indir,
+                                        unsigned int num_channels);
+
 struct mlx5e_rqt {
        struct mlx5_core_dev *mdev;
        u32 rqtn;
index 8fc1dfc..e2a8fe1 100644 (file)
@@ -2,54 +2,56 @@
 /* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */
 
 #include "rx_res.h"
+#include "channels.h"
+#include "params.h"
 
 static const struct mlx5e_rss_params_traffic_type rss_default_config[MLX5E_NUM_INDIR_TIRS] = {
-       [MLX5E_TT_IPV4_TCP] = {
+       [MLX5_TT_IPV4_TCP] = {
                .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
                .l4_prot_type = MLX5_L4_PROT_TYPE_TCP,
                .rx_hash_fields = MLX5_HASH_IP_L4PORTS,
        },
-       [MLX5E_TT_IPV6_TCP] = {
+       [MLX5_TT_IPV6_TCP] = {
                .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
                .l4_prot_type = MLX5_L4_PROT_TYPE_TCP,
                .rx_hash_fields = MLX5_HASH_IP_L4PORTS,
        },
-       [MLX5E_TT_IPV4_UDP] = {
+       [MLX5_TT_IPV4_UDP] = {
                .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
                .l4_prot_type = MLX5_L4_PROT_TYPE_UDP,
                .rx_hash_fields = MLX5_HASH_IP_L4PORTS,
        },
-       [MLX5E_TT_IPV6_UDP] = {
+       [MLX5_TT_IPV6_UDP] = {
                .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
                .l4_prot_type = MLX5_L4_PROT_TYPE_UDP,
                .rx_hash_fields = MLX5_HASH_IP_L4PORTS,
        },
-       [MLX5E_TT_IPV4_IPSEC_AH] = {
+       [MLX5_TT_IPV4_IPSEC_AH] = {
                .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
                .l4_prot_type = 0,
                .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
        },
-       [MLX5E_TT_IPV6_IPSEC_AH] = {
+       [MLX5_TT_IPV6_IPSEC_AH] = {
                .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
                .l4_prot_type = 0,
                .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
        },
-       [MLX5E_TT_IPV4_IPSEC_ESP] = {
+       [MLX5_TT_IPV4_IPSEC_ESP] = {
                .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
                .l4_prot_type = 0,
                .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
        },
-       [MLX5E_TT_IPV6_IPSEC_ESP] = {
+       [MLX5_TT_IPV6_IPSEC_ESP] = {
                .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
                .l4_prot_type = 0,
                .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI,
        },
-       [MLX5E_TT_IPV4] = {
+       [MLX5_TT_IPV4] = {
                .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4,
                .l4_prot_type = 0,
                .rx_hash_fields = MLX5_HASH_IP,
        },
-       [MLX5E_TT_IPV6] = {
+       [MLX5_TT_IPV6] = {
                .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6,
                .l4_prot_type = 0,
                .rx_hash_fields = MLX5_HASH_IP,
@@ -57,13 +59,556 @@ static const struct mlx5e_rss_params_traffic_type rss_default_config[MLX5E_NUM_I
 };
 
 struct mlx5e_rss_params_traffic_type
-mlx5e_rss_get_default_tt_config(enum mlx5e_traffic_types tt)
+mlx5e_rss_get_default_tt_config(enum mlx5_traffic_types tt)
 {
        return rss_default_config[tt];
 }
 
+struct mlx5e_rx_res {
+       struct mlx5_core_dev *mdev;
+       enum mlx5e_rx_res_features features;
+       unsigned int max_nch;
+       u32 drop_rqn;
+
+       struct {
+               struct mlx5e_rss_params_hash hash;
+               struct mlx5e_rss_params_indir indir;
+               u32 rx_hash_fields[MLX5E_NUM_INDIR_TIRS];
+       } rss_params;
+
+       struct mlx5e_rqt indir_rqt;
+       struct {
+               struct mlx5e_tir indir_tir;
+               struct mlx5e_tir inner_indir_tir;
+       } rss[MLX5E_NUM_INDIR_TIRS];
+
+       bool rss_active;
+       u32 rss_rqns[MLX5E_INDIR_RQT_SIZE];
+       unsigned int rss_nch;
+
+       struct {
+               struct mlx5e_rqt direct_rqt;
+               struct mlx5e_tir direct_tir;
+               struct mlx5e_rqt xsk_rqt;
+               struct mlx5e_tir xsk_tir;
+       } *channels;
+
+       struct {
+               struct mlx5e_rqt rqt;
+               struct mlx5e_tir tir;
+       } ptp;
+};
+
+struct mlx5e_rx_res *mlx5e_rx_res_alloc(void)
+{
+       return kvzalloc(sizeof(struct mlx5e_rx_res), GFP_KERNEL);
+}
+
+static void mlx5e_rx_res_rss_params_init(struct mlx5e_rx_res *res, unsigned int init_nch)
+{
+       enum mlx5_traffic_types tt;
+
+       res->rss_params.hash.hfunc = ETH_RSS_HASH_TOP;
+       netdev_rss_key_fill(res->rss_params.hash.toeplitz_hash_key,
+                           sizeof(res->rss_params.hash.toeplitz_hash_key));
+       mlx5e_rss_params_indir_init_uniform(&res->rss_params.indir, init_nch);
+       for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
+               res->rss_params.rx_hash_fields[tt] =
+                       mlx5e_rss_get_default_tt_config(tt).rx_hash_fields;
+}
+
+static int mlx5e_rx_res_rss_init(struct mlx5e_rx_res *res,
+                                const struct mlx5e_lro_param *init_lro_param)
+{
+       bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT;
+       enum mlx5_traffic_types tt, max_tt;
+       struct mlx5e_tir_builder *builder;
+       u32 indir_rqtn;
+       int err;
+
+       builder = mlx5e_tir_builder_alloc(false);
+       if (!builder)
+               return -ENOMEM;
+
+       err = mlx5e_rqt_init_direct(&res->indir_rqt, res->mdev, true, res->drop_rqn);
+       if (err)
+               goto out;
+
+       indir_rqtn = mlx5e_rqt_get_rqtn(&res->indir_rqt);
+
+       for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
+               struct mlx5e_rss_params_traffic_type rss_tt;
+
+               mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn,
+                                           indir_rqtn, inner_ft_support);
+               mlx5e_tir_builder_build_lro(builder, init_lro_param);
+               rss_tt = mlx5e_rx_res_rss_get_current_tt_config(res, tt);
+               mlx5e_tir_builder_build_rss(builder, &res->rss_params.hash, &rss_tt, false);
+
+               err = mlx5e_tir_init(&res->rss[tt].indir_tir, builder, res->mdev, true);
+               if (err) {
+                       mlx5_core_warn(res->mdev, "Failed to create an indirect TIR: err = %d, tt = %d\n",
+                                      err, tt);
+                       goto err_destroy_tirs;
+               }
+
+               mlx5e_tir_builder_clear(builder);
+       }
+
+       if (!inner_ft_support)
+               goto out;
+
+       for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
+               struct mlx5e_rss_params_traffic_type rss_tt;
+
+               mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn,
+                                           indir_rqtn, inner_ft_support);
+               mlx5e_tir_builder_build_lro(builder, init_lro_param);
+               rss_tt = mlx5e_rx_res_rss_get_current_tt_config(res, tt);
+               mlx5e_tir_builder_build_rss(builder, &res->rss_params.hash, &rss_tt, true);
+
+               err = mlx5e_tir_init(&res->rss[tt].inner_indir_tir, builder, res->mdev, true);
+               if (err) {
+                       mlx5_core_warn(res->mdev, "Failed to create an inner indirect TIR: err = %d, tt = %d\n",
+                                      err, tt);
+                       goto err_destroy_inner_tirs;
+               }
+
+               mlx5e_tir_builder_clear(builder);
+       }
+
+       goto out;
+
+err_destroy_inner_tirs:
+       max_tt = tt;
+       for (tt = 0; tt < max_tt; tt++)
+               mlx5e_tir_destroy(&res->rss[tt].inner_indir_tir);
+
+       tt = MLX5E_NUM_INDIR_TIRS;
+err_destroy_tirs:
+       max_tt = tt;
+       for (tt = 0; tt < max_tt; tt++)
+               mlx5e_tir_destroy(&res->rss[tt].indir_tir);
+
+       mlx5e_rqt_destroy(&res->indir_rqt);
+
+out:
+       mlx5e_tir_builder_free(builder);
+
+       return err;
+}
+
+static int mlx5e_rx_res_channels_init(struct mlx5e_rx_res *res,
+                                     const struct mlx5e_lro_param *init_lro_param)
+{
+       bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT;
+       struct mlx5e_tir_builder *builder;
+       int err = 0;
+       int ix;
+
+       builder = mlx5e_tir_builder_alloc(false);
+       if (!builder)
+               return -ENOMEM;
+
+       res->channels = kvcalloc(res->max_nch, sizeof(*res->channels), GFP_KERNEL);
+       if (!res->channels) {
+               err = -ENOMEM;
+               goto out;
+       }
+
+       for (ix = 0; ix < res->max_nch; ix++) {
+               err = mlx5e_rqt_init_direct(&res->channels[ix].direct_rqt,
+                                           res->mdev, false, res->drop_rqn);
+               if (err) {
+                       mlx5_core_warn(res->mdev, "Failed to create a direct RQT: err = %d, ix = %u\n",
+                                      err, ix);
+                       goto err_destroy_direct_rqts;
+               }
+       }
+
+       for (ix = 0; ix < res->max_nch; ix++) {
+               mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn,
+                                           mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt),
+                                           inner_ft_support);
+               mlx5e_tir_builder_build_lro(builder, init_lro_param);
+               mlx5e_tir_builder_build_direct(builder);
+
+               err = mlx5e_tir_init(&res->channels[ix].direct_tir, builder, res->mdev, true);
+               if (err) {
+                       mlx5_core_warn(res->mdev, "Failed to create a direct TIR: err = %d, ix = %u\n",
+                                      err, ix);
+                       goto err_destroy_direct_tirs;
+               }
+
+               mlx5e_tir_builder_clear(builder);
+       }
+
+       if (!(res->features & MLX5E_RX_RES_FEATURE_XSK))
+               goto out;
+
+       for (ix = 0; ix < res->max_nch; ix++) {
+               err = mlx5e_rqt_init_direct(&res->channels[ix].xsk_rqt,
+                                           res->mdev, false, res->drop_rqn);
+               if (err) {
+                       mlx5_core_warn(res->mdev, "Failed to create an XSK RQT: err = %d, ix = %u\n",
+                                      err, ix);
+                       goto err_destroy_xsk_rqts;
+               }
+       }
+
+       for (ix = 0; ix < res->max_nch; ix++) {
+               mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn,
+                                           mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt),
+                                           inner_ft_support);
+               mlx5e_tir_builder_build_lro(builder, init_lro_param);
+               mlx5e_tir_builder_build_direct(builder);
+
+               err = mlx5e_tir_init(&res->channels[ix].xsk_tir, builder, res->mdev, true);
+               if (err) {
+                       mlx5_core_warn(res->mdev, "Failed to create an XSK TIR: err = %d, ix = %u\n",
+                                      err, ix);
+                       goto err_destroy_xsk_tirs;
+               }
+
+               mlx5e_tir_builder_clear(builder);
+       }
+
+       goto out;
+
+err_destroy_xsk_tirs:
+       while (--ix >= 0)
+               mlx5e_tir_destroy(&res->channels[ix].xsk_tir);
+
+       ix = res->max_nch;
+err_destroy_xsk_rqts:
+       while (--ix >= 0)
+               mlx5e_rqt_destroy(&res->channels[ix].xsk_rqt);
+
+       ix = res->max_nch;
+err_destroy_direct_tirs:
+       while (--ix >= 0)
+               mlx5e_tir_destroy(&res->channels[ix].direct_tir);
+
+       ix = res->max_nch;
+err_destroy_direct_rqts:
+       while (--ix >= 0)
+               mlx5e_rqt_destroy(&res->channels[ix].direct_rqt);
+
+       kvfree(res->channels);
+
+out:
+       mlx5e_tir_builder_free(builder);
+
+       return err;
+}
+
+static int mlx5e_rx_res_ptp_init(struct mlx5e_rx_res *res)
+{
+       bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT;
+       struct mlx5e_tir_builder *builder;
+       int err;
+
+       builder = mlx5e_tir_builder_alloc(false);
+       if (!builder)
+               return -ENOMEM;
+
+       err = mlx5e_rqt_init_direct(&res->ptp.rqt, res->mdev, false, res->drop_rqn);
+       if (err)
+               goto out;
+
+       mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn,
+                                   mlx5e_rqt_get_rqtn(&res->ptp.rqt),
+                                   inner_ft_support);
+       mlx5e_tir_builder_build_direct(builder);
+
+       err = mlx5e_tir_init(&res->ptp.tir, builder, res->mdev, true);
+       if (err)
+               goto err_destroy_ptp_rqt;
+
+       goto out;
+
+err_destroy_ptp_rqt:
+       mlx5e_rqt_destroy(&res->ptp.rqt);
+
+out:
+       mlx5e_tir_builder_free(builder);
+       return err;
+}
+
+static void mlx5e_rx_res_rss_destroy(struct mlx5e_rx_res *res)
+{
+       enum mlx5_traffic_types tt;
+
+       for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
+               mlx5e_tir_destroy(&res->rss[tt].indir_tir);
+
+       if (res->features & MLX5E_RX_RES_FEATURE_INNER_FT)
+               for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
+                       mlx5e_tir_destroy(&res->rss[tt].inner_indir_tir);
+
+       mlx5e_rqt_destroy(&res->indir_rqt);
+}
+
+static void mlx5e_rx_res_channels_destroy(struct mlx5e_rx_res *res)
+{
+       unsigned int ix;
+
+       for (ix = 0; ix < res->max_nch; ix++) {
+               mlx5e_tir_destroy(&res->channels[ix].direct_tir);
+               mlx5e_rqt_destroy(&res->channels[ix].direct_rqt);
+
+               if (!(res->features & MLX5E_RX_RES_FEATURE_XSK))
+                       continue;
+
+               mlx5e_tir_destroy(&res->channels[ix].xsk_tir);
+               mlx5e_rqt_destroy(&res->channels[ix].xsk_rqt);
+       }
+
+       kvfree(res->channels);
+}
+
+static void mlx5e_rx_res_ptp_destroy(struct mlx5e_rx_res *res)
+{
+       mlx5e_tir_destroy(&res->ptp.tir);
+       mlx5e_rqt_destroy(&res->ptp.rqt);
+}
+
+int mlx5e_rx_res_init(struct mlx5e_rx_res *res, struct mlx5_core_dev *mdev,
+                     enum mlx5e_rx_res_features features, unsigned int max_nch,
+                     u32 drop_rqn, const struct mlx5e_lro_param *init_lro_param,
+                     unsigned int init_nch)
+{
+       int err;
+
+       res->mdev = mdev;
+       res->features = features;
+       res->max_nch = max_nch;
+       res->drop_rqn = drop_rqn;
+
+       mlx5e_rx_res_rss_params_init(res, init_nch);
+
+       err = mlx5e_rx_res_rss_init(res, init_lro_param);
+       if (err)
+               return err;
+
+       err = mlx5e_rx_res_channels_init(res, init_lro_param);
+       if (err)
+               goto err_rss_destroy;
+
+       err = mlx5e_rx_res_ptp_init(res);
+       if (err)
+               goto err_channels_destroy;
+
+       return 0;
+
+err_channels_destroy:
+       mlx5e_rx_res_channels_destroy(res);
+err_rss_destroy:
+       mlx5e_rx_res_rss_destroy(res);
+       return err;
+}
+
+void mlx5e_rx_res_destroy(struct mlx5e_rx_res *res)
+{
+       mlx5e_rx_res_ptp_destroy(res);
+       mlx5e_rx_res_channels_destroy(res);
+       mlx5e_rx_res_rss_destroy(res);
+}
+
+void mlx5e_rx_res_free(struct mlx5e_rx_res *res)
+{
+       kvfree(res);
+}
+
+u32 mlx5e_rx_res_get_tirn_direct(struct mlx5e_rx_res *res, unsigned int ix)
+{
+       return mlx5e_tir_get_tirn(&res->channels[ix].direct_tir);
+}
+
+u32 mlx5e_rx_res_get_tirn_xsk(struct mlx5e_rx_res *res, unsigned int ix)
+{
+       WARN_ON(!(res->features & MLX5E_RX_RES_FEATURE_XSK));
+
+       return mlx5e_tir_get_tirn(&res->channels[ix].xsk_tir);
+}
+
+u32 mlx5e_rx_res_get_tirn_rss(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt)
+{
+       return mlx5e_tir_get_tirn(&res->rss[tt].indir_tir);
+}
+
+u32 mlx5e_rx_res_get_tirn_rss_inner(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt)
+{
+       WARN_ON(!(res->features & MLX5E_RX_RES_FEATURE_INNER_FT));
+       return mlx5e_tir_get_tirn(&res->rss[tt].inner_indir_tir);
+}
+
+u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res)
+{
+       WARN_ON(!(res->features & MLX5E_RX_RES_FEATURE_PTP));
+       return mlx5e_tir_get_tirn(&res->ptp.tir);
+}
+
+u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int ix)
+{
+       return mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt);
+}
+
+static void mlx5e_rx_res_rss_enable(struct mlx5e_rx_res *res)
+{
+       int err;
+
+       res->rss_active = true;
+
+       err = mlx5e_rqt_redirect_indir(&res->indir_rqt, res->rss_rqns, res->rss_nch,
+                                      res->rss_params.hash.hfunc,
+                                      &res->rss_params.indir);
+       if (err)
+               mlx5_core_warn(res->mdev, "Failed to redirect indirect RQT %#x to channels: err = %d\n",
+                              mlx5e_rqt_get_rqtn(&res->indir_rqt), err);
+}
+
+static void mlx5e_rx_res_rss_disable(struct mlx5e_rx_res *res)
+{
+       int err;
+
+       res->rss_active = false;
+
+       err = mlx5e_rqt_redirect_direct(&res->indir_rqt, res->drop_rqn);
+       if (err)
+               mlx5_core_warn(res->mdev, "Failed to redirect indirect RQT %#x to drop RQ %#x: err = %d\n",
+                              mlx5e_rqt_get_rqtn(&res->indir_rqt), res->drop_rqn, err);
+}
+
+void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs)
+{
+       unsigned int nch, ix;
+       int err;
+
+       nch = mlx5e_channels_get_num(chs);
+
+       for (ix = 0; ix < nch; ix++)
+               mlx5e_channels_get_regular_rqn(chs, ix, &res->rss_rqns[ix]);
+       res->rss_nch = nch;
+
+       mlx5e_rx_res_rss_enable(res);
+
+       for (ix = 0; ix < nch; ix++) {
+               u32 rqn;
+
+               mlx5e_channels_get_regular_rqn(chs, ix, &rqn);
+               err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, rqn);
+               if (err)
+                       mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to RQ %#x (channel %u): err = %d\n",
+                                      mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt),
+                                      rqn, ix, err);
+
+               if (!(res->features & MLX5E_RX_RES_FEATURE_XSK))
+                       continue;
+
+               if (!mlx5e_channels_get_xsk_rqn(chs, ix, &rqn))
+                       rqn = res->drop_rqn;
+               err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, rqn);
+               if (err)
+                       mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to RQ %#x (channel %u): err = %d\n",
+                                      mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt),
+                                      rqn, ix, err);
+       }
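+       /* Channels beyond the currently active count keep pointing at the
+        * drop RQ, so no stale RQT entry can reference a torn-down RQ.
+        */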
+       for (ix = nch; ix < res->max_nch; ix++) {
+               err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, res->drop_rqn);
+               if (err)
+                       mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to drop RQ %#x (channel %u): err = %d\n",
+                                      mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt),
+                                      res->drop_rqn, ix, err);
+
+               if (!(res->features & MLX5E_RX_RES_FEATURE_XSK))
+                       continue;
+
+               err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, res->drop_rqn);
+               if (err)
+                       mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to drop RQ %#x (channel %u): err = %d\n",
+                                      mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt),
+                                      res->drop_rqn, ix, err);
+       }
+
+       if (res->features & MLX5E_RX_RES_FEATURE_PTP) {
+               u32 rqn;
+
+               if (mlx5e_channels_get_ptp_rqn(chs, &rqn))
+                       rqn = res->drop_rqn;
+
+               err = mlx5e_rqt_redirect_direct(&res->ptp.rqt, rqn);
+               if (err)
+                       mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to RQ %#x (PTP): err = %d\n",
+                                      mlx5e_rqt_get_rqtn(&res->ptp.rqt),
+                                      rqn, err);
+       }
+}
+
+void mlx5e_rx_res_channels_deactivate(struct mlx5e_rx_res *res)
+{
+       unsigned int ix;
+       int err;
+
+       mlx5e_rx_res_rss_disable(res);
+
+       for (ix = 0; ix < res->max_nch; ix++) {
+               err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, res->drop_rqn);
+               if (err)
+                       mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to drop RQ %#x (channel %u): err = %d\n",
+                                      mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt),
+                                      res->drop_rqn, ix, err);
+
+               if (!(res->features & MLX5E_RX_RES_FEATURE_XSK))
+                       continue;
+
+               err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, res->drop_rqn);
+               if (err)
+                       mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to drop RQ %#x (channel %u): err = %d\n",
+                                      mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt),
+                                      res->drop_rqn, ix, err);
+       }
+
+       if (res->features & MLX5E_RX_RES_FEATURE_PTP) {
+               err = mlx5e_rqt_redirect_direct(&res->ptp.rqt, res->drop_rqn);
+               if (err)
+                       mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to drop RQ %#x (PTP): err = %d\n",
+                                      mlx5e_rqt_get_rqtn(&res->ptp.rqt),
+                                      res->drop_rqn, err);
+       }
+}
+
+int mlx5e_rx_res_xsk_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs,
+                             unsigned int ix)
+{
+       u32 rqn;
+       int err;
+
+       if (!mlx5e_channels_get_xsk_rqn(chs, ix, &rqn))
+               return -EINVAL;
+
+       err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, rqn);
+       if (err)
+               mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to XSK RQ %#x (channel %u): err = %d\n",
+                              mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt),
+                              rqn, ix, err);
+       return err;
+}
+
+int mlx5e_rx_res_xsk_deactivate(struct mlx5e_rx_res *res, unsigned int ix)
+{
+       int err;
+
+       err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, res->drop_rqn);
+       if (err)
+               mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to drop RQ %#x (channel %u): err = %d\n",
+                              mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt),
+                              res->drop_rqn, ix, err);
+       return err;
+}
+
 struct mlx5e_rss_params_traffic_type
-mlx5e_rx_res_rss_get_current_tt_config(struct mlx5e_rx_res *res, enum mlx5e_traffic_types tt)
+mlx5e_rx_res_rss_get_current_tt_config(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt)
 {
        struct mlx5e_rss_params_traffic_type rss_tt;
 
@@ -71,3 +616,216 @@ mlx5e_rx_res_rss_get_current_tt_config(struct mlx5e_rx_res *res, enum mlx5e_traf
        rss_tt.rx_hash_fields = res->rss_params.rx_hash_fields[tt];
        return rss_tt;
 }
+
+void mlx5e_rx_res_rss_set_indir_uniform(struct mlx5e_rx_res *res, unsigned int nch)
+{
+       mlx5e_rss_params_indir_init_uniform(&res->rss_params.indir, nch);
+
+       if (!res->rss_active)
+               return;
+
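+       /* RSS is live: push the new indirection table to the HW RQT now. */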
+       mlx5e_rx_res_rss_enable(res);
+}
+
+void mlx5e_rx_res_rss_get_rxfh(struct mlx5e_rx_res *res, u32 *indir, u8 *key, u8 *hfunc)
+{
+       unsigned int i;
+
+       if (indir)
+               for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++)
+                       indir[i] = res->rss_params.indir.table[i];
+
+       if (key)
+               memcpy(key, res->rss_params.hash.toeplitz_hash_key,
+                      sizeof(res->rss_params.hash.toeplitz_hash_key));
+
+       if (hfunc)
+               *hfunc = res->rss_params.hash.hfunc;
+}
+
+static int mlx5e_rx_res_rss_update_tir(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt,
+                                      bool inner)
+{
+       struct mlx5e_rss_params_traffic_type rss_tt;
+       struct mlx5e_tir_builder *builder;
+       struct mlx5e_tir *tir;
+       int err;
+
+       builder = mlx5e_tir_builder_alloc(true);
+       if (!builder)
+               return -ENOMEM;
+
+       rss_tt = mlx5e_rx_res_rss_get_current_tt_config(res, tt);
+
+       mlx5e_tir_builder_build_rss(builder, &res->rss_params.hash, &rss_tt, inner);
+       tir = inner ? &res->rss[tt].inner_indir_tir : &res->rss[tt].indir_tir;
+       err = mlx5e_tir_modify(tir, builder);
+
+       mlx5e_tir_builder_free(builder);
+       return err;
+}
+
+int mlx5e_rx_res_rss_set_rxfh(struct mlx5e_rx_res *res, const u32 *indir,
+                             const u8 *key, const u8 *hfunc)
+{
+       enum mlx5_traffic_types tt;
+       bool changed_indir = false;
+       bool changed_hash = false;
+       int err;
+
+       if (hfunc && *hfunc != res->rss_params.hash.hfunc) {
+               switch (*hfunc) {
+               case ETH_RSS_HASH_XOR:
+               case ETH_RSS_HASH_TOP:
+                       break;
+               default:
+                       return -EINVAL;
+               }
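+               /* A new hash function changes both the TIR hash configuration
+                * and how the RQT spreads traffic, so refresh both below.
+                */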
+               changed_hash = true;
+               changed_indir = true;
+               res->rss_params.hash.hfunc = *hfunc;
+       }
+
+       if (key) {
+               if (res->rss_params.hash.hfunc == ETH_RSS_HASH_TOP)
+                       changed_hash = true;
+               memcpy(res->rss_params.hash.toeplitz_hash_key, key,
+                      sizeof(res->rss_params.hash.toeplitz_hash_key));
+       }
+
+       if (indir) {
+               unsigned int i;
+
+               changed_indir = true;
+
+               for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++)
+                       res->rss_params.indir.table[i] = indir[i];
+       }
+
+       if (changed_indir && res->rss_active) {
+               err = mlx5e_rqt_redirect_indir(&res->indir_rqt, res->rss_rqns, res->rss_nch,
+                                              res->rss_params.hash.hfunc,
+                                              &res->rss_params.indir);
+               if (err)
+                       mlx5_core_warn(res->mdev, "Failed to redirect indirect RQT %#x to channels: err = %d\n",
+                                      mlx5e_rqt_get_rqtn(&res->indir_rqt), err);
+       }
+
+       if (changed_hash)
+               for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
+                       err = mlx5e_rx_res_rss_update_tir(res, tt, false);
+                       if (err)
+                               mlx5_core_warn(res->mdev, "Failed to update RSS hash of indirect TIR for traffic type %d: err = %d\n",
+                                              tt, err);
+
+                       if (!(res->features & MLX5E_RX_RES_FEATURE_INNER_FT))
+                               continue;
+
+                       err = mlx5e_rx_res_rss_update_tir(res, tt, true);
+                       if (err)
+                               mlx5_core_warn(res->mdev, "Failed to update RSS hash of inner indirect TIR for traffic type %d: err = %d\n",
+                                              tt, err);
+               }
+
+       return 0;
+}
+
+u8 mlx5e_rx_res_rss_get_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt)
+{
+       return res->rss_params.rx_hash_fields[tt];
+}
+
+int mlx5e_rx_res_rss_set_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt,
+                                    u8 rx_hash_fields)
+{
+       u8 old_rx_hash_fields;
+       int err;
+
+       old_rx_hash_fields = res->rss_params.rx_hash_fields[tt];
+
+       if (old_rx_hash_fields == rx_hash_fields)
+               return 0;
+
+       res->rss_params.rx_hash_fields[tt] = rx_hash_fields;
+
+       err = mlx5e_rx_res_rss_update_tir(res, tt, false);
+       if (err) {
+               res->rss_params.rx_hash_fields[tt] = old_rx_hash_fields;
+               mlx5_core_warn(res->mdev, "Failed to update RSS hash fields of indirect TIR for traffic type %d: err = %d\n",
+                              tt, err);
+               return err;
+       }
+
+       if (!(res->features & MLX5E_RX_RES_FEATURE_INNER_FT))
+               return 0;
+
+       err = mlx5e_rx_res_rss_update_tir(res, tt, true);
+       if (err) {
+               /* Partial update happened. Try to revert; it may fail too,
+                * but there is nothing more we can do.
+                */
+               res->rss_params.rx_hash_fields[tt] = old_rx_hash_fields;
+               mlx5_core_warn(res->mdev, "Failed to update RSS hash fields of inner indirect TIR for traffic type %d: err = %d\n",
+                              tt, err);
+               if (mlx5e_rx_res_rss_update_tir(res, tt, false))
+                       mlx5_core_warn(res->mdev, "Partial update of RSS hash fields happened: failed to revert indirect TIR for traffic type %d to the old values\n",
+                                      tt);
+       }
+
+       return err;
+}
+
+int mlx5e_rx_res_lro_set_param(struct mlx5e_rx_res *res, struct mlx5e_lro_param *lro_param)
+{
+       struct mlx5e_tir_builder *builder;
+       enum mlx5_traffic_types tt;
+       int err, final_err;
+       unsigned int ix;
+
+       builder = mlx5e_tir_builder_alloc(true);
+       if (!builder)
+               return -ENOMEM;
+
+       mlx5e_tir_builder_build_lro(builder, lro_param);
+
+       final_err = 0;
+
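+       /* Remember the first failure, but keep updating the remaining TIRs. */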
+       for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
+               err = mlx5e_tir_modify(&res->rss[tt].indir_tir, builder);
+               if (err) {
+                       mlx5_core_warn(res->mdev, "Failed to update LRO state of indirect TIR %#x for traffic type %d: err = %d\n",
+                                      mlx5e_tir_get_tirn(&res->rss[tt].indir_tir), tt, err);
+                       if (!final_err)
+                               final_err = err;
+               }
+
+               if (!(res->features & MLX5E_RX_RES_FEATURE_INNER_FT))
+                       continue;
+
+               err = mlx5e_tir_modify(&res->rss[tt].inner_indir_tir, builder);
+               if (err) {
+                       mlx5_core_warn(res->mdev, "Failed to update LRO state of inner indirect TIR %#x for traffic type %d: err = %d\n",
+                                      mlx5e_tir_get_tirn(&res->rss[tt].inner_indir_tir), tt, err);
+                       if (!final_err)
+                               final_err = err;
+               }
+       }
+
+       for (ix = 0; ix < res->max_nch; ix++) {
+               err = mlx5e_tir_modify(&res->channels[ix].direct_tir, builder);
+               if (err) {
+                       mlx5_core_warn(res->mdev, "Failed to update LRO state of direct TIR %#x for channel %u: err = %d\n",
+                                      mlx5e_tir_get_tirn(&res->channels[ix].direct_tir), ix, err);
+                       if (!final_err)
+                               final_err = err;
+               }
+       }
+
+       mlx5e_tir_builder_free(builder);
+       return final_err;
+}
+
+struct mlx5e_rss_params_hash mlx5e_rx_res_get_current_hash(struct mlx5e_rx_res *res)
+{
+       return res->rss_params.hash;
+}
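For reference, mlx5e_rx_res_rss_set_indir_uniform() above delegates to
mlx5e_rss_params_indir_init_uniform() (defined elsewhere in this series) to
spread RQT entries evenly across the active channels. A minimal stand-alone
sketch of that spreading rule; the round-robin modulo below and the table
size are assumptions for illustration, not the driver's literal code:

    #include <stdio.h>

    #define INDIR_TABLE_SIZE 256 /* stand-in for MLX5E_INDIR_RQT_SIZE */

    /* Fill an RSS indirection table round-robin over nch channels. */
    static void indir_init_uniform(unsigned int *table, unsigned int nch)
    {
            unsigned int i;

            for (i = 0; i < INDIR_TABLE_SIZE; i++)
                    table[i] = i % nch;
    }

    int main(void)
    {
            unsigned int table[INDIR_TABLE_SIZE];

            indir_init_uniform(table, 12);
            printf("entry 0 -> channel %u, entry 13 -> channel %u\n",
                   table[0], table[13]);
            return 0;
    }

With 12 channels, entry 0 maps to channel 0 and entry 13 wraps to channel 1,
so hash buckets stay evenly distributed whenever the channel count changes.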
index 068e481..1baeec5 100644 (file)
@@ -9,39 +9,59 @@
 #include "tir.h"
 #include "fs.h"
 
-#define MLX5E_MAX_NUM_CHANNELS (MLX5E_INDIR_RQT_SIZE / 2)
+struct mlx5e_rx_res;
 
-struct mlx5e_rss_params {
-       struct mlx5e_rss_params_hash hash;
-       struct mlx5e_rss_params_indir indir;
-       u32 rx_hash_fields[MLX5E_NUM_INDIR_TIRS];
-};
+struct mlx5e_channels;
+struct mlx5e_rss_params_hash;
 
-struct mlx5e_rx_res {
-       struct mlx5e_rss_params rss_params;
-
-       struct mlx5e_rqt indir_rqt;
-       struct {
-               struct mlx5e_tir indir_tir;
-               struct mlx5e_tir inner_indir_tir;
-       } rss[MLX5E_NUM_INDIR_TIRS];
-
-       struct {
-               struct mlx5e_rqt direct_rqt;
-               struct mlx5e_tir direct_tir;
-               struct mlx5e_rqt xsk_rqt;
-               struct mlx5e_tir xsk_tir;
-       } channels[MLX5E_MAX_NUM_CHANNELS];
-
-       struct {
-               struct mlx5e_rqt rqt;
-               struct mlx5e_tir tir;
-       } ptp;
+enum mlx5e_rx_res_features {
+       MLX5E_RX_RES_FEATURE_INNER_FT = BIT(0),
+       MLX5E_RX_RES_FEATURE_XSK = BIT(1),
+       MLX5E_RX_RES_FEATURE_PTP = BIT(2),
 };
 
 struct mlx5e_rss_params_traffic_type
-mlx5e_rss_get_default_tt_config(enum mlx5e_traffic_types tt);
+mlx5e_rss_get_default_tt_config(enum mlx5_traffic_types tt);
+
+/* Setup */
+struct mlx5e_rx_res *mlx5e_rx_res_alloc(void);
+int mlx5e_rx_res_init(struct mlx5e_rx_res *res, struct mlx5_core_dev *mdev,
+                     enum mlx5e_rx_res_features features, unsigned int max_nch,
+                     u32 drop_rqn, const struct mlx5e_lro_param *init_lro_param,
+                     unsigned int init_nch);
+void mlx5e_rx_res_destroy(struct mlx5e_rx_res *res);
+void mlx5e_rx_res_free(struct mlx5e_rx_res *res);
+
+/* TIRN getters for flow steering */
+u32 mlx5e_rx_res_get_tirn_direct(struct mlx5e_rx_res *res, unsigned int ix);
+u32 mlx5e_rx_res_get_tirn_xsk(struct mlx5e_rx_res *res, unsigned int ix);
+u32 mlx5e_rx_res_get_tirn_rss(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt);
+u32 mlx5e_rx_res_get_tirn_rss_inner(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt);
+u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res);
+
+/* RQTN getters for modules that create their own TIRs */
+u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int ix);
+
+/* Activate/deactivate API */
+void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs);
+void mlx5e_rx_res_channels_deactivate(struct mlx5e_rx_res *res);
+int mlx5e_rx_res_xsk_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs,
+                             unsigned int ix);
+int mlx5e_rx_res_xsk_deactivate(struct mlx5e_rx_res *res, unsigned int ix);
+
+/* Configuration API */
 struct mlx5e_rss_params_traffic_type
-mlx5e_rx_res_rss_get_current_tt_config(struct mlx5e_rx_res *res, enum mlx5e_traffic_types tt);
+mlx5e_rx_res_rss_get_current_tt_config(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt);
+void mlx5e_rx_res_rss_set_indir_uniform(struct mlx5e_rx_res *res, unsigned int nch);
+void mlx5e_rx_res_rss_get_rxfh(struct mlx5e_rx_res *res, u32 *indir, u8 *key, u8 *hfunc);
+int mlx5e_rx_res_rss_set_rxfh(struct mlx5e_rx_res *res, const u32 *indir,
+                             const u8 *key, const u8 *hfunc);
+u8 mlx5e_rx_res_rss_get_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt);
+int mlx5e_rx_res_rss_set_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt,
+                                    u8 rx_hash_fields);
+int mlx5e_rx_res_lro_set_param(struct mlx5e_rx_res *res, struct mlx5e_lro_param *lro_param);
+
+/* Workaround for hairpin */
+struct mlx5e_rss_params_hash mlx5e_rx_res_get_current_hash(struct mlx5e_rx_res *res);
 
 #endif /* __MLX5_EN_RX_RES_H__ */
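The header above now defines the complete lifecycle of the RX resource
object. A hedged sketch of the intended call order, using only the functions
declared above; the example_* wrapper, the zero feature mask and the fields
read from priv are illustrative assumptions, not code from this series:

    /* Sketch only: assumes mlx5e driver context and the declarations above.
     * Walks alloc -> init -> activate -> deactivate -> destroy -> free in
     * one function purely for illustration.
     */
    static int example_rx_res_lifecycle(struct mlx5e_priv *priv,
                                        const struct mlx5e_lro_param *lro_param)
    {
            int err;

            priv->rx_res = mlx5e_rx_res_alloc();
            if (!priv->rx_res)
                    return -ENOMEM;

            err = mlx5e_rx_res_init(priv->rx_res, priv->mdev,
                                    0, /* no INNER_FT/XSK/PTP features */
                                    priv->max_nch, priv->drop_rq.rqn,
                                    lro_param,
                                    priv->channels.params.num_channels);
            if (err)
                    goto err_free; /* init unwinds its own sub-objects */

            /* Once channels are open: */
            mlx5e_rx_res_channels_activate(priv->rx_res, &priv->channels);

            /* Teardown mirrors setup in reverse: */
            mlx5e_rx_res_channels_deactivate(priv->rx_res);
            mlx5e_rx_res_destroy(priv->rx_res);
    err_free:
            mlx5e_rx_res_free(priv->rx_res);
            return err;
    }

Note the split between mlx5e_rx_res_destroy(), which releases the HW objects,
and mlx5e_rx_res_free(), which frees the allocation, mirroring alloc vs. init.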
index 91e7a01..b1707b8 100644 (file)
@@ -2138,6 +2138,7 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
        struct mlx5_tc_ct_priv *ct_priv;
        struct mlx5_core_dev *dev;
        const char *msg;
+       u64 mapping_id;
        int err;
 
        dev = priv->mdev;
@@ -2153,13 +2154,17 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
        if (!ct_priv)
                goto err_alloc;
 
-       ct_priv->zone_mapping = mapping_create(sizeof(u16), 0, true);
+       mapping_id = mlx5_query_nic_system_image_guid(dev);
+
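+       /* Key the mappings by the NIC's system image GUID so that functions
+        * on the same NIC can share zone and label mapping IDs.
+        */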
+       ct_priv->zone_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_ZONE,
+                                                     sizeof(u16), 0, true);
        if (IS_ERR(ct_priv->zone_mapping)) {
                err = PTR_ERR(ct_priv->zone_mapping);
                goto err_mapping_zone;
        }
 
-       ct_priv->labels_mapping = mapping_create(sizeof(u32) * 4, 0, true);
+       ct_priv->labels_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_LABELS,
+                                                       sizeof(u32) * 4, 0, true);
        if (IS_ERR(ct_priv->labels_mapping)) {
                err = PTR_ERR(ct_priv->labels_mapping);
                goto err_mapping_labels;
index afaf5b4..d54607a 100644 (file)
@@ -37,7 +37,7 @@ static void mlx5e_init_trap_rq(struct mlx5e_trap *t, struct mlx5e_params *params
        struct mlx5e_priv *priv = t->priv;
 
        rq->wq_type      = params->rq_wq_type;
-       rq->pdev         = mdev->device;
+       rq->pdev         = t->pdev;
        rq->netdev       = priv->netdev;
        rq->priv         = priv;
        rq->clock        = &mdev->clock;
index 71e8d66..7b562d2 100644 (file)
@@ -122,7 +122,7 @@ static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv,
         * any Fill Ring entries at the setup stage.
         */
 
-       err = mlx5e_xsk_redirect_rqt_to_channel(priv, priv->channels.c[ix]);
+       err = mlx5e_rx_res_xsk_activate(priv->rx_res, &priv->channels, ix);
        if (unlikely(err))
                goto err_deactivate;
 
@@ -169,7 +169,7 @@ static int mlx5e_xsk_disable_locked(struct mlx5e_priv *priv, u16 ix)
                goto remove_pool;
 
        c = priv->channels.c[ix];
-       mlx5e_xsk_redirect_rqt_to_drop(priv, ix);
+       mlx5e_rx_res_xsk_deactivate(priv->rx_res, ix);
        mlx5e_deactivate_xsk(c);
        mlx5e_close_xsk(c);
 
index ab485d0..c062674 100644 (file)
@@ -183,59 +183,3 @@ void mlx5e_deactivate_xsk(struct mlx5e_channel *c)
        mlx5e_deactivate_rq(&c->xskrq);
        /* TX queue is disabled on close. */
 }
-
-int mlx5e_xsk_redirect_rqt_to_channel(struct mlx5e_priv *priv, struct mlx5e_channel *c)
-{
-       return mlx5e_rqt_redirect_direct(&priv->rx_res->channels[c->ix].xsk_rqt, c->xskrq.rqn);
-}
-
-int mlx5e_xsk_redirect_rqt_to_drop(struct mlx5e_priv *priv, u16 ix)
-{
-       return mlx5e_rqt_redirect_direct(&priv->rx_res->channels[ix].xsk_rqt, priv->drop_rq.rqn);
-}
-
-int mlx5e_xsk_redirect_rqts_to_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs)
-{
-       int err, i;
-
-       if (!priv->xsk.refcnt)
-               return 0;
-
-       for (i = 0; i < chs->num; i++) {
-               struct mlx5e_channel *c = chs->c[i];
-
-               if (!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
-                       continue;
-
-               err = mlx5e_xsk_redirect_rqt_to_channel(priv, c);
-               if (unlikely(err))
-                       goto err_stop;
-       }
-
-       return 0;
-
-err_stop:
-       for (i--; i >= 0; i--) {
-               if (!test_bit(MLX5E_CHANNEL_STATE_XSK, chs->c[i]->state))
-                       continue;
-
-               mlx5e_xsk_redirect_rqt_to_drop(priv, i);
-       }
-
-       return err;
-}
-
-void mlx5e_xsk_redirect_rqts_to_drop(struct mlx5e_priv *priv, struct mlx5e_channels *chs)
-{
-       int i;
-
-       if (!priv->xsk.refcnt)
-               return;
-
-       for (i = 0; i < chs->num; i++) {
-               if (!test_bit(MLX5E_CHANNEL_STATE_XSK, chs->c[i]->state))
-                       continue;
-
-               mlx5e_xsk_redirect_rqt_to_drop(priv, i);
-       }
-}
index ca20f1f..50e111b 100644 (file)
@@ -17,9 +17,5 @@ int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params,
 void mlx5e_close_xsk(struct mlx5e_channel *c);
 void mlx5e_activate_xsk(struct mlx5e_channel *c);
 void mlx5e_deactivate_xsk(struct mlx5e_channel *c);
-int mlx5e_xsk_redirect_rqt_to_channel(struct mlx5e_priv *priv, struct mlx5e_channel *c);
-int mlx5e_xsk_redirect_rqt_to_drop(struct mlx5e_priv *priv, u16 ix);
-int mlx5e_xsk_redirect_rqts_to_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs);
-void mlx5e_xsk_redirect_rqts_to_drop(struct mlx5e_priv *priv, struct mlx5e_channels *chs);
 
 #endif /* __MLX5_EN_XSK_SETUP_H__ */
index e51f60b..4c4ee52 100644 (file)
@@ -16,13 +16,13 @@ struct mlx5e_accel_fs_tcp {
        struct mlx5_flow_handle *default_rules[ACCEL_FS_TCP_NUM_TYPES];
 };
 
-static enum mlx5e_traffic_types fs_accel2tt(enum accel_fs_tcp_type i)
+static enum mlx5_traffic_types fs_accel2tt(enum accel_fs_tcp_type i)
 {
        switch (i) {
        case ACCEL_FS_IPV4_TCP:
-               return MLX5E_TT_IPV4_TCP;
+               return MLX5_TT_IPV4_TCP;
        default: /* ACCEL_FS_IPV6_TCP */
-               return MLX5E_TT_IPV6_TCP;
+               return MLX5_TT_IPV6_TCP;
        }
 }
 
@@ -161,7 +161,7 @@ static int accel_fs_tcp_add_default_rule(struct mlx5e_priv *priv,
        fs_tcp = priv->fs.accel_tcp;
        accel_fs_t = &fs_tcp->tables[type];
 
-       dest = mlx5e_ttc_get_default_dest(priv, fs_accel2tt(type));
+       dest = mlx5_ttc_get_default_dest(priv->fs.ttc, fs_accel2tt(type));
        rule = mlx5_add_flow_rules(accel_fs_t->t, NULL, &flow_act, &dest, 1);
        if (IS_ERR(rule)) {
                err = PTR_ERR(rule);
@@ -307,7 +307,7 @@ static int accel_fs_tcp_disable(struct mlx5e_priv *priv)
 
        for (i = 0; i < ACCEL_FS_TCP_NUM_TYPES; i++) {
                /* Modify ttc rules destination to point back to the indir TIRs */
-               err = mlx5e_ttc_fwd_default_dest(priv, fs_accel2tt(i));
+               err = mlx5_ttc_fwd_default_dest(priv->fs.ttc, fs_accel2tt(i));
                if (err) {
                        netdev_err(priv->netdev,
                                   "%s: modify ttc[%d] default destination failed, err(%d)\n",
@@ -329,7 +329,7 @@ static int accel_fs_tcp_enable(struct mlx5e_priv *priv)
                dest.ft = priv->fs.accel_tcp->tables[i].t;
 
                /* Modify ttc rules destination to point on the accel_fs FTs */
-               err = mlx5e_ttc_fwd_dest(priv, fs_accel2tt(i), &dest);
+               err = mlx5_ttc_fwd_dest(priv->fs.ttc, fs_accel2tt(i), &dest);
                if (err) {
                        netdev_err(priv->netdev,
                                   "%s: modify ttc[%d] destination to accel failed, err(%d)\n",
index 34119ce..17da23d 100644 (file)
@@ -41,11 +41,11 @@ struct mlx5e_ipsec_tx {
 };
 
 /* IPsec RX flow steering */
-static enum mlx5e_traffic_types fs_esp2tt(enum accel_fs_esp_type i)
+static enum mlx5_traffic_types fs_esp2tt(enum accel_fs_esp_type i)
 {
        if (i == ACCEL_FS_ESP4)
-               return MLX5E_TT_IPV4_IPSEC_ESP;
-       return MLX5E_TT_IPV6_IPSEC_ESP;
+               return MLX5_TT_IPV4_IPSEC_ESP;
+       return MLX5_TT_IPV6_IPSEC_ESP;
 }
 
 static int rx_err_add_rule(struct mlx5e_priv *priv,
@@ -265,7 +265,8 @@ static int rx_create(struct mlx5e_priv *priv, enum accel_fs_esp_type type)
        accel_esp = priv->ipsec->rx_fs;
        fs_prot = &accel_esp->fs_prot[type];
 
-       fs_prot->default_dest = mlx5e_ttc_get_default_dest(priv, fs_esp2tt(type));
+       fs_prot->default_dest =
+               mlx5_ttc_get_default_dest(priv->fs.ttc, fs_esp2tt(type));
 
        err = rx_err_create_ft(priv, fs_prot, &fs_prot->rx_err);
        if (err)
@@ -301,7 +302,7 @@ static int rx_ft_get(struct mlx5e_priv *priv, enum accel_fs_esp_type type)
        /* connect */
        dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
        dest.ft = fs_prot->ft;
-       mlx5e_ttc_fwd_dest(priv, fs_esp2tt(type), &dest);
+       mlx5_ttc_fwd_dest(priv->fs.ttc, fs_esp2tt(type), &dest);
 
 out:
        mutex_unlock(&fs_prot->prot_mutex);
@@ -320,7 +321,7 @@ static void rx_ft_put(struct mlx5e_priv *priv, enum accel_fs_esp_type type)
                goto out;
 
        /* disconnect */
-       mlx5e_ttc_fwd_default_dest(priv, fs_esp2tt(type));
+       mlx5_ttc_fwd_default_dest(priv->fs.ttc, fs_esp2tt(type));
 
        /* remove FT */
        rx_destroy(priv, type);
index bfdbc30..62abce0 100644 (file)
@@ -628,7 +628,7 @@ int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk,
        priv_rx->sw_stats = &priv->tls->sw_stats;
        mlx5e_set_ktls_rx_priv_ctx(tls_ctx, priv_rx);
 
-       rqtn = mlx5e_rqt_get_rqtn(&priv->rx_res->channels[rxq].direct_rqt);
+       rqtn = mlx5e_rx_res_get_rqtn_direct(priv->rx_res, rxq);
 
        err = mlx5e_ktls_create_tir(mdev, &priv_rx->tir, rqtn);
        if (err)
index db6c6a9..fe5d82f 100644 (file)
@@ -98,17 +98,17 @@ struct arfs_rule {
        for (j = 0; j < ARFS_HASH_SIZE; j++) \
                hlist_for_each_entry_safe(hn, tmp, &hash[j], hlist)
 
-static enum mlx5e_traffic_types arfs_get_tt(enum arfs_type type)
+static enum mlx5_traffic_types arfs_get_tt(enum arfs_type type)
 {
        switch (type) {
        case ARFS_IPV4_TCP:
-               return MLX5E_TT_IPV4_TCP;
+               return MLX5_TT_IPV4_TCP;
        case ARFS_IPV4_UDP:
-               return MLX5E_TT_IPV4_UDP;
+               return MLX5_TT_IPV4_UDP;
        case ARFS_IPV6_TCP:
-               return MLX5E_TT_IPV6_TCP;
+               return MLX5_TT_IPV6_TCP;
        case ARFS_IPV6_UDP:
-               return MLX5E_TT_IPV6_UDP;
+               return MLX5_TT_IPV6_UDP;
        default:
                return -EINVAL;
        }
@@ -120,7 +120,7 @@ static int arfs_disable(struct mlx5e_priv *priv)
 
        for (i = 0; i < ARFS_NUM_TYPES; i++) {
                /* Modify ttc rules destination back to their default */
-               err = mlx5e_ttc_fwd_default_dest(priv, arfs_get_tt(i));
+               err = mlx5_ttc_fwd_default_dest(priv->fs.ttc, arfs_get_tt(i));
                if (err) {
                        netdev_err(priv->netdev,
                                   "%s: modify ttc[%d] default destination failed, err(%d)\n",
@@ -149,7 +149,7 @@ int mlx5e_arfs_enable(struct mlx5e_priv *priv)
        for (i = 0; i < ARFS_NUM_TYPES; i++) {
                dest.ft = priv->fs.arfs->arfs_tables[i].ft.t;
                /* Modify ttc rules destination to point on the aRFS FTs */
-               err = mlx5e_ttc_fwd_dest(priv, arfs_get_tt(i), &dest);
+               err = mlx5_ttc_fwd_dest(priv->fs.ttc, arfs_get_tt(i), &dest);
                if (err) {
                        netdev_err(priv->netdev,
                                   "%s: modify ttc[%d] dest to arfs, failed err(%d)\n",
@@ -194,7 +194,7 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv,
        struct arfs_table *arfs_t = &priv->fs.arfs->arfs_tables[type];
        struct mlx5_flow_destination dest = {};
        MLX5_DECLARE_FLOW_ACT(flow_act);
-       enum mlx5e_traffic_types tt;
+       enum mlx5_traffic_types tt;
        int err = 0;
 
        dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
@@ -205,10 +205,10 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv,
                return -EINVAL;
        }
 
-       /* FIXME: Must use mlx5e_ttc_get_default_dest(),
+       /* FIXME: Must use mlx5_ttc_get_default_dest(),
         * but can't since TTC default is not setup yet !
         */
-       dest.tir_num = priv->rx_res->rss[tt].indir_tir.tirn;
+       dest.tir_num = mlx5e_rx_res_get_tirn_rss(priv->rx_res, tt);
        arfs_t->default_rule = mlx5_add_flow_rules(arfs_t->ft.t, NULL,
                                                   &flow_act,
                                                   &dest, 1);
@@ -552,7 +552,7 @@ static struct mlx5_flow_handle *arfs_add_rule(struct mlx5e_priv *priv,
                       16);
        }
        dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
-       dest.tir_num = priv->rx_res->channels[arfs_rule->rxq].direct_tir.tirn;
+       dest.tir_num = mlx5e_rx_res_get_tirn_direct(priv->rx_res, arfs_rule->rxq);
        rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
        if (IS_ERR(rule)) {
                err = PTR_ERR(rule);
@@ -575,7 +575,7 @@ static void arfs_modify_rule_rq(struct mlx5e_priv *priv,
        int err = 0;
 
        dst.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
-       dst.tir_num = priv->rx_res->channels[rxq].direct_tir.tirn;
+       dst.tir_num = mlx5e_rx_res_get_tirn_direct(priv->rx_res, rxq);
        err =  mlx5_modify_rule_destination(rule, &dst, NULL);
        if (err)
                netdev_warn(priv->netdev,
index 9264d18..2cf59bb 100644 (file)
@@ -1172,7 +1172,7 @@ static int mlx5e_set_link_ksettings(struct net_device *netdev,
 
 u32 mlx5e_ethtool_get_rxfh_key_size(struct mlx5e_priv *priv)
 {
-       return sizeof(priv->rx_res->rss_params.hash.toeplitz_hash_key);
+       return sizeof_field(struct mlx5e_rss_params_hash, toeplitz_hash_key);
 }
 
 static u32 mlx5e_get_rxfh_key_size(struct net_device *netdev)
@@ -1198,18 +1198,10 @@ int mlx5e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
                   u8 *hfunc)
 {
        struct mlx5e_priv *priv = netdev_priv(netdev);
-       struct mlx5e_rss_params *rss;
 
-       rss = &priv->rx_res->rss_params;
-
-       if (indir)
-               memcpy(indir, rss->indir.table, sizeof(rss->indir.table));
-
-       if (key)
-               memcpy(key, rss->hash.toeplitz_hash_key, sizeof(rss->hash.toeplitz_hash_key));
-
-       if (hfunc)
-               *hfunc = rss->hash.hfunc;
+       mutex_lock(&priv->state_lock);
+       mlx5e_rx_res_rss_get_rxfh(priv->rx_res, indir, key, hfunc);
+       mutex_unlock(&priv->state_lock);
 
        return 0;
 }
@@ -1218,58 +1210,13 @@ int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir,
                   const u8 *key, const u8 hfunc)
 {
        struct mlx5e_priv *priv = netdev_priv(dev);
-       struct mlx5e_rss_params *rss;
-       bool refresh_tirs = false;
-       bool refresh_rqt = false;
-
-       if ((hfunc != ETH_RSS_HASH_NO_CHANGE) &&
-           (hfunc != ETH_RSS_HASH_XOR) &&
-           (hfunc != ETH_RSS_HASH_TOP))
-               return -EINVAL;
+       int err;
 
        mutex_lock(&priv->state_lock);
-
-       rss = &priv->rx_res->rss_params;
-
-       if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != rss->hash.hfunc) {
-               rss->hash.hfunc = hfunc;
-               refresh_rqt = true;
-               refresh_tirs = true;
-       }
-
-       if (indir) {
-               memcpy(rss->indir.table, indir, sizeof(rss->indir.table));
-               refresh_rqt = true;
-       }
-
-       if (key) {
-               memcpy(rss->hash.toeplitz_hash_key, key, sizeof(rss->hash.toeplitz_hash_key));
-               refresh_tirs = refresh_tirs || rss->hash.hfunc == ETH_RSS_HASH_TOP;
-       }
-
-       if (refresh_rqt && test_bit(MLX5E_STATE_OPENED, &priv->state)) {
-               u32 *rqns;
-
-               rqns = kvmalloc_array(priv->channels.num, sizeof(*rqns), GFP_KERNEL);
-               if (rqns) {
-                       unsigned int ix;
-
-                       for (ix = 0; ix < priv->channels.num; ix++)
-                               rqns[ix] = priv->channels.c[ix]->rq.rqn;
-
-                       mlx5e_rqt_redirect_indir(&priv->rx_res->indir_rqt, rqns,
-                                                priv->channels.num,
-                                                rss->hash.hfunc, &rss->indir);
-                       kvfree(rqns);
-               }
-       }
-
-       if (refresh_tirs)
-               mlx5e_modify_tirs_hash(priv);
-
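+       /* ETH_RSS_HASH_NO_CHANGE maps to a NULL hfunc pointer in the new API. */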
+       err = mlx5e_rx_res_rss_set_rxfh(priv->rx_res, indir, key,
+                                       hfunc == ETH_RSS_HASH_NO_CHANGE ? NULL : &hfunc);
        mutex_unlock(&priv->state_lock);
-
-       return 0;
+       return err;
 }
 
 #define MLX5E_PFC_PREVEN_AUTO_TOUT_MSEC                100
index e798157..5c754e9 100644 (file)
@@ -718,7 +718,7 @@ static int mlx5e_add_promisc_rule(struct mlx5e_priv *priv)
        if (!spec)
                return -ENOMEM;
        dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
-       dest.ft = priv->fs.ttc.ft.t;
+       dest.ft = mlx5_get_ttc_flow_table(priv->fs.ttc);
 
        rule_p = &priv->fs.promisc.rule;
        *rule_p = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
@@ -854,587 +854,59 @@ void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft)
        ft->t = NULL;
 }
 
-static void mlx5e_cleanup_ttc_rules(struct mlx5e_ttc_table *ttc)
-{
-       int i;
-
-       for (i = 0; i < MLX5E_NUM_TT; i++) {
-               if (!IS_ERR_OR_NULL(ttc->rules[i].rule)) {
-                       mlx5_del_flow_rules(ttc->rules[i].rule);
-                       ttc->rules[i].rule = NULL;
-               }
-       }
-
-       for (i = 0; i < MLX5E_NUM_TUNNEL_TT; i++) {
-               if (!IS_ERR_OR_NULL(ttc->tunnel_rules[i])) {
-                       mlx5_del_flow_rules(ttc->tunnel_rules[i]);
-                       ttc->tunnel_rules[i] = NULL;
-               }
-       }
-}
-
-struct mlx5e_etype_proto {
-       u16 etype;
-       u8 proto;
-};
-
-static struct mlx5e_etype_proto ttc_rules[] = {
-       [MLX5E_TT_IPV4_TCP] = {
-               .etype = ETH_P_IP,
-               .proto = IPPROTO_TCP,
-       },
-       [MLX5E_TT_IPV6_TCP] = {
-               .etype = ETH_P_IPV6,
-               .proto = IPPROTO_TCP,
-       },
-       [MLX5E_TT_IPV4_UDP] = {
-               .etype = ETH_P_IP,
-               .proto = IPPROTO_UDP,
-       },
-       [MLX5E_TT_IPV6_UDP] = {
-               .etype = ETH_P_IPV6,
-               .proto = IPPROTO_UDP,
-       },
-       [MLX5E_TT_IPV4_IPSEC_AH] = {
-               .etype = ETH_P_IP,
-               .proto = IPPROTO_AH,
-       },
-       [MLX5E_TT_IPV6_IPSEC_AH] = {
-               .etype = ETH_P_IPV6,
-               .proto = IPPROTO_AH,
-       },
-       [MLX5E_TT_IPV4_IPSEC_ESP] = {
-               .etype = ETH_P_IP,
-               .proto = IPPROTO_ESP,
-       },
-       [MLX5E_TT_IPV6_IPSEC_ESP] = {
-               .etype = ETH_P_IPV6,
-               .proto = IPPROTO_ESP,
-       },
-       [MLX5E_TT_IPV4] = {
-               .etype = ETH_P_IP,
-               .proto = 0,
-       },
-       [MLX5E_TT_IPV6] = {
-               .etype = ETH_P_IPV6,
-               .proto = 0,
-       },
-       [MLX5E_TT_ANY] = {
-               .etype = 0,
-               .proto = 0,
-       },
-};
-
-static struct mlx5e_etype_proto ttc_tunnel_rules[] = {
-       [MLX5E_TT_IPV4_GRE] = {
-               .etype = ETH_P_IP,
-               .proto = IPPROTO_GRE,
-       },
-       [MLX5E_TT_IPV6_GRE] = {
-               .etype = ETH_P_IPV6,
-               .proto = IPPROTO_GRE,
-       },
-       [MLX5E_TT_IPV4_IPIP] = {
-               .etype = ETH_P_IP,
-               .proto = IPPROTO_IPIP,
-       },
-       [MLX5E_TT_IPV6_IPIP] = {
-               .etype = ETH_P_IPV6,
-               .proto = IPPROTO_IPIP,
-       },
-       [MLX5E_TT_IPV4_IPV6] = {
-               .etype = ETH_P_IP,
-               .proto = IPPROTO_IPV6,
-       },
-       [MLX5E_TT_IPV6_IPV6] = {
-               .etype = ETH_P_IPV6,
-               .proto = IPPROTO_IPV6,
-       },
-
-};
-
-u8 mlx5e_get_proto_by_tunnel_type(enum mlx5e_tunnel_types tt)
-{
-       return ttc_tunnel_rules[tt].proto;
-}
-
-static bool mlx5e_tunnel_proto_supported_rx(struct mlx5_core_dev *mdev, u8 proto_type)
-{
-       switch (proto_type) {
-       case IPPROTO_GRE:
-               return MLX5_CAP_ETH(mdev, tunnel_stateless_gre);
-       case IPPROTO_IPIP:
-       case IPPROTO_IPV6:
-               return (MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip) ||
-                       MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip_rx));
-       default:
-               return false;
-       }
-}
-
-static bool mlx5e_tunnel_any_rx_proto_supported(struct mlx5_core_dev *mdev)
-{
-       int tt;
-
-       for (tt = 0; tt < MLX5E_NUM_TUNNEL_TT; tt++) {
-               if (mlx5e_tunnel_proto_supported_rx(mdev, ttc_tunnel_rules[tt].proto))
-                       return true;
-       }
-       return false;
-}
-
-bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev)
-{
-       return (mlx5e_tunnel_any_rx_proto_supported(mdev) &&
-               MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ft_field_support.inner_ip_version));
-}
-
-static u8 mlx5e_etype_to_ipv(u16 ethertype)
-{
-       if (ethertype == ETH_P_IP)
-               return 4;
-
-       if (ethertype == ETH_P_IPV6)
-               return 6;
-
-       return 0;
-}
-
-static struct mlx5_flow_handle *
-mlx5e_generate_ttc_rule(struct mlx5e_priv *priv,
-                       struct mlx5_flow_table *ft,
-                       struct mlx5_flow_destination *dest,
-                       u16 etype,
-                       u8 proto)
-{
-       int match_ipv_outer = MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ft_field_support.outer_ip_version);
-       MLX5_DECLARE_FLOW_ACT(flow_act);
-       struct mlx5_flow_handle *rule;
-       struct mlx5_flow_spec *spec;
-       int err = 0;
-       u8 ipv;
-
-       spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
-       if (!spec)
-               return ERR_PTR(-ENOMEM);
-
-       if (proto) {
-               spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
-               MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol);
-               MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, proto);
-       }
-
-       ipv = mlx5e_etype_to_ipv(etype);
-       if (match_ipv_outer && ipv) {
-               spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
-               MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version);
-               MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, ipv);
-       } else if (etype) {
-               spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
-               MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype);
-               MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, etype);
-       }
-
-       rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1);
-       if (IS_ERR(rule)) {
-               err = PTR_ERR(rule);
-               netdev_err(priv->netdev, "%s: add rule failed\n", __func__);
-       }
-
-       kvfree(spec);
-       return err ? ERR_PTR(err) : rule;
-}
-
-static int mlx5e_generate_ttc_table_rules(struct mlx5e_priv *priv,
-                                         struct ttc_params *params,
-                                         struct mlx5e_ttc_table *ttc)
-{
-       struct mlx5_flow_destination dest = {};
-       struct mlx5_flow_handle **trules;
-       struct mlx5e_ttc_rule *rules;
-       struct mlx5_flow_table *ft;
-       int tt;
-       int err;
-
-       ft = ttc->ft.t;
-       rules = ttc->rules;
-
-       dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
-       for (tt = 0; tt < MLX5E_NUM_TT; tt++) {
-               struct mlx5e_ttc_rule *rule = &rules[tt];
-
-               if (tt == MLX5E_TT_ANY)
-                       dest.tir_num = params->any_tt_tirn;
-               else
-                       dest.tir_num = params->indir_tirn[tt];
-
-               rule->rule = mlx5e_generate_ttc_rule(priv, ft, &dest,
-                                                    ttc_rules[tt].etype,
-                                                    ttc_rules[tt].proto);
-               if (IS_ERR(rule->rule)) {
-                       err = PTR_ERR(rule->rule);
-                       rule->rule = NULL;
-                       goto del_rules;
-               }
-               rule->default_dest = dest;
-       }
-
-       if (!params->inner_ttc || !mlx5e_tunnel_inner_ft_supported(priv->mdev))
-               return 0;
-
-       trules    = ttc->tunnel_rules;
-       dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
-       dest.ft   = params->inner_ttc->ft.t;
-       for (tt = 0; tt < MLX5E_NUM_TUNNEL_TT; tt++) {
-               if (!mlx5e_tunnel_proto_supported_rx(priv->mdev,
-                                                    ttc_tunnel_rules[tt].proto))
-                       continue;
-               trules[tt] = mlx5e_generate_ttc_rule(priv, ft, &dest,
-                                                    ttc_tunnel_rules[tt].etype,
-                                                    ttc_tunnel_rules[tt].proto);
-               if (IS_ERR(trules[tt])) {
-                       err = PTR_ERR(trules[tt]);
-                       trules[tt] = NULL;
-                       goto del_rules;
-               }
-       }
-
-       return 0;
-
-del_rules:
-       mlx5e_cleanup_ttc_rules(ttc);
-       return err;
-}
-
-static int mlx5e_create_ttc_table_groups(struct mlx5e_ttc_table *ttc,
-                                        bool use_ipv)
-{
-       int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
-       struct mlx5e_flow_table *ft = &ttc->ft;
-       int ix = 0;
-       u32 *in;
-       int err;
-       u8 *mc;
-
-       ft->g = kcalloc(MLX5E_TTC_NUM_GROUPS,
-                       sizeof(*ft->g), GFP_KERNEL);
-       if (!ft->g)
-               return -ENOMEM;
-       in = kvzalloc(inlen, GFP_KERNEL);
-       if (!in) {
-               kfree(ft->g);
-               ft->g = NULL;
-               return -ENOMEM;
-       }
-
-       /* L4 Group */
-       mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
-       MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol);
-       if (use_ipv)
-               MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_version);
-       else
-               MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype);
-       MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
-       MLX5_SET_CFG(in, start_flow_index, ix);
-       ix += MLX5E_TTC_GROUP1_SIZE;
-       MLX5_SET_CFG(in, end_flow_index, ix - 1);
-       ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
-       if (IS_ERR(ft->g[ft->num_groups]))
-               goto err;
-       ft->num_groups++;
-
-       /* L3 Group */
-       MLX5_SET(fte_match_param, mc, outer_headers.ip_protocol, 0);
-       MLX5_SET_CFG(in, start_flow_index, ix);
-       ix += MLX5E_TTC_GROUP2_SIZE;
-       MLX5_SET_CFG(in, end_flow_index, ix - 1);
-       ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
-       if (IS_ERR(ft->g[ft->num_groups]))
-               goto err;
-       ft->num_groups++;
-
-       /* Any Group */
-       memset(in, 0, inlen);
-       MLX5_SET_CFG(in, start_flow_index, ix);
-       ix += MLX5E_TTC_GROUP3_SIZE;
-       MLX5_SET_CFG(in, end_flow_index, ix - 1);
-       ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
-       if (IS_ERR(ft->g[ft->num_groups]))
-               goto err;
-       ft->num_groups++;
-
-       kvfree(in);
-       return 0;
-
-err:
-       err = PTR_ERR(ft->g[ft->num_groups]);
-       ft->g[ft->num_groups] = NULL;
-       kvfree(in);
-
-       return err;
-}
-
-static struct mlx5_flow_handle *
-mlx5e_generate_inner_ttc_rule(struct mlx5e_priv *priv,
-                             struct mlx5_flow_table *ft,
-                             struct mlx5_flow_destination *dest,
-                             u16 etype, u8 proto)
-{
-       MLX5_DECLARE_FLOW_ACT(flow_act);
-       struct mlx5_flow_handle *rule;
-       struct mlx5_flow_spec *spec;
-       int err = 0;
-       u8 ipv;
-
-       spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
-       if (!spec)
-               return ERR_PTR(-ENOMEM);
-
-       ipv = mlx5e_etype_to_ipv(etype);
-       if (etype && ipv) {
-               spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS;
-               MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_version);
-               MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_version, ipv);
-       }
-
-       if (proto) {
-               spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS;
-               MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_protocol);
-               MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_protocol, proto);
-       }
-
-       rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1);
-       if (IS_ERR(rule)) {
-               err = PTR_ERR(rule);
-               netdev_err(priv->netdev, "%s: add rule failed\n", __func__);
-       }
-
-       kvfree(spec);
-       return err ? ERR_PTR(err) : rule;
-}
-
-static int mlx5e_generate_inner_ttc_table_rules(struct mlx5e_priv *priv,
-                                               struct ttc_params *params,
-                                               struct mlx5e_ttc_table *ttc)
-{
-       struct mlx5_flow_destination dest = {};
-       struct mlx5e_ttc_rule *rules;
-       struct mlx5_flow_table *ft;
-       int err;
-       int tt;
-
-       ft = ttc->ft.t;
-       rules = ttc->rules;
-       dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
-
-       for (tt = 0; tt < MLX5E_NUM_TT; tt++) {
-               struct mlx5e_ttc_rule *rule = &rules[tt];
-
-               if (tt == MLX5E_TT_ANY)
-                       dest.tir_num = params->any_tt_tirn;
-               else
-                       dest.tir_num = params->indir_tirn[tt];
-
-               rule->rule = mlx5e_generate_inner_ttc_rule(priv, ft, &dest,
-                                                          ttc_rules[tt].etype,
-                                                          ttc_rules[tt].proto);
-               if (IS_ERR(rule->rule)) {
-                       err = PTR_ERR(rule->rule);
-                       rule->rule = NULL;
-                       goto del_rules;
-               }
-               rule->default_dest = dest;
-       }
-
-       return 0;
-
-del_rules:
-
-       mlx5e_cleanup_ttc_rules(ttc);
-       return err;
-}
-
-static int mlx5e_create_inner_ttc_table_groups(struct mlx5e_ttc_table *ttc)
-{
-       int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
-       struct mlx5e_flow_table *ft = &ttc->ft;
-       int ix = 0;
-       u32 *in;
-       int err;
-       u8 *mc;
-
-       ft->g = kcalloc(MLX5E_INNER_TTC_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL);
-       if (!ft->g)
-               return -ENOMEM;
-       in = kvzalloc(inlen, GFP_KERNEL);
-       if (!in) {
-               kfree(ft->g);
-               ft->g = NULL;
-               return -ENOMEM;
-       }
-
-       /* L4 Group */
-       mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
-       MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_protocol);
-       MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_version);
-       MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_INNER_HEADERS);
-       MLX5_SET_CFG(in, start_flow_index, ix);
-       ix += MLX5E_INNER_TTC_GROUP1_SIZE;
-       MLX5_SET_CFG(in, end_flow_index, ix - 1);
-       ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
-       if (IS_ERR(ft->g[ft->num_groups]))
-               goto err;
-       ft->num_groups++;
-
-       /* L3 Group */
-       MLX5_SET(fte_match_param, mc, inner_headers.ip_protocol, 0);
-       MLX5_SET_CFG(in, start_flow_index, ix);
-       ix += MLX5E_INNER_TTC_GROUP2_SIZE;
-       MLX5_SET_CFG(in, end_flow_index, ix - 1);
-       ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
-       if (IS_ERR(ft->g[ft->num_groups]))
-               goto err;
-       ft->num_groups++;
-
-       /* Any Group */
-       memset(in, 0, inlen);
-       MLX5_SET_CFG(in, start_flow_index, ix);
-       ix += MLX5E_INNER_TTC_GROUP3_SIZE;
-       MLX5_SET_CFG(in, end_flow_index, ix - 1);
-       ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
-       if (IS_ERR(ft->g[ft->num_groups]))
-               goto err;
-       ft->num_groups++;
-
-       kvfree(in);
-       return 0;
-
-err:
-       err = PTR_ERR(ft->g[ft->num_groups]);
-       ft->g[ft->num_groups] = NULL;
-       kvfree(in);
-
-       return err;
-}
-
-void mlx5e_set_ttc_basic_params(struct mlx5e_priv *priv,
-                               struct ttc_params *ttc_params)
-{
-       ttc_params->any_tt_tirn = priv->rx_res->channels[0].direct_tir.tirn;
-       ttc_params->inner_ttc = &priv->fs.inner_ttc;
-}
-
-static void mlx5e_set_inner_ttc_ft_params(struct ttc_params *ttc_params)
+static void mlx5e_set_inner_ttc_params(struct mlx5e_priv *priv,
+                                      struct ttc_params *ttc_params)
 {
        struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr;
+       int tt;
 
-       ft_attr->max_fte = MLX5E_INNER_TTC_TABLE_SIZE;
+       memset(ttc_params, 0, sizeof(*ttc_params));
+       ttc_params->ns = mlx5_get_flow_namespace(priv->mdev,
+                                                MLX5_FLOW_NAMESPACE_KERNEL);
        ft_attr->level = MLX5E_INNER_TTC_FT_LEVEL;
        ft_attr->prio = MLX5E_NIC_PRIO;
+
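+       /* MLX5_TT_ANY (no L3/L4 classification) falls back to channel 0's
+        * direct TIR; every other traffic type gets its inner RSS TIR.
+        */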
+       for (tt = 0; tt < MLX5_NUM_TT; tt++) {
+               ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+               ttc_params->dests[tt].tir_num =
+                       tt == MLX5_TT_ANY ?
+                               mlx5e_rx_res_get_tirn_direct(priv->rx_res, 0) :
+                               mlx5e_rx_res_get_tirn_rss_inner(priv->rx_res,
+                                                               tt);
+       }
 }
 
-void mlx5e_set_ttc_ft_params(struct ttc_params *ttc_params)
+void mlx5e_set_ttc_params(struct mlx5e_priv *priv,
+                         struct ttc_params *ttc_params, bool tunnel)
 {
        struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr;
+       int tt;
 
-       ft_attr->max_fte = MLX5E_TTC_TABLE_SIZE;
+       memset(ttc_params, 0, sizeof(*ttc_params));
+       ttc_params->ns = mlx5_get_flow_namespace(priv->mdev,
+                                                MLX5_FLOW_NAMESPACE_KERNEL);
        ft_attr->level = MLX5E_TTC_FT_LEVEL;
        ft_attr->prio = MLX5E_NIC_PRIO;
-}
-
-static int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params,
-                                       struct mlx5e_ttc_table *ttc)
-{
-       struct mlx5e_flow_table *ft = &ttc->ft;
-       int err;
 
-       ft->t = mlx5_create_flow_table(priv->fs.ns, &params->ft_attr);
-       if (IS_ERR(ft->t)) {
-               err = PTR_ERR(ft->t);
-               ft->t = NULL;
-               return err;
+       for (tt = 0; tt < MLX5_NUM_TT; tt++) {
+               ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+               ttc_params->dests[tt].tir_num =
+                       tt == MLX5_TT_ANY ?
+                               mlx5e_rx_res_get_tirn_direct(priv->rx_res, 0) :
+                               mlx5e_rx_res_get_tirn_rss(priv->rx_res, tt);
        }
 
-       err = mlx5e_create_inner_ttc_table_groups(ttc);
-       if (err)
-               goto err;
-
-       err = mlx5e_generate_inner_ttc_table_rules(priv, params, ttc);
-       if (err)
-               goto err;
-
-       return 0;
-
-err:
-       mlx5e_destroy_flow_table(ft);
-       return err;
-}
-
-static void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv,
-                                         struct mlx5e_ttc_table *ttc)
-{
-       mlx5e_cleanup_ttc_rules(ttc);
-       mlx5e_destroy_flow_table(&ttc->ft);
-}
-
-void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv,
-                            struct mlx5e_ttc_table *ttc)
-{
-       mlx5e_cleanup_ttc_rules(ttc);
-       mlx5e_destroy_flow_table(&ttc->ft);
-}
-
-int mlx5e_create_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params,
-                          struct mlx5e_ttc_table *ttc)
-{
-       bool match_ipv_outer = MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ft_field_support.outer_ip_version);
-       struct mlx5e_flow_table *ft = &ttc->ft;
-       int err;
+       ttc_params->inner_ttc = tunnel;
+       if (!tunnel || !mlx5_tunnel_inner_ft_supported(priv->mdev))
+               return;
 
-       ft->t = mlx5_create_flow_table(priv->fs.ns, &params->ft_attr);
-       if (IS_ERR(ft->t)) {
-               err = PTR_ERR(ft->t);
-               ft->t = NULL;
-               return err;
+       for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) {
+               ttc_params->tunnel_dests[tt].type =
+                       MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+               ttc_params->tunnel_dests[tt].ft =
+                       mlx5_get_ttc_flow_table(priv->fs.inner_ttc);
        }
-
-       err = mlx5e_create_ttc_table_groups(ttc, match_ipv_outer);
-       if (err)
-               goto err;
-
-       err = mlx5e_generate_ttc_table_rules(priv, params, ttc);
-       if (err)
-               goto err;
-
-       return 0;
-err:
-       mlx5e_destroy_flow_table(ft);
-       return err;
-}
-
-int mlx5e_ttc_fwd_dest(struct mlx5e_priv *priv, enum mlx5e_traffic_types type,
-                      struct mlx5_flow_destination *new_dest)
-{
-       return mlx5_modify_rule_destination(priv->fs.ttc.rules[type].rule, new_dest, NULL);
-}
-
-struct mlx5_flow_destination
-mlx5e_ttc_get_default_dest(struct mlx5e_priv *priv, enum mlx5e_traffic_types type)
-{
-       struct mlx5_flow_destination *dest = &priv->fs.ttc.rules[type].default_dest;
-
-       WARN_ONCE(dest->type != MLX5_FLOW_DESTINATION_TYPE_TIR,
-                 "TTC[%d] default dest is not setup yet", type);
-
-       return *dest;
-}
-
-int mlx5e_ttc_fwd_default_dest(struct mlx5e_priv *priv, enum mlx5e_traffic_types type)
-{
-       struct mlx5_flow_destination dest = mlx5e_ttc_get_default_dest(priv, type);
-
-       return mlx5e_ttc_fwd_dest(priv, type, &dest);
 }
 
 static void mlx5e_del_l2_flow_rule(struct mlx5e_priv *priv,
@@ -1467,7 +939,7 @@ static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv,
                               outer_headers.dmac_47_16);
 
        dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
-       dest.ft = priv->fs.ttc.ft.t;
+       dest.ft = mlx5_get_ttc_flow_table(priv->fs.ttc);
 
        switch (type) {
        case MLX5E_FULLMATCH:
@@ -1763,10 +1235,46 @@ static void mlx5e_destroy_vlan_table(struct mlx5e_priv *priv)
        kvfree(priv->fs.vlan);
 }
 
-int mlx5e_create_flow_steering(struct mlx5e_priv *priv)
+static void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv)
+{
+       if (!mlx5_tunnel_inner_ft_supported(priv->mdev))
+               return;
+       mlx5_destroy_ttc_table(priv->fs.inner_ttc);
+}
+
+void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv)
+{
+       mlx5_destroy_ttc_table(priv->fs.ttc);
+}
+
+static int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv)
 {
        struct ttc_params ttc_params = {};
-       int tt, err;
+
+       if (!mlx5_tunnel_inner_ft_supported(priv->mdev))
+               return 0;
+
+       mlx5e_set_inner_ttc_params(priv, &ttc_params);
+       priv->fs.inner_ttc = mlx5_create_ttc_table(priv->mdev, &ttc_params);
+       if (IS_ERR(priv->fs.inner_ttc))
+               return PTR_ERR(priv->fs.inner_ttc);
+       return 0;
+}
+
+int mlx5e_create_ttc_table(struct mlx5e_priv *priv)
+{
+       struct ttc_params ttc_params = {};
+
+       mlx5e_set_ttc_params(priv, &ttc_params, true);
+       priv->fs.ttc = mlx5_create_ttc_table(priv->mdev, &ttc_params);
+       if (IS_ERR(priv->fs.ttc))
+               return PTR_ERR(priv->fs.ttc);
+       return 0;
+}
+
+int mlx5e_create_flow_steering(struct mlx5e_priv *priv)
+{
+       int err;
 
        priv->fs.ns = mlx5_get_flow_namespace(priv->mdev,
                                               MLX5_FLOW_NAMESPACE_KERNEL);
@@ -1781,26 +1289,15 @@ int mlx5e_create_flow_steering(struct mlx5e_priv *priv)
                priv->netdev->hw_features &= ~NETIF_F_NTUPLE;
        }
 
-       mlx5e_set_ttc_basic_params(priv, &ttc_params);
-
-       if (mlx5e_tunnel_inner_ft_supported(priv->mdev)) {
-               mlx5e_set_inner_ttc_ft_params(&ttc_params);
-               for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
-                       ttc_params.indir_tirn[tt] = priv->rx_res->rss[tt].inner_indir_tir.tirn;
-
-               err = mlx5e_create_inner_ttc_table(priv, &ttc_params, &priv->fs.inner_ttc);
-               if (err) {
-                       netdev_err(priv->netdev, "Failed to create inner ttc table, err=%d\n",
-                                  err);
-                       goto err_destroy_arfs_tables;
-               }
+       err = mlx5e_create_inner_ttc_table(priv);
+       if (err) {
+               netdev_err(priv->netdev,
+                          "Failed to create inner ttc table, err=%d\n",
+                          err);
+               goto err_destroy_arfs_tables;
        }
 
-       mlx5e_set_ttc_ft_params(&ttc_params);
-       for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
-               ttc_params.indir_tirn[tt] = priv->rx_res->rss[tt].indir_tir.tirn;
-
-       err = mlx5e_create_ttc_table(priv, &ttc_params, &priv->fs.ttc);
+       err = mlx5e_create_ttc_table(priv);
        if (err) {
                netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n",
                           err);
@@ -1834,10 +1331,9 @@ err_destory_vlan_table:
 err_destroy_l2_table:
        mlx5e_destroy_l2_table(priv);
 err_destroy_ttc_table:
-       mlx5e_destroy_ttc_table(priv, &priv->fs.ttc);
+       mlx5e_destroy_ttc_table(priv);
 err_destroy_inner_ttc_table:
-       if (mlx5e_tunnel_inner_ft_supported(priv->mdev))
-               mlx5e_destroy_inner_ttc_table(priv, &priv->fs.inner_ttc);
+       mlx5e_destroy_inner_ttc_table(priv);
 err_destroy_arfs_tables:
        mlx5e_arfs_destroy_tables(priv);
 
@@ -1849,9 +1345,8 @@ void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv)
        mlx5e_ptp_free_rx_fs(priv);
        mlx5e_destroy_vlan_table(priv);
        mlx5e_destroy_l2_table(priv);
-       mlx5e_destroy_ttc_table(priv, &priv->fs.ttc);
-       if (mlx5e_tunnel_inner_ft_supported(priv->mdev))
-               mlx5e_destroy_inner_ttc_table(priv, &priv->fs.inner_ttc);
+       mlx5e_destroy_ttc_table(priv);
+       mlx5e_destroy_inner_ttc_table(priv);
        mlx5e_arfs_destroy_tables(priv);
        mlx5e_ethtool_cleanup_steering(priv);
 }
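
The en_fs.c hunks above replace the driver-local TTC table builders with a shared API whose constructor hands back the table through the kernel ERR_PTR convention instead of an int plus out-parameter. A minimal sketch of the resulting create/use/destroy flow, assuming the mlx5 driver context (the function name is hypothetical; all other calls appear in this diff):

    static int example_ttc_lifecycle(struct mlx5e_priv *priv,
                                     struct ttc_params *params)
    {
            struct mlx5_flow_destination dest = {};
            struct mlx5_ttc_table *ttc;

            ttc = mlx5_create_ttc_table(priv->mdev, params);
            if (IS_ERR(ttc))
                    return PTR_ERR(ttc);    /* constructor encodes the errno */

            /* the opaque handle yields the flow table for steering rules */
            dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
            dest.ft = mlx5_get_ttc_flow_table(ttc);
            /* dest would now seed a flow rule pointing at the TTC table */

            mlx5_destroy_ttc_table(ttc);
            return 0;
    }
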
index 494f6f8..3d8918f 100644
@@ -433,9 +433,9 @@ add_ethtool_flow_rule(struct mlx5e_priv *priv,
 
                dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
                if (group == MLX5E_RQ_GROUP_XSK)
-                       dst->tir_num = priv->rx_res->channels[ix].xsk_tir.tirn;
+                       dst->tir_num = mlx5e_rx_res_get_tirn_xsk(priv->rx_res, ix);
                else
-                       dst->tir_num = priv->rx_res->channels[ix].direct_tir.tirn;
+                       dst->tir_num = mlx5e_rx_res_get_tirn_direct(priv->rx_res, ix);
                flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
        }
 
@@ -786,43 +786,44 @@ void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv)
        INIT_LIST_HEAD(&priv->fs.ethtool.rules);
 }
 
-static enum mlx5e_traffic_types flow_type_to_traffic_type(u32 flow_type)
+static int flow_type_to_traffic_type(u32 flow_type)
 {
        switch (flow_type) {
        case TCP_V4_FLOW:
-               return  MLX5E_TT_IPV4_TCP;
+               return MLX5_TT_IPV4_TCP;
        case TCP_V6_FLOW:
-               return MLX5E_TT_IPV6_TCP;
+               return MLX5_TT_IPV6_TCP;
        case UDP_V4_FLOW:
-               return MLX5E_TT_IPV4_UDP;
+               return MLX5_TT_IPV4_UDP;
        case UDP_V6_FLOW:
-               return MLX5E_TT_IPV6_UDP;
+               return MLX5_TT_IPV6_UDP;
        case AH_V4_FLOW:
-               return MLX5E_TT_IPV4_IPSEC_AH;
+               return MLX5_TT_IPV4_IPSEC_AH;
        case AH_V6_FLOW:
-               return MLX5E_TT_IPV6_IPSEC_AH;
+               return MLX5_TT_IPV6_IPSEC_AH;
        case ESP_V4_FLOW:
-               return MLX5E_TT_IPV4_IPSEC_ESP;
+               return MLX5_TT_IPV4_IPSEC_ESP;
        case ESP_V6_FLOW:
-               return MLX5E_TT_IPV6_IPSEC_ESP;
+               return MLX5_TT_IPV6_IPSEC_ESP;
        case IPV4_FLOW:
-               return MLX5E_TT_IPV4;
+               return MLX5_TT_IPV4;
        case IPV6_FLOW:
-               return MLX5E_TT_IPV6;
+               return MLX5_TT_IPV6;
        default:
-               return MLX5E_NUM_INDIR_TIRS;
+               return -EINVAL;
        }
 }
 
 static int mlx5e_set_rss_hash_opt(struct mlx5e_priv *priv,
                                  struct ethtool_rxnfc *nfc)
 {
-       enum mlx5e_traffic_types tt;
        u8 rx_hash_field = 0;
+       int err;
+       int tt;
 
        tt = flow_type_to_traffic_type(nfc->flow_type);
-       if (tt == MLX5E_NUM_INDIR_TIRS)
-               return -EINVAL;
+       if (tt < 0)
+               return tt;
 
        /*  RSS does not support anything other than hashing to queues
         *  on src IP, dest IP, TCP/UDP src port and TCP/UDP dest
@@ -848,29 +849,23 @@ static int mlx5e_set_rss_hash_opt(struct mlx5e_priv *priv,
                rx_hash_field |= MLX5_HASH_FIELD_SEL_L4_DPORT;
 
        mutex_lock(&priv->state_lock);
-
-       if (rx_hash_field == priv->rx_res->rss_params.rx_hash_fields[tt])
-               goto out;
-
-       priv->rx_res->rss_params.rx_hash_fields[tt] = rx_hash_field;
-       mlx5e_modify_tirs_hash(priv);
-
-out:
+       err = mlx5e_rx_res_rss_set_hash_fields(priv->rx_res, tt, rx_hash_field);
        mutex_unlock(&priv->state_lock);
-       return 0;
+
+       return err;
 }
 
 static int mlx5e_get_rss_hash_opt(struct mlx5e_priv *priv,
                                  struct ethtool_rxnfc *nfc)
 {
-       enum mlx5e_traffic_types tt;
        u32 hash_field = 0;
+       int tt;
 
        tt = flow_type_to_traffic_type(nfc->flow_type);
-       if (tt == MLX5E_NUM_INDIR_TIRS)
-               return -EINVAL;
+       if (tt < 0)
+               return tt;
 
-       hash_field = priv->rx_res->rss_params.rx_hash_fields[tt];
+       hash_field = mlx5e_rx_res_rss_get_hash_fields(priv->rx_res, tt);
        nfc->data = 0;
 
        if (hash_field & MLX5_HASH_FIELD_SEL_SRC_IP)
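
flow_type_to_traffic_type() now carries either a valid MLX5_TT_* index or a negative errno in a single int, so callers propagate the error directly rather than comparing against a sentinel enum value. A hedged sketch of the resulting caller shape (the function name is hypothetical; the other identifiers are taken from this diff):

    static int example_set_rss_hash(struct mlx5e_priv *priv, u32 flow_type,
                                    u8 rx_hash_field)
    {
            int tt = flow_type_to_traffic_type(flow_type);

            if (tt < 0)
                    return tt;      /* -EINVAL for unsupported flow types */

            return mlx5e_rx_res_rss_set_hash_fields(priv->rx_res, tt,
                                                    rx_hash_field);
    }
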
index c663811..ccc569c 100644
@@ -1627,7 +1627,7 @@ static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param)
                                  (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas));
 
        MLX5_SET(cqc,   cqc, cq_period_mode, param->cq_period_mode);
-       MLX5_SET(cqc,   cqc, c_eqn,         eqn);
+       MLX5_SET(cqc,   cqc, c_eqn_or_apu_element, eqn);
        MLX5_SET(cqc,   cqc, uar_page,      mdev->priv.uar->index);
        MLX5_SET(cqc,   cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
                                            MLX5_ADAPTER_PAGE_SHIFT);
@@ -2194,202 +2194,14 @@ void mlx5e_close_channels(struct mlx5e_channels *chs)
        chs->num = 0;
 }
 
-int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv)
-{
-       int err;
-
-       err = mlx5e_rqt_init_direct(&priv->rx_res->indir_rqt, priv->mdev, true,
-                                   priv->drop_rq.rqn);
-       if (err)
-               mlx5_core_warn(priv->mdev, "create indirect rqts failed, %d\n", err);
-       return err;
-}
-
-int mlx5e_create_direct_rqts(struct mlx5e_priv *priv)
-{
-       int err;
-       int ix;
-
-       for (ix = 0; ix < priv->max_nch; ix++) {
-               err = mlx5e_rqt_init_direct(&priv->rx_res->channels[ix].direct_rqt,
-                                           priv->mdev, false, priv->drop_rq.rqn);
-               if (unlikely(err))
-                       goto err_destroy_rqts;
-       }
-
-       return 0;
-
-err_destroy_rqts:
-       mlx5_core_warn(priv->mdev, "create direct rqts failed, %d\n", err);
-       while (--ix >= 0)
-               mlx5e_rqt_destroy(&priv->rx_res->channels[ix].direct_rqt);
-
-       return err;
-}
-
-static int mlx5e_create_xsk_rqts(struct mlx5e_priv *priv)
-{
-       int err;
-       int ix;
-
-       for (ix = 0; ix < priv->max_nch; ix++) {
-               err = mlx5e_rqt_init_direct(&priv->rx_res->channels[ix].xsk_rqt,
-                                           priv->mdev, false, priv->drop_rq.rqn);
-               if (unlikely(err))
-                       goto err_destroy_rqts;
-       }
-
-       return 0;
-
-err_destroy_rqts:
-       mlx5_core_warn(priv->mdev, "create xsk rqts failed, %d\n", err);
-       while (--ix >= 0)
-               mlx5e_rqt_destroy(&priv->rx_res->channels[ix].xsk_rqt);
-
-       return err;
-}
-
-void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv)
-{
-       unsigned int ix;
-
-       for (ix = 0; ix < priv->max_nch; ix++)
-               mlx5e_rqt_destroy(&priv->rx_res->channels[ix].direct_rqt);
-}
-
-static void mlx5e_destroy_xsk_rqts(struct mlx5e_priv *priv)
-{
-       unsigned int ix;
-
-       for (ix = 0; ix < priv->max_nch; ix++)
-               mlx5e_rqt_destroy(&priv->rx_res->channels[ix].xsk_rqt);
-}
-
-static void mlx5e_redirect_rqts_to_channels(struct mlx5e_priv *priv,
-                                           struct mlx5e_channels *chs)
-{
-       struct mlx5e_rx_res *res = priv->rx_res;
-       unsigned int ix;
-       u32 *rqns;
-
-       rqns = kvmalloc_array(chs->num, sizeof(*rqns), GFP_KERNEL);
-       if (rqns) {
-               for (ix = 0; ix < chs->num; ix++)
-                       rqns[ix] = chs->c[ix]->rq.rqn;
-
-               mlx5e_rqt_redirect_indir(&res->indir_rqt, rqns, chs->num,
-                                        res->rss_params.hash.hfunc,
-                                        &res->rss_params.indir);
-               kvfree(rqns);
-       }
-
-       for (ix = 0; ix < priv->max_nch; ix++) {
-               u32 rqn = priv->drop_rq.rqn;
-
-               if (ix < chs->num)
-                       rqn = chs->c[ix]->rq.rqn;
-
-               mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, rqn);
-       }
-
-       if (priv->profile->rx_ptp_support) {
-               u32 rqn;
-
-               if (mlx5e_ptp_get_rqn(priv->channels.ptp, &rqn))
-                       rqn = priv->drop_rq.rqn;
-
-               mlx5e_rqt_redirect_direct(&res->ptp.rqt, rqn);
-       }
-}
-
-static void mlx5e_redirect_rqts_to_drop(struct mlx5e_priv *priv)
-{
-       struct mlx5e_rx_res *res = priv->rx_res;
-       unsigned int ix;
-
-       mlx5e_rqt_redirect_direct(&res->indir_rqt, priv->drop_rq.rqn);
-
-       for (ix = 0; ix < priv->max_nch; ix++)
-               mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, priv->drop_rq.rqn);
-
-       if (priv->profile->rx_ptp_support)
-               mlx5e_rqt_redirect_direct(&res->ptp.rqt, priv->drop_rq.rqn);
-}
-
-int mlx5e_modify_tirs_hash(struct mlx5e_priv *priv)
-{
-       struct mlx5e_rss_params_hash *rss_hash = &priv->rx_res->rss_params.hash;
-       struct mlx5e_rss_params_traffic_type rss_tt;
-       struct mlx5e_rx_res *res = priv->rx_res;
-       struct mlx5e_tir_builder *builder;
-       enum mlx5e_traffic_types tt;
-
-       builder = mlx5e_tir_builder_alloc(true);
-       if (!builder)
-               return -ENOMEM;
-
-       for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
-               rss_tt = mlx5e_rx_res_rss_get_current_tt_config(res, tt);
-               mlx5e_tir_builder_build_rss(builder, rss_hash, &rss_tt, false);
-               mlx5e_tir_modify(&res->rss[tt].indir_tir, builder);
-               mlx5e_tir_builder_clear(builder);
-       }
-
-       /* Verify inner tirs resources allocated */
-       if (!res->rss[0].inner_indir_tir.tirn)
-               goto out;
-
-       for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
-               rss_tt = mlx5e_rx_res_rss_get_current_tt_config(res, tt);
-               mlx5e_tir_builder_build_rss(builder, rss_hash, &rss_tt, true);
-               mlx5e_tir_modify(&res->rss[tt].indir_tir, builder);
-               mlx5e_tir_builder_clear(builder);
-       }
-
-out:
-       mlx5e_tir_builder_free(builder);
-       return 0;
-}
-
 static int mlx5e_modify_tirs_lro(struct mlx5e_priv *priv)
 {
        struct mlx5e_rx_res *res = priv->rx_res;
-       struct mlx5e_tir_builder *builder;
        struct mlx5e_lro_param lro_param;
-       enum mlx5e_traffic_types tt;
-       int err;
-       int ix;
-
-       builder = mlx5e_tir_builder_alloc(true);
-       if (!builder)
-               return -ENOMEM;
 
        lro_param = mlx5e_get_lro_param(&priv->channels.params);
-       mlx5e_tir_builder_build_lro(builder, &lro_param);
-
-       for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
-               err = mlx5e_tir_modify(&res->rss[tt].indir_tir, builder);
-               if (err)
-                       goto err_free_builder;
-
-               /* Verify inner tirs resources allocated */
-               if (!res->rss[0].inner_indir_tir.tirn)
-                       continue;
-
-               err = mlx5e_tir_modify(&res->rss[tt].inner_indir_tir, builder);
-               if (err)
-                       goto err_free_builder;
-       }
-
-       for (ix = 0; ix < priv->max_nch; ix++) {
-               err = mlx5e_tir_modify(&res->channels[ix].direct_tir, builder);
-               if (err)
-                       goto err_free_builder;
-       }
 
-err_free_builder:
-       mlx5e_tir_builder_free(builder);
-       return err;
+       return mlx5e_rx_res_lro_set_param(res, &lro_param);
 }
 
 static MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_modify_tirs_lro);
@@ -2572,8 +2384,7 @@ int mlx5e_num_channels_changed(struct mlx5e_priv *priv)
 
        /* This function may be called on attach, before priv->rx_res is created. */
        if (!netif_is_rxfh_configured(priv->netdev) && priv->rx_res)
-               mlx5e_build_default_indir_rqt(priv->rx_res->rss_params.indir.table,
-                                             MLX5E_INDIR_RQT_SIZE, count);
+               mlx5e_rx_res_rss_set_indir_uniform(priv->rx_res, count);
 
        return 0;
 }
@@ -2633,18 +2444,14 @@ void mlx5e_activate_priv_channels(struct mlx5e_priv *priv)
 
        mlx5e_wait_channels_min_rx_wqes(&priv->channels);
 
-       if (priv->rx_res) {
-               mlx5e_redirect_rqts_to_channels(priv, &priv->channels);
-               mlx5e_xsk_redirect_rqts_to_channels(priv, &priv->channels);
-       }
+       if (priv->rx_res)
+               mlx5e_rx_res_channels_activate(priv->rx_res, &priv->channels);
 }
 
 void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv)
 {
-       if (priv->rx_res) {
-               mlx5e_xsk_redirect_rqts_to_drop(priv, &priv->channels);
-               mlx5e_redirect_rqts_to_drop(priv);
-       }
+       if (priv->rx_res)
+               mlx5e_rx_res_channels_deactivate(priv->rx_res);
 
        if (mlx5e_is_vport_rep(priv))
                mlx5e_remove_sqs_fwd_rules(priv);
@@ -3019,194 +2826,6 @@ static void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv)
        mlx5e_destroy_tises(priv);
 }
 
-int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc)
-{
-       struct mlx5e_rss_params_hash *rss_hash = &priv->rx_res->rss_params.hash;
-       bool inner_ft_support = priv->channels.params.tunneled_offload_en;
-       struct mlx5e_rss_params_traffic_type rss_tt;
-       struct mlx5e_rx_res *res = priv->rx_res;
-       enum mlx5e_traffic_types tt, max_tt;
-       struct mlx5e_tir_builder *builder;
-       struct mlx5e_lro_param lro_param;
-       u32 indir_rqtn;
-       int err = 0;
-
-       builder = mlx5e_tir_builder_alloc(false);
-       if (!builder)
-               return -ENOMEM;
-
-       lro_param = mlx5e_get_lro_param(&priv->channels.params);
-       indir_rqtn = mlx5e_rqt_get_rqtn(&res->indir_rqt);
-
-       for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
-               mlx5e_tir_builder_build_rqt(builder, priv->mdev->mlx5e_res.hw_objs.td.tdn,
-                                           indir_rqtn, inner_ft_support);
-               mlx5e_tir_builder_build_lro(builder, &lro_param);
-               rss_tt = mlx5e_rx_res_rss_get_current_tt_config(res, tt);
-               mlx5e_tir_builder_build_rss(builder, rss_hash, &rss_tt, false);
-
-               err = mlx5e_tir_init(&res->rss[tt].indir_tir, builder, priv->mdev, true);
-               if (err) {
-                       mlx5_core_warn(priv->mdev, "create indirect tirs failed, %d\n", err);
-                       goto err_destroy_tirs;
-               }
-
-               mlx5e_tir_builder_clear(builder);
-       }
-
-       if (!inner_ttc || !mlx5e_tunnel_inner_ft_supported(priv->mdev))
-               goto out;
-
-       for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
-               mlx5e_tir_builder_build_rqt(builder, priv->mdev->mlx5e_res.hw_objs.td.tdn,
-                                           indir_rqtn, inner_ft_support);
-               mlx5e_tir_builder_build_lro(builder, &lro_param);
-               rss_tt = mlx5e_rx_res_rss_get_current_tt_config(res, tt);
-               mlx5e_tir_builder_build_rss(builder, rss_hash, &rss_tt, true);
-
-               err = mlx5e_tir_init(&res->rss[tt].inner_indir_tir, builder, priv->mdev, true);
-               if (err) {
-                       mlx5_core_warn(priv->mdev, "create inner indirect tirs failed, %d\n", err);
-                       goto err_destroy_inner_tirs;
-               }
-
-               mlx5e_tir_builder_clear(builder);
-       }
-
-       goto out;
-
-err_destroy_inner_tirs:
-       max_tt = tt;
-       for (tt = 0; tt < max_tt; tt++)
-               mlx5e_tir_destroy(&res->rss[tt].inner_indir_tir);
-
-       tt = MLX5E_NUM_INDIR_TIRS;
-err_destroy_tirs:
-       max_tt = tt;
-       for (tt = 0; tt < max_tt; tt++)
-               mlx5e_tir_destroy(&res->rss[tt].indir_tir);
-
-out:
-       mlx5e_tir_builder_free(builder);
-
-       return err;
-}
-
-static int mlx5e_create_direct_tir(struct mlx5e_priv *priv, struct mlx5e_tir *tir,
-                                  struct mlx5e_tir_builder *builder, struct mlx5e_rqt *rqt)
-{
-       bool inner_ft_support = priv->channels.params.tunneled_offload_en;
-       struct mlx5e_lro_param lro_param;
-       int err = 0;
-
-       lro_param = mlx5e_get_lro_param(&priv->channels.params);
-
-       mlx5e_tir_builder_build_rqt(builder, priv->mdev->mlx5e_res.hw_objs.td.tdn,
-                                   mlx5e_rqt_get_rqtn(rqt), inner_ft_support);
-       mlx5e_tir_builder_build_lro(builder, &lro_param);
-       mlx5e_tir_builder_build_direct(builder);
-
-       err = mlx5e_tir_init(tir, builder, priv->mdev, true);
-       if (unlikely(err))
-               mlx5_core_warn(priv->mdev, "create tirs failed, %d\n", err);
-
-       mlx5e_tir_builder_clear(builder);
-
-       return err;
-}
-
-int mlx5e_create_direct_tirs(struct mlx5e_priv *priv)
-{
-       struct mlx5e_rx_res *res = priv->rx_res;
-       struct mlx5e_tir_builder *builder;
-       int err = 0;
-       int ix;
-
-       builder = mlx5e_tir_builder_alloc(false);
-       if (!builder)
-               return -ENOMEM;
-
-       for (ix = 0; ix < priv->max_nch; ix++) {
-               err = mlx5e_create_direct_tir(priv, &res->channels[ix].direct_tir,
-                                             builder, &res->channels[ix].direct_rqt);
-               if (err)
-                       goto err_destroy_tirs;
-       }
-
-       goto out;
-
-err_destroy_tirs:
-       while (--ix >= 0)
-               mlx5e_tir_destroy(&res->channels[ix].direct_tir);
-
-out:
-       mlx5e_tir_builder_free(builder);
-
-       return err;
-}
-
-static int mlx5e_create_xsk_tirs(struct mlx5e_priv *priv)
-{
-       struct mlx5e_rx_res *res = priv->rx_res;
-       struct mlx5e_tir_builder *builder;
-       int err;
-       int ix;
-
-       builder = mlx5e_tir_builder_alloc(false);
-       if (!builder)
-               return -ENOMEM;
-
-       for (ix = 0; ix < priv->max_nch; ix++) {
-               err = mlx5e_create_direct_tir(priv, &res->channels[ix].xsk_tir,
-                                             builder, &res->channels[ix].xsk_rqt);
-               if (err)
-                       goto err_destroy_tirs;
-       }
-
-       goto out;
-
-err_destroy_tirs:
-       while (--ix >= 0)
-               mlx5e_tir_destroy(&res->channels[ix].xsk_tir);
-
-out:
-       mlx5e_tir_builder_free(builder);
-
-       return err;
-}
-
-void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv)
-{
-       struct mlx5e_rx_res *res = priv->rx_res;
-       enum mlx5e_traffic_types tt;
-
-       for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
-               mlx5e_tir_destroy(&res->rss[tt].indir_tir);
-
-       /* Verify inner tirs resources allocated */
-       if (!res->rss[0].inner_indir_tir.tirn)
-               return;
-
-       for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
-               mlx5e_tir_destroy(&res->rss[tt].inner_indir_tir);
-}
-
-void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv)
-{
-       unsigned int ix;
-
-       for (ix = 0; ix < priv->max_nch; ix++)
-               mlx5e_tir_destroy(&priv->rx_res->channels[ix].direct_tir);
-}
-
-static void mlx5e_destroy_xsk_tirs(struct mlx5e_priv *priv)
-{
-       unsigned int ix;
-
-       for (ix = 0; ix < priv->max_nch; ix++)
-               mlx5e_tir_destroy(&priv->rx_res->channels[ix].xsk_tir);
-}
-
 static int mlx5e_modify_channels_scatter_fcs(struct mlx5e_channels *chs, bool enable)
 {
        int err = 0;
@@ -3223,7 +2842,7 @@ static int mlx5e_modify_channels_scatter_fcs(struct mlx5e_channels *chs, bool en
 
 static int mlx5e_modify_channels_vsd(struct mlx5e_channels *chs, bool vsd)
 {
-       int err = 0;
+       int err;
        int i;
 
        for (i = 0; i < chs->num; i++) {
@@ -3231,6 +2850,8 @@ static int mlx5e_modify_channels_vsd(struct mlx5e_channels *chs, bool vsd)
                if (err)
                        return err;
        }
+       if (chs->ptp && test_bit(MLX5E_PTP_STATE_RX, chs->ptp->state))
+               return mlx5e_modify_rq_vsd(&chs->ptp->rq, vsd);
 
        return 0;
 }
@@ -3668,6 +3289,24 @@ int mlx5e_set_features(struct net_device *netdev, netdev_features_t features)
        return 0;
 }
 
+static netdev_features_t mlx5e_fix_uplink_rep_features(struct net_device *netdev,
+                                                      netdev_features_t features)
+{
+       features &= ~NETIF_F_HW_TLS_RX;
+       if (netdev->features & NETIF_F_HW_TLS_RX)
+               netdev_warn(netdev, "Disabling hw_tls_rx, not supported in switchdev mode\n");
+
+       features &= ~NETIF_F_HW_TLS_TX;
+       if (netdev->features & NETIF_F_HW_TLS_TX)
+               netdev_warn(netdev, "Disabling hw_tls_tx, not supported in switchdev mode\n");
+
+       features &= ~NETIF_F_NTUPLE;
+       if (netdev->features & NETIF_F_NTUPLE)
+               netdev_warn(netdev, "Disabling ntuple, not supported in switchdev mode\n");
+
+       return features;
+}
+
 static netdev_features_t mlx5e_fix_features(struct net_device *netdev,
                                            netdev_features_t features)
 {
@@ -3699,15 +3338,8 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev,
                        netdev_warn(netdev, "Disabling rxhash, not supported when CQE compress is active\n");
        }
 
-       if (mlx5e_is_uplink_rep(priv)) {
-               features &= ~NETIF_F_HW_TLS_RX;
-               if (netdev->features & NETIF_F_HW_TLS_RX)
-                       netdev_warn(netdev, "Disabling hw_tls_rx, not supported in switchdev mode\n");
-
-               features &= ~NETIF_F_HW_TLS_TX;
-               if (netdev->features & NETIF_F_HW_TLS_TX)
-                       netdev_warn(netdev, "Disabling hw_tls_tx, not supported in switchdev mode\n");
-       }
+       if (mlx5e_is_uplink_rep(priv))
+               features = mlx5e_fix_uplink_rep_features(netdev, features);
 
        mutex_unlock(&priv->state_lock);
 
@@ -4446,15 +4078,6 @@ const struct net_device_ops mlx5e_netdev_ops = {
        .ndo_get_devlink_port    = mlx5e_get_devlink_port,
 };
 
-void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len,
-                                  int num_channels)
-{
-       int i;
-
-       for (i = 0; i < len; i++)
-               indirection_rqt[i] = i % num_channels;
-}
-
 static u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout)
 {
        int i;
@@ -4467,21 +4090,6 @@ static u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeo
        return MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]);
 }
 
-void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params,
-                           u16 num_channels)
-{
-       enum mlx5e_traffic_types tt;
-
-       rss_params->hash.hfunc = ETH_RSS_HASH_TOP;
-       netdev_rss_key_fill(rss_params->hash.toeplitz_hash_key,
-                           sizeof(rss_params->hash.toeplitz_hash_key));
-       mlx5e_build_default_indir_rqt(rss_params->indir.table,
-                                     MLX5E_INDIR_RQT_SIZE, num_channels);
-       for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
-               rss_params->rx_hash_fields[tt] =
-                       mlx5e_rss_get_default_tt_config(tt).rx_hash_fields;
-}
-
 void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 mtu)
 {
        struct mlx5e_params *params = &priv->channels.params;
@@ -4543,7 +4151,7 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16
        /* TX inline */
        mlx5_query_min_inline(mdev, &params->tx_min_inline_mode);
 
-       params->tunneled_offload_en = mlx5e_tunnel_inner_ft_supported(mdev);
+       params->tunneled_offload_en = mlx5_tunnel_inner_ft_supported(mdev);
 
        /* AF_XDP */
        params->xsk = xsk;
@@ -4603,8 +4211,8 @@ static bool mlx5e_tunnel_any_tx_proto_supported(struct mlx5_core_dev *mdev)
 {
        int tt;
 
-       for (tt = 0; tt < MLX5E_NUM_TUNNEL_TT; tt++) {
-               if (mlx5e_tunnel_proto_supported_tx(mdev, mlx5e_get_proto_by_tunnel_type(tt)))
+       for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) {
+               if (mlx5e_tunnel_proto_supported_tx(mdev, mlx5_get_proto_by_tunnel_type(tt)))
                        return true;
        }
        return (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev));
@@ -4701,6 +4309,9 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
        if (MLX5_CAP_ETH(mdev, scatter_fcs))
                netdev->hw_features |= NETIF_F_RXFCS;
 
+       if (mlx5_qos_is_supported(mdev))
+               netdev->hw_features |= NETIF_F_HW_TC;
+
        netdev->features          = netdev->hw_features;
 
        /* Defaults */
@@ -4721,8 +4332,6 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
                netdev->hw_features      |= NETIF_F_NTUPLE;
 #endif
        }
-       if (mlx5_qos_is_supported(mdev))
-               netdev->features |= NETIF_F_HW_TC;
 
        netdev->features         |= NETIF_F_HIGHDMA;
        netdev->features         |= NETIF_F_HW_VLAN_STAG_FILTER;
@@ -4805,15 +4414,14 @@ static void mlx5e_nic_cleanup(struct mlx5e_priv *priv)
 static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
 {
        struct mlx5_core_dev *mdev = priv->mdev;
-       struct mlx5e_tir_builder *tir_builder;
+       enum mlx5e_rx_res_features features;
+       struct mlx5e_lro_param lro_param;
        int err;
 
-       priv->rx_res = kvzalloc(sizeof(*priv->rx_res), GFP_KERNEL);
+       priv->rx_res = mlx5e_rx_res_alloc();
        if (!priv->rx_res)
                return -ENOMEM;
 
-       mlx5e_build_rss_params(&priv->rx_res->rss_params, priv->channels.params.num_channels);
-
        mlx5e_create_q_counters(priv);
 
        err = mlx5e_open_drop_rq(priv, &priv->drop_rq);
@@ -4822,50 +4430,20 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
                goto err_destroy_q_counters;
        }
 
-       err = mlx5e_create_indirect_rqt(priv);
+       features = MLX5E_RX_RES_FEATURE_XSK | MLX5E_RX_RES_FEATURE_PTP;
+       if (priv->channels.params.tunneled_offload_en)
+               features |= MLX5E_RX_RES_FEATURE_INNER_FT;
+       lro_param = mlx5e_get_lro_param(&priv->channels.params);
+       err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, features,
+                               priv->max_nch, priv->drop_rq.rqn, &lro_param,
+                               priv->channels.params.num_channels);
        if (err)
                goto err_close_drop_rq;
 
-       err = mlx5e_create_direct_rqts(priv);
-       if (err)
-               goto err_destroy_indirect_rqts;
-
-       err = mlx5e_create_indirect_tirs(priv, true);
-       if (err)
-               goto err_destroy_direct_rqts;
-
-       err = mlx5e_create_direct_tirs(priv);
-       if (err)
-               goto err_destroy_indirect_tirs;
-
-       err = mlx5e_create_xsk_rqts(priv);
-       if (unlikely(err))
-               goto err_destroy_direct_tirs;
-
-       err = mlx5e_create_xsk_tirs(priv);
-       if (unlikely(err))
-               goto err_destroy_xsk_rqts;
-
-       err = mlx5e_rqt_init_direct(&priv->rx_res->ptp.rqt, priv->mdev, false,
-                                   priv->drop_rq.rqn);
-       if (err)
-               goto err_destroy_xsk_tirs;
-
-       tir_builder = mlx5e_tir_builder_alloc(false);
-       if (!tir_builder) {
-               err = -ENOMEM;
-               goto err_destroy_ptp_rqt;
-       }
-       err = mlx5e_create_direct_tir(priv, &priv->rx_res->ptp.tir, tir_builder,
-                                     &priv->rx_res->ptp.rqt);
-       mlx5e_tir_builder_free(tir_builder);
-       if (err)
-               goto err_destroy_ptp_rqt;
-
        err = mlx5e_create_flow_steering(priv);
        if (err) {
                mlx5_core_warn(mdev, "create flow steering failed, %d\n", err);
-               goto err_destroy_ptp_direct_tir;
+               goto err_destroy_rx_res;
        }
 
        err = mlx5e_tc_nic_init(priv);
@@ -4886,27 +4464,13 @@ err_tc_nic_cleanup:
        mlx5e_tc_nic_cleanup(priv);
 err_destroy_flow_steering:
        mlx5e_destroy_flow_steering(priv);
-err_destroy_ptp_direct_tir:
-       mlx5e_tir_destroy(&priv->rx_res->ptp.tir);
-err_destroy_ptp_rqt:
-       mlx5e_rqt_destroy(&priv->rx_res->ptp.rqt);
-err_destroy_xsk_tirs:
-       mlx5e_destroy_xsk_tirs(priv);
-err_destroy_xsk_rqts:
-       mlx5e_destroy_xsk_rqts(priv);
-err_destroy_direct_tirs:
-       mlx5e_destroy_direct_tirs(priv);
-err_destroy_indirect_tirs:
-       mlx5e_destroy_indirect_tirs(priv);
-err_destroy_direct_rqts:
-       mlx5e_destroy_direct_rqts(priv);
-err_destroy_indirect_rqts:
-       mlx5e_rqt_destroy(&priv->rx_res->indir_rqt);
+err_destroy_rx_res:
+       mlx5e_rx_res_destroy(priv->rx_res);
 err_close_drop_rq:
        mlx5e_close_drop_rq(&priv->drop_rq);
 err_destroy_q_counters:
        mlx5e_destroy_q_counters(priv);
-       kvfree(priv->rx_res);
+       mlx5e_rx_res_free(priv->rx_res);
        priv->rx_res = NULL;
        return err;
 }
@@ -4916,17 +4480,10 @@ static void mlx5e_cleanup_nic_rx(struct mlx5e_priv *priv)
        mlx5e_accel_cleanup_rx(priv);
        mlx5e_tc_nic_cleanup(priv);
        mlx5e_destroy_flow_steering(priv);
-       mlx5e_tir_destroy(&priv->rx_res->ptp.tir);
-       mlx5e_rqt_destroy(&priv->rx_res->ptp.rqt);
-       mlx5e_destroy_xsk_tirs(priv);
-       mlx5e_destroy_xsk_rqts(priv);
-       mlx5e_destroy_direct_tirs(priv);
-       mlx5e_destroy_indirect_tirs(priv);
-       mlx5e_destroy_direct_rqts(priv);
-       mlx5e_rqt_destroy(&priv->rx_res->indir_rqt);
+       mlx5e_rx_res_destroy(priv->rx_res);
        mlx5e_close_drop_rq(&priv->drop_rq);
        mlx5e_destroy_q_counters(priv);
-       kvfree(priv->rx_res);
+       mlx5e_rx_res_free(priv->rx_res);
        priv->rx_res = NULL;
 }
 
index 2c54951..c54aaef 100644
@@ -49,6 +49,7 @@
 #include "en/devlink.h"
 #include "fs_core.h"
 #include "lib/mlx5.h"
+#include "lib/devcom.h"
 #define CREATE_TRACE_POINTS
 #include "diag/en_rep_tracepoint.h"
 #include "en_accel/ipsec.h"
@@ -310,6 +311,8 @@ static void mlx5e_sqs2vport_stop(struct mlx5_eswitch *esw,
        rpriv = mlx5e_rep_to_rep_priv(rep);
        list_for_each_entry_safe(rep_sq, tmp, &rpriv->vport_sqs_list, list) {
                mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule);
+               if (rep_sq->send_to_vport_rule_peer)
+                       mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule_peer);
                list_del(&rep_sq->list);
                kfree(rep_sq);
        }
@@ -319,6 +322,7 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw,
                                 struct mlx5_eswitch_rep *rep,
                                 u32 *sqns_array, int sqns_num)
 {
+       struct mlx5_eswitch *peer_esw = NULL;
        struct mlx5_flow_handle *flow_rule;
        struct mlx5e_rep_priv *rpriv;
        struct mlx5e_rep_sq *rep_sq;
@@ -329,6 +333,10 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw,
                return 0;
 
        rpriv = mlx5e_rep_to_rep_priv(rep);
+       if (mlx5_devcom_is_paired(esw->dev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS))
+               peer_esw = mlx5_devcom_get_peer_data(esw->dev->priv.devcom,
+                                                    MLX5_DEVCOM_ESW_OFFLOADS);
+
        for (i = 0; i < sqns_num; i++) {
                rep_sq = kzalloc(sizeof(*rep_sq), GFP_KERNEL);
                if (!rep_sq) {
@@ -337,7 +345,7 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw,
                }
 
                /* Add re-inject rule to the PF/representor sqs */
-               flow_rule = mlx5_eswitch_add_send_to_vport_rule(esw, rep,
+               flow_rule = mlx5_eswitch_add_send_to_vport_rule(esw, esw, rep,
                                                                sqns_array[i]);
                if (IS_ERR(flow_rule)) {
                        err = PTR_ERR(flow_rule);
@@ -345,12 +353,34 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw,
                        goto out_err;
                }
                rep_sq->send_to_vport_rule = flow_rule;
+               rep_sq->sqn = sqns_array[i];
+
+               if (peer_esw) {
+                       flow_rule = mlx5_eswitch_add_send_to_vport_rule(peer_esw, esw,
+                                                                       rep, sqns_array[i]);
+                       if (IS_ERR(flow_rule)) {
+                               err = PTR_ERR(flow_rule);
+                               mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule);
+                               kfree(rep_sq);
+                               goto out_err;
+                       }
+                       rep_sq->send_to_vport_rule_peer = flow_rule;
+               }
+
                list_add(&rep_sq->list, &rpriv->vport_sqs_list);
        }
+
+       if (peer_esw)
+               mlx5_devcom_release_peer_data(esw->dev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+
        return 0;
 
 out_err:
        mlx5e_sqs2vport_stop(esw, rep);
+
+       if (peer_esw)
+               mlx5_devcom_release_peer_data(esw->dev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+
        return err;
 }
 
@@ -647,27 +677,24 @@ static int mlx5e_create_rep_ttc_table(struct mlx5e_priv *priv)
 {
        struct mlx5e_rep_priv *rpriv = priv->ppriv;
        struct mlx5_eswitch_rep *rep = rpriv->rep;
-       struct mlx5e_rx_res *res = priv->rx_res;
        struct ttc_params ttc_params = {};
-       int tt, err;
+       int err;
 
        priv->fs.ns = mlx5_get_flow_namespace(priv->mdev,
                                              MLX5_FLOW_NAMESPACE_KERNEL);
 
        /* The inner_ttc in the ttc params is intentionally not set */
-       ttc_params.any_tt_tirn = res->channels[0].direct_tir.tirn;
-       mlx5e_set_ttc_ft_params(&ttc_params);
+       mlx5e_set_ttc_params(priv, &ttc_params, false);
 
        if (rep->vport != MLX5_VPORT_UPLINK)
                /* To give uplink rep TTC a lower level for chaining from root ft */
                ttc_params.ft_attr.level = MLX5E_TTC_FT_LEVEL + 1;
 
-       for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
-               ttc_params.indir_tirn[tt] = res->rss[tt].indir_tir.tirn;
-
-       err = mlx5e_create_ttc_table(priv, &ttc_params, &priv->fs.ttc);
-       if (err) {
-               netdev_err(priv->netdev, "Failed to create rep ttc table, err=%d\n", err);
+       priv->fs.ttc = mlx5_create_ttc_table(priv->mdev, &ttc_params);
+       if (IS_ERR(priv->fs.ttc)) {
+               err = PTR_ERR(priv->fs.ttc);
+               netdev_err(priv->netdev, "Failed to create rep ttc table, err=%d\n",
+                          err);
                return err;
        }
        return 0;
@@ -685,7 +712,7 @@ static int mlx5e_create_rep_root_ft(struct mlx5e_priv *priv)
                /* non-uplink reps will skip any bypass tables and go directly to
                 * their own ttc
                 */
-               rpriv->root_ft = priv->fs.ttc.ft.t;
+               rpriv->root_ft = mlx5_get_ttc_flow_table(priv->fs.ttc);
                return 0;
        }
 
@@ -758,14 +785,13 @@ int mlx5e_rep_bond_update(struct mlx5e_priv *priv, bool cleanup)
 static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
 {
        struct mlx5_core_dev *mdev = priv->mdev;
+       struct mlx5e_lro_param lro_param;
        int err;
 
-       priv->rx_res = kvzalloc(sizeof(*priv->rx_res), GFP_KERNEL);
+       priv->rx_res = mlx5e_rx_res_alloc();
        if (!priv->rx_res)
                return -ENOMEM;
 
-       mlx5e_build_rss_params(&priv->rx_res->rss_params, priv->channels.params.num_channels);
-
        mlx5e_init_l2_addr(priv);
 
        err = mlx5e_open_drop_rq(priv, &priv->drop_rq);
@@ -774,25 +800,16 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
                return err;
        }
 
-       err = mlx5e_create_indirect_rqt(priv);
+       lro_param = mlx5e_get_lro_param(&priv->channels.params);
+       err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, 0,
+                               priv->max_nch, priv->drop_rq.rqn, &lro_param,
+                               priv->channels.params.num_channels);
        if (err)
                goto err_close_drop_rq;
 
-       err = mlx5e_create_direct_rqts(priv);
-       if (err)
-               goto err_destroy_indirect_rqts;
-
-       err = mlx5e_create_indirect_tirs(priv, false);
-       if (err)
-               goto err_destroy_direct_rqts;
-
-       err = mlx5e_create_direct_tirs(priv);
-       if (err)
-               goto err_destroy_indirect_tirs;
-
        err = mlx5e_create_rep_ttc_table(priv);
        if (err)
-               goto err_destroy_direct_tirs;
+               goto err_destroy_rx_res;
 
        err = mlx5e_create_rep_root_ft(priv);
        if (err)
@@ -809,18 +826,12 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
 err_destroy_root_ft:
        mlx5e_destroy_rep_root_ft(priv);
 err_destroy_ttc_table:
-       mlx5e_destroy_ttc_table(priv, &priv->fs.ttc);
-err_destroy_direct_tirs:
-       mlx5e_destroy_direct_tirs(priv);
-err_destroy_indirect_tirs:
-       mlx5e_destroy_indirect_tirs(priv);
-err_destroy_direct_rqts:
-       mlx5e_destroy_direct_rqts(priv);
-err_destroy_indirect_rqts:
-       mlx5e_rqt_destroy(&priv->rx_res->indir_rqt);
+       mlx5_destroy_ttc_table(priv->fs.ttc);
+err_destroy_rx_res:
+       mlx5e_rx_res_destroy(priv->rx_res);
 err_close_drop_rq:
        mlx5e_close_drop_rq(&priv->drop_rq);
-       kvfree(priv->rx_res);
+       mlx5e_rx_res_free(priv->rx_res);
        priv->rx_res = NULL;
        return err;
 }
@@ -830,13 +841,10 @@ static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv)
        mlx5e_ethtool_cleanup_steering(priv);
        rep_vport_rx_rule_destroy(priv);
        mlx5e_destroy_rep_root_ft(priv);
-       mlx5e_destroy_ttc_table(priv, &priv->fs.ttc);
-       mlx5e_destroy_direct_tirs(priv);
-       mlx5e_destroy_indirect_tirs(priv);
-       mlx5e_destroy_direct_rqts(priv);
-       mlx5e_rqt_destroy(&priv->rx_res->indir_rqt);
+       mlx5_destroy_ttc_table(priv->fs.ttc);
+       mlx5e_rx_res_destroy(priv->rx_res);
        mlx5e_close_drop_rq(&priv->drop_rq);
-       kvfree(priv->rx_res);
+       mlx5e_rx_res_free(priv->rx_res);
        priv->rx_res = NULL;
 }
 
@@ -1269,10 +1277,64 @@ static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep)
        return rpriv->netdev;
 }
 
+static void mlx5e_vport_rep_event_unpair(struct mlx5_eswitch_rep *rep)
+{
+       struct mlx5e_rep_priv *rpriv;
+       struct mlx5e_rep_sq *rep_sq;
+
+       rpriv = mlx5e_rep_to_rep_priv(rep);
+       list_for_each_entry(rep_sq, &rpriv->vport_sqs_list, list) {
+               if (!rep_sq->send_to_vport_rule_peer)
+                       continue;
+               mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule_peer);
+               rep_sq->send_to_vport_rule_peer = NULL;
+       }
+}
+
+static int mlx5e_vport_rep_event_pair(struct mlx5_eswitch *esw,
+                                     struct mlx5_eswitch_rep *rep,
+                                     struct mlx5_eswitch *peer_esw)
+{
+       struct mlx5_flow_handle *flow_rule;
+       struct mlx5e_rep_priv *rpriv;
+       struct mlx5e_rep_sq *rep_sq;
+
+       rpriv = mlx5e_rep_to_rep_priv(rep);
+       list_for_each_entry(rep_sq, &rpriv->vport_sqs_list, list) {
+               if (rep_sq->send_to_vport_rule_peer)
+                       continue;
+               flow_rule = mlx5_eswitch_add_send_to_vport_rule(peer_esw, esw, rep, rep_sq->sqn);
+               if (IS_ERR(flow_rule))
+                       goto err_out;
+               rep_sq->send_to_vport_rule_peer = flow_rule;
+       }
+
+       return 0;
+err_out:
+       mlx5e_vport_rep_event_unpair(rep);
+       return PTR_ERR(flow_rule);
+}
+
+static int mlx5e_vport_rep_event(struct mlx5_eswitch *esw,
+                                struct mlx5_eswitch_rep *rep,
+                                enum mlx5_switchdev_event event,
+                                void *data)
+{
+       int err = 0;
+
+       if (event == MLX5_SWITCHDEV_EVENT_PAIR)
+               err = mlx5e_vport_rep_event_pair(esw, rep, data);
+       else if (event == MLX5_SWITCHDEV_EVENT_UNPAIR)
+               mlx5e_vport_rep_event_unpair(rep);
+
+       return err;
+}
+
 static const struct mlx5_eswitch_rep_ops rep_ops = {
        .load = mlx5e_vport_rep_load,
        .unload = mlx5e_vport_rep_unload,
-       .get_proto_dev = mlx5e_vport_rep_get_proto_dev
+       .get_proto_dev = mlx5e_vport_rep_get_proto_dev,
+       .event = mlx5e_vport_rep_event,
 };
 
 static int mlx5e_rep_probe(struct auxiliary_device *adev,
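
The representor hunks above follow one devcom discipline: peer eswitch data is only valid between get_peer_data() and release_peer_data(), so the mirror rule is installed inside that window and the reference is dropped on every path. A hedged sketch of that guard pattern (the helper name and out-parameter are illustrative; the devcom and eswitch calls appear in this diff):

    static int example_add_peer_mirror_rule(struct mlx5_eswitch *esw,
                                            struct mlx5_eswitch_rep *rep, u32 sqn,
                                            struct mlx5_flow_handle **out_rule)
    {
            struct mlx5_eswitch *peer_esw;
            struct mlx5_flow_handle *rule;
            int err = 0;

            *out_rule = NULL;
            if (!mlx5_devcom_is_paired(esw->dev->priv.devcom,
                                       MLX5_DEVCOM_ESW_OFFLOADS))
                    return 0;       /* no paired peer: nothing to mirror */

            peer_esw = mlx5_devcom_get_peer_data(esw->dev->priv.devcom,
                                                 MLX5_DEVCOM_ESW_OFFLOADS);
            if (!peer_esw)
                    return 0;

            rule = mlx5_eswitch_add_send_to_vport_rule(peer_esw, esw, rep, sqn);
            if (IS_ERR(rule))
                    err = PTR_ERR(rule);
            else
                    *out_rule = rule;   /* caller stores it, as rep_sq does */

            mlx5_devcom_release_peer_data(esw->dev->priv.devcom,
                                          MLX5_DEVCOM_ESW_OFFLOADS);
            return err;
    }
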
index 47a2dfb..8f0c824 100644
@@ -207,6 +207,8 @@ struct mlx5e_encap_entry {
 
 struct mlx5e_rep_sq {
        struct mlx5_flow_handle *send_to_vport_rule;
+       struct mlx5_flow_handle *send_to_vport_rule_peer;
+       u32 sqn;
        struct list_head         list;
 };
 
index 0cee2fa..e5c4344 100644
 #include <net/flow_offload.h>
 #include <net/sch_generic.h>
 #include <net/pkt_cls.h>
-#include <net/tc_act/tc_gact.h>
-#include <net/tc_act/tc_skbedit.h>
 #include <linux/mlx5/fs.h>
 #include <linux/mlx5/device.h>
 #include <linux/rhashtable.h>
 #include <linux/refcount.h>
 #include <linux/completion.h>
-#include <net/tc_act/tc_mirred.h>
-#include <net/tc_act/tc_vlan.h>
-#include <net/tc_act/tc_tunnel_key.h>
 #include <net/tc_act/tc_pedit.h>
 #include <net/tc_act/tc_csum.h>
-#include <net/tc_act/tc_mpls.h>
 #include <net/psample.h>
 #include <net/arp.h>
 #include <net/ipv6_stubs.h>
@@ -345,7 +339,7 @@ struct mlx5e_hairpin {
        int num_channels;
        struct mlx5e_rqt indir_rqt;
        struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS];
-       struct mlx5e_ttc_table ttc;
+       struct mlx5_ttc_table *ttc;
 };
 
 struct mlx5e_hairpin_entry {
@@ -452,12 +446,32 @@ static void mlx5e_detach_mod_hdr(struct mlx5e_priv *priv,
 static
 struct mlx5_core_dev *mlx5e_hairpin_get_mdev(struct net *net, int ifindex)
 {
+       struct mlx5_core_dev *mdev;
        struct net_device *netdev;
        struct mlx5e_priv *priv;
 
-       netdev = __dev_get_by_index(net, ifindex);
+       netdev = dev_get_by_index(net, ifindex);
+       if (!netdev)
+               return ERR_PTR(-ENODEV);
+
        priv = netdev_priv(netdev);
-       return priv->mdev;
+       mdev = priv->mdev;
+       dev_put(netdev);
+
+       /* Mirred tc action holds a refcount on the ifindex net_device (see
+        * net/sched/act_mirred.c:tcf_mirred_get_dev). So, it's okay to continue using mdev
+        * after dev_put(netdev), while we're in the context of adding a tc flow.
+        *
+        * The mdev pointer corresponds to the peer/out net_device of a hairpin. It is then
+        * stored in a hairpin object, which exists until all flows that refer to it are
+        * removed.
+        *
+        * On the other hand, after a hairpin object has been created, the peer net_device may
+        * be removed/unbound while there are still some hairpin flows that are using it. This
+        * case is handled by mlx5e_tc_hairpin_update_dead_peer, which is hooked to
+        * NETDEV_UNREGISTER event of the peer net_device.
+        */
+       return mdev;
 }
 
 static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin *hp)
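
A hedged sketch tying the comment above to its callers: a stale ifindex now surfaces as ERR_PTR(-ENODEV), so the returned pointer must be IS_ERR-checked before any capability query (the function name is hypothetical; everything else is from this diff):

    static int example_peer_hairpin_capable(struct mlx5e_priv *priv,
                                            int peer_ifindex)
    {
            struct mlx5_core_dev *peer_mdev;

            peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev),
                                               peer_ifindex);
            if (IS_ERR(peer_mdev))
                    return PTR_ERR(peer_mdev);

            return MLX5_CAP_GEN(peer_mdev, hairpin) ? 0 : -EOPNOTSUPP;
    }
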
@@ -505,9 +519,10 @@ static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp)
        if (!indir)
                return -ENOMEM;
 
-       mlx5e_build_default_indir_rqt(indir->table, MLX5E_INDIR_RQT_SIZE, hp->num_channels);
+       mlx5e_rss_params_indir_init_uniform(indir, hp->num_channels);
        err = mlx5e_rqt_init_indir(&hp->indir_rqt, mdev, hp->pair->rqn, hp->num_channels,
-                                   priv->rx_res->rss_params.hash.hfunc, indir);
+                                  mlx5e_rx_res_get_current_hash(priv->rx_res).hfunc,
+                                  indir);
 
        kvfree(indir);
        return err;
@@ -516,8 +531,8 @@ static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp)
 static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp)
 {
        struct mlx5e_priv *priv = hp->func_priv;
-       struct mlx5e_rss_params_hash *rss_hash;
-       enum mlx5e_traffic_types tt, max_tt;
+       struct mlx5e_rss_params_hash rss_hash;
+       enum mlx5_traffic_types tt, max_tt;
        struct mlx5e_tir_builder *builder;
        int err = 0;
 
@@ -525,7 +540,7 @@ static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp)
        if (!builder)
                return -ENOMEM;
 
-       rss_hash = &priv->rx_res->rss_params.hash;
+       rss_hash = mlx5e_rx_res_get_current_hash(priv->rx_res);
 
        for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
                struct mlx5e_rss_params_traffic_type rss_tt;
@@ -535,7 +550,7 @@ static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp)
                mlx5e_tir_builder_build_rqt(builder, hp->tdn,
                                            mlx5e_rqt_get_rqtn(&hp->indir_rqt),
                                            false);
-               mlx5e_tir_builder_build_rss(builder, rss_hash, &rss_tt, false);
+               mlx5e_tir_builder_build_rss(builder, &rss_hash, &rss_tt, false);
 
                err = mlx5e_tir_init(&hp->indir_tir[tt], builder, hp->func_mdev, false);
                if (err) {
@@ -574,12 +589,16 @@ static void mlx5e_hairpin_set_ttc_params(struct mlx5e_hairpin *hp,
 
        memset(ttc_params, 0, sizeof(*ttc_params));
 
-       ttc_params->any_tt_tirn = mlx5e_tir_get_tirn(&hp->direct_tir);
-
-       for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
-               ttc_params->indir_tirn[tt] = mlx5e_tir_get_tirn(&hp->indir_tir[tt]);
+       ttc_params->ns = mlx5_get_flow_namespace(hp->func_mdev,
+                                                MLX5_FLOW_NAMESPACE_KERNEL);
+       for (tt = 0; tt < MLX5_NUM_TT; tt++) {
+               ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+               ttc_params->dests[tt].tir_num =
+                       tt == MLX5_TT_ANY ?
+                               mlx5e_tir_get_tirn(&hp->direct_tir) :
+                               mlx5e_tir_get_tirn(&hp->indir_tir[tt]);
+       }
 
-       ft_attr->max_fte = MLX5E_TTC_TABLE_SIZE;
        ft_attr->level = MLX5E_TC_TTC_FT_LEVEL;
        ft_attr->prio = MLX5E_TC_PRIO;
 }
@@ -599,12 +618,15 @@ static int mlx5e_hairpin_rss_init(struct mlx5e_hairpin *hp)
                goto err_create_indirect_tirs;
 
        mlx5e_hairpin_set_ttc_params(hp, &ttc_params);
-       err = mlx5e_create_ttc_table(priv, &ttc_params, &hp->ttc);
-       if (err)
+       hp->ttc = mlx5_create_ttc_table(priv->mdev, &ttc_params);
+       if (IS_ERR(hp->ttc)) {
+               err = PTR_ERR(hp->ttc);
                goto err_create_ttc_table;
+       }
 
        netdev_dbg(priv->netdev, "add hairpin: using %d channels rss ttc table id %x\n",
-                  hp->num_channels, hp->ttc.ft.t->id);
+                  hp->num_channels,
+                  mlx5_get_ttc_flow_table(hp->ttc)->id);
 
        return 0;
 
@@ -618,9 +640,7 @@ err_create_indirect_tirs:
 
 static void mlx5e_hairpin_rss_cleanup(struct mlx5e_hairpin *hp)
 {
-       struct mlx5e_priv *priv = hp->func_priv;
-
-       mlx5e_destroy_ttc_table(priv, &hp->ttc);
+       mlx5_destroy_ttc_table(hp->ttc);
        mlx5e_hairpin_destroy_indirect_tirs(hp);
        mlx5e_rqt_destroy(&hp->indir_rqt);
 }
@@ -640,6 +660,10 @@ mlx5e_hairpin_create(struct mlx5e_priv *priv, struct mlx5_hairpin_params *params
 
        func_mdev = priv->mdev;
        peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
+       if (IS_ERR(peer_mdev)) {
+               err = PTR_ERR(peer_mdev);
+               goto create_pair_err;
+       }
 
        pair = mlx5_core_hairpin_create(func_mdev, peer_mdev, params);
        if (IS_ERR(pair)) {
@@ -778,6 +802,11 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
        int err;
 
        peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
+       if (IS_ERR(peer_mdev)) {
+               NL_SET_ERR_MSG_MOD(extack, "invalid ifindex of mirred device");
+               return PTR_ERR(peer_mdev);
+       }
+
        if (!MLX5_CAP_GEN(priv->mdev, hairpin) || !MLX5_CAP_GEN(peer_mdev, hairpin)) {
                NL_SET_ERR_MSG_MOD(extack, "hairpin is not supported");
                return -EOPNOTSUPP;
@@ -855,7 +884,8 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
 attach_flow:
        if (hpe->hp->num_channels > 1) {
                flow_flag_set(flow, HAIRPIN_RSS);
-               flow->attr->nic_attr->hairpin_ft = hpe->hp->ttc.ft.t;
+               flow->attr->nic_attr->hairpin_ft =
+                       mlx5_get_ttc_flow_table(hpe->hp->ttc);
        } else {
                flow->attr->nic_attr->hairpin_tirn = mlx5e_tir_get_tirn(&hpe->hp->direct_tir);
        }
@@ -1001,15 +1031,17 @@ err_ft_get:
 
 static int
 mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
-                     struct mlx5e_tc_flow_parse_attr *parse_attr,
                      struct mlx5e_tc_flow *flow,
                      struct netlink_ext_ack *extack)
 {
+       struct mlx5e_tc_flow_parse_attr *parse_attr;
        struct mlx5_flow_attr *attr = flow->attr;
        struct mlx5_core_dev *dev = priv->mdev;
-       struct mlx5_fc *counter = NULL;
+       struct mlx5_fc *counter;
        int err;
 
+       parse_attr = attr->parse_attr;
+
        if (flow_flag_test(flow, HAIRPIN)) {
                err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack);
                if (err)
@@ -1329,9 +1361,9 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
        bool vf_tun = false, encap_valid = true;
        struct net_device *encap_dev = NULL;
        struct mlx5_esw_flow_attr *esw_attr;
-       struct mlx5_fc *counter = NULL;
        struct mlx5e_rep_priv *rpriv;
        struct mlx5e_priv *out_priv;
+       struct mlx5_fc *counter;
        u32 max_prio, max_chain;
        int err = 0;
        int out_index;
@@ -3297,10 +3329,10 @@ static int validate_goto_chain(struct mlx5e_priv *priv,
 
 static int parse_tc_nic_actions(struct mlx5e_priv *priv,
                                struct flow_action *flow_action,
-                               struct mlx5e_tc_flow_parse_attr *parse_attr,
                                struct mlx5e_tc_flow *flow,
                                struct netlink_ext_ack *extack)
 {
+       struct mlx5e_tc_flow_parse_attr *parse_attr;
        struct mlx5_flow_attr *attr = flow->attr;
        struct pedit_headers_action hdrs[2] = {};
        const struct flow_action_entry *act;
@@ -3316,8 +3348,8 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv,
                return -EOPNOTSUPP;
 
        nic_attr = attr->nic_attr;
-
        nic_attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
+       parse_attr = attr->parse_attr;
 
        flow_action_for_each(i, act, flow_action) {
                switch (act->id) {
@@ -3326,10 +3358,8 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv,
                                  MLX5_FLOW_CONTEXT_ACTION_COUNT;
                        break;
                case FLOW_ACTION_DROP:
-                       action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
-                       if (MLX5_CAP_FLOWTABLE(priv->mdev,
-                                              flow_table_properties_nic_receive.flow_counter))
-                               action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
+                       action |= MLX5_FLOW_CONTEXT_ACTION_DROP |
+                                 MLX5_FLOW_CONTEXT_ACTION_COUNT;
                        break;
                case FLOW_ACTION_MANGLE:
                case FLOW_ACTION_ADD:
@@ -3370,7 +3400,7 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv,
                                                   "device is not on same HW, can't offload");
                                netdev_warn(priv->netdev, "device %s not on same HW, can't offload\n",
                                            peer_dev->name);
-                               return -EINVAL;
+                               return -EOPNOTSUPP;
                        }
                        }
                        break;
@@ -3380,7 +3410,7 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv,
                        if (mark & ~MLX5E_TC_FLOW_ID_MASK) {
                                NL_SET_ERR_MSG_MOD(extack,
                                                   "Bad flow mark - only 16 bit is supported");
-                               return -EINVAL;
+                               return -EOPNOTSUPP;
                        }
 
                        nic_attr->flow_tag = mark;
@@ -3677,8 +3707,7 @@ static int verify_uplink_forwarding(struct mlx5e_priv *priv,
 static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
                                struct flow_action *flow_action,
                                struct mlx5e_tc_flow *flow,
-                               struct netlink_ext_ack *extack,
-                               struct net_device *filter_dev)
+                               struct netlink_ext_ack *extack)
 {
        struct pedit_headers_action hdrs[2] = {};
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
@@ -3743,7 +3772,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
                                                   "mpls pop supported only as first action");
                                return -EOPNOTSUPP;
                        }
-                       if (!netif_is_bareudp(filter_dev)) {
+                       if (!netif_is_bareudp(parse_attr->filter_dev)) {
                                NL_SET_ERR_MSG_MOD(extack,
                                                   "mpls pop supported only on bareudp devices");
                                return -EOPNOTSUPP;
@@ -3892,7 +3921,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
                                            "devices %s %s not on same switch HW, can't offload forwarding\n",
                                            priv->netdev->name,
                                            out_dev->name);
-                               return -EINVAL;
+                               return -EOPNOTSUPP;
                        }
                        }
                        break;
@@ -4245,7 +4274,7 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
        if (err)
                goto err_free;
 
-       err = parse_tc_fdb_actions(priv, &rule->action, flow, extack, filter_dev);
+       err = parse_tc_fdb_actions(priv, &rule->action, flow, extack);
        if (err)
                goto err_free;
 
@@ -4391,11 +4420,11 @@ mlx5e_add_nic_flow(struct mlx5e_priv *priv,
        if (err)
                goto err_free;
 
-       err = parse_tc_nic_actions(priv, &rule->action, parse_attr, flow, extack);
+       err = parse_tc_nic_actions(priv, &rule->action, flow, extack);
        if (err)
                goto err_free;
 
-       err = mlx5e_tc_add_nic_flow(priv, parse_attr, flow, extack);
+       err = mlx5e_tc_add_nic_flow(priv, flow, extack);
        if (err)
                goto err_free;
 
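The hunks above drop parse_attr (and, in the fdb parser, filter_dev) from the action-parser signatures: both are reachable from flow->attr, so the callee now derives them itself instead of trusting the caller to pass a matching pair. A minimal sketch of that refactor, with simplified stand-in types (none of these names are the driver's):

    struct parse_attr { const char *filter_dev; };
    struct flow_attr  { struct parse_attr *parse_attr; };
    struct flow       { struct flow_attr *attr; };

    /* After the refactor: one parameter, one source of truth. */
    static int parse_actions(struct flow *flow)
    {
            struct parse_attr *parse_attr = flow->attr->parse_attr;

            /* ... use parse_attr->filter_dev exactly as before ... */
            return parse_attr->filter_dev ? 0 : -1;
    }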
@@ -4822,6 +4851,7 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
        struct mlx5_core_dev *dev = priv->mdev;
        struct mapping_ctx *chains_mapping;
        struct mlx5_chains_attr attr = {};
+       u64 mapping_id;
        int err;
 
        mlx5e_mod_hdr_tbl_init(&tc->mod_hdr);
@@ -4835,8 +4865,12 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
 
        lockdep_set_class(&tc->ht.mutex, &tc_ht_lock_key);
 
-       chains_mapping = mapping_create(sizeof(struct mlx5_mapped_obj),
-                                       MLX5E_TC_TABLE_CHAIN_TAG_MASK, true);
+       mapping_id = mlx5_query_nic_system_image_guid(dev);
+
+       chains_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_CHAIN,
+                                              sizeof(struct mlx5_mapped_obj),
+                                              MLX5E_TC_TABLE_CHAIN_TAG_MASK, true);
+
        if (IS_ERR(chains_mapping)) {
                err = PTR_ERR(chains_mapping);
                goto err_mapping;
@@ -4925,6 +4959,7 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
        struct mapping_ctx *mapping;
        struct mlx5_eswitch *esw;
        struct mlx5e_priv *priv;
+       u64 mapping_id;
        int err = 0;
 
        uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht);
@@ -4941,8 +4976,12 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
        uplink_priv->esw_psample = mlx5_esw_sample_init(netdev_priv(priv->netdev));
 #endif
 
-       mapping = mapping_create(sizeof(struct tunnel_match_key),
-                                TUNNEL_INFO_BITS_MASK, true);
+       mapping_id = mlx5_query_nic_system_image_guid(esw->dev);
+
+       mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_TUNNEL,
+                                       sizeof(struct tunnel_match_key),
+                                       TUNNEL_INFO_BITS_MASK, true);
+
        if (IS_ERR(mapping)) {
                err = PTR_ERR(mapping);
                goto err_tun_mapping;
@@ -4950,7 +4989,8 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
        uplink_priv->tunnel_mapping = mapping;
 
        /* 0xFFF is reserved for stack devices slow path table mark */
-       mapping = mapping_create(sz_enc_opts, ENC_OPTS_BITS_MASK - 1, true);
+       mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_TUNNEL_ENC_OPTS,
+                                       sz_enc_opts, ENC_OPTS_BITS_MASK - 1, true);
        if (IS_ERR(mapping)) {
                err = PTR_ERR(mapping);
                goto err_enc_opts_mapping;
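The mapping contexts above move from mapping_create() to mapping_create_for_id(), keyed by the NIC system image GUID plus a MAPPING_TYPE_* constant, so two devices reporting the same GUID (e.g. two PFs that will later share an FDB) resolve to one shared context instead of each building its own. A rough userspace model of id-keyed sharing, assuming a simple refcounted registry (all names here are illustrative, not the driver's):

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct mapping_ctx {
            uint64_t id;                /* e.g. the system image GUID */
            int type;                   /* e.g. MAPPING_TYPE_CHAIN */
            int refcount;
            struct mapping_ctx *next;
    };

    static struct mapping_ctx *registry;

    /* Return the existing context for (id, type) or create a new one. */
    static struct mapping_ctx *mapping_get(uint64_t id, int type)
    {
            struct mapping_ctx *c;

            for (c = registry; c; c = c->next) {
                    if (c->id == id && c->type == type) {
                            c->refcount++;      /* shared by a second caller */
                            return c;
                    }
            }
            c = calloc(1, sizeof(*c));
            if (!c)
                    return NULL;
            c->id = id;
            c->type = type;
            c->refcount = 1;
            c->next = registry;
            registry = c;
            return c;
    }

    int main(void)
    {
            /* Two "devices" reporting the same GUID share one context. */
            struct mapping_ctx *a = mapping_get(0xabcdULL, 0);
            struct mapping_ctx *b = mapping_get(0xabcdULL, 0);

            printf("shared: %d, refcount: %d\n", a == b, b->refcount);
            return 0;
    }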
index 505bf81..2e504c7 100644
@@ -15,6 +15,15 @@ static void esw_acl_egress_ofld_fwd2vport_destroy(struct mlx5_vport *vport)
        vport->egress.offloads.fwd_rule = NULL;
 }
 
+static void esw_acl_egress_ofld_bounce_rule_destroy(struct mlx5_vport *vport)
+{
+       if (!vport->egress.offloads.bounce_rule)
+               return;
+
+       mlx5_del_flow_rules(vport->egress.offloads.bounce_rule);
+       vport->egress.offloads.bounce_rule = NULL;
+}
+
 static int esw_acl_egress_ofld_fwd2vport_create(struct mlx5_eswitch *esw,
                                                struct mlx5_vport *vport,
                                                struct mlx5_flow_destination *fwd_dest)
@@ -87,6 +96,7 @@ static void esw_acl_egress_ofld_rules_destroy(struct mlx5_vport *vport)
 {
        esw_acl_egress_vlan_destroy(vport);
        esw_acl_egress_ofld_fwd2vport_destroy(vport);
+       esw_acl_egress_ofld_bounce_rule_destroy(vport);
 }
 
 static int esw_acl_egress_ofld_groups_create(struct mlx5_eswitch *esw,
@@ -145,6 +155,12 @@ static void esw_acl_egress_ofld_groups_destroy(struct mlx5_vport *vport)
                mlx5_destroy_flow_group(vport->egress.offloads.fwd_grp);
                vport->egress.offloads.fwd_grp = NULL;
        }
+
+       if (!IS_ERR_OR_NULL(vport->egress.offloads.bounce_grp)) {
+               mlx5_destroy_flow_group(vport->egress.offloads.bounce_grp);
+               vport->egress.offloads.bounce_grp = NULL;
+       }
+
        esw_acl_egress_vlan_grp_destroy(vport);
 }
 
index 97e6cb6..7ffea23 100644
@@ -1458,8 +1458,6 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int mode, int num_vfs)
 
        esw->mode = mode;
 
-       mlx5_lag_update(esw->dev);
-
        if (mode == MLX5_ESWITCH_LEGACY) {
                err = esw_legacy_enable(esw);
        } else {
@@ -1506,6 +1504,7 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs)
        if (!mlx5_esw_allowed(esw))
                return 0;
 
+       mlx5_lag_disable_change(esw->dev);
        down_write(&esw->mode_lock);
        if (esw->mode == MLX5_ESWITCH_NONE) {
                ret = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_LEGACY, num_vfs);
@@ -1519,6 +1518,7 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs)
                        esw->esw_funcs.num_vfs = num_vfs;
        }
        up_write(&esw->mode_lock);
+       mlx5_lag_enable_change(esw->dev);
        return ret;
 }
 
@@ -1550,8 +1550,6 @@ void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw, bool clear_vf)
        old_mode = esw->mode;
        esw->mode = MLX5_ESWITCH_NONE;
 
-       mlx5_lag_update(esw->dev);
-
        if (old_mode == MLX5_ESWITCH_OFFLOADS)
                mlx5_rescan_drivers(esw->dev);
 
@@ -1567,10 +1565,12 @@ void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf)
        if (!mlx5_esw_allowed(esw))
                return;
 
+       mlx5_lag_disable_change(esw->dev);
        down_write(&esw->mode_lock);
        mlx5_eswitch_disable_locked(esw, clear_vf);
        esw->esw_funcs.num_vfs = 0;
        up_write(&esw->mode_lock);
+       mlx5_lag_enable_change(esw->dev);
 }
 
 static int mlx5_query_hca_cap_host_pf(struct mlx5_core_dev *dev, void *out)
@@ -1759,7 +1759,9 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
        ida_init(&esw->offloads.vport_metadata_ida);
        xa_init_flags(&esw->offloads.vhca_map, XA_FLAGS_ALLOC);
        mutex_init(&esw->state_lock);
+       lockdep_register_key(&esw->mode_lock_key);
        init_rwsem(&esw->mode_lock);
+       lockdep_set_class(&esw->mode_lock, &esw->mode_lock_key);
 
        esw->enabled_vports = 0;
        esw->mode = MLX5_ESWITCH_NONE;
@@ -1793,6 +1795,7 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
 
        esw->dev->priv.eswitch = NULL;
        destroy_workqueue(esw->work_queue);
+       lockdep_unregister_key(&esw->mode_lock_key);
        mutex_destroy(&esw->state_lock);
        WARN_ON(!xa_empty(&esw->offloads.vhca_map));
        xa_destroy(&esw->offloads.vhca_map);
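mlx5_eswitch_init() now registers a dynamic lock_class_key per eswitch and classes mode_lock with it; mlx5_eswitch_cleanup() unregisters the key once the lock can no longer be taken. Giving each instance its own lockdep class lets two eswitches' mode locks be held nested without lockdep reporting recursive locking of a single class — presumably the motivation here, given the paired esw locking added by the LAG hunks later in this series. The general shape, as an illustrative kernel-style fragment rather than driver code:

    #include <linux/lockdep.h>
    #include <linux/rwsem.h>

    struct obj {
            struct rw_semaphore lock;
            struct lock_class_key key;      /* one lockdep class per instance */
    };

    static void obj_init(struct obj *o)
    {
            lockdep_register_key(&o->key);
            init_rwsem(&o->lock);
            lockdep_set_class(&o->lock, &o->key);
    }

    static void obj_fini(struct obj *o)
    {
            /* only after the lock's last use */
            lockdep_unregister_key(&o->key);
    }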
@@ -1889,8 +1892,7 @@ is_port_function_supported(struct mlx5_eswitch *esw, u16 vport_num)
               mlx5_esw_is_sf_vport(esw, vport_num);
 }
 
-int mlx5_devlink_port_function_hw_addr_get(struct devlink *devlink,
-                                          struct devlink_port *port,
+int mlx5_devlink_port_function_hw_addr_get(struct devlink_port *port,
                                           u8 *hw_addr, int *hw_addr_len,
                                           struct netlink_ext_ack *extack)
 {
@@ -1899,7 +1901,7 @@ int mlx5_devlink_port_function_hw_addr_get(struct devlink *devlink,
        int err = -EOPNOTSUPP;
        u16 vport_num;
 
-       esw = mlx5_devlink_eswitch_get(devlink);
+       esw = mlx5_devlink_eswitch_get(port->devlink);
        if (IS_ERR(esw))
                return PTR_ERR(esw);
 
@@ -1923,8 +1925,7 @@ int mlx5_devlink_port_function_hw_addr_get(struct devlink *devlink,
        return err;
 }
 
-int mlx5_devlink_port_function_hw_addr_set(struct devlink *devlink,
-                                          struct devlink_port *port,
+int mlx5_devlink_port_function_hw_addr_set(struct devlink_port *port,
                                           const u8 *hw_addr, int hw_addr_len,
                                           struct netlink_ext_ack *extack)
 {
@@ -1933,7 +1934,7 @@ int mlx5_devlink_port_function_hw_addr_set(struct devlink *devlink,
        int err = -EOPNOTSUPP;
        u16 vport_num;
 
-       esw = mlx5_devlink_eswitch_get(devlink);
+       esw = mlx5_devlink_eswitch_get(port->devlink);
        if (IS_ERR(esw)) {
                NL_SET_ERR_MSG_MOD(extack, "Eswitch doesn't support set hw_addr");
                return PTR_ERR(esw);
@@ -2366,9 +2367,22 @@ int mlx5_esw_try_lock(struct mlx5_eswitch *esw)
  */
 void mlx5_esw_unlock(struct mlx5_eswitch *esw)
 {
+       if (!mlx5_esw_allowed(esw))
+               return;
        up_write(&esw->mode_lock);
 }
 
+/**
+ * mlx5_esw_lock() - Take write lock on esw mode lock
+ * @esw: eswitch device.
+ */
+void mlx5_esw_lock(struct mlx5_eswitch *esw)
+{
+       if (!mlx5_esw_allowed(esw))
+               return;
+       down_write(&esw->mode_lock);
+}
+
 /**
  * mlx5_eswitch_get_total_vports - Get total vports of the eswitch
  *
@@ -2384,3 +2398,15 @@ u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev)
        return mlx5_esw_allowed(esw) ? esw->total_vports : 0;
 }
 EXPORT_SYMBOL_GPL(mlx5_eswitch_get_total_vports);
+
+/**
+ * mlx5_eswitch_get_core_dev - Get the mdev device
+ * @esw: eswitch device.
+ *
+ * Return the Mellanox core device which manages the eswitch.
+ */
+struct mlx5_core_dev *mlx5_eswitch_get_core_dev(struct mlx5_eswitch *esw)
+{
+       return mlx5_esw_allowed(esw) ? esw->dev : NULL;
+}
+EXPORT_SYMBOL(mlx5_eswitch_get_core_dev);
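mlx5_eswitch_get_core_dev() is exported for consumers outside the core driver and returns NULL when eswitch functionality is not allowed, so callers must check the result. A hedged usage sketch (the consumer function below is made up):

    #include <linux/mlx5/driver.h>
    #include <linux/mlx5/eswitch.h>

    /* Hypothetical consumer: true if @esw is backed by @dev. */
    static bool esw_is_backed_by(struct mlx5_eswitch *esw,
                                 struct mlx5_core_dev *dev)
    {
            struct mlx5_core_dev *mdev = mlx5_eswitch_get_core_dev(esw);

            return mdev && mdev == dev; /* NULL when eswitch is disallowed */
    }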
index 48cac5b..01e8dfb 100644
@@ -86,6 +86,14 @@ struct mlx5_mapped_obj {
 #define esw_chains(esw) \
        ((esw)->fdb_table.offloads.esw_chains_priv)
 
+enum {
+       MAPPING_TYPE_CHAIN,
+       MAPPING_TYPE_TUNNEL,
+       MAPPING_TYPE_TUNNEL_ENC_OPTS,
+       MAPPING_TYPE_LABELS,
+       MAPPING_TYPE_ZONE,
+};
+
 struct vport_ingress {
        struct mlx5_flow_table *acl;
        struct mlx5_flow_handle *allow_rule;
@@ -124,6 +132,8 @@ struct vport_egress {
                struct {
                        struct mlx5_flow_group *fwd_grp;
                        struct mlx5_flow_handle *fwd_rule;
+                       struct mlx5_flow_handle *bounce_rule;
+                       struct mlx5_flow_group *bounce_grp;
                } offloads;
        };
 };
@@ -315,6 +325,7 @@ struct mlx5_eswitch {
                u32             large_group_num;
        }  params;
        struct blocking_notifier_head n_head;
+       struct lock_class_key mode_lock_key;
 };
 
 void esw_offloads_disable(struct mlx5_eswitch *esw);
@@ -475,12 +486,10 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink,
                                        struct netlink_ext_ack *extack);
 int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink,
                                        enum devlink_eswitch_encap_mode *encap);
-int mlx5_devlink_port_function_hw_addr_get(struct devlink *devlink,
-                                          struct devlink_port *port,
+int mlx5_devlink_port_function_hw_addr_get(struct devlink_port *port,
                                           u8 *hw_addr, int *hw_addr_len,
                                           struct netlink_ext_ack *extack);
-int mlx5_devlink_port_function_hw_addr_set(struct devlink *devlink,
-                                          struct devlink_port *port,
+int mlx5_devlink_port_function_hw_addr_set(struct devlink_port *port,
                                           const u8 *hw_addr, int hw_addr_len,
                                           struct netlink_ext_ack *extack);
 
@@ -636,7 +645,7 @@ struct esw_vport_tbl_namespace {
 };
 
 struct mlx5_vport_tbl_attr {
-       u16 chain;
+       u32 chain;
        u16 prio;
        u16 vport;
        const struct esw_vport_tbl_namespace *vport_ns;
@@ -699,11 +708,18 @@ void mlx5_esw_get(struct mlx5_core_dev *dev);
 void mlx5_esw_put(struct mlx5_core_dev *dev);
 int mlx5_esw_try_lock(struct mlx5_eswitch *esw);
 void mlx5_esw_unlock(struct mlx5_eswitch *esw);
+void mlx5_esw_lock(struct mlx5_eswitch *esw);
 
 void esw_vport_change_handle_locked(struct mlx5_vport *vport);
 
 bool mlx5_esw_offloads_controller_valid(const struct mlx5_eswitch *esw, u32 controller);
 
+int mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw,
+                                           struct mlx5_eswitch *slave_esw);
+void mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw,
+                                             struct mlx5_eswitch *slave_esw);
+int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw);
+
 #else  /* CONFIG_MLX5_ESWITCH */
 /* eswitch API stubs */
 static inline int  mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
@@ -719,6 +735,9 @@ static inline const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev)
        return ERR_PTR(-EOPNOTSUPP);
 }
 
+static inline void mlx5_esw_unlock(struct mlx5_eswitch *esw) { return; }
+static inline void mlx5_esw_lock(struct mlx5_eswitch *esw) { return; }
+
 static inline struct mlx5_flow_handle *
 esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag)
 {
@@ -731,6 +750,23 @@ mlx5_esw_vport_to_devlink_port_index(const struct mlx5_core_dev *dev,
 {
        return vport_num;
 }
+
+static inline int
+mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw,
+                                       struct mlx5_eswitch *slave_esw)
+{
+       return 0;
+}
+
+static inline void
+mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw,
+                                        struct mlx5_eswitch *slave_esw) {}
+
+static inline int
+mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw)
+{
+       return 0;
+}
 #endif /* CONFIG_MLX5_ESWITCH */
 
 #endif /* __MLX5_ESWITCH_H__ */
index 7579f34..0e3645c 100644
@@ -382,10 +382,11 @@ esw_setup_vport_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *f
 {
        dest[dest_idx].type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
        dest[dest_idx].vport.num = esw_attr->dests[attr_idx].rep->vport;
-       dest[dest_idx].vport.vhca_id =
-               MLX5_CAP_GEN(esw_attr->dests[attr_idx].mdev, vhca_id);
-       if (MLX5_CAP_ESW(esw->dev, merged_eswitch))
+       if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) {
+               dest[dest_idx].vport.vhca_id =
+                       MLX5_CAP_GEN(esw_attr->dests[attr_idx].mdev, vhca_id);
                dest[dest_idx].vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
+       }
        if (esw_attr->dests[attr_idx].flags & MLX5_ESW_DEST_ENCAP) {
                if (pkt_reformat) {
                        flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
@@ -925,6 +926,7 @@ out:
 
 struct mlx5_flow_handle *
 mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw,
+                                   struct mlx5_eswitch *from_esw,
                                    struct mlx5_eswitch_rep *rep,
                                    u32 sqn)
 {
@@ -943,10 +945,10 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw,
        misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
        MLX5_SET(fte_match_set_misc, misc, source_sqn, sqn);
        /* source vport is the esw manager */
-       MLX5_SET(fte_match_set_misc, misc, source_port, rep->esw->manager_vport);
+       MLX5_SET(fte_match_set_misc, misc, source_port, from_esw->manager_vport);
        if (MLX5_CAP_ESW(on_esw->dev, merged_eswitch))
                MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id,
-                        MLX5_CAP_GEN(rep->esw->dev, vhca_id));
+                        MLX5_CAP_GEN(from_esw->dev, vhca_id));
 
        misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
        MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_sqn);
@@ -962,6 +964,9 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw,
        dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
        flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 
+       if (rep->vport == MLX5_VPORT_UPLINK)
+               spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT;
+
        flow_rule = mlx5_add_flow_rules(on_esw->fdb_table.offloads.slow_fdb,
                                        spec, &flow_act, &dest, 1);
        if (IS_ERR(flow_rule))
@@ -1612,7 +1617,18 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw)
                goto ns_err;
        }
 
-       table_size = esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ +
+       /* To be strictly correct:
+        *      MLX5_MAX_PORTS * (esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ)
+        * should be:
+        *      esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ +
+        *      peer_esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ
+        * but since the peer device might not be in switchdev mode that is
+        * not possible. Instead we rely on the fact that, by default, FW sets
+        * max VFs and max SFs to the same value on both devices. If this ever
+        * changes, note that the peer miss group should also be sized by the
+        * peer's total vports (currently it also uses esw->total_vports).
+        */
+       table_size = MLX5_MAX_PORTS * (esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ) +
                MLX5_ESW_MISS_FLOWS + esw->total_vports + esw->esw_funcs.num_vfs;
 
        /* create the slow path fdb with encap set, so further table instances
@@ -1669,7 +1685,8 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw)
                         source_eswitch_owner_vhca_id_valid, 1);
        }
 
-       ix = esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ;
+       /* See comment above table_size calculation */
+       ix = MLX5_MAX_PORTS * (esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ);
        MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
        MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix - 1);
 
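With illustrative numbers the doubled sizing is easy to check. MAX_SQ_NVPORTS (32), MAX_PF_SQ (256), MLX5_ESW_MISS_FLOWS (2) and MLX5_MAX_PORTS (2) match the driver at the time of this series; total_vports and num_vfs below are invented:

    #include <stdio.h>

    int main(void)
    {
            const int MLX5_MAX_PORTS = 2, MAX_SQ_NVPORTS = 32, MAX_PF_SQ = 256;
            const int MLX5_ESW_MISS_FLOWS = 2;
            const int total_vports = 10, num_vfs = 8;       /* made up */

            /* Same formulas as the two hunks above. */
            int ix = MLX5_MAX_PORTS * (total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ);
            int table_size = ix + MLX5_ESW_MISS_FLOWS + total_vports + num_vfs;

            /* prints: send-to-vport group [0..1151], table_size 1172 */
            printf("send-to-vport group [0..%d], table_size %d\n",
                   ix - 1, table_size);
            return 0;
    }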
@@ -2309,14 +2326,293 @@ void esw_offloads_unload_rep(struct mlx5_eswitch *esw, u16 vport_num)
                mlx5_esw_offloads_devlink_port_unregister(esw, vport_num);
 }
 
+static int esw_set_uplink_slave_ingress_root(struct mlx5_core_dev *master,
+                                            struct mlx5_core_dev *slave)
+{
+       u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)]   = {};
+       u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {};
+       struct mlx5_eswitch *esw;
+       struct mlx5_flow_root_namespace *root;
+       struct mlx5_flow_namespace *ns;
+       struct mlx5_vport *vport;
+       int err;
+
+       MLX5_SET(set_flow_table_root_in, in, opcode,
+                MLX5_CMD_OP_SET_FLOW_TABLE_ROOT);
+       MLX5_SET(set_flow_table_root_in, in, table_type, FS_FT_ESW_INGRESS_ACL);
+       MLX5_SET(set_flow_table_root_in, in, other_vport, 1);
+       MLX5_SET(set_flow_table_root_in, in, vport_number, MLX5_VPORT_UPLINK);
+
+       if (master) {
+               esw = master->priv.eswitch;
+               vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK);
+               MLX5_SET(set_flow_table_root_in, in, table_of_other_vport, 1);
+               MLX5_SET(set_flow_table_root_in, in, table_vport_number,
+                        MLX5_VPORT_UPLINK);
+
+               ns = mlx5_get_flow_vport_acl_namespace(master,
+                                                      MLX5_FLOW_NAMESPACE_ESW_INGRESS,
+                                                      vport->index);
+               root = find_root(&ns->node);
+               mutex_lock(&root->chain_lock);
+
+               MLX5_SET(set_flow_table_root_in, in,
+                        table_eswitch_owner_vhca_id_valid, 1);
+               MLX5_SET(set_flow_table_root_in, in,
+                        table_eswitch_owner_vhca_id,
+                        MLX5_CAP_GEN(master, vhca_id));
+               MLX5_SET(set_flow_table_root_in, in, table_id,
+                        root->root_ft->id);
+       } else {
+               esw = slave->priv.eswitch;
+               vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK);
+               ns = mlx5_get_flow_vport_acl_namespace(slave,
+                                                      MLX5_FLOW_NAMESPACE_ESW_INGRESS,
+                                                      vport->index);
+               root = find_root(&ns->node);
+               mutex_lock(&root->chain_lock);
+               MLX5_SET(set_flow_table_root_in, in, table_id, root->root_ft->id);
+       }
+
+       err = mlx5_cmd_exec(slave, in, sizeof(in), out, sizeof(out));
+       mutex_unlock(&root->chain_lock);
+
+       return err;
+}
+
+static int esw_set_slave_root_fdb(struct mlx5_core_dev *master,
+                                 struct mlx5_core_dev *slave)
+{
+       u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)]   = {};
+       u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {};
+       struct mlx5_flow_root_namespace *root;
+       struct mlx5_flow_namespace *ns;
+       int err;
+
+       MLX5_SET(set_flow_table_root_in, in, opcode,
+                MLX5_CMD_OP_SET_FLOW_TABLE_ROOT);
+       MLX5_SET(set_flow_table_root_in, in, table_type,
+                FS_FT_FDB);
+
+       if (master) {
+               ns = mlx5_get_flow_namespace(master,
+                                            MLX5_FLOW_NAMESPACE_FDB);
+               root = find_root(&ns->node);
+               mutex_lock(&root->chain_lock);
+               MLX5_SET(set_flow_table_root_in, in,
+                        table_eswitch_owner_vhca_id_valid, 1);
+               MLX5_SET(set_flow_table_root_in, in,
+                        table_eswitch_owner_vhca_id,
+                        MLX5_CAP_GEN(master, vhca_id));
+               MLX5_SET(set_flow_table_root_in, in, table_id,
+                        root->root_ft->id);
+       } else {
+               ns = mlx5_get_flow_namespace(slave,
+                                            MLX5_FLOW_NAMESPACE_FDB);
+               root = find_root(&ns->node);
+               mutex_lock(&root->chain_lock);
+               MLX5_SET(set_flow_table_root_in, in, table_id,
+                        root->root_ft->id);
+       }
+
+       err = mlx5_cmd_exec(slave, in, sizeof(in), out, sizeof(out));
+       mutex_unlock(&root->chain_lock);
+
+       return err;
+}
+
+static int __esw_set_master_egress_rule(struct mlx5_core_dev *master,
+                                       struct mlx5_core_dev *slave,
+                                       struct mlx5_vport *vport,
+                                       struct mlx5_flow_table *acl)
+{
+       struct mlx5_flow_handle *flow_rule = NULL;
+       struct mlx5_flow_destination dest = {};
+       struct mlx5_flow_act flow_act = {};
+       struct mlx5_flow_spec *spec;
+       int err = 0;
+       void *misc;
+
+       spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+       if (!spec)
+               return -ENOMEM;
+
+       spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+       misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+                           misc_parameters);
+       MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_UPLINK);
+       MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id,
+                MLX5_CAP_GEN(slave, vhca_id));
+
+       misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+       MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+       MLX5_SET_TO_ONES(fte_match_set_misc, misc,
+                        source_eswitch_owner_vhca_id);
+
+       flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+       dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+       dest.vport.num = slave->priv.eswitch->manager_vport;
+       dest.vport.vhca_id = MLX5_CAP_GEN(slave, vhca_id);
+       dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
+
+       flow_rule = mlx5_add_flow_rules(acl, spec, &flow_act,
+                                       &dest, 1);
+       if (IS_ERR(flow_rule))
+               err = PTR_ERR(flow_rule);
+       else
+               vport->egress.offloads.bounce_rule = flow_rule;
+
+       kvfree(spec);
+       return err;
+}
+
+static int esw_set_master_egress_rule(struct mlx5_core_dev *master,
+                                     struct mlx5_core_dev *slave)
+{
+       int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+       struct mlx5_eswitch *esw = master->priv.eswitch;
+       struct mlx5_flow_table_attr ft_attr = {
+               .max_fte = 1, .prio = 0, .level = 0,
+       };
+       struct mlx5_flow_namespace *egress_ns;
+       struct mlx5_flow_table *acl;
+       struct mlx5_flow_group *g;
+       struct mlx5_vport *vport;
+       void *match_criteria;
+       u32 *flow_group_in;
+       int err;
+
+       vport = mlx5_eswitch_get_vport(esw, esw->manager_vport);
+       if (IS_ERR(vport))
+               return PTR_ERR(vport);
+
+       egress_ns = mlx5_get_flow_vport_acl_namespace(master,
+                                                     MLX5_FLOW_NAMESPACE_ESW_EGRESS,
+                                                     vport->index);
+       if (!egress_ns)
+               return -EINVAL;
+
+       if (vport->egress.acl)
+               return -EINVAL;
+
+       flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+       if (!flow_group_in)
+               return -ENOMEM;
+
+       acl = mlx5_create_vport_flow_table(egress_ns, &ft_attr, vport->vport);
+       if (IS_ERR(acl)) {
+               err = PTR_ERR(acl);
+               goto out;
+       }
+
+       match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
+                                     match_criteria);
+       MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+                        misc_parameters.source_port);
+       MLX5_SET_TO_ONES(fte_match_param, match_criteria,
+                        misc_parameters.source_eswitch_owner_vhca_id);
+       MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+                MLX5_MATCH_MISC_PARAMETERS);
+
+       MLX5_SET(create_flow_group_in, flow_group_in,
+                source_eswitch_owner_vhca_id_valid, 1);
+       MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
+       MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
+
+       g = mlx5_create_flow_group(acl, flow_group_in);
+       if (IS_ERR(g)) {
+               err = PTR_ERR(g);
+               goto err_group;
+       }
+
+       err = __esw_set_master_egress_rule(master, slave, vport, acl);
+       if (err)
+               goto err_rule;
+
+       vport->egress.acl = acl;
+       vport->egress.offloads.bounce_grp = g;
+
+       kvfree(flow_group_in);
+
+       return 0;
+
+err_rule:
+       mlx5_destroy_flow_group(g);
+err_group:
+       mlx5_destroy_flow_table(acl);
+out:
+       kvfree(flow_group_in);
+       return err;
+}
+
+static void esw_unset_master_egress_rule(struct mlx5_core_dev *dev)
+{
+       struct mlx5_vport *vport;
+
+       vport = mlx5_eswitch_get_vport(dev->priv.eswitch,
+                                      dev->priv.eswitch->manager_vport);
+
+       esw_acl_egress_ofld_cleanup(vport);
+}
+
+int mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw,
+                                           struct mlx5_eswitch *slave_esw)
+{
+       int err;
+
+       err = esw_set_uplink_slave_ingress_root(master_esw->dev,
+                                               slave_esw->dev);
+       if (err)
+               return -EINVAL;
+
+       err = esw_set_slave_root_fdb(master_esw->dev,
+                                    slave_esw->dev);
+       if (err)
+               goto err_fdb;
+
+       err = esw_set_master_egress_rule(master_esw->dev,
+                                        slave_esw->dev);
+       if (err)
+               goto err_acl;
+
+       return err;
+
+err_acl:
+       esw_set_slave_root_fdb(NULL, slave_esw->dev);
+
+err_fdb:
+       esw_set_uplink_slave_ingress_root(NULL, slave_esw->dev);
+
+       return err;
+}
+
+void mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw,
+                                             struct mlx5_eswitch *slave_esw)
+{
+       esw_unset_master_egress_rule(master_esw->dev);
+       esw_set_slave_root_fdb(NULL, slave_esw->dev);
+       esw_set_uplink_slave_ingress_root(NULL, slave_esw->dev);
+}
+
 #define ESW_OFFLOADS_DEVCOM_PAIR       (0)
 #define ESW_OFFLOADS_DEVCOM_UNPAIR     (1)
 
-static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw,
-                                 struct mlx5_eswitch *peer_esw)
+static void mlx5_esw_offloads_rep_event_unpair(struct mlx5_eswitch *esw)
 {
+       const struct mlx5_eswitch_rep_ops *ops;
+       struct mlx5_eswitch_rep *rep;
+       unsigned long i;
+       u8 rep_type;
 
-       return esw_add_fdb_peer_miss_rules(esw, peer_esw->dev);
+       mlx5_esw_for_each_rep(esw, i, rep) {
+               rep_type = NUM_REP_TYPES;
+               while (rep_type--) {
+                       ops = esw->offloads.rep_ops[rep_type];
+                       if (atomic_read(&rep->rep_data[rep_type].state) == REP_LOADED &&
+                           ops->event)
+                               ops->event(esw, rep, MLX5_SWITCHDEV_EVENT_UNPAIR, NULL);
+               }
+       }
 }
 
 static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw)
@@ -2324,9 +2620,42 @@ static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw)
 #if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
        mlx5e_tc_clean_fdb_peer_flows(esw);
 #endif
+       mlx5_esw_offloads_rep_event_unpair(esw);
        esw_del_fdb_peer_miss_rules(esw);
 }
 
+static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw,
+                                 struct mlx5_eswitch *peer_esw)
+{
+       const struct mlx5_eswitch_rep_ops *ops;
+       struct mlx5_eswitch_rep *rep;
+       unsigned long i;
+       u8 rep_type;
+       int err;
+
+       err = esw_add_fdb_peer_miss_rules(esw, peer_esw->dev);
+       if (err)
+               return err;
+
+       mlx5_esw_for_each_rep(esw, i, rep) {
+               for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) {
+                       ops = esw->offloads.rep_ops[rep_type];
+                       if (atomic_read(&rep->rep_data[rep_type].state) == REP_LOADED &&
+                           ops->event) {
+                               err = ops->event(esw, rep, MLX5_SWITCHDEV_EVENT_PAIR, peer_esw);
+                               if (err)
+                                       goto err_out;
+                       }
+               }
+       }
+
+       return 0;
+
+err_out:
+       mlx5_esw_offloads_unpair(esw);
+       return err;
+}
+
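The pair path now walks every loaded rep and invokes ops->event() with MLX5_SWITCHDEV_EVENT_PAIR, passing the peer eswitch as data and unwinding through the unpair path on the first error; the unpair path sends MLX5_SWITCHDEV_EVENT_UNPAIR with NULL data. A sketch of a handler implementing that contract — the body is illustrative, and the enum/callback signature is assumed from how the loops above call it:

    static int my_rep_event(struct mlx5_eswitch *esw,
                            struct mlx5_eswitch_rep *rep,
                            enum mlx5_switchdev_event event,
                            void *data)
    {
            switch (event) {
            case MLX5_SWITCHDEV_EVENT_PAIR:
                    /* 'data' is the peer struct mlx5_eswitch *; set up any
                     * cross-device state. A nonzero return aborts pairing.
                     */
                    return 0;
            case MLX5_SWITCHDEV_EVENT_UNPAIR:
                    /* 'data' is NULL; tear down whatever PAIR created. */
                    return 0;
            }
            return 0;
    }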
 static int mlx5_esw_offloads_set_ns_peer(struct mlx5_eswitch *esw,
                                         struct mlx5_eswitch *peer_esw,
                                         bool pair)
@@ -2367,6 +2696,9 @@ static int mlx5_esw_offloads_devcom_event(int event,
 
        switch (event) {
        case ESW_OFFLOADS_DEVCOM_PAIR:
+               if (mlx5_get_next_phys_dev(esw->dev) != peer_esw->dev)
+                       break;
+
                if (mlx5_eswitch_vport_match_metadata_enabled(esw) !=
                    mlx5_eswitch_vport_match_metadata_enabled(peer_esw))
                        break;
@@ -2614,6 +2946,31 @@ static void esw_destroy_uplink_offloads_acl_tables(struct mlx5_eswitch *esw)
        esw_vport_destroy_offloads_acl_tables(esw, vport);
 }
 
+int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw)
+{
+       struct mlx5_eswitch_rep *rep;
+       unsigned long i;
+       int ret;
+
+       if (!esw || esw->mode != MLX5_ESWITCH_OFFLOADS)
+               return 0;
+
+       rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK);
+       if (atomic_read(&rep->rep_data[REP_ETH].state) != REP_LOADED)
+               return 0;
+
+       ret = mlx5_esw_offloads_rep_load(esw, MLX5_VPORT_UPLINK);
+       if (ret)
+               return ret;
+
+       mlx5_esw_for_each_rep(esw, i, rep) {
+               if (atomic_read(&rep->rep_data[REP_ETH].state) == REP_LOADED)
+                       mlx5_esw_offloads_rep_load(esw, rep->vport);
+       }
+
+       return 0;
+}
+
 static int esw_offloads_steering_init(struct mlx5_eswitch *esw)
 {
        struct mlx5_esw_indir_table *indir;
@@ -2783,6 +3140,7 @@ int esw_offloads_enable(struct mlx5_eswitch *esw)
        struct mapping_ctx *reg_c0_obj_pool;
        struct mlx5_vport *vport;
        unsigned long i;
+       u64 mapping_id;
        int err;
 
        if (MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, reformat) &&
@@ -2806,9 +3164,13 @@ int esw_offloads_enable(struct mlx5_eswitch *esw)
        if (err)
                goto err_vport_metadata;
 
-       reg_c0_obj_pool = mapping_create(sizeof(struct mlx5_mapped_obj),
-                                        ESW_REG_C0_USER_DATA_METADATA_MASK,
-                                        true);
+       mapping_id = mlx5_query_nic_system_image_guid(esw->dev);
+
+       reg_c0_obj_pool = mapping_create_for_id(mapping_id, MAPPING_TYPE_CHAIN,
+                                               sizeof(struct mlx5_mapped_obj),
+                                               ESW_REG_C0_USER_DATA_METADATA_MASK,
+                                               true);
+
        if (IS_ERR(reg_c0_obj_pool)) {
                err = PTR_ERR(reg_c0_obj_pool);
                goto err_pool;
@@ -2986,10 +3348,11 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
        if (esw_mode_from_devlink(mode, &mlx5_mode))
                return -EINVAL;
 
+       mlx5_lag_disable_change(esw->dev);
        err = mlx5_esw_try_lock(esw);
        if (err < 0) {
                NL_SET_ERR_MSG_MOD(extack, "Can't change mode, E-Switch is busy");
-               return err;
+               goto enable_lag;
        }
        cur_mlx5_mode = err;
        err = 0;
@@ -3006,6 +3369,8 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
 
 unlock:
        mlx5_esw_unlock(esw);
+enable_lag:
+       mlx5_lag_enable_change(esw->dev);
        return err;
 }
 
@@ -3079,8 +3444,11 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode,
 
        switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) {
        case MLX5_CAP_INLINE_MODE_NOT_REQUIRED:
-               if (mode == DEVLINK_ESWITCH_INLINE_MODE_NONE)
+               if (mode == DEVLINK_ESWITCH_INLINE_MODE_NONE) {
+                       err = 0;
                        goto out;
+               }
+
                fallthrough;
        case MLX5_CAP_INLINE_MODE_L2:
                NL_SET_ERR_MSG_MOD(extack, "Inline mode can't be set");
index bd66ab2..9b2cca6 100644
@@ -454,7 +454,7 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size)
 
        cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
        MLX5_SET(cqc, cqc, log_cq_size, ilog2(cq_size));
-       MLX5_SET(cqc, cqc, c_eqn, eqn);
+       MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
        MLX5_SET(cqc, cqc, uar_page, fdev->conn_res.uar->index);
        MLX5_SET(cqc, cqc, log_page_size, conn->cq.wq_ctrl.buf.page_shift -
                           MLX5_ADAPTER_PAGE_SHIFT);
index 896a6c3..7db8df6 100644
@@ -152,17 +152,56 @@ static int mlx5_cmd_stub_destroy_ns(struct mlx5_flow_root_namespace *ns)
        return 0;
 }
 
+static int mlx5_cmd_set_slave_root_fdb(struct mlx5_core_dev *master,
+                                      struct mlx5_core_dev *slave,
+                                      bool ft_id_valid,
+                                      u32 ft_id)
+{
+       u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {};
+       u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {};
+       struct mlx5_flow_root_namespace *root;
+       struct mlx5_flow_namespace *ns;
+
+       MLX5_SET(set_flow_table_root_in, in, opcode,
+                MLX5_CMD_OP_SET_FLOW_TABLE_ROOT);
+       MLX5_SET(set_flow_table_root_in, in, table_type,
+                FS_FT_FDB);
+       if (ft_id_valid) {
+               MLX5_SET(set_flow_table_root_in, in,
+                        table_eswitch_owner_vhca_id_valid, 1);
+               MLX5_SET(set_flow_table_root_in, in,
+                        table_eswitch_owner_vhca_id,
+                        MLX5_CAP_GEN(master, vhca_id));
+               MLX5_SET(set_flow_table_root_in, in, table_id,
+                        ft_id);
+       } else {
+               ns = mlx5_get_flow_namespace(slave,
+                                            MLX5_FLOW_NAMESPACE_FDB);
+               root = find_root(&ns->node);
+               MLX5_SET(set_flow_table_root_in, in, table_id,
+                        root->root_ft->id);
+       }
+
+       return mlx5_cmd_exec(slave, in, sizeof(in), out, sizeof(out));
+}
+
 static int mlx5_cmd_update_root_ft(struct mlx5_flow_root_namespace *ns,
                                   struct mlx5_flow_table *ft, u32 underlay_qpn,
                                   bool disconnect)
 {
        u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {};
        struct mlx5_core_dev *dev = ns->dev;
+       int err;
 
        if ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) &&
            underlay_qpn == 0)
                return 0;
 
+       if (ft->type == FS_FT_FDB &&
+           mlx5_lag_is_shared_fdb(dev) &&
+           !mlx5_lag_is_master(dev))
+               return 0;
+
        MLX5_SET(set_flow_table_root_in, in, opcode,
                 MLX5_CMD_OP_SET_FLOW_TABLE_ROOT);
        MLX5_SET(set_flow_table_root_in, in, table_type, ft->type);
@@ -177,7 +216,24 @@ static int mlx5_cmd_update_root_ft(struct mlx5_flow_root_namespace *ns,
        MLX5_SET(set_flow_table_root_in, in, other_vport,
                 !!(ft->flags & MLX5_FLOW_TABLE_OTHER_VPORT));
 
-       return mlx5_cmd_exec_in(dev, set_flow_table_root, in);
+       err = mlx5_cmd_exec_in(dev, set_flow_table_root, in);
+       if (!err &&
+           ft->type == FS_FT_FDB &&
+           mlx5_lag_is_shared_fdb(dev) &&
+           mlx5_lag_is_master(dev)) {
+               err = mlx5_cmd_set_slave_root_fdb(dev,
+                                                 mlx5_lag_get_peer_mdev(dev),
+                                                 !disconnect, (!disconnect) ?
+                                                 ft->id : 0);
+               if (err && !disconnect) {
+                       MLX5_SET(set_flow_table_root_in, in, op_mod, 0);
+                       MLX5_SET(set_flow_table_root_in, in, table_id,
+                                ns->root_ft->id);
+                       mlx5_cmd_exec_in(dev, set_flow_table_root, in);
+               }
+       }
+
+       return err;
 }
 
 static int mlx5_cmd_create_flow_table(struct mlx5_flow_root_namespace *ns,
index d7bf0a3..8481027 100644
@@ -413,7 +413,7 @@ static bool check_valid_spec(const struct mlx5_flow_spec *spec)
        return true;
 }
 
-static struct mlx5_flow_root_namespace *find_root(struct fs_node *node)
+struct mlx5_flow_root_namespace *find_root(struct fs_node *node)
 {
        struct fs_node *root;
        struct mlx5_flow_namespace *ns;
@@ -1024,17 +1024,19 @@ static int connect_fwd_rules(struct mlx5_core_dev *dev,
 static int connect_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft,
                              struct fs_prio *prio)
 {
-       struct mlx5_flow_table *next_ft;
+       struct mlx5_flow_table *next_ft, *first_ft;
        int err = 0;
 
        /* Connect_prev_fts and update_root_ft_create are mutually exclusive */
 
-       if (list_empty(&prio->node.children)) {
+       first_ft = list_first_entry_or_null(&prio->node.children,
+                                           struct mlx5_flow_table, node.list);
+       if (!first_ft || first_ft->level > ft->level) {
                err = connect_prev_fts(dev, ft, prio);
                if (err)
                        return err;
 
-               next_ft = find_next_chained_ft(prio);
+               next_ft = first_ft ? first_ft : find_next_chained_ft(prio);
                err = connect_fwd_rules(dev, ft, next_ft);
                if (err)
                        return err;
@@ -2120,7 +2122,7 @@ static int disconnect_flow_table(struct mlx5_flow_table *ft)
                                node.list) == ft))
                return 0;
 
-       next_ft = find_next_chained_ft(prio);
+       next_ft = find_next_ft(ft);
        err = connect_fwd_rules(dev, next_ft, ft);
        if (err)
                return err;
index 7317cde..98240ba 100644
@@ -294,6 +294,8 @@ void mlx5_fs_egress_acls_cleanup(struct mlx5_core_dev *dev);
 int mlx5_fs_ingress_acls_init(struct mlx5_core_dev *dev, int total_vports);
 void mlx5_fs_ingress_acls_cleanup(struct mlx5_core_dev *dev);
 
+struct mlx5_flow_root_namespace *find_root(struct fs_node *node);
+
 #define fs_get_obj(v, _node)  {v = container_of((_node), typeof(*v), node); }
 
 #define fs_list_for_each_entry(pos, root)              \
index 9ff163c..9abeb80 100644
@@ -626,8 +626,16 @@ static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work)
        }
        fw_reporter_ctx.err_synd = health->synd;
        fw_reporter_ctx.miss_counter = health->miss_counter;
-       devlink_health_report(health->fw_fatal_reporter,
-                             "FW fatal error reported", &fw_reporter_ctx);
+       if (devlink_health_report(health->fw_fatal_reporter,
+                                 "FW fatal error reported", &fw_reporter_ctx) == -ECANCELED) {
+               /* If recovery wasn't performed because of the grace
+                * period, unload the driver. This ensures the driver
+                * closes all its resources and is not subjected to
+                * further requests from the kernel.
+                */
+               mlx5_core_err(dev, "Driver is in error state. Unloading\n");
+               mlx5_unload_one(dev);
+       }
 }
 
 static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_ops = {
index a126cbc..67571e5 100644
@@ -314,8 +314,7 @@ static void mlx5i_cleanup_tx(struct mlx5e_priv *priv)
 
 static int mlx5i_create_flow_steering(struct mlx5e_priv *priv)
 {
-       struct ttc_params ttc_params = {};
-       int tt, err;
+       int err;
 
        priv->fs.ns = mlx5_get_flow_namespace(priv->mdev,
                                               MLX5_FLOW_NAMESPACE_KERNEL);
@@ -330,12 +329,7 @@ static int mlx5i_create_flow_steering(struct mlx5e_priv *priv)
                priv->netdev->hw_features &= ~NETIF_F_NTUPLE;
        }
 
-       mlx5e_set_ttc_basic_params(priv, &ttc_params);
-       mlx5e_set_ttc_ft_params(&ttc_params);
-       for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
-               ttc_params.indir_tirn[tt] = priv->rx_res->rss[tt].indir_tir.tirn;
-
-       err = mlx5e_create_ttc_table(priv, &ttc_params, &priv->fs.ttc);
+       err = mlx5e_create_ttc_table(priv);
        if (err) {
                netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n",
                           err);
@@ -352,21 +346,20 @@ err_destroy_arfs_tables:
 
 static void mlx5i_destroy_flow_steering(struct mlx5e_priv *priv)
 {
-       mlx5e_destroy_ttc_table(priv, &priv->fs.ttc);
+       mlx5e_destroy_ttc_table(priv);
        mlx5e_arfs_destroy_tables(priv);
 }
 
 static int mlx5i_init_rx(struct mlx5e_priv *priv)
 {
        struct mlx5_core_dev *mdev = priv->mdev;
+       struct mlx5e_lro_param lro_param;
        int err;
 
-       priv->rx_res = kvzalloc(sizeof(*priv->rx_res), GFP_KERNEL);
+       priv->rx_res = mlx5e_rx_res_alloc();
        if (!priv->rx_res)
                return -ENOMEM;
 
-       mlx5e_build_rss_params(&priv->rx_res->rss_params, priv->channels.params.num_channels);
-
        mlx5e_create_q_counters(priv);
 
        err = mlx5e_open_drop_rq(priv, &priv->drop_rq);
@@ -375,41 +368,26 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv)
                goto err_destroy_q_counters;
        }
 
-       err = mlx5e_create_indirect_rqt(priv);
+       lro_param = mlx5e_get_lro_param(&priv->channels.params);
+       err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, 0,
+                               priv->max_nch, priv->drop_rq.rqn, &lro_param,
+                               priv->channels.params.num_channels);
        if (err)
                goto err_close_drop_rq;
 
-       err = mlx5e_create_direct_rqts(priv);
-       if (err)
-               goto err_destroy_indirect_rqts;
-
-       err = mlx5e_create_indirect_tirs(priv, false);
-       if (err)
-               goto err_destroy_direct_rqts;
-
-       err = mlx5e_create_direct_tirs(priv);
-       if (err)
-               goto err_destroy_indirect_tirs;
-
        err = mlx5i_create_flow_steering(priv);
        if (err)
-               goto err_destroy_direct_tirs;
+               goto err_destroy_rx_res;
 
        return 0;
 
-err_destroy_direct_tirs:
-       mlx5e_destroy_direct_tirs(priv);
-err_destroy_indirect_tirs:
-       mlx5e_destroy_indirect_tirs(priv);
-err_destroy_direct_rqts:
-       mlx5e_destroy_direct_rqts(priv);
-err_destroy_indirect_rqts:
-       mlx5e_rqt_destroy(&priv->rx_res->indir_rqt);
+err_destroy_rx_res:
+       mlx5e_rx_res_destroy(priv->rx_res);
 err_close_drop_rq:
        mlx5e_close_drop_rq(&priv->drop_rq);
 err_destroy_q_counters:
        mlx5e_destroy_q_counters(priv);
-       kvfree(priv->rx_res);
+       mlx5e_rx_res_free(priv->rx_res);
        priv->rx_res = NULL;
        return err;
 }
@@ -417,13 +395,10 @@ err_destroy_q_counters:
 static void mlx5i_cleanup_rx(struct mlx5e_priv *priv)
 {
        mlx5i_destroy_flow_steering(priv);
-       mlx5e_destroy_direct_tirs(priv);
-       mlx5e_destroy_indirect_tirs(priv);
-       mlx5e_destroy_direct_rqts(priv);
-       mlx5e_rqt_destroy(&priv->rx_res->indir_rqt);
+       mlx5e_rx_res_destroy(priv->rx_res);
        mlx5e_close_drop_rq(&priv->drop_rq);
        mlx5e_destroy_q_counters(priv);
-       kvfree(priv->rx_res);
+       mlx5e_rx_res_free(priv->rx_res);
        priv->rx_res = NULL;
 }
 
index 5c043c5..f4dfa55 100644
@@ -32,7 +32,9 @@
 
 #include <linux/netdevice.h>
 #include <linux/mlx5/driver.h>
+#include <linux/mlx5/eswitch.h>
 #include <linux/mlx5/vport.h>
+#include "lib/devcom.h"
 #include "mlx5_core.h"
 #include "eswitch.h"
 #include "lag.h"
@@ -45,7 +47,7 @@
 static DEFINE_SPINLOCK(lag_lock);
 
 static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1,
-                              u8 remap_port2)
+                              u8 remap_port2, bool shared_fdb)
 {
        u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
        void *lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);
@@ -54,6 +56,7 @@ static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1,
 
        MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
        MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);
+       MLX5_SET(lagc, lag_ctx, fdb_selection_mode, shared_fdb);
 
        return mlx5_cmd_exec_in(dev, create_lag, in);
 }
@@ -224,35 +227,59 @@ void mlx5_modify_lag(struct mlx5_lag *ldev,
 }
 
 static int mlx5_create_lag(struct mlx5_lag *ldev,
-                          struct lag_tracker *tracker)
+                          struct lag_tracker *tracker,
+                          bool shared_fdb)
 {
        struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+       struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
+       u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
        int err;
 
        mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[MLX5_LAG_P1],
                                       &ldev->v2p_map[MLX5_LAG_P2]);
 
-       mlx5_core_info(dev0, "lag map port 1:%d port 2:%d",
-                      ldev->v2p_map[MLX5_LAG_P1], ldev->v2p_map[MLX5_LAG_P2]);
+       mlx5_core_info(dev0, "lag map port 1:%d port 2:%d shared_fdb:%d",
+                      ldev->v2p_map[MLX5_LAG_P1], ldev->v2p_map[MLX5_LAG_P2],
+                      shared_fdb);
 
        err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[MLX5_LAG_P1],
-                                 ldev->v2p_map[MLX5_LAG_P2]);
-       if (err)
+                                 ldev->v2p_map[MLX5_LAG_P2], shared_fdb);
+       if (err) {
                mlx5_core_err(dev0,
                              "Failed to create LAG (%d)\n",
                              err);
+               return err;
+       }
+
+       if (shared_fdb) {
+               err = mlx5_eswitch_offloads_config_single_fdb(dev0->priv.eswitch,
+                                                             dev1->priv.eswitch);
+               if (err)
+                       mlx5_core_err(dev0, "Can't enable single FDB mode\n");
+               else
+                       mlx5_core_info(dev0, "Operation mode is single FDB\n");
+       }
+
+       if (err) {
+               MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
+               if (mlx5_cmd_exec_in(dev0, destroy_lag, in))
+                       mlx5_core_err(dev0,
+                                     "Failed to deactivate RoCE LAG; driver restart required\n");
+       }
+
        return err;
 }
 
 int mlx5_activate_lag(struct mlx5_lag *ldev,
                      struct lag_tracker *tracker,
-                     u8 flags)
+                     u8 flags,
+                     bool shared_fdb)
 {
        bool roce_lag = !!(flags & MLX5_LAG_FLAG_ROCE);
        struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
        int err;
 
-       err = mlx5_create_lag(ldev, tracker);
+       err = mlx5_create_lag(ldev, tracker, shared_fdb);
        if (err) {
                if (roce_lag) {
                        mlx5_core_err(dev0,
@@ -266,6 +293,7 @@ int mlx5_activate_lag(struct mlx5_lag *ldev,
        }
 
        ldev->flags |= flags;
+       ldev->shared_fdb = shared_fdb;
        return 0;
 }
 
@@ -278,6 +306,12 @@ static int mlx5_deactivate_lag(struct mlx5_lag *ldev)
 
        ldev->flags &= ~MLX5_LAG_MODE_FLAGS;
 
+       if (ldev->shared_fdb) {
+               mlx5_eswitch_offloads_destroy_single_fdb(ldev->pf[MLX5_LAG_P1].dev->priv.eswitch,
+                                                        ldev->pf[MLX5_LAG_P2].dev->priv.eswitch);
+               ldev->shared_fdb = false;
+       }
+
        MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
        err = mlx5_cmd_exec_in(dev0, destroy_lag, in);
        if (err) {
@@ -333,6 +367,10 @@ static void mlx5_lag_remove_devices(struct mlx5_lag *ldev)
                if (!ldev->pf[i].dev)
                        continue;
 
+               if (ldev->pf[i].dev->priv.flags &
+                   MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
+                       continue;
+
                ldev->pf[i].dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
                mlx5_rescan_drivers_locked(ldev->pf[i].dev);
        }
@@ -342,12 +380,15 @@ static void mlx5_disable_lag(struct mlx5_lag *ldev)
 {
        struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
        struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
+       bool shared_fdb = ldev->shared_fdb;
        bool roce_lag;
        int err;
 
        roce_lag = __mlx5_lag_is_roce(ldev);
 
-       if (roce_lag) {
+       if (shared_fdb) {
+               mlx5_lag_remove_devices(ldev);
+       } else if (roce_lag) {
                if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) {
                        dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
                        mlx5_rescan_drivers_locked(dev0);
@@ -359,8 +400,34 @@ static void mlx5_disable_lag(struct mlx5_lag *ldev)
        if (err)
                return;
 
-       if (roce_lag)
+       if (shared_fdb || roce_lag)
                mlx5_lag_add_devices(ldev);
+
+       if (shared_fdb) {
+               if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
+                       mlx5_eswitch_reload_reps(dev0->priv.eswitch);
+               if (!(dev1->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
+                       mlx5_eswitch_reload_reps(dev1->priv.eswitch);
+       }
+}
+
+static bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev)
+{
+       struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+       struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
+
+       if (is_mdev_switchdev_mode(dev0) &&
+           is_mdev_switchdev_mode(dev1) &&
+           mlx5_eswitch_vport_match_metadata_enabled(dev0->priv.eswitch) &&
+           mlx5_eswitch_vport_match_metadata_enabled(dev1->priv.eswitch) &&
+           mlx5_devcom_is_paired(dev0->priv.devcom,
+                                 MLX5_DEVCOM_ESW_OFFLOADS) &&
+           MLX5_CAP_GEN(dev1, lag_native_fdb_selection) &&
+           MLX5_CAP_ESW(dev1, root_ft_on_other_esw) &&
+           MLX5_CAP_ESW(dev0, esw_shared_ingress_acl))
+               return true;
+
+       return false;
 }
 
 static void mlx5_do_bond(struct mlx5_lag *ldev)
@@ -371,14 +438,17 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
        bool do_bond, roce_lag;
        int err;
 
-       if (!mlx5_lag_is_ready(ldev))
-               return;
-
-       tracker = ldev->tracker;
+       if (!mlx5_lag_is_ready(ldev)) {
+               do_bond = false;
+       } else {
+               tracker = ldev->tracker;
 
-       do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
+               do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
+       }
 
        if (do_bond && !__mlx5_lag_is_active(ldev)) {
+               bool shared_fdb = mlx5_shared_fdb_supported(ldev);
+
                roce_lag = !mlx5_sriov_is_enabled(dev0) &&
                           !mlx5_sriov_is_enabled(dev1);
 
@@ -388,23 +458,40 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
                           dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE;
 #endif
 
-               if (roce_lag)
+               if (shared_fdb || roce_lag)
                        mlx5_lag_remove_devices(ldev);
 
                err = mlx5_activate_lag(ldev, &tracker,
                                        roce_lag ? MLX5_LAG_FLAG_ROCE :
-                                       MLX5_LAG_FLAG_SRIOV);
+                                                  MLX5_LAG_FLAG_SRIOV,
+                                       shared_fdb);
                if (err) {
-                       if (roce_lag)
+                       if (shared_fdb || roce_lag)
                                mlx5_lag_add_devices(ldev);
 
                        return;
-               }
-
-               if (roce_lag) {
+               } else if (roce_lag) {
                        dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
                        mlx5_rescan_drivers_locked(dev0);
                        mlx5_nic_vport_enable_roce(dev1);
+               } else if (shared_fdb) {
+                       dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
+                       mlx5_rescan_drivers_locked(dev0);
+
+                       err = mlx5_eswitch_reload_reps(dev0->priv.eswitch);
+                       if (!err)
+                               err = mlx5_eswitch_reload_reps(dev1->priv.eswitch);
+
+                       if (err) {
+                               dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
+                               mlx5_rescan_drivers_locked(dev0);
+                               mlx5_deactivate_lag(ldev);
+                               mlx5_lag_add_devices(ldev);
+                               mlx5_eswitch_reload_reps(dev0->priv.eswitch);
+                               mlx5_eswitch_reload_reps(dev1->priv.eswitch);
+                               mlx5_core_err(dev0, "Failed to enable lag\n");
+                               return;
+                       }
                }
        } else if (do_bond && __mlx5_lag_is_active(ldev)) {
                mlx5_modify_lag(ldev, &tracker);
@@ -418,21 +505,48 @@ static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
        queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
 }
 
+static void mlx5_lag_lock_eswitches(struct mlx5_core_dev *dev0,
+                                   struct mlx5_core_dev *dev1)
+{
+       if (dev0)
+               mlx5_esw_lock(dev0->priv.eswitch);
+       if (dev1)
+               mlx5_esw_lock(dev1->priv.eswitch);
+}
+
+static void mlx5_lag_unlock_eswitches(struct mlx5_core_dev *dev0,
+                                     struct mlx5_core_dev *dev1)
+{
+       if (dev1)
+               mlx5_esw_unlock(dev1->priv.eswitch);
+       if (dev0)
+               mlx5_esw_unlock(dev0->priv.eswitch);
+}
+
 static void mlx5_do_bond_work(struct work_struct *work)
 {
        struct delayed_work *delayed_work = to_delayed_work(work);
        struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
                                             bond_work);
+       struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+       struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
        int status;
 
        status = mlx5_dev_list_trylock();
        if (!status) {
-               /* 1 sec delay. */
                mlx5_queue_bond_work(ldev, HZ);
                return;
        }
 
+       if (ldev->mode_changes_in_progress) {
+               mlx5_dev_list_unlock();
+               mlx5_queue_bond_work(ldev, HZ);
+               return;
+       }
+
+       mlx5_lag_lock_eswitches(dev0, dev1);
        mlx5_do_bond(ldev);
+       mlx5_lag_unlock_eswitches(dev0, dev1);
        mlx5_dev_list_unlock();
 }
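
The reworked worker above settles on a strict lock hierarchy. Condensed to its ordering (the worker itself uses the trylock variant and requeues with a one second delay on contention or while a mode change is in flight), a sketch using the variables from the function above:

    mlx5_dev_list_lock();                   /* outermost: global device list  */
    mlx5_lag_lock_eswitches(dev0, dev1);    /* then both eswitches, dev0 first */
    mlx5_do_bond(ldev);
    mlx5_lag_unlock_eswitches(dev0, dev1);  /* release in exact reverse order  */
    mlx5_dev_list_unlock();

Taking the locks in the same fixed order on every path is what keeps the two-device pair deadlock-free.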
 
@@ -630,7 +744,7 @@ static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
 }
 
 /* Must be called with intf_mutex held */
-static void __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
+static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
 {
        struct mlx5_lag *ldev = NULL;
        struct mlx5_core_dev *tmp_dev;
@@ -638,7 +752,7 @@ static void __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
        if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
            !MLX5_CAP_GEN(dev, lag_master) ||
            MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS)
-               return;
+               return 0;
 
        tmp_dev = mlx5_get_next_phys_dev(dev);
        if (tmp_dev)
@@ -648,15 +762,17 @@ static void __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
                ldev = mlx5_lag_dev_alloc(dev);
                if (!ldev) {
                        mlx5_core_err(dev, "Failed to alloc lag dev\n");
-                       return;
+                       return 0;
                }
        } else {
+               if (ldev->mode_changes_in_progress)
+                       return -EAGAIN;
                mlx5_ldev_get(ldev);
        }
 
        mlx5_ldev_add_mdev(ldev, dev);
 
-       return;
+       return 0;
 }
 
 void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
@@ -667,7 +783,13 @@ void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
        if (!ldev)
                return;
 
+recheck:
        mlx5_dev_list_lock();
+       if (ldev->mode_changes_in_progress) {
+               mlx5_dev_list_unlock();
+               msleep(100);
+               goto recheck;
+       }
        mlx5_ldev_remove_mdev(ldev, dev);
        mlx5_dev_list_unlock();
        mlx5_ldev_put(ldev);
@@ -675,8 +797,16 @@ void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
 
 void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
 {
+       int err;
+
+recheck:
        mlx5_dev_list_lock();
-       __mlx5_lag_dev_add_mdev(dev);
+       err = __mlx5_lag_dev_add_mdev(dev);
+       if (err) {
+               mlx5_dev_list_unlock();
+               msleep(100);
+               goto recheck;
+       }
        mlx5_dev_list_unlock();
 }
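
Both mlx5_lag_add_mdev() and mlx5_lag_remove_mdev() now poll mode_changes_in_progress with a 100 ms back-off. The flag is protected by the device-list lock itself, so a waiter has to drop that lock before sleeping; the goto-based code above is equivalent in shape to this loop (sketch):

    for (;;) {
            mlx5_dev_list_lock();
            if (!ldev->mode_changes_in_progress)
                    break;                  /* proceed with the lock held */
            mlx5_dev_list_unlock();
            msleep(100);                    /* a mode change is in flight */
    }
    /* ... add or remove the mdev ... */
    mlx5_dev_list_unlock();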
 
@@ -690,11 +820,11 @@ void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev,
        if (!ldev)
                return;
 
-       if (__mlx5_lag_is_active(ldev))
-               mlx5_disable_lag(ldev);
-
        mlx5_ldev_remove_netdev(ldev, netdev);
        ldev->flags &= ~MLX5_LAG_FLAG_READY;
+
+       if (__mlx5_lag_is_active(ldev))
+               mlx5_queue_bond_work(ldev, 0);
 }
 
 /* Must be called with intf_mutex held */
@@ -716,6 +846,7 @@ void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
 
        if (i >= MLX5_MAX_PORTS)
                ldev->flags |= MLX5_LAG_FLAG_READY;
+       mlx5_queue_bond_work(ldev, 0);
 }
 
 bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
@@ -746,6 +877,21 @@ bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
 }
 EXPORT_SYMBOL(mlx5_lag_is_active);
 
+bool mlx5_lag_is_master(struct mlx5_core_dev *dev)
+{
+       struct mlx5_lag *ldev;
+       bool res;
+
+       spin_lock(&lag_lock);
+       ldev = mlx5_lag_dev(dev);
+       res = ldev && __mlx5_lag_is_active(ldev) &&
+               dev == ldev->pf[MLX5_LAG_P1].dev;
+       spin_unlock(&lag_lock);
+
+       return res;
+}
+EXPORT_SYMBOL(mlx5_lag_is_master);
+
 bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
 {
        struct mlx5_lag *ldev;
@@ -760,19 +906,50 @@ bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
 }
 EXPORT_SYMBOL(mlx5_lag_is_sriov);
 
-void mlx5_lag_update(struct mlx5_core_dev *dev)
+bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev)
+{
+       struct mlx5_lag *ldev;
+       bool res;
+
+       spin_lock(&lag_lock);
+       ldev = mlx5_lag_dev(dev);
+       res = ldev && __mlx5_lag_is_sriov(ldev) && ldev->shared_fdb;
+       spin_unlock(&lag_lock);
+
+       return res;
+}
+EXPORT_SYMBOL(mlx5_lag_is_shared_fdb);
+
+void mlx5_lag_disable_change(struct mlx5_core_dev *dev)
 {
+       struct mlx5_core_dev *dev0;
+       struct mlx5_core_dev *dev1;
        struct mlx5_lag *ldev;
 
        mlx5_dev_list_lock();
+
        ldev = mlx5_lag_dev(dev);
-       if (!ldev)
-               goto unlock;
+       dev0 = ldev->pf[MLX5_LAG_P1].dev;
+       dev1 = ldev->pf[MLX5_LAG_P2].dev;
 
-       mlx5_do_bond(ldev);
+       ldev->mode_changes_in_progress++;
+       if (__mlx5_lag_is_active(ldev)) {
+               mlx5_lag_lock_eswitches(dev0, dev1);
+               mlx5_disable_lag(ldev);
+               mlx5_lag_unlock_eswitches(dev0, dev1);
+       }
+       mlx5_dev_list_unlock();
+}
 
-unlock:
+void mlx5_lag_enable_change(struct mlx5_core_dev *dev)
+{
+       struct mlx5_lag *ldev;
+
+       mlx5_dev_list_lock();
+       ldev = mlx5_lag_dev(dev);
+       ldev->mode_changes_in_progress--;
        mlx5_dev_list_unlock();
+       mlx5_queue_bond_work(ldev, 0);
 }
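
mlx5_lag_disable_change() and mlx5_lag_enable_change() fence LAG activity around a reconfiguration: disabling bumps mode_changes_in_progress (which parks the bond worker and the mdev add/remove paths) and tears down an active LAG under the eswitch locks; enabling lifts the fence and queues the worker to re-evaluate bonding. Expected usage, sketched with a hypothetical change_eswitch_mode() standing in for whatever operation must not race with bonding:

    mlx5_lag_disable_change(dev);               /* LAG torn down, re-bonding blocked */
    err = change_eswitch_mode(dev, new_mode);   /* hypothetical operation */
    mlx5_lag_enable_change(dev);                /* fence lifted, bond re-evaluated */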
 
 struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
@@ -827,6 +1004,26 @@ unlock:
 }
 EXPORT_SYMBOL(mlx5_lag_get_slave_port);
 
+struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev)
+{
+       struct mlx5_core_dev *peer_dev = NULL;
+       struct mlx5_lag *ldev;
+
+       spin_lock(&lag_lock);
+       ldev = mlx5_lag_dev(dev);
+       if (!ldev)
+               goto unlock;
+
+       peer_dev = ldev->pf[MLX5_LAG_P1].dev == dev ?
+                          ldev->pf[MLX5_LAG_P2].dev :
+                          ldev->pf[MLX5_LAG_P1].dev;
+
+unlock:
+       spin_unlock(&lag_lock);
+       return peer_dev;
+}
+EXPORT_SYMBOL(mlx5_lag_get_peer_mdev);
+
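
Together with mlx5_lag_is_master() and mlx5_lag_is_shared_fdb() above, mlx5_lag_get_peer_mdev() lets an upper layer discover the shared-FDB topology: one FDB serves both ports, only the bond master programs steering, and the peer PF is reached through the new accessor. A hedged consumer sketch; setup_peer_offloads() is illustrative, not part of this patch:

    if (mlx5_lag_is_shared_fdb(dev) && mlx5_lag_is_master(dev)) {
            struct mlx5_core_dev *peer = mlx5_lag_get_peer_mdev(dev);

            if (peer)
                    setup_peer_offloads(dev, peer);     /* hypothetical */
    }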
 int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
                                 u64 *values,
                                 int num_counters,
index 191392c..d4bae52 100644
@@ -39,6 +39,8 @@ struct lag_tracker {
  */
 struct mlx5_lag {
        u8                        flags;
+       int                       mode_changes_in_progress;
+       bool                      shared_fdb;
        u8                        v2p_map[MLX5_MAX_PORTS];
        struct kref               ref;
        struct lag_func           pf[MLX5_MAX_PORTS];
@@ -71,7 +73,8 @@ void mlx5_modify_lag(struct mlx5_lag *ldev,
                     struct lag_tracker *tracker);
 int mlx5_activate_lag(struct mlx5_lag *ldev,
                      struct lag_tracker *tracker,
-                     u8 flags);
+                     u8 flags,
+                     bool shared_fdb);
 int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
                                struct net_device *ndev);
 
index c4bf8b6..011b639 100644
@@ -161,7 +161,7 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
                struct lag_tracker tracker;
 
                tracker = ldev->tracker;
-               mlx5_activate_lag(ldev, &tracker, MLX5_LAG_FLAG_MULTIPATH);
+               mlx5_activate_lag(ldev, &tracker, MLX5_LAG_FLAG_MULTIPATH, false);
        }
 
        mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c
new file mode 100644
index 0000000..749d17c
--- /dev/null
@@ -0,0 +1,602 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES.
+
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/mlx5/fs.h>
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+#include "lib/fs_ttc.h"
+
+#define MLX5_TTC_NUM_GROUPS    3
+#define MLX5_TTC_GROUP1_SIZE   (BIT(3) + MLX5_NUM_TUNNEL_TT)
+#define MLX5_TTC_GROUP2_SIZE    BIT(1)
+#define MLX5_TTC_GROUP3_SIZE    BIT(0)
+#define MLX5_TTC_TABLE_SIZE    (MLX5_TTC_GROUP1_SIZE +\
+                                MLX5_TTC_GROUP2_SIZE +\
+                                MLX5_TTC_GROUP3_SIZE)
+
+#define MLX5_INNER_TTC_NUM_GROUPS      3
+#define MLX5_INNER_TTC_GROUP1_SIZE     BIT(3)
+#define MLX5_INNER_TTC_GROUP2_SIZE     BIT(1)
+#define MLX5_INNER_TTC_GROUP3_SIZE     BIT(0)
+#define MLX5_INNER_TTC_TABLE_SIZE      (MLX5_INNER_TTC_GROUP1_SIZE +\
+                                        MLX5_INNER_TTC_GROUP2_SIZE +\
+                                        MLX5_INNER_TTC_GROUP3_SIZE)
+
+/* L3/L4 traffic type classifier */
+struct mlx5_ttc_table {
+       int num_groups;
+       struct mlx5_flow_table *t;
+       struct mlx5_flow_group **g;
+       struct mlx5_ttc_rule rules[MLX5_NUM_TT];
+       struct mlx5_flow_handle *tunnel_rules[MLX5_NUM_TUNNEL_TT];
+};
+
+struct mlx5_flow_table *mlx5_get_ttc_flow_table(struct mlx5_ttc_table *ttc)
+{
+       return ttc->t;
+}
+
+static void mlx5_cleanup_ttc_rules(struct mlx5_ttc_table *ttc)
+{
+       int i;
+
+       for (i = 0; i < MLX5_NUM_TT; i++) {
+               if (!IS_ERR_OR_NULL(ttc->rules[i].rule)) {
+                       mlx5_del_flow_rules(ttc->rules[i].rule);
+                       ttc->rules[i].rule = NULL;
+               }
+       }
+
+       for (i = 0; i < MLX5_NUM_TUNNEL_TT; i++) {
+               if (!IS_ERR_OR_NULL(ttc->tunnel_rules[i])) {
+                       mlx5_del_flow_rules(ttc->tunnel_rules[i]);
+                       ttc->tunnel_rules[i] = NULL;
+               }
+       }
+}
+
+struct mlx5_etype_proto {
+       u16 etype;
+       u8 proto;
+};
+
+static struct mlx5_etype_proto ttc_rules[] = {
+       [MLX5_TT_IPV4_TCP] = {
+               .etype = ETH_P_IP,
+               .proto = IPPROTO_TCP,
+       },
+       [MLX5_TT_IPV6_TCP] = {
+               .etype = ETH_P_IPV6,
+               .proto = IPPROTO_TCP,
+       },
+       [MLX5_TT_IPV4_UDP] = {
+               .etype = ETH_P_IP,
+               .proto = IPPROTO_UDP,
+       },
+       [MLX5_TT_IPV6_UDP] = {
+               .etype = ETH_P_IPV6,
+               .proto = IPPROTO_UDP,
+       },
+       [MLX5_TT_IPV4_IPSEC_AH] = {
+               .etype = ETH_P_IP,
+               .proto = IPPROTO_AH,
+       },
+       [MLX5_TT_IPV6_IPSEC_AH] = {
+               .etype = ETH_P_IPV6,
+               .proto = IPPROTO_AH,
+       },
+       [MLX5_TT_IPV4_IPSEC_ESP] = {
+               .etype = ETH_P_IP,
+               .proto = IPPROTO_ESP,
+       },
+       [MLX5_TT_IPV6_IPSEC_ESP] = {
+               .etype = ETH_P_IPV6,
+               .proto = IPPROTO_ESP,
+       },
+       [MLX5_TT_IPV4] = {
+               .etype = ETH_P_IP,
+               .proto = 0,
+       },
+       [MLX5_TT_IPV6] = {
+               .etype = ETH_P_IPV6,
+               .proto = 0,
+       },
+       [MLX5_TT_ANY] = {
+               .etype = 0,
+               .proto = 0,
+       },
+};
+
+static struct mlx5_etype_proto ttc_tunnel_rules[] = {
+       [MLX5_TT_IPV4_GRE] = {
+               .etype = ETH_P_IP,
+               .proto = IPPROTO_GRE,
+       },
+       [MLX5_TT_IPV6_GRE] = {
+               .etype = ETH_P_IPV6,
+               .proto = IPPROTO_GRE,
+       },
+       [MLX5_TT_IPV4_IPIP] = {
+               .etype = ETH_P_IP,
+               .proto = IPPROTO_IPIP,
+       },
+       [MLX5_TT_IPV6_IPIP] = {
+               .etype = ETH_P_IPV6,
+               .proto = IPPROTO_IPIP,
+       },
+       [MLX5_TT_IPV4_IPV6] = {
+               .etype = ETH_P_IP,
+               .proto = IPPROTO_IPV6,
+       },
+       [MLX5_TT_IPV6_IPV6] = {
+               .etype = ETH_P_IPV6,
+               .proto = IPPROTO_IPV6,
+       },
+};
+
+u8 mlx5_get_proto_by_tunnel_type(enum mlx5_tunnel_types tt)
+{
+       return ttc_tunnel_rules[tt].proto;
+}
+
+static bool mlx5_tunnel_proto_supported_rx(struct mlx5_core_dev *mdev,
+                                          u8 proto_type)
+{
+       switch (proto_type) {
+       case IPPROTO_GRE:
+               return MLX5_CAP_ETH(mdev, tunnel_stateless_gre);
+       case IPPROTO_IPIP:
+       case IPPROTO_IPV6:
+               return (MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip) ||
+                       MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip_rx));
+       default:
+               return false;
+       }
+}
+
+static bool mlx5_tunnel_any_rx_proto_supported(struct mlx5_core_dev *mdev)
+{
+       int tt;
+
+       for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) {
+               if (mlx5_tunnel_proto_supported_rx(mdev,
+                                                  ttc_tunnel_rules[tt].proto))
+                       return true;
+       }
+       return false;
+}
+
+bool mlx5_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev)
+{
+       return (mlx5_tunnel_any_rx_proto_supported(mdev) &&
+               MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+                                         ft_field_support.inner_ip_version));
+}
+
+static u8 mlx5_etype_to_ipv(u16 ethertype)
+{
+       if (ethertype == ETH_P_IP)
+               return 4;
+
+       if (ethertype == ETH_P_IPV6)
+               return 6;
+
+       return 0;
+}
+
+static struct mlx5_flow_handle *
+mlx5_generate_ttc_rule(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft,
+                      struct mlx5_flow_destination *dest, u16 etype, u8 proto)
+{
+       int match_ipv_outer =
+               MLX5_CAP_FLOWTABLE_NIC_RX(dev,
+                                         ft_field_support.outer_ip_version);
+       MLX5_DECLARE_FLOW_ACT(flow_act);
+       struct mlx5_flow_handle *rule;
+       struct mlx5_flow_spec *spec;
+       int err = 0;
+       u8 ipv;
+
+       spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+       if (!spec)
+               return ERR_PTR(-ENOMEM);
+
+       if (proto) {
+               spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+               MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol);
+               MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, proto);
+       }
+
+       ipv = mlx5_etype_to_ipv(etype);
+       if (match_ipv_outer && ipv) {
+               spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+               MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version);
+               MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, ipv);
+       } else if (etype) {
+               spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+               MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype);
+               MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, etype);
+       }
+
+       rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1);
+       if (IS_ERR(rule)) {
+               err = PTR_ERR(rule);
+               mlx5_core_err(dev, "%s: add rule failed\n", __func__);
+       }
+
+       kvfree(spec);
+       return err ? ERR_PTR(err) : rule;
+}
+
+static int mlx5_generate_ttc_table_rules(struct mlx5_core_dev *dev,
+                                        struct ttc_params *params,
+                                        struct mlx5_ttc_table *ttc)
+{
+       struct mlx5_flow_handle **trules;
+       struct mlx5_ttc_rule *rules;
+       struct mlx5_flow_table *ft;
+       int tt;
+       int err;
+
+       ft = ttc->t;
+       rules = ttc->rules;
+       for (tt = 0; tt < MLX5_NUM_TT; tt++) {
+               struct mlx5_ttc_rule *rule = &rules[tt];
+
+               rule->rule = mlx5_generate_ttc_rule(dev, ft, &params->dests[tt],
+                                                   ttc_rules[tt].etype,
+                                                   ttc_rules[tt].proto);
+               if (IS_ERR(rule->rule)) {
+                       err = PTR_ERR(rule->rule);
+                       rule->rule = NULL;
+                       goto del_rules;
+               }
+               rule->default_dest = params->dests[tt];
+       }
+
+       if (!params->inner_ttc || !mlx5_tunnel_inner_ft_supported(dev))
+               return 0;
+
+       trules    = ttc->tunnel_rules;
+       for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) {
+               if (!mlx5_tunnel_proto_supported_rx(dev,
+                                                   ttc_tunnel_rules[tt].proto))
+                       continue;
+               trules[tt] = mlx5_generate_ttc_rule(dev, ft,
+                                                   &params->tunnel_dests[tt],
+                                                   ttc_tunnel_rules[tt].etype,
+                                                   ttc_tunnel_rules[tt].proto);
+               if (IS_ERR(trules[tt])) {
+                       err = PTR_ERR(trules[tt]);
+                       trules[tt] = NULL;
+                       goto del_rules;
+               }
+       }
+
+       return 0;
+
+del_rules:
+       mlx5_cleanup_ttc_rules(ttc);
+       return err;
+}
+
+static int mlx5_create_ttc_table_groups(struct mlx5_ttc_table *ttc,
+                                       bool use_ipv)
+{
+       int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+       int ix = 0;
+       u32 *in;
+       int err;
+       u8 *mc;
+
+       ttc->g = kcalloc(MLX5_TTC_NUM_GROUPS, sizeof(*ttc->g), GFP_KERNEL);
+       if (!ttc->g)
+               return -ENOMEM;
+       in = kvzalloc(inlen, GFP_KERNEL);
+       if (!in) {
+               kfree(ttc->g);
+               ttc->g = NULL;
+               return -ENOMEM;
+       }
+
+       /* L4 Group */
+       mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+       MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol);
+       if (use_ipv)
+               MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_version);
+       else
+               MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype);
+       MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+       MLX5_SET_CFG(in, start_flow_index, ix);
+       ix += MLX5_TTC_GROUP1_SIZE;
+       MLX5_SET_CFG(in, end_flow_index, ix - 1);
+       ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in);
+       if (IS_ERR(ttc->g[ttc->num_groups]))
+               goto err;
+       ttc->num_groups++;
+
+       /* L3 Group */
+       MLX5_SET(fte_match_param, mc, outer_headers.ip_protocol, 0);
+       MLX5_SET_CFG(in, start_flow_index, ix);
+       ix += MLX5_TTC_GROUP2_SIZE;
+       MLX5_SET_CFG(in, end_flow_index, ix - 1);
+       ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in);
+       if (IS_ERR(ttc->g[ttc->num_groups]))
+               goto err;
+       ttc->num_groups++;
+
+       /* Any Group */
+       memset(in, 0, inlen);
+       MLX5_SET_CFG(in, start_flow_index, ix);
+       ix += MLX5_TTC_GROUP3_SIZE;
+       MLX5_SET_CFG(in, end_flow_index, ix - 1);
+       ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in);
+       if (IS_ERR(ttc->g[ttc->num_groups]))
+               goto err;
+       ttc->num_groups++;
+
+       kvfree(in);
+       return 0;
+
+err:
+       err = PTR_ERR(ttc->g[ttc->num_groups]);
+       ttc->g[ttc->num_groups] = NULL;
+       kvfree(in);
+
+       return err;
+}
+
+static struct mlx5_flow_handle *
+mlx5_generate_inner_ttc_rule(struct mlx5_core_dev *dev,
+                            struct mlx5_flow_table *ft,
+                            struct mlx5_flow_destination *dest,
+                            u16 etype, u8 proto)
+{
+       MLX5_DECLARE_FLOW_ACT(flow_act);
+       struct mlx5_flow_handle *rule;
+       struct mlx5_flow_spec *spec;
+       int err = 0;
+       u8 ipv;
+
+       spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+       if (!spec)
+               return ERR_PTR(-ENOMEM);
+
+       ipv = mlx5_etype_to_ipv(etype);
+       if (etype && ipv) {
+               spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS;
+               MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_version);
+               MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_version, ipv);
+       }
+
+       if (proto) {
+               spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS;
+               MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_protocol);
+               MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_protocol, proto);
+       }
+
+       rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1);
+       if (IS_ERR(rule)) {
+               err = PTR_ERR(rule);
+               mlx5_core_err(dev, "%s: add inner TTC rule failed\n", __func__);
+       }
+
+       kvfree(spec);
+       return err ? ERR_PTR(err) : rule;
+}
+
+static int mlx5_generate_inner_ttc_table_rules(struct mlx5_core_dev *dev,
+                                              struct ttc_params *params,
+                                              struct mlx5_ttc_table *ttc)
+{
+       struct mlx5_ttc_rule *rules;
+       struct mlx5_flow_table *ft;
+       int err;
+       int tt;
+
+       ft = ttc->t;
+       rules = ttc->rules;
+
+       for (tt = 0; tt < MLX5_NUM_TT; tt++) {
+               struct mlx5_ttc_rule *rule = &rules[tt];
+
+               rule->rule = mlx5_generate_inner_ttc_rule(dev, ft,
+                                                         &params->dests[tt],
+                                                         ttc_rules[tt].etype,
+                                                         ttc_rules[tt].proto);
+               if (IS_ERR(rule->rule)) {
+                       err = PTR_ERR(rule->rule);
+                       rule->rule = NULL;
+                       goto del_rules;
+               }
+               rule->default_dest = params->dests[tt];
+       }
+
+       return 0;
+
+del_rules:
+       mlx5_cleanup_ttc_rules(ttc);
+       return err;
+}
+
+static int mlx5_create_inner_ttc_table_groups(struct mlx5_ttc_table *ttc)
+{
+       int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+       int ix = 0;
+       u32 *in;
+       int err;
+       u8 *mc;
+
+       ttc->g = kcalloc(MLX5_INNER_TTC_NUM_GROUPS, sizeof(*ttc->g),
+                        GFP_KERNEL);
+       if (!ttc->g)
+               return -ENOMEM;
+       in = kvzalloc(inlen, GFP_KERNEL);
+       if (!in) {
+               kfree(ttc->g);
+               ttc->g = NULL;
+               return -ENOMEM;
+       }
+
+       /* L4 Group */
+       mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+       MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_protocol);
+       MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_version);
+       MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_INNER_HEADERS);
+       MLX5_SET_CFG(in, start_flow_index, ix);
+       ix += MLX5_INNER_TTC_GROUP1_SIZE;
+       MLX5_SET_CFG(in, end_flow_index, ix - 1);
+       ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in);
+       if (IS_ERR(ttc->g[ttc->num_groups]))
+               goto err;
+       ttc->num_groups++;
+
+       /* L3 Group */
+       MLX5_SET(fte_match_param, mc, inner_headers.ip_protocol, 0);
+       MLX5_SET_CFG(in, start_flow_index, ix);
+       ix += MLX5_INNER_TTC_GROUP2_SIZE;
+       MLX5_SET_CFG(in, end_flow_index, ix - 1);
+       ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in);
+       if (IS_ERR(ttc->g[ttc->num_groups]))
+               goto err;
+       ttc->num_groups++;
+
+       /* Any Group */
+       memset(in, 0, inlen);
+       MLX5_SET_CFG(in, start_flow_index, ix);
+       ix += MLX5_INNER_TTC_GROUP3_SIZE;
+       MLX5_SET_CFG(in, end_flow_index, ix - 1);
+       ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in);
+       if (IS_ERR(ttc->g[ttc->num_groups]))
+               goto err;
+       ttc->num_groups++;
+
+       kvfree(in);
+       return 0;
+
+err:
+       err = PTR_ERR(ttc->g[ttc->num_groups]);
+       ttc->g[ttc->num_groups] = NULL;
+       kvfree(in);
+
+       return err;
+}
+
+struct mlx5_ttc_table *mlx5_create_inner_ttc_table(struct mlx5_core_dev *dev,
+                                                  struct ttc_params *params)
+{
+       struct mlx5_ttc_table *ttc;
+       int err;
+
+       ttc = kvzalloc(sizeof(*ttc), GFP_KERNEL);
+       if (!ttc)
+               return ERR_PTR(-ENOMEM);
+
+       WARN_ON_ONCE(params->ft_attr.max_fte);
+       params->ft_attr.max_fte = MLX5_INNER_TTC_TABLE_SIZE;
+       ttc->t = mlx5_create_flow_table(params->ns, &params->ft_attr);
+       if (IS_ERR(ttc->t)) {
+               err = PTR_ERR(ttc->t);
+               kvfree(ttc);
+               return ERR_PTR(err);
+       }
+
+       err = mlx5_create_inner_ttc_table_groups(ttc);
+       if (err)
+               goto destroy_ft;
+
+       err = mlx5_generate_inner_ttc_table_rules(dev, params, ttc);
+       if (err)
+               goto destroy_ft;
+
+       return ttc;
+
+destroy_ft:
+       mlx5_destroy_ttc_table(ttc);
+       return ERR_PTR(err);
+}
+
+void mlx5_destroy_ttc_table(struct mlx5_ttc_table *ttc)
+{
+       int i;
+
+       mlx5_cleanup_ttc_rules(ttc);
+       for (i = ttc->num_groups - 1; i >= 0; i--) {
+               if (!IS_ERR_OR_NULL(ttc->g[i]))
+                       mlx5_destroy_flow_group(ttc->g[i]);
+               ttc->g[i] = NULL;
+       }
+
+       kfree(ttc->g);
+       mlx5_destroy_flow_table(ttc->t);
+       kvfree(ttc);
+}
+
+struct mlx5_ttc_table *mlx5_create_ttc_table(struct mlx5_core_dev *dev,
+                                            struct ttc_params *params)
+{
+       bool match_ipv_outer =
+               MLX5_CAP_FLOWTABLE_NIC_RX(dev,
+                                         ft_field_support.outer_ip_version);
+       struct mlx5_ttc_table *ttc;
+       int err;
+
+       ttc = kvzalloc(sizeof(*ttc), GFP_KERNEL);
+       if (!ttc)
+               return ERR_PTR(-ENOMEM);
+
+       WARN_ON_ONCE(params->ft_attr.max_fte);
+       params->ft_attr.max_fte = MLX5_TTC_TABLE_SIZE;
+       ttc->t = mlx5_create_flow_table(params->ns, &params->ft_attr);
+       if (IS_ERR(ttc->t)) {
+               err = PTR_ERR(ttc->t);
+               kvfree(ttc);
+               return ERR_PTR(err);
+       }
+
+       err = mlx5_create_ttc_table_groups(ttc, match_ipv_outer);
+       if (err)
+               goto destroy_ft;
+
+       err = mlx5_generate_ttc_table_rules(dev, params, ttc);
+       if (err)
+               goto destroy_ft;
+
+       return ttc;
+
+destroy_ft:
+       mlx5_destroy_ttc_table(ttc);
+       return ERR_PTR(err);
+}
+
+int mlx5_ttc_fwd_dest(struct mlx5_ttc_table *ttc, enum mlx5_traffic_types type,
+                     struct mlx5_flow_destination *new_dest)
+{
+       return mlx5_modify_rule_destination(ttc->rules[type].rule, new_dest,
+                                           NULL);
+}
+
+struct mlx5_flow_destination
+mlx5_ttc_get_default_dest(struct mlx5_ttc_table *ttc,
+                         enum mlx5_traffic_types type)
+{
+       struct mlx5_flow_destination *dest = &ttc->rules[type].default_dest;
+
+       WARN_ONCE(dest->type != MLX5_FLOW_DESTINATION_TYPE_TIR,
+                 "TTC[%d] default dest is not set up yet", type);
+
+       return *dest;
+}
+
+int mlx5_ttc_fwd_default_dest(struct mlx5_ttc_table *ttc,
+                             enum mlx5_traffic_types type)
+{
+       struct mlx5_flow_destination dest = mlx5_ttc_get_default_dest(ttc, type);
+
+       return mlx5_ttc_fwd_dest(ttc, type, &dest);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h
new file mode 100644
index 0000000..ce95be8
--- /dev/null
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies. */
+
+#ifndef __MLX5_FS_TTC_H__
+#define __MLX5_FS_TTC_H__
+
+#include <linux/mlx5/fs.h>
+
+enum mlx5_traffic_types {
+       MLX5_TT_IPV4_TCP,
+       MLX5_TT_IPV6_TCP,
+       MLX5_TT_IPV4_UDP,
+       MLX5_TT_IPV6_UDP,
+       MLX5_TT_IPV4_IPSEC_AH,
+       MLX5_TT_IPV6_IPSEC_AH,
+       MLX5_TT_IPV4_IPSEC_ESP,
+       MLX5_TT_IPV6_IPSEC_ESP,
+       MLX5_TT_IPV4,
+       MLX5_TT_IPV6,
+       MLX5_TT_ANY,
+       MLX5_NUM_TT,
+       MLX5_NUM_INDIR_TIRS = MLX5_TT_ANY,
+};
+
+enum mlx5_tunnel_types {
+       MLX5_TT_IPV4_GRE,
+       MLX5_TT_IPV6_GRE,
+       MLX5_TT_IPV4_IPIP,
+       MLX5_TT_IPV6_IPIP,
+       MLX5_TT_IPV4_IPV6,
+       MLX5_TT_IPV6_IPV6,
+       MLX5_NUM_TUNNEL_TT,
+};
+
+struct mlx5_ttc_rule {
+       struct mlx5_flow_handle *rule;
+       struct mlx5_flow_destination default_dest;
+};
+
+struct mlx5_ttc_table;
+
+struct ttc_params {
+       struct mlx5_flow_namespace *ns;
+       struct mlx5_flow_table_attr ft_attr;
+       struct mlx5_flow_destination dests[MLX5_NUM_TT];
+       bool   inner_ttc;
+       struct mlx5_flow_destination tunnel_dests[MLX5_NUM_TUNNEL_TT];
+};
+
+struct mlx5_flow_table *mlx5_get_ttc_flow_table(struct mlx5_ttc_table *ttc);
+
+struct mlx5_ttc_table *mlx5_create_ttc_table(struct mlx5_core_dev *dev,
+                                            struct ttc_params *params);
+void mlx5_destroy_ttc_table(struct mlx5_ttc_table *ttc);
+
+struct mlx5_ttc_table *mlx5_create_inner_ttc_table(struct mlx5_core_dev *dev,
+                                                  struct ttc_params *params);
+
+int mlx5_ttc_fwd_dest(struct mlx5_ttc_table *ttc, enum mlx5_traffic_types type,
+                     struct mlx5_flow_destination *new_dest);
+struct mlx5_flow_destination
+mlx5_ttc_get_default_dest(struct mlx5_ttc_table *ttc,
+                         enum mlx5_traffic_types type);
+int mlx5_ttc_fwd_default_dest(struct mlx5_ttc_table *ttc,
+                             enum mlx5_traffic_types type);
+
+bool mlx5_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev);
+u8 mlx5_get_proto_by_tunnel_type(enum mlx5_tunnel_types tt);
+
+#endif /* __MLX5_FS_TTC_H__ */
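
The header above is the entire public surface of the new TTC (traffic type classifier) library. A minimal usage sketch, assuming the kernel NIC-RX namespace and one TIR per traffic type; mdev and tir_num_for() are placeholders:

    struct ttc_params params = {};
    struct mlx5_ttc_table *ttc;
    int tt;

    params.ns = mlx5_get_flow_namespace(mdev, MLX5_FLOW_NAMESPACE_KERNEL);
    for (tt = 0; tt < MLX5_NUM_TT; tt++) {
            params.dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
            params.dests[tt].tir_num = tir_num_for(tt);     /* hypothetical */
    }

    ttc = mlx5_create_ttc_table(mdev, &params);
    if (IS_ERR(ttc))
            return PTR_ERR(ttc);

    /* ... mlx5_ttc_fwd_dest() retargets a single traffic type ... */
    mlx5_destroy_ttc_table(ttc);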
index eb1b316..6fe5603 100644
@@ -1179,6 +1179,7 @@ static int mlx5_load(struct mlx5_core_dev *dev)
                goto err_ec;
        }
 
+       mlx5_lag_add_mdev(dev);
        err = mlx5_sriov_attach(dev);
        if (err) {
                mlx5_core_err(dev, "sriov init failed %d\n", err);
@@ -1186,11 +1187,11 @@ static int mlx5_load(struct mlx5_core_dev *dev)
        }
 
        mlx5_sf_dev_table_create(dev);
-       mlx5_lag_add_mdev(dev);
 
        return 0;
 
 err_sriov:
+       mlx5_lag_remove_mdev(dev);
        mlx5_ec_cleanup(dev);
 err_ec:
        mlx5_sf_hw_table_destroy(dev);
@@ -1222,9 +1223,9 @@ err_irq_table:
 
 static void mlx5_unload(struct mlx5_core_dev *dev)
 {
-       mlx5_lag_remove_mdev(dev);
        mlx5_sf_dev_table_destroy(dev);
        mlx5_sriov_detach(dev);
+       mlx5_lag_remove_mdev(dev);
        mlx5_ec_cleanup(dev);
        mlx5_sf_hw_table_destroy(dev);
        mlx5_vhca_event_stop(dev);
@@ -1271,7 +1272,7 @@ int mlx5_init_one(struct mlx5_core_dev *dev)
 
        set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
 
-       err = mlx5_devlink_register(priv_to_devlink(dev), dev->device);
+       err = mlx5_devlink_register(priv_to_devlink(dev));
        if (err)
                goto err_devlink_reg;
 
@@ -1452,7 +1453,7 @@ static int probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
        struct devlink *devlink;
        int err;
 
-       devlink = mlx5_devlink_alloc();
+       devlink = mlx5_devlink_alloc(&pdev->dev);
        if (!devlink) {
                dev_err(&pdev->dev, "devlink alloc failed\n");
                return -ENOMEM;
index 343807a..14ffd74 100644
@@ -168,6 +168,8 @@ void mlx5_lag_add_netdev(struct mlx5_core_dev *dev, struct net_device *netdev);
 void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev, struct net_device *netdev);
 void mlx5_lag_add_mdev(struct mlx5_core_dev *dev);
 void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev);
+void mlx5_lag_disable_change(struct mlx5_core_dev *dev);
+void mlx5_lag_enable_change(struct mlx5_core_dev *dev);
 
 int mlx5_events_init(struct mlx5_core_dev *dev);
 void mlx5_events_cleanup(struct mlx5_core_dev *dev);
index 42c8ee0..052f480 100644
@@ -14,7 +14,7 @@ static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxilia
        struct devlink *devlink;
        int err;
 
-       devlink = mlx5_devlink_alloc();
+       devlink = mlx5_devlink_alloc(&adev->dev);
        if (!devlink)
                return -ENOMEM;
 
index 1be0487..720195c 100644
@@ -164,12 +164,12 @@ static bool mlx5_sf_is_active(const struct mlx5_sf *sf)
        return sf->hw_state == MLX5_VHCA_STATE_ACTIVE || sf->hw_state == MLX5_VHCA_STATE_IN_USE;
 }
 
-int mlx5_devlink_sf_port_fn_state_get(struct devlink *devlink, struct devlink_port *dl_port,
+int mlx5_devlink_sf_port_fn_state_get(struct devlink_port *dl_port,
                                      enum devlink_port_fn_state *state,
                                      enum devlink_port_fn_opstate *opstate,
                                      struct netlink_ext_ack *extack)
 {
-       struct mlx5_core_dev *dev = devlink_priv(devlink);
+       struct mlx5_core_dev *dev = devlink_priv(dl_port->devlink);
        struct mlx5_sf_table *table;
        struct mlx5_sf *sf;
        int err = 0;
@@ -248,11 +248,11 @@ out:
        return err;
 }
 
-int mlx5_devlink_sf_port_fn_state_set(struct devlink *devlink, struct devlink_port *dl_port,
+int mlx5_devlink_sf_port_fn_state_set(struct devlink_port *dl_port,
                                      enum devlink_port_fn_state state,
                                      struct netlink_ext_ack *extack)
 {
-       struct mlx5_core_dev *dev = devlink_priv(devlink);
+       struct mlx5_core_dev *dev = devlink_priv(dl_port->devlink);
        struct mlx5_sf_table *table;
        struct mlx5_sf *sf;
        int err;
index 81ce13b..3a480e0 100644
@@ -24,11 +24,11 @@ int mlx5_devlink_sf_port_new(struct devlink *devlink,
                             unsigned int *new_port_index);
 int mlx5_devlink_sf_port_del(struct devlink *devlink, unsigned int port_index,
                             struct netlink_ext_ack *extack);
-int mlx5_devlink_sf_port_fn_state_get(struct devlink *devlink, struct devlink_port *dl_port,
+int mlx5_devlink_sf_port_fn_state_get(struct devlink_port *dl_port,
                                      enum devlink_port_fn_state *state,
                                      enum devlink_port_fn_opstate *opstate,
                                      struct netlink_ext_ack *extack);
-int mlx5_devlink_sf_port_fn_state_set(struct devlink *devlink, struct devlink_port *dl_port,
+int mlx5_devlink_sf_port_fn_state_set(struct devlink_port *dl_port,
                                      enum devlink_port_fn_state state,
                                      struct netlink_ext_ack *extack);
 #else
index 12cf323..754f892 100644
@@ -790,7 +790,7 @@ static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev,
 
        cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
        MLX5_SET(cqc, cqc, log_cq_size, ilog2(ncqe));
-       MLX5_SET(cqc, cqc, c_eqn, eqn);
+       MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
        MLX5_SET(cqc, cqc, uar_page, uar->index);
        MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
                 MLX5_ADAPTER_PAGE_SHIFT);
index e775f08..f080fab 100644
@@ -1927,7 +1927,8 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
 
        if (!reload) {
                alloc_size = sizeof(*mlxsw_core) + mlxsw_driver->priv_size;
-               devlink = devlink_alloc(&mlxsw_devlink_ops, alloc_size);
+               devlink = devlink_alloc(&mlxsw_devlink_ops, alloc_size,
+                                       mlxsw_bus_info->dev);
                if (!devlink) {
                        err = -ENOMEM;
                        goto err_devlink_alloc;
@@ -1974,7 +1975,7 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
                goto err_emad_init;
 
        if (!reload) {
-               err = devlink_register(devlink, mlxsw_bus_info->dev);
+               err = devlink_register(devlink);
                if (err)
                        goto err_devlink_register;
        }
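
The mlxsw hunk above, like the mlx5, ocelot, nfp and ionic hunks elsewhere in this diff, reflects a devlink core API change in this cycle: devlink_alloc() now takes the backing struct device and devlink_register() no longer does, so the device is bound at allocation time. Conversion shape (sketch; ops and priv names illustrative):

    devlink = devlink_alloc(&my_devlink_ops, sizeof(struct my_priv), dev);
    if (!devlink)
            return -ENOMEM;
    /* ... driver setup ... */
    err = devlink_register(devlink);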
index d39ae2a..7bdbb2d 100644
@@ -1,6 +1,5 @@
 config SPARX5_SWITCH
        tristate "Sparx5 switch driver"
-       depends on BRIDGE || BRIDGE=n
        depends on NET_SWITCHDEV
        depends on HAS_IOMEM
        depends on OF
index 9d485a9..cb68eaa 100644
  */
 #define VSTAX 73
 
-static void ifh_encode_bitfield(void *ifh, u64 value, u32 pos, u32 width)
+#define ifh_encode_bitfield(ifh, value, pos, _width)                   \
+       ({                                                              \
+               u32 width = (_width);                                   \
+                                                                       \
+               /* Max width is 5 bytes - 40 bits. In worst case this will
+                * spread over 6 bytes - 48 bits
+                */                                                     \
+               compiletime_assert(width <= 40,                         \
+                                  "Unsupported width, must be <= 40"); \
+               __ifh_encode_bitfield((ifh), (value), (pos), width);    \
+       })
+
+static void __ifh_encode_bitfield(void *ifh, u64 value, u32 pos, u32 width)
 {
        u8 *ifh_hdr = ifh;
        /* Calculate the Start IFH byte position of this IFH bit position */
        u32 byte = (35 - (pos / 8));
        /* Calculate the Start bit position in the Start IFH byte */
        u32 bit  = (pos % 8);
-       u64 encode = GENMASK(bit + width - 1, bit) & (value << bit);
-
-       /* Max width is 5 bytes - 40 bits. In worst case this will
-        * spread over 6 bytes - 48 bits
-        */
-       compiletime_assert(width <= 40, "Unsupported width, must be <= 40");
+       u64 encode = GENMASK_ULL(bit + width - 1, bit) & (value << bit);
 
        /* The b0-b7 goes into the start IFH byte */
        if (encode & 0xFF)
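
This sparx5 hunk fixes a 32-bit truncation: GENMASK() builds an unsigned long mask, only 32 bits wide on 32-bit targets, so a field whose top bit (bit + width - 1) crosses bit 31 was silently cut short; GENMASK_ULL() always builds a 64-bit mask. Hoisting the assertion into a wrapper macro also keeps _width a compile-time constant where compiletime_assert() evaluates it. Illustration (values chosen for the example, not from the patch):

    /* bit = 4, width = 40: the field occupies bits 43..4 */
    u64 mask = GENMASK_ULL(43, 4);  /* 0x00000ffffffffff0 on all targets */
    /* GENMASK(43, 4) on a 32-bit unsigned long shifts past the type
     * width, truncating the mask (and invoking undefined behaviour). */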
index 4bd7e9d..aa41c9c 100644
@@ -1103,7 +1103,8 @@ static int mscc_ocelot_probe(struct platform_device *pdev)
        if (!np && !pdev->dev.platform_data)
                return -ENODEV;
 
-       devlink = devlink_alloc(&ocelot_devlink_ops, sizeof(*ocelot));
+       devlink = devlink_alloc(&ocelot_devlink_ops, sizeof(*ocelot),
+                               &pdev->dev);
        if (!devlink)
                return -ENOMEM;
 
@@ -1187,7 +1188,7 @@ static int mscc_ocelot_probe(struct platform_device *pdev)
        if (err)
                goto out_put_ports;
 
-       err = devlink_register(devlink, ocelot->dev);
+       err = devlink_register(devlink);
        if (err)
                goto out_ocelot_deinit;
 
index ce3eca5..d74a80f 100644
@@ -193,8 +193,6 @@ static int jazz_sonic_probe(struct platform_device *pdev)
        SET_NETDEV_DEV(dev, &pdev->dev);
        platform_set_drvdata(pdev, dev);
 
-       netdev_boot_setup_check(dev);
-
        dev->base_addr = res->start;
        dev->irq = platform_get_irq(pdev, 0);
        err = sonic_probe1(dev);
index bd9d026..3f98203 100644
@@ -819,7 +819,7 @@ static int natsemi_probe1(struct pci_dev *pdev, const struct pci_device_id *ent)
                printk(version);
 #endif
 
-       i = pci_enable_device(pdev);
+       i = pcim_enable_device(pdev);
        if (i) return i;
 
        /* natsemi has a non-standard PM control register
@@ -852,7 +852,7 @@ static int natsemi_probe1(struct pci_dev *pdev, const struct pci_device_id *ent)
        ioaddr = ioremap(iostart, iosize);
        if (!ioaddr) {
                i = -ENOMEM;
-               goto err_ioremap;
+               goto err_pci_request_regions;
        }
 
        /* Work around the dropped serial bit. */
@@ -974,9 +974,6 @@ static int natsemi_probe1(struct pci_dev *pdev, const struct pci_device_id *ent)
  err_register_netdev:
        iounmap(ioaddr);
 
- err_ioremap:
-       pci_release_regions(pdev);
-
  err_pci_request_regions:
        free_netdev(dev);
        return i;
@@ -3241,7 +3238,6 @@ static void natsemi_remove1(struct pci_dev *pdev)
 
        NATSEMI_REMOVE_FILE(pdev, dspcfg_workaround);
        unregister_netdev (dev);
-       pci_release_regions (pdev);
        iounmap(ioaddr);
        free_netdev (dev);
 }
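
The natsemi conversion to pcim_enable_device() moves the driver onto managed (devres) PCI resources: once the device is marked managed, the later pci_request_regions() call is released automatically on probe failure and on driver detach, which is why both explicit pci_release_regions() calls and the err_ioremap unwind label drop out. Resulting probe shape, sketched (DRV_NAME illustrative):

    err = pcim_enable_device(pdev);             /* managed enable */
    if (err)
            return err;
    err = pci_request_regions(pdev, DRV_NAME);  /* now auto-released */
    if (err)
            return err;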
index 28d9e98..ca46860 100644
@@ -215,7 +215,6 @@ int xtsonic_probe(struct platform_device *pdev)
        lp->device = &pdev->dev;
        platform_set_drvdata(pdev, dev);
        SET_NETDEV_DEV(dev, &pdev->dev);
-       netdev_boot_setup_check(dev);
 
        dev->base_addr = resmem->start;
        dev->irq = resirq->start;
index 20fb4ad..df4a3f3 100644
@@ -3512,13 +3512,13 @@ static void vxge_device_unregister(struct __vxge_hw_device *hldev)
 
        kfree(vdev->vpaths);
 
-       /* we are safe to free it now */
-       free_netdev(dev);
-
        vxge_debug_init(vdev->level_trace, "%s: ethernet device unregistered",
                        buf);
        vxge_debug_entryexit(vdev->level_trace, "%s: %s:%d  Exiting...", buf,
                             __func__, __LINE__);
+
+       /* we are safe to free it now */
+       free_netdev(dev);
 }
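
The vxge reordering is a use-after-free fix: vdev points into dev's private area (netdev_priv()), so the trace calls reading vdev->level_trace must run before free_netdev(dev). The hazard, reduced to its shape:

    struct vxgedev *vdev = netdev_priv(dev);

    free_netdev(dev);
    vxge_debug_init(vdev->level_trace, "...");  /* BAD: vdev was freed with dev */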
 
 /*
index 742a420..bb3b8a7 100644
@@ -692,7 +692,7 @@ static int nfp_pci_probe(struct pci_dev *pdev,
                goto err_pci_disable;
        }
 
-       devlink = devlink_alloc(&nfp_devlink_ops, sizeof(*pf));
+       devlink = devlink_alloc(&nfp_devlink_ops, sizeof(*pf), &pdev->dev);
        if (!devlink) {
                err = -ENOMEM;
                goto err_rel_regions;
index 15078f9..5bfa22a 100644
@@ -3281,17 +3281,12 @@ static int nfp_net_dp_swap_enable(struct nfp_net *nn, struct nfp_net_dp *dp)
        for (r = 0; r < nn->max_r_vecs; r++)
                nfp_net_vector_assign_rings(&nn->dp, &nn->r_vecs[r], r);
 
-       err = netif_set_real_num_rx_queues(nn->dp.netdev, nn->dp.num_rx_rings);
+       err = netif_set_real_num_queues(nn->dp.netdev,
+                                       nn->dp.num_stack_tx_rings,
+                                       nn->dp.num_rx_rings);
        if (err)
                return err;
 
-       if (nn->dp.netdev->real_num_tx_queues != nn->dp.num_stack_tx_rings) {
-               err = netif_set_real_num_tx_queues(nn->dp.netdev,
-                                                  nn->dp.num_stack_tx_rings);
-               if (err)
-                       return err;
-       }
-
        return nfp_net_set_config_and_enable(nn);
 }
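
netif_set_real_num_queues(), added to the core in this same cycle, updates the TX and RX queue counts in one call, applying increases before decreases and unwinding the first change if the second fails. The replaced open-coded sequence had no such unwind (sketch):

    err = netif_set_real_num_tx_queues(netdev, txq);
    if (err)
            return err;
    err = netif_set_real_num_rx_queues(netdev, rxq);
    if (err)
            return err;     /* the TX count was already changed */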
 
index a213784..0bf2ff5 100644
@@ -286,6 +286,8 @@ nfp_net_get_link_ksettings(struct net_device *netdev,
 
        /* Init to unknowns */
        ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE);
+       ethtool_link_ksettings_add_link_mode(cmd, supported, Pause);
+       ethtool_link_ksettings_add_link_mode(cmd, advertising, Pause);
        cmd->base.port = PORT_OTHER;
        cmd->base.speed = SPEED_UNKNOWN;
        cmd->base.duplex = DUPLEX_UNKNOWN;
index 921db40..d10a938 100644
@@ -701,7 +701,7 @@ int nfp_net_pci_probe(struct nfp_pf *pf)
        if (err)
                goto err_unmap;
 
-       err = devlink_register(devlink, &pf->pdev->dev);
+       err = devlink_register(devlink);
        if (err)
                goto err_app_clean;
 
index cd520e4..c7d0e19 100644
@@ -64,7 +64,7 @@ struct ionic *ionic_devlink_alloc(struct device *dev)
 {
        struct devlink *dl;
 
-       dl = devlink_alloc(&ionic_dl_ops, sizeof(struct ionic));
+       dl = devlink_alloc(&ionic_dl_ops, sizeof(struct ionic), dev);
 
        return devlink_priv(dl);
 }
@@ -82,7 +82,7 @@ int ionic_devlink_register(struct ionic *ionic)
        struct devlink_port_attrs attrs = {};
        int err;
 
-       err = devlink_register(dl, ionic->dev);
+       err = devlink_register(dl);
        if (err) {
                dev_warn(ionic->dev, "devlink_register failed: %d\n", err);
                return err;
index f21f80c..f52c47a 100644
@@ -30,7 +30,7 @@ static const u8 ionic_qtype_versions[IONIC_QTYPE_MAX] = {
                                      */
 };
 
-static void ionic_lif_rx_mode(struct ionic_lif *lif, unsigned int rx_mode);
+static void ionic_lif_rx_mode(struct ionic_lif *lif);
 static int ionic_lif_addr_add(struct ionic_lif *lif, const u8 *addr);
 static int ionic_lif_addr_del(struct ionic_lif *lif, const u8 *addr);
 static void ionic_link_status_check(struct ionic_lif *lif);
@@ -54,7 +54,19 @@ static void ionic_dim_work(struct work_struct *work)
        cur_moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
        qcq = container_of(dim, struct ionic_qcq, dim);
        new_coal = ionic_coal_usec_to_hw(qcq->q.lif->ionic, cur_moder.usec);
-       qcq->intr.dim_coal_hw = new_coal ? new_coal : 1;
+       new_coal = new_coal ? new_coal : 1;
+
+       if (qcq->intr.dim_coal_hw != new_coal) {
+               unsigned int qi = qcq->cq.bound_q->index;
+               struct ionic_lif *lif = qcq->q.lif;
+
+               qcq->intr.dim_coal_hw = new_coal;
+
+               ionic_intr_coal_init(lif->ionic->idev.intr_ctrl,
+                                    lif->rxqcqs[qi]->intr.index,
+                                    qcq->intr.dim_coal_hw);
+       }
+
        dim->state = DIM_START_MEASURE;
 }
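
ionic_dim_work() now keeps the computed value non-zero and programs the interrupt block immediately through ionic_intr_coal_init(), but only when DIM produced a value different from the cached dim_coal_hw, so steady traffic no longer costs a device write per DIM cycle. The guard, in its general shape (program_hw() illustrative):

    if (cached != new_val) {            /* write-on-change only */
            cached = new_val;
            program_hw(new_val);        /* ionic_intr_coal_init() above */
    }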
 
@@ -78,7 +90,7 @@ static void ionic_lif_deferred_work(struct work_struct *work)
 
                switch (w->type) {
                case IONIC_DW_TYPE_RX_MODE:
-                       ionic_lif_rx_mode(lif, w->rx_mode);
+                       ionic_lif_rx_mode(lif);
                        break;
                case IONIC_DW_TYPE_RX_ADDR_ADD:
                        ionic_lif_addr_add(lif, w->addr);
@@ -1302,10 +1314,8 @@ static int ionic_lif_addr_del(struct ionic_lif *lif, const u8 *addr)
        return 0;
 }
 
-static int ionic_lif_addr(struct ionic_lif *lif, const u8 *addr, bool add,
-                         bool can_sleep)
+static int ionic_lif_addr(struct ionic_lif *lif, const u8 *addr, bool add)
 {
-       struct ionic_deferred_work *work;
        unsigned int nmfilters;
        unsigned int nufilters;
 
@@ -1331,97 +1341,46 @@ static int ionic_lif_addr(struct ionic_lif *lif, const u8 *addr, bool add,
                        lif->nucast--;
        }
 
-       if (!can_sleep) {
-               work = kzalloc(sizeof(*work), GFP_ATOMIC);
-               if (!work)
-                       return -ENOMEM;
-               work->type = add ? IONIC_DW_TYPE_RX_ADDR_ADD :
-                                  IONIC_DW_TYPE_RX_ADDR_DEL;
-               memcpy(work->addr, addr, ETH_ALEN);
-               netdev_dbg(lif->netdev, "deferred: rx_filter %s %pM\n",
-                          add ? "add" : "del", addr);
-               ionic_lif_deferred_enqueue(&lif->deferred, work);
-       } else {
-               netdev_dbg(lif->netdev, "rx_filter %s %pM\n",
-                          add ? "add" : "del", addr);
-               if (add)
-                       return ionic_lif_addr_add(lif, addr);
-               else
-                       return ionic_lif_addr_del(lif, addr);
-       }
+       netdev_dbg(lif->netdev, "rx_filter %s %pM\n",
+                  add ? "add" : "del", addr);
+       if (add)
+               return ionic_lif_addr_add(lif, addr);
+       else
+               return ionic_lif_addr_del(lif, addr);
 
-       return 0;
 }
 
 static int ionic_addr_add(struct net_device *netdev, const u8 *addr)
 {
-       return ionic_lif_addr(netdev_priv(netdev), addr, ADD_ADDR, CAN_SLEEP);
-}
-
-static int ionic_ndo_addr_add(struct net_device *netdev, const u8 *addr)
-{
-       return ionic_lif_addr(netdev_priv(netdev), addr, ADD_ADDR, CAN_NOT_SLEEP);
+       return ionic_lif_addr(netdev_priv(netdev), addr, ADD_ADDR);
 }
 
 static int ionic_addr_del(struct net_device *netdev, const u8 *addr)
 {
-       return ionic_lif_addr(netdev_priv(netdev), addr, DEL_ADDR, CAN_SLEEP);
+       return ionic_lif_addr(netdev_priv(netdev), addr, DEL_ADDR);
 }
 
-static int ionic_ndo_addr_del(struct net_device *netdev, const u8 *addr)
+static void ionic_lif_rx_mode(struct ionic_lif *lif)
 {
-       return ionic_lif_addr(netdev_priv(netdev), addr, DEL_ADDR, CAN_NOT_SLEEP);
-}
-
-static void ionic_lif_rx_mode(struct ionic_lif *lif, unsigned int rx_mode)
-{
-       struct ionic_admin_ctx ctx = {
-               .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
-               .cmd.rx_mode_set = {
-                       .opcode = IONIC_CMD_RX_MODE_SET,
-                       .lif_index = cpu_to_le16(lif->index),
-                       .rx_mode = cpu_to_le16(rx_mode),
-               },
-       };
+       struct net_device *netdev = lif->netdev;
+       unsigned int nfilters;
+       unsigned int nd_flags;
        char buf[128];
-       int err;
+       u16 rx_mode;
        int i;
 #define REMAIN(__x) (sizeof(buf) - (__x))
 
-       i = scnprintf(buf, sizeof(buf), "rx_mode 0x%04x -> 0x%04x:",
-                     lif->rx_mode, rx_mode);
-       if (rx_mode & IONIC_RX_MODE_F_UNICAST)
-               i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_UNICAST");
-       if (rx_mode & IONIC_RX_MODE_F_MULTICAST)
-               i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_MULTICAST");
-       if (rx_mode & IONIC_RX_MODE_F_BROADCAST)
-               i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_BROADCAST");
-       if (rx_mode & IONIC_RX_MODE_F_PROMISC)
-               i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_PROMISC");
-       if (rx_mode & IONIC_RX_MODE_F_ALLMULTI)
-               i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_ALLMULTI");
-       netdev_dbg(lif->netdev, "lif%d %s\n", lif->index, buf);
-
-       err = ionic_adminq_post_wait(lif, &ctx);
-       if (err)
-               netdev_warn(lif->netdev, "set rx_mode 0x%04x failed: %d\n",
-                           rx_mode, err);
-       else
-               lif->rx_mode = rx_mode;
-}
+       mutex_lock(&lif->config_lock);
 
-static void ionic_set_rx_mode(struct net_device *netdev, bool can_sleep)
-{
-       struct ionic_lif *lif = netdev_priv(netdev);
-       struct ionic_deferred_work *work;
-       unsigned int nfilters;
-       unsigned int rx_mode;
+       /* grab the flags once for local use */
+       nd_flags = netdev->flags;
 
        rx_mode = IONIC_RX_MODE_F_UNICAST;
-       rx_mode |= (netdev->flags & IFF_MULTICAST) ? IONIC_RX_MODE_F_MULTICAST : 0;
-       rx_mode |= (netdev->flags & IFF_BROADCAST) ? IONIC_RX_MODE_F_BROADCAST : 0;
-       rx_mode |= (netdev->flags & IFF_PROMISC) ? IONIC_RX_MODE_F_PROMISC : 0;
-       rx_mode |= (netdev->flags & IFF_ALLMULTI) ? IONIC_RX_MODE_F_ALLMULTI : 0;
+       rx_mode |= (nd_flags & IFF_MULTICAST) ? IONIC_RX_MODE_F_MULTICAST : 0;
+       rx_mode |= (nd_flags & IFF_BROADCAST) ? IONIC_RX_MODE_F_BROADCAST : 0;
+       rx_mode |= (nd_flags & IFF_PROMISC) ? IONIC_RX_MODE_F_PROMISC : 0;
+       rx_mode |= (nd_flags & IFF_ALLMULTI) ? IONIC_RX_MODE_F_ALLMULTI : 0;
 
        /* sync unicast addresses
         * next check to see if we're in an overflow state
@@ -1430,49 +1389,83 @@ static void ionic_set_rx_mode(struct net_device *netdev, bool can_sleep)
         *       we remove our overflow flag and check the netdev flags
         *       to see if we can disable NIC PROMISC
         */
-       if (can_sleep)
-               __dev_uc_sync(netdev, ionic_addr_add, ionic_addr_del);
-       else
-               __dev_uc_sync(netdev, ionic_ndo_addr_add, ionic_ndo_addr_del);
+       __dev_uc_sync(netdev, ionic_addr_add, ionic_addr_del);
        nfilters = le32_to_cpu(lif->identity->eth.max_ucast_filters);
        if (netdev_uc_count(netdev) + 1 > nfilters) {
                rx_mode |= IONIC_RX_MODE_F_PROMISC;
                lif->uc_overflow = true;
        } else if (lif->uc_overflow) {
                lif->uc_overflow = false;
-               if (!(netdev->flags & IFF_PROMISC))
+               if (!(nd_flags & IFF_PROMISC))
                        rx_mode &= ~IONIC_RX_MODE_F_PROMISC;
        }
 
        /* same for multicast */
-       if (can_sleep)
-               __dev_mc_sync(netdev, ionic_addr_add, ionic_addr_del);
-       else
-               __dev_mc_sync(netdev, ionic_ndo_addr_add, ionic_ndo_addr_del);
+       __dev_mc_sync(netdev, ionic_addr_add, ionic_addr_del);
        nfilters = le32_to_cpu(lif->identity->eth.max_mcast_filters);
        if (netdev_mc_count(netdev) > nfilters) {
                rx_mode |= IONIC_RX_MODE_F_ALLMULTI;
                lif->mc_overflow = true;
        } else if (lif->mc_overflow) {
                lif->mc_overflow = false;
-               if (!(netdev->flags & IFF_ALLMULTI))
+               if (!(nd_flags & IFF_ALLMULTI))
                        rx_mode &= ~IONIC_RX_MODE_F_ALLMULTI;
        }
 
+       i = scnprintf(buf, sizeof(buf), "rx_mode 0x%04x -> 0x%04x:",
+                     lif->rx_mode, rx_mode);
+       if (rx_mode & IONIC_RX_MODE_F_UNICAST)
+               i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_UNICAST");
+       if (rx_mode & IONIC_RX_MODE_F_MULTICAST)
+               i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_MULTICAST");
+       if (rx_mode & IONIC_RX_MODE_F_BROADCAST)
+               i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_BROADCAST");
+       if (rx_mode & IONIC_RX_MODE_F_PROMISC)
+               i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_PROMISC");
+       if (rx_mode & IONIC_RX_MODE_F_ALLMULTI)
+               i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_ALLMULTI");
+       if (rx_mode & IONIC_RX_MODE_F_RDMA_SNIFFER)
+               i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_RDMA_SNIFFER");
+       netdev_dbg(netdev, "lif%d %s\n", lif->index, buf);
+
        if (lif->rx_mode != rx_mode) {
-               if (!can_sleep) {
-                       work = kzalloc(sizeof(*work), GFP_ATOMIC);
-                       if (!work) {
-                               netdev_err(lif->netdev, "rxmode change dropped\n");
-                               return;
-                       }
-                       work->type = IONIC_DW_TYPE_RX_MODE;
-                       work->rx_mode = rx_mode;
-                       netdev_dbg(lif->netdev, "deferred: rx_mode\n");
-                       ionic_lif_deferred_enqueue(&lif->deferred, work);
-               } else {
-                       ionic_lif_rx_mode(lif, rx_mode);
+               struct ionic_admin_ctx ctx = {
+                       .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
+                       .cmd.rx_mode_set = {
+                               .opcode = IONIC_CMD_RX_MODE_SET,
+                               .lif_index = cpu_to_le16(lif->index),
+                       },
+               };
+               int err;
+
+               ctx.cmd.rx_mode_set.rx_mode = cpu_to_le16(rx_mode);
+               err = ionic_adminq_post_wait(lif, &ctx);
+               if (err)
+                       netdev_warn(netdev, "set rx_mode 0x%04x failed: %d\n",
+                                   rx_mode, err);
+               else
+                       lif->rx_mode = rx_mode;
+       }
+
+       mutex_unlock(&lif->config_lock);
+}
+
+static void ionic_set_rx_mode(struct net_device *netdev, bool can_sleep)
+{
+       struct ionic_lif *lif = netdev_priv(netdev);
+       struct ionic_deferred_work *work;
+
+       if (!can_sleep) {
+               work = kzalloc(sizeof(*work), GFP_ATOMIC);
+               if (!work) {
+                       netdev_err(lif->netdev, "rxmode change dropped\n");
+                       return;
                }
+               work->type = IONIC_DW_TYPE_RX_MODE;
+               netdev_dbg(lif->netdev, "deferred: rx_mode\n");
+               ionic_lif_deferred_enqueue(&lif->deferred, work);
+       } else {
+               ionic_lif_rx_mode(lif);
        }
 }
 
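The rework above folds the old can_sleep plumbing into one place: all sleepable work now lives in ionic_lif_rx_mode() under the new config_lock, and the atomic path merely queues a work item. A minimal sketch of that defer-when-atomic idiom, using the generic workqueue API and hypothetical my_* names rather than the driver's private deferred-work list:

#include <linux/workqueue.h>
#include <linux/mutex.h>

struct my_lif {
	struct mutex config_lock;	/* serializes configuration changes */
	struct work_struct rx_mode_work; /* INIT_WORK()ed at lif init time */
};

static void my_rx_mode(struct my_lif *lif)
{
	mutex_lock(&lif->config_lock);
	/* sleepable work: sync filters, post an admin command, ... */
	mutex_unlock(&lif->config_lock);
}

static void my_rx_mode_work(struct work_struct *work)
{
	my_rx_mode(container_of(work, struct my_lif, rx_mode_work));
}

static void my_set_rx_mode(struct my_lif *lif, bool can_sleep)
{
	if (!can_sleep)
		schedule_work(&lif->rx_mode_work); /* finish in process context */
	else
		my_rx_mode(lif);
}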
@@ -3074,6 +3067,7 @@ void ionic_lif_deinit(struct ionic_lif *lif)
        ionic_lif_qcq_deinit(lif, lif->notifyqcq);
        ionic_lif_qcq_deinit(lif, lif->adminqcq);
 
+       mutex_destroy(&lif->config_lock);
        mutex_destroy(&lif->queue_lock);
        ionic_lif_reset(lif);
 }
@@ -3201,7 +3195,7 @@ static int ionic_station_set(struct ionic_lif *lif)
                 */
                if (!ether_addr_equal(ctx.comp.lif_getattr.mac,
                                      netdev->dev_addr))
-                       ionic_lif_addr(lif, netdev->dev_addr, ADD_ADDR, CAN_SLEEP);
+                       ionic_lif_addr(lif, netdev->dev_addr, ADD_ADDR);
        } else {
                /* Update the netdev mac with the device's mac */
                memcpy(addr.sa_data, ctx.comp.lif_getattr.mac, netdev->addr_len);
@@ -3218,7 +3212,7 @@ static int ionic_station_set(struct ionic_lif *lif)
 
        netdev_dbg(lif->netdev, "adding station MAC addr %pM\n",
                   netdev->dev_addr);
-       ionic_lif_addr(lif, netdev->dev_addr, ADD_ADDR, CAN_SLEEP);
+       ionic_lif_addr(lif, netdev->dev_addr, ADD_ADDR);
 
        return 0;
 }
@@ -3241,6 +3235,7 @@ int ionic_lif_init(struct ionic_lif *lif)
 
        lif->hw_index = le16_to_cpu(comp.hw_index);
        mutex_init(&lif->queue_lock);
+       mutex_init(&lif->config_lock);
 
        /* now that we have the hw_index we can figure out our doorbell page */
        lif->dbid_count = le32_to_cpu(lif->ionic->ident.dev.ndbpgs_per_lif);
index 346506f..69ab59f 100644 (file)
@@ -108,7 +108,6 @@ struct ionic_deferred_work {
        struct list_head list;
        enum ionic_deferred_work_type type;
        union {
-               unsigned int rx_mode;
                u8 addr[ETH_ALEN];
                u8 fw_status;
        };
@@ -179,6 +178,7 @@ struct ionic_lif {
        unsigned int index;
        unsigned int hw_index;
        struct mutex queue_lock;        /* lock for queue structures */
+       struct mutex config_lock;       /* lock for config actions */
        spinlock_t adminq_lock;         /* lock for AdminQ operations */
        struct ionic_qcq *adminqcq;
        struct ionic_qcq *notifyqcq;
@@ -199,7 +199,7 @@ struct ionic_lif {
        unsigned int nrxq_descs;
        u32 rx_copybreak;
        u64 rxq_features;
-       unsigned int rx_mode;
+       u16 rx_mode;
        u64 hw_features;
        bool registered;
        bool mc_overflow;
@@ -302,7 +302,7 @@ int ionic_lif_identify(struct ionic *ionic, u8 lif_type,
 int ionic_lif_size(struct ionic *ionic);
 
 #if IS_ENABLED(CONFIG_PTP_1588_CLOCK)
-int ionic_lif_hwstamp_replay(struct ionic_lif *lif);
+void ionic_lif_hwstamp_replay(struct ionic_lif *lif);
 int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr);
 int ionic_lif_hwstamp_get(struct ionic_lif *lif, struct ifreq *ifr);
 ktime_t ionic_lif_phc_ktime(struct ionic_lif *lif, u64 counter);
@@ -311,10 +311,7 @@ void ionic_lif_unregister_phc(struct ionic_lif *lif);
 void ionic_lif_alloc_phc(struct ionic_lif *lif);
 void ionic_lif_free_phc(struct ionic_lif *lif);
 #else
-static inline int ionic_lif_hwstamp_replay(struct ionic_lif *lif)
-{
-       return -EOPNOTSUPP;
-}
+static inline void ionic_lif_hwstamp_replay(struct ionic_lif *lif) {}
 
 static inline int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr)
 {
index 736ebc5..afc45da 100644 (file)
@@ -188,6 +188,9 @@ int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr)
        struct hwtstamp_config config;
        int err;
 
+       if (!lif->phc || !lif->phc->ptp)
+               return -EOPNOTSUPP;
+
        if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
                return -EFAULT;
 
@@ -203,15 +206,16 @@ int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr)
        return 0;
 }
 
-int ionic_lif_hwstamp_replay(struct ionic_lif *lif)
+void ionic_lif_hwstamp_replay(struct ionic_lif *lif)
 {
        int err;
 
+       if (!lif->phc || !lif->phc->ptp)
+               return;
+
        err = ionic_lif_hwstamp_set_ts_config(lif, NULL);
        if (err)
                netdev_info(lif->netdev, "hwstamp replay failed: %d\n", err);
-
-       return err;
 }
 
 int ionic_lif_hwstamp_get(struct ionic_lif *lif, struct ifreq *ifr)
index 2ba1924..37c3958 100644 (file)
@@ -269,12 +269,11 @@ static void ionic_rx_clean(struct ionic_queue *q,
                }
        }
 
-       if (likely(netdev->features & NETIF_F_RXCSUM)) {
-               if (comp->csum_flags & IONIC_RXQ_COMP_CSUM_F_CALC) {
-                       skb->ip_summed = CHECKSUM_COMPLETE;
-                       skb->csum = (__force __wsum)le16_to_cpu(comp->csum);
-                       stats->csum_complete++;
-               }
+       if (likely(netdev->features & NETIF_F_RXCSUM) &&
+           (comp->csum_flags & IONIC_RXQ_COMP_CSUM_F_CALC)) {
+               skb->ip_summed = CHECKSUM_COMPLETE;
+               skb->csum = (__force __wsum)le16_to_cpu(comp->csum);
+               stats->csum_complete++;
        } else {
                stats->csum_none++;
        }
@@ -446,11 +445,12 @@ void ionic_rx_empty(struct ionic_queue *q)
        q->tail_idx = 0;
 }
 
-static void ionic_dim_update(struct ionic_qcq *qcq)
+static void ionic_dim_update(struct ionic_qcq *qcq, int napi_mode)
 {
        struct dim_sample dim_sample;
        struct ionic_lif *lif;
        unsigned int qi;
+       u64 pkts, bytes;
 
        if (!qcq->intr.dim_coal_hw)
                return;
@@ -458,14 +458,23 @@ static void ionic_dim_update(struct ionic_qcq *qcq)
        lif = qcq->q.lif;
        qi = qcq->cq.bound_q->index;
 
-       ionic_intr_coal_init(lif->ionic->idev.intr_ctrl,
-                            lif->rxqcqs[qi]->intr.index,
-                            qcq->intr.dim_coal_hw);
+       switch (napi_mode) {
+       case IONIC_LIF_F_TX_DIM_INTR:
+               pkts = lif->txqstats[qi].pkts;
+               bytes = lif->txqstats[qi].bytes;
+               break;
+       case IONIC_LIF_F_RX_DIM_INTR:
+               pkts = lif->rxqstats[qi].pkts;
+               bytes = lif->rxqstats[qi].bytes;
+               break;
+       default:
+               pkts = lif->txqstats[qi].pkts + lif->rxqstats[qi].pkts;
+               bytes = lif->txqstats[qi].bytes + lif->rxqstats[qi].bytes;
+               break;
+       }
 
        dim_update_sample(qcq->cq.bound_intr->rearm_count,
-                         lif->txqstats[qi].pkts,
-                         lif->txqstats[qi].bytes,
-                         &dim_sample);
+                         pkts, bytes, &dim_sample);
 
        net_dim(&qcq->dim, dim_sample);
 }
@@ -486,7 +495,7 @@ int ionic_tx_napi(struct napi_struct *napi, int budget)
                                     ionic_tx_service, NULL, NULL);
 
        if (work_done < budget && napi_complete_done(napi, work_done)) {
-               ionic_dim_update(qcq);
+               ionic_dim_update(qcq, IONIC_LIF_F_TX_DIM_INTR);
                flags |= IONIC_INTR_CRED_UNMASK;
                cq->bound_intr->rearm_count++;
        }
@@ -525,7 +534,7 @@ int ionic_rx_napi(struct napi_struct *napi, int budget)
                ionic_rx_fill(cq->bound_q);
 
        if (work_done < budget && napi_complete_done(napi, work_done)) {
-               ionic_dim_update(qcq);
+               ionic_dim_update(qcq, IONIC_LIF_F_RX_DIM_INTR);
                flags |= IONIC_INTR_CRED_UNMASK;
                cq->bound_intr->rearm_count++;
        }
@@ -571,7 +580,7 @@ int ionic_txrx_napi(struct napi_struct *napi, int budget)
                ionic_rx_fill(rxcq->bound_q);
 
        if (rx_work_done < budget && napi_complete_done(napi, rx_work_done)) {
-               ionic_dim_update(qcq);
+               ionic_dim_update(qcq, 0);
                flags |= IONIC_INTR_CRED_UNMASK;
                rxcq->bound_intr->rearm_count++;
        }
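For context on the hunks above: net_dim() consumes (event count, packets, bytes) samples and walks a moderation profile, eventually scheduling dim->work to reprogram the coalescing registers; the change here only selects which counters feed the sample. A hedged sketch of the sampling side, with a hypothetical my_queue type:

#include <linux/dim.h>

struct my_queue {
	struct dim dim;		/* net_dim state machine */
	u16 rearm_count;	/* interrupt rearm events seen */
};

static void my_dim_update(struct my_queue *q, u64 pkts, u64 bytes)
{
	struct dim_sample sample;

	dim_update_sample(q->rearm_count, pkts, bytes, &sample);
	net_dim(&q->dim, sample);	/* may schedule dim.work to retune */
}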
index cf7f4da..4c7501b 100644 (file)
@@ -207,14 +207,15 @@ struct devlink *qed_devlink_register(struct qed_dev *cdev)
        struct devlink *dl;
        int rc;
 
-       dl = devlink_alloc(&qed_dl_ops, sizeof(struct qed_devlink));
+       dl = devlink_alloc(&qed_dl_ops, sizeof(struct qed_devlink),
+                          &cdev->pdev->dev);
        if (!dl)
                return ERR_PTR(-ENOMEM);
 
        qdevlink = devlink_priv(dl);
        qdevlink->cdev = cdev;
 
-       rc = devlink_register(dl, &cdev->pdev->dev);
+       rc = devlink_register(dl);
        if (rc)
                goto err_free;
 
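These qed hunks follow a devlink core API change: the owning struct device is now given to devlink_alloc() up front, and devlink_register() takes only the instance. A minimal sketch of the new calling convention (my_priv and the empty ops table are placeholders):

#include <linux/err.h>
#include <net/devlink.h>

struct my_priv { void *drvdata; };
static const struct devlink_ops my_devlink_ops;

static struct devlink *my_devlink_create(struct device *dev)
{
	struct devlink *dl;
	int err;

	dl = devlink_alloc(&my_devlink_ops, sizeof(struct my_priv), dev);
	if (!dl)
		return ERR_PTR(-ENOMEM);

	err = devlink_register(dl);	/* no device argument anymore */
	if (err) {
		devlink_free(dl);
		return ERR_PTR(err);
	}
	return dl;
}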
index 578935f..ab6d4f7 100644 (file)
@@ -464,12 +464,19 @@ static int qed_dorq_attn_int_sts(struct qed_hwfn *p_hwfn)
        u32 int_sts, first_drop_reason, details, address, all_drops_reason;
        struct qed_ptt *p_ptt = p_hwfn->p_dpc_ptt;
 
+       int_sts = qed_rd(p_hwfn, p_ptt, DORQ_REG_INT_STS);
+       if (int_sts == 0xdeadbeaf) {
+               DP_NOTICE(p_hwfn->cdev,
+                         "DORQ is being reset, skipping int_sts handler\n");
+
+               return 0;
+       }
+
        /* int_sts may be zero since all PFs were interrupted for doorbell
         * overflow but another one already handled it. Can abort here. If
         * this PF also requires overflow recovery we will be interrupted again.
         * The masked almost full indication may also be set. Ignoring.
         */
-       int_sts = qed_rd(p_hwfn, p_ptt, DORQ_REG_INT_STS);
        if (!(int_sts & ~DORQ_REG_INT_STS_DORQ_FIFO_AFULL))
                return 0;
 
@@ -528,6 +535,9 @@ static int qed_dorq_attn_int_sts(struct qed_hwfn *p_hwfn)
 
 static int qed_dorq_attn_cb(struct qed_hwfn *p_hwfn)
 {
+       if (p_hwfn->cdev->recov_in_prog)
+               return 0;
+
        p_hwfn->db_recovery_info.dorq_attn = true;
        qed_dorq_attn_overflow(p_hwfn);
 
index a998611..fc8b3e6 100644 (file)
@@ -1624,8 +1624,6 @@ qed_iwarp_get_listener(struct qed_hwfn *p_hwfn,
        static const u32 ip_zero[4] = { 0, 0, 0, 0 };
        bool found = false;
 
-       qed_iwarp_print_cm_info(p_hwfn, cm_info);
-
        list_for_each_entry(listener,
                            &p_hwfn->p_rdma_info->iwarp.listen_list,
                            list_entry) {
index aa48b1b..6871d89 100644 (file)
@@ -1215,6 +1215,10 @@ static void qed_slowpath_task(struct work_struct *work)
 
        if (test_and_clear_bit(QED_SLOWPATH_PERIODIC_DB_REC,
                               &hwfn->slowpath_task_flags)) {
+               /* skip qed_db_rec_handler during recovery/unload */
+               if (hwfn->cdev->recov_in_prog || !hwfn->slowpath_wq_active)
+                       goto out;
+
                qed_db_rec_handler(hwfn, ptt);
                if (hwfn->periodic_db_rec_count--)
                        qed_slowpath_delayed_work(hwfn,
@@ -1222,6 +1226,7 @@ static void qed_slowpath_task(struct work_struct *work)
                                                  QED_PERIODIC_DB_REC_INTERVAL);
        }
 
+out:
        qed_ptt_release(hwfn, ptt);
 }
 
index c1dd71d..3b84d00 100644 (file)
@@ -4,7 +4,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/pci.h>
-#include <linux/kernel.h>
 #include <linux/list.h>
 #include <linux/mm.h>
 #include <linux/types.h>
index 8693117..66c69f0 100644 (file)
@@ -492,6 +492,7 @@ struct qede_fastpath {
 #define QEDE_SP_HW_ERR                  4
 #define QEDE_SP_ARFS_CONFIG             5
 #define QEDE_SP_AER                    7
+#define QEDE_SP_DISABLE                        8
 
 #ifdef CONFIG_RFS_ACCEL
 int qede_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
index c59b72c..a2e4dfb 100644 (file)
@@ -831,7 +831,7 @@ int qede_configure_vlan_filters(struct qede_dev *edev)
 int qede_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
 {
        struct qede_dev *edev = netdev_priv(dev);
-       struct qede_vlan *vlan = NULL;
+       struct qede_vlan *vlan;
        int rc = 0;
 
        DP_VERBOSE(edev, NETIF_MSG_IFDOWN, "Removing vlan 0x%04x\n", vid);
@@ -842,7 +842,7 @@ int qede_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
                if (vlan->vid == vid)
                        break;
 
-       if (!vlan || (vlan->vid != vid)) {
+       if (list_entry_is_head(vlan, &edev->vlan_list, list)) {
                DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN),
                           "Vlan isn't configured\n");
                goto out;
index 033bf2c..d400e9b 100644 (file)
@@ -1005,6 +1005,13 @@ static void qede_sp_task(struct work_struct *work)
        struct qede_dev *edev = container_of(work, struct qede_dev,
                                             sp_task.work);
 
+       /* Disable execution of this deferred work once qede
+        * removal is in progress; this stops any future
+        * scheduling of sp_task.
+        */
+       if (test_bit(QEDE_SP_DISABLE, &edev->sp_flags))
+               return;
+
        /* The locking scheme depends on the specific flag:
         * In case of QEDE_SP_RECOVERY, acquiring the RTNL lock is required to
         * ensure that ongoing flows are ended and new ones are not started.
@@ -1292,6 +1299,7 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode)
        qede_rdma_dev_remove(edev, (mode == QEDE_REMOVE_RECOVERY));
 
        if (mode != QEDE_REMOVE_RECOVERY) {
+               set_bit(QEDE_SP_DISABLE, &edev->sp_flags);
                unregister_netdev(ndev);
 
                cancel_delayed_work_sync(&edev->sp_task);
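QEDE_SP_DISABLE closes a small shutdown race: the bit is set before unregister_netdev() and cancel_delayed_work_sync(), so a work item that fires inside that window bails out instead of touching a half-torn-down device. The idiom in isolation (illustrative names):

#include <linux/bitops.h>
#include <linux/workqueue.h>

#define MY_SP_DISABLE	0

static unsigned long my_sp_flags;
static struct delayed_work my_sp_task;

static void my_sp_work(struct work_struct *work)
{
	if (test_bit(MY_SP_DISABLE, &my_sp_flags))
		return;			/* removal in progress */
	/* ... normal slow-path handling ... */
}

static void my_remove(void)
{
	set_bit(MY_SP_DISABLE, &my_sp_flags);	/* stop future runs first */
	cancel_delayed_work_sync(&my_sp_task);	/* then flush any current one */
}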
index 2376b27..c00ad57 100644 (file)
@@ -154,7 +154,7 @@ static int ql_wait_for_drvr_lock(struct ql3_adapter *qdev)
                                      "driver lock acquired\n");
                        return 1;
                }
-               ssleep(1);
+               mdelay(1000);
        } while (++i < 10);
 
        netdev_err(qdev->ndev, "Timed out waiting for driver lock...\n");
@@ -3274,7 +3274,7 @@ static int ql_adapter_reset(struct ql3_adapter *qdev)
                if ((value & ISP_CONTROL_SR) == 0)
                        break;
 
-               ssleep(1);
+               mdelay(1000);
        } while ((--max_wait_time));
 
        /*
@@ -3310,7 +3310,7 @@ static int ql_adapter_reset(struct ql3_adapter *qdev)
                                                   ispControlStatus);
                        if ((value & ISP_CONTROL_FSR) == 0)
                                break;
-                       ssleep(1);
+                       mdelay(1000);
                } while ((--max_wait_time));
        }
        if (max_wait_time == 0)
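The ssleep() to mdelay() conversions here are about context, not style: ssleep() sleeps and is only legal in process context, while mdelay() busy-waits and is safe with a lock held, which appears to be the situation in these qla3xxx reset paths. As a rule of thumb:

#include <linux/delay.h>

static void my_wait_can_sleep(void)
{
	msleep(1000);	/* yields the CPU; process context only */
}

static void my_wait_atomic(void)
{
	mdelay(1000);	/* burns the CPU; legal under a spinlock, keep it short */
}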
index d8f0863..f6b6651 100644 (file)
@@ -1021,7 +1021,7 @@ clear_diag_irq:
 
 static void qlcnic_create_loopback_buff(unsigned char *data, u8 mac[])
 {
-       unsigned char random_data[] = {0xa8, 0x06, 0x45, 0x00};
+       static const unsigned char random_data[] = {0xa8, 0x06, 0x45, 0x00};
 
        memset(data, 0x4e, QLCNIC_ILB_PKT_SIZE);
 
index ec6f7f9..60a0c0e 100644 (file)
@@ -443,7 +443,7 @@ static int sis900_probe(struct pci_dev *pci_dev,
 #endif
 
        /* setup various bits in PCI command register */
-       ret = pci_enable_device(pci_dev);
+       ret = pcim_enable_device(pci_dev);
        if(ret) return ret;
 
        i = dma_set_mask(&pci_dev->dev, DMA_BIT_MASK(32));
@@ -469,7 +469,7 @@ static int sis900_probe(struct pci_dev *pci_dev,
        ioaddr = pci_iomap(pci_dev, 0, 0);
        if (!ioaddr) {
                ret = -ENOMEM;
-               goto err_out_cleardev;
+               goto err_out;
        }
 
        sis_priv = netdev_priv(net_dev);
@@ -581,8 +581,6 @@ err_unmap_tx:
                          sis_priv->tx_ring_dma);
 err_out_unmap:
        pci_iounmap(pci_dev, ioaddr);
-err_out_cleardev:
-       pci_release_regions(pci_dev);
  err_out:
        free_netdev(net_dev);
        return ret;
@@ -2499,7 +2497,6 @@ static void sis900_remove(struct pci_dev *pci_dev)
                          sis_priv->tx_ring_dma);
        pci_iounmap(pci_dev, sis_priv->ioaddr);
        free_netdev(net_dev);
-       pci_release_regions(pci_dev);
 }
 
 static int __maybe_unused sis900_suspend(struct device *dev)
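Switching sis900 to pcim_enable_device() puts the device under devres management; region requests made afterwards are released automatically on detach, which is why the err_out_cleardev label and the pci_release_regions() calls can simply be deleted. A rough sketch of the managed probe shape (my_probe is illustrative):

#include <linux/pci.h>

static int my_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	int err;

	err = pcim_enable_device(pdev); /* auto-disabled when the driver detaches */
	if (err)
		return err;

	/* on a managed device this request is undone automatically, so
	 * neither the error paths nor remove() call pci_release_regions()
	 */
	err = pci_request_regions(pdev, "my_drv");
	if (err)
		return err;

	/* ... BAR mapping and the rest of device setup ... */
	return 0;
}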
index c52a38d..72e42a8 100644 (file)
@@ -23,6 +23,7 @@ config SMC9194
        tristate "SMC 9194 support"
        depends on ISA
        select CRC32
+       select NETDEV_LEGACY_INIT
        help
          This is support for the SMC9xxx based Ethernet cards. Choose this
          option if you have a DELL laptop with the docking station, or
index bf7c8c8..0ce403f 100644 (file)
@@ -1508,7 +1508,7 @@ MODULE_PARM_DESC(io, "SMC 99194 I/O base address");
 MODULE_PARM_DESC(irq, "SMC 99194 IRQ number");
 MODULE_PARM_DESC(ifport, "SMC 99194 interface port (0-default, 1-TP, 2-AUI)");
 
-int __init init_module(void)
+static int __init smc_init_module(void)
 {
        if (io == 0)
                printk(KERN_WARNING
@@ -1518,13 +1518,15 @@ int __init init_module(void)
        devSMC9194 = smc_init(-1);
        return PTR_ERR_OR_ZERO(devSMC9194);
 }
+module_init(smc_init_module);
 
-void __exit cleanup_module(void)
+static void __exit smc_cleanup_module(void)
 {
        unregister_netdev(devSMC9194);
        free_irq(devSMC9194->irq, devSMC9194);
        release_region(devSMC9194->base_addr, SMC_IO_EXTENT);
        free_netdev(devSMC9194);
 }
+module_exit(smc_cleanup_module);
 
 #endif /* MODULE */
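The smc9194 change is the standard modernization of legacy module entry points: the magic init_module()/cleanup_module() names become static functions registered through module_init()/module_exit(). The smallest complete form looks like:

#include <linux/module.h>

static int __init my_init(void)
{
	return 0;	/* registration would go here */
}
module_init(my_init);

static void __exit my_exit(void)
{
	/* unregistration/teardown */
}
module_exit(my_exit);

MODULE_LICENSE("GPL");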
index 28dd0ed..f7dc845 100644 (file)
@@ -289,10 +289,7 @@ static int ipq806x_gmac_probe(struct platform_device *pdev)
                val &= ~NSS_COMMON_GMAC_CTL_PHY_IFACE_SEL;
                break;
        default:
-               dev_err(&pdev->dev, "Unsupported PHY mode: \"%s\"\n",
-                       phy_modes(gmac->phy_mode));
-               err = -EINVAL;
-               goto err_remove_config_dt;
+               goto err_unsupported_phy;
        }
        regmap_write(gmac->nss_common, NSS_COMMON_GMAC_CTL(gmac->id), val);
 
@@ -309,10 +306,7 @@ static int ipq806x_gmac_probe(struct platform_device *pdev)
                        NSS_COMMON_CLK_SRC_CTRL_OFFSET(gmac->id);
                break;
        default:
-               dev_err(&pdev->dev, "Unsupported PHY mode: \"%s\"\n",
-                       phy_modes(gmac->phy_mode));
-               err = -EINVAL;
-               goto err_remove_config_dt;
+               goto err_unsupported_phy;
        }
        regmap_write(gmac->nss_common, NSS_COMMON_CLK_SRC_CTRL, val);
 
@@ -329,8 +323,7 @@ static int ipq806x_gmac_probe(struct platform_device *pdev)
                                NSS_COMMON_CLK_GATE_GMII_TX_EN(gmac->id);
                break;
        default:
-               /* We don't get here; the switch above will have errored out */
-               unreachable();
+               goto err_unsupported_phy;
        }
        regmap_write(gmac->nss_common, NSS_COMMON_CLK_GATE, val);
 
@@ -361,6 +354,11 @@ static int ipq806x_gmac_probe(struct platform_device *pdev)
 
        return 0;
 
+err_unsupported_phy:
+       dev_err(&pdev->dev, "Unsupported PHY mode: \"%s\"\n",
+               phy_modes(gmac->phy_mode));
+       err = -EINVAL;
+
 err_remove_config_dt:
        stmmac_remove_config_dt(pdev, plat_dat);
 
index 67ba083..b217453 100644 (file)
@@ -1249,6 +1249,7 @@ const struct stmmac_ops dwmac410_ops = {
        .config_l3_filter = dwmac4_config_l3_filter,
        .config_l4_filter = dwmac4_config_l4_filter,
        .est_configure = dwmac5_est_configure,
+       .est_irq_status = dwmac5_est_irq_status,
        .fpe_configure = dwmac5_fpe_configure,
        .fpe_send_mpacket = dwmac5_fpe_send_mpacket,
        .fpe_irq_status = dwmac5_fpe_irq_status,
@@ -1300,6 +1301,7 @@ const struct stmmac_ops dwmac510_ops = {
        .config_l3_filter = dwmac4_config_l3_filter,
        .config_l4_filter = dwmac4_config_l4_filter,
        .est_configure = dwmac5_est_configure,
+       .est_irq_status = dwmac5_est_irq_status,
        .fpe_configure = dwmac5_fpe_configure,
        .fpe_send_mpacket = dwmac5_fpe_send_mpacket,
        .fpe_irq_status = dwmac5_fpe_irq_status,
index 006fd42..1501e89 100644 (file)
@@ -8191,8 +8191,9 @@ static int niu_pci_vpd_fetch(struct niu *np, u32 start)
                err = niu_pci_vpd_scan_props(np, here, end);
                if (err < 0)
                        return err;
+               /* err == 1 is not an error */
                if (err == 1)
-                       return -EINVAL;
+                       return 0;
        }
        return 0;
 }
@@ -9207,7 +9208,7 @@ static int niu_get_of_props(struct niu *np)
        else
                dp = pci_device_to_OF_node(np->pdev);
 
-       phy_type = of_get_property(dp, "phy-type", &prop_len);
+       phy_type = of_get_property(dp, "phy-type", NULL);
        if (!phy_type) {
                netdev_err(dev, "%pOF: OF node lacks phy-type property\n", dp);
                return -EINVAL;
@@ -9241,12 +9242,12 @@ static int niu_get_of_props(struct niu *np)
                return -EINVAL;
        }
 
-       model = of_get_property(dp, "model", &prop_len);
+       model = of_get_property(dp, "model", NULL);
 
        if (model)
                strcpy(np->vpd.model, model);
 
-       if (of_find_property(dp, "hot-swappable-phy", &prop_len)) {
+       if (of_find_property(dp, "hot-swappable-phy", NULL)) {
                np->flags |= (NIU_FLAGS_10G | NIU_FLAGS_FIBER |
                        NIU_FLAGS_HOTPLUG_PHY);
        }
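The niu hunks drop the &prop_len argument because nothing reads the length back; both of_get_property() and of_find_property() accept a NULL length pointer. For example:

#include <linux/of.h>

static const char *my_get_model(const struct device_node *np)
{
	/* NULL length pointer: only the value (or existence) matters */
	return of_get_property(np, "model", NULL);
}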
index 7ac8e5e..affcf92 100644 (file)
@@ -64,7 +64,6 @@ config TI_CPSW
 config TI_CPSW_SWITCHDEV
        tristate "TI CPSW Switch Support with switchdev"
        depends on ARCH_DAVINCI || ARCH_OMAP2PLUS || COMPILE_TEST
-       depends on BRIDGE || BRIDGE=n
        depends on NET_SWITCHDEV
        depends on TI_CPTS || !TI_CPTS
        select PAGE_POOL
@@ -110,7 +109,6 @@ config TI_K3_AM65_CPSW_NUSS
 config TI_K3_AM65_CPSW_SWITCHDEV
        bool "TI K3 AM654x/J721E CPSW Switch mode support"
        depends on TI_K3_AM65_CPSW_NUSS
-       depends on BRIDGE || BRIDGE=n
        depends on NET_SWITCHDEV
        help
         This enables switchdev support for TI K3 CPSWxG Ethernet
index 4f67d1a..130346f 100644 (file)
@@ -7,7 +7,6 @@
 
 #include <linux/clk.h>
 #include <linux/etherdevice.h>
-#include <linux/if_bridge.h>
 #include <linux/if_vlan.h>
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
@@ -28,6 +27,7 @@
 #include <linux/sys_soc.h>
 #include <linux/dma/ti-cppi5.h>
 #include <linux/dma/k3-udma-glue.h>
+#include <net/switchdev.h>
 
 #include "cpsw_ale.h"
 #include "cpsw_sl.h"
@@ -519,6 +519,10 @@ static int am65_cpsw_nuss_common_open(struct am65_cpsw_common *common,
        }
 
        napi_enable(&common->napi_rx);
+       if (common->rx_irq_disabled) {
+               common->rx_irq_disabled = false;
+               enable_irq(common->rx_chns.irq);
+       }
 
        dev_dbg(common->dev, "cpsw_nuss started\n");
        return 0;
@@ -872,8 +876,12 @@ static int am65_cpsw_nuss_rx_poll(struct napi_struct *napi_rx, int budget)
 
        dev_dbg(common->dev, "%s num_rx:%d %d\n", __func__, num_rx, budget);
 
-       if (num_rx < budget && napi_complete_done(napi_rx, num_rx))
-               enable_irq(common->rx_chns.irq);
+       if (num_rx < budget && napi_complete_done(napi_rx, num_rx)) {
+               if (common->rx_irq_disabled) {
+                       common->rx_irq_disabled = false;
+                       enable_irq(common->rx_chns.irq);
+               }
+       }
 
        return num_rx;
 }
@@ -1078,19 +1086,20 @@ static int am65_cpsw_nuss_tx_poll(struct napi_struct *napi_tx, int budget)
        else
                num_tx = am65_cpsw_nuss_tx_compl_packets(tx_chn->common, tx_chn->id, budget);
 
-       num_tx = min(num_tx, budget);
-       if (num_tx < budget) {
-               napi_complete(napi_tx);
+       if (num_tx >= budget)
+               return budget;
+
+       if (napi_complete_done(napi_tx, num_tx))
                enable_irq(tx_chn->irq);
-       }
 
-       return num_tx;
+       return 0;
 }
 
 static irqreturn_t am65_cpsw_nuss_rx_irq(int irq, void *dev_id)
 {
        struct am65_cpsw_common *common = dev_id;
 
+       common->rx_irq_disabled = true;
        disable_irq_nosync(irq);
        napi_schedule(&common->napi_rx);
 
@@ -2061,8 +2070,12 @@ static void am65_cpsw_port_offload_fwd_mark_update(struct am65_cpsw_common *comm
 
        for (i = 1; i <= common->port_num; i++) {
                struct am65_cpsw_port *port = am65_common_get_port(common, i);
-               struct am65_cpsw_ndev_priv *priv = am65_ndev_to_priv(port->ndev);
+               struct am65_cpsw_ndev_priv *priv;
+
+               if (!port->ndev)
+                       continue;
 
+               priv = am65_ndev_to_priv(port->ndev);
                priv->offload_fwd_mark = set_val;
        }
 }
@@ -2409,14 +2422,14 @@ static int am65_cpsw_nuss_register_devlink(struct am65_cpsw_common *common)
        int i;
 
        common->devlink =
-               devlink_alloc(&am65_cpsw_devlink_ops, sizeof(*dl_priv));
+               devlink_alloc(&am65_cpsw_devlink_ops, sizeof(*dl_priv), dev);
        if (!common->devlink)
                return -ENOMEM;
 
        dl_priv = devlink_priv(common->devlink);
        dl_priv->common = common;
 
-       ret = devlink_register(common->devlink, dev);
+       ret = devlink_register(common->devlink);
        if (ret) {
                dev_err(dev, "devlink reg fail ret:%d\n", ret);
                goto dl_free;
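The new rx_irq_disabled flag keeps the disable_irq_nosync()/enable_irq() calls balanced: the hard IRQ handler records that it masked the line before scheduling NAPI, and both the poll-complete path and the open path re-enable only when the flag says the line is actually off. Condensed (types abbreviated):

#include <linux/interrupt.h>
#include <linux/netdevice.h>

struct my_common {
	struct napi_struct napi_rx;
	bool rx_irq_disabled;
	int rx_irq;
};

static irqreturn_t my_rx_irq(int irq, void *dev_id)
{
	struct my_common *c = dev_id;

	c->rx_irq_disabled = true;	/* remember we masked the line */
	disable_irq_nosync(irq);
	napi_schedule(&c->napi_rx);
	return IRQ_HANDLED;
}

static void my_rx_poll_done(struct my_common *c)
{
	if (c->rx_irq_disabled) {	/* re-enable only if we masked it */
		c->rx_irq_disabled = false;
		enable_irq(c->rx_irq);
	}
}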
index 5d93e34..048ed10 100644 (file)
@@ -126,6 +126,8 @@ struct am65_cpsw_common {
        struct am65_cpsw_rx_chn rx_chns;
        struct napi_struct      napi_rx;
 
+       bool                    rx_irq_disabled;
+
        u32                     nuss_ver;
        u32                     cpsw_ver;
        unsigned long           bus_freq;
index abf9a2a..9f70e40 100644 (file)
@@ -431,7 +431,7 @@ static void cpsw_rx_handler(void *token, int len, int status)
        skb->protocol = eth_type_trans(skb, ndev);
 
        /* mark skb for recycling */
-       skb_mark_for_recycle(skb, page, pool);
+       skb_mark_for_recycle(skb);
        netif_receive_skb(skb);
 
        ndev->stats.rx_bytes += len;
@@ -905,7 +905,7 @@ static netdev_tx_t cpsw_ndo_start_xmit(struct sk_buff *skb,
        struct cpdma_chan *txch;
        int ret, q_idx;
 
-       if (skb_padto(skb, CPSW_MIN_PACKET_SIZE)) {
+       if (skb_put_padto(skb, CPSW_MIN_PACKET_SIZE)) {
                cpsw_err(priv, tx_err, "packet pad failed\n");
                ndev->stats.tx_dropped++;
                return NET_XMIT_DROP;
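skb_padto() only guarantees tailroom and zero padding; it does not touch skb->len, so a sub-minimum frame can still reach the hardware with its short length. skb_put_padto() additionally advances skb->len to the padded size, which is what these TX paths want. Both free the skb and return nonzero on allocation failure, hence the bare error return above. Roughly:

#include <linux/skbuff.h>

static int my_pad_for_tx(struct sk_buff *skb, unsigned int min_len)
{
	/* zero-pads short frames and sets skb->len = min_len;
	 * on failure the skb has already been freed
	 */
	if (skb_put_padto(skb, min_len))
		return -ENOMEM;

	return 0;
}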
index b4f55ff..ff3a96b 100644 (file)
@@ -11,7 +11,6 @@
 #include <linux/module.h>
 #include <linux/irqreturn.h>
 #include <linux/interrupt.h>
-#include <linux/if_bridge.h>
 #include <linux/if_ether.h>
 #include <linux/etherdevice.h>
 #include <linux/net_tstamp.h>
@@ -29,6 +28,7 @@
 #include <linux/kmemleak.h>
 #include <linux/sys_soc.h>
 
+#include <net/switchdev.h>
 #include <net/page_pool.h>
 #include <net/pkt_cls.h>
 #include <net/devlink.h>
@@ -375,7 +375,7 @@ static void cpsw_rx_handler(void *token, int len, int status)
        skb->protocol = eth_type_trans(skb, ndev);
 
        /* mark skb for recycling */
-       skb_mark_for_recycle(skb, page, pool);
+       skb_mark_for_recycle(skb);
        netif_receive_skb(skb);
 
        ndev->stats.rx_bytes += len;
@@ -1800,14 +1800,14 @@ static int cpsw_register_devlink(struct cpsw_common *cpsw)
        struct cpsw_devlink *dl_priv;
        int ret = 0;
 
-       cpsw->devlink = devlink_alloc(&cpsw_devlink_ops, sizeof(*dl_priv));
+       cpsw->devlink = devlink_alloc(&cpsw_devlink_ops, sizeof(*dl_priv), dev);
        if (!cpsw->devlink)
                return -ENOMEM;
 
        dl_priv = devlink_priv(cpsw->devlink);
        dl_priv->cpsw = cpsw;
 
-       ret = devlink_register(cpsw->devlink, dev);
+       ret = devlink_register(cpsw->devlink);
        if (ret) {
                dev_err(dev, "DL reg fail ret:%d\n", ret);
                goto dl_free;
index 6377966..b1c5cbe 100644 (file)
@@ -943,7 +943,7 @@ static int emac_dev_xmit(struct sk_buff *skb, struct net_device *ndev)
                goto fail_tx;
        }
 
-       ret_code = skb_padto(skb, EMAC_DEF_MIN_ETHPKTSIZE);
+       ret_code = skb_put_padto(skb, EMAC_DEF_MIN_ETHPKTSIZE);
        if (unlikely(ret_code < 0)) {
                if (netif_msg_tx_err(priv) && net_ratelimit())
                        dev_err(emac_dev, "DaVinci EMAC: packet pad failed");
index 3de67ba..a2fcdb1 100644 (file)
@@ -198,77 +198,6 @@ static void gsi_irq_type_disable(struct gsi *gsi, enum gsi_irq_type_id type_id)
        gsi_irq_type_update(gsi, gsi->type_enabled_bitmap & ~BIT(type_id));
 }
 
-/* Turn off all GSI interrupts initially; there is no gsi_irq_teardown() */
-static void gsi_irq_setup(struct gsi *gsi)
-{
-       /* Disable all interrupt types */
-       gsi_irq_type_update(gsi, 0);
-
-       /* Clear all type-specific interrupt masks */
-       iowrite32(0, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_MSK_OFFSET);
-       iowrite32(0, gsi->virt + GSI_CNTXT_SRC_EV_CH_IRQ_MSK_OFFSET);
-       iowrite32(0, gsi->virt + GSI_CNTXT_GLOB_IRQ_EN_OFFSET);
-       iowrite32(0, gsi->virt + GSI_CNTXT_SRC_IEOB_IRQ_MSK_OFFSET);
-
-       /* The inter-EE interrupts are not supported for IPA v3.0-v3.1 */
-       if (gsi->version > IPA_VERSION_3_1) {
-               u32 offset;
-
-               /* These registers are in the non-adjusted address range */
-               offset = GSI_INTER_EE_SRC_CH_IRQ_MSK_OFFSET;
-               iowrite32(0, gsi->virt_raw + offset);
-               offset = GSI_INTER_EE_SRC_EV_CH_IRQ_MSK_OFFSET;
-               iowrite32(0, gsi->virt_raw + offset);
-       }
-
-       iowrite32(0, gsi->virt + GSI_CNTXT_GSI_IRQ_EN_OFFSET);
-}
-
-/* Get # supported channel and event rings; there is no gsi_ring_teardown() */
-static int gsi_ring_setup(struct gsi *gsi)
-{
-       struct device *dev = gsi->dev;
-       u32 count;
-       u32 val;
-
-       if (gsi->version < IPA_VERSION_3_5_1) {
-               /* No HW_PARAM_2 register prior to IPA v3.5.1, assume the max */
-               gsi->channel_count = GSI_CHANNEL_COUNT_MAX;
-               gsi->evt_ring_count = GSI_EVT_RING_COUNT_MAX;
-
-               return 0;
-       }
-
-       val = ioread32(gsi->virt + GSI_GSI_HW_PARAM_2_OFFSET);
-
-       count = u32_get_bits(val, NUM_CH_PER_EE_FMASK);
-       if (!count) {
-               dev_err(dev, "GSI reports zero channels supported\n");
-               return -EINVAL;
-       }
-       if (count > GSI_CHANNEL_COUNT_MAX) {
-               dev_warn(dev, "limiting to %u channels; hardware supports %u\n",
-                        GSI_CHANNEL_COUNT_MAX, count);
-               count = GSI_CHANNEL_COUNT_MAX;
-       }
-       gsi->channel_count = count;
-
-       count = u32_get_bits(val, NUM_EV_PER_EE_FMASK);
-       if (!count) {
-               dev_err(dev, "GSI reports zero event rings supported\n");
-               return -EINVAL;
-       }
-       if (count > GSI_EVT_RING_COUNT_MAX) {
-               dev_warn(dev,
-                        "limiting to %u event rings; hardware supports %u\n",
-                        GSI_EVT_RING_COUNT_MAX, count);
-               count = GSI_EVT_RING_COUNT_MAX;
-       }
-       gsi->evt_ring_count = count;
-
-       return 0;
-}
-
 /* Event ring commands are performed one at a time.  Their completion
  * is signaled by the event ring control GSI interrupt type, which is
  * only enabled when we issue an event ring command.  Only the event
@@ -920,12 +849,13 @@ static void gsi_channel_program(struct gsi_channel *channel, bool doorbell)
        /* All done! */
 }
 
-static int __gsi_channel_start(struct gsi_channel *channel, bool start)
+static int __gsi_channel_start(struct gsi_channel *channel, bool resume)
 {
        struct gsi *gsi = channel->gsi;
        int ret;
 
-       if (!start)
+       /* Prior to IPA v4.0 suspend/resume is not implemented by GSI */
+       if (resume && gsi->version < IPA_VERSION_4_0)
                return 0;
 
        mutex_lock(&gsi->mutex);
@@ -947,7 +877,7 @@ int gsi_channel_start(struct gsi *gsi, u32 channel_id)
        napi_enable(&channel->napi);
        gsi_irq_ieob_enable_one(gsi, channel->evt_ring_id);
 
-       ret = __gsi_channel_start(channel, true);
+       ret = __gsi_channel_start(channel, false);
        if (ret) {
                gsi_irq_ieob_disable_one(gsi, channel->evt_ring_id);
                napi_disable(&channel->napi);
@@ -971,7 +901,7 @@ static int gsi_channel_stop_retry(struct gsi_channel *channel)
        return ret;
 }
 
-static int __gsi_channel_stop(struct gsi_channel *channel, bool stop)
+static int __gsi_channel_stop(struct gsi_channel *channel, bool suspend)
 {
        struct gsi *gsi = channel->gsi;
        int ret;
@@ -979,7 +909,8 @@ static int __gsi_channel_stop(struct gsi_channel *channel, bool stop)
        /* Wait for any underway transactions to complete before stopping. */
        gsi_channel_trans_quiesce(channel);
 
-       if (!stop)
+       /* Prior to IPA v4.0 suspend/resume is not implemented by GSI */
+       if (suspend && gsi->version < IPA_VERSION_4_0)
                return 0;
 
        mutex_lock(&gsi->mutex);
@@ -997,7 +928,7 @@ int gsi_channel_stop(struct gsi *gsi, u32 channel_id)
        struct gsi_channel *channel = &gsi->channel[channel_id];
        int ret;
 
-       ret = __gsi_channel_stop(channel, true);
+       ret = __gsi_channel_stop(channel, false);
        if (ret)
                return ret;
 
@@ -1026,13 +957,13 @@ void gsi_channel_reset(struct gsi *gsi, u32 channel_id, bool doorbell)
        mutex_unlock(&gsi->mutex);
 }
 
-/* Stop a STARTED channel for suspend (using stop if requested) */
-int gsi_channel_suspend(struct gsi *gsi, u32 channel_id, bool stop)
+/* Stop a started channel for suspend */
+int gsi_channel_suspend(struct gsi *gsi, u32 channel_id)
 {
        struct gsi_channel *channel = &gsi->channel[channel_id];
        int ret;
 
-       ret = __gsi_channel_stop(channel, stop);
+       ret = __gsi_channel_stop(channel, true);
        if (ret)
                return ret;
 
@@ -1042,12 +973,24 @@ int gsi_channel_suspend(struct gsi *gsi, u32 channel_id, bool stop)
        return 0;
 }
 
-/* Resume a suspended channel (starting will be requested if STOPPED) */
-int gsi_channel_resume(struct gsi *gsi, u32 channel_id, bool start)
+/* Resume a suspended channel (starting if stopped) */
+int gsi_channel_resume(struct gsi *gsi, u32 channel_id)
 {
        struct gsi_channel *channel = &gsi->channel[channel_id];
 
-       return __gsi_channel_start(channel, start);
+       return __gsi_channel_start(channel, true);
+}
+
+/* Prevent all GSI interrupts while suspended */
+void gsi_suspend(struct gsi *gsi)
+{
+       disable_irq(gsi->irq);
+}
+
+/* Allow all GSI interrupts again when resuming */
+void gsi_resume(struct gsi *gsi)
+{
+       enable_irq(gsi->irq);
 }
 
 /**
@@ -1372,33 +1315,20 @@ static irqreturn_t gsi_isr(int irq, void *dev_id)
        return IRQ_HANDLED;
 }
 
+/* Init function for GSI IRQ lookup; there is no gsi_irq_exit() */
 static int gsi_irq_init(struct gsi *gsi, struct platform_device *pdev)
 {
-       struct device *dev = &pdev->dev;
-       unsigned int irq;
        int ret;
 
        ret = platform_get_irq_byname(pdev, "gsi");
        if (ret <= 0)
                return ret ? : -EINVAL;
 
-       irq = ret;
-
-       ret = request_irq(irq, gsi_isr, 0, "gsi", gsi);
-       if (ret) {
-               dev_err(dev, "error %d requesting \"gsi\" IRQ\n", ret);
-               return ret;
-       }
-       gsi->irq = irq;
+       gsi->irq = ret;
 
        return 0;
 }
 
-static void gsi_irq_exit(struct gsi *gsi)
-{
-       free_irq(gsi->irq, gsi);
-}
-
 /* Return the transaction associated with a transfer completion event */
 static struct gsi_trans *gsi_event_trans(struct gsi_channel *channel,
                                         struct gsi_event *event)
@@ -1876,6 +1806,93 @@ static void gsi_channel_teardown(struct gsi *gsi)
        gsi_irq_disable(gsi);
 }
 
+/* Turn off all GSI interrupts initially */
+static int gsi_irq_setup(struct gsi *gsi)
+{
+       int ret;
+
+       /* Writing 1 indicates IRQ interrupts; 0 would be MSI */
+       iowrite32(1, gsi->virt + GSI_CNTXT_INTSET_OFFSET);
+
+       /* Disable all interrupt types */
+       gsi_irq_type_update(gsi, 0);
+
+       /* Clear all type-specific interrupt masks */
+       iowrite32(0, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_MSK_OFFSET);
+       iowrite32(0, gsi->virt + GSI_CNTXT_SRC_EV_CH_IRQ_MSK_OFFSET);
+       iowrite32(0, gsi->virt + GSI_CNTXT_GLOB_IRQ_EN_OFFSET);
+       iowrite32(0, gsi->virt + GSI_CNTXT_SRC_IEOB_IRQ_MSK_OFFSET);
+
+       /* The inter-EE interrupts are not supported for IPA v3.0-v3.1 */
+       if (gsi->version > IPA_VERSION_3_1) {
+               u32 offset;
+
+               /* These registers are in the non-adjusted address range */
+               offset = GSI_INTER_EE_SRC_CH_IRQ_MSK_OFFSET;
+               iowrite32(0, gsi->virt_raw + offset);
+               offset = GSI_INTER_EE_SRC_EV_CH_IRQ_MSK_OFFSET;
+               iowrite32(0, gsi->virt_raw + offset);
+       }
+
+       iowrite32(0, gsi->virt + GSI_CNTXT_GSI_IRQ_EN_OFFSET);
+
+       ret = request_irq(gsi->irq, gsi_isr, 0, "gsi", gsi);
+       if (ret)
+               dev_err(gsi->dev, "error %d requesting \"gsi\" IRQ\n", ret);
+
+       return ret;
+}
+
+static void gsi_irq_teardown(struct gsi *gsi)
+{
+       free_irq(gsi->irq, gsi);
+}
+
+/* Get # supported channel and event rings; there is no gsi_ring_teardown() */
+static int gsi_ring_setup(struct gsi *gsi)
+{
+       struct device *dev = gsi->dev;
+       u32 count;
+       u32 val;
+
+       if (gsi->version < IPA_VERSION_3_5_1) {
+               /* No HW_PARAM_2 register prior to IPA v3.5.1, assume the max */
+               gsi->channel_count = GSI_CHANNEL_COUNT_MAX;
+               gsi->evt_ring_count = GSI_EVT_RING_COUNT_MAX;
+
+               return 0;
+       }
+
+       val = ioread32(gsi->virt + GSI_GSI_HW_PARAM_2_OFFSET);
+
+       count = u32_get_bits(val, NUM_CH_PER_EE_FMASK);
+       if (!count) {
+               dev_err(dev, "GSI reports zero channels supported\n");
+               return -EINVAL;
+       }
+       if (count > GSI_CHANNEL_COUNT_MAX) {
+               dev_warn(dev, "limiting to %u channels; hardware supports %u\n",
+                        GSI_CHANNEL_COUNT_MAX, count);
+               count = GSI_CHANNEL_COUNT_MAX;
+       }
+       gsi->channel_count = count;
+
+       count = u32_get_bits(val, NUM_EV_PER_EE_FMASK);
+       if (!count) {
+               dev_err(dev, "GSI reports zero event rings supported\n");
+               return -EINVAL;
+       }
+       if (count > GSI_EVT_RING_COUNT_MAX) {
+               dev_warn(dev,
+                        "limiting to %u event rings; hardware supports %u\n",
+                        GSI_EVT_RING_COUNT_MAX, count);
+               count = GSI_EVT_RING_COUNT_MAX;
+       }
+       gsi->evt_ring_count = count;
+
+       return 0;
+}
+
 /* Setup function for GSI.  GSI firmware must be loaded and initialized */
 int gsi_setup(struct gsi *gsi)
 {
@@ -1889,25 +1906,34 @@ int gsi_setup(struct gsi *gsi)
                return -EIO;
        }
 
-       gsi_irq_setup(gsi);             /* No matching teardown required */
+       ret = gsi_irq_setup(gsi);
+       if (ret)
+               return ret;
 
        ret = gsi_ring_setup(gsi);      /* No matching teardown required */
        if (ret)
-               return ret;
+               goto err_irq_teardown;
 
        /* Initialize the error log */
        iowrite32(0, gsi->virt + GSI_ERROR_LOG_OFFSET);
 
-       /* Writing 1 indicates IRQ interrupts; 0 would be MSI */
-       iowrite32(1, gsi->virt + GSI_CNTXT_INTSET_OFFSET);
+       ret = gsi_channel_setup(gsi);
+       if (ret)
+               goto err_irq_teardown;
 
-       return gsi_channel_setup(gsi);
+       return 0;
+
+err_irq_teardown:
+       gsi_irq_teardown(gsi);
+
+       return ret;
 }
 
 /* Inverse of gsi_setup() */
 void gsi_teardown(struct gsi *gsi)
 {
        gsi_channel_teardown(gsi);
+       gsi_irq_teardown(gsi);
 }
 
 /* Initialize a channel's event ring */
@@ -2204,20 +2230,18 @@ int gsi_init(struct gsi *gsi, struct platform_device *pdev,
 
        init_completion(&gsi->completion);
 
-       ret = gsi_irq_init(gsi, pdev);
+       ret = gsi_irq_init(gsi, pdev);  /* No matching exit required */
        if (ret)
                goto err_iounmap;
 
        ret = gsi_channel_init(gsi, count, data);
        if (ret)
-               goto err_irq_exit;
+               goto err_iounmap;
 
        mutex_init(&gsi->mutex);
 
        return 0;
 
-err_irq_exit:
-       gsi_irq_exit(gsi);
 err_iounmap:
        iounmap(gsi->virt_raw);
 
@@ -2229,7 +2253,6 @@ void gsi_exit(struct gsi *gsi)
 {
        mutex_destroy(&gsi->mutex);
        gsi_channel_exit(gsi);
-       gsi_irq_exit(gsi);
        iounmap(gsi->virt_raw);
 }
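The gsi_suspend()/gsi_resume() helpers added above gate interrupts at the line level. Worth noting: disable_irq() waits for any in-flight handler to finish, so once gsi_suspend() returns no GSI interrupt code can still be running. The pairing, generically (my_dev is hypothetical):

#include <linux/interrupt.h>

struct my_dev { int irq; };

static void my_suspend_irqs(struct my_dev *d)
{
	disable_irq(d->irq);	/* masks the line and waits for handlers */
}

static void my_resume_irqs(struct my_dev *d)
{
	enable_irq(d->irq);	/* unmask; a pending interrupt may be resent */
}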
 
index 81cd7b0..88b80dc 100644 (file)
@@ -232,8 +232,35 @@ int gsi_channel_stop(struct gsi *gsi, u32 channel_id);
  */
 void gsi_channel_reset(struct gsi *gsi, u32 channel_id, bool doorbell);
 
-int gsi_channel_suspend(struct gsi *gsi, u32 channel_id, bool stop);
-int gsi_channel_resume(struct gsi *gsi, u32 channel_id, bool start);
+/**
+ * gsi_suspend() - Prepare the GSI subsystem for suspend
+ * @gsi:       GSI pointer
+ */
+void gsi_suspend(struct gsi *gsi);
+
+/**
+ * gsi_resume() - Resume the GSI subsystem following suspend
+ * @gsi:       GSI pointer
+ */
+void gsi_resume(struct gsi *gsi);
+
+/**
+ * gsi_channel_suspend() - Suspend a GSI channel
+ * @gsi:       GSI pointer
+ * @channel_id:        Channel to suspend
+ *
+ * For IPA v4.0+, suspend is implemented by stopping the channel.
+ */
+int gsi_channel_suspend(struct gsi *gsi, u32 channel_id);
+
+/**
+ * gsi_channel_resume() - Resume a suspended GSI channel
+ * @gsi:       GSI pointer
+ * @channel_id:        Channel to resume
+ *
+ * For IPA v4.0+, the stopped channel is started again.
+ */
+int gsi_channel_resume(struct gsi *gsi, u32 channel_id);
 
 /**
  * gsi_init() - Initialize the GSI subsystem
index 71ba996..34152fe 100644 (file)
@@ -27,20 +27,9 @@ struct ipa_clock;
 struct ipa_smp2p;
 struct ipa_interrupt;
 
-/**
- * enum ipa_flag - IPA state flags
- * @IPA_FLAG_RESUMED:  Whether resume from suspend has been signaled
- * @IPA_FLAG_COUNT:    Number of defined IPA flags
- */
-enum ipa_flag {
-       IPA_FLAG_RESUMED,
-       IPA_FLAG_COUNT,         /* Last; not a flag */
-};
-
 /**
  * struct ipa - IPA information
  * @gsi:               Embedded GSI structure
- * @flags:             Boolean state flags
  * @version:           IPA hardware version
  * @pdev:              Platform device
  * @completion:                Used to signal pipeline clear transfer complete
@@ -83,7 +72,6 @@ enum ipa_flag {
  */
 struct ipa {
        struct gsi gsi;
-       DECLARE_BITMAP(flags, IPA_FLAG_COUNT);
        enum ipa_version version;
        struct platform_device *pdev;
        struct completion completion;
index 69ef6ea..a67b613 100644 (file)
@@ -9,9 +9,12 @@
 #include <linux/clk.h>
 #include <linux/device.h>
 #include <linux/interconnect.h>
+#include <linux/pm.h>
+#include <linux/bitops.h>
 
 #include "ipa.h"
 #include "ipa_clock.h"
+#include "ipa_endpoint.h"
 #include "ipa_modem.h"
 #include "ipa_data.h"
 
@@ -42,11 +45,22 @@ struct ipa_interconnect {
        u32 peak_bandwidth;
 };
 
+/**
+ * enum ipa_power_flag - IPA power flags
+ * @IPA_POWER_FLAG_RESUMED:    Whether resume from suspend has been signaled
+ * @IPA_POWER_FLAG_COUNT:      Number of defined power flags
+ */
+enum ipa_power_flag {
+       IPA_POWER_FLAG_RESUMED,
+       IPA_POWER_FLAG_COUNT,           /* Last; not a flag */
+};
+
 /**
  * struct ipa_clock - IPA clocking information
  * @count:             Clocking reference count
  * @mutex:             Protects clock enable/disable
  * @core:              IPA core clock
+ * @flags:             Boolean state flags
  * @interconnect_count:        Number of elements in interconnect[]
  * @interconnect:      Interconnect array
  */
@@ -54,6 +68,7 @@ struct ipa_clock {
        refcount_t count;
        struct mutex mutex; /* protects clock enable/disable */
        struct clk *core;
+       DECLARE_BITMAP(flags, IPA_POWER_FLAG_COUNT);
        u32 interconnect_count;
        struct ipa_interconnect *interconnect;
 };
@@ -144,8 +159,12 @@ static int ipa_interconnect_enable(struct ipa *ipa)
                ret = icc_set_bw(interconnect->path,
                                 interconnect->average_bandwidth,
                                 interconnect->peak_bandwidth);
-               if (ret)
+               if (ret) {
+                       dev_err(&ipa->pdev->dev,
+                               "error %d enabling %s interconnect\n",
+                               ret, icc_get_name(interconnect->path));
                        goto out_unwind;
+               }
                interconnect++;
        }
 
@@ -159,10 +178,11 @@ out_unwind:
 }
 
 /* To disable an interconnect, we just set its bandwidth to 0 */
-static void ipa_interconnect_disable(struct ipa *ipa)
+static int ipa_interconnect_disable(struct ipa *ipa)
 {
        struct ipa_interconnect *interconnect;
        struct ipa_clock *clock = ipa->clock;
+       struct device *dev = &ipa->pdev->dev;
        int result = 0;
        u32 count;
        int ret;
@@ -172,13 +192,16 @@ static void ipa_interconnect_disable(struct ipa *ipa)
        while (count--) {
                interconnect--;
                ret = icc_set_bw(interconnect->path, 0, 0);
-               if (ret && !result)
-                       result = ret;
+               if (ret) {
+                       dev_err(dev, "error %d disabling %s interconnect\n",
+                               ret, icc_get_name(interconnect->path));
+                       /* Try to disable all; record only the first error */
+                       if (!result)
+                               result = ret;
+               }
        }
 
-       if (result)
-               dev_err(&ipa->pdev->dev,
-                       "error %d disabling IPA interconnects\n", ret);
+       return result;
 }
 
 /* Turn on IPA clocks, including interconnects */
@@ -191,8 +214,10 @@ static int ipa_clock_enable(struct ipa *ipa)
                return ret;
 
        ret = clk_prepare_enable(ipa->clock->core);
-       if (ret)
-               ipa_interconnect_disable(ipa);
+       if (ret) {
+               dev_err(&ipa->pdev->dev, "error %d enabling core clock\n", ret);
+               (void)ipa_interconnect_disable(ipa);
+       }
 
        return ret;
 }
@@ -201,7 +226,7 @@ static int ipa_clock_enable(struct ipa *ipa)
 static void ipa_clock_disable(struct ipa *ipa)
 {
        clk_disable_unprepare(ipa->clock->core);
-       ipa_interconnect_disable(ipa);
+       (void)ipa_interconnect_disable(ipa);
 }
 
 /* Get an IPA clock reference, but only if the reference count is
@@ -238,13 +263,8 @@ void ipa_clock_get(struct ipa *ipa)
                goto out_mutex_unlock;
 
        ret = ipa_clock_enable(ipa);
-       if (ret) {
-               dev_err(&ipa->pdev->dev, "error %d enabling IPA clock\n", ret);
-               goto out_mutex_unlock;
-       }
-
-       refcount_set(&clock->count, 1);
-
+       if (!ret)
+               refcount_set(&clock->count, 1);
 out_mutex_unlock:
        mutex_unlock(&clock->mutex);
 }
@@ -271,6 +291,40 @@ u32 ipa_clock_rate(struct ipa *ipa)
        return ipa->clock ? (u32)clk_get_rate(ipa->clock->core) : 0;
 }
 
+/**
+ * ipa_suspend_handler() - Handle the suspend IPA interrupt
+ * @ipa:       IPA pointer
+ * @irq_id:    IPA interrupt type (unused)
+ *
+ * If an RX endpoint is suspended, and the IPA has a packet destined for
+ * that endpoint, the IPA generates a SUSPEND interrupt to inform the AP
+ * that it should resume the endpoint.  If we get one of these interrupts
+ * we just wake up the system.
+ */
+static void ipa_suspend_handler(struct ipa *ipa, enum ipa_irq_id irq_id)
+{
+       /* Just report the event, and let system resume handle the rest.
+        * More than one endpoint could signal this; if so, ignore
+        * all but the first.
+        */
+       if (!test_and_set_bit(IPA_POWER_FLAG_RESUMED, ipa->clock->flags))
+               pm_wakeup_dev_event(&ipa->pdev->dev, 0, true);
+
+       /* Acknowledge/clear the suspend interrupt on all endpoints */
+       ipa_interrupt_suspend_clear_all(ipa->interrupt);
+}
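The test_and_set_bit() guard above makes the wake report idempotent across endpoints: only the first SUSPEND interrupt after a suspend triggers pm_wakeup_dev_event(), and the rest are ignored until the flag is cleared on the next suspend. In isolation (my_* names are placeholders):

#include <linux/pm_wakeup.h>
#include <linux/bitops.h>

#define MY_RESUMED	0

static unsigned long my_flags;

/* Report a wake event once, even if several endpoints signal it */
static void my_wake_once(struct device *dev)
{
	if (!test_and_set_bit(MY_RESUMED, &my_flags))
		pm_wakeup_dev_event(dev, 0, true); /* hard wakeup, no timeout */
}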
+
+void ipa_power_setup(struct ipa *ipa)
+{
+       ipa_interrupt_add(ipa->interrupt, IPA_IRQ_TX_SUSPEND,
+                         ipa_suspend_handler);
+}
+
+void ipa_power_teardown(struct ipa *ipa)
+{
+       ipa_interrupt_remove(ipa->interrupt, IPA_IRQ_TX_SUSPEND);
+}
+
 /* Initialize IPA clocking */
 struct ipa_clock *
 ipa_clock_init(struct device *dev, const struct ipa_clock_data *data)
@@ -329,3 +383,62 @@ void ipa_clock_exit(struct ipa_clock *clock)
        kfree(clock);
        clk_put(clk);
 }
+
+/**
+ * ipa_suspend() - Power management system suspend callback
+ * @dev:       IPA device structure
+ *
+ * Return:     Always returns zero
+ *
+ * Called by the PM framework when a system suspend operation is invoked.
+ * Suspends endpoints and releases the clock reference held to keep
+ * the IPA clock running until this point.
+ */
+static int ipa_suspend(struct device *dev)
+{
+       struct ipa *ipa = dev_get_drvdata(dev);
+
+       /* Endpoints aren't usable until setup is complete */
+       if (ipa->setup_complete) {
+               __clear_bit(IPA_POWER_FLAG_RESUMED, ipa->clock->flags);
+               ipa_endpoint_suspend(ipa);
+               gsi_suspend(&ipa->gsi);
+       }
+
+       ipa_clock_put(ipa);
+
+       return 0;
+}
+
+/**
+ * ipa_resume() - Power management system resume callback
+ * @dev:       IPA device structure
+ *
+ * Return:     Always returns 0
+ *
+ * Called by the PM framework when a system resume operation is invoked.
+ * Takes an IPA clock reference to keep the clock running until suspend,
+ * and resumes endpoints.
+ */
+static int ipa_resume(struct device *dev)
+{
+       struct ipa *ipa = dev_get_drvdata(dev);
+
+       /* This clock reference will keep the IPA out of suspend
+        * until we get a power management suspend request.
+        */
+       ipa_clock_get(ipa);
+
+       /* Endpoints aren't usable until setup is complete */
+       if (ipa->setup_complete) {
+               gsi_resume(&ipa->gsi);
+               ipa_endpoint_resume(ipa);
+       }
+
+       return 0;
+}
+
+const struct dev_pm_ops ipa_pm_ops = {
+       .suspend        = ipa_suspend,
+       .resume         = ipa_resume,
+};
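With ipa_suspend()/ipa_resume() relocated here, ipa_pm_ops is exported through ipa_clock.h and the platform driver only needs to reference it. A sketch of the consuming side, assuming a placeholder my-ipa platform driver:

#include <linux/platform_device.h>
#include <linux/pm.h>

extern const struct dev_pm_ops ipa_pm_ops;	/* declared in ipa_clock.h */

static int my_probe(struct platform_device *pdev)
{
	return 0;	/* real setup elsewhere */
}

static struct platform_driver my_driver = {
	.probe	= my_probe,
	.driver	= {
		.name	= "my-ipa",
		.pm	= &ipa_pm_ops,	/* system suspend/resume callbacks */
	},
};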
index 1fe6347..2a0f7ff 100644 (file)
@@ -11,6 +11,9 @@ struct device;
 struct ipa;
 struct ipa_clock_data;
 
+/* IPA device power management function block */
+extern const struct dev_pm_ops ipa_pm_ops;
+
 /**
  * ipa_clock_rate() - Return the current IPA core clock rate
  * @ipa:       IPA structure
@@ -19,6 +22,18 @@ struct ipa_clock_data;
  */
 u32 ipa_clock_rate(struct ipa *ipa);
 
+/**
+ * ipa_power_setup() - Set up IPA power management
+ * @ipa:       IPA pointer
+ */
+void ipa_power_setup(struct ipa *ipa);
+
+/**
+ * ipa_power_teardown() - Inverse of ipa_power_setup()
+ * @ipa:       IPA pointer
+ */
+void ipa_power_teardown(struct ipa *ipa);
+
 /**
  * ipa_clock_init() - Initialize IPA clocking
  * @dev:       IPA device
index 6ab9282..8d83e14 100644 (file)
@@ -418,18 +418,13 @@ static const struct ipa_mem_data ipa_mem_data = {
 /* Interconnect rates are in 1000 byte/second units */
 static const struct ipa_interconnect_data ipa_interconnect_data[] = {
        {
-               .name                   = "ipa_to_llcc",
+               .name                   = "memory",
                .peak_bandwidth         = 600000,       /* 600 MBps */
                .average_bandwidth      = 150000,       /* 150 MBps */
        },
-       {
-               .name                   = "llcc_to_ebi1",
-               .peak_bandwidth         = 1804000,      /* 1.804 GBps */
-               .average_bandwidth      = 150000,       /* 150 MBps */
-       },
        /* Average rate is unused for the next interconnect */
        {
-               .name                   = "appss_to_ipa",
+               .name                   = "config",
                .peak_bandwidth         = 74000,        /* 74 MBps */
                .average_bandwidth      = 0,            /* unused */
        },
index 8070d1a..08ee37a 100644 (file)
@@ -1587,7 +1587,6 @@ void ipa_endpoint_suspend_one(struct ipa_endpoint *endpoint)
 {
        struct device *dev = &endpoint->ipa->pdev->dev;
        struct gsi *gsi = &endpoint->ipa->gsi;
-       bool stop_channel;
        int ret;
 
        if (!(endpoint->ipa->enabled & BIT(endpoint->endpoint_id)))
@@ -1598,11 +1597,7 @@ void ipa_endpoint_suspend_one(struct ipa_endpoint *endpoint)
                (void)ipa_endpoint_program_suspend(endpoint, true);
        }
 
-       /* Starting with IPA v4.0, endpoints are suspended by stopping the
-        * underlying GSI channel rather than using endpoint suspend mode.
-        */
-       stop_channel = endpoint->ipa->version >= IPA_VERSION_4_0;
-       ret = gsi_channel_suspend(gsi, endpoint->channel_id, stop_channel);
+       ret = gsi_channel_suspend(gsi, endpoint->channel_id);
        if (ret)
                dev_err(dev, "error %d suspending channel %u\n", ret,
                        endpoint->channel_id);
@@ -1612,7 +1607,6 @@ void ipa_endpoint_resume_one(struct ipa_endpoint *endpoint)
 {
        struct device *dev = &endpoint->ipa->pdev->dev;
        struct gsi *gsi = &endpoint->ipa->gsi;
-       bool start_channel;
        int ret;
 
        if (!(endpoint->ipa->enabled & BIT(endpoint->endpoint_id)))
@@ -1621,11 +1615,7 @@ void ipa_endpoint_resume_one(struct ipa_endpoint *endpoint)
        if (!endpoint->toward_ipa)
                (void)ipa_endpoint_program_suspend(endpoint, false);
 
-       /* Starting with IPA v4.0, the underlying GSI channel must be
-        * restarted for resume.
-        */
-       start_channel = endpoint->ipa->version >= IPA_VERSION_4_0;
-       ret = gsi_channel_resume(gsi, endpoint->channel_id, start_channel);
+       ret = gsi_channel_resume(gsi, endpoint->channel_id);
        if (ret)
                dev_err(dev, "error %d resuming channel %u\n", ret,
                        endpoint->channel_id);
index 2e728d4..25bbb45 100644 (file)
 /* Divider for 19.2 MHz crystal oscillator clock to get common timer clock */
 #define IPA_XO_CLOCK_DIVIDER   192     /* 1 is subtracted where used */
 
-/**
- * ipa_suspend_handler() - Handle the suspend IPA interrupt
- * @ipa:       IPA pointer
- * @irq_id:    IPA interrupt type (unused)
- *
- * If an RX endpoint is in suspend state, and the IPA has a packet
- * destined for that endpoint, the IPA generates a SUSPEND interrupt
- * to inform the AP that it should resume the endpoint.  If we get
- * one of these interrupts we just resume everything.
- */
-static void ipa_suspend_handler(struct ipa *ipa, enum ipa_irq_id irq_id)
-{
-       /* Just report the event, and let system resume handle the rest.
-        * More than one endpoint could signal this; if so, ignore
-        * all but the first.
-        */
-       if (!test_and_set_bit(IPA_FLAG_RESUMED, ipa->flags))
-               pm_wakeup_dev_event(&ipa->pdev->dev, 0, true);
-
-       /* Acknowledge/clear the suspend interrupt on all endpoints */
-       ipa_interrupt_suspend_clear_all(ipa->interrupt);
-}
-
 /**
  * ipa_setup() - Set up IPA hardware
  * @ipa:       IPA pointer
@@ -124,12 +101,11 @@ int ipa_setup(struct ipa *ipa)
        if (ret)
                return ret;
 
-       ipa_interrupt_add(ipa->interrupt, IPA_IRQ_TX_SUSPEND,
-                         ipa_suspend_handler);
+       ipa_power_setup(ipa);
 
        ret = device_init_wakeup(dev, true);
        if (ret)
-               goto err_interrupt_remove;
+               goto err_gsi_teardown;
 
        ipa_endpoint_setup(ipa);
 
@@ -177,9 +153,9 @@ err_command_disable:
        ipa_endpoint_disable_one(command_endpoint);
 err_endpoint_teardown:
        ipa_endpoint_teardown(ipa);
+       ipa_power_teardown(ipa);
        (void)device_init_wakeup(dev, false);
-err_interrupt_remove:
-       ipa_interrupt_remove(ipa->interrupt, IPA_IRQ_TX_SUSPEND);
+err_gsi_teardown:
        gsi_teardown(&ipa->gsi);
 
        return ret;
@@ -204,8 +180,8 @@ static void ipa_teardown(struct ipa *ipa)
        command_endpoint = ipa->name_map[IPA_ENDPOINT_AP_COMMAND_TX];
        ipa_endpoint_disable_one(command_endpoint);
        ipa_endpoint_teardown(ipa);
+       ipa_power_teardown(ipa);
        (void)device_init_wakeup(&ipa->pdev->dev, false);
-       ipa_interrupt_remove(ipa->interrupt, IPA_IRQ_TX_SUSPEND);
        gsi_teardown(&ipa->gsi);
 }
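
The relabeling to err_gsi_teardown (and err_uc_deconfig in the next hunk) follows the usual kernel convention that an error label is named after the first cleanup step performed at it, so the unwind path reads as the setup sequence in reverse. A generic illustration (step_a/step_b and friends are placeholders, not IPA functions):

static int example_setup(struct example *ex)
{
        int ret;

        ret = step_a(ex);
        if (ret)
                return ret;

        ret = step_b(ex);
        if (ret)
                goto err_undo_a;        /* label names the next cleanup */

        ret = step_c(ex);
        if (ret)
                goto err_undo_b;

        return 0;

err_undo_b:
        undo_b(ex);
err_undo_a:
        undo_a(ex);
        return ret;
}
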
 
@@ -474,7 +450,7 @@ static int ipa_config(struct ipa *ipa, const struct ipa_data *data)
 
        ret = ipa_endpoint_config(ipa);
        if (ret)
-               goto err_interrupt_deconfig;
+               goto err_uc_deconfig;
 
        ipa_table_config(ipa);          /* No deconfig required */
 
@@ -491,7 +467,7 @@ static int ipa_config(struct ipa *ipa, const struct ipa_data *data)
 
 err_endpoint_deconfig:
        ipa_endpoint_deconfig(ipa);
-err_interrupt_deconfig:
+err_uc_deconfig:
        ipa_uc_deconfig(ipa);
        ipa_interrupt_deconfig(ipa->interrupt);
        ipa->interrupt = NULL;
@@ -874,62 +850,6 @@ static void ipa_shutdown(struct platform_device *pdev)
                dev_err(&pdev->dev, "shutdown: remove returned %d\n", ret);
 }
 
-/**
- * ipa_suspend() - Power management system suspend callback
- * @dev:       IPA device structure
- *
- * Return:     Always returns zero
- *
- * Called by the PM framework when a system suspend operation is invoked.
- * Suspends endpoints and releases the clock reference held to keep
- * the IPA clock running until this point.
- */
-static int ipa_suspend(struct device *dev)
-{
-       struct ipa *ipa = dev_get_drvdata(dev);
-
-       /* Endpoints aren't usable until setup is complete */
-       if (ipa->setup_complete) {
-               __clear_bit(IPA_FLAG_RESUMED, ipa->flags);
-               ipa_endpoint_suspend(ipa);
-       }
-
-       ipa_clock_put(ipa);
-
-       return 0;
-}
-
-/**
- * ipa_resume() - Power management system resume callback
- * @dev:       IPA device structure
- *
- * Return:     Always returns 0
- *
- * Called by the PM framework when a system resume operation is invoked.
- * Takes an IPA clock reference to keep the clock running until suspend,
- * and resumes endpoints.
- */
-static int ipa_resume(struct device *dev)
-{
-       struct ipa *ipa = dev_get_drvdata(dev);
-
-       /* This clock reference will keep the IPA out of suspend
-        * until we get a power management suspend request.
-        */
-       ipa_clock_get(ipa);
-
-       /* Endpoints aren't usable until setup is complete */
-       if (ipa->setup_complete)
-               ipa_endpoint_resume(ipa);
-
-       return 0;
-}
-
-static const struct dev_pm_ops ipa_pm_ops = {
-       .suspend        = ipa_suspend,
-       .resume         = ipa_resume,
-};
-
 static const struct attribute_group *ipa_attribute_groups[] = {
        &ipa_attribute_group,
        &ipa_feature_attribute_group,
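
With ipa_suspend()/ipa_resume() and the dev_pm_ops table gone from this file, the system sleep hooks presumably move into the new power module set up by ipa_power_setup() above. A sketch of how such relocated hooks would be wired up (the ipa_power_suspend/ipa_power_resume names are assumptions):

/* Sketch only: what a relocated ipa_power.c could export */
const struct dev_pm_ops ipa_pm_ops = {
        SET_SYSTEM_SLEEP_PM_OPS(ipa_power_suspend, ipa_power_resume)
};
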
index 4ea8287..ad4019e 100644 (file)
@@ -178,6 +178,9 @@ void ipa_modem_suspend(struct net_device *netdev)
        struct ipa_priv *priv = netdev_priv(netdev);
        struct ipa *ipa = priv->ipa;
 
+       if (!(netdev->flags & IFF_UP))
+               return;
+
        netif_stop_queue(netdev);
 
        ipa_endpoint_suspend_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]);
@@ -194,6 +197,9 @@ void ipa_modem_resume(struct net_device *netdev)
        struct ipa_priv *priv = netdev_priv(netdev);
        struct ipa *ipa = priv->ipa;
 
+       if (!(netdev->flags & IFF_UP))
+               return;
+
        ipa_endpoint_resume_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]);
        ipa_endpoint_resume_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]);
 
@@ -225,13 +231,15 @@ int ipa_modem_start(struct ipa *ipa)
        SET_NETDEV_DEV(netdev, &ipa->pdev->dev);
        priv = netdev_priv(netdev);
        priv->ipa = ipa;
+       ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]->netdev = netdev;
+       ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]->netdev = netdev;
+       ipa->modem_netdev = netdev;
 
        ret = register_netdev(netdev);
-       if (!ret) {
-               ipa->modem_netdev = netdev;
-               ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]->netdev = netdev;
-               ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]->netdev = netdev;
-       } else {
+       if (ret) {
+               ipa->modem_netdev = NULL;
+               ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]->netdev = NULL;
+               ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]->netdev = NULL;
                free_netdev(netdev);
        }
 
@@ -265,13 +273,15 @@ int ipa_modem_stop(struct ipa *ipa)
        /* Prevent the modem from triggering a call to ipa_setup() */
        ipa_smp2p_disable(ipa);
 
-       /* Stop the queue and disable the endpoints if it's open */
+       /* Clean up the netdev and endpoints if the netdev was started */
        if (netdev) {
-               (void)ipa_stop(netdev);
+               /* If it was opened, stop it first */
+               if (netdev->flags & IFF_UP)
+                       (void)ipa_stop(netdev);
+               unregister_netdev(netdev);
+               ipa->modem_netdev = NULL;
                ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]->netdev = NULL;
                ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]->netdev = NULL;
-               ipa->modem_netdev = NULL;
-               unregister_netdev(netdev);
                free_netdev(netdev);
        }
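
Two related fixes above: the endpoint netdev pointers are now published before register_netdev(), since the stack may invoke callbacks (and, via wakeup handling, the suspend/resume paths guarded by the new IFF_UP checks) as soon as registration succeeds; on failure the pointers are rolled back before the netdev is freed. The bare pattern (example_priv is a placeholder):

static int example_start(struct example_priv *priv, struct net_device *netdev)
{
        int ret;

        /* Publish first: callbacks may run once registration succeeds */
        priv->netdev = netdev;

        ret = register_netdev(netdev);
        if (ret) {
                /* Unwind the published state on failure */
                priv->netdev = NULL;
                free_netdev(netdev);
        }

        return ret;
}
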
 
diff --git a/drivers/net/mhi/Makefile b/drivers/net/mhi/Makefile
deleted file mode 100644 (file)
index f71b9f8..0000000
+++ /dev/null
@@ -1,3 +0,0 @@
-obj-$(CONFIG_MHI_NET) += mhi_net.o
-
-mhi_net-y := net.o proto_mbim.o
diff --git a/drivers/net/mhi/mhi.h b/drivers/net/mhi/mhi.h
deleted file mode 100644 (file)
index 1d0c499..0000000
+++ /dev/null
@@ -1,41 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* MHI Network driver - Network over MHI bus
- *
- * Copyright (C) 2021 Linaro Ltd <loic.poulain@linaro.org>
- */
-
-struct mhi_net_stats {
-       u64_stats_t rx_packets;
-       u64_stats_t rx_bytes;
-       u64_stats_t rx_errors;
-       u64_stats_t rx_dropped;
-       u64_stats_t rx_length_errors;
-       u64_stats_t tx_packets;
-       u64_stats_t tx_bytes;
-       u64_stats_t tx_errors;
-       u64_stats_t tx_dropped;
-       struct u64_stats_sync tx_syncp;
-       struct u64_stats_sync rx_syncp;
-};
-
-struct mhi_net_dev {
-       struct mhi_device *mdev;
-       struct net_device *ndev;
-       struct sk_buff *skbagg_head;
-       struct sk_buff *skbagg_tail;
-       const struct mhi_net_proto *proto;
-       void *proto_data;
-       struct delayed_work rx_refill;
-       struct mhi_net_stats stats;
-       u32 rx_queue_sz;
-       int msg_enable;
-       unsigned int mru;
-};
-
-struct mhi_net_proto {
-       int (*init)(struct mhi_net_dev *mhi_netdev);
-       struct sk_buff * (*tx_fixup)(struct mhi_net_dev *mhi_netdev, struct sk_buff *skb);
-       void (*rx)(struct mhi_net_dev *mhi_netdev, struct sk_buff *skb);
-};
-
-extern const struct mhi_net_proto proto_mbim;
diff --git a/drivers/net/mhi/net.c b/drivers/net/mhi/net.c
deleted file mode 100644 (file)
index 0cc7dcd..0000000
+++ /dev/null
@@ -1,487 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/* MHI Network driver - Network over MHI bus
- *
- * Copyright (C) 2020 Linaro Ltd <loic.poulain@linaro.org>
- */
-
-#include <linux/if_arp.h>
-#include <linux/mhi.h>
-#include <linux/mod_devicetable.h>
-#include <linux/module.h>
-#include <linux/netdevice.h>
-#include <linux/skbuff.h>
-#include <linux/u64_stats_sync.h>
-#include <linux/wwan.h>
-
-#include "mhi.h"
-
-#define MHI_NET_MIN_MTU                ETH_MIN_MTU
-#define MHI_NET_MAX_MTU                0xffff
-#define MHI_NET_DEFAULT_MTU    0x4000
-
-/* When set to false, the default netdev (link 0) is not created, and it is
- * up to the user to create the link (via wwan rtnetlink).
- */
-static bool create_default_iface = true;
-module_param(create_default_iface, bool, 0);
-
-struct mhi_device_info {
-       const char *netname;
-       const struct mhi_net_proto *proto;
-};
-
-static int mhi_ndo_open(struct net_device *ndev)
-{
-       struct mhi_net_dev *mhi_netdev = wwan_netdev_drvpriv(ndev);
-
-       /* Feed the rx buffer pool */
-       schedule_delayed_work(&mhi_netdev->rx_refill, 0);
-
-       /* Carrier is established via an out-of-band channel (e.g. QMI) */
-       netif_carrier_on(ndev);
-
-       netif_start_queue(ndev);
-
-       return 0;
-}
-
-static int mhi_ndo_stop(struct net_device *ndev)
-{
-       struct mhi_net_dev *mhi_netdev = wwan_netdev_drvpriv(ndev);
-
-       netif_stop_queue(ndev);
-       netif_carrier_off(ndev);
-       cancel_delayed_work_sync(&mhi_netdev->rx_refill);
-
-       return 0;
-}
-
-static netdev_tx_t mhi_ndo_xmit(struct sk_buff *skb, struct net_device *ndev)
-{
-       struct mhi_net_dev *mhi_netdev = wwan_netdev_drvpriv(ndev);
-       const struct mhi_net_proto *proto = mhi_netdev->proto;
-       struct mhi_device *mdev = mhi_netdev->mdev;
-       int err;
-
-       if (proto && proto->tx_fixup) {
-               skb = proto->tx_fixup(mhi_netdev, skb);
-               if (unlikely(!skb))
-                       goto exit_drop;
-       }
-
-       err = mhi_queue_skb(mdev, DMA_TO_DEVICE, skb, skb->len, MHI_EOT);
-       if (unlikely(err)) {
-               net_err_ratelimited("%s: Failed to queue TX buf (%d)\n",
-                                   ndev->name, err);
-               dev_kfree_skb_any(skb);
-               goto exit_drop;
-       }
-
-       if (mhi_queue_is_full(mdev, DMA_TO_DEVICE))
-               netif_stop_queue(ndev);
-
-       return NETDEV_TX_OK;
-
-exit_drop:
-       u64_stats_update_begin(&mhi_netdev->stats.tx_syncp);
-       u64_stats_inc(&mhi_netdev->stats.tx_dropped);
-       u64_stats_update_end(&mhi_netdev->stats.tx_syncp);
-
-       return NETDEV_TX_OK;
-}
-
-static void mhi_ndo_get_stats64(struct net_device *ndev,
-                               struct rtnl_link_stats64 *stats)
-{
-       struct mhi_net_dev *mhi_netdev = wwan_netdev_drvpriv(ndev);
-       unsigned int start;
-
-       do {
-               start = u64_stats_fetch_begin_irq(&mhi_netdev->stats.rx_syncp);
-               stats->rx_packets = u64_stats_read(&mhi_netdev->stats.rx_packets);
-               stats->rx_bytes = u64_stats_read(&mhi_netdev->stats.rx_bytes);
-               stats->rx_errors = u64_stats_read(&mhi_netdev->stats.rx_errors);
-               stats->rx_dropped = u64_stats_read(&mhi_netdev->stats.rx_dropped);
-               stats->rx_length_errors = u64_stats_read(&mhi_netdev->stats.rx_length_errors);
-       } while (u64_stats_fetch_retry_irq(&mhi_netdev->stats.rx_syncp, start));
-
-       do {
-               start = u64_stats_fetch_begin_irq(&mhi_netdev->stats.tx_syncp);
-               stats->tx_packets = u64_stats_read(&mhi_netdev->stats.tx_packets);
-               stats->tx_bytes = u64_stats_read(&mhi_netdev->stats.tx_bytes);
-               stats->tx_errors = u64_stats_read(&mhi_netdev->stats.tx_errors);
-               stats->tx_dropped = u64_stats_read(&mhi_netdev->stats.tx_dropped);
-       } while (u64_stats_fetch_retry_irq(&mhi_netdev->stats.tx_syncp, start));
-}
-
-static const struct net_device_ops mhi_netdev_ops = {
-       .ndo_open               = mhi_ndo_open,
-       .ndo_stop               = mhi_ndo_stop,
-       .ndo_start_xmit         = mhi_ndo_xmit,
-       .ndo_get_stats64        = mhi_ndo_get_stats64,
-};
-
-static void mhi_net_setup(struct net_device *ndev)
-{
-       ndev->header_ops = NULL;  /* No header */
-       ndev->type = ARPHRD_RAWIP;
-       ndev->hard_header_len = 0;
-       ndev->addr_len = 0;
-       ndev->flags = IFF_POINTOPOINT | IFF_NOARP;
-       ndev->netdev_ops = &mhi_netdev_ops;
-       ndev->mtu = MHI_NET_DEFAULT_MTU;
-       ndev->min_mtu = MHI_NET_MIN_MTU;
-       ndev->max_mtu = MHI_NET_MAX_MTU;
-       ndev->tx_queue_len = 1000;
-}
-
-static struct sk_buff *mhi_net_skb_agg(struct mhi_net_dev *mhi_netdev,
-                                      struct sk_buff *skb)
-{
-       struct sk_buff *head = mhi_netdev->skbagg_head;
-       struct sk_buff *tail = mhi_netdev->skbagg_tail;
-
-       /* This is non-paged skb chaining using frag_list */
-       if (!head) {
-               mhi_netdev->skbagg_head = skb;
-               return skb;
-       }
-
-       if (!skb_shinfo(head)->frag_list)
-               skb_shinfo(head)->frag_list = skb;
-       else
-               tail->next = skb;
-
-       head->len += skb->len;
-       head->data_len += skb->len;
-       head->truesize += skb->truesize;
-
-       mhi_netdev->skbagg_tail = skb;
-
-       return mhi_netdev->skbagg_head;
-}
-
-static void mhi_net_dl_callback(struct mhi_device *mhi_dev,
-                               struct mhi_result *mhi_res)
-{
-       struct mhi_net_dev *mhi_netdev = dev_get_drvdata(&mhi_dev->dev);
-       const struct mhi_net_proto *proto = mhi_netdev->proto;
-       struct sk_buff *skb = mhi_res->buf_addr;
-       int free_desc_count;
-
-       free_desc_count = mhi_get_free_desc_count(mhi_dev, DMA_FROM_DEVICE);
-
-       if (unlikely(mhi_res->transaction_status)) {
-               switch (mhi_res->transaction_status) {
-               case -EOVERFLOW:
-                       /* Packet cannot fit in one MHI buffer and has been
-                        * split over multiple MHI transfers, so re-aggregate it.
-                        * That usually means the device side MTU is larger than
-                        * the host side MTU/MRU. Since this is not optimal,
-                        * print a warning (once).
-                        */
-                       netdev_warn_once(mhi_netdev->ndev,
-                                        "Fragmented packets received, fix MTU?\n");
-                       skb_put(skb, mhi_res->bytes_xferd);
-                       mhi_net_skb_agg(mhi_netdev, skb);
-                       break;
-               case -ENOTCONN:
-                       /* MHI layer stopping/resetting the DL channel */
-                       dev_kfree_skb_any(skb);
-                       return;
-               default:
-                       /* Unknown error, simply drop */
-                       dev_kfree_skb_any(skb);
-                       u64_stats_update_begin(&mhi_netdev->stats.rx_syncp);
-                       u64_stats_inc(&mhi_netdev->stats.rx_errors);
-                       u64_stats_update_end(&mhi_netdev->stats.rx_syncp);
-               }
-       } else {
-               skb_put(skb, mhi_res->bytes_xferd);
-
-               if (mhi_netdev->skbagg_head) {
-                       /* Aggregate the final fragment */
-                       skb = mhi_net_skb_agg(mhi_netdev, skb);
-                       mhi_netdev->skbagg_head = NULL;
-               }
-
-               switch (skb->data[0] & 0xf0) {
-               case 0x40:
-                       skb->protocol = htons(ETH_P_IP);
-                       break;
-               case 0x60:
-                       skb->protocol = htons(ETH_P_IPV6);
-                       break;
-               default:
-                       skb->protocol = htons(ETH_P_MAP);
-                       break;
-               }
-
-               if (proto && proto->rx) {
-                       proto->rx(mhi_netdev, skb);
-               } else {
-                       u64_stats_update_begin(&mhi_netdev->stats.rx_syncp);
-                       u64_stats_inc(&mhi_netdev->stats.rx_packets);
-                       u64_stats_add(&mhi_netdev->stats.rx_bytes, skb->len);
-                       u64_stats_update_end(&mhi_netdev->stats.rx_syncp);
-                       netif_rx(skb);
-               }
-       }
-
-       /* Refill if RX buffers queue becomes low */
-       if (free_desc_count >= mhi_netdev->rx_queue_sz / 2)
-               schedule_delayed_work(&mhi_netdev->rx_refill, 0);
-}
-
-static void mhi_net_ul_callback(struct mhi_device *mhi_dev,
-                               struct mhi_result *mhi_res)
-{
-       struct mhi_net_dev *mhi_netdev = dev_get_drvdata(&mhi_dev->dev);
-       struct net_device *ndev = mhi_netdev->ndev;
-       struct mhi_device *mdev = mhi_netdev->mdev;
-       struct sk_buff *skb = mhi_res->buf_addr;
-
-       /* Hardware has consumed the buffer, so free the skb (which is not
-        * freed by the MHI stack) and perform accounting.
-        */
-       dev_consume_skb_any(skb);
-
-       u64_stats_update_begin(&mhi_netdev->stats.tx_syncp);
-       if (unlikely(mhi_res->transaction_status)) {
-
-               /* MHI layer stopping/resetting the UL channel */
-               if (mhi_res->transaction_status == -ENOTCONN) {
-                       u64_stats_update_end(&mhi_netdev->stats.tx_syncp);
-                       return;
-               }
-
-               u64_stats_inc(&mhi_netdev->stats.tx_errors);
-       } else {
-               u64_stats_inc(&mhi_netdev->stats.tx_packets);
-               u64_stats_add(&mhi_netdev->stats.tx_bytes, mhi_res->bytes_xferd);
-       }
-       u64_stats_update_end(&mhi_netdev->stats.tx_syncp);
-
-       if (netif_queue_stopped(ndev) && !mhi_queue_is_full(mdev, DMA_TO_DEVICE))
-               netif_wake_queue(ndev);
-}
-
-static void mhi_net_rx_refill_work(struct work_struct *work)
-{
-       struct mhi_net_dev *mhi_netdev = container_of(work, struct mhi_net_dev,
-                                                     rx_refill.work);
-       struct net_device *ndev = mhi_netdev->ndev;
-       struct mhi_device *mdev = mhi_netdev->mdev;
-       struct sk_buff *skb;
-       unsigned int size;
-       int err;
-
-       size = mhi_netdev->mru ? mhi_netdev->mru : READ_ONCE(ndev->mtu);
-
-       while (!mhi_queue_is_full(mdev, DMA_FROM_DEVICE)) {
-               skb = netdev_alloc_skb(ndev, size);
-               if (unlikely(!skb))
-                       break;
-
-               err = mhi_queue_skb(mdev, DMA_FROM_DEVICE, skb, size, MHI_EOT);
-               if (unlikely(err)) {
-                       net_err_ratelimited("%s: Failed to queue RX buf (%d)\n",
-                                           ndev->name, err);
-                       kfree_skb(skb);
-                       break;
-               }
-
-               /* Do not hog the CPU if rx buffers are consumed faster than
-                * queued (unlikely).
-                */
-               cond_resched();
-       }
-
-       /* If we're still starved of rx buffers, reschedule later */
-       if (mhi_get_free_desc_count(mdev, DMA_FROM_DEVICE) == mhi_netdev->rx_queue_sz)
-               schedule_delayed_work(&mhi_netdev->rx_refill, HZ / 2);
-}
-
-static int mhi_net_newlink(void *ctxt, struct net_device *ndev, u32 if_id,
-                          struct netlink_ext_ack *extack)
-{
-       const struct mhi_device_info *info;
-       struct mhi_device *mhi_dev = ctxt;
-       struct mhi_net_dev *mhi_netdev;
-       int err;
-
-       info = (struct mhi_device_info *)mhi_dev->id->driver_data;
-
-       /* For now we only support one link (link context 0); the driver must
-        * be reworked to break the 1:1 relationship for MBIM networking and to
-        * forward the setup call to rmnet (QMAP) otherwise.
-        */
-       if (if_id != 0)
-               return -EINVAL;
-
-       if (dev_get_drvdata(&mhi_dev->dev))
-               return -EBUSY;
-
-       mhi_netdev = wwan_netdev_drvpriv(ndev);
-
-       dev_set_drvdata(&mhi_dev->dev, mhi_netdev);
-       mhi_netdev->ndev = ndev;
-       mhi_netdev->mdev = mhi_dev;
-       mhi_netdev->skbagg_head = NULL;
-       mhi_netdev->proto = info->proto;
-       mhi_netdev->mru = mhi_dev->mhi_cntrl->mru;
-
-       INIT_DELAYED_WORK(&mhi_netdev->rx_refill, mhi_net_rx_refill_work);
-       u64_stats_init(&mhi_netdev->stats.rx_syncp);
-       u64_stats_init(&mhi_netdev->stats.tx_syncp);
-
-       /* Start MHI channels */
-       err = mhi_prepare_for_transfer(mhi_dev);
-       if (err)
-               goto out_err;
-
-       /* Number of transfer descriptors determines size of the queue */
-       mhi_netdev->rx_queue_sz = mhi_get_free_desc_count(mhi_dev, DMA_FROM_DEVICE);
-
-       if (extack)
-               err = register_netdevice(ndev);
-       else
-               err = register_netdev(ndev);
-       if (err)
-               goto out_err;
-
-       if (mhi_netdev->proto) {
-               err = mhi_netdev->proto->init(mhi_netdev);
-               if (err)
-                       goto out_err_proto;
-       }
-
-       return 0;
-
-out_err_proto:
-       unregister_netdevice(ndev);
-out_err:
-       free_netdev(ndev);
-       return err;
-}
-
-static void mhi_net_dellink(void *ctxt, struct net_device *ndev,
-                           struct list_head *head)
-{
-       struct mhi_net_dev *mhi_netdev = wwan_netdev_drvpriv(ndev);
-       struct mhi_device *mhi_dev = ctxt;
-
-       if (head)
-               unregister_netdevice_queue(ndev, head);
-       else
-               unregister_netdev(ndev);
-
-       mhi_unprepare_from_transfer(mhi_dev);
-
-       kfree_skb(mhi_netdev->skbagg_head);
-
-       dev_set_drvdata(&mhi_dev->dev, NULL);
-}
-
-static const struct wwan_ops mhi_wwan_ops = {
-       .priv_size = sizeof(struct mhi_net_dev),
-       .setup = mhi_net_setup,
-       .newlink = mhi_net_newlink,
-       .dellink = mhi_net_dellink,
-};
-
-static int mhi_net_probe(struct mhi_device *mhi_dev,
-                        const struct mhi_device_id *id)
-{
-       const struct mhi_device_info *info = (struct mhi_device_info *)id->driver_data;
-       struct mhi_controller *cntrl = mhi_dev->mhi_cntrl;
-       struct net_device *ndev;
-       int err;
-
-       err = wwan_register_ops(&cntrl->mhi_dev->dev, &mhi_wwan_ops, mhi_dev,
-                               WWAN_NO_DEFAULT_LINK);
-       if (err)
-               return err;
-
-       if (!create_default_iface)
-               return 0;
-
-       /* Create a default interface, which is used as either the RMNET
-        * real-dev, MBIM link 0, or IP link 0.
-        */
-       ndev = alloc_netdev(sizeof(struct mhi_net_dev), info->netname,
-                           NET_NAME_PREDICTABLE, mhi_net_setup);
-       if (!ndev) {
-               err = -ENOMEM;
-               goto err_unregister;
-       }
-
-       SET_NETDEV_DEV(ndev, &mhi_dev->dev);
-
-       err = mhi_net_newlink(mhi_dev, ndev, 0, NULL);
-       if (err)
-               goto err_release;
-
-       return 0;
-
-err_release:
-       free_netdev(ndev);
-err_unregister:
-       wwan_unregister_ops(&cntrl->mhi_dev->dev);
-
-       return err;
-}
-
-static void mhi_net_remove(struct mhi_device *mhi_dev)
-{
-       struct mhi_net_dev *mhi_netdev = dev_get_drvdata(&mhi_dev->dev);
-       struct mhi_controller *cntrl = mhi_dev->mhi_cntrl;
-
-       /* WWAN core takes care of removing remaining links */
-       wwan_unregister_ops(&cntrl->mhi_dev->dev);
-
-       if (create_default_iface)
-               mhi_net_dellink(mhi_dev, mhi_netdev->ndev, NULL);
-}
-
-static const struct mhi_device_info mhi_hwip0 = {
-       .netname = "mhi_hwip%d",
-};
-
-static const struct mhi_device_info mhi_swip0 = {
-       .netname = "mhi_swip%d",
-};
-
-static const struct mhi_device_info mhi_hwip0_mbim = {
-       .netname = "mhi_mbim%d",
-       .proto = &proto_mbim,
-};
-
-static const struct mhi_device_id mhi_net_id_table[] = {
-       /* Hardware accelerated data PATH (to modem IPA), protocol agnostic */
-       { .chan = "IP_HW0", .driver_data = (kernel_ulong_t)&mhi_hwip0 },
-       /* Software data PATH (to modem CPU) */
-       { .chan = "IP_SW0", .driver_data = (kernel_ulong_t)&mhi_swip0 },
-       /* Hardware accelerated data PATH (to modem IPA), MBIM protocol */
-       { .chan = "IP_HW0_MBIM", .driver_data = (kernel_ulong_t)&mhi_hwip0_mbim },
-       {}
-};
-MODULE_DEVICE_TABLE(mhi, mhi_net_id_table);
-
-static struct mhi_driver mhi_net_driver = {
-       .probe = mhi_net_probe,
-       .remove = mhi_net_remove,
-       .dl_xfer_cb = mhi_net_dl_callback,
-       .ul_xfer_cb = mhi_net_ul_callback,
-       .id_table = mhi_net_id_table,
-       .driver = {
-               .name = "mhi_net",
-               .owner = THIS_MODULE,
-       },
-};
-
-module_mhi_driver(mhi_net_driver);
-
-MODULE_AUTHOR("Loic Poulain <loic.poulain@linaro.org>");
-MODULE_DESCRIPTION("Network over MHI");
-MODULE_LICENSE("GPL v2");
diff --git a/drivers/net/mhi/proto_mbim.c b/drivers/net/mhi/proto_mbim.c
deleted file mode 100644 (file)
index 761d90b..0000000
+++ /dev/null
@@ -1,310 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/* MHI Network driver - Network over MHI bus
- *
- * Copyright (C) 2021 Linaro Ltd <loic.poulain@linaro.org>
- *
- * This driver copies some code from cdc_ncm, which is:
- * Copyright (C) ST-Ericsson 2010-2012
- * and cdc_mbim, which is:
- * Copyright (c) 2012  Smith Micro Software, Inc.
- * Copyright (c) 2012  Bjørn Mork <bjorn@mork.no>
- *
- */
-
-#include <linux/ethtool.h>
-#include <linux/if_vlan.h>
-#include <linux/ip.h>
-#include <linux/mii.h>
-#include <linux/netdevice.h>
-#include <linux/wwan.h>
-#include <linux/skbuff.h>
-#include <linux/usb.h>
-#include <linux/usb/cdc.h>
-#include <linux/usb/usbnet.h>
-#include <linux/usb/cdc_ncm.h>
-
-#include "mhi.h"
-
-#define MBIM_NDP16_SIGN_MASK 0x00ffffff
-
-/* Usual WWAN MTU */
-#define MHI_MBIM_DEFAULT_MTU 1500
-
-/* An MRU of 3500 optimizes skb allocation: the skbs will basically fit in
- * one 4K page. Large MBIM packets will simply be split over several MHI
- * transfers and chained by the MHI net layer (zerocopy).
- */
-#define MHI_MBIM_DEFAULT_MRU 3500
-
-struct mbim_context {
-       u16 rx_seq;
-       u16 tx_seq;
-};
-
-static void __mbim_length_errors_inc(struct mhi_net_dev *dev)
-{
-       u64_stats_update_begin(&dev->stats.rx_syncp);
-       u64_stats_inc(&dev->stats.rx_length_errors);
-       u64_stats_update_end(&dev->stats.rx_syncp);
-}
-
-static void __mbim_errors_inc(struct mhi_net_dev *dev)
-{
-       u64_stats_update_begin(&dev->stats.rx_syncp);
-       u64_stats_inc(&dev->stats.rx_errors);
-       u64_stats_update_end(&dev->stats.rx_syncp);
-}
-
-static int mbim_rx_verify_nth16(struct sk_buff *skb)
-{
-       struct mhi_net_dev *dev = wwan_netdev_drvpriv(skb->dev);
-       struct mbim_context *ctx = dev->proto_data;
-       struct usb_cdc_ncm_nth16 *nth16;
-       int len;
-
-       if (skb->len < sizeof(struct usb_cdc_ncm_nth16) +
-                       sizeof(struct usb_cdc_ncm_ndp16)) {
-               netif_dbg(dev, rx_err, dev->ndev, "frame too short\n");
-               __mbim_length_errors_inc(dev);
-               return -EINVAL;
-       }
-
-       nth16 = (struct usb_cdc_ncm_nth16 *)skb->data;
-
-       if (nth16->dwSignature != cpu_to_le32(USB_CDC_NCM_NTH16_SIGN)) {
-               netif_dbg(dev, rx_err, dev->ndev,
-                         "invalid NTH16 signature <%#010x>\n",
-                         le32_to_cpu(nth16->dwSignature));
-               __mbim_errors_inc(dev);
-               return -EINVAL;
-       }
-
-       /* No limit on the block length, except the size of the data pkt */
-       len = le16_to_cpu(nth16->wBlockLength);
-       if (len > skb->len) {
-               netif_dbg(dev, rx_err, dev->ndev,
-                         "NTB does not fit into the skb %u/%u\n", len,
-                         skb->len);
-               __mbim_length_errors_inc(dev);
-               return -EINVAL;
-       }
-
-       if (ctx->rx_seq + 1 != le16_to_cpu(nth16->wSequence) &&
-           (ctx->rx_seq || le16_to_cpu(nth16->wSequence)) &&
-           !(ctx->rx_seq == 0xffff && !le16_to_cpu(nth16->wSequence))) {
-               netif_dbg(dev, rx_err, dev->ndev,
-                         "sequence number glitch prev=%d curr=%d\n",
-                         ctx->rx_seq, le16_to_cpu(nth16->wSequence));
-       }
-       ctx->rx_seq = le16_to_cpu(nth16->wSequence);
-
-       return le16_to_cpu(nth16->wNdpIndex);
-}
-
-static int mbim_rx_verify_ndp16(struct sk_buff *skb, struct usb_cdc_ncm_ndp16 *ndp16)
-{
-       struct mhi_net_dev *dev = wwan_netdev_drvpriv(skb->dev);
-       int ret;
-
-       if (le16_to_cpu(ndp16->wLength) < USB_CDC_NCM_NDP16_LENGTH_MIN) {
-               netif_dbg(dev, rx_err, dev->ndev, "invalid DPT16 length <%u>\n",
-                         le16_to_cpu(ndp16->wLength));
-               return -EINVAL;
-       }
-
-       ret = ((le16_to_cpu(ndp16->wLength) - sizeof(struct usb_cdc_ncm_ndp16))
-                       / sizeof(struct usb_cdc_ncm_dpe16));
-       ret--; /* Last entry is always a NULL terminator */
-
-       if (sizeof(struct usb_cdc_ncm_ndp16) +
-            ret * sizeof(struct usb_cdc_ncm_dpe16) > skb->len) {
-               netif_dbg(dev, rx_err, dev->ndev,
-                         "Invalid nframes = %d\n", ret);
-               return -EINVAL;
-       }
-
-       return ret;
-}
-
-static void mbim_rx(struct mhi_net_dev *mhi_netdev, struct sk_buff *skb)
-{
-       struct net_device *ndev = mhi_netdev->ndev;
-       int ndpoffset;
-
-       /* Check NTB header and retrieve first NDP offset */
-       ndpoffset = mbim_rx_verify_nth16(skb);
-       if (ndpoffset < 0) {
-               net_err_ratelimited("%s: Incorrect NTB header\n", ndev->name);
-               goto error;
-       }
-
-       /* Process each NDP */
-       while (1) {
-               struct usb_cdc_ncm_ndp16 ndp16;
-               struct usb_cdc_ncm_dpe16 dpe16;
-               int nframes, n, dpeoffset;
-
-               if (skb_copy_bits(skb, ndpoffset, &ndp16, sizeof(ndp16))) {
-                       net_err_ratelimited("%s: Incorrect NDP offset (%u)\n",
-                                           ndev->name, ndpoffset);
-                       __mbim_length_errors_inc(mhi_netdev);
-                       goto error;
-               }
-
-               /* Check NDP header and retrieve number of datagrams */
-               nframes = mbim_rx_verify_ndp16(skb, &ndp16);
-               if (nframes < 0) {
-                       net_err_ratelimited("%s: Incorrect NDP16\n", ndev->name);
-                       __mbim_length_errors_inc(mhi_netdev);
-                       goto error;
-               }
-
-                /* Only IP data type supported, no DSS in MHI context */
-               if ((ndp16.dwSignature & cpu_to_le32(MBIM_NDP16_SIGN_MASK))
-                               != cpu_to_le32(USB_CDC_MBIM_NDP16_IPS_SIGN)) {
-                       net_err_ratelimited("%s: Unsupported NDP type\n", ndev->name);
-                       __mbim_errors_inc(mhi_netdev);
-                       goto next_ndp;
-               }
-
-               /* Only primary IP session 0 (0x00) supported for now */
-               if (ndp16.dwSignature & ~cpu_to_le32(MBIM_NDP16_SIGN_MASK)) {
-                       net_err_ratelimited("%s: bad packet session\n", ndev->name);
-                       __mbim_errors_inc(mhi_netdev);
-                       goto next_ndp;
-               }
-
-               /* de-aggregate and deliver IP packets */
-               dpeoffset = ndpoffset + sizeof(struct usb_cdc_ncm_ndp16);
-               for (n = 0; n < nframes; n++, dpeoffset += sizeof(dpe16)) {
-                       u16 dgram_offset, dgram_len;
-                       struct sk_buff *skbn;
-
-                       if (skb_copy_bits(skb, dpeoffset, &dpe16, sizeof(dpe16)))
-                               break;
-
-                       dgram_offset = le16_to_cpu(dpe16.wDatagramIndex);
-                       dgram_len = le16_to_cpu(dpe16.wDatagramLength);
-
-                       if (!dgram_offset || !dgram_len)
-                               break; /* null terminator */
-
-                       skbn = netdev_alloc_skb(ndev, dgram_len);
-                       if (!skbn)
-                               continue;
-
-                       skb_put(skbn, dgram_len);
-                       skb_copy_bits(skb, dgram_offset, skbn->data, dgram_len);
-
-                       switch (skbn->data[0] & 0xf0) {
-                       case 0x40:
-                               skbn->protocol = htons(ETH_P_IP);
-                               break;
-                       case 0x60:
-                               skbn->protocol = htons(ETH_P_IPV6);
-                               break;
-                       default:
-                               net_err_ratelimited("%s: unknown protocol\n",
-                                                   ndev->name);
-                               __mbim_errors_inc(mhi_netdev);
-                               dev_kfree_skb_any(skbn);
-                               continue;
-                       }
-
-                       u64_stats_update_begin(&mhi_netdev->stats.rx_syncp);
-                       u64_stats_inc(&mhi_netdev->stats.rx_packets);
-                       u64_stats_add(&mhi_netdev->stats.rx_bytes, skbn->len);
-                       u64_stats_update_end(&mhi_netdev->stats.rx_syncp);
-                       netif_rx(skbn);
-               }
-next_ndp:
-               /* Other NDP to process? */
-               ndpoffset = (int)le16_to_cpu(ndp16.wNextNdpIndex);
-               if (!ndpoffset)
-                       break;
-       }
-
-       /* free skb */
-       dev_consume_skb_any(skb);
-       return;
-error:
-       dev_kfree_skb_any(skb);
-}
-
-struct mbim_tx_hdr {
-       struct usb_cdc_ncm_nth16 nth16;
-       struct usb_cdc_ncm_ndp16 ndp16;
-       struct usb_cdc_ncm_dpe16 dpe16[2];
-} __packed;
-
-static struct sk_buff *mbim_tx_fixup(struct mhi_net_dev *mhi_netdev,
-                                    struct sk_buff *skb)
-{
-       struct mbim_context *ctx = mhi_netdev->proto_data;
-       unsigned int dgram_size = skb->len;
-       struct usb_cdc_ncm_nth16 *nth16;
-       struct usb_cdc_ncm_ndp16 *ndp16;
-       struct mbim_tx_hdr *mbim_hdr;
-
-       /* For now, this is a partial implementation of CDC MBIM: only one NDP
-        * is sent, containing the IP packet (no aggregation).
-        */
-
-       /* Ensure we have enough headroom for crafting MBIM header */
-       if (skb_cow_head(skb, sizeof(struct mbim_tx_hdr))) {
-               dev_kfree_skb_any(skb);
-               return NULL;
-       }
-
-       mbim_hdr = skb_push(skb, sizeof(struct mbim_tx_hdr));
-
-       /* Fill NTB header */
-       nth16 = &mbim_hdr->nth16;
-       nth16->dwSignature = cpu_to_le32(USB_CDC_NCM_NTH16_SIGN);
-       nth16->wHeaderLength = cpu_to_le16(sizeof(struct usb_cdc_ncm_nth16));
-       nth16->wSequence = cpu_to_le16(ctx->tx_seq++);
-       nth16->wBlockLength = cpu_to_le16(skb->len);
-       nth16->wNdpIndex = cpu_to_le16(sizeof(struct usb_cdc_ncm_nth16));
-
-       /* Fill the unique NDP */
-       ndp16 = &mbim_hdr->ndp16;
-       ndp16->dwSignature = cpu_to_le32(USB_CDC_MBIM_NDP16_IPS_SIGN);
-       ndp16->wLength = cpu_to_le16(sizeof(struct usb_cdc_ncm_ndp16)
-                                       + sizeof(struct usb_cdc_ncm_dpe16) * 2);
-       ndp16->wNextNdpIndex = 0;
-
-       /* Datagram follows the mbim header */
-       ndp16->dpe16[0].wDatagramIndex = cpu_to_le16(sizeof(struct mbim_tx_hdr));
-       ndp16->dpe16[0].wDatagramLength = cpu_to_le16(dgram_size);
-
-       /* null termination */
-       ndp16->dpe16[1].wDatagramIndex = 0;
-       ndp16->dpe16[1].wDatagramLength = 0;
-
-       return skb;
-}
-
-static int mbim_init(struct mhi_net_dev *mhi_netdev)
-{
-       struct net_device *ndev = mhi_netdev->ndev;
-
-       mhi_netdev->proto_data = devm_kzalloc(&ndev->dev,
-                                             sizeof(struct mbim_context),
-                                             GFP_KERNEL);
-       if (!mhi_netdev->proto_data)
-               return -ENOMEM;
-
-       ndev->needed_headroom = sizeof(struct mbim_tx_hdr);
-       ndev->mtu = MHI_MBIM_DEFAULT_MTU;
-
-       if (!mhi_netdev->mru)
-               mhi_netdev->mru = MHI_MBIM_DEFAULT_MRU;
-
-       return 0;
-}
-
-const struct mhi_net_proto proto_mbim = {
-       .init = mbim_init,
-       .rx = mbim_rx,
-       .tx_fixup = mbim_tx_fixup,
-};
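
The MRU comment in the file above deserves one line of arithmetic. Roughly (typical x86-64 values, which are assumptions rather than anything stated in the patch): netdev_alloc_skb() adds NET_SKB_PAD headroom and a struct skb_shared_info tail, so a 3500-byte MRU keeps the whole allocation inside one 4 KiB kmalloc bucket:

#define EXAMPLE_MRU             3500
#define EXAMPLE_NET_SKB_PAD     64      /* typical NET_SKB_PAD */
#define EXAMPLE_SHINFO_SIZE     320     /* typical sizeof(struct skb_shared_info) */

/* Approximate memory behind netdev_alloc_skb(ndev, EXAMPLE_MRU) */
static unsigned int example_mru_footprint(void)
{
        /* 3500 + 64 + 320 = 3884 < 4096, so one 4K page suffices */
        return EXAMPLE_MRU + EXAMPLE_NET_SKB_PAD + EXAMPLE_SHINFO_SIZE;
}
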
diff --git a/drivers/net/mhi_net.c b/drivers/net/mhi_net.c
new file mode 100644 (file)
index 0000000..975f7f9
--- /dev/null
@@ -0,0 +1,416 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* MHI Network driver - Network over MHI bus
+ *
+ * Copyright (C) 2020 Linaro Ltd <loic.poulain@linaro.org>
+ */
+
+#include <linux/if_arp.h>
+#include <linux/mhi.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/u64_stats_sync.h>
+
+#define MHI_NET_MIN_MTU                ETH_MIN_MTU
+#define MHI_NET_MAX_MTU                0xffff
+#define MHI_NET_DEFAULT_MTU    0x4000
+
+struct mhi_net_stats {
+       u64_stats_t rx_packets;
+       u64_stats_t rx_bytes;
+       u64_stats_t rx_errors;
+       u64_stats_t tx_packets;
+       u64_stats_t tx_bytes;
+       u64_stats_t tx_errors;
+       u64_stats_t tx_dropped;
+       struct u64_stats_sync tx_syncp;
+       struct u64_stats_sync rx_syncp;
+};
+
+struct mhi_net_dev {
+       struct mhi_device *mdev;
+       struct net_device *ndev;
+       struct sk_buff *skbagg_head;
+       struct sk_buff *skbagg_tail;
+       struct delayed_work rx_refill;
+       struct mhi_net_stats stats;
+       u32 rx_queue_sz;
+       int msg_enable;
+       unsigned int mru;
+};
+
+struct mhi_device_info {
+       const char *netname;
+};
+
+static int mhi_ndo_open(struct net_device *ndev)
+{
+       struct mhi_net_dev *mhi_netdev = netdev_priv(ndev);
+
+       /* Feed the rx buffer pool */
+       schedule_delayed_work(&mhi_netdev->rx_refill, 0);
+
+       /* Carrier is established via an out-of-band channel (e.g. QMI) */
+       netif_carrier_on(ndev);
+
+       netif_start_queue(ndev);
+
+       return 0;
+}
+
+static int mhi_ndo_stop(struct net_device *ndev)
+{
+       struct mhi_net_dev *mhi_netdev = netdev_priv(ndev);
+
+       netif_stop_queue(ndev);
+       netif_carrier_off(ndev);
+       cancel_delayed_work_sync(&mhi_netdev->rx_refill);
+
+       return 0;
+}
+
+static netdev_tx_t mhi_ndo_xmit(struct sk_buff *skb, struct net_device *ndev)
+{
+       struct mhi_net_dev *mhi_netdev = netdev_priv(ndev);
+       struct mhi_device *mdev = mhi_netdev->mdev;
+       int err;
+
+       err = mhi_queue_skb(mdev, DMA_TO_DEVICE, skb, skb->len, MHI_EOT);
+       if (unlikely(err)) {
+               net_err_ratelimited("%s: Failed to queue TX buf (%d)\n",
+                                   ndev->name, err);
+               dev_kfree_skb_any(skb);
+               goto exit_drop;
+       }
+
+       if (mhi_queue_is_full(mdev, DMA_TO_DEVICE))
+               netif_stop_queue(ndev);
+
+       return NETDEV_TX_OK;
+
+exit_drop:
+       u64_stats_update_begin(&mhi_netdev->stats.tx_syncp);
+       u64_stats_inc(&mhi_netdev->stats.tx_dropped);
+       u64_stats_update_end(&mhi_netdev->stats.tx_syncp);
+
+       return NETDEV_TX_OK;
+}
+
+static void mhi_ndo_get_stats64(struct net_device *ndev,
+                               struct rtnl_link_stats64 *stats)
+{
+       struct mhi_net_dev *mhi_netdev = netdev_priv(ndev);
+       unsigned int start;
+
+       do {
+               start = u64_stats_fetch_begin_irq(&mhi_netdev->stats.rx_syncp);
+               stats->rx_packets = u64_stats_read(&mhi_netdev->stats.rx_packets);
+               stats->rx_bytes = u64_stats_read(&mhi_netdev->stats.rx_bytes);
+               stats->rx_errors = u64_stats_read(&mhi_netdev->stats.rx_errors);
+       } while (u64_stats_fetch_retry_irq(&mhi_netdev->stats.rx_syncp, start));
+
+       do {
+               start = u64_stats_fetch_begin_irq(&mhi_netdev->stats.tx_syncp);
+               stats->tx_packets = u64_stats_read(&mhi_netdev->stats.tx_packets);
+               stats->tx_bytes = u64_stats_read(&mhi_netdev->stats.tx_bytes);
+               stats->tx_errors = u64_stats_read(&mhi_netdev->stats.tx_errors);
+               stats->tx_dropped = u64_stats_read(&mhi_netdev->stats.tx_dropped);
+       } while (u64_stats_fetch_retry_irq(&mhi_netdev->stats.tx_syncp, start));
+}
+
+static const struct net_device_ops mhi_netdev_ops = {
+       .ndo_open               = mhi_ndo_open,
+       .ndo_stop               = mhi_ndo_stop,
+       .ndo_start_xmit         = mhi_ndo_xmit,
+       .ndo_get_stats64        = mhi_ndo_get_stats64,
+};
+
+static void mhi_net_setup(struct net_device *ndev)
+{
+       ndev->header_ops = NULL;  /* No header */
+       ndev->type = ARPHRD_RAWIP;
+       ndev->hard_header_len = 0;
+       ndev->addr_len = 0;
+       ndev->flags = IFF_POINTOPOINT | IFF_NOARP;
+       ndev->netdev_ops = &mhi_netdev_ops;
+       ndev->mtu = MHI_NET_DEFAULT_MTU;
+       ndev->min_mtu = MHI_NET_MIN_MTU;
+       ndev->max_mtu = MHI_NET_MAX_MTU;
+       ndev->tx_queue_len = 1000;
+}
+
+static struct sk_buff *mhi_net_skb_agg(struct mhi_net_dev *mhi_netdev,
+                                      struct sk_buff *skb)
+{
+       struct sk_buff *head = mhi_netdev->skbagg_head;
+       struct sk_buff *tail = mhi_netdev->skbagg_tail;
+
+       /* This is non-paged skb chaining using frag_list */
+       if (!head) {
+               mhi_netdev->skbagg_head = skb;
+               return skb;
+       }
+
+       if (!skb_shinfo(head)->frag_list)
+               skb_shinfo(head)->frag_list = skb;
+       else
+               tail->next = skb;
+
+       head->len += skb->len;
+       head->data_len += skb->len;
+       head->truesize += skb->truesize;
+
+       mhi_netdev->skbagg_tail = skb;
+
+       return mhi_netdev->skbagg_head;
+}
+
+static void mhi_net_dl_callback(struct mhi_device *mhi_dev,
+                               struct mhi_result *mhi_res)
+{
+       struct mhi_net_dev *mhi_netdev = dev_get_drvdata(&mhi_dev->dev);
+       struct sk_buff *skb = mhi_res->buf_addr;
+       int free_desc_count;
+
+       free_desc_count = mhi_get_free_desc_count(mhi_dev, DMA_FROM_DEVICE);
+
+       if (unlikely(mhi_res->transaction_status)) {
+               switch (mhi_res->transaction_status) {
+               case -EOVERFLOW:
+                       /* Packet cannot fit in one MHI buffer and has been
+                        * split over multiple MHI transfers, so re-aggregate it.
+                        * That usually means the device side MTU is larger than
+                        * the host side MTU/MRU. Since this is not optimal,
+                        * print a warning (once).
+                        */
+                       netdev_warn_once(mhi_netdev->ndev,
+                                        "Fragmented packets received, fix MTU?\n");
+                       skb_put(skb, mhi_res->bytes_xferd);
+                       mhi_net_skb_agg(mhi_netdev, skb);
+                       break;
+               case -ENOTCONN:
+                       /* MHI layer stopping/resetting the DL channel */
+                       dev_kfree_skb_any(skb);
+                       return;
+               default:
+                       /* Unknown error, simply drop */
+                       dev_kfree_skb_any(skb);
+                       u64_stats_update_begin(&mhi_netdev->stats.rx_syncp);
+                       u64_stats_inc(&mhi_netdev->stats.rx_errors);
+                       u64_stats_update_end(&mhi_netdev->stats.rx_syncp);
+               }
+       } else {
+               skb_put(skb, mhi_res->bytes_xferd);
+
+               if (mhi_netdev->skbagg_head) {
+                       /* Aggregate the final fragment */
+                       skb = mhi_net_skb_agg(mhi_netdev, skb);
+                       mhi_netdev->skbagg_head = NULL;
+               }
+
+               switch (skb->data[0] & 0xf0) {
+               case 0x40:
+                       skb->protocol = htons(ETH_P_IP);
+                       break;
+               case 0x60:
+                       skb->protocol = htons(ETH_P_IPV6);
+                       break;
+               default:
+                       skb->protocol = htons(ETH_P_MAP);
+                       break;
+               }
+
+               u64_stats_update_begin(&mhi_netdev->stats.rx_syncp);
+               u64_stats_inc(&mhi_netdev->stats.rx_packets);
+               u64_stats_add(&mhi_netdev->stats.rx_bytes, skb->len);
+               u64_stats_update_end(&mhi_netdev->stats.rx_syncp);
+               netif_rx(skb);
+       }
+
+       /* Refill if RX buffers queue becomes low */
+       if (free_desc_count >= mhi_netdev->rx_queue_sz / 2)
+               schedule_delayed_work(&mhi_netdev->rx_refill, 0);
+}
+
+static void mhi_net_ul_callback(struct mhi_device *mhi_dev,
+                               struct mhi_result *mhi_res)
+{
+       struct mhi_net_dev *mhi_netdev = dev_get_drvdata(&mhi_dev->dev);
+       struct net_device *ndev = mhi_netdev->ndev;
+       struct mhi_device *mdev = mhi_netdev->mdev;
+       struct sk_buff *skb = mhi_res->buf_addr;
+
+       /* Hardware has consumed the buffer, so free the skb (which is not
+        * freed by the MHI stack) and perform accounting.
+        */
+       dev_consume_skb_any(skb);
+
+       u64_stats_update_begin(&mhi_netdev->stats.tx_syncp);
+       if (unlikely(mhi_res->transaction_status)) {
+               /* MHI layer stopping/resetting the UL channel */
+               if (mhi_res->transaction_status == -ENOTCONN) {
+                       u64_stats_update_end(&mhi_netdev->stats.tx_syncp);
+                       return;
+               }
+
+               u64_stats_inc(&mhi_netdev->stats.tx_errors);
+       } else {
+               u64_stats_inc(&mhi_netdev->stats.tx_packets);
+               u64_stats_add(&mhi_netdev->stats.tx_bytes, mhi_res->bytes_xferd);
+       }
+       u64_stats_update_end(&mhi_netdev->stats.tx_syncp);
+
+       if (netif_queue_stopped(ndev) && !mhi_queue_is_full(mdev, DMA_TO_DEVICE))
+               netif_wake_queue(ndev);
+}
+
+static void mhi_net_rx_refill_work(struct work_struct *work)
+{
+       struct mhi_net_dev *mhi_netdev = container_of(work, struct mhi_net_dev,
+                                                     rx_refill.work);
+       struct net_device *ndev = mhi_netdev->ndev;
+       struct mhi_device *mdev = mhi_netdev->mdev;
+       struct sk_buff *skb;
+       unsigned int size;
+       int err;
+
+       size = mhi_netdev->mru ? mhi_netdev->mru : READ_ONCE(ndev->mtu);
+
+       while (!mhi_queue_is_full(mdev, DMA_FROM_DEVICE)) {
+               skb = netdev_alloc_skb(ndev, size);
+               if (unlikely(!skb))
+                       break;
+
+               err = mhi_queue_skb(mdev, DMA_FROM_DEVICE, skb, size, MHI_EOT);
+               if (unlikely(err)) {
+                       net_err_ratelimited("%s: Failed to queue RX buf (%d)\n",
+                                           ndev->name, err);
+                       kfree_skb(skb);
+                       break;
+               }
+
+               /* Do not hog the CPU if rx buffers are consumed faster than
+                * queued (unlikely).
+                */
+               cond_resched();
+       }
+
+       /* If we're still starved of rx buffers, reschedule later */
+       if (mhi_get_free_desc_count(mdev, DMA_FROM_DEVICE) == mhi_netdev->rx_queue_sz)
+               schedule_delayed_work(&mhi_netdev->rx_refill, HZ / 2);
+}
+
+static int mhi_net_newlink(struct mhi_device *mhi_dev, struct net_device *ndev)
+{
+       struct mhi_net_dev *mhi_netdev;
+       int err;
+
+       mhi_netdev = netdev_priv(ndev);
+
+       dev_set_drvdata(&mhi_dev->dev, mhi_netdev);
+       mhi_netdev->ndev = ndev;
+       mhi_netdev->mdev = mhi_dev;
+       mhi_netdev->skbagg_head = NULL;
+       mhi_netdev->mru = mhi_dev->mhi_cntrl->mru;
+
+       INIT_DELAYED_WORK(&mhi_netdev->rx_refill, mhi_net_rx_refill_work);
+       u64_stats_init(&mhi_netdev->stats.rx_syncp);
+       u64_stats_init(&mhi_netdev->stats.tx_syncp);
+
+       /* Start MHI channels */
+       err = mhi_prepare_for_transfer(mhi_dev, 0);
+       if (err)
+               goto out_err;
+
+       /* Number of transfer descriptors determines size of the queue */
+       mhi_netdev->rx_queue_sz = mhi_get_free_desc_count(mhi_dev, DMA_FROM_DEVICE);
+
+       err = register_netdev(ndev);
+       if (err)
+               return err;
+
+       return 0;
+
+out_err:
+       free_netdev(ndev);
+       return err;
+}
+
+static void mhi_net_dellink(struct mhi_device *mhi_dev, struct net_device *ndev)
+{
+       struct mhi_net_dev *mhi_netdev = netdev_priv(ndev);
+
+       unregister_netdev(ndev);
+
+       mhi_unprepare_from_transfer(mhi_dev);
+
+       kfree_skb(mhi_netdev->skbagg_head);
+
+       dev_set_drvdata(&mhi_dev->dev, NULL);
+}
+
+static int mhi_net_probe(struct mhi_device *mhi_dev,
+                        const struct mhi_device_id *id)
+{
+       const struct mhi_device_info *info = (struct mhi_device_info *)id->driver_data;
+       struct net_device *ndev;
+       int err;
+
+       ndev = alloc_netdev(sizeof(struct mhi_net_dev), info->netname,
+                           NET_NAME_PREDICTABLE, mhi_net_setup);
+       if (!ndev)
+               return -ENOMEM;
+
+       SET_NETDEV_DEV(ndev, &mhi_dev->dev);
+
+       err = mhi_net_newlink(mhi_dev, ndev);
+       if (err) {
+               free_netdev(ndev);
+               return err;
+       }
+
+       return 0;
+}
+
+static void mhi_net_remove(struct mhi_device *mhi_dev)
+{
+       struct mhi_net_dev *mhi_netdev = dev_get_drvdata(&mhi_dev->dev);
+
+       mhi_net_dellink(mhi_dev, mhi_netdev->ndev);
+}
+
+static const struct mhi_device_info mhi_hwip0 = {
+       .netname = "mhi_hwip%d",
+};
+
+static const struct mhi_device_info mhi_swip0 = {
+       .netname = "mhi_swip%d",
+};
+
+static const struct mhi_device_id mhi_net_id_table[] = {
+       /* Hardware accelerated data PATH (to modem IPA), protocol agnostic */
+       { .chan = "IP_HW0", .driver_data = (kernel_ulong_t)&mhi_hwip0 },
+       /* Software data PATH (to modem CPU) */
+       { .chan = "IP_SW0", .driver_data = (kernel_ulong_t)&mhi_swip0 },
+       {}
+};
+MODULE_DEVICE_TABLE(mhi, mhi_net_id_table);
+
+static struct mhi_driver mhi_net_driver = {
+       .probe = mhi_net_probe,
+       .remove = mhi_net_remove,
+       .dl_xfer_cb = mhi_net_dl_callback,
+       .ul_xfer_cb = mhi_net_ul_callback,
+       .id_table = mhi_net_id_table,
+       .driver = {
+               .name = "mhi_net",
+               .owner = THIS_MODULE,
+       },
+};
+
+module_mhi_driver(mhi_net_driver);
+
+MODULE_AUTHOR("Loic Poulain <loic.poulain@linaro.org>");
+MODULE_DESCRIPTION("Network over MHI");
+MODULE_LICENSE("GPL v2");
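
The rewritten driver keeps the u64_stats pattern for its counters: 64-bit statistics that remain tear-free on 32-bit SMP without taking a lock on the fast path. Writers bracket updates with u64_stats_update_begin()/end(); readers loop until the sequence count is stable. Distilled (example_stats is a placeholder):

struct example_stats {
        u64_stats_t packets;
        struct u64_stats_sync syncp;    /* u64_stats_init() at setup */
};

static void example_count(struct example_stats *s)
{
        u64_stats_update_begin(&s->syncp);
        u64_stats_inc(&s->packets);
        u64_stats_update_end(&s->syncp);
}

static u64 example_read(struct example_stats *s)
{
        unsigned int start;
        u64 packets;

        do {
                start = u64_stats_fetch_begin_irq(&s->syncp);
                packets = u64_stats_read(&s->packets);
        } while (u64_stats_fetch_retry_irq(&s->syncp, start));

        return packets;
}
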
index ff01e5b..62d033a 100644 (file)
@@ -183,8 +183,6 @@ new_port_store(struct device *dev, struct device_attribute *attr,
               const char *buf, size_t count)
 {
        struct nsim_bus_dev *nsim_bus_dev = to_nsim_bus_dev(dev);
-       struct nsim_dev *nsim_dev = dev_get_drvdata(dev);
-       struct devlink *devlink;
        unsigned int port_index;
        int ret;
 
@@ -195,12 +193,15 @@ new_port_store(struct device *dev, struct device_attribute *attr,
        if (ret)
                return ret;
 
-       devlink = priv_to_devlink(nsim_dev);
+       if (!mutex_trylock(&nsim_bus_dev->nsim_bus_reload_lock))
+               return -EBUSY;
+
+       if (nsim_bus_dev->in_reload) {
+               mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock);
+               return -EBUSY;
+       }
 
-       mutex_lock(&nsim_bus_dev->nsim_bus_reload_lock);
-       devlink_reload_disable(devlink);
        ret = nsim_dev_port_add(nsim_bus_dev, NSIM_DEV_PORT_TYPE_PF, port_index);
-       devlink_reload_enable(devlink);
        mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock);
        return ret ? ret : count;
 }
@@ -212,8 +213,6 @@ del_port_store(struct device *dev, struct device_attribute *attr,
               const char *buf, size_t count)
 {
        struct nsim_bus_dev *nsim_bus_dev = to_nsim_bus_dev(dev);
-       struct nsim_dev *nsim_dev = dev_get_drvdata(dev);
-       struct devlink *devlink;
        unsigned int port_index;
        int ret;
 
@@ -224,12 +223,15 @@ del_port_store(struct device *dev, struct device_attribute *attr,
        if (ret)
                return ret;
 
-       devlink = priv_to_devlink(nsim_dev);
+       if (!mutex_trylock(&nsim_bus_dev->nsim_bus_reload_lock))
+               return -EBUSY;
+
+       if (nsim_bus_dev->in_reload) {
+               mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock);
+               return -EBUSY;
+       }
 
-       mutex_lock(&nsim_bus_dev->nsim_bus_reload_lock);
-       devlink_reload_disable(devlink);
        ret = nsim_dev_port_del(nsim_bus_dev, NSIM_DEV_PORT_TYPE_PF, port_index);
-       devlink_reload_enable(devlink);
        mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock);
        return ret ? ret : count;
 }
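
Both sysfs handlers switch from an unconditional mutex_lock() plus devlink_reload_disable()/enable() to a trylock plus an in_reload flag: rather than blocking (and risking a deadlock against a reload that takes the same lock), port addition and deletion now simply fail with -EBUSY while a reload is in flight. The guard, in isolation (example names are placeholders):

static ssize_t example_store(struct example_dev *ed)
{
        ssize_t ret;

        if (!mutex_trylock(&ed->lock)) /* don't block, just refuse */
                return -EBUSY;

        if (ed->in_reload) {           /* window spans reload down..up */
                mutex_unlock(&ed->lock);
                return -EBUSY;
        }

        ret = example_do_work(ed);
        mutex_unlock(&ed->lock);
        return ret;
}
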
index d538a39..54313bd 100644 (file)
@@ -864,16 +864,24 @@ static int nsim_dev_reload_down(struct devlink *devlink, bool netns_change,
                                struct netlink_ext_ack *extack)
 {
        struct nsim_dev *nsim_dev = devlink_priv(devlink);
+       struct nsim_bus_dev *nsim_bus_dev;
+
+       nsim_bus_dev = nsim_dev->nsim_bus_dev;
+       if (!mutex_trylock(&nsim_bus_dev->nsim_bus_reload_lock))
+               return -EOPNOTSUPP;
 
        if (nsim_dev->dont_allow_reload) {
                /* For testing purposes, user set debugfs dont_allow_reload
                 * value to true. So forbid it.
                 */
                NL_SET_ERR_MSG_MOD(extack, "User forbid the reload for testing purposes");
+               mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock);
                return -EOPNOTSUPP;
        }
+       nsim_bus_dev->in_reload = true;
 
        nsim_dev_reload_destroy(nsim_dev);
+       mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock);
        return 0;
 }
 
@@ -882,17 +890,26 @@ static int nsim_dev_reload_up(struct devlink *devlink, enum devlink_reload_actio
                              struct netlink_ext_ack *extack)
 {
        struct nsim_dev *nsim_dev = devlink_priv(devlink);
+       struct nsim_bus_dev *nsim_bus_dev;
+       int ret;
+
+       nsim_bus_dev = nsim_dev->nsim_bus_dev;
+       mutex_lock(&nsim_bus_dev->nsim_bus_reload_lock);
+       nsim_bus_dev->in_reload = false;
 
        if (nsim_dev->fail_reload) {
                /* For testing purposes, user set debugfs fail_reload
                 * value to true. Fail right away.
                 */
                NL_SET_ERR_MSG_MOD(extack, "User setup the reload to fail for testing purposes");
+               mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock);
                return -EINVAL;
        }
 
        *actions_performed = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT);
-       return nsim_dev_reload_create(nsim_dev, extack);
+       ret = nsim_dev_reload_create(nsim_dev, extack);
+       mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock);
+       return ret;
 }
 
 static int nsim_dev_info_get(struct devlink *devlink,
@@ -1432,7 +1449,7 @@ int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev)
        int err;
 
        devlink = devlink_alloc_ns(&nsim_dev_devlink_ops, sizeof(*nsim_dev),
-                                  nsim_bus_dev->initial_net);
+                                nsim_bus_dev->initial_net, &nsim_bus_dev->dev);
        if (!devlink)
                return -ENOMEM;
        nsim_dev = devlink_priv(devlink);
@@ -1453,7 +1470,7 @@ int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev)
        if (err)
                goto err_devlink_free;
 
-       err = devlink_register(devlink, &nsim_bus_dev->dev);
+       err = devlink_register(devlink);
        if (err)
                goto err_resources_unregister;
 
index 213d3e5..4300261 100644 (file)
@@ -1441,7 +1441,7 @@ static u64 nsim_fib_nexthops_res_occ_get(void *priv)
 static void nsim_fib_set_max_all(struct nsim_fib_data *data,
                                 struct devlink *devlink)
 {
-       enum nsim_resource_id res_ids[] = {
+       static const enum nsim_resource_id res_ids[] = {
                NSIM_RESOURCE_IPV4_FIB, NSIM_RESOURCE_IPV4_FIB_RULES,
                NSIM_RESOURCE_IPV6_FIB, NSIM_RESOURCE_IPV6_FIB_RULES,
                NSIM_RESOURCE_NEXTHOPS,
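
Marking the lookup table static const moves it from a per-call stack construction into .rodata: the array is built once at compile time instead of being re-initialized every time the function runs. The same micro-optimization in miniature (example_lookup is a placeholder helper):

static void example_lookup_all(void)
{
        /* One read-only copy in .rodata, nothing rebuilt per call */
        static const int ids[] = { 1, 2, 3 };
        unsigned int i;

        for (i = 0; i < ARRAY_SIZE(ids); i++)
                example_lookup(ids[i]);
}
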
index 1c20bcb..793c86d 100644 (file)
@@ -362,6 +362,7 @@ struct nsim_bus_dev {
        struct nsim_vf_config *vfconfigs;
        /* Lock for devlink->reload_enabled in netdevsim module */
        struct mutex nsim_bus_reload_lock;
+       bool in_reload;
        bool init;
 };
 
index 7bf3011..83aea5c 100644 (file)
@@ -288,7 +288,7 @@ static void bcm54xx_adjust_rxrefclk(struct phy_device *phydev)
        if (phydev->dev_flags & PHY_BRCM_DIS_TXCRXC_NOENRGY) {
                if (BRCM_PHY_MODEL(phydev) == PHY_ID_BCM54210E ||
                    BRCM_PHY_MODEL(phydev) == PHY_ID_BCM54810 ||
-                   BRCM_PHY_MODEL(phydev) == PHY_ID_BCM54210E)
+                   BRCM_PHY_MODEL(phydev) == PHY_ID_BCM54811)
                        val |= BCM54XX_SHD_SCR3_RXCTXC_DIS;
                else
                        val |= BCM54XX_SHD_SCR3_TRDDAPD;
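
This one-liner fixes a classic copy-and-paste defect: the OR-chain tested PHY_ID_BCM54210E twice, so PHY_ID_BCM54811 could never reach the BCM54XX_SHD_SCR3_RXCTXC_DIS branch it was meant to share; the duplicate is replaced by the intended model ID.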
index 4d53886..53bdd67 100644 (file)
@@ -401,11 +401,11 @@ static int ksz8041_config_aneg(struct phy_device *phydev)
 }
 
 static int ksz8051_ksz8795_match_phy_device(struct phy_device *phydev,
-                                           const u32 ksz_phy_id)
+                                           const bool ksz_8051)
 {
        int ret;
 
-       if ((phydev->phy_id & MICREL_PHY_ID_MASK) != ksz_phy_id)
+       if ((phydev->phy_id & MICREL_PHY_ID_MASK) != PHY_ID_KSZ8051)
                return 0;
 
        ret = phy_read(phydev, MII_BMSR);
@@ -418,7 +418,7 @@ static int ksz8051_ksz8795_match_phy_device(struct phy_device *phydev,
         * the switch does not.
         */
        ret &= BMSR_ERCAP;
-       if (ksz_phy_id == PHY_ID_KSZ8051)
+       if (ksz_8051)
                return ret;
        else
                return !ret;
@@ -426,7 +426,7 @@ static int ksz8051_ksz8795_match_phy_device(struct phy_device *phydev,
 
 static int ksz8051_match_phy_device(struct phy_device *phydev)
 {
-       return ksz8051_ksz8795_match_phy_device(phydev, PHY_ID_KSZ8051);
+       return ksz8051_ksz8795_match_phy_device(phydev, true);
 }
 
 static int ksz8081_config_init(struct phy_device *phydev)
@@ -535,7 +535,7 @@ static int ksz8061_config_init(struct phy_device *phydev)
 
 static int ksz8795_match_phy_device(struct phy_device *phydev)
 {
-       return ksz8051_ksz8795_match_phy_device(phydev, PHY_ID_KSZ87XX);
+       return ksz8051_ksz8795_match_phy_device(phydev, false);
 }
 
 static int ksz9021_load_values_from_of(struct phy_device *phydev,
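
The micrel refactor above replaces the PHY-ID argument with a bool: both callers match the same PHY_ID_KSZ8051 register value, and what actually tells the KSZ8051 PHY apart from the KSZ8795 switch port is the BMSR_ERCAP capability bit. A condensed sketch of that probe logic, assuming the surrounding driver context:

	static int demo_match(struct phy_device *phydev, bool want_ksz8051)
	{
		int bmsr;

		if ((phydev->phy_id & MICREL_PHY_ID_MASK) != PHY_ID_KSZ8051)
			return 0;	/* different ID space entirely */

		bmsr = phy_read(phydev, MII_BMSR);
		if (bmsr < 0)
			return bmsr;

		/* the KSZ8051 PHY advertises extended-register capability,
		 * the KSZ8795 switch port does not
		 */
		return want_ksz8051 ? !!(bmsr & BMSR_ERCAP) : !(bmsr & BMSR_ERCAP);
	}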
index 924ed5b..edb9516 100644 (file)
@@ -506,7 +506,7 @@ static int vsc85xx_ptp_cmp_init(struct phy_device *phydev, enum ts_blk blk)
 {
        struct vsc8531_private *vsc8531 = phydev->priv;
        bool base = phydev->mdio.addr == vsc8531->ts_base_addr;
-       u8 msgs[] = {
+       static const u8 msgs[] = {
                PTP_MSGTYPE_SYNC,
                PTP_MSGTYPE_DELAY_REQ
        };
@@ -847,7 +847,7 @@ static int vsc85xx_ts_ptp_action_flow(struct phy_device *phydev, enum ts_blk blk
 static int vsc85xx_ptp_conf(struct phy_device *phydev, enum ts_blk blk,
                            bool one_step, bool enable)
 {
-       u8 msgs[] = {
+       static const u8 msgs[] = {
                PTP_MSGTYPE_SYNC,
                PTP_MSGTYPE_DELAY_REQ
        };
@@ -1268,8 +1268,8 @@ static void vsc8584_set_input_clk_configured(struct phy_device *phydev)
 static int __vsc8584_init_ptp(struct phy_device *phydev)
 {
        struct vsc8531_private *vsc8531 = phydev->priv;
-       u32 ltc_seq_e[] = { 0, 400000, 0, 0, 0 };
-       u8  ltc_seq_a[] = { 8, 6, 5, 4, 2 };
+       static const u32 ltc_seq_e[] = { 0, 400000, 0, 0, 0 };
+       static const u8  ltc_seq_a[] = { 8, 6, 5, 4, 2 };
        u32 val;
 
        if (!vsc8584_is_1588_input_clk_configured(phydev)) {
index 207e59e..06e2181 100644 (file)
@@ -443,7 +443,7 @@ static int ipheth_probe(struct usb_interface *intf,
 
        netdev->netdev_ops = &ipheth_netdev_ops;
        netdev->watchdog_timeo = IPHETH_TX_TIMEOUT;
-       strcpy(netdev->name, "eth%d");
+       strscpy(netdev->name, "eth%d", sizeof(netdev->name));
 
        dev = netdev_priv(netdev);
        dev->udev = udev;
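
strscpy() is the preferred replacement for strcpy() into fixed-size buffers: it is bounded by the destination size, always NUL-terminates, and returns -E2BIG when the source had to be truncated. The usbnet hunk further down makes the same conversion for its "usb%d"/"eth%d"/"wlan%d"/"wwan%d" name templates. Usage sketch:

	char name[IFNAMSIZ];

	if (strscpy(name, "eth%d", sizeof(name)) < 0)
		pr_warn("name template truncated\n");	/* name is still NUL-terminated */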
index 13f8636..4e8d3c2 100644 (file)
@@ -1154,7 +1154,7 @@ static int lan78xx_link_reset(struct lan78xx_net *dev)
 {
        struct phy_device *phydev = dev->net->phydev;
        struct ethtool_link_ksettings ecmd;
-       int ladv, radv, ret;
+       int ladv, radv, ret, link;
        u32 buf;
 
        /* clear LAN78xx interrupt status */
@@ -1162,9 +1162,12 @@ static int lan78xx_link_reset(struct lan78xx_net *dev)
        if (unlikely(ret < 0))
                return -EIO;
 
+       mutex_lock(&phydev->lock);
        phy_read_status(phydev);
+       link = phydev->link;
+       mutex_unlock(&phydev->lock);
 
-       if (!phydev->link && dev->link_on) {
+       if (!link && dev->link_on) {
                dev->link_on = false;
 
                /* reset MAC */
@@ -1177,7 +1180,7 @@ static int lan78xx_link_reset(struct lan78xx_net *dev)
                        return -EIO;
 
                del_timer(&dev->stat_monitor);
-       } else if (phydev->link && !dev->link_on) {
+       } else if (link && !dev->link_on) {
                dev->link_on = true;
 
                phy_ethtool_ksettings_get(phydev, &ecmd);
@@ -1466,9 +1469,14 @@ static int lan78xx_set_eee(struct net_device *net, struct ethtool_eee *edata)
 
 static u32 lan78xx_get_link(struct net_device *net)
 {
+       u32 link;
+
+       mutex_lock(&net->phydev->lock);
        phy_read_status(net->phydev);
+       link = net->phydev->link;
+       mutex_unlock(&net->phydev->lock);
 
-       return net->phydev->link;
+       return link;
 }
 
 static void lan78xx_get_drvinfo(struct net_device *net,
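
Both lan78xx hunks follow one recipe: phy_read_status() refreshes phydev->link (and speed/duplex) and must be serialized against the phylib state machine, so the driver now takes phydev->lock around the call and snapshots the link state while still holding it, instead of reading phydev->link unlocked afterwards:

	int link;

	mutex_lock(&phydev->lock);
	phy_read_status(phydev);	/* updates phydev->link et al. */
	link = phydev->link;		/* snapshot under the lock */
	mutex_unlock(&phydev->lock);

	/* act on 'link', a value that cannot change underneath us */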
index 0475ef0..36dafcb 100644 (file)
@@ -1,31 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- *  Copyright (c) 1999-2013 Petko Manolov (petkan@nucleusys.com)
+ *  Copyright (c) 1999-2021 Petko Manolov (petkan@nucleusys.com)
  *
- *     ChangeLog:
- *             ....    Most of the time spent on reading sources & docs.
- *             v0.2.x  First official release for the Linux kernel.
- *             v0.3.0  Beutified and structured, some bugs fixed.
- *             v0.3.x  URBifying bulk requests and bugfixing. First relatively
- *                     stable release. Still can touch device's registers only
- *                     from top-halves.
- *             v0.4.0  Control messages remained unurbified are now URBs.
- *                     Now we can touch the HW at any time.
- *             v0.4.9  Control urbs again use process context to wait. Argh...
- *                     Some long standing bugs (enable_net_traffic) fixed.
- *                     Also nasty trick about resubmiting control urb from
- *                     interrupt context used. Please let me know how it
- *                     behaves. Pegasus II support added since this version.
- *                     TODO: suppressing HCD warnings spewage on disconnect.
- *             v0.4.13 Ethernet address is now set at probe(), not at open()
- *                     time as this seems to break dhcpd.
- *             v0.5.0  branch to 2.5.x kernels
- *             v0.5.1  ethtool support added
- *             v0.5.5  rx socket buffers are in a pool and the their allocation
- *                     is out of the interrupt routine.
- *             ...
- *             v0.9.3  simplified [get|set]_register(s), async update registers
- *                     logic revisited, receive skb_pool removed.
  */
 
 #include <linux/sched.h>
@@ -45,7 +21,6 @@
 /*
  * Version Information
  */
-#define DRIVER_VERSION "v0.9.3 (2013/04/25)"
 #define DRIVER_AUTHOR "Petko Manolov <petkan@nucleusys.com>"
 #define DRIVER_DESC "Pegasus/Pegasus II USB Ethernet driver"
 
@@ -132,9 +107,15 @@ static int get_registers(pegasus_t *pegasus, __u16 indx, __u16 size, void *data)
 static int set_registers(pegasus_t *pegasus, __u16 indx, __u16 size,
                         const void *data)
 {
-       return usb_control_msg_send(pegasus->usb, 0, PEGASUS_REQ_SET_REGS,
+       int ret;
+
+       ret = usb_control_msg_send(pegasus->usb, 0, PEGASUS_REQ_SET_REGS,
                                    PEGASUS_REQT_WRITE, 0, indx, data, size,
                                    1000, GFP_NOIO);
+       if (ret < 0)
+               netif_dbg(pegasus, drv, pegasus->net, "%s failed with %d\n", __func__, ret);
+
+       return ret;
 }
 
 /*
@@ -145,10 +126,15 @@ static int set_registers(pegasus_t *pegasus, __u16 indx, __u16 size,
 static int set_register(pegasus_t *pegasus, __u16 indx, __u8 data)
 {
        void *buf = &data;
+       int ret;
 
-       return usb_control_msg_send(pegasus->usb, 0, PEGASUS_REQ_SET_REG,
+       ret = usb_control_msg_send(pegasus->usb, 0, PEGASUS_REQ_SET_REG,
                                    PEGASUS_REQT_WRITE, data, indx, buf, 1,
                                    1000, GFP_NOIO);
+       if (ret < 0)
+               netif_dbg(pegasus, drv, pegasus->net, "%s failed with %d\n", __func__, ret);
+
+       return ret;
 }
 
 static int update_eth_regs_async(pegasus_t *pegasus)
@@ -188,10 +174,9 @@ static int update_eth_regs_async(pegasus_t *pegasus)
 
 static int __mii_op(pegasus_t *p, __u8 phy, __u8 indx, __u16 *regd, __u8 cmd)
 {
-       int i;
-       __u8 data[4] = { phy, 0, 0, indx };
+       int i, ret;
        __le16 regdi;
-       int ret = -ETIMEDOUT;
+       __u8 data[4] = { phy, 0, 0, indx };
 
        if (cmd & PHY_WRITE) {
                __le16 *t = (__le16 *) & data[1];
@@ -207,12 +192,15 @@ static int __mii_op(pegasus_t *p, __u8 phy, __u8 indx, __u16 *regd, __u8 cmd)
                if (data[0] & PHY_DONE)
                        break;
        }
-       if (i >= REG_TIMEOUT)
+       if (i >= REG_TIMEOUT) {
+               ret = -ETIMEDOUT;
                goto fail;
+       }
        if (cmd & PHY_READ) {
                ret = get_registers(p, PhyData, 2, &regdi);
+               if (ret < 0)
+                       goto fail;
                *regd = le16_to_cpu(regdi);
-               return ret;
        }
        return 0;
 fail:
@@ -235,9 +223,13 @@ static int write_mii_word(pegasus_t *pegasus, __u8 phy, __u8 indx, __u16 *regd)
 static int mdio_read(struct net_device *dev, int phy_id, int loc)
 {
        pegasus_t *pegasus = netdev_priv(dev);
+       int ret;
        u16 res;
 
-       read_mii_word(pegasus, phy_id, loc, &res);
+       ret = read_mii_word(pegasus, phy_id, loc, &res);
+       if (ret < 0)
+               return ret;
+
        return (int)res;
 }
 
@@ -251,10 +243,9 @@ static void mdio_write(struct net_device *dev, int phy_id, int loc, int val)
 
 static int read_eprom_word(pegasus_t *pegasus, __u8 index, __u16 *retdata)
 {
-       int i;
-       __u8 tmp = 0;
+       int ret, i;
        __le16 retdatai;
-       int ret;
+       __u8 tmp = 0;
 
        set_register(pegasus, EpromCtrl, 0);
        set_register(pegasus, EpromOffset, index);
@@ -262,21 +253,25 @@ static int read_eprom_word(pegasus_t *pegasus, __u8 index, __u16 *retdata)
 
        for (i = 0; i < REG_TIMEOUT; i++) {
                ret = get_registers(pegasus, EpromCtrl, 1, &tmp);
+               if (ret < 0)
+                       goto fail;
                if (tmp & EPROM_DONE)
                        break;
-               if (ret == -ESHUTDOWN)
-                       goto fail;
        }
-       if (i >= REG_TIMEOUT)
+       if (i >= REG_TIMEOUT) {
+               ret = -ETIMEDOUT;
                goto fail;
+       }
 
        ret = get_registers(pegasus, EpromData, 2, &retdatai);
+       if (ret < 0)
+               goto fail;
        *retdata = le16_to_cpu(retdatai);
        return ret;
 
 fail:
-       netif_warn(pegasus, drv, pegasus->net, "%s failed\n", __func__);
-       return -ETIMEDOUT;
+       netif_dbg(pegasus, drv, pegasus->net, "%s failed\n", __func__);
+       return ret;
 }
 
 #ifdef PEGASUS_WRITE_EEPROM
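
The pegasus changes share one theme: register accessors now log failures at debug level and propagate the real errno (mdio_read() likewise returns the error instead of stale data), and the busy-wait loops distinguish an I/O error, propagated immediately, from a genuine timeout, which yields -ETIMEDOUT only when the loop runs out. A hypothetical helper condensing the polling pattern:

	static int poll_done_bit(pegasus_t *pegasus, __u16 reg, __u8 done)
	{
		__u8 tmp;
		int i, ret;

		for (i = 0; i < REG_TIMEOUT; i++) {
			ret = get_registers(pegasus, reg, 1, &tmp);
			if (ret < 0)
				return ret;	/* I/O error beats timeout */
			if (tmp & done)
				return 0;
		}
		return -ETIMEDOUT;
	}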
@@ -324,10 +319,10 @@ static int write_eprom_word(pegasus_t *pegasus, __u8 index, __u16 data)
        return ret;
 
 fail:
-       netif_warn(pegasus, drv, pegasus->net, "%s failed\n", __func__);
+       netif_dbg(pegasus, drv, pegasus->net, "%s failed\n", __func__);
        return -ETIMEDOUT;
 }
-#endif                         /* PEGASUS_WRITE_EEPROM */
+#endif /* PEGASUS_WRITE_EEPROM */
 
 static inline int get_node_id(pegasus_t *pegasus, u8 *id)
 {
@@ -367,19 +362,21 @@ static void set_ethernet_addr(pegasus_t *pegasus)
        return;
 err:
        eth_hw_addr_random(pegasus->net);
-       dev_info(&pegasus->intf->dev, "software assigned MAC address.\n");
+       netif_dbg(pegasus, drv, pegasus->net, "software assigned MAC address.\n");
 
        return;
 }
 
 static inline int reset_mac(pegasus_t *pegasus)
 {
+       int ret, i;
        __u8 data = 0x8;
-       int i;
 
        set_register(pegasus, EthCtrl1, data);
        for (i = 0; i < REG_TIMEOUT; i++) {
-               get_registers(pegasus, EthCtrl1, 1, &data);
+               ret = get_registers(pegasus, EthCtrl1, 1, &data);
+               if (ret < 0)
+                       goto fail;
                if (~data & 0x08) {
                        if (loopback)
                                break;
@@ -402,22 +399,29 @@ static inline int reset_mac(pegasus_t *pegasus)
        }
        if (usb_dev_id[pegasus->dev_index].vendor == VENDOR_ELCON) {
                __u16 auxmode;
-               read_mii_word(pegasus, 3, 0x1b, &auxmode);
+               ret = read_mii_word(pegasus, 3, 0x1b, &auxmode);
+               if (ret < 0)
+                       goto fail;
                auxmode |= 4;
                write_mii_word(pegasus, 3, 0x1b, &auxmode);
        }
 
        return 0;
+fail:
+       netif_dbg(pegasus, drv, pegasus->net, "%s failed\n", __func__);
+       return ret;
 }
 
 static int enable_net_traffic(struct net_device *dev, struct usb_device *usb)
 {
-       __u16 linkpart;
-       __u8 data[4];
        pegasus_t *pegasus = netdev_priv(dev);
        int ret;
+       __u16 linkpart;
+       __u8 data[4];
 
-       read_mii_word(pegasus, pegasus->phy, MII_LPA, &linkpart);
+       ret = read_mii_word(pegasus, pegasus->phy, MII_LPA, &linkpart);
+       if (ret < 0)
+               goto fail;
        data[0] = 0xc8; /* TX & RX enable, append status, no CRC */
        data[1] = 0;
        if (linkpart & (ADVERTISE_100FULL | ADVERTISE_10FULL))
@@ -435,11 +439,16 @@ static int enable_net_traffic(struct net_device *dev, struct usb_device *usb)
            usb_dev_id[pegasus->dev_index].vendor == VENDOR_LINKSYS2 ||
            usb_dev_id[pegasus->dev_index].vendor == VENDOR_DLINK) {
                u16 auxmode;
-               read_mii_word(pegasus, 0, 0x1b, &auxmode);
+               ret = read_mii_word(pegasus, 0, 0x1b, &auxmode);
+               if (ret < 0)
+                       goto fail;
                auxmode |= 4;
                write_mii_word(pegasus, 0, 0x1b, &auxmode);
        }
 
+       return 0;
+fail:
+       netif_dbg(pegasus, drv, pegasus->net, "%s failed\n", __func__);
        return ret;
 }
 
@@ -447,9 +456,9 @@ static void read_bulk_callback(struct urb *urb)
 {
        pegasus_t *pegasus = urb->context;
        struct net_device *net;
+       u8 *buf = urb->transfer_buffer;
        int rx_status, count = urb->actual_length;
        int status = urb->status;
-       u8 *buf = urb->transfer_buffer;
        __u16 pkt_len;
 
        if (!pegasus)
@@ -735,12 +744,16 @@ static inline void disable_net_traffic(pegasus_t *pegasus)
        set_registers(pegasus, EthCtrl0, sizeof(tmp), &tmp);
 }
 
-static inline void get_interrupt_interval(pegasus_t *pegasus)
+static inline int get_interrupt_interval(pegasus_t *pegasus)
 {
        u16 data;
        u8 interval;
+       int ret;
+
+       ret = read_eprom_word(pegasus, 4, &data);
+       if (ret < 0)
+               return ret;
 
-       read_eprom_word(pegasus, 4, &data);
        interval = data >> 8;
        if (pegasus->usb->speed != USB_SPEED_HIGH) {
                if (interval < 0x80) {
@@ -755,6 +768,8 @@ static inline void get_interrupt_interval(pegasus_t *pegasus)
                }
        }
        pegasus->intr_interval = interval;
+
+       return 0;
 }
 
 static void set_carrier(struct net_device *net)
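
get_interrupt_interval() shows the same conversion one level up: a formerly void helper now returns int so that a failed EEPROM read aborts probing (the pegasus_probe() hunk below checks the result and unwinds) rather than programming an interrupt interval derived from garbage.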
@@ -880,7 +895,6 @@ static void pegasus_get_drvinfo(struct net_device *dev,
        pegasus_t *pegasus = netdev_priv(dev);
 
        strlcpy(info->driver, driver_name, sizeof(info->driver));
-       strlcpy(info->version, DRIVER_VERSION, sizeof(info->version));
        usb_make_path(pegasus->usb, info->bus_info, sizeof(info->bus_info));
 }
 
@@ -999,8 +1013,7 @@ static int pegasus_siocdevprivate(struct net_device *net, struct ifreq *rq,
                data[0] = pegasus->phy;
                fallthrough;
        case SIOCDEVPRIVATE + 1:
-               read_mii_word(pegasus, data[0], data[1] & 0x1f, &data[3]);
-               res = 0;
+               res = read_mii_word(pegasus, data[0], data[1] & 0x1f, &data[3]);
                break;
        case SIOCDEVPRIVATE + 2:
                if (!capable(CAP_NET_ADMIN))
@@ -1034,22 +1047,25 @@ static void pegasus_set_multicast(struct net_device *net)
 
 static __u8 mii_phy_probe(pegasus_t *pegasus)
 {
-       int i;
+       int i, ret;
        __u16 tmp;
 
        for (i = 0; i < 32; i++) {
-               read_mii_word(pegasus, i, MII_BMSR, &tmp);
+               ret = read_mii_word(pegasus, i, MII_BMSR, &tmp);
+               if (ret < 0)
+                       goto fail;
                if (tmp == 0 || tmp == 0xffff || (tmp & BMSR_MEDIA) == 0)
                        continue;
                else
                        return i;
        }
-
+fail:
        return 0xff;
 }
 
 static inline void setup_pegasus_II(pegasus_t *pegasus)
 {
+       int ret;
        __u8 data = 0xa5;
 
        set_register(pegasus, Reg1d, 0);
@@ -1061,7 +1077,9 @@ static inline void setup_pegasus_II(pegasus_t *pegasus)
                set_register(pegasus, Reg7b, 2);
 
        set_register(pegasus, 0x83, data);
-       get_registers(pegasus, 0x83, 1, &data);
+       ret = get_registers(pegasus, 0x83, 1, &data);
+       if (ret < 0)
+               goto fail;
 
        if (data == 0xa5)
                pegasus->chip = 0x8513;
@@ -1076,6 +1094,10 @@ static inline void setup_pegasus_II(pegasus_t *pegasus)
                set_register(pegasus, Reg81, 6);
        else
                set_register(pegasus, Reg81, 2);
+
+       return;
+fail:
+       netif_dbg(pegasus, drv, pegasus->net, "%s failed\n", __func__);
 }
 
 static void check_carrier(struct work_struct *work)
@@ -1150,7 +1172,9 @@ static int pegasus_probe(struct usb_interface *intf,
                                | NETIF_MSG_PROBE | NETIF_MSG_LINK);
 
        pegasus->features = usb_dev_id[dev_index].private;
-       get_interrupt_interval(pegasus);
+       res = get_interrupt_interval(pegasus);
+       if (res)
+               goto out2;
        if (reset_mac(pegasus)) {
                dev_err(&intf->dev, "can't reset MAC\n");
                res = -EIO;
@@ -1297,7 +1321,7 @@ static void __init parse_id(char *id)
 
 static int __init pegasus_init(void)
 {
-       pr_info("%s: %s, " DRIVER_DESC "\n", driver_name, DRIVER_VERSION);
+       pr_info("%s: " DRIVER_DESC "\n", driver_name);
        if (devid)
                parse_id(devid);
        return usb_register(&pegasus_driver);
index 470e1c1..840c1c2 100644 (file)
@@ -1725,7 +1725,7 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod)
        dev->interrupt_count = 0;
 
        dev->net = net;
-       strcpy (net->name, "usb%d");
+       strscpy(net->name, "usb%d", sizeof(net->name));
        memcpy (net->dev_addr, node_id, sizeof node_id);
 
        /* rx and tx sides can use different message sizes;
@@ -1752,13 +1752,13 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod)
                if ((dev->driver_info->flags & FLAG_ETHER) != 0 &&
                    ((dev->driver_info->flags & FLAG_POINTTOPOINT) == 0 ||
                     (net->dev_addr [0] & 0x02) == 0))
-                       strcpy (net->name, "eth%d");
+                       strscpy(net->name, "eth%d", sizeof(net->name));
                /* WLAN devices should always be named "wlan%d" */
                if ((dev->driver_info->flags & FLAG_WLAN) != 0)
-                       strcpy(net->name, "wlan%d");
+                       strscpy(net->name, "wlan%d", sizeof(net->name));
                /* WWAN devices should always be named "wwan%d" */
                if ((dev->driver_info->flags & FLAG_WWAN) != 0)
-                       strcpy(net->name, "wwan%d");
+                       strscpy(net->name, "wwan%d", sizeof(net->name));
 
                /* devices that cannot do ARP */
                if ((dev->driver_info->flags & FLAG_NOARP) != 0)
index 56c3f85..2e42210 100644 (file)
@@ -380,7 +380,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
                                   struct page *page, unsigned int offset,
                                   unsigned int len, unsigned int truesize,
                                   bool hdr_valid, unsigned int metasize,
-                                  bool whole_page)
+                                  unsigned int headroom)
 {
        struct sk_buff *skb;
        struct virtio_net_hdr_mrg_rxbuf *hdr;
@@ -398,28 +398,16 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
        else
                hdr_padded_len = sizeof(struct padded_vnet_hdr);
 
-       /* If whole_page, there is an offset between the beginning of the
+       /* If headroom is not 0, there is an offset between the beginning of the
         * data and the allocated space, otherwise the data and the allocated
         * space are aligned.
         *
         * Buffers with headroom use PAGE_SIZE as alloc size, see
         * add_recvbuf_mergeable() + get_mergeable_buf_len()
         */
-       if (whole_page) {
-               /* Buffers with whole_page use PAGE_SIZE as alloc size,
-                * see add_recvbuf_mergeable() + get_mergeable_buf_len()
-                */
-               truesize = PAGE_SIZE;
-
-               /* page maybe head page, so we should get the buf by p, not the
-                * page
-                */
-               tailroom = truesize - len - offset_in_page(p);
-               buf = (char *)((unsigned long)p & PAGE_MASK);
-       } else {
-               tailroom = truesize - len;
-               buf = p;
-       }
+       truesize = headroom ? PAGE_SIZE : truesize;
+       tailroom = truesize - len - headroom;
+       buf = p - headroom;
 
        len -= hdr_len;
        offset += hdr_padded_len;
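
With the whole_page flag replaced by the actual headroom, the buffer geometry falls out of plain arithmetic instead of PAGE_MASK tricks; a nonzero headroom currently implies a PAGE_SIZE allocation (see add_recvbuf_mergeable()). The layout being computed:

	/*
	 *  buf = p - headroom       p                p + len
	 *   |<----- headroom ----->|<----- len ----->|<-- tailroom -->|
	 *   |<---------------------- truesize ----------------------->|
	 *
	 * tailroom = truesize - len - headroom;
	 */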
@@ -978,7 +966,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
                                put_page(page);
                                head_skb = page_to_skb(vi, rq, xdp_page, offset,
                                                       len, PAGE_SIZE, false,
-                                                      metasize, true);
+                                                      metasize,
+                                                      VIRTIO_XDP_HEADROOM);
                                return head_skb;
                        }
                        break;
@@ -1029,7 +1018,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
        rcu_read_unlock();
 
        head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog,
-                              metasize, !!headroom);
+                              metasize, headroom);
        curr_skb = head_skb;
 
        if (unlikely(!curr_skb))
@@ -2208,14 +2197,14 @@ static int virtnet_set_channels(struct net_device *dev,
        if (vi->rq[0].xdp_prog)
                return -EINVAL;
 
-       get_online_cpus();
+       cpus_read_lock();
        err = _virtnet_set_queues(vi, queue_pairs);
        if (err) {
-               put_online_cpus();
+               cpus_read_unlock();
                goto err;
        }
        virtnet_set_affinity(vi);
-       put_online_cpus();
+       cpus_read_unlock();
 
        netif_set_real_num_tx_queues(dev, queue_pairs);
        netif_set_real_num_rx_queues(dev, queue_pairs);
@@ -2970,9 +2959,9 @@ static int init_vqs(struct virtnet_info *vi)
        if (ret)
                goto err_free;
 
-       get_online_cpus();
+       cpus_read_lock();
        virtnet_set_affinity(vi);
-       put_online_cpus();
+       cpus_read_unlock();
 
        return 0;
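
get_online_cpus()/put_online_cpus() were the legacy names for the CPU-hotplug read lock; both virtio-net call sites move to the current cpus_read_lock()/cpus_read_unlock() spelling. While the lock is held the set of online CPUs cannot change, which is what makes the affinity setup safe:

	int cpu;

	cpus_read_lock();
	for_each_online_cpu(cpu)
		configure_cpu(cpu);	/* hypothetical per-CPU setup; mask is stable */
	cpus_read_unlock();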
 
index 2b1b944..662e261 100644 (file)
@@ -857,30 +857,24 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
        unsigned int hh_len = LL_RESERVED_SPACE(dev);
        struct neighbour *neigh;
        bool is_v6gw = false;
-       int ret = -EINVAL;
 
        nf_reset_ct(skb);
 
        /* Be paranoid, rather than too clever. */
        if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
-               struct sk_buff *skb2;
-
-               skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev));
-               if (!skb2) {
-                       ret = -ENOMEM;
-                       goto err;
+               skb = skb_expand_head(skb, hh_len);
+               if (!skb) {
+                       dev->stats.tx_errors++;
+                       return -ENOMEM;
                }
-               if (skb->sk)
-                       skb_set_owner_w(skb2, skb->sk);
-
-               consume_skb(skb);
-               skb = skb2;
        }
 
        rcu_read_lock_bh();
 
        neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
        if (!IS_ERR(neigh)) {
+               int ret;
+
                sock_confirm_neigh(skb, neigh);
                /* if crossing protocols, can not use the cached header */
                ret = neigh_output(neigh, skb, is_v6gw);
@@ -889,9 +883,8 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
        }
 
        rcu_read_unlock_bh();
-err:
        vrf_tx_error(skb->dev, skb);
-       return ret;
+       return -EINVAL;
 }
 
 static int vrf_output(struct net *net, struct sock *sk, struct sk_buff *skb)
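
The vrf rewrite drops the open-coded skb_realloc_headroom() + skb_set_owner_w() + consume_skb() dance in favor of skb_expand_head(), which grows the headroom, preserves socket ownership, and frees the original skb itself on failure, so the caller only has to handle NULL:

	if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
		skb = skb_expand_head(skb, hh_len);
		if (!skb) {
			dev->stats.tx_errors++;
			return -ENOMEM;	/* old skb already freed */
		}
	}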
index 473df25..592a838 100644 (file)
@@ -290,30 +290,6 @@ config SLIC_DS26522
          To compile this driver as a module, choose M here: the
          module will be called slic_ds26522.
 
-config DSCC4_PCISYNC
-       bool "Etinc PCISYNC features"
-       depends on DSCC4
-       help
-         Due to Etinc's design choice for its PCISYNC cards, some operations
-         are only allowed on specific ports of the DSCC4. This option is the
-         only way for the driver to know that it shouldn't return a success
-         code for these operations.
-
-         Please say Y if your card is an Etinc's PCISYNC.
-
-config DSCC4_PCI_RST
-       bool "Hard reset support"
-       depends on DSCC4
-       help
-         Various DSCC4 bugs forbid any reliable software reset of the ASIC.
-         As a replacement, some vendors provide a way to assert the PCI #RST
-         pin of DSCC4 through the GPIO port of the card. If you choose Y,
-         the driver will make use of this feature before module removal
-         (i.e. rmmod). The feature is known to be available on Commtech's
-         cards. Contact your manufacturer for details.
-
-         Say Y if your card supports this feature.
-
 config IXP4XX_HSS
        tristate "Intel IXP4xx HSS (synchronous serial port) support"
        depends on HDLC && IXP4XX_NPE && IXP4XX_QMGR
@@ -337,33 +313,6 @@ config LAPBETHER
          To compile this driver as a module, choose M here: the
          module will be called lapbether.
 
-         If unsure, say N.
-
-config SBNI
-       tristate "Granch SBNI12 Leased Line adapter support"
-       depends on X86
-       help
-         Driver for ISA SBNI12-xx cards which are low cost alternatives to
-         leased line modems.
-
-         You can find more information and last versions of drivers and
-         utilities at <http://www.granch.ru/>. If you have any question you
-         can send email to <sbni@granch.ru>.
-
-         To compile this driver as a module, choose M here: the
-         module will be called sbni.
-
-         If unsure, say N.
-
-config SBNI_MULTILINE
-       bool "Multiple line feature support"
-       depends on SBNI
-       help
-         Schedule traffic for some parallel lines, via SBNI12 adapters.
-
-         If you have two computers connected with two parallel lines it's
-         possible to increase transfer rate nearly twice. You should have
-         a program named 'sbniconfig' to configure adapters.
 
          If unsure, say N.
 
index 081666c..f6b92ef 100644 (file)
@@ -22,7 +22,6 @@ obj-$(CONFIG_FARSYNC)         += farsync.o
 obj-$(CONFIG_LANMEDIA)         += lmc/
 
 obj-$(CONFIG_LAPBETHER)                += lapbether.o
-obj-$(CONFIG_SBNI)             += sbni.o
 obj-$(CONFIG_N2)               += n2.o
 obj-$(CONFIG_C101)             += c101.o
 obj-$(CONFIG_WANXL)            += wanxl.o
index 15a7543..e985e54 100644 (file)
@@ -319,16 +319,18 @@ MODULE_DESCRIPTION("Modular driver for the Comtrol Hostess SV11");
 
 static struct z8530_dev *sv11_unit;
 
-int init_module(void)
+static int sv11_module_init(void)
 {
        sv11_unit = sv11_init(io, irq);
        if (!sv11_unit)
                return -ENODEV;
        return 0;
 }
+module_init(sv11_module_init);
 
-void cleanup_module(void)
+static void sv11_module_cleanup(void)
 {
        if (sv11_unit)
                sv11_shutdown(sv11_unit);
 }
+module_exit(sv11_module_cleanup);
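
Defining functions literally named init_module()/cleanup_module() is the ancient module style; registering static functions via module_init()/module_exit() keeps the symbols file-local. A minimal sketch with hypothetical demo names (adding the usual __init/__exit tags, which this particular patch does not):

	#include <linux/module.h>

	static int __init demo_init(void)
	{
		return 0;	/* a negative errno here aborts module load */
	}
	module_init(demo_init);

	static void __exit demo_exit(void)
	{
	}
	module_exit(demo_exit);

	MODULE_LICENSE("GPL");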
diff --git a/drivers/net/wan/sbni.c b/drivers/net/wan/sbni.c
deleted file mode 100644 (file)
index 469fe97..0000000
+++ /dev/null
@@ -1,1639 +0,0 @@
-/* sbni.c:  Granch SBNI12 leased line adapters driver for linux
- *
- *     Written 2001 by Denis I.Timofeev (timofeev@granch.ru)
- *
- *     Previous versions were written by Yaroslav Polyakov,
- *     Alexey Zverev and Max Khon.
- *
- *     Driver supports SBNI12-02,-04,-05,-10,-11 cards, single and
- *     double-channel, PCI and ISA modifications.
- *     More info and useful utilities to work with SBNI12 cards you can find
- *     at http://www.granch.com (English) or http://www.granch.ru (Russian)
- *
- *     This software may be used and distributed according to the terms
- *     of the GNU General Public License.
- *
- *
- *  5.0.1      Jun 22 2001
- *       - Fixed bug in probe
- *  5.0.0      Jun 06 2001
- *       - Driver was completely redesigned by Denis I.Timofeev,
- *       - now PCI/Dual, ISA/Dual (with single interrupt line) models are
- *       - supported
- *  3.3.0      Thu Feb 24 21:30:28 NOVT 2000 
- *        - PCI cards support
- *  3.2.0      Mon Dec 13 22:26:53 NOVT 1999
- *       - Completely rebuilt all the packet storage system
- *       -    to work in Ethernet-like style.
- *  3.1.1      just fixed some bugs (5 aug 1999)
- *  3.1.0      added balancing feature (26 apr 1999)
- *  3.0.1      just fixed some bugs (14 apr 1999).
- *  3.0.0      Initial Revision, Yaroslav Polyakov (24 Feb 1999)
- *        - added pre-calculation for CRC, fixed bug with "len-2" frames, 
- *        - removed outbound fragmentation (MTU=1000), written CRC-calculation 
- *        - on asm, added work with hard_headers and now we have our own cache 
- *        - for them, optionally supported word-interchange on some chipsets,
- * 
- *     Known problem: this driver wasn't tested on multiprocessor machine.
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/ptrace.h>
-#include <linux/fcntl.h>
-#include <linux/ioport.h>
-#include <linux/interrupt.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/pci.h>
-#include <linux/skbuff.h>
-#include <linux/timer.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-
-#include <net/net_namespace.h>
-#include <net/arp.h>
-#include <net/Space.h>
-
-#include <asm/io.h>
-#include <asm/types.h>
-#include <asm/byteorder.h>
-#include <asm/irq.h>
-#include <linux/uaccess.h>
-
-#include "sbni.h"
-
-/* device private data */
-
-struct net_local {
-       struct timer_list       watchdog;
-       struct net_device       *watchdog_dev;
-
-       spinlock_t      lock;
-       struct sk_buff  *rx_buf_p;              /* receive buffer ptr */
-       struct sk_buff  *tx_buf_p;              /* transmit buffer ptr */
-       
-       unsigned int    framelen;               /* current frame length */
-       unsigned int    maxframe;               /* maximum valid frame length */
-       unsigned int    state;
-       unsigned int    inppos, outpos;         /* positions in rx/tx buffers */
-
-       /* transmitting frame number - from frames qty to 1 */
-       unsigned int    tx_frameno;
-
-       /* expected number of next receiving frame */
-       unsigned int    wait_frameno;
-
-       /* count of failed attempts to frame send - 32 attempts do before
-          error - while receiver tunes on opposite side of wire */
-       unsigned int    trans_errors;
-
-       /* idle time; send pong when limit exceeded */
-       unsigned int    timer_ticks;
-
-       /* fields used for receive level autoselection */
-       int     delta_rxl;
-       unsigned int    cur_rxl_index, timeout_rxl;
-       unsigned long   cur_rxl_rcvd, prev_rxl_rcvd;
-
-       struct sbni_csr1        csr1;           /* current value of CSR1 */
-       struct sbni_in_stats    in_stats;       /* internal statistics */ 
-
-       struct net_device               *second;        /* for ISA/dual cards */
-
-#ifdef CONFIG_SBNI_MULTILINE
-       struct net_device               *master;
-       struct net_device               *link;
-#endif
-};
-
-
-static int  sbni_card_probe( unsigned long );
-static int  sbni_pci_probe( struct net_device  * );
-static struct net_device  *sbni_probe1(struct net_device *, unsigned long, int);
-static int  sbni_open( struct net_device * );
-static int  sbni_close( struct net_device * );
-static netdev_tx_t sbni_start_xmit(struct sk_buff *,
-                                        struct net_device * );
-static int  sbni_siocdevprivate(struct net_device *, struct ifreq *,
-                               void __user *, int);
-static void  set_multicast_list( struct net_device * );
-
-static irqreturn_t sbni_interrupt( int, void * );
-static void  handle_channel( struct net_device * );
-static int   recv_frame( struct net_device * );
-static void  send_frame( struct net_device * );
-static int   upload_data( struct net_device *,
-                         unsigned, unsigned, unsigned, u32 );
-static void  download_data( struct net_device *, u32 * );
-static void  sbni_watchdog(struct timer_list *);
-static void  interpret_ack( struct net_device *, unsigned );
-static int   append_frame_to_pkt( struct net_device *, unsigned, u32 );
-static void  indicate_pkt( struct net_device * );
-static void  card_start( struct net_device * );
-static void  prepare_to_send( struct sk_buff *, struct net_device * );
-static void  drop_xmit_queue( struct net_device * );
-static void  send_frame_header( struct net_device *, u32 * );
-static int   skip_tail( unsigned int, unsigned int, u32 );
-static int   check_fhdr( u32, u32 *, u32 *, u32 *, u32 *, u32 * );
-static void  change_level( struct net_device * );
-static void  timeout_change_level( struct net_device * );
-static u32   calc_crc32( u32, u8 *, u32 );
-static struct sk_buff *  get_rx_buf( struct net_device * );
-static int  sbni_init( struct net_device * );
-
-#ifdef CONFIG_SBNI_MULTILINE
-static int  enslave( struct net_device *, struct net_device * );
-static int  emancipate( struct net_device * );
-#endif
-
-static const char  version[] =
-       "Granch SBNI12 driver ver 5.0.1  Jun 22 2001  Denis I.Timofeev.\n";
-
-static bool skip_pci_probe     __initdata = false;
-static int  scandone   __initdata = 0;
-static int  num                __initdata = 0;
-
-static unsigned char  rxl_tab[];
-static u32  crc32tab[];
-
-/* A list of all installed devices, for removing the driver module. */
-static struct net_device  *sbni_cards[ SBNI_MAX_NUM_CARDS ];
-
-/* Lists of device's parameters */
-static u32     io[   SBNI_MAX_NUM_CARDS ] __initdata =
-       { [0 ... SBNI_MAX_NUM_CARDS-1] = -1 };
-static u32     irq[  SBNI_MAX_NUM_CARDS ] __initdata;
-static u32     baud[ SBNI_MAX_NUM_CARDS ] __initdata;
-static u32     rxl[  SBNI_MAX_NUM_CARDS ] __initdata =
-       { [0 ... SBNI_MAX_NUM_CARDS-1] = -1 };
-static u32     mac[  SBNI_MAX_NUM_CARDS ] __initdata;
-
-#ifndef MODULE
-typedef u32  iarr[];
-static iarr *dest[5] __initdata = { &io, &irq, &baud, &rxl, &mac };
-#endif
-
-/* A zero-terminated list of I/O addresses to be probed on ISA bus */
-static unsigned int  netcard_portlist[ ] __initdata = { 
-       0x210, 0x214, 0x220, 0x224, 0x230, 0x234, 0x240, 0x244, 0x250, 0x254,
-       0x260, 0x264, 0x270, 0x274, 0x280, 0x284, 0x290, 0x294, 0x2a0, 0x2a4,
-       0x2b0, 0x2b4, 0x2c0, 0x2c4, 0x2d0, 0x2d4, 0x2e0, 0x2e4, 0x2f0, 0x2f4,
-       0 };
-
-#define NET_LOCAL_LOCK(dev) (((struct net_local *)netdev_priv(dev))->lock)
-
-/*
- * Look for SBNI card which addr stored in dev->base_addr, if nonzero.
- * Otherwise, look through PCI bus. If none PCI-card was found, scan ISA.
- */
-
-static inline int __init
-sbni_isa_probe( struct net_device  *dev )
-{
-       if( dev->base_addr > 0x1ff &&
-           request_region( dev->base_addr, SBNI_IO_EXTENT, dev->name ) &&
-           sbni_probe1( dev, dev->base_addr, dev->irq ) )
-
-               return  0;
-       else {
-               pr_err("base address 0x%lx is busy, or adapter is malfunctional!\n",
-                      dev->base_addr);
-               return  -ENODEV;
-       }
-}
-
-static const struct net_device_ops sbni_netdev_ops = {
-       .ndo_open               = sbni_open,
-       .ndo_stop               = sbni_close,
-       .ndo_start_xmit         = sbni_start_xmit,
-       .ndo_set_rx_mode        = set_multicast_list,
-       .ndo_siocdevprivate     = sbni_siocdevprivate,
-       .ndo_set_mac_address    = eth_mac_addr,
-       .ndo_validate_addr      = eth_validate_addr,
-};
-
-static void __init sbni_devsetup(struct net_device *dev)
-{
-       ether_setup( dev );
-       dev->netdev_ops = &sbni_netdev_ops;
-}
-
-int __init sbni_probe(int unit)
-{
-       struct net_device *dev;
-       int err;
-
-       dev = alloc_netdev(sizeof(struct net_local), "sbni",
-                          NET_NAME_UNKNOWN, sbni_devsetup);
-       if (!dev)
-               return -ENOMEM;
-
-       dev->netdev_ops = &sbni_netdev_ops;
-
-       sprintf(dev->name, "sbni%d", unit);
-       netdev_boot_setup_check(dev);
-
-       err = sbni_init(dev);
-       if (err) {
-               free_netdev(dev);
-               return err;
-       }
-
-       err = register_netdev(dev);
-       if (err) {
-               release_region( dev->base_addr, SBNI_IO_EXTENT );
-               free_netdev(dev);
-               return err;
-       }
-       pr_info_once("%s", version);
-       return 0;
-}
-
-static int __init sbni_init(struct net_device *dev)
-{
-       int  i;
-       if( dev->base_addr )
-               return  sbni_isa_probe( dev );
-       /* otherwise we have to perform search our adapter */
-
-       if( io[ num ] != -1 ) {
-               dev->base_addr  = io[ num ];
-               dev->irq        = irq[ num ];
-       } else if( scandone  ||  io[ 0 ] != -1 ) {
-               return  -ENODEV;
-       }
-
-       /* if io[ num ] contains non-zero address, then that is on ISA bus */
-       if( dev->base_addr )
-               return  sbni_isa_probe( dev );
-
-       /* ...otherwise - scan PCI first */
-       if( !skip_pci_probe  &&  !sbni_pci_probe( dev ) )
-               return  0;
-
-       if( io[ num ] == -1 ) {
-               /* Auto-scan will be stopped when first ISA card were found */
-               scandone = 1;
-               if( num > 0 )
-                       return  -ENODEV;
-       }
-
-       for( i = 0;  netcard_portlist[ i ];  ++i ) {
-               int  ioaddr = netcard_portlist[ i ];
-               if( request_region( ioaddr, SBNI_IO_EXTENT, dev->name ) &&
-                   sbni_probe1( dev, ioaddr, 0 ))
-                       return 0;
-       }
-
-       return  -ENODEV;
-}
-
-
-static int __init
-sbni_pci_probe( struct net_device  *dev )
-{
-       struct pci_dev  *pdev = NULL;
-
-       while( (pdev = pci_get_class( PCI_CLASS_NETWORK_OTHER << 8, pdev ))
-              != NULL ) {
-               int  pci_irq_line;
-               unsigned long  pci_ioaddr;
-
-               if( pdev->vendor != SBNI_PCI_VENDOR &&
-                   pdev->device != SBNI_PCI_DEVICE )
-                       continue;
-
-               pci_ioaddr = pci_resource_start( pdev, 0 );
-               pci_irq_line = pdev->irq;
-
-               /* Avoid already found cards from previous calls */
-               if( !request_region( pci_ioaddr, SBNI_IO_EXTENT, dev->name ) ) {
-                       if (pdev->subsystem_device != 2)
-                               continue;
-
-                       /* Dual adapter is present */
-                       if (!request_region(pci_ioaddr += 4, SBNI_IO_EXTENT,
-                                                       dev->name ) )
-                               continue;
-               }
-
-               if (pci_irq_line <= 0 || pci_irq_line >= nr_irqs)
-                       pr_warn(
-"WARNING: The PCI BIOS assigned this PCI card to IRQ %d, which is unlikely to work!.\n"
-"You should use the PCI BIOS setup to assign a valid IRQ line.\n",
-                               pci_irq_line );
-
-               /* avoiding re-enable dual adapters */
-               if( (pci_ioaddr & 7) == 0  &&  pci_enable_device( pdev ) ) {
-                       release_region( pci_ioaddr, SBNI_IO_EXTENT );
-                       pci_dev_put( pdev );
-                       return  -EIO;
-               }
-               if( sbni_probe1( dev, pci_ioaddr, pci_irq_line ) ) {
-                       SET_NETDEV_DEV(dev, &pdev->dev);
-                       /* not the best thing to do, but this is all messed up 
-                          for hotplug systems anyway... */
-                       pci_dev_put( pdev );
-                       return  0;
-               }
-       }
-       return  -ENODEV;
-}
-
-
-static struct net_device * __init
-sbni_probe1( struct net_device  *dev,  unsigned long  ioaddr,  int  irq )
-{
-       struct net_local  *nl;
-
-       if( sbni_card_probe( ioaddr ) ) {
-               release_region( ioaddr, SBNI_IO_EXTENT );
-               return NULL;
-       }
-
-       outb( 0, ioaddr + CSR0 );
-
-       if( irq < 2 ) {
-               unsigned long irq_mask;
-
-               irq_mask = probe_irq_on();
-               outb( EN_INT | TR_REQ, ioaddr + CSR0 );
-               outb( PR_RES, ioaddr + CSR1 );
-               mdelay(50);
-               irq = probe_irq_off(irq_mask);
-               outb( 0, ioaddr + CSR0 );
-
-               if( !irq ) {
-                       pr_err("%s: can't detect device irq!\n", dev->name);
-                       release_region( ioaddr, SBNI_IO_EXTENT );
-                       return NULL;
-               }
-       } else if( irq == 2 )
-               irq = 9;
-
-       dev->irq = irq;
-       dev->base_addr = ioaddr;
-
-       /* Fill in sbni-specific dev fields. */
-       nl = netdev_priv(dev);
-       if( !nl ) {
-               pr_err("%s: unable to get memory!\n", dev->name);
-               release_region( ioaddr, SBNI_IO_EXTENT );
-               return NULL;
-       }
-
-       memset( nl, 0, sizeof(struct net_local) );
-       spin_lock_init( &nl->lock );
-
-       /* store MAC address (generate if that isn't known) */
-       *(__be16 *)dev->dev_addr = htons( 0x00ff );
-       *(__be32 *)(dev->dev_addr + 2) = htonl( 0x01000000 |
-               ((mac[num] ?
-               mac[num] :
-               (u32)((long)netdev_priv(dev))) & 0x00ffffff));
-
-       /* store link settings (speed, receive level ) */
-       nl->maxframe  = DEFAULT_FRAME_LEN;
-       nl->csr1.rate = baud[ num ];
-
-       if( (nl->cur_rxl_index = rxl[ num ]) == -1 ) {
-               /* autotune rxl */
-               nl->cur_rxl_index = DEF_RXL;
-               nl->delta_rxl = DEF_RXL_DELTA;
-       } else {
-               nl->delta_rxl = 0;
-       }
-       nl->csr1.rxl  = rxl_tab[ nl->cur_rxl_index ];
-       if( inb( ioaddr + CSR0 ) & 0x01 )
-               nl->state |= FL_SLOW_MODE;
-
-       pr_notice("%s: ioaddr %#lx, irq %d, MAC: 00:ff:01:%02x:%02x:%02x\n",
-                 dev->name, dev->base_addr, dev->irq,
-                 ((u8 *)dev->dev_addr)[3],
-                 ((u8 *)dev->dev_addr)[4],
-                 ((u8 *)dev->dev_addr)[5]);
-
-       pr_notice("%s: speed %d",
-                 dev->name,
-                 ((nl->state & FL_SLOW_MODE) ? 500000 : 2000000)
-                 / (1 << nl->csr1.rate));
-
-       if( nl->delta_rxl == 0 )
-               pr_cont(", receive level 0x%x (fixed)\n", nl->cur_rxl_index);
-       else
-               pr_cont(", receive level (auto)\n");
-
-#ifdef CONFIG_SBNI_MULTILINE
-       nl->master = dev;
-       nl->link   = NULL;
-#endif
-   
-       sbni_cards[ num++ ] = dev;
-       return  dev;
-}
-
-/* -------------------------------------------------------------------------- */
-
-#ifdef CONFIG_SBNI_MULTILINE
-
-static netdev_tx_t
-sbni_start_xmit( struct sk_buff  *skb,  struct net_device  *dev )
-{
-       struct net_device  *p;
-
-       netif_stop_queue( dev );
-
-       /* Looking for idle device in the list */
-       for( p = dev;  p; ) {
-               struct net_local  *nl = netdev_priv(p);
-               spin_lock( &nl->lock );
-               if( nl->tx_buf_p  ||  (nl->state & FL_LINE_DOWN) ) {
-                       p = nl->link;
-                       spin_unlock( &nl->lock );
-               } else {
-                       /* Idle dev is found */
-                       prepare_to_send( skb, p );
-                       spin_unlock( &nl->lock );
-                       netif_start_queue( dev );
-                       return NETDEV_TX_OK;
-               }
-       }
-
-       return NETDEV_TX_BUSY;
-}
-
-#else  /* CONFIG_SBNI_MULTILINE */
-
-static netdev_tx_t
-sbni_start_xmit( struct sk_buff  *skb,  struct net_device  *dev )
-{
-       struct net_local  *nl  = netdev_priv(dev);
-
-       netif_stop_queue( dev );
-       spin_lock( &nl->lock );
-
-       prepare_to_send( skb, dev );
-
-       spin_unlock( &nl->lock );
-       return NETDEV_TX_OK;
-}
-
-#endif /* CONFIG_SBNI_MULTILINE */
-
-/* -------------------------------------------------------------------------- */
-
-/* interrupt handler */
-
-/*
- *     SBNI12D-10, -11/ISA boards within "common interrupt" mode could not
- * be looked as two independent single-channel devices. Every channel seems
- * as Ethernet interface but interrupt handler must be common. Really, first
- * channel ("master") driver only registers the handler. In its struct net_local
- * it has got pointer to "slave" channel's struct net_local and handles that's
- * interrupts too.
- *     dev of successfully attached ISA SBNI boards is linked to list.
- * While next board driver is initialized, it scans this list. If one
- * has found dev with same irq and ioaddr different by 4 then it assumes
- * this board to be "master".
- */ 
-
-static irqreturn_t
-sbni_interrupt( int  irq,  void  *dev_id )
-{
-       struct net_device         *dev = dev_id;
-       struct net_local  *nl  = netdev_priv(dev);
-       int     repeat;
-
-       spin_lock( &nl->lock );
-       if( nl->second )
-               spin_lock(&NET_LOCAL_LOCK(nl->second));
-
-       do {
-               repeat = 0;
-               if( inb( dev->base_addr + CSR0 ) & (RC_RDY | TR_RDY) ) {
-                       handle_channel( dev );
-                       repeat = 1;
-               }
-               if( nl->second  &&      /* second channel present */
-                   (inb( nl->second->base_addr+CSR0 ) & (RC_RDY | TR_RDY)) ) {
-                       handle_channel( nl->second );
-                       repeat = 1;
-               }
-       } while( repeat );
-
-       if( nl->second )
-               spin_unlock(&NET_LOCAL_LOCK(nl->second));
-       spin_unlock( &nl->lock );
-       return IRQ_HANDLED;
-}
-
-
-static void
-handle_channel( struct net_device  *dev )
-{
-       struct net_local        *nl    = netdev_priv(dev);
-       unsigned long           ioaddr = dev->base_addr;
-
-       int  req_ans;
-       unsigned char  csr0;
-
-#ifdef CONFIG_SBNI_MULTILINE
-       /* Lock the master device because we going to change its local data */
-       if( nl->state & FL_SLAVE )
-               spin_lock(&NET_LOCAL_LOCK(nl->master));
-#endif
-
-       outb( (inb( ioaddr + CSR0 ) & ~EN_INT) | TR_REQ, ioaddr + CSR0 );
-
-       nl->timer_ticks = CHANGE_LEVEL_START_TICKS;
-       for(;;) {
-               csr0 = inb( ioaddr + CSR0 );
-               if( ( csr0 & (RC_RDY | TR_RDY) ) == 0 )
-                       break;
-
-               req_ans = !(nl->state & FL_PREV_OK);
-
-               if( csr0 & RC_RDY )
-                       req_ans = recv_frame( dev );
-
-               /*
-                * TR_RDY always equals 1 here because we have owned the marker,
-                * and we set TR_REQ when disabled interrupts
-                */
-               csr0 = inb( ioaddr + CSR0 );
-               if( !(csr0 & TR_RDY)  ||  (csr0 & RC_RDY) )
-                       netdev_err(dev, "internal error!\n");
-
-               /* if state & FL_NEED_RESEND != 0 then tx_frameno != 0 */
-               if( req_ans  ||  nl->tx_frameno != 0 )
-                       send_frame( dev );
-               else
-                       /* send marker without any data */
-                       outb( inb( ioaddr + CSR0 ) & ~TR_REQ, ioaddr + CSR0 );
-       }
-
-       outb( inb( ioaddr + CSR0 ) | EN_INT, ioaddr + CSR0 );
-
-#ifdef CONFIG_SBNI_MULTILINE
-       if( nl->state & FL_SLAVE )
-               spin_unlock(&NET_LOCAL_LOCK(nl->master));
-#endif
-}
-
-
-/*
- * Routine returns 1 if it needs to acknowledge received frame.
- * Empty frame received without errors won't be acknowledged.
- */
-
-static int
-recv_frame( struct net_device  *dev )
-{
-       struct net_local  *nl   = netdev_priv(dev);
-       unsigned long  ioaddr   = dev->base_addr;
-
-       u32  crc = CRC32_INITIAL;
-
-       unsigned  framelen = 0, frameno, ack;
-       unsigned  is_first, frame_ok = 0;
-
-       if( check_fhdr( ioaddr, &framelen, &frameno, &ack, &is_first, &crc ) ) {
-               frame_ok = framelen > 4
-                       ?  upload_data( dev, framelen, frameno, is_first, crc )
-                       :  skip_tail( ioaddr, framelen, crc );
-               if( frame_ok )
-                       interpret_ack( dev, ack );
-       }
-
-       outb( inb( ioaddr + CSR0 ) ^ CT_ZER, ioaddr + CSR0 );
-       if( frame_ok ) {
-               nl->state |= FL_PREV_OK;
-               if( framelen > 4 )
-                       nl->in_stats.all_rx_number++;
-       } else {
-               nl->state &= ~FL_PREV_OK;
-               change_level( dev );
-               nl->in_stats.all_rx_number++;
-               nl->in_stats.bad_rx_number++;
-       }
-
-       return  !frame_ok  ||  framelen > 4;
-}
-
-
-static void
-send_frame( struct net_device  *dev )
-{
-       struct net_local  *nl    = netdev_priv(dev);
-
-       u32  crc = CRC32_INITIAL;
-
-       if( nl->state & FL_NEED_RESEND ) {
-
-               /* if frame was sended but not ACK'ed - resend it */
-               if( nl->trans_errors ) {
-                       --nl->trans_errors;
-                       if( nl->framelen != 0 )
-                               nl->in_stats.resend_tx_number++;
-               } else {
-                       /* cannot xmit with many attempts */
-#ifdef CONFIG_SBNI_MULTILINE
-                       if( (nl->state & FL_SLAVE)  ||  nl->link )
-#endif
-                       nl->state |= FL_LINE_DOWN;
-                       drop_xmit_queue( dev );
-                       goto  do_send;
-               }
-       } else
-               nl->trans_errors = TR_ERROR_COUNT;
-
-       send_frame_header( dev, &crc );
-       nl->state |= FL_NEED_RESEND;
-       /*
-        * FL_NEED_RESEND will be cleared after ACK, but if empty
-        * frame sended then in prepare_to_send next frame
-        */
-
-
-       if( nl->framelen ) {
-               download_data( dev, &crc );
-               nl->in_stats.all_tx_number++;
-               nl->state |= FL_WAIT_ACK;
-       }
-
-       outsb( dev->base_addr + DAT, (u8 *)&crc, sizeof crc );
-
-do_send:
-       outb( inb( dev->base_addr + CSR0 ) & ~TR_REQ, dev->base_addr + CSR0 );
-
-       if( nl->tx_frameno )
-               /* next frame exists - we request card to send it */
-               outb( inb( dev->base_addr + CSR0 ) | TR_REQ,
-                     dev->base_addr + CSR0 );
-}
-
-
-/*
- * Write the frame data into adapter's buffer memory, and calculate CRC.
- * Do padding if necessary.
- */
-
-static void
-download_data( struct net_device  *dev,  u32  *crc_p )
-{
-       struct net_local  *nl    = netdev_priv(dev);
-       struct sk_buff    *skb   = nl->tx_buf_p;
-
-       unsigned  len = min_t(unsigned int, skb->len - nl->outpos, nl->framelen);
-
-       outsb( dev->base_addr + DAT, skb->data + nl->outpos, len );
-       *crc_p = calc_crc32( *crc_p, skb->data + nl->outpos, len );
-
-       /* if packet too short we should write some more bytes to pad */
-       for( len = nl->framelen - len;  len--; ) {
-               outb( 0, dev->base_addr + DAT );
-               *crc_p = CRC32( 0, *crc_p );
-       }
-}
-
-
-static int
-upload_data( struct net_device  *dev,  unsigned  framelen,  unsigned  frameno,
-            unsigned  is_first,  u32  crc )
-{
-       struct net_local  *nl = netdev_priv(dev);
-
-       int  frame_ok;
-
-       if( is_first ) {
-               nl->wait_frameno = frameno;
-               nl->inppos = 0;
-       }
-
-       if( nl->wait_frameno == frameno ) {
-
-               if( nl->inppos + framelen  <=  ETHER_MAX_LEN )
-                       frame_ok = append_frame_to_pkt( dev, framelen, crc );
-
-               /*
-                * if CRC is right but framelen incorrect then transmitter
-                * error was occurred... drop entire packet
-                */
-               else if( (frame_ok = skip_tail( dev->base_addr, framelen, crc ))
-                        != 0 ) {
-                       nl->wait_frameno = 0;
-                       nl->inppos = 0;
-#ifdef CONFIG_SBNI_MULTILINE
-                       nl->master->stats.rx_errors++;
-                       nl->master->stats.rx_missed_errors++;
-#else
-                       dev->stats.rx_errors++;
-                       dev->stats.rx_missed_errors++;
-#endif
-               }
-                       /* now skip all frames until is_first != 0 */
-       } else
-               frame_ok = skip_tail( dev->base_addr, framelen, crc );
-
-       if( is_first  &&  !frame_ok ) {
-               /*
-                * Frame has been broken, but we had already stored
-                * is_first... Drop entire packet.
-                */
-               nl->wait_frameno = 0;
-#ifdef CONFIG_SBNI_MULTILINE
-               nl->master->stats.rx_errors++;
-               nl->master->stats.rx_crc_errors++;
-#else
-               dev->stats.rx_errors++;
-               dev->stats.rx_crc_errors++;
-#endif
-       }
-
-       return  frame_ok;
-}
-
-
-static inline void
-send_complete( struct net_device *dev )
-{
-       struct net_local  *nl = netdev_priv(dev);
-
-#ifdef CONFIG_SBNI_MULTILINE
-       nl->master->stats.tx_packets++;
-       nl->master->stats.tx_bytes += nl->tx_buf_p->len;
-#else
-       dev->stats.tx_packets++;
-       dev->stats.tx_bytes += nl->tx_buf_p->len;
-#endif
-       dev_consume_skb_irq(nl->tx_buf_p);
-
-       nl->tx_buf_p = NULL;
-
-       nl->outpos = 0;
-       nl->state &= ~(FL_WAIT_ACK | FL_NEED_RESEND);
-       nl->framelen   = 0;
-}
-
-
-static void
-interpret_ack( struct net_device  *dev,  unsigned  ack )
-{
-       struct net_local  *nl = netdev_priv(dev);
-
-       if( ack == FRAME_SENT_OK ) {
-               nl->state &= ~FL_NEED_RESEND;
-
-               if( nl->state & FL_WAIT_ACK ) {
-                       nl->outpos += nl->framelen;
-
-                       if( --nl->tx_frameno ) {
-                               nl->framelen = min_t(unsigned int,
-                                                  nl->maxframe,
-                                                  nl->tx_buf_p->len - nl->outpos);
-                       } else {
-                               send_complete( dev );
-#ifdef CONFIG_SBNI_MULTILINE
-                               netif_wake_queue( nl->master );
-#else
-                               netif_wake_queue( dev );
-#endif
-                       }
-               }
-       }
-
-       nl->state &= ~FL_WAIT_ACK;
-}
-
-
-/*
- * Glue received frame with previous fragments of packet.
- * Indicate packet when last frame would be accepted.
- */
-
-static int
-append_frame_to_pkt( struct net_device  *dev,  unsigned  framelen,  u32  crc )
-{
-       struct net_local  *nl = netdev_priv(dev);
-
-       u8  *p;
-
-       if( nl->inppos + framelen  >  ETHER_MAX_LEN )
-               return  0;
-
-       if( !nl->rx_buf_p  &&  !(nl->rx_buf_p = get_rx_buf( dev )) )
-               return  0;
-
-       p = nl->rx_buf_p->data + nl->inppos;
-       insb( dev->base_addr + DAT, p, framelen );
-       if( calc_crc32( crc, p, framelen ) != CRC32_REMAINDER )
-               return  0;
-
-       nl->inppos += framelen - 4;
-       if( --nl->wait_frameno == 0 )           /* last frame received */
-               indicate_pkt( dev );
-
-       return  1;
-}
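-
-/*
- * Note that the four trailing CRC octets are read into the rx buffer
- * together with the data (they must pass through calc_crc32() for the
- * residue check), but inppos advances by framelen - 4, so the next
- * fragment overwrites them and they are never delivered.
- */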
-
-
-/*
- * Prepare to start output on the adapter.
- * The transmitter is actually activated once the marker is accepted.
- */
-
-static void
-prepare_to_send( struct sk_buff  *skb,  struct net_device  *dev )
-{
-       struct net_local  *nl = netdev_priv(dev);
-
-       unsigned int  len;
-
-       /* nl->tx_buf_p == NULL here! */
-       if( nl->tx_buf_p )
-               netdev_err(dev, "memory leak!\n");
-
-       nl->outpos = 0;
-       nl->state &= ~(FL_WAIT_ACK | FL_NEED_RESEND);
-
-       len = skb->len;
-       if( len < SBNI_MIN_LEN )
-               len = SBNI_MIN_LEN;
-
-       nl->tx_buf_p    = skb;
-       nl->tx_frameno  = DIV_ROUND_UP(len, nl->maxframe);
-       nl->framelen    = len < nl->maxframe  ?  len  :  nl->maxframe;
-
-       outb( inb( dev->base_addr + CSR0 ) | TR_REQ,  dev->base_addr + CSR0 );
-#ifdef CONFIG_SBNI_MULTILINE
-       netif_trans_update(nl->master);
-#else
-       netif_trans_update(dev);
-#endif
-}
-
-
-static void
-drop_xmit_queue( struct net_device  *dev )
-{
-       struct net_local  *nl = netdev_priv(dev);
-
-       if( nl->tx_buf_p ) {
-               dev_kfree_skb_any( nl->tx_buf_p );
-               nl->tx_buf_p = NULL;
-#ifdef CONFIG_SBNI_MULTILINE
-               nl->master->stats.tx_errors++;
-               nl->master->stats.tx_carrier_errors++;
-#else
-               dev->stats.tx_errors++;
-               dev->stats.tx_carrier_errors++;
-#endif
-       }
-
-       nl->tx_frameno  = 0;
-       nl->framelen    = 0;
-       nl->outpos      = 0;
-       nl->state &= ~(FL_WAIT_ACK | FL_NEED_RESEND);
-#ifdef CONFIG_SBNI_MULTILINE
-       netif_start_queue( nl->master );
-       netif_trans_update(nl->master);
-#else
-       netif_start_queue( dev );
-       netif_trans_update(dev);
-#endif
-}
-
-
-static void
-send_frame_header( struct net_device  *dev,  u32  *crc_p )
-{
-       struct net_local  *nl  = netdev_priv(dev);
-
-       u32  crc = *crc_p;
-       u32  len_field = nl->framelen + 6;      /* CRC + frameno + reserved */
-       u8   value;
-
-       if( nl->state & FL_NEED_RESEND )
-               len_field |= FRAME_RETRY;       /* non-first attempt... */
-
-       if( nl->outpos == 0 )
-               len_field |= FRAME_FIRST;
-
-       len_field |= (nl->state & FL_PREV_OK) ? FRAME_SENT_OK : FRAME_SENT_BAD;
-       outb( SBNI_SIG, dev->base_addr + DAT );
-
-       value = (u8) len_field;
-       outb( value, dev->base_addr + DAT );
-       crc = CRC32( value, crc );
-       value = (u8) (len_field >> 8);
-       outb( value, dev->base_addr + DAT );
-       crc = CRC32( value, crc );
-
-       outb( nl->tx_frameno, dev->base_addr + DAT );
-       crc = CRC32( nl->tx_frameno, crc );
-       outb( 0, dev->base_addr + DAT );
-       crc = CRC32( 0, crc );
-       *crc_p = crc;
-}
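-
-/*
- * Octet sequence emitted above, as check_fhdr() reads it back on the
- * peer: SBNI_SIG, len_field low byte, len_field high byte, frame
- * number, reserved zero.  len_field packs the data length together with
- * FRAME_FIRST/FRAME_RETRY and the piggy-backed FRAME_SENT_OK or
- * FRAME_SENT_BAD ack for the opposite direction.
- */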
-
-
-/*
- * If the frame tail is not needed (wrong frame number or received
- * twice), it is not stored, but the CRC is still calculated.
- */
-
-static int
-skip_tail( unsigned int  ioaddr,  unsigned int  tail_len,  u32 crc )
-{
-       while( tail_len-- )
-               crc = CRC32( inb( ioaddr + DAT ), crc );
-
-       return  crc == CRC32_REMAINDER;
-}
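-
-/*
- * The comparison above relies on the CRC-32 residue property: when the
- * four trailing CRC octets of a frame are folded into the same running
- * CRC as the data, an undamaged frame always leaves the constant
- * CRC32_REMAINDER (0x2144DF1C), so the check never has to extract the
- * transmitted CRC value itself.  append_frame_to_pkt() does the same.
- */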
-
-
-/*
- * Preliminarily checks that the frame header is correct, folds it into
- * the running CRC and splits it into its simple fields.
- */
-
-static int
-check_fhdr( u32  ioaddr,  u32  *framelen,  u32  *frameno,  u32  *ack,
-           u32  *is_first,  u32  *crc_p )
-{
-       u32  crc = *crc_p;
-       u8   value;
-
-       if( inb( ioaddr + DAT ) != SBNI_SIG )
-               return  0;
-
-       value = inb( ioaddr + DAT );
-       *framelen = (u32)value;
-       crc = CRC32( value, crc );
-       value = inb( ioaddr + DAT );
-       *framelen |= ((u32)value) << 8;
-       crc = CRC32( value, crc );
-
-       *ack = *framelen & FRAME_ACK_MASK;
-       *is_first = (*framelen & FRAME_FIRST) != 0;
-
-       if( (*framelen &= FRAME_LEN_MASK) < 6 ||
-           *framelen > SBNI_MAX_FRAME - 3 )
-               return  0;
-
-       value = inb( ioaddr + DAT );
-       *frameno = (u32)value;
-       crc = CRC32( value, crc );
-
-       crc = CRC32( inb( ioaddr + DAT ), crc );        /* reserved byte */
-       *framelen -= 2;
-
-       *crc_p = crc;
-       return  1;
-}
-
-
-static struct sk_buff *
-get_rx_buf( struct net_device  *dev )
-{
-       /* +2 is to compensate for the alignment fixup below */
-       struct sk_buff  *skb = dev_alloc_skb( ETHER_MAX_LEN + 2 );
-       if( !skb )
-               return  NULL;
-
-       skb_reserve( skb, 2 );          /* Align IP on longword boundaries */
-       return  skb;
-}
-
-
-static void
-indicate_pkt( struct net_device  *dev )
-{
-       struct net_local  *nl  = netdev_priv(dev);
-       struct sk_buff    *skb = nl->rx_buf_p;
-
-       skb_put( skb, nl->inppos );
-
-#ifdef CONFIG_SBNI_MULTILINE
-       skb->protocol = eth_type_trans( skb, nl->master );
-       netif_rx( skb );
-       ++nl->master->stats.rx_packets;
-       nl->master->stats.rx_bytes += nl->inppos;
-#else
-       skb->protocol = eth_type_trans( skb, dev );
-       netif_rx( skb );
-       ++dev->stats.rx_packets;
-       dev->stats.rx_bytes += nl->inppos;
-#endif
-       nl->rx_buf_p = NULL;    /* protocol driver will clear this sk_buff */
-}
-
-
-/* -------------------------------------------------------------------------- */
-
-/*
- * Periodically checks wire activity and regenerates the marker if the
- * connection has been inactive for a long time.
- */
-
-static void
-sbni_watchdog(struct timer_list *t)
-{
-       struct net_local   *nl  = from_timer(nl, t, watchdog);
-       struct net_device  *dev = nl->watchdog_dev;
-       unsigned long      flags;
-       unsigned char      csr0;
-
-       spin_lock_irqsave( &nl->lock, flags );
-
-       csr0 = inb( dev->base_addr + CSR0 );
-       if( csr0 & RC_CHK ) {
-
-               if( nl->timer_ticks ) {
-                       if( csr0 & (RC_RDY | BU_EMP) )
-                               /* receiver is not active */
-                               nl->timer_ticks--;
-               } else {
-                       nl->in_stats.timeout_number++;
-                       if( nl->delta_rxl )
-                               timeout_change_level( dev );
-
-                       outb( *(u_char *)&nl->csr1 | PR_RES,
-                             dev->base_addr + CSR1 );
-                       csr0 = inb( dev->base_addr + CSR0 );
-               }
-       } else
-               nl->state &= ~FL_LINE_DOWN;
-
-       outb( csr0 | RC_CHK, dev->base_addr + CSR0 ); 
-
-       mod_timer(t, jiffies + SBNI_TIMEOUT);
-
-       spin_unlock_irqrestore( &nl->lock, flags );
-}
-
-
-static unsigned char  rxl_tab[] = {
-       0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x08,
-       0x0a, 0x0c, 0x0f, 0x16, 0x18, 0x1a, 0x1c, 0x1f
-};
-
-#define SIZE_OF_TIMEOUT_RXL_TAB 4
-static unsigned char  timeout_rxl_tab[] = {
-       0x03, 0x05, 0x08, 0x0b
-};
-
-/* -------------------------------------------------------------------------- */
-
-static void
-card_start( struct net_device  *dev )
-{
-       struct net_local  *nl = netdev_priv(dev);
-
-       nl->timer_ticks = CHANGE_LEVEL_START_TICKS;
-       nl->state &= ~(FL_WAIT_ACK | FL_NEED_RESEND);
-       nl->state |= FL_PREV_OK;
-
-       nl->inppos = nl->outpos = 0;
-       nl->wait_frameno = 0;
-       nl->tx_frameno   = 0;
-       nl->framelen     = 0;
-
-       outb( *(u_char *)&nl->csr1 | PR_RES, dev->base_addr + CSR1 );
-       outb( EN_INT, dev->base_addr + CSR0 );
-}
-
-/* -------------------------------------------------------------------------- */
-
-/* Receive level auto-selection */
-
-static void
-change_level( struct net_device  *dev )
-{
-       struct net_local  *nl = netdev_priv(dev);
-
-       if( nl->delta_rxl == 0 )        /* do not auto-negotiate RxL */
-               return;
-
-       if( nl->cur_rxl_index == 0 )
-               nl->delta_rxl = 1;
-       else if( nl->cur_rxl_index == 15 )
-               nl->delta_rxl = -1;
-       else if( nl->cur_rxl_rcvd < nl->prev_rxl_rcvd )
-               nl->delta_rxl = -nl->delta_rxl;
-
-       nl->csr1.rxl = rxl_tab[ nl->cur_rxl_index += nl->delta_rxl ];
-       inb( dev->base_addr + CSR0 );   /* needed for PCI cards */
-       outb( *(u8 *)&nl->csr1, dev->base_addr + CSR1 );
-
-       nl->prev_rxl_rcvd = nl->cur_rxl_rcvd;
-       nl->cur_rxl_rcvd  = 0;
-}
-
-
-static void
-timeout_change_level( struct net_device  *dev )
-{
-       struct net_local  *nl = netdev_priv(dev);
-
-       nl->cur_rxl_index = timeout_rxl_tab[ nl->timeout_rxl ];
-       if( ++nl->timeout_rxl >= 4 )
-               nl->timeout_rxl = 0;
-
-       nl->csr1.rxl = rxl_tab[ nl->cur_rxl_index ];
-       inb( dev->base_addr + CSR0 );
-       outb( *(unsigned char *)&nl->csr1, dev->base_addr + CSR1 );
-
-       nl->prev_rxl_rcvd = nl->cur_rxl_rcvd;
-       nl->cur_rxl_rcvd  = 0;
-}
-
-/* -------------------------------------------------------------------------- */
-
-/*
- *     Open/initialize the board.
- */
-
-static int
-sbni_open( struct net_device  *dev )
-{
-       struct net_local        *nl = netdev_priv(dev);
-       struct timer_list       *w  = &nl->watchdog;
-
-       /*
-        * For dual ISA adapters in "common irq" mode, we have to
-        * determine whether the primary or secondary channel is being
-        * initialized, and install the irq handler only in the first case.
-        */
-       if( dev->base_addr < 0x400 ) {          /* ISA only */
-               struct net_device  **p = sbni_cards;
-               for( ;  *p  &&  p < sbni_cards + SBNI_MAX_NUM_CARDS;  ++p )
-                       if( (*p)->irq == dev->irq &&
-                           ((*p)->base_addr == dev->base_addr + 4 ||
-                            (*p)->base_addr == dev->base_addr - 4) &&
-                           (*p)->flags & IFF_UP ) {
-
-                               ((struct net_local *) (netdev_priv(*p)))
-                                       ->second = dev;
-                               netdev_notice(dev, "using shared irq with %s\n",
-                                             (*p)->name);
-                               nl->state |= FL_SECONDARY;
-                               goto  handler_attached;
-                       }
-       }
-
-       if( request_irq(dev->irq, sbni_interrupt, IRQF_SHARED, dev->name, dev) ) {
-               netdev_err(dev, "unable to get IRQ %d\n", dev->irq);
-               return  -EAGAIN;
-       }
-
-handler_attached:
-
-       spin_lock( &nl->lock );
-       memset( &dev->stats, 0, sizeof(struct net_device_stats) );
-       memset( &nl->in_stats, 0, sizeof(struct sbni_in_stats) );
-
-       card_start( dev );
-
-       netif_start_queue( dev );
-
-       /* set timer watchdog */
-       nl->watchdog_dev = dev;
-       timer_setup(w, sbni_watchdog, 0);
-       w->expires      = jiffies + SBNI_TIMEOUT;
-       add_timer( w );
-   
-       spin_unlock( &nl->lock );
-       return 0;
-}
-
-
-static int
-sbni_close( struct net_device  *dev )
-{
-       struct net_local  *nl = netdev_priv(dev);
-
-       if( nl->second  &&  nl->second->flags & IFF_UP ) {
-               netdev_notice(dev, "Secondary channel (%s) is active!\n",
-                             nl->second->name);
-               return  -EBUSY;
-       }
-
-#ifdef CONFIG_SBNI_MULTILINE
-       if( nl->state & FL_SLAVE )
-               emancipate( dev );
-       else
-               while( nl->link )       /* it's master device! */
-                       emancipate( nl->link );
-#endif
-
-       spin_lock( &nl->lock );
-
-       nl->second = NULL;
-       drop_xmit_queue( dev ); 
-       netif_stop_queue( dev );
-   
-       del_timer( &nl->watchdog );
-
-       outb( 0, dev->base_addr + CSR0 );
-
-       if( !(nl->state & FL_SECONDARY) )
-               free_irq( dev->irq, dev );
-       nl->state &= FL_SECONDARY;
-
-       spin_unlock( &nl->lock );
-       return 0;
-}
-
-
-/*
-       Valid combinations in CSR0 (for probing):
-
-       VALID_DECODER   0000,0011,1011,1010
-
-                                       ; 0   ; -
-                               TR_REQ  ; 1   ; +
-                       TR_RDY          ; 2   ; -
-                       TR_RDY  TR_REQ  ; 3   ; +
-               BU_EMP                  ; 4   ; +
-               BU_EMP          TR_REQ  ; 5   ; +
-               BU_EMP  TR_RDY          ; 6   ; -
-               BU_EMP  TR_RDY  TR_REQ  ; 7   ; +
-       RC_RDY                          ; 8   ; +
-       RC_RDY                  TR_REQ  ; 9   ; +
-       RC_RDY          TR_RDY          ; 10  ; -
-       RC_RDY          TR_RDY  TR_REQ  ; 11  ; -
-       RC_RDY  BU_EMP                  ; 12  ; -
-       RC_RDY  BU_EMP          TR_REQ  ; 13  ; -
-       RC_RDY  BU_EMP  TR_RDY          ; 14  ; -
-       RC_RDY  BU_EMP  TR_RDY  TR_REQ  ; 15  ; -
-*/
-
-#define VALID_DECODER (2 + 8 + 0x10 + 0x20 + 0x80 + 0x100 + 0x200)
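-
-/*
- * Each set bit of VALID_DECODER marks one plausible value of CSR0's
- * upper nibble from the table above: 0x02 + 0x08 + 0x10 + 0x20 + 0x80 +
- * 0x100 + 0x200 sets bits 1, 3, 4, 5, 7, 8 and 9, exactly the rows
- * flagged '+'.  sbni_card_probe() tests it with (1 << (csr0 >> 4)).
- */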
-
-
-static int
-sbni_card_probe( unsigned long  ioaddr )
-{
-       unsigned char  csr0;
-
-       csr0 = inb( ioaddr + CSR0 );
-       if( csr0 != 0xff  &&  csr0 != 0x00 ) {
-               csr0 &= ~EN_INT;
-               if( csr0 & BU_EMP )
-                       csr0 |= EN_INT;
-      
-               if( VALID_DECODER & (1 << (csr0 >> 4)) )
-                       return  0;
-       }
-   
-       return  -ENODEV;
-}
-
-/* -------------------------------------------------------------------------- */
-
-static int
-sbni_siocdevprivate(struct net_device  *dev,  struct ifreq  *ifr, void __user *data, int  cmd)
-{
-       struct net_local  *nl = netdev_priv(dev);
-       struct sbni_flags  flags;
-       int  error = 0;
-
-#ifdef CONFIG_SBNI_MULTILINE
-       struct net_device  *slave_dev;
-       char  slave_name[ 8 ];
-#endif
-  
-       switch( cmd ) {
-       case  SIOCDEVGETINSTATS :
-               if (copy_to_user(data, &nl->in_stats,
-                                sizeof(struct sbni_in_stats)))
-                       error = -EFAULT;
-               break;
-
-       case  SIOCDEVRESINSTATS :
-               if (!capable(CAP_NET_ADMIN))
-                       return  -EPERM;
-               memset( &nl->in_stats, 0, sizeof(struct sbni_in_stats) );
-               break;
-
-       case  SIOCDEVGHWSTATE :
-               flags.mac_addr  = *(u32 *)(dev->dev_addr + 3);
-               flags.rate      = nl->csr1.rate;
-               flags.slow_mode = (nl->state & FL_SLOW_MODE) != 0;
-               flags.rxl       = nl->cur_rxl_index;
-               flags.fixed_rxl = nl->delta_rxl == 0;
-
-               if (copy_to_user(data, &flags, sizeof(flags)))
-                       error = -EFAULT;
-               break;
-
-       case  SIOCDEVSHWSTATE :
-               if (!capable(CAP_NET_ADMIN))
-                       return  -EPERM;
-
-               spin_lock( &nl->lock );
-               flags = *(struct sbni_flags*) &ifr->ifr_ifru;
-               if( flags.fixed_rxl ) {
-                       nl->delta_rxl = 0;
-                       nl->cur_rxl_index = flags.rxl;
-               } else {
-                       nl->delta_rxl = DEF_RXL_DELTA;
-                       nl->cur_rxl_index = DEF_RXL;
-               }
-
-               nl->csr1.rxl = rxl_tab[ nl->cur_rxl_index ];
-               nl->csr1.rate = flags.rate;
-               outb( *(u8 *)&nl->csr1 | PR_RES, dev->base_addr + CSR1 );
-               spin_unlock( &nl->lock );
-               break;
-
-#ifdef CONFIG_SBNI_MULTILINE
-
-       case  SIOCDEVENSLAVE :
-               if (!capable(CAP_NET_ADMIN))
-                       return  -EPERM;
-
-               if (copy_from_user(slave_name, data, sizeof(slave_name)))
-                       return -EFAULT;
-               slave_dev = dev_get_by_name(&init_net, slave_name );
-               if( !slave_dev  ||  !(slave_dev->flags & IFF_UP) ) {
-                       netdev_err(dev, "trying to enslave non-active device %s\n",
-                                  slave_name);
-                       if (slave_dev)
-                               dev_put(slave_dev);
-                       return  -EPERM;
-               }
-
-               return  enslave( dev, slave_dev );
-
-       case  SIOCDEVEMANSIPATE :
-               if (!capable(CAP_NET_ADMIN))
-                       return  -EPERM;
-
-               return  emancipate( dev );
-
-#endif /* CONFIG_SBNI_MULTILINE */
-
-       default :
-               return  -EOPNOTSUPP;
-       }
-
-       return  error;
-}
-
-
-#ifdef CONFIG_SBNI_MULTILINE
-
-static int
-enslave( struct net_device  *dev,  struct net_device  *slave_dev )
-{
-       struct net_local  *nl  = netdev_priv(dev);
-       struct net_local  *snl = netdev_priv(slave_dev);
-
-       if( nl->state & FL_SLAVE )      /* This isn't master or free device */
-               return  -EBUSY;
-
-       if( snl->state & FL_SLAVE )     /* That was already enslaved */
-               return  -EBUSY;
-
-       spin_lock( &nl->lock );
-       spin_lock( &snl->lock );
-
-       /* append to list */
-       snl->link = nl->link;
-       nl->link  = slave_dev;
-       snl->master = dev;
-       snl->state |= FL_SLAVE;
-
-       /* Summary statistics of MultiLine operation are stored
-          in the master's counters */
-       memset( &slave_dev->stats, 0, sizeof(struct net_device_stats) );
-       netif_stop_queue( slave_dev );
-       netif_wake_queue( dev );        /* Now we are able to transmit */
-
-       spin_unlock( &snl->lock );
-       spin_unlock( &nl->lock );
-       netdev_notice(dev, "slave device (%s) attached\n", slave_dev->name);
-       return  0;
-}
-
-
-static int
-emancipate( struct net_device  *dev )
-{
-       struct net_local   *snl = netdev_priv(dev);
-       struct net_device  *p   = snl->master;
-       struct net_local   *nl  = netdev_priv(p);
-
-       if( !(snl->state & FL_SLAVE) )
-               return  -EINVAL;
-
-       spin_lock( &nl->lock );
-       spin_lock( &snl->lock );
-       drop_xmit_queue( dev );
-
-       /* exclude from list */
-       for(;;) {       /* must be in list */
-               struct net_local  *t = netdev_priv(p);
-               if( t->link == dev ) {
-                       t->link = snl->link;
-                       break;
-               }
-               p = t->link;
-       }
-
-       snl->link = NULL;
-       snl->master = dev;
-       snl->state &= ~FL_SLAVE;
-
-       netif_start_queue( dev );
-
-       spin_unlock( &snl->lock );
-       spin_unlock( &nl->lock );
-
-       dev_put( dev );
-       return  0;
-}
-
-#endif
-
-static void
-set_multicast_list( struct net_device  *dev )
-{
-       return;         /* sbni always operates in promiscuous mode */
-}
-
-
-#ifdef MODULE
-module_param_hw_array(io, int, ioport, NULL, 0);
-module_param_hw_array(irq, int, irq, NULL, 0);
-module_param_array(baud, int, NULL, 0);
-module_param_array(rxl, int, NULL, 0);
-module_param_array(mac, int, NULL, 0);
-module_param(skip_pci_probe, bool, 0);
-
-MODULE_LICENSE("GPL");
-
-
-int __init init_module( void )
-{
-       struct net_device  *dev;
-       int err;
-
-       while( num < SBNI_MAX_NUM_CARDS ) {
-               dev = alloc_netdev(sizeof(struct net_local), "sbni%d",
-                                  NET_NAME_UNKNOWN, sbni_devsetup);
-               if( !dev)
-                       break;
-
-               sprintf( dev->name, "sbni%d", num );
-
-               err = sbni_init(dev);
-               if (err) {
-                       free_netdev(dev);
-                       break;
-               }
-
-               if( register_netdev( dev ) ) {
-                       release_region( dev->base_addr, SBNI_IO_EXTENT );
-                       free_netdev( dev );
-                       break;
-               }
-       }
-
-       return  *sbni_cards  ?  0  :  -ENODEV;
-}
-
-void
-cleanup_module(void)
-{
-       int i;
-
-       for (i = 0;  i < SBNI_MAX_NUM_CARDS;  ++i) {
-               struct net_device *dev = sbni_cards[i];
-               if (dev != NULL) {
-                       unregister_netdev(dev);
-                       release_region(dev->base_addr, SBNI_IO_EXTENT);
-                       free_netdev(dev);
-               }
-       }
-}
-
-#else  /* MODULE */
-
-static int __init
-sbni_setup( char  *p )
-{
-       int  n, parm;
-
-       if( *p++ != '(' )
-               goto  bad_param;
-
-       for( n = 0, parm = 0;  *p  &&  n < 8; ) {
-               (*dest[ parm ])[ n ] = simple_strtoul( p, &p, 0 );
-               if( !*p  ||  *p == ')' )
-                       return 1;
-               if( *p == ';' ) {
-                       ++p;
-                       ++n;
-                       parm = 0;
-               } else if( *p++ != ',' ) {
-                       break;
-               } else {
-                       if( ++parm >= 5 )
-                               break;
-               }
-       }
-bad_param:
-       pr_err("Error in sbni kernel parameter!\n");
-       return 0;
-}
-
-__setup( "sbni=", sbni_setup );
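-
-/*
- * Boot-line shape accepted above, e.g. sbni=(0x210,5;0x214,5): ','
- * advances to the next of up to five positional per-card values
- * (presumably io, irq, baud, rxl, mac, assuming dest[] mirrors the
- * module parameter arrays), ';' starts the next card (at most eight),
- * and ')' or end of string finishes parsing.
- */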
-
-#endif /* MODULE */
-
-/* -------------------------------------------------------------------------- */
-
-static u32
-calc_crc32( u32  crc,  u8  *p,  u32  len )
-{
-       while( len-- )
-               crc = CRC32( *p++, crc );
-
-       return  crc;
-}
-
-static u32  crc32tab[] __attribute__ ((aligned(8))) = {
-       0xD202EF8D,  0xA505DF1B,  0x3C0C8EA1,  0x4B0BBE37,
-       0xD56F2B94,  0xA2681B02,  0x3B614AB8,  0x4C667A2E,
-       0xDCD967BF,  0xABDE5729,  0x32D70693,  0x45D03605,
-       0xDBB4A3A6,  0xACB39330,  0x35BAC28A,  0x42BDF21C,
-       0xCFB5FFE9,  0xB8B2CF7F,  0x21BB9EC5,  0x56BCAE53,
-       0xC8D83BF0,  0xBFDF0B66,  0x26D65ADC,  0x51D16A4A,
-       0xC16E77DB,  0xB669474D,  0x2F6016F7,  0x58672661,
-       0xC603B3C2,  0xB1048354,  0x280DD2EE,  0x5F0AE278,
-       0xE96CCF45,  0x9E6BFFD3,  0x0762AE69,  0x70659EFF,
-       0xEE010B5C,  0x99063BCA,  0x000F6A70,  0x77085AE6,
-       0xE7B74777,  0x90B077E1,  0x09B9265B,  0x7EBE16CD,
-       0xE0DA836E,  0x97DDB3F8,  0x0ED4E242,  0x79D3D2D4,
-       0xF4DBDF21,  0x83DCEFB7,  0x1AD5BE0D,  0x6DD28E9B,
-       0xF3B61B38,  0x84B12BAE,  0x1DB87A14,  0x6ABF4A82,
-       0xFA005713,  0x8D076785,  0x140E363F,  0x630906A9,
-       0xFD6D930A,  0x8A6AA39C,  0x1363F226,  0x6464C2B0,
-       0xA4DEAE1D,  0xD3D99E8B,  0x4AD0CF31,  0x3DD7FFA7,
-       0xA3B36A04,  0xD4B45A92,  0x4DBD0B28,  0x3ABA3BBE,
-       0xAA05262F,  0xDD0216B9,  0x440B4703,  0x330C7795,
-       0xAD68E236,  0xDA6FD2A0,  0x4366831A,  0x3461B38C,
-       0xB969BE79,  0xCE6E8EEF,  0x5767DF55,  0x2060EFC3,
-       0xBE047A60,  0xC9034AF6,  0x500A1B4C,  0x270D2BDA,
-       0xB7B2364B,  0xC0B506DD,  0x59BC5767,  0x2EBB67F1,
-       0xB0DFF252,  0xC7D8C2C4,  0x5ED1937E,  0x29D6A3E8,
-       0x9FB08ED5,  0xE8B7BE43,  0x71BEEFF9,  0x06B9DF6F,
-       0x98DD4ACC,  0xEFDA7A5A,  0x76D32BE0,  0x01D41B76,
-       0x916B06E7,  0xE66C3671,  0x7F6567CB,  0x0862575D,
-       0x9606C2FE,  0xE101F268,  0x7808A3D2,  0x0F0F9344,
-       0x82079EB1,  0xF500AE27,  0x6C09FF9D,  0x1B0ECF0B,
-       0x856A5AA8,  0xF26D6A3E,  0x6B643B84,  0x1C630B12,
-       0x8CDC1683,  0xFBDB2615,  0x62D277AF,  0x15D54739,
-       0x8BB1D29A,  0xFCB6E20C,  0x65BFB3B6,  0x12B88320,
-       0x3FBA6CAD,  0x48BD5C3B,  0xD1B40D81,  0xA6B33D17,
-       0x38D7A8B4,  0x4FD09822,  0xD6D9C998,  0xA1DEF90E,
-       0x3161E49F,  0x4666D409,  0xDF6F85B3,  0xA868B525,
-       0x360C2086,  0x410B1010,  0xD80241AA,  0xAF05713C,
-       0x220D7CC9,  0x550A4C5F,  0xCC031DE5,  0xBB042D73,
-       0x2560B8D0,  0x52678846,  0xCB6ED9FC,  0xBC69E96A,
-       0x2CD6F4FB,  0x5BD1C46D,  0xC2D895D7,  0xB5DFA541,
-       0x2BBB30E2,  0x5CBC0074,  0xC5B551CE,  0xB2B26158,
-       0x04D44C65,  0x73D37CF3,  0xEADA2D49,  0x9DDD1DDF,
-       0x03B9887C,  0x74BEB8EA,  0xEDB7E950,  0x9AB0D9C6,
-       0x0A0FC457,  0x7D08F4C1,  0xE401A57B,  0x930695ED,
-       0x0D62004E,  0x7A6530D8,  0xE36C6162,  0x946B51F4,
-       0x19635C01,  0x6E646C97,  0xF76D3D2D,  0x806A0DBB,
-       0x1E0E9818,  0x6909A88E,  0xF000F934,  0x8707C9A2,
-       0x17B8D433,  0x60BFE4A5,  0xF9B6B51F,  0x8EB18589,
-       0x10D5102A,  0x67D220BC,  0xFEDB7106,  0x89DC4190,
-       0x49662D3D,  0x3E611DAB,  0xA7684C11,  0xD06F7C87,
-       0x4E0BE924,  0x390CD9B2,  0xA0058808,  0xD702B89E,
-       0x47BDA50F,  0x30BA9599,  0xA9B3C423,  0xDEB4F4B5,
-       0x40D06116,  0x37D75180,  0xAEDE003A,  0xD9D930AC,
-       0x54D13D59,  0x23D60DCF,  0xBADF5C75,  0xCDD86CE3,
-       0x53BCF940,  0x24BBC9D6,  0xBDB2986C,  0xCAB5A8FA,
-       0x5A0AB56B,  0x2D0D85FD,  0xB404D447,  0xC303E4D1,
-       0x5D677172,  0x2A6041E4,  0xB369105E,  0xC46E20C8,
-       0x72080DF5,  0x050F3D63,  0x9C066CD9,  0xEB015C4F,
-       0x7565C9EC,  0x0262F97A,  0x9B6BA8C0,  0xEC6C9856,
-       0x7CD385C7,  0x0BD4B551,  0x92DDE4EB,  0xE5DAD47D,
-       0x7BBE41DE,  0x0CB97148,  0x95B020F2,  0xE2B71064,
-       0x6FBF1D91,  0x18B82D07,  0x81B17CBD,  0xF6B64C2B,
-       0x68D2D988,  0x1FD5E91E,  0x86DCB8A4,  0xF1DB8832,
-       0x616495A3,  0x1663A535,  0x8F6AF48F,  0xF86DC419,
-       0x660951BA,  0x110E612C,  0x88073096,  0xFF000000
-};
-
diff --git a/drivers/net/wan/sbni.h b/drivers/net/wan/sbni.h
deleted file mode 100644 (file)
index 8426451..0000000
+++ /dev/null
@@ -1,147 +0,0 @@
-/* sbni.h:  definitions for a Granch SBNI12 driver, version 5.0.0
- * Written 2001 Denis I.Timofeev (timofeev@granch.ru)
- * This file is distributed under the GNU GPL
- */
-
-#ifndef SBNI_H
-#define SBNI_H
-
-#ifdef SBNI_DEBUG
-#define DP( A ) A
-#else
-#define DP( A )
-#endif
-
-
-/* We don't have an official vendor id yet... */
-#define SBNI_PCI_VENDOR        0x55 
-#define SBNI_PCI_DEVICE        0x9f
-
-#define ISA_MODE 0x00
-#define PCI_MODE 0x01
-
-#define        SBNI_IO_EXTENT  4
-
-enum sbni_reg {
-       CSR0 = 0,
-       CSR1 = 1,
-       DAT  = 2
-};
-
-/* CSR0 mapping */
-enum {
-       BU_EMP = 0x02,
-       RC_CHK = 0x04,
-       CT_ZER = 0x08,
-       TR_REQ = 0x10,
-       TR_RDY = 0x20,
-       EN_INT = 0x40,
-       RC_RDY = 0x80
-};
-
-
-/* CSR1 mapping */
-#define PR_RES 0x80
-
-struct sbni_csr1 {
-#ifdef __LITTLE_ENDIAN_BITFIELD
-       u8 rxl  : 5;
-       u8 rate : 2;
-       u8      : 1;
-#else
-       u8      : 1;
-       u8 rate : 2;
-       u8 rxl  : 5;
-#endif
-};
-
-/* fields in frame header */
-#define FRAME_ACK_MASK  (unsigned short)0x7000
-#define FRAME_LEN_MASK  (unsigned short)0x03FF
-#define FRAME_FIRST     (unsigned short)0x8000
-#define FRAME_RETRY     (unsigned short)0x0800
-
-#define FRAME_SENT_BAD  (unsigned short)0x4000
-#define FRAME_SENT_OK   (unsigned short)0x3000
-
-
-/* state flags */
-enum {
-       FL_WAIT_ACK    = 0x01,
-       FL_NEED_RESEND = 0x02,
-       FL_PREV_OK     = 0x04,
-       FL_SLOW_MODE   = 0x08,
-       FL_SECONDARY   = 0x10,
-#ifdef CONFIG_SBNI_MULTILINE
-       FL_SLAVE       = 0x20,
-#endif
-       FL_LINE_DOWN   = 0x40
-};
-
-
-enum {
-       DEFAULT_IOBASEADDR = 0x210,
-       DEFAULT_INTERRUPTNUMBER = 5,
-       DEFAULT_RATE = 0,
-       DEFAULT_FRAME_LEN = 1012
-};
-
-#define DEF_RXL_DELTA  -1
-#define DEF_RXL                0xf
-
-#define SBNI_SIG 0x5a
-
-#define        SBNI_MIN_LEN    60      /* Shortest Ethernet frame without FCS */
-#define SBNI_MAX_FRAME 1023
-#define ETHER_MAX_LEN  1518
-
-#define SBNI_TIMEOUT   (HZ/10)
-
-#define TR_ERROR_COUNT 32
-#define CHANGE_LEVEL_START_TICKS 4
-
-#define SBNI_MAX_NUM_CARDS     16
-
-/* internal SBNI-specific statistics */
-struct sbni_in_stats {
-       u32     all_rx_number;
-       u32     bad_rx_number;
-       u32     timeout_number;
-       u32     all_tx_number;
-       u32     resend_tx_number;
-};
-
-/* SBNI ioctl params */
-#define SIOCDEVGETINSTATS      SIOCDEVPRIVATE
-#define SIOCDEVRESINSTATS      SIOCDEVPRIVATE+1
-#define SIOCDEVGHWSTATE        SIOCDEVPRIVATE+2
-#define SIOCDEVSHWSTATE        SIOCDEVPRIVATE+3
-#define SIOCDEVENSLAVE         SIOCDEVPRIVATE+4
-#define SIOCDEVEMANSIPATE      SIOCDEVPRIVATE+5
-
-
-/* data packet for SIOCDEVGHWSTATE/SIOCDEVSHWSTATE ioctl requests */
-struct sbni_flags {
-       u32     rxl             : 4;
-       u32     rate            : 2;
-       u32     fixed_rxl       : 1;
-       u32     slow_mode       : 1;
-       u32     mac_addr        : 24;
-};
-
-/*
- * CRC-32 stuff
- */
-#define CRC32(c,crc) (crc32tab[((size_t)(crc) ^ (c)) & 0xff] ^ (((crc) >> 8) & 0x00FFFFFF))
-      /* CRC generator 0xEDB88320 */
-      /* CRC remainder 0x2144DF1C */
-      /* CRC initial value 0x00000000 */
-#define CRC32_REMAINDER 0x2144DF1C
-#define CRC32_INITIAL 0x00000000
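-
-/*
- * One table-driven step of the right-shifting (reflected) CRC-32: index
- * the table with the low byte of (crc ^ c), then fold in the upper 24
- * bits of the previous crc.  A whole buffer is processed byte-wise, as
- * calc_crc32() in sbni.c does:
- *
- *     u32 crc = CRC32_INITIAL;
- *     while (len--)
- *             crc = CRC32(*p++, crc);
- */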
-
-#ifndef __initdata
-#define __initdata
-#endif
-
-#endif
-
index 1df9595..514f2c1 100644 (file)
@@ -136,6 +136,29 @@ static struct ieee80211_supported_band band_5ghz = {
 /* Assigned at module init. Guaranteed locally-administered and unicast. */
 static u8 fake_router_bssid[ETH_ALEN] __ro_after_init = {};
 
+static void virt_wifi_inform_bss(struct wiphy *wiphy)
+{
+       u64 tsf = div_u64(ktime_get_boottime_ns(), 1000);
+       struct cfg80211_bss *informed_bss;
+       static const struct {
+               u8 tag;
+               u8 len;
+               u8 ssid[8];
+       } __packed ssid = {
+               .tag = WLAN_EID_SSID,
+               .len = 8,
+               .ssid = "VirtWifi",
+       };
+
+       informed_bss = cfg80211_inform_bss(wiphy, &channel_5ghz,
+                                          CFG80211_BSS_FTYPE_PRESP,
+                                          fake_router_bssid, tsf,
+                                          WLAN_CAPABILITY_ESS, 0,
+                                          (void *)&ssid, sizeof(ssid),
+                                          DBM_TO_MBM(-50), GFP_KERNEL);
+       cfg80211_put_bss(wiphy, informed_bss);
+}
+
 /* Called with the rtnl lock held. */
 static int virt_wifi_scan(struct wiphy *wiphy,
                          struct cfg80211_scan_request *request)
@@ -156,28 +179,13 @@ static int virt_wifi_scan(struct wiphy *wiphy,
 /* Acquires and releases the rdev BSS lock. */
 static void virt_wifi_scan_result(struct work_struct *work)
 {
-       struct {
-               u8 tag;
-               u8 len;
-               u8 ssid[8];
-       } __packed ssid = {
-               .tag = WLAN_EID_SSID, .len = 8, .ssid = "VirtWifi",
-       };
-       struct cfg80211_bss *informed_bss;
        struct virt_wifi_wiphy_priv *priv =
                container_of(work, struct virt_wifi_wiphy_priv,
                             scan_result.work);
        struct wiphy *wiphy = priv_to_wiphy(priv);
        struct cfg80211_scan_info scan_info = { .aborted = false };
-       u64 tsf = div_u64(ktime_get_boottime_ns(), 1000);
 
-       informed_bss = cfg80211_inform_bss(wiphy, &channel_5ghz,
-                                          CFG80211_BSS_FTYPE_PRESP,
-                                          fake_router_bssid, tsf,
-                                          WLAN_CAPABILITY_ESS, 0,
-                                          (void *)&ssid, sizeof(ssid),
-                                          DBM_TO_MBM(-50), GFP_KERNEL);
-       cfg80211_put_bss(wiphy, informed_bss);
+       virt_wifi_inform_bss(wiphy);
 
        /* Schedules work which acquires and releases the rtnl lock. */
        cfg80211_scan_done(priv->scan_request, &scan_info);
@@ -225,10 +233,12 @@ static int virt_wifi_connect(struct wiphy *wiphy, struct net_device *netdev,
        if (!could_schedule)
                return -EBUSY;
 
-       if (sme->bssid)
+       if (sme->bssid) {
                ether_addr_copy(priv->connect_requested_bss, sme->bssid);
-       else
+       } else {
+               virt_wifi_inform_bss(wiphy);
                eth_zero_addr(priv->connect_requested_bss);
+       }
 
        wiphy_debug(wiphy, "connect\n");
 
@@ -241,11 +251,13 @@ static void virt_wifi_connect_complete(struct work_struct *work)
        struct virt_wifi_netdev_priv *priv =
                container_of(work, struct virt_wifi_netdev_priv, connect.work);
        u8 *requested_bss = priv->connect_requested_bss;
-       bool has_addr = !is_zero_ether_addr(requested_bss);
        bool right_addr = ether_addr_equal(requested_bss, fake_router_bssid);
        u16 status = WLAN_STATUS_SUCCESS;
 
-       if (!priv->is_up || (has_addr && !right_addr))
+       if (is_zero_ether_addr(requested_bss))
+               requested_bss = NULL;
+
+       if (!priv->is_up || (requested_bss && !right_addr))
                status = WLAN_STATUS_UNSPECIFIED_FAILURE;
        else
                priv->is_connected = true;
index de93843..77dbfc4 100644 (file)
@@ -38,6 +38,18 @@ config MHI_WWAN_CTRL
          To compile this driver as a module, choose M here: the module will be
          called mhi_wwan_ctrl.
 
+config MHI_WWAN_MBIM
+        tristate "MHI WWAN MBIM network driver for QCOM-based PCIe modems"
+        depends on MHI_BUS
+        help
+          MHI WWAN MBIM is a WWAN network driver for QCOM-based PCIe modems.
+          It implements MBIM over MHI, for IP data aggregation and muxing.
+          A default wwan0 network interface is created for MBIM data session
+          ID 0. Additional links can be created via the wwan rtnetlink type.
+
+          To compile this driver as a module, choose M here: the module will be
+          called mhi_wwan_mbim.
+
 config RPMSG_WWAN_CTRL
        tristate "RPMSG WWAN control driver"
        depends on RPMSG
index d90ac33..fe51fee 100644 (file)
@@ -9,5 +9,6 @@ wwan-objs += wwan_core.o
 obj-$(CONFIG_WWAN_HWSIM) += wwan_hwsim.o
 
 obj-$(CONFIG_MHI_WWAN_CTRL) += mhi_wwan_ctrl.o
+obj-$(CONFIG_MHI_WWAN_MBIM) += mhi_wwan_mbim.o
 obj-$(CONFIG_RPMSG_WWAN_CTRL) += rpmsg_wwan_ctrl.o
 obj-$(CONFIG_IOSM) += iosm/
index 45e6923..f861994 100644 (file)
 #define IOSM_CP_VERSION 0x0100UL
 
 /* DL dir Aggregation support mask */
-#define DL_AGGR BIT(23)
+#define DL_AGGR BIT(9)
 
 /* UL dir Aggregation support mask */
-#define UL_AGGR BIT(22)
+#define UL_AGGR BIT(8)
 
 /* UL flow credit support mask */
 #define UL_FLOW_CREDIT BIT(21)
index 562de27..bdb2d32 100644 (file)
@@ -320,7 +320,7 @@ static void ipc_mux_dl_fcth_decode(struct iosm_mux *ipc_mux,
                return;
        }
 
-       ul_credits = fct->vfl.nr_of_bytes;
+       ul_credits = le32_to_cpu(fct->vfl.nr_of_bytes);
 
        dev_dbg(ipc_mux->dev, "Flow_Credit:: if_id[%d] Old: %d Grants: %d",
                if_id, ipc_mux->session[if_id].ul_flow_credits, ul_credits);
@@ -586,7 +586,7 @@ static bool ipc_mux_lite_send_qlt(struct iosm_mux *ipc_mux)
                qlt->reserved[0] = 0;
                qlt->reserved[1] = 0;
 
-               qlt->vfl.nr_of_bytes = session->ul_list.qlen;
+               qlt->vfl.nr_of_bytes = cpu_to_le32(session->ul_list.qlen);
 
                /* Add QLT to the transfer list. */
                skb_queue_tail(&ipc_mux->channel->ul_list,
index 4a74e3c..aae83db 100644 (file)
@@ -106,7 +106,7 @@ struct mux_lite_cmdh {
  * @nr_of_bytes:       Number of bytes available to transmit in the queue.
  */
 struct mux_lite_vfl {
-       u32 nr_of_bytes;
+       __le32 nr_of_bytes;
 };
 
 /**
index 91109e2..35d5907 100644 (file)
@@ -412,8 +412,8 @@ struct sk_buff *ipc_protocol_dl_td_process(struct iosm_protocol *ipc_protocol,
        }
 
        if (p_td->buffer.address != IPC_CB(skb)->mapping) {
-               dev_err(ipc_protocol->dev, "invalid buf=%p or skb=%p",
-                       (void *)p_td->buffer.address, skb->data);
+               dev_err(ipc_protocol->dev, "invalid buf=%llx or skb=%p",
+                       (unsigned long long)p_td->buffer.address, skb->data);
                ipc_pcie_kfree_skb(ipc_protocol->pcie, skb);
                skb = NULL;
                goto ret;
index b2357ad..b571d9c 100644 (file)
@@ -228,7 +228,7 @@ static void ipc_wwan_dellink(void *ctxt, struct net_device *dev,
 
        RCU_INIT_POINTER(ipc_wwan->sub_netlist[if_id], NULL);
        /* unregistering includes synchronize_net() */
-       unregister_netdevice(dev);
+       unregister_netdevice_queue(dev, head);
 
 unlock:
        mutex_unlock(&ipc_wwan->if_mutex);
index 1bc6b69..1e18420 100644 (file)
@@ -110,7 +110,7 @@ static int mhi_wwan_ctrl_start(struct wwan_port *port)
        int ret;
 
        /* Start mhi device's channel(s) */
-       ret = mhi_prepare_for_transfer(mhiwwan->mhi_dev);
+       ret = mhi_prepare_for_transfer(mhiwwan->mhi_dev, 0);
        if (ret)
                return ret;
 
diff --git a/drivers/net/wwan/mhi_wwan_mbim.c b/drivers/net/wwan/mhi_wwan_mbim.c
new file mode 100644 (file)
index 0000000..377529b
--- /dev/null
@@ -0,0 +1,658 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* MHI MBIM Network driver - Network/MBIM over MHI bus
+ *
+ * Copyright (C) 2021 Linaro Ltd <loic.poulain@linaro.org>
+ *
+ * This driver copies some code from cdc_ncm, which is:
+ * Copyright (C) ST-Ericsson 2010-2012
+ * and cdc_mbim, which is:
+ * Copyright (c) 2012  Smith Micro Software, Inc.
+ * Copyright (c) 2012  Bjørn Mork <bjorn@mork.no>
+ *
+ */
+
+#include <linux/ethtool.h>
+#include <linux/if_arp.h>
+#include <linux/if_vlan.h>
+#include <linux/ip.h>
+#include <linux/mhi.h>
+#include <linux/mii.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/u64_stats_sync.h>
+#include <linux/usb.h>
+#include <linux/usb/cdc.h>
+#include <linux/usb/usbnet.h>
+#include <linux/usb/cdc_ncm.h>
+#include <linux/wwan.h>
+
+/* An MRU of 3500 optimizes skb allocation: the skbs will basically fit
+ * in one 4K page. Large MBIM packets will simply be split over several
+ * MHI transfers and chained by the MHI net layer (zerocopy).
+ */
+#define MHI_DEFAULT_MRU 3500
+
+#define MHI_MBIM_DEFAULT_MTU 1500
+#define MHI_MAX_BUF_SZ 0xffff
+
+#define MBIM_NDP16_SIGN_MASK 0x00ffffff
+
+#define MHI_MBIM_LINK_HASH_SIZE 8
+#define LINK_HASH(session) ((session) % MHI_MBIM_LINK_HASH_SIZE)
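+
+/* One link per MBIM session ID, kept in a small RCU-protected hash
+ * table; LINK_HASH() just folds the session ID into one of the
+ * MHI_MBIM_LINK_HASH_SIZE buckets.
+ */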
+
+struct mhi_mbim_link {
+       struct mhi_mbim_context *mbim;
+       struct net_device *ndev;
+       unsigned int session;
+
+       /* stats */
+       u64_stats_t rx_packets;
+       u64_stats_t rx_bytes;
+       u64_stats_t rx_errors;
+       u64_stats_t tx_packets;
+       u64_stats_t tx_bytes;
+       u64_stats_t tx_errors;
+       u64_stats_t tx_dropped;
+       struct u64_stats_sync tx_syncp;
+       struct u64_stats_sync rx_syncp;
+
+       struct hlist_node hlnode;
+};
+
+struct mhi_mbim_context {
+       struct mhi_device *mdev;
+       struct sk_buff *skbagg_head;
+       struct sk_buff *skbagg_tail;
+       unsigned int mru;
+       u32 rx_queue_sz;
+       u16 rx_seq;
+       u16 tx_seq;
+       struct delayed_work rx_refill;
+       spinlock_t tx_lock;
+       struct hlist_head link_list[MHI_MBIM_LINK_HASH_SIZE];
+};
+
+struct mbim_tx_hdr {
+       struct usb_cdc_ncm_nth16 nth16;
+       struct usb_cdc_ncm_ndp16 ndp16;
+       struct usb_cdc_ncm_dpe16 dpe16[2];
+} __packed;
+
+static struct mhi_mbim_link *mhi_mbim_get_link_rcu(struct mhi_mbim_context *mbim,
+                                                  unsigned int session)
+{
+       struct mhi_mbim_link *link;
+
+       hlist_for_each_entry_rcu(link, &mbim->link_list[LINK_HASH(session)], hlnode) {
+               if (link->session == session)
+                       return link;
+       }
+
+       return NULL;
+}
+
+static struct sk_buff *mbim_tx_fixup(struct sk_buff *skb, unsigned int session,
+                                    u16 tx_seq)
+{
+       unsigned int dgram_size = skb->len;
+       struct usb_cdc_ncm_nth16 *nth16;
+       struct usb_cdc_ncm_ndp16 *ndp16;
+       struct mbim_tx_hdr *mbim_hdr;
+
+       /* Only one NDP is sent, containing the IP packet (no aggregation) */
+
+       /* Ensure we have enough headroom for crafting MBIM header */
+       if (skb_cow_head(skb, sizeof(struct mbim_tx_hdr))) {
+               dev_kfree_skb_any(skb);
+               return NULL;
+       }
+
+       mbim_hdr = skb_push(skb, sizeof(struct mbim_tx_hdr));
+
+       /* Fill NTB header */
+       nth16 = &mbim_hdr->nth16;
+       nth16->dwSignature = cpu_to_le32(USB_CDC_NCM_NTH16_SIGN);
+       nth16->wHeaderLength = cpu_to_le16(sizeof(struct usb_cdc_ncm_nth16));
+       nth16->wSequence = cpu_to_le16(tx_seq);
+       nth16->wBlockLength = cpu_to_le16(skb->len);
+       nth16->wNdpIndex = cpu_to_le16(sizeof(struct usb_cdc_ncm_nth16));
+
+       /* Fill the unique NDP */
+       ndp16 = &mbim_hdr->ndp16;
+       ndp16->dwSignature = cpu_to_le32(USB_CDC_MBIM_NDP16_IPS_SIGN | (session << 24));
+       ndp16->wLength = cpu_to_le16(sizeof(struct usb_cdc_ncm_ndp16)
+                                       + sizeof(struct usb_cdc_ncm_dpe16) * 2);
+       ndp16->wNextNdpIndex = 0;
+
+       /* Datagram follows the mbim header */
+       ndp16->dpe16[0].wDatagramIndex = cpu_to_le16(sizeof(struct mbim_tx_hdr));
+       ndp16->dpe16[0].wDatagramLength = cpu_to_le16(dgram_size);
+
+       /* null termination */
+       ndp16->dpe16[1].wDatagramIndex = 0;
+       ndp16->dpe16[1].wDatagramLength = 0;
+
+       return skb;
+}
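+
+/* Layout of the buffer built above (single IP datagram per NTB, no
+ * aggregation):
+ *
+ *   [NTH16][NDP16][DPE16 #0][DPE16 #1 = null terminator][IP datagram]
+ *
+ * wNdpIndex points just past the NTH16, and dpe16[0].wDatagramIndex
+ * just past the whole mbim_tx_hdr, i.e. at the original skb payload.
+ */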
+
+static netdev_tx_t mhi_mbim_ndo_xmit(struct sk_buff *skb, struct net_device *ndev)
+{
+       struct mhi_mbim_link *link = wwan_netdev_drvpriv(ndev);
+       struct mhi_mbim_context *mbim = link->mbim;
+       unsigned long flags;
+       int err = -ENOMEM;
+
+       /* Serialize MHI channel queuing and MBIM seq */
+       spin_lock_irqsave(&mbim->tx_lock, flags);
+
+       skb = mbim_tx_fixup(skb, link->session, mbim->tx_seq);
+       if (unlikely(!skb))
+               goto exit_unlock;
+
+       err = mhi_queue_skb(mbim->mdev, DMA_TO_DEVICE, skb, skb->len, MHI_EOT);
+
+       if (mhi_queue_is_full(mbim->mdev, DMA_TO_DEVICE))
+               netif_stop_queue(ndev);
+
+       if (!err)
+               mbim->tx_seq++;
+
+exit_unlock:
+       spin_unlock_irqrestore(&mbim->tx_lock, flags);
+
+       if (unlikely(err)) {
+               net_err_ratelimited("%s: Failed to queue TX buf (%d)\n",
+                                   ndev->name, err);
+               dev_kfree_skb_any(skb);
+               goto exit_drop;
+       }
+
+       return NETDEV_TX_OK;
+
+exit_drop:
+       u64_stats_update_begin(&link->tx_syncp);
+       u64_stats_inc(&link->tx_dropped);
+       u64_stats_update_end(&link->tx_syncp);
+
+       return NETDEV_TX_OK;
+}
+
+static int mbim_rx_verify_nth16(struct mhi_mbim_context *mbim, struct sk_buff *skb)
+{
+       struct usb_cdc_ncm_nth16 *nth16;
+       int len;
+
+       if (skb->len < sizeof(struct usb_cdc_ncm_nth16) +
+                       sizeof(struct usb_cdc_ncm_ndp16)) {
+               net_err_ratelimited("frame too short\n");
+               return -EINVAL;
+       }
+
+       nth16 = (struct usb_cdc_ncm_nth16 *)skb->data;
+
+       if (nth16->dwSignature != cpu_to_le32(USB_CDC_NCM_NTH16_SIGN)) {
+               net_err_ratelimited("invalid NTH16 signature <%#010x>\n",
+                                   le32_to_cpu(nth16->dwSignature));
+               return -EINVAL;
+       }
+
+       /* No limit on the block length, except the size of the data pkt */
+       len = le16_to_cpu(nth16->wBlockLength);
+       if (len > skb->len) {
+               net_err_ratelimited("NTB does not fit into the skb %u/%u\n",
+                                   len, skb->len);
+               return -EINVAL;
+       }
+
+       if (mbim->rx_seq + 1 != le16_to_cpu(nth16->wSequence) &&
+           (mbim->rx_seq || le16_to_cpu(nth16->wSequence)) &&
+           !(mbim->rx_seq == 0xffff && !le16_to_cpu(nth16->wSequence))) {
+               net_err_ratelimited("sequence number glitch prev=%d curr=%d\n",
+                                   mbim->rx_seq, le16_to_cpu(nth16->wSequence));
+       }
+       mbim->rx_seq = le16_to_cpu(nth16->wSequence);
+
+       return le16_to_cpu(nth16->wNdpIndex);
+}
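+
+/* The sequence test above tolerates exactly two discontinuities: a
+ * restart where both counters are zero, and the 16-bit wraparound from
+ * 0xffff to 0.  Anything else is logged as a glitch, but the NTB is
+ * still processed.
+ */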
+
+static int mbim_rx_verify_ndp16(struct sk_buff *skb, struct usb_cdc_ncm_ndp16 *ndp16)
+{
+       int ret;
+
+       if (le16_to_cpu(ndp16->wLength) < USB_CDC_NCM_NDP16_LENGTH_MIN) {
+               net_err_ratelimited("invalid DPT16 length <%u>\n",
+                                   le16_to_cpu(ndp16->wLength));
+               return -EINVAL;
+       }
+
+       ret = ((le16_to_cpu(ndp16->wLength) - sizeof(struct usb_cdc_ncm_ndp16))
+                       / sizeof(struct usb_cdc_ncm_dpe16));
+       ret--; /* Last entry is always a NULL terminator */
+
+       if (sizeof(struct usb_cdc_ncm_ndp16) +
+            ret * sizeof(struct usb_cdc_ncm_dpe16) > skb->len) {
+               net_err_ratelimited("Invalid nframes = %d\n", ret);
+               return -EINVAL;
+       }
+
+       return ret;
+}
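+
+/* wLength covers the NDP16 header plus every DPE16 entry, so dividing
+ * out the entry size and dropping the mandatory null terminator yields
+ * the number of real datagram pointers, which is then bounds-checked
+ * against the skb length.
+ */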
+
+static void mhi_mbim_rx(struct mhi_mbim_context *mbim, struct sk_buff *skb)
+{
+       int ndpoffset;
+
+       /* Check NTB header and retrieve first NDP offset */
+       ndpoffset = mbim_rx_verify_nth16(mbim, skb);
+       if (ndpoffset < 0) {
+               net_err_ratelimited("mbim: Incorrect NTB header\n");
+               goto error;
+       }
+
+       /* Process each NDP */
+       while (1) {
+               struct usb_cdc_ncm_ndp16 ndp16;
+               struct usb_cdc_ncm_dpe16 dpe16;
+               struct mhi_mbim_link *link;
+               int nframes, n, dpeoffset;
+               unsigned int session;
+
+               if (skb_copy_bits(skb, ndpoffset, &ndp16, sizeof(ndp16))) {
+                       net_err_ratelimited("mbim: Incorrect NDP offset (%u)\n",
+                                           ndpoffset);
+                       goto error;
+               }
+
+               /* Check NDP header and retrieve number of datagrams */
+               nframes = mbim_rx_verify_ndp16(skb, &ndp16);
+               if (nframes < 0) {
+                       net_err_ratelimited("mbim: Incorrect NDP16\n");
+                       goto error;
+               }
+
+               /* Only IP data type supported, no DSS in MHI context */
+               if ((ndp16.dwSignature & cpu_to_le32(MBIM_NDP16_SIGN_MASK))
+                               != cpu_to_le32(USB_CDC_MBIM_NDP16_IPS_SIGN)) {
+                       net_err_ratelimited("mbim: Unsupported NDP type\n");
+                       goto next_ndp;
+               }
+
+               session = (le32_to_cpu(ndp16.dwSignature) & ~MBIM_NDP16_SIGN_MASK) >> 24;
+
+               rcu_read_lock();
+
+               link = mhi_mbim_get_link_rcu(mbim, session);
+               if (!link) {
+                       net_err_ratelimited("mbim: bad packet session (%u)\n", session);
+                       goto unlock;
+               }
+
+               /* de-aggregate and deliver IP packets */
+               dpeoffset = ndpoffset + sizeof(struct usb_cdc_ncm_ndp16);
+               for (n = 0; n < nframes; n++, dpeoffset += sizeof(dpe16)) {
+                       u16 dgram_offset, dgram_len;
+                       struct sk_buff *skbn;
+
+                       if (skb_copy_bits(skb, dpeoffset, &dpe16, sizeof(dpe16)))
+                               break;
+
+                       dgram_offset = le16_to_cpu(dpe16.wDatagramIndex);
+                       dgram_len = le16_to_cpu(dpe16.wDatagramLength);
+
+                       if (!dgram_offset || !dgram_len)
+                               break; /* null terminator */
+
+                       skbn = netdev_alloc_skb(link->ndev, dgram_len);
+                       if (!skbn)
+                               continue;
+
+                       skb_put(skbn, dgram_len);
+                       skb_copy_bits(skb, dgram_offset, skbn->data, dgram_len);
+
+                       switch (skbn->data[0] & 0xf0) {
+                       case 0x40:
+                               skbn->protocol = htons(ETH_P_IP);
+                               break;
+                       case 0x60:
+                               skbn->protocol = htons(ETH_P_IPV6);
+                               break;
+                       default:
+                               net_err_ratelimited("%s: unknown protocol\n",
+                                                   link->ndev->name);
+                               dev_kfree_skb_any(skbn);
+                               u64_stats_update_begin(&link->rx_syncp);
+                               u64_stats_inc(&link->rx_errors);
+                               u64_stats_update_end(&link->rx_syncp);
+                               continue;
+                       }
+
+                       u64_stats_update_begin(&link->rx_syncp);
+                       u64_stats_inc(&link->rx_packets);
+                       u64_stats_add(&link->rx_bytes, skbn->len);
+                       u64_stats_update_end(&link->rx_syncp);
+
+                       netif_rx(skbn);
+               }
+unlock:
+               rcu_read_unlock();
+next_ndp:
+               /* Other NDP to process? */
+               ndpoffset = (int)le16_to_cpu(ndp16.wNextNdpIndex);
+               if (!ndpoffset)
+                       break;
+       }
+
+       /* free skb */
+       dev_consume_skb_any(skb);
+       return;
+error:
+       dev_kfree_skb_any(skb);
+}
+
+static struct sk_buff *mhi_net_skb_agg(struct mhi_mbim_context *mbim,
+                                      struct sk_buff *skb)
+{
+       struct sk_buff *head = mbim->skbagg_head;
+       struct sk_buff *tail = mbim->skbagg_tail;
+
+       /* This is non-paged skb chaining using frag_list */
+       if (!head) {
+               mbim->skbagg_head = skb;
+               return skb;
+       }
+
+       if (!skb_shinfo(head)->frag_list)
+               skb_shinfo(head)->frag_list = skb;
+       else
+               tail->next = skb;
+
+       head->len += skb->len;
+       head->data_len += skb->len;
+       head->truesize += skb->truesize;
+
+       mbim->skbagg_tail = skb;
+
+       return mbim->skbagg_head;
+}
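+
+/* The helper above chains oversized NTBs through the head skb's
+ * frag_list (the first fragment becomes the head, later ones are linked
+ * via tail->next) while keeping len, data_len and truesize consistent,
+ * so mhi_mbim_rx() can walk the reassembled NTB with skb_copy_bits() as
+ * if it were linear.
+ */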
+
+static void mhi_net_rx_refill_work(struct work_struct *work)
+{
+       struct mhi_mbim_context *mbim = container_of(work, struct mhi_mbim_context,
+                                                    rx_refill.work);
+       struct mhi_device *mdev = mbim->mdev;
+       int err;
+
+       while (!mhi_queue_is_full(mdev, DMA_FROM_DEVICE)) {
+               struct sk_buff *skb = alloc_skb(MHI_DEFAULT_MRU, GFP_KERNEL);
+
+               if (unlikely(!skb))
+                       break;
+
+               err = mhi_queue_skb(mdev, DMA_FROM_DEVICE, skb,
+                                   MHI_DEFAULT_MRU, MHI_EOT);
+               if (unlikely(err)) {
+                       kfree_skb(skb);
+                       break;
+               }
+
+               /* Do not hog the CPU if rx buffers are consumed faster than
+                * queued (unlikely).
+                */
+               cond_resched();
+       }
+
+       /* If we're still starved of rx buffers, reschedule later */
+       if (mhi_get_free_desc_count(mdev, DMA_FROM_DEVICE) == mbim->rx_queue_sz)
+               schedule_delayed_work(&mbim->rx_refill, HZ / 2);
+}
+
+static void mhi_mbim_dl_callback(struct mhi_device *mhi_dev,
+                                struct mhi_result *mhi_res)
+{
+       struct mhi_mbim_context *mbim = dev_get_drvdata(&mhi_dev->dev);
+       struct sk_buff *skb = mhi_res->buf_addr;
+       int free_desc_count;
+
+       free_desc_count = mhi_get_free_desc_count(mhi_dev, DMA_FROM_DEVICE);
+
+       if (unlikely(mhi_res->transaction_status)) {
+               switch (mhi_res->transaction_status) {
+               case -EOVERFLOW:
+                       /* Packet has been split over multiple transfers */
+                       skb_put(skb, mhi_res->bytes_xferd);
+                       mhi_net_skb_agg(mbim, skb);
+                       break;
+               case -ENOTCONN:
+                       /* MHI layer stopping/resetting the DL channel */
+                       dev_kfree_skb_any(skb);
+                       return;
+               default:
+                       /* Unknown error, simply drop */
+                       dev_kfree_skb_any(skb);
+               }
+       } else {
+               skb_put(skb, mhi_res->bytes_xferd);
+
+               if (mbim->skbagg_head) {
+                       /* Aggregate the final fragment */
+                       skb = mhi_net_skb_agg(mbim, skb);
+                       mbim->skbagg_head = NULL;
+               }
+
+               mhi_mbim_rx(mbim, skb);
+       }
+
+       /* Refill if RX buffers queue becomes low */
+       if (free_desc_count >= mbim->rx_queue_sz / 2)
+               schedule_delayed_work(&mbim->rx_refill, 0);
+}
+
+static void mhi_mbim_ndo_get_stats64(struct net_device *ndev,
+                                    struct rtnl_link_stats64 *stats)
+{
+       struct mhi_mbim_link *link = wwan_netdev_drvpriv(ndev);
+       unsigned int start;
+
+       do {
+               start = u64_stats_fetch_begin_irq(&link->rx_syncp);
+               stats->rx_packets = u64_stats_read(&link->rx_packets);
+               stats->rx_bytes = u64_stats_read(&link->rx_bytes);
+               stats->rx_errors = u64_stats_read(&link->rx_errors);
+       } while (u64_stats_fetch_retry_irq(&link->rx_syncp, start));
+
+       do {
+               start = u64_stats_fetch_begin_irq(&link->tx_syncp);
+               stats->tx_packets = u64_stats_read(&link->tx_packets);
+               stats->tx_bytes = u64_stats_read(&link->tx_bytes);
+               stats->tx_errors = u64_stats_read(&link->tx_errors);
+               stats->tx_dropped = u64_stats_read(&link->tx_dropped);
+       } while (u64_stats_fetch_retry_irq(&link->tx_syncp, start));
+}
+
+static void mhi_mbim_ul_callback(struct mhi_device *mhi_dev,
+                                struct mhi_result *mhi_res)
+{
+       struct mhi_mbim_context *mbim = dev_get_drvdata(&mhi_dev->dev);
+       struct sk_buff *skb = mhi_res->buf_addr;
+       struct net_device *ndev = skb->dev;
+       struct mhi_mbim_link *link = wwan_netdev_drvpriv(ndev);
+
+       /* Hardware has consumed the buffer, so free the skb (which is not
+        * freed by the MHI stack) and perform accounting.
+        */
+       dev_consume_skb_any(skb);
+
+       u64_stats_update_begin(&link->tx_syncp);
+       if (unlikely(mhi_res->transaction_status)) {
+               /* MHI layer stopping/resetting the UL channel */
+               if (mhi_res->transaction_status == -ENOTCONN) {
+                       u64_stats_update_end(&link->tx_syncp);
+                       return;
+               }
+
+               u64_stats_inc(&link->tx_errors);
+       } else {
+               u64_stats_inc(&link->tx_packets);
+               u64_stats_add(&link->tx_bytes, mhi_res->bytes_xferd);
+       }
+       u64_stats_update_end(&link->tx_syncp);
+
+       if (netif_queue_stopped(ndev) && !mhi_queue_is_full(mbim->mdev, DMA_TO_DEVICE))
+               netif_wake_queue(ndev);
+}
+
+static int mhi_mbim_ndo_open(struct net_device *ndev)
+{
+       struct mhi_mbim_link *link = wwan_netdev_drvpriv(ndev);
+
+       /* Feed the MHI rx buffer pool */
+       schedule_delayed_work(&link->mbim->rx_refill, 0);
+
+       /* Carrier is established via out-of-band channel (e.g. qmi) */
+       netif_carrier_on(ndev);
+
+       netif_start_queue(ndev);
+
+       return 0;
+}
+
+static int mhi_mbim_ndo_stop(struct net_device *ndev)
+{
+       netif_stop_queue(ndev);
+       netif_carrier_off(ndev);
+
+       return 0;
+}
+
+static const struct net_device_ops mhi_mbim_ndo = {
+       .ndo_open = mhi_mbim_ndo_open,
+       .ndo_stop = mhi_mbim_ndo_stop,
+       .ndo_start_xmit = mhi_mbim_ndo_xmit,
+       .ndo_get_stats64 = mhi_mbim_ndo_get_stats64,
+};
+
+static int mhi_mbim_newlink(void *ctxt, struct net_device *ndev, u32 if_id,
+                           struct netlink_ext_ack *extack)
+{
+       struct mhi_mbim_link *link = wwan_netdev_drvpriv(ndev);
+       struct mhi_mbim_context *mbim = ctxt;
+
+       link->session = if_id;
+       link->mbim = mbim;
+       link->ndev = ndev;
+       u64_stats_init(&link->rx_syncp);
+       u64_stats_init(&link->tx_syncp);
+
+       rcu_read_lock();
+       if (mhi_mbim_get_link_rcu(mbim, if_id)) {
+               rcu_read_unlock();
+               return -EEXIST;
+       }
+       rcu_read_unlock();
+
+       /* Already protected by RTNL lock */
+       hlist_add_head_rcu(&link->hlnode, &mbim->link_list[LINK_HASH(if_id)]);
+
+       return register_netdevice(ndev);
+}
+
+static void mhi_mbim_dellink(void *ctxt, struct net_device *ndev,
+                            struct list_head *head)
+{
+       struct mhi_mbim_link *link = wwan_netdev_drvpriv(ndev);
+
+       hlist_del_init_rcu(&link->hlnode);
+       synchronize_rcu();
+
+       unregister_netdevice_queue(ndev, head);
+}
+
+static void mhi_mbim_setup(struct net_device *ndev)
+{
+       ndev->header_ops = NULL;  /* No header */
+       ndev->type = ARPHRD_RAWIP;
+       ndev->needed_headroom = sizeof(struct mbim_tx_hdr);
+       ndev->hard_header_len = 0;
+       ndev->addr_len = 0;
+       ndev->flags = IFF_POINTOPOINT | IFF_NOARP;
+       ndev->netdev_ops = &mhi_mbim_ndo;
+       ndev->mtu = MHI_MBIM_DEFAULT_MTU;
+       ndev->min_mtu = ETH_MIN_MTU;
+       ndev->max_mtu = MHI_MAX_BUF_SZ - ndev->needed_headroom;
+       ndev->tx_queue_len = 1000;
+}
+
+static const struct wwan_ops mhi_mbim_wwan_ops = {
+       .priv_size = sizeof(struct mhi_mbim_link),
+       .setup = mhi_mbim_setup,
+       .newlink = mhi_mbim_newlink,
+       .dellink = mhi_mbim_dellink,
+};
+
+static int mhi_mbim_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id)
+{
+       struct mhi_controller *cntrl = mhi_dev->mhi_cntrl;
+       struct mhi_mbim_context *mbim;
+       int err;
+
+       mbim = devm_kzalloc(&mhi_dev->dev, sizeof(*mbim), GFP_KERNEL);
+       if (!mbim)
+               return -ENOMEM;
+
+       spin_lock_init(&mbim->tx_lock);
+       dev_set_drvdata(&mhi_dev->dev, mbim);
+       mbim->mdev = mhi_dev;
+       mbim->mru = mhi_dev->mhi_cntrl->mru ? mhi_dev->mhi_cntrl->mru : MHI_DEFAULT_MRU;
+
+       INIT_DELAYED_WORK(&mbim->rx_refill, mhi_net_rx_refill_work);
+
+       /* Start MHI channels */
+       err = mhi_prepare_for_transfer(mhi_dev, 0);
+       if (err)
+               return err;
+
+       /* Number of transfer descriptors determines size of the queue */
+       mbim->rx_queue_sz = mhi_get_free_desc_count(mhi_dev, DMA_FROM_DEVICE);
+
+       /* Register WWAN link ops with the MHI controller device representing the WWAN instance */
+       return wwan_register_ops(&cntrl->mhi_dev->dev, &mhi_mbim_wwan_ops, mbim, 0);
+}
+
+static void mhi_mbim_remove(struct mhi_device *mhi_dev)
+{
+       struct mhi_mbim_context *mbim = dev_get_drvdata(&mhi_dev->dev);
+       struct mhi_controller *cntrl = mhi_dev->mhi_cntrl;
+
+       mhi_unprepare_from_transfer(mhi_dev);
+       cancel_delayed_work_sync(&mbim->rx_refill);
+       wwan_unregister_ops(&cntrl->mhi_dev->dev);
+       kfree_skb(mbim->skbagg_head);
+       dev_set_drvdata(&mhi_dev->dev, NULL);
+}
+
+static const struct mhi_device_id mhi_mbim_id_table[] = {
+       /* Hardware-accelerated data path (to modem IPA), MBIM protocol */
+       { .chan = "IP_HW0_MBIM", .driver_data = 0 },
+       {}
+};
+MODULE_DEVICE_TABLE(mhi, mhi_mbim_id_table);
+
+static struct mhi_driver mhi_mbim_driver = {
+       .probe = mhi_mbim_probe,
+       .remove = mhi_mbim_remove,
+       .dl_xfer_cb = mhi_mbim_dl_callback,
+       .ul_xfer_cb = mhi_mbim_ul_callback,
+       .id_table = mhi_mbim_id_table,
+       .driver = {
+               .name = "mhi_wwan_mbim",
+               .owner = THIS_MODULE,
+       },
+};
+
+module_mhi_driver(mhi_mbim_driver);
+
+MODULE_AUTHOR("Loic Poulain <loic.poulain@linaro.org>");
+MODULE_DESCRIPTION("Network/MBIM over MHI");
+MODULE_LICENSE("GPL v2");
index 1575467..85bf8d5 100644 (file)
@@ -192,8 +192,7 @@ static void nfcsim_recv_wq(struct work_struct *work)
 
                if (!IS_ERR(skb))
                        dev_kfree_skb(skb);
-
-               skb = ERR_PTR(-ENODEV);
+               return;
        }
 
        dev->cb(dev->nfc_digital_dev, dev->arg, skb);
index 1421ffd..1af7a1e 100644 (file)
@@ -422,7 +422,7 @@ int s3fwrn5_fw_download(struct s3fwrn5_fw_info *fw_info)
        tfm = crypto_alloc_shash("sha1", 0, 0);
        if (IS_ERR(tfm)) {
                dev_err(&fw_info->ndev->nfc_dev->dev,
-                       "Cannot allocate shash (code=%d)\n", ret);
+                       "Cannot allocate shash (code=%pe)\n", tfm);
                return PTR_ERR(tfm);
        }
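The s3fwrn5 fix above also switches the format specifier from %d (which was printing an unrelated ret) to %pe, the printk extension for error pointers. With CONFIG_SYMBOLIC_ERRNAME=y this prints the symbolic error name; a one-line illustration at a hypothetical call site:

	#include <linux/err.h>
	#include <linux/printk.h>

	static void demo_report(void)
	{
		void *p = ERR_PTR(-ENOMEM);

		/* with CONFIG_SYMBOLIC_ERRNAME=y this logs "-ENOMEM" */
		pr_err("allocation failed: %pe\n", p);
	}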
 
index 11779be..dfd9dec 100644 (file)
@@ -900,7 +900,10 @@ static inline blk_status_t nvme_setup_write_zeroes(struct nvme_ns *ns,
                cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req)));
        cmnd->write_zeroes.length =
                cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
-       cmnd->write_zeroes.control = 0;
+       if (nvme_ns_has_pi(ns))
+               cmnd->write_zeroes.control = cpu_to_le16(NVME_RW_PRINFO_PRACT);
+       else
+               cmnd->write_zeroes.control = 0;
        return BLK_STS_OK;
 }
 
@@ -3807,6 +3810,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
 
 static void nvme_ns_remove(struct nvme_ns *ns)
 {
+       bool last_path = false;
+
        if (test_and_set_bit(NVME_NS_REMOVING, &ns->flags))
                return;
 
@@ -3815,8 +3820,6 @@ static void nvme_ns_remove(struct nvme_ns *ns)
 
        mutex_lock(&ns->ctrl->subsys->lock);
        list_del_rcu(&ns->siblings);
-       if (list_empty(&ns->head->list))
-               list_del_init(&ns->head->entry);
        mutex_unlock(&ns->ctrl->subsys->lock);
 
        synchronize_rcu(); /* guarantee not available in head->list */
@@ -3836,7 +3839,15 @@ static void nvme_ns_remove(struct nvme_ns *ns)
        list_del_init(&ns->list);
        up_write(&ns->ctrl->namespaces_rwsem);
 
-       nvme_mpath_check_last_path(ns);
+       /* Synchronize with nvme_init_ns_head() */
+       mutex_lock(&ns->head->subsys->lock);
+       if (list_empty(&ns->head->list)) {
+               list_del_init(&ns->head->entry);
+               last_path = true;
+       }
+       mutex_unlock(&ns->head->subsys->lock);
+       if (last_path)
+               nvme_mpath_shutdown_disk(ns->head);
        nvme_put_ns(ns);
 }
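The nvme_ns_remove() rework above is an instance of the decide-under-lock, act-outside-lock shape: the last-path test runs while the subsystem lock stabilizes head->list, but the heavyweight nvme_mpath_shutdown_disk() call is deferred until the lock is dropped. A generic sketch of the pattern (names hypothetical, not the nvme code itself):

	#include <linux/mutex.h>
	#include <linux/rculist.h>

	static void demo_remove_member(struct mutex *lock, struct list_head *node,
				       struct list_head *list, void (*shutdown)(void))
	{
		bool last = false;

		mutex_lock(lock);
		list_del_rcu(node);		/* drop our membership */
		if (list_empty(list))
			last = true;		/* decide while the list is stable */
		mutex_unlock(lock);

		if (last)
			shutdown();		/* act without holding the lock */
	}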
 
index 0ea5298..3f32c5e 100644 (file)
@@ -760,14 +760,21 @@ void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id)
 #endif
 }
 
-void nvme_mpath_remove_disk(struct nvme_ns_head *head)
+void nvme_mpath_shutdown_disk(struct nvme_ns_head *head)
 {
        if (!head->disk)
                return;
+       kblockd_schedule_work(&head->requeue_work);
        if (head->disk->flags & GENHD_FL_UP) {
                nvme_cdev_del(&head->cdev, &head->cdev_device);
                del_gendisk(head->disk);
        }
+}
+
+void nvme_mpath_remove_disk(struct nvme_ns_head *head)
+{
+       if (!head->disk)
+               return;
        blk_set_queue_dying(head->disk->queue);
        /* make sure all pending bios are cleaned up */
        kblockd_schedule_work(&head->requeue_work);
index 18ef8dd..5cd1fa3 100644 (file)
@@ -716,14 +716,7 @@ void nvme_mpath_uninit(struct nvme_ctrl *ctrl);
 void nvme_mpath_stop(struct nvme_ctrl *ctrl);
 bool nvme_mpath_clear_current_path(struct nvme_ns *ns);
 void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl);
-
-static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
-{
-       struct nvme_ns_head *head = ns->head;
-
-       if (head->disk && list_empty(&head->list))
-               kblockd_schedule_work(&head->requeue_work);
-}
+void nvme_mpath_shutdown_disk(struct nvme_ns_head *head);
 
 static inline void nvme_trace_bio_complete(struct request *req)
 {
@@ -772,7 +765,7 @@ static inline bool nvme_mpath_clear_current_path(struct nvme_ns *ns)
 static inline void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl)
 {
 }
-static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
+static inline void nvme_mpath_shutdown_disk(struct nvme_ns_head *head)
 {
 }
 static inline void nvme_trace_bio_complete(struct request *req)
index 320051f..5185208 100644 (file)
@@ -2631,7 +2631,9 @@ static void nvme_reset_work(struct work_struct *work)
        bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL);
        int result;
 
-       if (WARN_ON(dev->ctrl.state != NVME_CTRL_RESETTING)) {
+       if (dev->ctrl.state != NVME_CTRL_RESETTING) {
+               dev_warn(dev->ctrl.device, "ctrl state %d is not RESETTING\n",
+                        dev->ctrl.state);
                result = -ENODEV;
                goto out;
        }
index daaf700..35bac7a 100644 (file)
@@ -56,7 +56,7 @@ TRACE_EVENT(nvme_setup_cmd,
                __field(u8, fctype)
                __field(u16, cid)
                __field(u32, nsid)
-               __field(u64, metadata)
+               __field(bool, metadata)
                __array(u8, cdw10, 24)
            ),
            TP_fast_assign(
@@ -66,13 +66,13 @@ TRACE_EVENT(nvme_setup_cmd,
                __entry->flags = cmd->common.flags;
                __entry->cid = cmd->common.command_id;
                __entry->nsid = le32_to_cpu(cmd->common.nsid);
-               __entry->metadata = le64_to_cpu(cmd->common.metadata);
+               __entry->metadata = !!blk_integrity_rq(req);
                __entry->fctype = cmd->fabrics.fctype;
                __assign_disk_name(__entry->disk, req->rq_disk);
                memcpy(__entry->cdw10, &cmd->common.cdw10,
                        sizeof(__entry->cdw10));
            ),
-           TP_printk("nvme%d: %sqid=%d, cmdid=%u, nsid=%u, flags=0x%x, meta=0x%llx, cmd=(%s %s)",
+           TP_printk("nvme%d: %sqid=%d, cmdid=%u, nsid=%u, flags=0x%x, meta=0x%x, cmd=(%s %s)",
                      __entry->ctrl_id, __print_disk_name(__entry->disk),
                      __entry->qid, __entry->cid, __entry->nsid,
                      __entry->flags, __entry->metadata,
index 85887d8..192c904 100644 (file)
@@ -112,6 +112,7 @@ static int i82092aa_pci_probe(struct pci_dev *dev,
        for (i = 0; i < socket_count; i++) {
                sockets[i].card_state = 1; /* 1 = present but empty */
                sockets[i].io_base = pci_resource_start(dev, 0);
+               sockets[i].dev = dev;
                sockets[i].socket.features |= SS_CAP_PCCARD;
                sockets[i].socket.map_size = 0x1000;
                sockets[i].socket.irq_mask = 0;
index b9da58e..3481479 100644 (file)
 #define AMD_PMC_RESULT_CMD_UNKNOWN           0xFE
 #define AMD_PMC_RESULT_FAILED                0xFF
 
+/* FCH SSC Registers */
+#define FCH_S0I3_ENTRY_TIME_L_OFFSET   0x30
+#define FCH_S0I3_ENTRY_TIME_H_OFFSET   0x34
+#define FCH_S0I3_EXIT_TIME_L_OFFSET    0x38
+#define FCH_S0I3_EXIT_TIME_H_OFFSET    0x3C
+#define FCH_SSC_MAPPING_SIZE           0x800
+#define FCH_BASE_PHY_ADDR_LOW          0xFED81100
+#define FCH_BASE_PHY_ADDR_HIGH         0x00000000
+
+/* SMU Message Definitions */
+#define SMU_MSG_GETSMUVERSION          0x02
+#define SMU_MSG_LOG_GETDRAM_ADDR_HI    0x04
+#define SMU_MSG_LOG_GETDRAM_ADDR_LO    0x05
+#define SMU_MSG_LOG_START              0x06
+#define SMU_MSG_LOG_RESET              0x07
+#define SMU_MSG_LOG_DUMP_DATA          0x08
+#define SMU_MSG_GET_SUP_CONSTRAINTS    0x09
 /* List of supported CPU ids */
 #define AMD_CPU_ID_RV                  0x15D0
 #define AMD_CPU_ID_RN                  0x1630
 #define AMD_CPU_ID_PCO                 AMD_CPU_ID_RV
 #define AMD_CPU_ID_CZN                 AMD_CPU_ID_RN
+#define AMD_CPU_ID_YC                  0x14B5
 
-#define AMD_SMU_FW_VERSION             0x0
 #define PMC_MSG_DELAY_MIN_US           100
 #define RESPONSE_REGISTER_LOOP_MAX     200
 
+#define SOC_SUBSYSTEM_IP_MAX   12
+#define DELAY_MIN_US           2000
+#define DELAY_MAX_US           3000
 enum amd_pmc_def {
        MSG_TEST = 0x01,
        MSG_OS_HINT_PCO,
        MSG_OS_HINT_RN,
 };
 
+struct amd_pmc_bit_map {
+       const char *name;
+       u32 bit_mask;
+};
+
+static const struct amd_pmc_bit_map soc15_ip_blk[] = {
+       {"DISPLAY",     BIT(0)},
+       {"CPU",         BIT(1)},
+       {"GFX",         BIT(2)},
+       {"VDD",         BIT(3)},
+       {"ACP",         BIT(4)},
+       {"VCN",         BIT(5)},
+       {"ISP",         BIT(6)},
+       {"NBIO",        BIT(7)},
+       {"DF",          BIT(8)},
+       {"USB0",        BIT(9)},
+       {"USB1",        BIT(10)},
+       {"LAPIC",       BIT(11)},
+       {}
+};
+
 struct amd_pmc_dev {
        void __iomem *regbase;
-       void __iomem *smu_base;
+       void __iomem *smu_virt_addr;
+       void __iomem *fch_virt_addr;
        u32 base_addr;
        u32 cpu_id;
+       u32 active_ips;
        struct device *dev;
+       struct mutex lock; /* generic mutex lock */
 #if IS_ENABLED(CONFIG_DEBUG_FS)
        struct dentry *dbgfs_dir;
 #endif /* CONFIG_DEBUG_FS */
 };
 
 static struct amd_pmc_dev pmc;
+static int amd_pmc_send_cmd(struct amd_pmc_dev *dev, bool set, u32 *data, u8 msg, bool ret);
 
 static inline u32 amd_pmc_reg_read(struct amd_pmc_dev *dev, int reg_offset)
 {
@@ -85,18 +130,77 @@ static inline void amd_pmc_reg_write(struct amd_pmc_dev *dev, int reg_offset, u3
        iowrite32(val, dev->regbase + reg_offset);
 }
 
+struct smu_metrics {
+       u32 table_version;
+       u32 hint_count;
+       u32 s0i3_cyclecount;
+       u32 timein_s0i2;
+       u64 timeentering_s0i3_lastcapture;
+       u64 timeentering_s0i3_totaltime;
+       u64 timeto_resume_to_os_lastcapture;
+       u64 timeto_resume_to_os_totaltime;
+       u64 timein_s0i3_lastcapture;
+       u64 timein_s0i3_totaltime;
+       u64 timein_swdrips_lastcapture;
+       u64 timein_swdrips_totaltime;
+       u64 timecondition_notmet_lastcapture[SOC_SUBSYSTEM_IP_MAX];
+       u64 timecondition_notmet_totaltime[SOC_SUBSYSTEM_IP_MAX];
+} __packed;
+
 #ifdef CONFIG_DEBUG_FS
 static int smu_fw_info_show(struct seq_file *s, void *unused)
 {
        struct amd_pmc_dev *dev = s->private;
-       u32 value;
+       struct smu_metrics table;
+       int idx;
+
+       if (dev->cpu_id == AMD_CPU_ID_PCO)
+               return -EINVAL;
+
+       memcpy_fromio(&table, dev->smu_virt_addr, sizeof(struct smu_metrics));
+
+       seq_puts(s, "\n=== SMU Statistics ===\n");
+       seq_printf(s, "Table Version: %d\n", table.table_version);
+       seq_printf(s, "Hint Count: %d\n", table.hint_count);
+       seq_printf(s, "S0i3 Cycle Count: %d\n", table.s0i3_cyclecount);
+       seq_printf(s, "Time (in us) to S0i3: %lld\n", table.timeentering_s0i3_lastcapture);
+       seq_printf(s, "Time (in us) in S0i3: %lld\n", table.timein_s0i3_lastcapture);
+
+       seq_puts(s, "\n=== Active time (in us) ===\n");
+       for (idx = 0 ; idx < SOC_SUBSYSTEM_IP_MAX ; idx++) {
+               if (soc15_ip_blk[idx].bit_mask & dev->active_ips)
+                       seq_printf(s, "%-8s : %lld\n", soc15_ip_blk[idx].name,
+                                  table.timecondition_notmet_lastcapture[idx]);
+       }
 
-       value = ioread32(dev->smu_base + AMD_SMU_FW_VERSION);
-       seq_printf(s, "SMU FW Info: %x\n", value);
        return 0;
 }
 DEFINE_SHOW_ATTRIBUTE(smu_fw_info);
 
+static int s0ix_stats_show(struct seq_file *s, void *unused)
+{
+       struct amd_pmc_dev *dev = s->private;
+       u64 entry_time, exit_time, residency;
+
+       entry_time = ioread32(dev->fch_virt_addr + FCH_S0I3_ENTRY_TIME_H_OFFSET);
+       entry_time = entry_time << 32 | ioread32(dev->fch_virt_addr + FCH_S0I3_ENTRY_TIME_L_OFFSET);
+
+       exit_time = ioread32(dev->fch_virt_addr + FCH_S0I3_EXIT_TIME_H_OFFSET);
+       exit_time = exit_time << 32 | ioread32(dev->fch_virt_addr + FCH_S0I3_EXIT_TIME_L_OFFSET);
+
+       /* The counter ticks at 48 MHz (48 ticks per microsecond); convert to microseconds */
+       residency = exit_time - entry_time;
+       do_div(residency, 48);
+
+       seq_puts(s, "=== S0ix statistics ===\n");
+       seq_printf(s, "S0ix Entry Time: %lld\n", entry_time);
+       seq_printf(s, "S0ix Exit Time: %lld\n", exit_time);
+       seq_printf(s, "Residency Time: %lld\n", residency);
+
+       return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(s0ix_stats);
+
 static void amd_pmc_dbgfs_unregister(struct amd_pmc_dev *dev)
 {
        debugfs_remove_recursive(dev->dbgfs_dir);
@@ -107,6 +211,8 @@ static void amd_pmc_dbgfs_register(struct amd_pmc_dev *dev)
        dev->dbgfs_dir = debugfs_create_dir("amd_pmc", NULL);
        debugfs_create_file("smu_fw_info", 0644, dev->dbgfs_dir, dev,
                            &smu_fw_info_fops);
+       debugfs_create_file("s0ix_stats", 0644, dev->dbgfs_dir, dev,
+                           &s0ix_stats_fops);
 }
 #else
 static inline void amd_pmc_dbgfs_register(struct amd_pmc_dev *dev)
@@ -118,6 +224,32 @@ static inline void amd_pmc_dbgfs_unregister(struct amd_pmc_dev *dev)
 }
 #endif /* CONFIG_DEBUG_FS */
 
+static int amd_pmc_setup_smu_logging(struct amd_pmc_dev *dev)
+{
+       u32 phys_addr_low, phys_addr_hi;
+       u64 smu_phys_addr;
+
+       if (dev->cpu_id == AMD_CPU_ID_PCO)
+               return -EINVAL;
+
+       /* Get the list of active devices from the SMU */
+       amd_pmc_send_cmd(dev, 0, &dev->active_ips, SMU_MSG_GET_SUP_CONSTRAINTS, 1);
+
+       /* Get the DRAM address */
+       amd_pmc_send_cmd(dev, 0, &phys_addr_low, SMU_MSG_LOG_GETDRAM_ADDR_LO, 1);
+       amd_pmc_send_cmd(dev, 0, &phys_addr_hi, SMU_MSG_LOG_GETDRAM_ADDR_HI, 1);
+       smu_phys_addr = ((u64)phys_addr_hi << 32 | phys_addr_low);
+
+       dev->smu_virt_addr = devm_ioremap(dev->dev, smu_phys_addr, sizeof(struct smu_metrics));
+       if (!dev->smu_virt_addr)
+               return -ENOMEM;
+
+       /* Start the logging */
+       amd_pmc_send_cmd(dev, 0, NULL, SMU_MSG_LOG_START, 0);
+
+       return 0;
+}
+
 static void amd_pmc_dump_registers(struct amd_pmc_dev *dev)
 {
        u32 value;
@@ -132,19 +264,19 @@ static void amd_pmc_dump_registers(struct amd_pmc_dev *dev)
        dev_dbg(dev->dev, "AMD_PMC_REGISTER_MESSAGE:%x\n", value);
 }
 
-static int amd_pmc_send_cmd(struct amd_pmc_dev *dev, bool set)
+static int amd_pmc_send_cmd(struct amd_pmc_dev *dev, bool set, u32 *data, u8 msg, bool ret)
 {
        int rc;
-       u8 msg;
        u32 val;
 
+       mutex_lock(&dev->lock);
        /* Wait until we get a valid response */
        rc = readx_poll_timeout(ioread32, dev->regbase + AMD_PMC_REGISTER_RESPONSE,
-                               val, val > 0, PMC_MSG_DELAY_MIN_US,
+                               val, val != 0, PMC_MSG_DELAY_MIN_US,
                                PMC_MSG_DELAY_MIN_US * RESPONSE_REGISTER_LOOP_MAX);
        if (rc) {
                dev_err(dev->dev, "failed to talk to SMU\n");
-               return rc;
+               goto out_unlock;
        }
 
        /* Write zero to response register */
@@ -154,34 +286,91 @@ static int amd_pmc_send_cmd(struct amd_pmc_dev *dev, bool set)
        amd_pmc_reg_write(dev, AMD_PMC_REGISTER_ARGUMENT, set);
 
        /* Write message ID to message ID register */
-       msg = (dev->cpu_id == AMD_CPU_ID_RN) ? MSG_OS_HINT_RN : MSG_OS_HINT_PCO;
        amd_pmc_reg_write(dev, AMD_PMC_REGISTER_MESSAGE, msg);
-       return 0;
+
+       /* Wait until we get a valid response */
+       rc = readx_poll_timeout(ioread32, dev->regbase + AMD_PMC_REGISTER_RESPONSE,
+                               val, val != 0, PMC_MSG_DELAY_MIN_US,
+                               PMC_MSG_DELAY_MIN_US * RESPONSE_REGISTER_LOOP_MAX);
+       if (rc) {
+               dev_err(dev->dev, "SMU response timed out\n");
+               goto out_unlock;
+       }
+
+       switch (val) {
+       case AMD_PMC_RESULT_OK:
+               if (ret) {
+                       /* PMFW may take longer to post the result data */
+                       usleep_range(DELAY_MIN_US, 10 * DELAY_MAX_US);
+                       *data = amd_pmc_reg_read(dev, AMD_PMC_REGISTER_ARGUMENT);
+               }
+               break;
+       case AMD_PMC_RESULT_CMD_REJECT_BUSY:
+               dev_err(dev->dev, "SMU not ready. err: 0x%x\n", val);
+               rc = -EBUSY;
+               goto out_unlock;
+       case AMD_PMC_RESULT_CMD_UNKNOWN:
+               dev_err(dev->dev, "SMU cmd unknown. err: 0x%x\n", val);
+               rc = -EINVAL;
+               goto out_unlock;
+       case AMD_PMC_RESULT_CMD_REJECT_PREREQ:
+       case AMD_PMC_RESULT_FAILED:
+       default:
+               dev_err(dev->dev, "SMU cmd failed. err: 0x%x\n", val);
+               rc = -EIO;
+               goto out_unlock;
+       }
+
+out_unlock:
+       mutex_unlock(&dev->lock);
+       amd_pmc_dump_registers(dev);
+       return rc;
+}
+
+static int amd_pmc_get_os_hint(struct amd_pmc_dev *dev)
+{
+       switch (dev->cpu_id) {
+       case AMD_CPU_ID_PCO:
+               return MSG_OS_HINT_PCO;
+       case AMD_CPU_ID_RN:
+       case AMD_CPU_ID_YC:
+               return MSG_OS_HINT_RN;
+       }
+       return -EINVAL;
 }
 
 static int __maybe_unused amd_pmc_suspend(struct device *dev)
 {
        struct amd_pmc_dev *pdev = dev_get_drvdata(dev);
        int rc;
+       u8 msg;
+
+       /* Reset and start SMU logging to monitor the S0i3 stats */
+       amd_pmc_send_cmd(pdev, 0, NULL, SMU_MSG_LOG_RESET, 0);
+       amd_pmc_send_cmd(pdev, 0, NULL, SMU_MSG_LOG_START, 0);
 
-       rc = amd_pmc_send_cmd(pdev, 1);
+       msg = amd_pmc_get_os_hint(pdev);
+       rc = amd_pmc_send_cmd(pdev, 1, NULL, msg, 0);
        if (rc)
                dev_err(pdev->dev, "suspend failed\n");
 
-       amd_pmc_dump_registers(pdev);
-       return 0;
+       return rc;
 }
 
 static int __maybe_unused amd_pmc_resume(struct device *dev)
 {
        struct amd_pmc_dev *pdev = dev_get_drvdata(dev);
        int rc;
+       u8 msg;
+
+       /* Let SMU know that we are looking for stats */
+       amd_pmc_send_cmd(pdev, 0, NULL, SMU_MSG_LOG_DUMP_DATA, 0);
 
-       rc = amd_pmc_send_cmd(pdev, 0);
+       msg = amd_pmc_get_os_hint(pdev);
+       rc = amd_pmc_send_cmd(pdev, 0, NULL, msg, 0);
        if (rc)
                dev_err(pdev->dev, "resume failed\n");
 
-       amd_pmc_dump_registers(pdev);
        return 0;
 }
 
@@ -190,6 +379,7 @@ static const struct dev_pm_ops amd_pmc_pm_ops = {
 };
 
 static const struct pci_device_id pmc_pci_ids[] = {
+       { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_YC) },
        { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_CZN) },
        { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_RN) },
        { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_PCO) },
@@ -201,9 +391,8 @@ static int amd_pmc_probe(struct platform_device *pdev)
 {
        struct amd_pmc_dev *dev = &pmc;
        struct pci_dev *rdev;
-       u32 base_addr_lo;
-       u32 base_addr_hi;
-       u64 base_addr;
+       u32 base_addr_lo, base_addr_hi;
+       u64 base_addr, fch_phys_addr;
        int err;
        u32 val;
 
@@ -248,16 +437,25 @@ static int amd_pmc_probe(struct platform_device *pdev)
        pci_dev_put(rdev);
        base_addr = ((u64)base_addr_hi << 32 | base_addr_lo);
 
-       dev->smu_base = devm_ioremap(dev->dev, base_addr, AMD_PMC_MAPPING_SIZE);
-       if (!dev->smu_base)
-               return -ENOMEM;
-
        dev->regbase = devm_ioremap(dev->dev, base_addr + AMD_PMC_BASE_ADDR_OFFSET,
                                    AMD_PMC_MAPPING_SIZE);
        if (!dev->regbase)
                return -ENOMEM;
 
-       amd_pmc_dump_registers(dev);
+       mutex_init(&dev->lock);
+
+       /* Use FCH registers to get the S0ix stats */
+       base_addr_lo = FCH_BASE_PHY_ADDR_LOW;
+       base_addr_hi = FCH_BASE_PHY_ADDR_HIGH;
+       fch_phys_addr = ((u64)base_addr_hi << 32 | base_addr_lo);
+       dev->fch_virt_addr = devm_ioremap(dev->dev, fch_phys_addr, FCH_SSC_MAPPING_SIZE);
+       if (!dev->fch_virt_addr)
+               return -ENOMEM;
+
+       /* Use SMU to get the s0i3 debug stats */
+       err = amd_pmc_setup_smu_logging(dev);
+       if (err)
+               dev_err(dev->dev, "SMU debugging info not supported on this platform\n");
 
        platform_set_drvdata(pdev, dev);
        amd_pmc_dbgfs_register(dev);
@@ -269,11 +467,14 @@ static int amd_pmc_remove(struct platform_device *pdev)
        struct amd_pmc_dev *dev = platform_get_drvdata(pdev);
 
        amd_pmc_dbgfs_unregister(dev);
+       mutex_destroy(&dev->lock);
        return 0;
 }
 
 static const struct acpi_device_id amd_pmc_acpi_ids[] = {
        {"AMDI0005", 0},
+       {"AMDI0006", 0},
+       {"AMDI0007", 0},
        {"AMD0004", 0},
        { }
 };
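The reworked amd_pmc_send_cmd() above takes the message ID and an optional result pointer, so callers choose between a query and a fire-and-forget style. Two hypothetical callers, assuming only the names defined in this diff:

	/* Query style: ret=1 reads the result back from the argument register. */
	static int demo_get_smu_version(struct amd_pmc_dev *dev, u32 *version)
	{
		return amd_pmc_send_cmd(dev, 0, version, SMU_MSG_GETSMUVERSION, 1);
	}

	/* Fire-and-forget style: ret=0, no result expected. */
	static void demo_restart_logging(struct amd_pmc_dev *dev)
	{
		amd_pmc_send_cmd(dev, 0, NULL, SMU_MSG_LOG_RESET, 0);
		amd_pmc_send_cmd(dev, 0, NULL, SMU_MSG_LOG_START, 0);
	}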
index 5529d7b..fbb224a 100644 (file)
@@ -141,6 +141,7 @@ static u8 gigabyte_wmi_detect_sensor_usability(struct wmi_device *wdev)
 
 static const struct dmi_system_id gigabyte_wmi_known_working_platforms[] = {
        DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 AORUS ELITE"),
+       DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 AORUS ELITE V2"),
        DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 GAMING X V2"),
        DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550M AORUS PRO-P"),
        DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550M DS3H"),
index 078648a..e5fbe01 100644 (file)
@@ -25,6 +25,7 @@ static const struct acpi_device_id intel_hid_ids[] = {
        {"INT33D5", 0},
        {"INTC1051", 0},
        {"INTC1054", 0},
+       {"INTC1070", 0},
        {"", 0},
 };
 MODULE_DEVICE_TABLE(acpi, intel_hid_ids);
index 3671b5d..6cfed44 100644 (file)
@@ -571,6 +571,11 @@ static ssize_t current_value_store(struct kobject *kobj,
        else
                ret = tlmi_save_bios_settings("");
 
+       if (!ret && !tlmi_priv.pending_changes) {
+               tlmi_priv.pending_changes = true;
+               /* let userland know it may need to check reboot pending again */
+               kobject_uevent(&tlmi_priv.class_dev->kobj, KOBJ_CHANGE);
+       }
 out:
        kfree(auth_str);
        kfree(set_str);
@@ -647,6 +652,14 @@ static struct kobj_type tlmi_pwd_setting_ktype = {
        .sysfs_ops      = &tlmi_kobj_sysfs_ops,
 };
 
+static ssize_t pending_reboot_show(struct kobject *kobj, struct kobj_attribute *attr,
+                                  char *buf)
+{
+       return sprintf(buf, "%d\n", tlmi_priv.pending_changes);
+}
+
+static struct kobj_attribute pending_reboot = __ATTR_RO(pending_reboot);
+
 /* ---- Initialisation --------------------------------------------------------- */
 static void tlmi_release_attr(void)
 {
@@ -659,6 +672,7 @@ static void tlmi_release_attr(void)
                        kobject_put(&tlmi_priv.setting[i]->kobj);
                }
        }
+       sysfs_remove_file(&tlmi_priv.attribute_kset->kobj, &pending_reboot.attr);
        kset_unregister(tlmi_priv.attribute_kset);
 
        /* Authentication structures */
@@ -709,8 +723,8 @@ static int tlmi_sysfs_init(void)
 
                /* Build attribute */
                tlmi_priv.setting[i]->kobj.kset = tlmi_priv.attribute_kset;
-               ret = kobject_init_and_add(&tlmi_priv.setting[i]->kobj, &tlmi_attr_setting_ktype,
-                               NULL, "%s", tlmi_priv.setting[i]->display_name);
+               ret = kobject_add(&tlmi_priv.setting[i]->kobj, NULL,
+                                 "%s", tlmi_priv.setting[i]->display_name);
                if (ret)
                        goto fail_create_attr;
 
@@ -719,6 +733,10 @@ static int tlmi_sysfs_init(void)
                        goto fail_create_attr;
        }
 
+       ret = sysfs_create_file(&tlmi_priv.attribute_kset->kobj, &pending_reboot.attr);
+       if (ret)
+               goto fail_create_attr;
+
        /* Create authentication entries */
        tlmi_priv.authentication_kset = kset_create_and_add("authentication", NULL,
                                                                &tlmi_priv.class_dev->kobj);
@@ -727,8 +745,7 @@ static int tlmi_sysfs_init(void)
                goto fail_create_attr;
        }
        tlmi_priv.pwd_admin->kobj.kset = tlmi_priv.authentication_kset;
-       ret = kobject_init_and_add(&tlmi_priv.pwd_admin->kobj, &tlmi_pwd_setting_ktype,
-                       NULL, "%s", "Admin");
+       ret = kobject_add(&tlmi_priv.pwd_admin->kobj, NULL, "%s", "Admin");
        if (ret)
                goto fail_create_attr;
 
@@ -737,8 +754,7 @@ static int tlmi_sysfs_init(void)
                goto fail_create_attr;
 
        tlmi_priv.pwd_power->kobj.kset = tlmi_priv.authentication_kset;
-       ret = kobject_init_and_add(&tlmi_priv.pwd_power->kobj, &tlmi_pwd_setting_ktype,
-                       NULL, "%s", "System");
+       ret = kobject_add(&tlmi_priv.pwd_power->kobj, NULL, "%s", "System");
        if (ret)
                goto fail_create_attr;
 
@@ -818,6 +834,7 @@ static int tlmi_analyze(void)
                                pr_info("Error retrieving possible values for %d : %s\n",
                                                i, setting->display_name);
                }
+               kobject_init(&setting->kobj, &tlmi_attr_setting_ktype);
                tlmi_priv.setting[i] = setting;
                tlmi_priv.settings_count++;
                kfree(item);
@@ -844,10 +861,12 @@ static int tlmi_analyze(void)
        if (pwdcfg.password_state & TLMI_PAP_PWD)
                tlmi_priv.pwd_admin->valid = true;
 
+       kobject_init(&tlmi_priv.pwd_admin->kobj, &tlmi_pwd_setting_ktype);
+
        tlmi_priv.pwd_power = kzalloc(sizeof(struct tlmi_pwd_setting), GFP_KERNEL);
        if (!tlmi_priv.pwd_power) {
                ret = -ENOMEM;
-               goto fail_clear_attr;
+               goto fail_free_pwd_admin;
        }
        strscpy(tlmi_priv.pwd_power->kbdlang, "us", TLMI_LANG_MAXLEN);
        tlmi_priv.pwd_power->encoding = TLMI_ENCODING_ASCII;
@@ -859,11 +878,19 @@ static int tlmi_analyze(void)
        if (pwdcfg.password_state & TLMI_POP_PWD)
                tlmi_priv.pwd_power->valid = true;
 
+       kobject_init(&tlmi_priv.pwd_power->kobj, &tlmi_pwd_setting_ktype);
+
        return 0;
 
+fail_free_pwd_admin:
+       kfree(tlmi_priv.pwd_admin);
 fail_clear_attr:
-       for (i = 0; i < TLMI_SETTINGS_COUNT; ++i)
-               kfree(tlmi_priv.setting[i]);
+       for (i = 0; i < TLMI_SETTINGS_COUNT; ++i) {
+               if (tlmi_priv.setting[i]) {
+                       kfree(tlmi_priv.setting[i]->possible_values);
+                       kfree(tlmi_priv.setting[i]);
+               }
+       }
        return ret;
 }
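The think-lmi changes above split kobject setup into two phases: kobject_init() during tlmi_analyze(), so the refcounting machinery is live before anything can fail, and kobject_add() later in tlmi_sysfs_init() once the kset exists; after that, kobject_put() is the single correct cleanup on every path. A minimal sketch of the lifecycle, assuming a ktype whose release callback frees the containing object:

	#include <linux/kobject.h>

	static int demo_register(struct kobject *kobj, struct kobj_type *ktype,
				 struct kset *kset)
	{
		int ret;

		kobject_init(kobj, ktype);	/* phase 1: refcount live */
		kobj->kset = kset;
		ret = kobject_add(kobj, NULL, "%s", "demo");	/* phase 2: in sysfs */
		if (ret)
			kobject_put(kobj);	/* safe: init already ran */
		return ret;
	}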
 
index 6fa8da7..eb59884 100644 (file)
@@ -60,6 +60,7 @@ struct think_lmi {
        bool can_get_bios_selections;
        bool can_set_bios_password;
        bool can_get_password_settings;
+       bool pending_changes;
 
        struct tlmi_attr_setting *setting[TLMI_SETTINGS_COUNT];
        struct device *class_dev;
index b010e4c..11c60a2 100644 (file)
@@ -78,7 +78,7 @@ static int wl_add(struct acpi_device *device)
 
        err = wireless_input_setup();
        if (err)
-               pr_err("Failed to setup hp wireless hotkeys\n");
+               pr_err("Failed to setup wireless hotkeys\n");
 
        return err;
 }
index 8c20e52..8b08745 100644 (file)
@@ -157,6 +157,13 @@ config PTP_1588_CLOCK_OCP
        tristate "OpenCompute TimeCard as PTP clock"
        depends on PTP_1588_CLOCK
        depends on HAS_IOMEM && PCI
+       depends on SPI && I2C && MTD
+       imply SPI_MEM
+       imply SPI_XILINX
+       imply MTD_SPI_NOR
+       imply I2C_XILINX
+       select SERIAL_8250
+
        default n
        help
          This driver adds support for an OpenCompute time card.
index 0d1034e..92edf77 100644 (file)
@@ -6,15 +6,29 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/pci.h>
+#include <linux/serial_8250.h>
+#include <linux/clkdev.h>
+#include <linux/clk-provider.h>
+#include <linux/platform_device.h>
 #include <linux/ptp_clock_kernel.h>
+#include <linux/spi/spi.h>
+#include <linux/spi/xilinx_spi.h>
+#include <net/devlink.h>
+#include <linux/i2c.h>
+#include <linux/mtd/mtd.h>
 
-static const struct pci_device_id ptp_ocp_pcidev_id[] = {
-       { PCI_DEVICE(0x1d9b, 0x0400) },
-       { 0 }
-};
-MODULE_DEVICE_TABLE(pci, ptp_ocp_pcidev_id);
+#ifndef PCI_VENDOR_ID_FACEBOOK
+#define PCI_VENDOR_ID_FACEBOOK 0x1d9b
+#endif
 
-#define OCP_REGISTER_OFFSET    0x01000000
+#ifndef PCI_DEVICE_ID_FACEBOOK_TIMECARD
+#define PCI_DEVICE_ID_FACEBOOK_TIMECARD 0x0400
+#endif
+
+static struct class timecard_class = {
+       .owner          = THIS_MODULE,
+       .name           = "timecard",
+};
 
 struct ocp_reg {
        u32     ctrl;
@@ -29,18 +43,29 @@ struct ocp_reg {
        u32     __pad1[2];
        u32     offset_ns;
        u32     offset_window_ns;
+       u32     __pad2[2];
+       u32     drift_ns;
+       u32     drift_window_ns;
+       u32     __pad3[6];
+       u32     servo_offset_p;
+       u32     servo_offset_i;
+       u32     servo_drift_p;
+       u32     servo_drift_i;
 };
 
 #define OCP_CTRL_ENABLE                BIT(0)
 #define OCP_CTRL_ADJUST_TIME   BIT(1)
 #define OCP_CTRL_ADJUST_OFFSET BIT(2)
+#define OCP_CTRL_ADJUST_DRIFT  BIT(3)
+#define OCP_CTRL_ADJUST_SERVO  BIT(8)
 #define OCP_CTRL_READ_TIME_REQ BIT(30)
 #define OCP_CTRL_READ_TIME_DONE        BIT(31)
 
 #define OCP_STATUS_IN_SYNC     BIT(0)
+#define OCP_STATUS_IN_HOLDOVER BIT(1)
 
 #define OCP_SELECT_CLK_NONE    0
-#define OCP_SELECT_CLK_REG     6
+#define OCP_SELECT_CLK_REG     0xfe
 
 struct tod_reg {
        u32     ctrl;
@@ -55,8 +80,6 @@ struct tod_reg {
        u32     leap;
 };
 
-#define TOD_REGISTER_OFFSET    0x01050000
-
 #define TOD_CTRL_PROTOCOL      BIT(28)
 #define TOD_CTRL_DISABLE_FMT_A BIT(17)
 #define TOD_CTRL_DISABLE_FMT_B BIT(16)
@@ -68,16 +91,264 @@ struct tod_reg {
 #define TOD_STATUS_UTC_VALID   BIT(8)
 #define TOD_STATUS_LEAP_VALID  BIT(16)
 
+struct ts_reg {
+       u32     enable;
+       u32     error;
+       u32     polarity;
+       u32     version;
+       u32     __pad0[4];
+       u32     cable_delay;
+       u32     __pad1[3];
+       u32     intr;
+       u32     intr_mask;
+       u32     event_count;
+       u32     __pad2[1];
+       u32     ts_count;
+       u32     time_ns;
+       u32     time_sec;
+       u32     data_width;
+       u32     data;
+};
+
+struct pps_reg {
+       u32     ctrl;
+       u32     status;
+       u32     __pad0[6];
+       u32     cable_delay;
+};
+
+#define PPS_STATUS_FILTER_ERR  BIT(0)
+#define PPS_STATUS_SUPERV_ERR  BIT(1)
+
+struct img_reg {
+       u32     version;
+};
+
+struct ptp_ocp_flash_info {
+       const char *name;
+       int pci_offset;
+       int data_size;
+       void *data;
+};
+
+struct ptp_ocp_ext_info {
+       const char *name;
+       int index;
+       irqreturn_t (*irq_fcn)(int irq, void *priv);
+       int (*enable)(void *priv, bool enable);
+};
+
+struct ptp_ocp_ext_src {
+       void __iomem            *mem;
+       struct ptp_ocp          *bp;
+       struct ptp_ocp_ext_info *info;
+       int                     irq_vec;
+};
+
 struct ptp_ocp {
        struct pci_dev          *pdev;
+       struct device           dev;
        spinlock_t              lock;
-       void __iomem            *base;
        struct ocp_reg __iomem  *reg;
        struct tod_reg __iomem  *tod;
+       struct pps_reg __iomem  *pps_to_ext;
+       struct pps_reg __iomem  *pps_to_clk;
+       struct ptp_ocp_ext_src  *pps;
+       struct ptp_ocp_ext_src  *ts0;
+       struct ptp_ocp_ext_src  *ts1;
+       struct img_reg __iomem  *image;
        struct ptp_clock        *ptp;
        struct ptp_clock_info   ptp_info;
+       struct platform_device  *i2c_ctrl;
+       struct platform_device  *spi_flash;
+       struct clk_hw           *i2c_clk;
+       struct timer_list       watchdog;
+       time64_t                gnss_lost;
+       int                     id;
+       int                     n_irqs;
+       int                     gnss_port;
+       int                     mac_port;       /* miniature atomic clock */
+       u8                      serial[6];
+       int                     flash_start;
+       bool                    has_serial;
 };
 
+struct ocp_resource {
+       unsigned long offset;
+       int size;
+       int irq_vec;
+       int (*setup)(struct ptp_ocp *bp, struct ocp_resource *r);
+       void *extra;
+       unsigned long bp_offset;
+};
+
+static int ptp_ocp_register_mem(struct ptp_ocp *bp, struct ocp_resource *r);
+static int ptp_ocp_register_i2c(struct ptp_ocp *bp, struct ocp_resource *r);
+static int ptp_ocp_register_spi(struct ptp_ocp *bp, struct ocp_resource *r);
+static int ptp_ocp_register_serial(struct ptp_ocp *bp, struct ocp_resource *r);
+static int ptp_ocp_register_ext(struct ptp_ocp *bp, struct ocp_resource *r);
+static int ptp_ocp_fb_board_init(struct ptp_ocp *bp, struct ocp_resource *r);
+static irqreturn_t ptp_ocp_ts_irq(int irq, void *priv);
+static int ptp_ocp_ts_enable(void *priv, bool enable);
+
+#define bp_assign_entry(bp, res, val) ({                               \
+       uintptr_t addr = (uintptr_t)(bp) + (res)->bp_offset;            \
+       *(typeof(val) *)addr = val;                                     \
+})
+
+#define OCP_RES_LOCATION(member) \
+       .bp_offset = offsetof(struct ptp_ocp, member)
+
+#define OCP_MEM_RESOURCE(member) \
+       OCP_RES_LOCATION(member), .setup = ptp_ocp_register_mem
+
+#define OCP_SERIAL_RESOURCE(member) \
+       OCP_RES_LOCATION(member), .setup = ptp_ocp_register_serial
+
+#define OCP_I2C_RESOURCE(member) \
+       OCP_RES_LOCATION(member), .setup = ptp_ocp_register_i2c
+
+#define OCP_SPI_RESOURCE(member) \
+       OCP_RES_LOCATION(member), .setup = ptp_ocp_register_spi
+
+#define OCP_EXT_RESOURCE(member) \
+       OCP_RES_LOCATION(member), .setup = ptp_ocp_register_ext
+
+/* This is the MSI vector mapping used.
+ * 0: N/C
+ * 1: TS0
+ * 2: TS1
+ * 3: GPS
+ * 4: GPS2 (n/c)
+ * 5: MAC
+ * 6: SPI IMU (inertial measurement unit)
+ * 7: I2C oscillator
+ * 8: HWICAP
+ * 9: SPI Flash
+ */
+
+static struct ocp_resource ocp_fb_resource[] = {
+       {
+               OCP_MEM_RESOURCE(reg),
+               .offset = 0x01000000, .size = 0x10000,
+       },
+       {
+               OCP_EXT_RESOURCE(ts0),
+               .offset = 0x01010000, .size = 0x10000, .irq_vec = 1,
+               .extra = &(struct ptp_ocp_ext_info) {
+                       .name = "ts0", .index = 0,
+                       .irq_fcn = ptp_ocp_ts_irq,
+                       .enable = ptp_ocp_ts_enable,
+               },
+       },
+       {
+               OCP_EXT_RESOURCE(ts1),
+               .offset = 0x01020000, .size = 0x10000, .irq_vec = 2,
+               .extra = &(struct ptp_ocp_ext_info) {
+                       .name = "ts1", .index = 1,
+                       .irq_fcn = ptp_ocp_ts_irq,
+                       .enable = ptp_ocp_ts_enable,
+               },
+       },
+       {
+               OCP_MEM_RESOURCE(pps_to_ext),
+               .offset = 0x01030000, .size = 0x10000,
+       },
+       {
+               OCP_MEM_RESOURCE(pps_to_clk),
+               .offset = 0x01040000, .size = 0x10000,
+       },
+       {
+               OCP_MEM_RESOURCE(tod),
+               .offset = 0x01050000, .size = 0x10000,
+       },
+       {
+               OCP_MEM_RESOURCE(image),
+               .offset = 0x00020000, .size = 0x1000,
+       },
+       {
+               OCP_I2C_RESOURCE(i2c_ctrl),
+               .offset = 0x00150000, .size = 0x10000, .irq_vec = 7,
+       },
+       {
+               OCP_SERIAL_RESOURCE(gnss_port),
+               .offset = 0x00160000 + 0x1000, .irq_vec = 3,
+       },
+       {
+               OCP_SERIAL_RESOURCE(mac_port),
+               .offset = 0x00180000 + 0x1000, .irq_vec = 5,
+       },
+       {
+               OCP_SPI_RESOURCE(spi_flash),
+               .offset = 0x00310000, .size = 0x10000, .irq_vec = 9,
+               .extra = &(struct ptp_ocp_flash_info) {
+                       .name = "xilinx_spi", .pci_offset = 0,
+                       .data_size = sizeof(struct xspi_platform_data),
+                       .data = &(struct xspi_platform_data) {
+                               .num_chipselect = 1,
+                               .bits_per_word = 8,
+                               .num_devices = 1,
+                               .devices = &(struct spi_board_info) {
+                                       .modalias = "spi-nor",
+                               },
+                       },
+               },
+       },
+       {
+               .setup = ptp_ocp_fb_board_init,
+       },
+       { }
+};
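Everything board-specific above is expressed as data: each ocp_resource entry carries a setup callback, the BAR offset/size and MSI vector it needs, and (via OCP_RES_LOCATION()) the offset of the struct ptp_ocp member that should receive the result, so registration later in this diff is a single loop over the table. A sketch of how bp_assign_entry() resolves, assuming the macros above:

	/* For an entry declared with OCP_MEM_RESOURCE(tod), r->bp_offset is
	 * offsetof(struct ptp_ocp, tod), so bp_assign_entry(bp, r, mem)
	 * expands to roughly:
	 *
	 *	*(void __iomem **)((uintptr_t)bp + r->bp_offset) = mem;
	 *
	 * i.e. the ioremapped pointer lands in bp->tod without any
	 * per-member switch statement.
	 */
	static void demo_assign(struct ptp_ocp *bp, struct ocp_resource *r,
				void __iomem *mem)
	{
		bp_assign_entry(bp, r, mem);
	}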
+
+static const struct pci_device_id ptp_ocp_pcidev_id[] = {
+       { PCI_DEVICE_DATA(FACEBOOK, TIMECARD, &ocp_fb_resource) },
+       { 0 }
+};
+MODULE_DEVICE_TABLE(pci, ptp_ocp_pcidev_id);
+
+static DEFINE_MUTEX(ptp_ocp_lock);
+static DEFINE_IDR(ptp_ocp_idr);
+
+static struct {
+       const char *name;
+       int value;
+} ptp_ocp_clock[] = {
+       { .name = "NONE",       .value = 0 },
+       { .name = "TOD",        .value = 1 },
+       { .name = "IRIG",       .value = 2 },
+       { .name = "PPS",        .value = 3 },
+       { .name = "PTP",        .value = 4 },
+       { .name = "RTC",        .value = 5 },
+       { .name = "DCF",        .value = 6 },
+       { .name = "REGS",       .value = 0xfe },
+       { .name = "EXT",        .value = 0xff },
+};
+
+static const char *
+ptp_ocp_clock_name_from_val(int val)
+{
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(ptp_ocp_clock); i++)
+               if (ptp_ocp_clock[i].value == val)
+                       return ptp_ocp_clock[i].name;
+       return NULL;
+}
+
+static int
+ptp_ocp_clock_val_from_name(const char *name)
+{
+       const char *clk;
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(ptp_ocp_clock); i++) {
+               clk = ptp_ocp_clock[i].name;
+               if (!strncasecmp(name, clk, strlen(clk)))
+                       return ptp_ocp_clock[i].value;
+       }
+       return -EINVAL;
+}
+
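ptp_ocp_clock_name_from_val() and ptp_ocp_clock_val_from_name() are the two halves of a string<->register-value map (the sysfs attributes that consume them sit outside this excerpt). A hypothetical consumer, with locking omitted for brevity (the real driver serializes register access with bp->lock):

	/* Hypothetical: select the clock source by name, e.g. "PPS" or "REGS". */
	static int demo_set_clock_source(struct ptp_ocp *bp, const char *name)
	{
		int val = ptp_ocp_clock_val_from_name(name);

		if (val < 0)
			return val;	/* -EINVAL for unknown names */

		iowrite32(val, &bp->reg->select);
		return 0;
	}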
 static int
 __ptp_ocp_gettime_locked(struct ptp_ocp *bp, struct timespec64 *ts,
                         struct ptp_system_timestamp *sts)
@@ -192,6 +463,45 @@ ptp_ocp_null_adjfine(struct ptp_clock_info *ptp_info, long scaled_ppm)
        return -EOPNOTSUPP;
 }
 
+static int
+ptp_ocp_adjphase(struct ptp_clock_info *ptp_info, s32 phase_ns)
+{
+       return -EOPNOTSUPP;
+}
+
+static int
+ptp_ocp_enable(struct ptp_clock_info *ptp_info, struct ptp_clock_request *rq,
+              int on)
+{
+       struct ptp_ocp *bp = container_of(ptp_info, struct ptp_ocp, ptp_info);
+       struct ptp_ocp_ext_src *ext = NULL;
+       int err;
+
+       switch (rq->type) {
+       case PTP_CLK_REQ_EXTTS:
+               switch (rq->extts.index) {
+               case 0:
+                       ext = bp->ts0;
+                       break;
+               case 1:
+                       ext = bp->ts1;
+                       break;
+               }
+               break;
+       case PTP_CLK_REQ_PPS:
+               ext = bp->pps;
+               break;
+       default:
+               return -EOPNOTSUPP;
+       }
+
+       err = -ENXIO;
+       if (ext)
+               err = ext->info->enable(ext, on);
+
+       return err;
+}
+
 static const struct ptp_clock_info ptp_ocp_clock_info = {
        .owner          = THIS_MODULE,
        .name           = KBUILD_MODNAME,
@@ -200,10 +510,57 @@ static const struct ptp_clock_info ptp_ocp_clock_info = {
        .settime64      = ptp_ocp_settime,
        .adjtime        = ptp_ocp_adjtime,
        .adjfine        = ptp_ocp_null_adjfine,
+       .adjphase       = ptp_ocp_adjphase,
+       .enable         = ptp_ocp_enable,
+       .pps            = true,
+       .n_ext_ts       = 2,
 };
 
+static void
+__ptp_ocp_clear_drift_locked(struct ptp_ocp *bp)
+{
+       u32 ctrl, select;
+
+       select = ioread32(&bp->reg->select);
+       iowrite32(OCP_SELECT_CLK_REG, &bp->reg->select);
+
+       iowrite32(0, &bp->reg->drift_ns);
+
+       ctrl = ioread32(&bp->reg->ctrl);
+       ctrl |= OCP_CTRL_ADJUST_DRIFT;
+       iowrite32(ctrl, &bp->reg->ctrl);
+
+       /* restore clock selection */
+       iowrite32(select >> 16, &bp->reg->select);
+}
+
+static void
+ptp_ocp_watchdog(struct timer_list *t)
+{
+       struct ptp_ocp *bp = from_timer(bp, t, watchdog);
+       unsigned long flags;
+       u32 status;
+
+       status = ioread32(&bp->pps_to_clk->status);
+
+       if (status & PPS_STATUS_SUPERV_ERR) {
+               iowrite32(status, &bp->pps_to_clk->status);
+               if (!bp->gnss_lost) {
+                       spin_lock_irqsave(&bp->lock, flags);
+                       __ptp_ocp_clear_drift_locked(bp);
+                       spin_unlock_irqrestore(&bp->lock, flags);
+                       bp->gnss_lost = ktime_get_real_seconds();
+               }
+
+       } else if (bp->gnss_lost) {
+               bp->gnss_lost = 0;
+       }
+
+       mod_timer(&bp->watchdog, jiffies + HZ);
+}
+
 static int
-ptp_ocp_check_clock(struct ptp_ocp *bp)
+ptp_ocp_init_clock(struct ptp_ocp *bp)
 {
        struct timespec64 ts;
        bool sync;
@@ -214,6 +571,17 @@ ptp_ocp_check_clock(struct ptp_ocp *bp)
        ctrl |= OCP_CTRL_ENABLE;
        iowrite32(ctrl, &bp->reg->ctrl);
 
+       /* No drift correction: offset_p = 1/8, offset_i = 1/16,
+        * drift_p = 0, drift_i = 0.  The register values appear to be
+        * Q16 fixed point: 0x2000 / 0x10000 = 1/8, 0x1000 / 0x10000 = 1/16.
+        */
+       iowrite32(0x2000, &bp->reg->servo_offset_p);
+       iowrite32(0x1000, &bp->reg->servo_offset_i);
+       iowrite32(0,      &bp->reg->servo_drift_p);
+       iowrite32(0,      &bp->reg->servo_drift_i);
+
+       /* latch servo values */
+       ctrl |= OCP_CTRL_ADJUST_SERVO;
+       iowrite32(ctrl, &bp->reg->ctrl);
+
        if ((ioread32(&bp->reg->ctrl) & OCP_CTRL_ENABLE) == 0) {
                dev_err(&bp->pdev->dev, "clock not enabled\n");
                return -ENODEV;
@@ -229,6 +597,9 @@ ptp_ocp_check_clock(struct ptp_ocp *bp)
                         ts.tv_sec, ts.tv_nsec,
                         sync ? "in-sync" : "UNSYNCED");
 
+       timer_setup(&bp->watchdog, ptp_ocp_watchdog, 0);
+       mod_timer(&bp->watchdog, jiffies + HZ);
+
        return 0;
 }
 
@@ -278,82 +649,839 @@ ptp_ocp_tod_info(struct ptp_ocp *bp)
                 reg & TOD_STATUS_LEAP_VALID ? 1 : 0);
 }
 
+static int
+ptp_ocp_firstchild(struct device *dev, void *data)
+{
+       return 1;
+}
+
+static int
+ptp_ocp_read_i2c(struct i2c_adapter *adap, u8 addr, u8 reg, u8 sz, u8 *data)
+{
+       struct i2c_msg msgs[2] = {
+               {
+                       .addr = addr,
+                       .len = 1,
+                       .buf = &reg,
+               },
+               {
+                       .addr = addr,
+                       .flags = I2C_M_RD,
+                       .len = 2,
+                       .buf = data,
+               },
+       };
+       int err;
+       u8 len;
+
+       /* xiic-i2c for some stupid reason only does 2 byte reads. */
+       while (sz) {
+               len = min_t(u8, sz, 2);
+               msgs[1].len = len;
+               err = i2c_transfer(adap, msgs, 2);
+               if (err != msgs[1].len)
+                       return err;
+               msgs[1].buf += len;
+               reg += len;
+               sz -= len;
+       }
+       return 0;
+}
+
+static void
+ptp_ocp_get_serial_number(struct ptp_ocp *bp)
+{
+       struct i2c_adapter *adap;
+       struct device *dev;
+       int err;
+
+       dev = device_find_child(&bp->i2c_ctrl->dev, NULL, ptp_ocp_firstchild);
+       if (!dev) {
+               dev_err(&bp->pdev->dev, "Can't find I2C adapter\n");
+               return;
+       }
+
+       adap = i2c_verify_adapter(dev);
+       if (!adap) {
+               dev_err(&bp->pdev->dev, "device '%s' isn't an I2C adapter\n",
+                       dev_name(dev));
+               goto out;
+       }
+
+       err = ptp_ocp_read_i2c(adap, 0x58, 0x9A, 6, bp->serial);
+       if (err) {
+               dev_err(&bp->pdev->dev, "could not read eeprom: %d\n", err);
+               goto out;
+       }
+
+       bp->has_serial = true;
+
+out:
+       put_device(dev);
+}
+
 static void
 ptp_ocp_info(struct ptp_ocp *bp)
 {
-       static const char * const clock_name[] = {
-               "NO", "TOD", "IRIG", "PPS", "PTP", "RTC", "REGS", "EXT"
-       };
        u32 version, select;
 
        version = ioread32(&bp->reg->version);
        select = ioread32(&bp->reg->select);
        dev_info(&bp->pdev->dev, "Version %d.%d.%d, clock %s, device ptp%d\n",
                 version >> 24, (version >> 16) & 0xff, version & 0xffff,
-                clock_name[select & 7],
+                ptp_ocp_clock_name_from_val(select >> 16),
                 ptp_clock_index(bp->ptp));
 
        ptp_ocp_tod_info(bp);
 }
 
+static struct device *
+ptp_ocp_find_flash(struct ptp_ocp *bp)
+{
+       struct device *dev, *last;
+
+       last = NULL;
+       dev = &bp->spi_flash->dev;
+
+       while ((dev = device_find_child(dev, NULL, ptp_ocp_firstchild))) {
+               if (!strcmp("mtd", dev_bus_name(dev)))
+                       break;
+               put_device(last);
+               last = dev;
+       }
+       put_device(last);
+
+       return dev;
+}
+
 static int
-ptp_ocp_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+ptp_ocp_devlink_flash(struct devlink *devlink, struct device *dev,
+                     const struct firmware *fw)
 {
-       struct ptp_ocp *bp;
+       struct mtd_info *mtd = dev_get_drvdata(dev);
+       struct ptp_ocp *bp = devlink_priv(devlink);
+       size_t off, len, resid, wrote;
+       struct erase_info erase;
+       size_t base, blksz;
+       int err;
+
+       off = 0;
+       base = bp->flash_start;
+       blksz = 4096;
+       resid = fw->size;
+
+       while (resid) {
+               devlink_flash_update_status_notify(devlink, "Flashing",
+                                                  NULL, off, fw->size);
+
+               len = min_t(size_t, resid, blksz);
+               erase.addr = base + off;
+               erase.len = blksz;
+
+               err = mtd_erase(mtd, &erase);
+               if (err)
+                       goto out;
+
+               err = mtd_write(mtd, base + off, len, &wrote, &fw->data[off]);
+               if (err)
+                       goto out;
+
+               off += blksz;
+               resid -= len;
+       }
+out:
+       return err;
+}
+
+static int
+ptp_ocp_devlink_flash_update(struct devlink *devlink,
+                            struct devlink_flash_update_params *params,
+                            struct netlink_ext_ack *extack)
+{
+       struct ptp_ocp *bp = devlink_priv(devlink);
+       struct device *dev;
+       const char *msg;
+       int err;
+
+       dev = ptp_ocp_find_flash(bp);
+       if (!dev) {
+               dev_err(&bp->pdev->dev, "Can't find Flash SPI adapter\n");
+               return -ENODEV;
+       }
+
+       devlink_flash_update_status_notify(devlink, "Preparing to flash",
+                                          NULL, 0, 0);
+
+       err = ptp_ocp_devlink_flash(devlink, dev, params->fw);
+
+       msg = err ? "Flash error" : "Flash complete";
+       devlink_flash_update_status_notify(devlink, msg, NULL, 0, 0);
+
+       put_device(dev);
+       return err;
+}
+
+static int
+ptp_ocp_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req,
+                        struct netlink_ext_ack *extack)
+{
+       struct ptp_ocp *bp = devlink_priv(devlink);
+       char buf[32];
+       int err;
+
+       err = devlink_info_driver_name_put(req, KBUILD_MODNAME);
+       if (err)
+               return err;
+
+       if (bp->image) {
+               u32 ver = ioread32(&bp->image->version);
+
+               if (ver & 0xffff) {
+                       sprintf(buf, "%d", ver);
+                       err = devlink_info_version_running_put(req,
+                                                              "fw",
+                                                              buf);
+               } else {
+                       sprintf(buf, "%d", ver >> 16);
+                       err = devlink_info_version_running_put(req,
+                                                              "loader",
+                                                              buf);
+               }
+               if (err)
+                       return err;
+       }
+
+       if (!bp->has_serial)
+               ptp_ocp_get_serial_number(bp);
+
+       if (bp->has_serial) {
+               sprintf(buf, "%pM", bp->serial);
+               err = devlink_info_serial_number_put(req, buf);
+               if (err)
+                       return err;
+       }
+
+       return 0;
+}
+
+static const struct devlink_ops ptp_ocp_devlink_ops = {
+       .flash_update = ptp_ocp_devlink_flash_update,
+       .info_get = ptp_ocp_devlink_info_get,
+};
+
+static void __iomem *
+__ptp_ocp_get_mem(struct ptp_ocp *bp, unsigned long start, int size)
+{
+       struct resource res = DEFINE_RES_MEM_NAMED(start, size, "ptp_ocp");
+
+       return devm_ioremap_resource(&bp->pdev->dev, &res);
+}
+
+static void __iomem *
+ptp_ocp_get_mem(struct ptp_ocp *bp, struct ocp_resource *r)
+{
+       unsigned long start;
+
+       start = pci_resource_start(bp->pdev, 0) + r->offset;
+       return __ptp_ocp_get_mem(bp, start, r->size);
+}
+
+static void
+ptp_ocp_set_irq_resource(struct resource *res, int irq)
+{
+       struct resource r = DEFINE_RES_IRQ(irq);
+       *res = r;
+}
+
+static void
+ptp_ocp_set_mem_resource(struct resource *res, unsigned long start, int size)
+{
+       struct resource r = DEFINE_RES_MEM(start, size);
+       *res = r;
+}
+
+static int
+ptp_ocp_register_spi(struct ptp_ocp *bp, struct ocp_resource *r)
+{
+       struct ptp_ocp_flash_info *info;
+       struct pci_dev *pdev = bp->pdev;
+       struct platform_device *p;
+       struct resource res[2];
+       unsigned long start;
+       int id;
+
+       /* XXX hack to work around old FPGA */
+       if (bp->n_irqs < 10) {
+               dev_err(&bp->pdev->dev, "FPGA does not have SPI devices\n");
+               return 0;
+       }
+
+       if (r->irq_vec > bp->n_irqs) {
+               dev_err(&bp->pdev->dev, "spi device irq %d out of range\n",
+                       r->irq_vec);
+               return 0;
+       }
+
+       start = pci_resource_start(pdev, 0) + r->offset;
+       ptp_ocp_set_mem_resource(&res[0], start, r->size);
+       ptp_ocp_set_irq_resource(&res[1], pci_irq_vector(pdev, r->irq_vec));
+
+       info = r->extra;
+       id = pci_dev_id(pdev) << 1;
+       id += info->pci_offset;
+
+       p = platform_device_register_resndata(&pdev->dev, info->name, id,
+                                             res, 2, info->data,
+                                             info->data_size);
+       if (IS_ERR(p))
+               return PTR_ERR(p);
+
+       bp_assign_entry(bp, r, p);
+
+       return 0;
+}
+
+static struct platform_device *
+ptp_ocp_i2c_bus(struct pci_dev *pdev, struct ocp_resource *r, int id)
+{
+       struct resource res[2];
+       unsigned long start;
+
+       start = pci_resource_start(pdev, 0) + r->offset;
+       ptp_ocp_set_mem_resource(&res[0], start, r->size);
+       ptp_ocp_set_irq_resource(&res[1], pci_irq_vector(pdev, r->irq_vec));
+
+       return platform_device_register_resndata(&pdev->dev, "xiic-i2c",
+                                                id, res, 2, NULL, 0);
+}
+
+static int
+ptp_ocp_register_i2c(struct ptp_ocp *bp, struct ocp_resource *r)
+{
+       struct pci_dev *pdev = bp->pdev;
+       struct platform_device *p;
+       struct clk_hw *clk;
+       char buf[32];
+       int id;
+
+       if (r->irq_vec > bp->n_irqs) {
+               dev_err(&bp->pdev->dev, "i2c device irq %d out of range\n",
+                       r->irq_vec);
+               return 0;
+       }
+
+       id = pci_dev_id(bp->pdev);
+
+       sprintf(buf, "AXI.%d", id);
+       clk = clk_hw_register_fixed_rate(&pdev->dev, buf, NULL, 0, 50000000);
+       if (IS_ERR(clk))
+               return PTR_ERR(clk);
+       bp->i2c_clk = clk;
+
+       sprintf(buf, "xiic-i2c.%d", id);
+       devm_clk_hw_register_clkdev(&pdev->dev, clk, NULL, buf);
+       p = ptp_ocp_i2c_bus(bp->pdev, r, id);
+       if (IS_ERR(p))
+               return PTR_ERR(p);
+
+       bp_assign_entry(bp, r, p);
+
+       return 0;
+}
+
+static irqreturn_t
+ptp_ocp_ts_irq(int irq, void *priv)
+{
+       struct ptp_ocp_ext_src *ext = priv;
+       struct ts_reg __iomem *reg = ext->mem;
+       struct ptp_clock_event ev;
+       u32 sec, nsec;
+
+       /* XXX should fix API - this converts s/ns -> ts -> s/ns */
+       sec = ioread32(&reg->time_sec);
+       nsec = ioread32(&reg->time_ns);
+
+       ev.type = PTP_CLOCK_EXTTS;
+       ev.index = ext->info->index;
+       ev.timestamp = sec * 1000000000ULL + nsec;
+
+       ptp_clock_event(ext->bp->ptp, &ev);
+
+       iowrite32(1, &reg->intr);       /* write 1 to ack */
+
+       return IRQ_HANDLED;
+}
+
+static int
+ptp_ocp_ts_enable(void *priv, bool enable)
+{
+       struct ptp_ocp_ext_src *ext = priv;
+       struct ts_reg __iomem *reg = ext->mem;
+
+       if (enable) {
+               iowrite32(1, &reg->enable);
+               iowrite32(1, &reg->intr_mask);
+               iowrite32(1, &reg->intr);
+       } else {
+               iowrite32(0, &reg->intr_mask);
+               iowrite32(0, &reg->enable);
+       }
+
+       return 0;
+}
+
+static void
+ptp_ocp_unregister_ext(struct ptp_ocp_ext_src *ext)
+{
+       ext->info->enable(ext, false);
+       pci_free_irq(ext->bp->pdev, ext->irq_vec, ext);
+       kfree(ext);
+}
+
+static int
+ptp_ocp_register_ext(struct ptp_ocp *bp, struct ocp_resource *r)
+{
+       struct pci_dev *pdev = bp->pdev;
+       struct ptp_ocp_ext_src *ext;
        int err;
 
-       bp = kzalloc(sizeof(*bp), GFP_KERNEL);
-       if (!bp)
+       ext = kzalloc(sizeof(*ext), GFP_KERNEL);
+       if (!ext)
                return -ENOMEM;
-       bp->pdev = pdev;
-       pci_set_drvdata(pdev, bp);
 
-       err = pci_enable_device(pdev);
+       err = -EINVAL;
+       ext->mem = ptp_ocp_get_mem(bp, r);
+       if (!ext->mem)
+               goto out;
+
+       ext->bp = bp;
+       ext->info = r->extra;
+       ext->irq_vec = r->irq_vec;
+
+       err = pci_request_irq(pdev, r->irq_vec, ext->info->irq_fcn, NULL,
+                             ext, "ocp%d.%s", bp->id, ext->info->name);
        if (err) {
-               dev_err(&pdev->dev, "pci_enable_device\n");
-               goto out_free;
+               dev_err(&pdev->dev, "Could not get irq %d\n", r->irq_vec);
+               goto out;
        }
 
-       err = pci_request_regions(pdev, KBUILD_MODNAME);
-       if (err) {
-               dev_err(&pdev->dev, "pci_request_region\n");
-               goto out_disable;
+       bp_assign_entry(bp, r, ext);
+
+       return 0;
+
+out:
+       kfree(ext);
+       return err;
+}
+
+static int
+ptp_ocp_serial_line(struct ptp_ocp *bp, struct ocp_resource *r)
+{
+       struct pci_dev *pdev = bp->pdev;
+       struct uart_8250_port uart;
+
+       /* Setting UPF_IOREMAP and leaving port.membase unspecified lets
+        * the serial port device claim and release the PCI resource.
+        */
+       memset(&uart, 0, sizeof(uart));
+       uart.port.dev = &pdev->dev;
+       uart.port.iotype = UPIO_MEM;
+       uart.port.regshift = 2;
+       uart.port.mapbase = pci_resource_start(pdev, 0) + r->offset;
+       uart.port.irq = pci_irq_vector(pdev, r->irq_vec);
+       uart.port.uartclk = 50000000;
+       uart.port.flags = UPF_FIXED_TYPE | UPF_IOREMAP;
+       uart.port.type = PORT_16550A;
+
+       return serial8250_register_8250_port(&uart);
+}
+
+static int
+ptp_ocp_register_serial(struct ptp_ocp *bp, struct ocp_resource *r)
+{
+       int port;
+
+       if (r->irq_vec > bp->n_irqs) {
+               dev_err(&bp->pdev->dev, "serial device irq %d out of range\n",
+                       r->irq_vec);
+               return 0;
        }
 
-       bp->base = pci_ioremap_bar(pdev, 0);
-       if (!bp->base) {
-               dev_err(&pdev->dev, "io_remap bar0\n");
-               err = -ENOMEM;
-               goto out_release_regions;
+       port = ptp_ocp_serial_line(bp, r);
+       if (port < 0)
+               return port;
+
+       bp_assign_entry(bp, r, port);
+
+       return 0;
+}
+
+static int
+ptp_ocp_register_mem(struct ptp_ocp *bp, struct ocp_resource *r)
+{
+       void __iomem *mem;
+
+       mem = ptp_ocp_get_mem(bp, r);
+       if (!mem)
+               return -EINVAL;
+
+       bp_assign_entry(bp, r, mem);
+
+       return 0;
+}
+
+/* FB specific board initializers; last "resource" registered. */
+static int
+ptp_ocp_fb_board_init(struct ptp_ocp *bp, struct ocp_resource *r)
+{
+       bp->flash_start = 1024 * 4096;
+
+       return ptp_ocp_init_clock(bp);
+}
+
+static int
+ptp_ocp_register_resources(struct ptp_ocp *bp, kernel_ulong_t driver_data)
+{
+       struct ocp_resource *r, *table;
+       int err = 0;
+
+       table = (struct ocp_resource *)driver_data;
+       for (r = table; r->setup; r++) {
+               err = r->setup(bp, r);
+               if (err)
+                       break;
+       }
+       return err;
+}
+
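ptp_ocp_register_resources() drives all per-board setup from a flat table: entries run in order until one with a NULL ->setup terminates the walk, so table order is initialization order and the board-init entry must sit last (matching the "last \"resource\" registered" comment above). A minimal sketch of such a table; the offsets, sizes and irq vectors are invented, and any ocp_resource fields beyond those visible here are omitted:

        /* Sketch only, not the driver's real table. */
        static struct ocp_resource ocp_fb_resource[] = {
                { .setup = ptp_ocp_register_i2c,    .offset = 0x150000, .size = 0x10000, .irq_vec = 7 },
                { .setup = ptp_ocp_register_serial, .offset = 0x160000, .size = 0x1000,  .irq_vec = 3 },
                { .setup = ptp_ocp_register_mem,    .offset = 0x130000, .size = 0x1000 },
                { .setup = ptp_ocp_fb_board_init }, /* board init runs last */
                { }     /* ->setup == NULL ends the walk */
        };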
+static ssize_t
+serialnum_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+       struct ptp_ocp *bp = dev_get_drvdata(dev);
+
+       if (!bp->has_serial)
+               ptp_ocp_get_serial_number(bp);
+
+       return sysfs_emit(buf, "%pM\n", bp->serial);
+}
+static DEVICE_ATTR_RO(serialnum);
+
+static ssize_t
+gnss_sync_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+       struct ptp_ocp *bp = dev_get_drvdata(dev);
+       ssize_t ret;
+
+       if (bp->gnss_lost)
+               ret = sysfs_emit(buf, "LOST @ %ptT\n", &bp->gnss_lost);
+       else
+               ret = sysfs_emit(buf, "SYNC\n");
+
+       return ret;
+}
+static DEVICE_ATTR_RO(gnss_sync);
+
+static ssize_t
+clock_source_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+       struct ptp_ocp *bp = dev_get_drvdata(dev);
+       const char *p;
+       u32 select;
+
+       select = ioread32(&bp->reg->select);
+       p = ptp_ocp_clock_name_from_val(select >> 16);
+
+       return sysfs_emit(buf, "%s\n", p);
+}
+
+static ssize_t
+clock_source_store(struct device *dev, struct device_attribute *attr,
+                  const char *buf, size_t count)
+{
+       struct ptp_ocp *bp = dev_get_drvdata(dev);
+       unsigned long flags;
+       int val;
+
+       val = ptp_ocp_clock_val_from_name(buf);
+       if (val < 0)
+               return val;
+
+       spin_lock_irqsave(&bp->lock, flags);
+       iowrite32(val, &bp->reg->select);
+       spin_unlock_irqrestore(&bp->lock, flags);
+
+       return count;
+}
+static DEVICE_ATTR_RW(clock_source);
+
+static ssize_t
+available_clock_sources_show(struct device *dev,
+                            struct device_attribute *attr, char *buf)
+{
+       const char *clk;
+       ssize_t count;
+       int i;
+
+       count = 0;
+       for (i = 0; i < ARRAY_SIZE(ptp_ocp_clock); i++) {
+               clk = ptp_ocp_clock[i].name;
+               count += sysfs_emit_at(buf, count, "%s ", clk);
+       }
+       if (count)
+               count--;
+       count += sysfs_emit_at(buf, count, "\n");
+       return count;
+}
+static DEVICE_ATTR_RO(available_clock_sources);
+
+static struct attribute *timecard_attrs[] = {
+       &dev_attr_serialnum.attr,
+       &dev_attr_gnss_sync.attr,
+       &dev_attr_clock_source.attr,
+       &dev_attr_available_clock_sources.attr,
+       NULL,
+};
+ATTRIBUTE_GROUPS(timecard);
+
+static void
+ptp_ocp_dev_release(struct device *dev)
+{
+       struct ptp_ocp *bp = dev_get_drvdata(dev);
+
+       mutex_lock(&ptp_ocp_lock);
+       idr_remove(&ptp_ocp_idr, bp->id);
+       mutex_unlock(&ptp_ocp_lock);
+}
+
+static int
+ptp_ocp_device_init(struct ptp_ocp *bp, struct pci_dev *pdev)
+{
+       int err;
+
+       mutex_lock(&ptp_ocp_lock);
+       err = idr_alloc(&ptp_ocp_idr, bp, 0, 0, GFP_KERNEL);
+       mutex_unlock(&ptp_ocp_lock);
+       if (err < 0) {
+               dev_err(&pdev->dev, "idr_alloc failed: %d\n", err);
+               return err;
        }
-       bp->reg = bp->base + OCP_REGISTER_OFFSET;
-       bp->tod = bp->base + TOD_REGISTER_OFFSET;
+       bp->id = err;
+
        bp->ptp_info = ptp_ocp_clock_info;
        spin_lock_init(&bp->lock);
+       bp->gnss_port = -1;
+       bp->mac_port = -1;
+       bp->pdev = pdev;
+
+       device_initialize(&bp->dev);
+       dev_set_name(&bp->dev, "ocp%d", bp->id);
+       bp->dev.class = &timecard_class;
+       bp->dev.parent = &pdev->dev;
+       bp->dev.release = ptp_ocp_dev_release;
+       dev_set_drvdata(&bp->dev, bp);
+
+       err = device_add(&bp->dev);
+       if (err) {
+               dev_err(&bp->dev, "device add failed: %d\n", err);
+               goto out;
+       }
+
+       pci_set_drvdata(pdev, bp);
+
+       return 0;
+
+out:
+       ptp_ocp_dev_release(&bp->dev);
+       put_device(&bp->dev);
+       return err;
+}
+
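A lifetime detail worth noting: the idr slot allocated here is released by ptp_ocp_dev_release(), the class device's ->release callback, so the device reference count owns the id. Once device_initialize() has run, the canonical rule is to drop the reference rather than free directly, so that ->release is invoked; a sketch of that rule in isolation:

        device_initialize(&bp->dev);
        ...
        err = device_add(&bp->dev);
        if (err) {
                put_device(&bp->dev);   /* drops the ref; ->release frees the id */
                return err;
        }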
+static void
+ptp_ocp_symlink(struct ptp_ocp *bp, struct device *child, const char *link)
+{
+       struct device *dev = &bp->dev;
+
+       if (sysfs_create_link(&dev->kobj, &child->kobj, link))
+               dev_err(dev, "%s symlink failed\n", link);
+}
+
+static void
+ptp_ocp_link_child(struct ptp_ocp *bp, const char *name, const char *link)
+{
+       struct device *dev, *child;
+
+       dev = &bp->pdev->dev;
+
+       child = device_find_child_by_name(dev, name);
+       if (!child) {
+               dev_err(dev, "Could not find device %s\n", name);
+               return;
+       }
+
+       ptp_ocp_symlink(bp, child, link);
+       put_device(child);
+}
+
+static int
+ptp_ocp_complete(struct ptp_ocp *bp)
+{
+       struct pps_device *pps;
+       char buf[32];
+
+       if (bp->gnss_port != -1) {
+               sprintf(buf, "ttyS%d", bp->gnss_port);
+               ptp_ocp_link_child(bp, buf, "ttyGNSS");
+       }
+       if (bp->mac_port != -1) {
+               sprintf(buf, "ttyS%d", bp->mac_port);
+               ptp_ocp_link_child(bp, buf, "ttyMAC");
+       }
+       sprintf(buf, "ptp%d", ptp_clock_index(bp->ptp));
+       ptp_ocp_link_child(bp, buf, "ptp");
+
+       pps = pps_lookup_dev(bp->ptp);
+       if (pps)
+               ptp_ocp_symlink(bp, pps->dev, "pps");
+
+       if (device_add_groups(&bp->dev, timecard_groups))
+               pr_err("device add groups failed\n");
 
-       err = ptp_ocp_check_clock(bp);
+       return 0;
+}
+
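Assuming a single board registered as ocp0, the symlinks created above give the class directory a stable layout no matter which ttyS/ptp/pps numbers were assigned at probe time; roughly (link targets abbreviated, the real ones point into the PCI device's sysfs subtree):

        /sys/class/timecard/ocp0/
                ttyGNSS -> .../ttyS4
                ttyMAC  -> .../ttyS5
                ptp     -> .../ptp2
                pps     -> .../pps1
                serialnum  gnss_sync  clock_source  available_clock_sources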
+static void
+ptp_ocp_resource_summary(struct ptp_ocp *bp)
+{
+       struct device *dev = &bp->pdev->dev;
+
+       if (bp->image) {
+               u32 ver = ioread32(&bp->image->version);
+
+               dev_info(dev, "version %x\n", ver);
+               if (ver & 0xffff)
+                       dev_info(dev, "regular image, version %d\n",
+                                ver & 0xffff);
+               else
+                       dev_info(dev, "golden image, version %d\n",
+                                ver >> 16);
+       }
+       if (bp->gnss_port != -1)
+               dev_info(dev, "GNSS @ /dev/ttyS%d 115200\n", bp->gnss_port);
+       if (bp->mac_port != -1)
+               dev_info(dev, "MAC @ /dev/ttyS%d   57600\n", bp->mac_port);
+}
+
+static void
+ptp_ocp_detach_sysfs(struct ptp_ocp *bp)
+{
+       struct device *dev = &bp->dev;
+
+       sysfs_remove_link(&dev->kobj, "ttyGNSS");
+       sysfs_remove_link(&dev->kobj, "ttyMAC");
+       sysfs_remove_link(&dev->kobj, "ptp");
+       sysfs_remove_link(&dev->kobj, "pps");
+       device_remove_groups(dev, timecard_groups);
+}
+
+static void
+ptp_ocp_detach(struct ptp_ocp *bp)
+{
+       ptp_ocp_detach_sysfs(bp);
+       if (timer_pending(&bp->watchdog))
+               del_timer_sync(&bp->watchdog);
+       if (bp->ts0)
+               ptp_ocp_unregister_ext(bp->ts0);
+       if (bp->ts1)
+               ptp_ocp_unregister_ext(bp->ts1);
+       if (bp->pps)
+               ptp_ocp_unregister_ext(bp->pps);
+       if (bp->gnss_port != -1)
+               serial8250_unregister_port(bp->gnss_port);
+       if (bp->mac_port != -1)
+               serial8250_unregister_port(bp->mac_port);
+       if (bp->spi_flash)
+               platform_device_unregister(bp->spi_flash);
+       if (bp->i2c_ctrl)
+               platform_device_unregister(bp->i2c_ctrl);
+       if (bp->i2c_clk)
+               clk_hw_unregister_fixed_rate(bp->i2c_clk);
+       if (bp->n_irqs)
+               pci_free_irq_vectors(bp->pdev);
+       if (bp->ptp)
+               ptp_clock_unregister(bp->ptp);
+       device_unregister(&bp->dev);
+}
+
+static int
+ptp_ocp_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+       struct devlink *devlink;
+       struct ptp_ocp *bp;
+       int err;
+
+       devlink = devlink_alloc(&ptp_ocp_devlink_ops, sizeof(*bp), &pdev->dev);
+       if (!devlink) {
+               dev_err(&pdev->dev, "devlink_alloc failed\n");
+               return -ENOMEM;
+       }
+
+       err = devlink_register(devlink);
+       if (err)
+               goto out_free;
+
+       err = pci_enable_device(pdev);
+       if (err) {
+               dev_err(&pdev->dev, "pci_enable_device\n");
+               goto out_unregister;
+       }
+
+       bp = devlink_priv(devlink);
+       err = ptp_ocp_device_init(bp, pdev);
+       if (err)
+               goto out_unregister;
+
+       /* Compat mode:
+        * older FPGA firmware only returns 2 IRQs.
+        * Allow this; if not all of the IRQs are returned, skip the
+        * extra devices and just register the clock.
+        */
+       err = pci_alloc_irq_vectors(pdev, 1, 10, PCI_IRQ_MSI | PCI_IRQ_MSIX);
+       if (err < 0) {
+               dev_err(&pdev->dev, "alloc_irq_vectors err: %d\n", err);
+               goto out;
+       }
+       bp->n_irqs = err;
+       pci_set_master(pdev);
+
+       err = ptp_ocp_register_resources(bp, id->driver_data);
        if (err)
                goto out;
 
        bp->ptp = ptp_clock_register(&bp->ptp_info, &pdev->dev);
        if (IS_ERR(bp->ptp)) {
-               dev_err(&pdev->dev, "ptp_clock_register\n");
                err = PTR_ERR(bp->ptp);
+               dev_err(&pdev->dev, "ptp_clock_register: %d\n", err);
+               bp->ptp = NULL;
                goto out;
        }
 
+       err = ptp_ocp_complete(bp);
+       if (err)
+               goto out;
+
        ptp_ocp_info(bp);
+       ptp_ocp_resource_summary(bp);
 
        return 0;
 
 out:
-       pci_iounmap(pdev, bp->base);
-out_release_regions:
-       pci_release_regions(pdev);
-out_disable:
+       ptp_ocp_detach(bp);
        pci_disable_device(pdev);
+       pci_set_drvdata(pdev, NULL);
+out_unregister:
+       devlink_unregister(devlink);
 out_free:
-       kfree(bp);
+       devlink_free(devlink);
 
        return err;
 }
@@ -362,13 +1490,14 @@ static void
 ptp_ocp_remove(struct pci_dev *pdev)
 {
        struct ptp_ocp *bp = pci_get_drvdata(pdev);
+       struct devlink *devlink = priv_to_devlink(bp);
 
-       ptp_clock_unregister(bp->ptp);
-       pci_iounmap(pdev, bp->base);
-       pci_release_regions(pdev);
+       ptp_ocp_detach(bp);
        pci_disable_device(pdev);
        pci_set_drvdata(pdev, NULL);
-       kfree(bp);
+
+       devlink_unregister(devlink);
+       devlink_free(devlink);
 }
 
 static struct pci_driver ptp_ocp_driver = {
@@ -378,19 +1507,84 @@ static struct pci_driver ptp_ocp_driver = {
        .remove         = ptp_ocp_remove,
 };
 
+static int
+ptp_ocp_i2c_notifier_call(struct notifier_block *nb,
+                         unsigned long action, void *data)
+{
+       struct device *dev, *child = data;
+       struct ptp_ocp *bp;
+       bool add;
+
+       switch (action) {
+       case BUS_NOTIFY_ADD_DEVICE:
+       case BUS_NOTIFY_DEL_DEVICE:
+               add = action == BUS_NOTIFY_ADD_DEVICE;
+               break;
+       default:
+               return 0;
+       }
+
+       if (!i2c_verify_adapter(child))
+               return 0;
+
+       dev = child;
+       while ((dev = dev->parent))
+               if (dev->driver && !strcmp(dev->driver->name, KBUILD_MODNAME))
+                       goto found;
+       return 0;
+
+found:
+       bp = dev_get_drvdata(dev);
+       if (add)
+               ptp_ocp_symlink(bp, child, "i2c");
+       else
+               sysfs_remove_link(&bp->dev.kobj, "i2c");
+
+       return 0;
+}
+
+static struct notifier_block ptp_ocp_i2c_notifier = {
+       .notifier_call = ptp_ocp_i2c_notifier_call,
+};
+
 static int __init
 ptp_ocp_init(void)
 {
+       const char *what;
        int err;
 
+       what = "timecard class";
+       err = class_register(&timecard_class);
+       if (err)
+               goto out;
+
+       what = "i2c notifier";
+       err = bus_register_notifier(&i2c_bus_type, &ptp_ocp_i2c_notifier);
+       if (err)
+               goto out_notifier;
+
+       what = "ptp_ocp driver";
        err = pci_register_driver(&ptp_ocp_driver);
+       if (err)
+               goto out_register;
+
+       return 0;
+
+out_register:
+       bus_unregister_notifier(&i2c_bus_type, &ptp_ocp_i2c_notifier);
+out_notifier:
+       class_unregister(&timecard_class);
+out:
+       pr_err(KBUILD_MODNAME ": failed to register %s: %d\n", what, err);
        return err;
 }
 
 static void __exit
 ptp_ocp_fini(void)
 {
+       bus_unregister_notifier(&i2c_bus_type, &ptp_ocp_i2c_notifier);
        pci_unregister_driver(&ptp_ocp_driver);
+       class_unregister(&timecard_class);
 }
 
 module_init(ptp_ocp_init);
index cff91b4..9c67b97 100644 (file)
@@ -74,6 +74,7 @@ config QETH_L2
        def_tristate y
        prompt "qeth layer 2 device support"
        depends on QETH
+       depends on BRIDGE || BRIDGE=n
        help
          Select this option to be able to run qeth devices in layer 2 mode.
          To compile as a module, choose M. The module name is qeth_l2.
index 69afc03..4871f71 100644 (file)
@@ -717,6 +717,227 @@ static int qeth_l2_dev2br_an_set(struct qeth_card *card, bool enable)
        return rc;
 }
 
+struct qeth_l2_br2dev_event_work {
+       struct work_struct work;
+       struct net_device *br_dev;
+       struct net_device *lsync_dev;
+       struct net_device *dst_dev;
+       unsigned long event;
+       unsigned char addr[ETH_ALEN];
+};
+
+static const struct net_device_ops qeth_l2_netdev_ops;
+
+static bool qeth_l2_must_learn(struct net_device *netdev,
+                              struct net_device *dstdev)
+{
+       struct qeth_priv *priv;
+
+       priv = netdev_priv(netdev);
+       return (netdev != dstdev &&
+               (priv->brport_features & BR_LEARNING_SYNC) &&
+               !(br_port_flag_is_set(netdev, BR_ISOLATED) &&
+                 br_port_flag_is_set(dstdev, BR_ISOLATED)) &&
+               netdev->netdev_ops == &qeth_l2_netdev_ops);
+}
+
+/**
+ *     qeth_l2_br2dev_worker() - update local MACs
+ *     @work: bridge to device FDB update
+ *
+ *     Update local MACs of a learning_sync bridgeport so it can receive
+ *     messages for a destination port.
+ *     In case of an isolated learning_sync port, also update its isolated
+ *     siblings.
+ */
+static void qeth_l2_br2dev_worker(struct work_struct *work)
+{
+       struct qeth_l2_br2dev_event_work *br2dev_event_work =
+               container_of(work, struct qeth_l2_br2dev_event_work, work);
+       struct net_device *lsyncdev = br2dev_event_work->lsync_dev;
+       struct net_device *dstdev = br2dev_event_work->dst_dev;
+       struct net_device *brdev = br2dev_event_work->br_dev;
+       unsigned long event = br2dev_event_work->event;
+       unsigned char *addr = br2dev_event_work->addr;
+       struct qeth_card *card = lsyncdev->ml_priv;
+       struct net_device *lowerdev;
+       struct list_head *iter;
+       int err = 0;
+
+       kfree(br2dev_event_work);
+       QETH_CARD_TEXT_(card, 4, "b2dw%04x", event);
+       QETH_CARD_TEXT_(card, 4, "ma%012lx", ether_addr_to_u64(addr));
+
+       rcu_read_lock();
+       /* Verify preconditions are still valid: */
+       if (!netif_is_bridge_port(lsyncdev) ||
+           brdev != netdev_master_upper_dev_get_rcu(lsyncdev))
+               goto unlock;
+       if (!qeth_l2_must_learn(lsyncdev, dstdev))
+               goto unlock;
+
+       if (br_port_flag_is_set(lsyncdev, BR_ISOLATED)) {
+               /* Update lsyncdev and its isolated sibling(s): */
+               iter = &brdev->adj_list.lower;
+               lowerdev = netdev_next_lower_dev_rcu(brdev, &iter);
+               while (lowerdev) {
+                       if (br_port_flag_is_set(lowerdev, BR_ISOLATED)) {
+                               switch (event) {
+                               case SWITCHDEV_FDB_ADD_TO_DEVICE:
+                                       err = dev_uc_add(lowerdev, addr);
+                                       break;
+                               case SWITCHDEV_FDB_DEL_TO_DEVICE:
+                                       err = dev_uc_del(lowerdev, addr);
+                                       break;
+                               default:
+                                       break;
+                               }
+                               if (err) {
+                                       QETH_CARD_TEXT(card, 2, "b2derris");
+                                       QETH_CARD_TEXT_(card, 2,
+                                                       "err%02x%03d", event,
+                                                       lowerdev->ifindex);
+                               }
+                       }
+                       lowerdev = netdev_next_lower_dev_rcu(brdev, &iter);
+               }
+       } else {
+               switch (event) {
+               case SWITCHDEV_FDB_ADD_TO_DEVICE:
+                       err = dev_uc_add(lsyncdev, addr);
+                       break;
+               case SWITCHDEV_FDB_DEL_TO_DEVICE:
+                       err = dev_uc_del(lsyncdev, addr);
+                       break;
+               default:
+                       break;
+               }
+               if (err)
+                       QETH_CARD_TEXT_(card, 2, "b2derr%02x", event);
+       }
+
+unlock:
+       rcu_read_unlock();
+       dev_put(brdev);
+       dev_put(lsyncdev);
+       dev_put(dstdev);
+}
+
+static int qeth_l2_br2dev_queue_work(struct net_device *brdev,
+                                    struct net_device *lsyncdev,
+                                    struct net_device *dstdev,
+                                    unsigned long event,
+                                    const unsigned char *addr)
+{
+       struct qeth_l2_br2dev_event_work *worker_data;
+       struct qeth_card *card;
+
+       worker_data = kzalloc(sizeof(*worker_data), GFP_ATOMIC);
+       if (!worker_data)
+               return -ENOMEM;
+       INIT_WORK(&worker_data->work, qeth_l2_br2dev_worker);
+       worker_data->br_dev = brdev;
+       worker_data->lsync_dev = lsyncdev;
+       worker_data->dst_dev = dstdev;
+       worker_data->event = event;
+       ether_addr_copy(worker_data->addr, addr);
+
+       card = lsyncdev->ml_priv;
+       /* Take a reference on the sw port devices and the bridge */
+       dev_hold(brdev);
+       dev_hold(lsyncdev);
+       dev_hold(dstdev);
+       queue_work(card->event_wq, &worker_data->work);
+       return 0;
+}
+
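Two things keep this handoff safe: the worker copies every field out of worker_data before kfree()ing it, and each dev_hold() taken here is paired with exactly one dev_put() in the worker. A condensed view of the pairing (sketch):

        /* queue side:  dev_hold(brdev); dev_hold(lsyncdev); dev_hold(dstdev);
         * worker side: kfree(br2dev_event_work);  <- members dead from here on,
         *              ... perform the FDB update using the local copies ...
         *              dev_put(brdev); dev_put(lsyncdev); dev_put(dstdev);
         */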
+/* Called under rtnl_lock */
+static int qeth_l2_switchdev_event(struct notifier_block *unused,
+                                  unsigned long event, void *ptr)
+{
+       struct net_device *dstdev, *brdev, *lowerdev;
+       struct switchdev_notifier_fdb_info *fdb_info;
+       struct switchdev_notifier_info *info = ptr;
+       struct list_head *iter;
+       struct qeth_card *card;
+       int rc;
+
+       if (!(event == SWITCHDEV_FDB_ADD_TO_DEVICE ||
+             event == SWITCHDEV_FDB_DEL_TO_DEVICE))
+               return NOTIFY_DONE;
+
+       dstdev = switchdev_notifier_info_to_dev(info);
+       brdev = netdev_master_upper_dev_get_rcu(dstdev);
+       if (!brdev || !netif_is_bridge_master(brdev))
+               return NOTIFY_DONE;
+       fdb_info = container_of(info,
+                               struct switchdev_notifier_fdb_info,
+                               info);
+       iter = &brdev->adj_list.lower;
+       lowerdev = netdev_next_lower_dev_rcu(brdev, &iter);
+       while (lowerdev) {
+               if (qeth_l2_must_learn(lowerdev, dstdev)) {
+                       card = lowerdev->ml_priv;
+                       QETH_CARD_TEXT_(card, 4, "b2dqw%03x", event);
+                       rc = qeth_l2_br2dev_queue_work(brdev, lowerdev,
+                                                      dstdev, event,
+                                                      fdb_info->addr);
+                       if (rc) {
+                               QETH_CARD_TEXT(card, 2, "b2dqwerr");
+                               return NOTIFY_BAD;
+                       }
+               }
+               lowerdev = netdev_next_lower_dev_rcu(brdev, &iter);
+       }
+       return NOTIFY_DONE;
+}
+
+static struct notifier_block qeth_l2_sw_notifier = {
+               .notifier_call = qeth_l2_switchdev_event,
+};
+
+static refcount_t qeth_l2_switchdev_notify_refcnt;
+
+/* Called under rtnl_lock */
+static void qeth_l2_br2dev_get(void)
+{
+       int rc;
+
+       if (!refcount_inc_not_zero(&qeth_l2_switchdev_notify_refcnt)) {
+               rc = register_switchdev_notifier(&qeth_l2_sw_notifier);
+               if (rc) {
+                       QETH_DBF_MESSAGE(2,
+                                        "failed to register qeth_l2_sw_notifier: %d\n",
+                                        rc);
+               } else {
+                       refcount_set(&qeth_l2_switchdev_notify_refcnt, 1);
+                       QETH_DBF_MESSAGE(2, "qeth_l2_sw_notifier registered\n");
+               }
+       }
+       QETH_DBF_TEXT_(SETUP, 2, "b2d+%04d",
+                      qeth_l2_switchdev_notify_refcnt.refs.counter);
+}
+
+/* Called under rtnl_lock */
+static void qeth_l2_br2dev_put(void)
+{
+       int rc;
+
+       if (refcount_dec_and_test(&qeth_l2_switchdev_notify_refcnt)) {
+               rc = unregister_switchdev_notifier(&qeth_l2_sw_notifier);
+               if (rc) {
+                       QETH_DBF_MESSAGE(2,
+                                        "failed to unregister qeth_l2_sw_notifier: %d\n",
+                                        rc);
+               } else {
+                       QETH_DBF_MESSAGE(2,
+                                        "qeth_l2_sw_notifier unregistered\n");
+               }
+       }
+       QETH_DBF_TEXT_(SETUP, 2, "b2d-%04d",
+                      qeth_l2_switchdev_notify_refcnt.refs.counter);
+}
+
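The get/put pair above implements register-on-first-user: refcount_inc_not_zero() fails only at zero, and both callers run under rtnl_lock, which is what makes the zero-to-one transition safe without extra locking. A minimal generic sketch of the same idiom, with hypothetical helper names:

        static refcount_t users;        /* starts at 0 */

        static int thing_get(void)      /* callers serialized, e.g. by rtnl_lock */
        {
                int rc = 0;

                if (!refcount_inc_not_zero(&users)) {
                        rc = do_one_time_register();    /* hypothetical */
                        if (!rc)
                                refcount_set(&users, 1);
                }
                return rc;
        }

        static void thing_put(void)
        {
                if (refcount_dec_and_test(&users))
                        do_one_time_unregister();       /* hypothetical */
        }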
 static int qeth_l2_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
                                  struct net_device *dev, u32 filter_mask,
                                  int nlflags)
@@ -810,16 +1031,19 @@ static int qeth_l2_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
        } else if (enable) {
                qeth_l2_set_pnso_mode(card, QETH_PNSO_ADDR_INFO);
                rc = qeth_l2_dev2br_an_set(card, true);
-               if (rc)
+               if (rc) {
                        qeth_l2_set_pnso_mode(card, QETH_PNSO_NONE);
-               else
+               } else {
                        priv->brport_features |= BR_LEARNING_SYNC;
+                       qeth_l2_br2dev_get();
+               }
        } else {
                rc = qeth_l2_dev2br_an_set(card, false);
                if (!rc) {
                        qeth_l2_set_pnso_mode(card, QETH_PNSO_NONE);
                        priv->brport_features ^= BR_LEARNING_SYNC;
                        qeth_l2_dev2br_fdb_flush(card);
+                       qeth_l2_br2dev_put();
                }
        }
        mutex_unlock(&card->sbp_lock);
@@ -2072,6 +2296,7 @@ static int qeth_l2_probe_device(struct ccwgroup_device *gdev)
 static void qeth_l2_remove_device(struct ccwgroup_device *gdev)
 {
        struct qeth_card *card = dev_get_drvdata(&gdev->dev);
+       struct qeth_priv *priv;
 
        if (gdev->dev.type != &qeth_l2_devtype)
                device_remove_groups(&gdev->dev, qeth_l2_attr_groups);
@@ -2083,8 +2308,15 @@ static void qeth_l2_remove_device(struct ccwgroup_device *gdev)
                qeth_set_offline(card, card->discipline, false);
 
        cancel_work_sync(&card->close_dev_work);
-       if (card->dev->reg_state == NETREG_REGISTERED)
+       if (card->dev->reg_state == NETREG_REGISTERED) {
+               priv = netdev_priv(card->dev);
+               if (priv->brport_features & BR_LEARNING_SYNC) {
+                       rtnl_lock();
+                       qeth_l2_br2dev_put();
+                       rtnl_unlock();
+               }
                unregister_netdev(card->dev);
+       }
 }
 
 static int qeth_l2_set_online(struct qeth_card *card, bool carrier_ok)
@@ -2207,6 +2439,7 @@ EXPORT_SYMBOL_GPL(qeth_l2_discipline);
 static int __init qeth_l2_init(void)
 {
        pr_info("register layer 2 discipline\n");
+       refcount_set(&qeth_l2_switchdev_notify_refcnt, 0);
        return 0;
 }
 
index 84fc7a0..4a84599 100644 (file)
@@ -2642,6 +2642,7 @@ int acornscsi_abort(struct scsi_cmnd *SCpnt)
 //#endif
                clear_bit(SCpnt->device->id * 8 +
                          (u8)(SCpnt->device->lun & 0x7), host->busyluns);
+               fallthrough;
 
        /*
         * We found the command, and cleared it out.  Either
index 6baa9b3..9c4458a 100644 (file)
@@ -1375,6 +1375,7 @@ static void fas216_busservice_intr(FAS216_Info *info, unsigned int stat, unsigne
                case IS_COMPLETE:
                        break;
                }
+               break;
 
        default:
                break;
index 25f6e1a..66652ab 100644 (file)
@@ -453,8 +453,8 @@ static int initialize_controller(struct scsi_device *sdev,
                if (!h->ctlr)
                        err = SCSI_DH_RES_TEMP_UNAVAIL;
                else {
-                       list_add_rcu(&h->node, &h->ctlr->dh_list);
                        h->sdev = sdev;
+                       list_add_rcu(&h->node, &h->ctlr->dh_list);
                }
                spin_unlock(&list_lock);
                err = SCSI_DH_OK;
@@ -778,11 +778,11 @@ static void rdac_bus_detach( struct scsi_device *sdev )
        spin_lock(&list_lock);
        if (h->ctlr) {
                list_del_rcu(&h->node);
-               h->sdev = NULL;
                kref_put(&h->ctlr->kref, release_controller);
        }
        spin_unlock(&list_lock);
        sdev->handler_data = NULL;
+       synchronize_rcu();
        kfree(h);
 }
 
index bee1bec..935b01e 100644 (file)
@@ -807,6 +807,13 @@ static int ibmvfc_init_event_pool(struct ibmvfc_host *vhost,
        for (i = 0; i < size; ++i) {
                struct ibmvfc_event *evt = &pool->events[i];
 
+               /*
+                * evt->active states
+                *  1 = in flight
+                *  0 = being completed
+                * -1 = free/freed
+                */
+               atomic_set(&evt->active, -1);
                atomic_set(&evt->free, 1);
                evt->crq.valid = 0x80;
                evt->crq.ioba = cpu_to_be64(pool->iu_token + (sizeof(*evt->xfer_iu) * i));
@@ -1017,6 +1024,7 @@ static void ibmvfc_free_event(struct ibmvfc_event *evt)
 
        BUG_ON(!ibmvfc_valid_event(pool, evt));
        BUG_ON(atomic_inc_return(&evt->free) != 1);
+       BUG_ON(atomic_dec_and_test(&evt->active));
 
        spin_lock_irqsave(&evt->queue->l_lock, flags);
        list_add_tail(&evt->queue_list, &evt->queue->free);
@@ -1072,6 +1080,12 @@ static void ibmvfc_complete_purge(struct list_head *purge_list)
  **/
 static void ibmvfc_fail_request(struct ibmvfc_event *evt, int error_code)
 {
+       /*
+        * Anything we are failing should still be active. Otherwise, it
+        * implies we already got a response for the command and are doing
+        * something bad like double completing it.
+        */
+       BUG_ON(!atomic_dec_and_test(&evt->active));
        if (evt->cmnd) {
                evt->cmnd->result = (error_code << 16);
                evt->done = ibmvfc_scsi_eh_done;
@@ -1723,6 +1737,7 @@ static int ibmvfc_send_event(struct ibmvfc_event *evt,
 
                evt->done(evt);
        } else {
+               atomic_set(&evt->active, 1);
                spin_unlock_irqrestore(&evt->queue->l_lock, flags);
                ibmvfc_trc_start(evt);
        }
@@ -3251,7 +3266,7 @@ static void ibmvfc_handle_crq(struct ibmvfc_crq *crq, struct ibmvfc_host *vhost,
                return;
        }
 
-       if (unlikely(atomic_read(&evt->free))) {
+       if (unlikely(atomic_dec_if_positive(&evt->active))) {
                dev_err(vhost->dev, "Received duplicate correlation_token 0x%08llx!\n",
                        crq->ioba);
                return;
@@ -3778,7 +3793,7 @@ static void ibmvfc_handle_scrq(struct ibmvfc_crq *crq, struct ibmvfc_host *vhost
                return;
        }
 
-       if (unlikely(atomic_read(&evt->free))) {
+       if (unlikely(atomic_dec_if_positive(&evt->active))) {
                dev_err(vhost->dev, "Received duplicate correlation_token 0x%08llx!\n",
                        crq->ioba);
                return;
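Replacing the atomic_read(&evt->free) test with atomic_dec_if_positive(&evt->active) turns the duplicate check into an atomic claim: atomic_dec_if_positive() stores old - 1 only when old is positive and returns old - 1 either way, so with the tri-state defined at pool init exactly one path can move an event from in-flight to being-completed. A sketch with a hypothetical helper name:

        /* active == 1  -> stores 0, returns 0:  we win and may complete it
         * active == 0  -> no store, returns -1: already being completed
         * active == -1 -> no store, returns -2: already freed
         */
        static bool ibmvfc_claim_completion(struct ibmvfc_event *evt)
        {
                return atomic_dec_if_positive(&evt->active) == 0;
        }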
index 4f0f3ba..92fb889 100644 (file)
@@ -745,6 +745,7 @@ struct ibmvfc_event {
        struct ibmvfc_target *tgt;
        struct scsi_cmnd *cmnd;
        atomic_t free;
+       atomic_t active;
        union ibmvfc_iu *xfer_iu;
        void (*done)(struct ibmvfc_event *evt);
        void (*_done)(struct ibmvfc_event *evt);
index abf7b40..c509440 100644 (file)
@@ -238,7 +238,7 @@ mraid_mm_get_adapter(mimd_t __user *umimd, int *rval)
        mimd_t          mimd;
        uint32_t        adapno;
        int             iterator;
-
+       bool            is_found;
 
        if (copy_from_user(&mimd, umimd, sizeof(mimd_t))) {
                *rval = -EFAULT;
@@ -254,12 +254,16 @@ mraid_mm_get_adapter(mimd_t __user *umimd, int *rval)
 
        adapter = NULL;
        iterator = 0;
+       is_found = false;
 
        list_for_each_entry(adapter, &adapters_list_g, list) {
-               if (iterator++ == adapno) break;
+               if (iterator++ == adapno) {
+                       is_found = true;
+                       break;
+               }
        }
 
-       if (!adapter) {
+       if (!is_found) {
                *rval = -ENODEV;
                return NULL;
        }
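The is_found flag is needed because list_for_each_entry() never leaves its cursor NULL: after a full walk the cursor holds container_of() of the list head itself, so testing the cursor cannot detect a miss. Sketch of the pitfall:

        list_for_each_entry(adapter, &adapters_list_g, list)
                if (iterator++ == adapno)
                        break;
        /* adapter is non-NULL here even if nothing matched; a separate
         * flag (or a list_entry_is_head() check) is required.
         */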
@@ -725,6 +729,7 @@ ioctl_done(uioc_t *kioc)
        uint32_t        adapno;
        int             iterator;
        mraid_mmadp_t*  adapter;
+       bool            is_found;
 
        /*
         * When the kioc returns from driver, make sure it still doesn't
@@ -747,19 +752,23 @@ ioctl_done(uioc_t *kioc)
                iterator        = 0;
                adapter         = NULL;
                adapno          = kioc->adapno;
+               is_found        = false;
 
                con_log(CL_ANN, ( KERN_WARNING "megaraid cmm: completed "
                                        "ioctl that was timedout before\n"));
 
                list_for_each_entry(adapter, &adapters_list_g, list) {
-                       if (iterator++ == adapno) break;
+                       if (iterator++ == adapno) {
+                               is_found = true;
+                               break;
+                       }
                }
 
                kioc->timedout = 0;
 
-               if (adapter) {
+               if (is_found)
                        mraid_mm_dealloc_kioc( adapter, kioc );
-               }
+
        }
        else {
                wake_up(&wait_q);
index c399552..19b1c0c 100644 (file)
@@ -2983,13 +2983,13 @@ _base_check_enable_msix(struct MPT3SAS_ADAPTER *ioc)
 }
 
 /**
- * _base_free_irq - free irq
+ * mpt3sas_base_free_irq - free irq
  * @ioc: per adapter object
  *
  * Freeing respective reply_queue from the list.
  */
-static void
-_base_free_irq(struct MPT3SAS_ADAPTER *ioc)
+void
+mpt3sas_base_free_irq(struct MPT3SAS_ADAPTER *ioc)
 {
        struct adapter_reply_queue *reply_q, *next;
 
@@ -3191,12 +3191,12 @@ _base_check_and_enable_high_iops_queues(struct MPT3SAS_ADAPTER *ioc,
 }
 
 /**
- * _base_disable_msix - disables msix
+ * mpt3sas_base_disable_msix - disables msix
  * @ioc: per adapter object
  *
  */
-static void
-_base_disable_msix(struct MPT3SAS_ADAPTER *ioc)
+void
+mpt3sas_base_disable_msix(struct MPT3SAS_ADAPTER *ioc)
 {
        if (!ioc->msix_enable)
                return;
@@ -3304,8 +3304,8 @@ _base_enable_msix(struct MPT3SAS_ADAPTER *ioc)
        for (i = 0; i < ioc->reply_queue_count; i++) {
                r = _base_request_irq(ioc, i);
                if (r) {
-                       _base_free_irq(ioc);
-                       _base_disable_msix(ioc);
+                       mpt3sas_base_free_irq(ioc);
+                       mpt3sas_base_disable_msix(ioc);
                        goto try_ioapic;
                }
        }
@@ -3342,8 +3342,8 @@ mpt3sas_base_unmap_resources(struct MPT3SAS_ADAPTER *ioc)
 
        dexitprintk(ioc, ioc_info(ioc, "%s\n", __func__));
 
-       _base_free_irq(ioc);
-       _base_disable_msix(ioc);
+       mpt3sas_base_free_irq(ioc);
+       mpt3sas_base_disable_msix(ioc);
 
        kfree(ioc->replyPostRegisterIndex);
        ioc->replyPostRegisterIndex = NULL;
@@ -7613,14 +7613,14 @@ _base_diag_reset(struct MPT3SAS_ADAPTER *ioc)
 }
 
 /**
- * _base_make_ioc_ready - put controller in READY state
+ * mpt3sas_base_make_ioc_ready - put controller in READY state
  * @ioc: per adapter object
  * @type: FORCE_BIG_HAMMER or SOFT_RESET
  *
  * Return: 0 for success, non-zero for failure.
  */
-static int
-_base_make_ioc_ready(struct MPT3SAS_ADAPTER *ioc, enum reset_type type)
+int
+mpt3sas_base_make_ioc_ready(struct MPT3SAS_ADAPTER *ioc, enum reset_type type)
 {
        u32 ioc_state;
        int rc;
@@ -7897,7 +7897,7 @@ mpt3sas_base_free_resources(struct MPT3SAS_ADAPTER *ioc)
        if (ioc->chip_phys && ioc->chip) {
                mpt3sas_base_mask_interrupts(ioc);
                ioc->shost_recovery = 1;
-               _base_make_ioc_ready(ioc, SOFT_RESET);
+               mpt3sas_base_make_ioc_ready(ioc, SOFT_RESET);
                ioc->shost_recovery = 0;
        }
 
@@ -8017,7 +8017,7 @@ mpt3sas_base_attach(struct MPT3SAS_ADAPTER *ioc)
        ioc->build_sg_mpi = &_base_build_sg;
        ioc->build_zero_len_sge_mpi = &_base_build_zero_len_sge;
 
-       r = _base_make_ioc_ready(ioc, SOFT_RESET);
+       r = mpt3sas_base_make_ioc_ready(ioc, SOFT_RESET);
        if (r)
                goto out_free_resources;
 
@@ -8471,7 +8471,7 @@ mpt3sas_base_hard_reset_handler(struct MPT3SAS_ADAPTER *ioc,
        _base_pre_reset_handler(ioc);
        mpt3sas_wait_for_commands_to_complete(ioc);
        mpt3sas_base_mask_interrupts(ioc);
-       r = _base_make_ioc_ready(ioc, type);
+       r = mpt3sas_base_make_ioc_ready(ioc, type);
        if (r)
                goto out;
        _base_clear_outstanding_commands(ioc);
index d4834c8..0c6c3df 100644 (file)
@@ -1730,6 +1730,10 @@ do {     ioc_err(ioc, "In func: %s\n", __func__); \
        status, mpi_request, sz); } while (0)
 
 int mpt3sas_wait_for_ioc(struct MPT3SAS_ADAPTER *ioc, int wait_count);
+int
+mpt3sas_base_make_ioc_ready(struct MPT3SAS_ADAPTER *ioc, enum reset_type type);
+void mpt3sas_base_free_irq(struct MPT3SAS_ADAPTER *ioc);
+void mpt3sas_base_disable_msix(struct MPT3SAS_ADAPTER *ioc);
 
 /* scsih shared API */
 struct scsi_cmnd *mpt3sas_scsih_scsi_lookup_get(struct MPT3SAS_ADAPTER *ioc,
index 866d118..8e64a6f 100644 (file)
@@ -11295,7 +11295,12 @@ scsih_shutdown(struct pci_dev *pdev)
 
        _scsih_ir_shutdown(ioc);
        _scsih_nvme_shutdown(ioc);
-       mpt3sas_base_detach(ioc);
+       mpt3sas_base_mask_interrupts(ioc);
+       ioc->shost_recovery = 1;
+       mpt3sas_base_make_ioc_ready(ioc, SOFT_RESET);
+       ioc->shost_recovery = 0;
+       mpt3sas_base_free_irq(ioc);
+       mpt3sas_base_disable_msix(ioc);
 }
 
 
index 48548a9..32e60f0 100644 (file)
@@ -684,8 +684,7 @@ int pm8001_dev_found(struct domain_device *dev)
 
 void pm8001_task_done(struct sas_task *task)
 {
-       if (!del_timer(&task->slow_task->timer))
-               return;
+       del_timer(&task->slow_task->timer);
        complete(&task->slow_task->completion);
 }
 
@@ -693,9 +692,14 @@ static void pm8001_tmf_timedout(struct timer_list *t)
 {
        struct sas_task_slow *slow = from_timer(slow, t, timer);
        struct sas_task *task = slow->task;
+       unsigned long flags;
 
-       task->task_state_flags |= SAS_TASK_STATE_ABORTED;
-       complete(&task->slow_task->completion);
+       spin_lock_irqsave(&task->task_state_lock, flags);
+       if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) {
+               task->task_state_flags |= SAS_TASK_STATE_ABORTED;
+               complete(&task->slow_task->completion);
+       }
+       spin_unlock_irqrestore(&task->task_state_lock, flags);
 }
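Taking task_state_lock and re-checking SAS_TASK_STATE_DONE closes the race between this timeout and a normal completion: flag updates on both sides happen under the lock, so the timer can no longer mark ABORTED and complete() a task that has already been torn down as DONE. A hedged sketch of the invariant, not the exact libsas code:

        /* timeout:    lock; if (!DONE) { set ABORTED; complete(); } unlock;
         * completion: lock; set DONE; unlock; complete();
         * The lock ensures the two sides observe each other's flags.
         */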
 
 #define PM8001_TASK_TIMEOUT 20
@@ -748,13 +752,10 @@ static int pm8001_exec_internal_tmf_task(struct domain_device *dev,
                }
                res = -TMF_RESP_FUNC_FAILED;
                /* Even TMF timed out, return direct. */
-               if ((task->task_state_flags & SAS_TASK_STATE_ABORTED)) {
-                       if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) {
-                               pm8001_dbg(pm8001_ha, FAIL,
-                                          "TMF task[%x]timeout.\n",
-                                          tmf->tmf);
-                               goto ex_err;
-                       }
+               if (task->task_state_flags & SAS_TASK_STATE_ABORTED) {
+                       pm8001_dbg(pm8001_ha, FAIL, "TMF task[%x]timeout.\n",
+                                  tmf->tmf);
+                       goto ex_err;
                }
 
                if (task->task_status.resp == SAS_TASK_COMPLETE &&
@@ -834,12 +835,9 @@ pm8001_exec_internal_task_abort(struct pm8001_hba_info *pm8001_ha,
                wait_for_completion(&task->slow_task->completion);
                res = TMF_RESP_FUNC_FAILED;
                /* Even TMF timed out, return direct. */
-               if ((task->task_state_flags & SAS_TASK_STATE_ABORTED)) {
-                       if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) {
-                               pm8001_dbg(pm8001_ha, FAIL,
-                                          "TMF task timeout.\n");
-                               goto ex_err;
-                       }
+               if (task->task_state_flags & SAS_TASK_STATE_ABORTED) {
+                       pm8001_dbg(pm8001_ha, FAIL, "TMF task timeout.\n");
+                       goto ex_err;
                }
 
                if (task->task_status.resp == SAS_TASK_COMPLETE &&
index b059bf2..5b6996a 100644 (file)
@@ -475,7 +475,8 @@ static struct scsi_target *scsi_alloc_target(struct device *parent,
                error = shost->hostt->target_alloc(starget);
 
                if(error) {
-                       dev_printk(KERN_ERR, dev, "target allocation failed, error %d\n", error);
+                       if (error != -ENXIO)
+                               dev_err(dev, "target allocation failed, error %d\n", error);
                        /* don't want scsi_target_reap to do the final
                         * put because it will be under the host lock */
                        scsi_target_destroy(starget);
index 32489d2..ae9bfc6 100644 (file)
@@ -807,11 +807,14 @@ store_state_field(struct device *dev, struct device_attribute *attr,
        mutex_lock(&sdev->state_mutex);
        ret = scsi_device_set_state(sdev, state);
        /*
-        * If the device state changes to SDEV_RUNNING, we need to run
-        * the queue to avoid I/O hang.
+        * If the device state changes to SDEV_RUNNING, we need to
+        * rescan the device to revalidate it, and run the queue to
+        * avoid I/O hang.
         */
-       if (ret == 0 && state == SDEV_RUNNING)
+       if (ret == 0 && state == SDEV_RUNNING) {
+               scsi_rescan_device(dev);
                blk_mq_run_hw_queues(sdev->request_queue, true);
+       }
        mutex_unlock(&sdev->state_mutex);
 
        return ret == 0 ? count : -EINVAL;
index b07105a..d8b05d8 100644 (file)
@@ -439,39 +439,10 @@ static umode_t iscsi_iface_attr_is_visible(struct kobject *kobj,
        struct device *dev = container_of(kobj, struct device, kobj);
        struct iscsi_iface *iface = iscsi_dev_to_iface(dev);
        struct iscsi_transport *t = iface->transport;
-       int param;
-       int param_type;
+       int param = -1;
 
        if (attr == &dev_attr_iface_enabled.attr)
                param = ISCSI_NET_PARAM_IFACE_ENABLE;
-       else if (attr == &dev_attr_iface_vlan_id.attr)
-               param = ISCSI_NET_PARAM_VLAN_ID;
-       else if (attr == &dev_attr_iface_vlan_priority.attr)
-               param = ISCSI_NET_PARAM_VLAN_PRIORITY;
-       else if (attr == &dev_attr_iface_vlan_enabled.attr)
-               param = ISCSI_NET_PARAM_VLAN_ENABLED;
-       else if (attr == &dev_attr_iface_mtu.attr)
-               param = ISCSI_NET_PARAM_MTU;
-       else if (attr == &dev_attr_iface_port.attr)
-               param = ISCSI_NET_PARAM_PORT;
-       else if (attr == &dev_attr_iface_ipaddress_state.attr)
-               param = ISCSI_NET_PARAM_IPADDR_STATE;
-       else if (attr == &dev_attr_iface_delayed_ack_en.attr)
-               param = ISCSI_NET_PARAM_DELAYED_ACK_EN;
-       else if (attr == &dev_attr_iface_tcp_nagle_disable.attr)
-               param = ISCSI_NET_PARAM_TCP_NAGLE_DISABLE;
-       else if (attr == &dev_attr_iface_tcp_wsf_disable.attr)
-               param = ISCSI_NET_PARAM_TCP_WSF_DISABLE;
-       else if (attr == &dev_attr_iface_tcp_wsf.attr)
-               param = ISCSI_NET_PARAM_TCP_WSF;
-       else if (attr == &dev_attr_iface_tcp_timer_scale.attr)
-               param = ISCSI_NET_PARAM_TCP_TIMER_SCALE;
-       else if (attr == &dev_attr_iface_tcp_timestamp_en.attr)
-               param = ISCSI_NET_PARAM_TCP_TIMESTAMP_EN;
-       else if (attr == &dev_attr_iface_cache_id.attr)
-               param = ISCSI_NET_PARAM_CACHE_ID;
-       else if (attr == &dev_attr_iface_redirect_en.attr)
-               param = ISCSI_NET_PARAM_REDIRECT_EN;
        else if (attr == &dev_attr_iface_def_taskmgmt_tmo.attr)
                param = ISCSI_IFACE_PARAM_DEF_TASKMGMT_TMO;
        else if (attr == &dev_attr_iface_header_digest.attr)
@@ -508,6 +479,38 @@ static umode_t iscsi_iface_attr_is_visible(struct kobject *kobj,
                param = ISCSI_IFACE_PARAM_STRICT_LOGIN_COMP_EN;
        else if (attr == &dev_attr_iface_initiator_name.attr)
                param = ISCSI_IFACE_PARAM_INITIATOR_NAME;
+
+       if (param != -1)
+               return t->attr_is_visible(ISCSI_IFACE_PARAM, param);
+
+       if (attr == &dev_attr_iface_vlan_id.attr)
+               param = ISCSI_NET_PARAM_VLAN_ID;
+       else if (attr == &dev_attr_iface_vlan_priority.attr)
+               param = ISCSI_NET_PARAM_VLAN_PRIORITY;
+       else if (attr == &dev_attr_iface_vlan_enabled.attr)
+               param = ISCSI_NET_PARAM_VLAN_ENABLED;
+       else if (attr == &dev_attr_iface_mtu.attr)
+               param = ISCSI_NET_PARAM_MTU;
+       else if (attr == &dev_attr_iface_port.attr)
+               param = ISCSI_NET_PARAM_PORT;
+       else if (attr == &dev_attr_iface_ipaddress_state.attr)
+               param = ISCSI_NET_PARAM_IPADDR_STATE;
+       else if (attr == &dev_attr_iface_delayed_ack_en.attr)
+               param = ISCSI_NET_PARAM_DELAYED_ACK_EN;
+       else if (attr == &dev_attr_iface_tcp_nagle_disable.attr)
+               param = ISCSI_NET_PARAM_TCP_NAGLE_DISABLE;
+       else if (attr == &dev_attr_iface_tcp_wsf_disable.attr)
+               param = ISCSI_NET_PARAM_TCP_WSF_DISABLE;
+       else if (attr == &dev_attr_iface_tcp_wsf.attr)
+               param = ISCSI_NET_PARAM_TCP_WSF;
+       else if (attr == &dev_attr_iface_tcp_timer_scale.attr)
+               param = ISCSI_NET_PARAM_TCP_TIMER_SCALE;
+       else if (attr == &dev_attr_iface_tcp_timestamp_en.attr)
+               param = ISCSI_NET_PARAM_TCP_TIMESTAMP_EN;
+       else if (attr == &dev_attr_iface_cache_id.attr)
+               param = ISCSI_NET_PARAM_CACHE_ID;
+       else if (attr == &dev_attr_iface_redirect_en.attr)
+               param = ISCSI_NET_PARAM_REDIRECT_EN;
        else if (iface->iface_type == ISCSI_IFACE_TYPE_IPV4) {
                if (attr == &dev_attr_ipv4_iface_ipaddress.attr)
                        param = ISCSI_NET_PARAM_IPV4_ADDR;
@@ -598,32 +601,7 @@ static umode_t iscsi_iface_attr_is_visible(struct kobject *kobj,
                return 0;
        }
 
-       switch (param) {
-       case ISCSI_IFACE_PARAM_DEF_TASKMGMT_TMO:
-       case ISCSI_IFACE_PARAM_HDRDGST_EN:
-       case ISCSI_IFACE_PARAM_DATADGST_EN:
-       case ISCSI_IFACE_PARAM_IMM_DATA_EN:
-       case ISCSI_IFACE_PARAM_INITIAL_R2T_EN:
-       case ISCSI_IFACE_PARAM_DATASEQ_INORDER_EN:
-       case ISCSI_IFACE_PARAM_PDU_INORDER_EN:
-       case ISCSI_IFACE_PARAM_ERL:
-       case ISCSI_IFACE_PARAM_MAX_RECV_DLENGTH:
-       case ISCSI_IFACE_PARAM_FIRST_BURST:
-       case ISCSI_IFACE_PARAM_MAX_R2T:
-       case ISCSI_IFACE_PARAM_MAX_BURST:
-       case ISCSI_IFACE_PARAM_CHAP_AUTH_EN:
-       case ISCSI_IFACE_PARAM_BIDI_CHAP_EN:
-       case ISCSI_IFACE_PARAM_DISCOVERY_AUTH_OPTIONAL:
-       case ISCSI_IFACE_PARAM_DISCOVERY_LOGOUT_EN:
-       case ISCSI_IFACE_PARAM_STRICT_LOGIN_COMP_EN:
-       case ISCSI_IFACE_PARAM_INITIATOR_NAME:
-               param_type = ISCSI_IFACE_PARAM;
-               break;
-       default:
-               param_type = ISCSI_NET_PARAM;
-       }
-
-       return t->attr_is_visible(param_type, param);
+       return t->attr_is_visible(ISCSI_NET_PARAM, param);
 }
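The rework resolves iface-scoped attributes first and returns early, so the tail of the function can hard-code ISCSI_NET_PARAM instead of recovering a param_type from the long switch it replaces; param == -1 acts as the "not matched yet" sentinel. Condensed control flow, with hypothetical lookup helpers:

        int param = iface_param_from_attr(attr);        /* hypothetical */
        if (param != -1)
                return t->attr_is_visible(ISCSI_IFACE_PARAM, param);

        param = net_param_from_attr(attr, iface);       /* hypothetical */
        if (param == -1)
                return 0;       /* unknown attribute */
        return t->attr_is_visible(ISCSI_NET_PARAM, param);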
 
 static struct attribute *iscsi_iface_attrs[] = {
index 94c254e..a6d3ac0 100644 (file)
@@ -221,7 +221,7 @@ static unsigned int sr_get_events(struct scsi_device *sdev)
        else if (med->media_event_code == 2)
                return DISK_EVENT_MEDIA_CHANGE;
        else if (med->media_event_code == 3)
-               return DISK_EVENT_EJECT_REQUEST;
+               return DISK_EVENT_MEDIA_CHANGE;
        return 0;
 }
 
index 19a02e9..8fcdf89 100644 (file)
@@ -4547,7 +4547,8 @@ static int qlge_probe(struct pci_dev *pdev,
        static int cards_found;
        int err;
 
-       devlink = devlink_alloc(&qlge_devlink_ops, sizeof(struct qlge_adapter));
+       devlink = devlink_alloc(&qlge_devlink_ops, sizeof(struct qlge_adapter),
+                               &pdev->dev);
        if (!devlink)
                return -ENOMEM;
 
@@ -4613,7 +4614,7 @@ static int qlge_probe(struct pci_dev *pdev,
                goto netdev_free;
        }
 
-       err = devlink_register(devlink, &pdev->dev);
+       err = devlink_register(devlink);
        if (err)
                goto netdev_free;
 
index b32f4ee..ca1b231 100644 (file)
@@ -25,7 +25,7 @@
 #include "target_core_alua.h"
 
 static sense_reason_t
-sbc_check_prot(struct se_device *, struct se_cmd *, unsigned char *, u32, bool);
+sbc_check_prot(struct se_device *, struct se_cmd *, unsigned char, u32, bool);
 static sense_reason_t sbc_execute_unmap(struct se_cmd *cmd);
 
 static sense_reason_t
@@ -279,14 +279,14 @@ static inline unsigned long long transport_lba_64_ext(unsigned char *cdb)
 }
 
 static sense_reason_t
-sbc_setup_write_same(struct se_cmd *cmd, unsigned char *flags, struct sbc_ops *ops)
+sbc_setup_write_same(struct se_cmd *cmd, unsigned char flags, struct sbc_ops *ops)
 {
        struct se_device *dev = cmd->se_dev;
        sector_t end_lba = dev->transport->get_blocks(dev) + 1;
        unsigned int sectors = sbc_get_write_same_sectors(cmd);
        sense_reason_t ret;
 
-       if ((flags[0] & 0x04) || (flags[0] & 0x02)) {
+       if ((flags & 0x04) || (flags & 0x02)) {
                pr_err("WRITE_SAME PBDATA and LBDATA"
                        " bits not supported for Block Discard"
                        " Emulation\n");
@@ -308,7 +308,7 @@ sbc_setup_write_same(struct se_cmd *cmd, unsigned char *flags, struct sbc_ops *o
        }
 
        /* We always have ANC_SUP == 0 so setting ANCHOR is always an error */
-       if (flags[0] & 0x10) {
+       if (flags & 0x10) {
                pr_warn("WRITE SAME with ANCHOR not supported\n");
                return TCM_INVALID_CDB_FIELD;
        }
@@ -316,7 +316,7 @@ sbc_setup_write_same(struct se_cmd *cmd, unsigned char *flags, struct sbc_ops *o
         * Special case for WRITE_SAME w/ UNMAP=1 that ends up getting
         * translated into block discard requests within backend code.
         */
-       if (flags[0] & 0x08) {
+       if (flags & 0x08) {
                if (!ops->execute_unmap)
                        return TCM_UNSUPPORTED_SCSI_OPCODE;
 
@@ -331,7 +331,7 @@ sbc_setup_write_same(struct se_cmd *cmd, unsigned char *flags, struct sbc_ops *o
        if (!ops->execute_write_same)
                return TCM_UNSUPPORTED_SCSI_OPCODE;
 
-       ret = sbc_check_prot(dev, cmd, &cmd->t_task_cdb[0], sectors, true);
+       ret = sbc_check_prot(dev, cmd, flags >> 5, sectors, true);
        if (ret)
                return ret;
 
@@ -717,10 +717,9 @@ sbc_set_prot_op_checks(u8 protect, bool fabric_prot, enum target_prot_type prot_
 }
 
 static sense_reason_t
-sbc_check_prot(struct se_device *dev, struct se_cmd *cmd, unsigned char *cdb,
+sbc_check_prot(struct se_device *dev, struct se_cmd *cmd, unsigned char protect,
               u32 sectors, bool is_write)
 {
-       u8 protect = cdb[1] >> 5;
        int sp_ops = cmd->se_sess->sup_prot_ops;
        int pi_prot_type = dev->dev_attrib.pi_prot_type;
        bool fabric_prot = false;
@@ -768,7 +767,7 @@ sbc_check_prot(struct se_device *dev, struct se_cmd *cmd, unsigned char *cdb,
                fallthrough;
        default:
                pr_err("Unable to determine pi_prot_type for CDB: 0x%02x "
-                      "PROTECT: 0x%02x\n", cdb[0], protect);
+                      "PROTECT: 0x%02x\n", cmd->t_task_cdb[0], protect);
                return TCM_INVALID_CDB_FIELD;
        }
 
@@ -843,7 +842,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops)
                if (sbc_check_dpofua(dev, cmd, cdb))
                        return TCM_INVALID_CDB_FIELD;
 
-               ret = sbc_check_prot(dev, cmd, cdb, sectors, false);
+               ret = sbc_check_prot(dev, cmd, cdb[1] >> 5, sectors, false);
                if (ret)
                        return ret;
 
@@ -857,7 +856,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops)
                if (sbc_check_dpofua(dev, cmd, cdb))
                        return TCM_INVALID_CDB_FIELD;
 
-               ret = sbc_check_prot(dev, cmd, cdb, sectors, false);
+               ret = sbc_check_prot(dev, cmd, cdb[1] >> 5, sectors, false);
                if (ret)
                        return ret;
 
@@ -871,7 +870,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops)
                if (sbc_check_dpofua(dev, cmd, cdb))
                        return TCM_INVALID_CDB_FIELD;
 
-               ret = sbc_check_prot(dev, cmd, cdb, sectors, false);
+               ret = sbc_check_prot(dev, cmd, cdb[1] >> 5, sectors, false);
                if (ret)
                        return ret;
 
@@ -892,7 +891,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops)
                if (sbc_check_dpofua(dev, cmd, cdb))
                        return TCM_INVALID_CDB_FIELD;
 
-               ret = sbc_check_prot(dev, cmd, cdb, sectors, true);
+               ret = sbc_check_prot(dev, cmd, cdb[1] >> 5, sectors, true);
                if (ret)
                        return ret;
 
@@ -906,7 +905,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops)
                if (sbc_check_dpofua(dev, cmd, cdb))
                        return TCM_INVALID_CDB_FIELD;
 
-               ret = sbc_check_prot(dev, cmd, cdb, sectors, true);
+               ret = sbc_check_prot(dev, cmd, cdb[1] >> 5, sectors, true);
                if (ret)
                        return ret;
 
@@ -921,7 +920,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops)
                if (sbc_check_dpofua(dev, cmd, cdb))
                        return TCM_INVALID_CDB_FIELD;
 
-               ret = sbc_check_prot(dev, cmd, cdb, sectors, true);
+               ret = sbc_check_prot(dev, cmd, cdb[1] >> 5, sectors, true);
                if (ret)
                        return ret;
 
@@ -980,7 +979,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops)
                        size = sbc_get_size(cmd, 1);
                        cmd->t_task_lba = get_unaligned_be64(&cdb[12]);
 
-                       ret = sbc_setup_write_same(cmd, &cdb[10], ops);
+                       ret = sbc_setup_write_same(cmd, cdb[10], ops);
                        if (ret)
                                return ret;
                        break;
@@ -1079,7 +1078,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops)
                size = sbc_get_size(cmd, 1);
                cmd->t_task_lba = get_unaligned_be64(&cdb[2]);
 
-               ret = sbc_setup_write_same(cmd, &cdb[1], ops);
+               ret = sbc_setup_write_same(cmd, cdb[1], ops);
                if (ret)
                        return ret;
                break;
@@ -1097,7 +1096,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops)
                 * Follow sbcr26 with WRITE_SAME (10) and check for the existence
                 * of byte 1 bit 3 UNMAP instead of original reserved field
                 */
-               ret = sbc_setup_write_same(cmd, &cdb[1], ops);
+               ret = sbc_setup_write_same(cmd, cdb[1], ops);
                if (ret)
                        return ret;
                break;
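Passing the flag byte by value (and, for sbc_check_prot(), the already-extracted 3-bit PROTECT field) makes every call site explicit about which CDB byte it consumes, instead of handing down a pointer and re-indexing. For reference, the WRITE SAME byte-1 bits tested in sbc_setup_write_same() decode as follows (sketch, per SBC):

        /* cdb[1] for WRITE SAME, as used above:
         *   0x02 LBDATA, 0x04 PBDATA - rejected (not emulated)
         *   0x08 UNMAP               - translated to block discard
         *   0x10 ANCHOR              - rejected (ANC_SUP == 0)
         *   bits 7:5 WRPROTECT       - passed on as "flags >> 5"
         */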
index 7e35edd..26ceabe 100644 (file)
@@ -886,7 +886,7 @@ void target_complete_cmd(struct se_cmd *cmd, u8 scsi_status)
        INIT_WORK(&cmd->work, success ? target_complete_ok_work :
                  target_complete_failure_work);
 
-       if (wwn->cmd_compl_affinity == SE_COMPL_AFFINITY_CPUID)
+       if (!wwn || wwn->cmd_compl_affinity == SE_COMPL_AFFINITY_CPUID)
                cpu = cmd->cpuid;
        else
                cpu = wwn->cmd_compl_affinity;
index fdf79bc..35d5908 100644 (file)
@@ -824,7 +824,7 @@ static struct usb_class_driver wdm_class = {
 };
 
 /* --- WWAN framework integration --- */
-#ifdef CONFIG_WWAN
+#ifdef CONFIG_WWAN_CORE
 static int wdm_wwan_port_start(struct wwan_port *port)
 {
        struct wdm_device *desc = wwan_port_get_drvdata(port);
@@ -963,11 +963,11 @@ static void wdm_wwan_rx(struct wdm_device *desc, int length)
        /* inbuf has been copied, it is safe to check for outstanding data */
        schedule_work(&desc->service_outs_intr);
 }
-#else /* CONFIG_WWAN */
+#else /* CONFIG_WWAN_CORE */
 static void wdm_wwan_init(struct wdm_device *desc) {}
 static void wdm_wwan_deinit(struct wdm_device *desc) {}
 static void wdm_wwan_rx(struct wdm_device *desc, int length) {}
-#endif /* CONFIG_WWAN */
+#endif /* CONFIG_WWAN_CORE */
 
 /* --- error handling --- */
 static void wdm_rxwork(struct work_struct *work)
index b974644..9618ba6 100644 (file)
@@ -1133,7 +1133,7 @@ static int do_proc_control(struct usb_dev_state *ps,
                "wIndex=%04x wLength=%04x\n",
                ctrl->bRequestType, ctrl->bRequest, ctrl->wValue,
                ctrl->wIndex, ctrl->wLength);
-       if (ctrl->bRequestType & 0x80) {
+       if ((ctrl->bRequestType & USB_DIR_IN) && ctrl->wLength) {
                pipe = usb_rcvctrlpipe(dev, 0);
                snoop_urb(dev, NULL, pipe, ctrl->wLength, tmo, SUBMIT, NULL, 0);
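
The tightened condition treats a control request as a read only when the direction bit and a nonzero wLength agree; a zero-length IN request has no data stage and should go through the send path. A minimal sketch, assuming only the standard USB_DIR_IN value of 0x80 (helper name hypothetical):

#include <stdint.h>
#include <stdio.h>

#define USB_DIR_IN 0x80	/* bit 7 of bmRequestType */

/* A control transfer behaves as a read only when it is IN *and*
 * carries a data stage; wLength == 0 leaves nothing to read. */
static int is_control_read(uint8_t bRequestType, uint16_t wLength)
{
	return (bRequestType & USB_DIR_IN) && wLength;
}

int main(void)
{
	printf("%d\n", is_control_read(0x80, 0));	/* 0: zero-length IN */
	printf("%d\n", is_control_read(0xc0, 8));	/* 1: vendor IN read */
	return 0;
}
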
 
index d1efc71..86658a8 100644 (file)
@@ -48,6 +48,7 @@
 
 #define USB_TP_TRANSMISSION_DELAY      40      /* ns */
 #define USB_TP_TRANSMISSION_DELAY_MAX  65535   /* ns */
+#define USB_PING_RESPONSE_TIME         400     /* ns */
 
 /* Protect struct usb_device->state and ->children members
  * Note: Both are also protected by ->dev.sem, except that ->state can
@@ -182,8 +183,9 @@ int usb_device_supports_lpm(struct usb_device *udev)
 }
 
 /*
- * Set the Maximum Exit Latency (MEL) for the host to initiate a transition from
- * either U1 or U2.
+ * Set the Maximum Exit Latency (MEL) for the host to wake up the path from
+ * U1/U2, send a PING to the device and receive a PING_RESPONSE.
+ * See USB 3.1 section C.1.5.2
  */
 static void usb_set_lpm_mel(struct usb_device *udev,
                struct usb3_lpm_parameters *udev_lpm_params,
@@ -193,35 +195,37 @@ static void usb_set_lpm_mel(struct usb_device *udev,
                unsigned int hub_exit_latency)
 {
        unsigned int total_mel;
-       unsigned int device_mel;
-       unsigned int hub_mel;
 
        /*
-        * Calculate the time it takes to transition all links from the roothub
-        * to the parent hub into U0.  The parent hub must then decode the
-        * packet (hub header decode latency) to figure out which port it was
-        * bound for.
-        *
-        * The Hub Header decode latency is expressed in 0.1us intervals (0x1
-        * means 0.1us).  Multiply that by 100 to get nanoseconds.
+        * tMEL1. Time to transition the path from host to device into U0.
+        * MEL for parent already contains the delay up to parent, so only add
+        * the exit latency for the last link (pick the slower exit latency),
+        * and the hub header decode latency. See USB 3.1 section C 2.2.1.
+        * Store MEL in nanoseconds.
         */
        total_mel = hub_lpm_params->mel +
-               (hub->descriptor->u.ss.bHubHdrDecLat * 100);
+               max(udev_exit_latency, hub_exit_latency) * 1000 +
+               hub->descriptor->u.ss.bHubHdrDecLat * 100;
 
        /*
-        * How long will it take to transition the downstream hub's port into
-        * U0?  The greater of either the hub exit latency or the device exit
-        * latency.
-        *
-        * The BOS U1/U2 exit latencies are expressed in 1us intervals.
-        * Multiply that by 1000 to get nanoseconds.
+        * tMEL2. Time to submit PING packet. Sum of tTPTransmissionDelay for
+        * each link + wHubDelay for each hub. Add only for last link.
+        * tMEL4, the time for PING_RESPONSE to traverse upstream, is similar.
+        * Multiply by 2 to include it as well.
         */
-       device_mel = udev_exit_latency * 1000;
-       hub_mel = hub_exit_latency * 1000;
-       if (device_mel > hub_mel)
-               total_mel += device_mel;
-       else
-               total_mel += hub_mel;
+       total_mel += (__le16_to_cpu(hub->descriptor->u.ss.wHubDelay) +
+                     USB_TP_TRANSMISSION_DELAY) * 2;
+
+       /*
+        * tMEL3, tPingResponse. Time taken by device to generate PING_RESPONSE
+        * after receiving PING. Also add 2100ns as stated in USB 3.1 C 1.5.2.4
+        * to cover the delay if the PING_RESPONSE is queued behind a Max Packet
+        * Size DP.
+        * Note these delays should be added only once for the entire path, so
+        * add them to the MEL of the device connected to the roothub.
+        */
+       if (!hub->hdev->parent)
+               total_mel += USB_PING_RESPONSE_TIME + 2100;
 
        udev_lpm_params->mel = total_mel;
 }
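
Putting the tMEL pieces together, here is a standalone sketch of the same arithmetic with made-up latencies (every number below is hypothetical; only the formula mirrors the hunk above):

#include <stdio.h>

int main(void)
{
	unsigned int parent_mel = 2100;		/* ns, MEL accumulated up to the parent hub */
	unsigned int udev_el = 10, hub_el = 8;	/* us, BOS U1/U2 exit latencies */
	unsigned int hdr_dec_lat = 4;		/* bHubHdrDecLat, 0.1 us units */
	unsigned int hub_delay = 200, tp_delay = 40;	/* ns */
	int on_roothub = 1;			/* device attached to the roothub */

	unsigned int total = parent_mel
		+ (udev_el > hub_el ? udev_el : hub_el) * 1000	/* tMEL1 */
		+ hdr_dec_lat * 100
		+ (hub_delay + tp_delay) * 2;			/* tMEL2 + tMEL4 */
	if (on_roothub)
		total += 400 + 2100;				/* tMEL3 + queuing allowance */

	printf("total MEL = %u ns\n", total);
	return 0;
}
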
@@ -4112,6 +4116,47 @@ static int usb_set_lpm_timeout(struct usb_device *udev,
        return 0;
 }
 
+/*
+ * Don't allow device-initiated U1/U2 if the system exit latency + one bus
+ * interval is greater than the minimum service interval of any active
+ * periodic endpoint. See USB 3.2 section 9.4.9
+ */
+static bool usb_device_may_initiate_lpm(struct usb_device *udev,
+                                       enum usb3_link_state state)
+{
+       unsigned int sel;               /* us */
+       int i, j;
+
+       if (state == USB3_LPM_U1)
+               sel = DIV_ROUND_UP(udev->u1_params.sel, 1000);
+       else if (state == USB3_LPM_U2)
+               sel = DIV_ROUND_UP(udev->u2_params.sel, 1000);
+       else
+               return false;
+
+       for (i = 0; i < udev->actconfig->desc.bNumInterfaces; i++) {
+               struct usb_interface *intf;
+               struct usb_endpoint_descriptor *desc;
+               unsigned int interval;
+
+               intf = udev->actconfig->interface[i];
+               if (!intf)
+                       continue;
+
+               for (j = 0; j < intf->cur_altsetting->desc.bNumEndpoints; j++) {
+                       desc = &intf->cur_altsetting->endpoint[j].desc;
+
+                       if (usb_endpoint_xfer_int(desc) ||
+                           usb_endpoint_xfer_isoc(desc)) {
+                               interval = (1 << (desc->bInterval - 1)) * 125;
+                               if (sel + 125 > interval)
+                                       return false;
+                       }
+               }
+       }
+       return true;
+}
+
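
As a worked instance of the interval check above (numbers hypothetical): a periodic endpoint with bInterval = 4 has a service interval of 2^(4-1) * 125 = 1000 us, so a 900 us exit latency is rejected because 900 + 125 > 1000.

#include <stdio.h>

int main(void)
{
	unsigned int bInterval = 4;	/* hypothetical periodic endpoint */
	unsigned int interval = (1u << (bInterval - 1)) * 125;	/* us */
	unsigned int sel = 900;		/* us, hypothetical exit latency */

	/* Mirror of the rejection test: sel plus one bus interval
	 * (125 us) must not exceed the endpoint service interval. */
	printf("interval=%u us allowed=%d\n", interval, sel + 125 <= interval);
	return 0;
}
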
 /*
  * Enable the hub-initiated U1/U2 idle timeouts, and enable device-initiated
  * U1/U2 entry.
@@ -4184,20 +4229,23 @@ static void usb_enable_link_state(struct usb_hcd *hcd, struct usb_device *udev,
         * U1/U2_ENABLE
         */
        if (udev->actconfig &&
-           usb_set_device_initiated_lpm(udev, state, true) == 0) {
-               if (state == USB3_LPM_U1)
-                       udev->usb3_lpm_u1_enabled = 1;
-               else if (state == USB3_LPM_U2)
-                       udev->usb3_lpm_u2_enabled = 1;
-       } else {
-               /* Don't request U1/U2 entry if the device
-                * cannot transition to U1/U2.
-                */
-               usb_set_lpm_timeout(udev, state, 0);
-               hcd->driver->disable_usb3_lpm_timeout(hcd, udev, state);
+           usb_device_may_initiate_lpm(udev, state)) {
+               if (usb_set_device_initiated_lpm(udev, state, true)) {
+                       /*
+                        * Request to enable device-initiated U1/U2 failed;
+                        * better to turn off LPM in this case.
+                        */
+                       usb_set_lpm_timeout(udev, state, 0);
+                       hcd->driver->disable_usb3_lpm_timeout(hcd, udev, state);
+                       return;
+               }
        }
-}
 
+       if (state == USB3_LPM_U1)
+               udev->usb3_lpm_u1_enabled = 1;
+       else if (state == USB3_LPM_U2)
+               udev->usb3_lpm_u2_enabled = 1;
+}
 /*
  * Disable the hub-initiated U1/U2 idle timeouts, and disable device-initiated
  * U1/U2 entry.
index 6114cf8..8239fe7 100644 (file)
@@ -501,10 +501,6 @@ static const struct usb_device_id usb_quirk_list[] = {
        /* DJI CineSSD */
        { USB_DEVICE(0x2ca3, 0x0031), .driver_info = USB_QUIRK_NO_LPM },
 
-       /* Fibocom L850-GL LTE Modem */
-       { USB_DEVICE(0x2cb7, 0x0007), .driver_info =
-                       USB_QUIRK_IGNORE_REMOTE_WAKEUP },
-
        /* INTEL VALUE SSD */
        { USB_DEVICE(0x8086, 0xf1a5), .driver_info = USB_QUIRK_RESET_RESUME },
 
index ab6b815..483de2b 100644 (file)
@@ -383,6 +383,9 @@ enum dwc2_ep0_state {
  *                     0 - No (default)
  *                     1 - Partial power down
  *                     2 - Hibernation
+ * @no_clock_gating:   Specifies whether to avoid the clock gating feature.
+ *                     0 - No (use clock gating)
+ *                     1 - Yes (avoid it)
  * @lpm:               Enable LPM support.
  *                     0 - No
  *                     1 - Yes
@@ -480,6 +483,7 @@ struct dwc2_core_params {
 #define DWC2_POWER_DOWN_PARAM_NONE             0
 #define DWC2_POWER_DOWN_PARAM_PARTIAL          1
 #define DWC2_POWER_DOWN_PARAM_HIBERNATION      2
+       bool no_clock_gating;
 
        bool lpm;
        bool lpm_clock_gating;
index a5ab038..a5c52b2 100644 (file)
@@ -556,7 +556,8 @@ static void dwc2_handle_usb_suspend_intr(struct dwc2_hsotg *hsotg)
                                 * If neither hibernation nor partial power down is supported,
                                 * clock gating is used to save power.
                                 */
-                               dwc2_gadget_enter_clock_gating(hsotg);
+                               if (!hsotg->params.no_clock_gating)
+                                       dwc2_gadget_enter_clock_gating(hsotg);
                        }
 
                        /*
index c581ee4..3146df6 100644 (file)
@@ -2749,12 +2749,14 @@ static void dwc2_hsotg_complete_in(struct dwc2_hsotg *hsotg,
                return;
        }
 
-       /* Zlp for all endpoints, for ep0 only in DATA IN stage */
+       /* ZLP for all endpoints in non-DDMA mode, for ep0 only in DATA IN stage */
        if (hs_ep->send_zlp) {
-               dwc2_hsotg_program_zlp(hsotg, hs_ep);
                hs_ep->send_zlp = 0;
-               /* transfer will be completed on next complete interrupt */
-               return;
+               if (!using_desc_dma(hsotg)) {
+                       dwc2_hsotg_program_zlp(hsotg, hs_ep);
+                       /* transfer will be completed on next complete interrupt */
+                       return;
+               }
        }
 
        if (hs_ep->index == 0 && hsotg->ep0_state == DWC2_EP0_DATA_IN) {
@@ -3900,9 +3902,27 @@ static void dwc2_hsotg_ep_stop_xfr(struct dwc2_hsotg *hsotg,
                                         __func__);
                }
        } else {
+               /* Mask GINTSTS_GOUTNAKEFF interrupt */
+               dwc2_hsotg_disable_gsint(hsotg, GINTSTS_GOUTNAKEFF);
+
                if (!(dwc2_readl(hsotg, GINTSTS) & GINTSTS_GOUTNAKEFF))
                        dwc2_set_bit(hsotg, DCTL, DCTL_SGOUTNAK);
 
+               if (!using_dma(hsotg)) {
+                       /* Wait for GINTSTS_RXFLVL interrupt */
+                       if (dwc2_hsotg_wait_bit_set(hsotg, GINTSTS,
+                                                   GINTSTS_RXFLVL, 100)) {
+                               dev_warn(hsotg->dev, "%s: timeout GINTSTS.RXFLVL\n",
+                                        __func__);
+                       } else {
+                               /*
+                                * Pop GLOBAL OUT NAK status packet from RxFIFO
+                                * to assert GOUTNAKEFF interrupt
+                                */
+                               dwc2_readl(hsotg, GRXSTSP);
+                       }
+               }
+
                /* Wait for global nak to take effect */
                if (dwc2_hsotg_wait_bit_set(hsotg, GINTSTS,
                                            GINTSTS_GOUTNAKEFF, 100))
@@ -4348,6 +4368,9 @@ static int dwc2_hsotg_ep_sethalt(struct usb_ep *ep, int value, bool now)
                epctl = dwc2_readl(hs, epreg);
 
                if (value) {
+                       /* Unmask GOUTNAKEFF interrupt */
+                       dwc2_hsotg_en_gsint(hs, GINTSTS_GOUTNAKEFF);
+
                        if (!(dwc2_readl(hs, GINTSTS) & GINTSTS_GOUTNAKEFF))
                                dwc2_set_bit(hs, DCTL, DCTL_SGOUTNAK);
                        // STALL bit will be set in GOUTNAKEFF interrupt handler
index 035d491..2a78289 100644 (file)
@@ -3338,7 +3338,8 @@ int dwc2_port_suspend(struct dwc2_hsotg *hsotg, u16 windex)
                 * If neither hibernation nor partial power down is supported,
                 * clock gating is used to save power.
                 */
-               dwc2_host_enter_clock_gating(hsotg);
+               if (!hsotg->params.no_clock_gating)
+                       dwc2_host_enter_clock_gating(hsotg);
                break;
        }
 
@@ -4402,7 +4403,8 @@ static int _dwc2_hcd_suspend(struct usb_hcd *hcd)
                 * If neither hibernation nor partial power down is supported,
                 * clock gating is used to save power.
                 */
-               dwc2_host_enter_clock_gating(hsotg);
+               if (!hsotg->params.no_clock_gating)
+                       dwc2_host_enter_clock_gating(hsotg);
 
                /* After entering suspend, hardware is not accessible */
                clear_bit(HCD_FLAG_HW_ACCESSIBLE, &hcd->flags);
index 67c5eb1..59e1193 100644 (file)
@@ -76,6 +76,7 @@ static void dwc2_set_s3c6400_params(struct dwc2_hsotg *hsotg)
        struct dwc2_core_params *p = &hsotg->params;
 
        p->power_down = DWC2_POWER_DOWN_PARAM_NONE;
+       p->no_clock_gating = true;
        p->phy_utmi_width = 8;
 }
 
index dccdf13..5991766 100644 (file)
@@ -1279,6 +1279,7 @@ struct dwc3 {
        unsigned                dis_metastability_quirk:1;
 
        unsigned                dis_split_quirk:1;
+       unsigned                async_callbacks:1;
 
        u16                     imod_interval;
 };
index 3cd2942..2f9e45e 100644 (file)
@@ -597,11 +597,13 @@ static int dwc3_ep0_set_address(struct dwc3 *dwc, struct usb_ctrlrequest *ctrl)
 
 static int dwc3_ep0_delegate_req(struct dwc3 *dwc, struct usb_ctrlrequest *ctrl)
 {
-       int ret;
+       int ret = -EINVAL;
 
-       spin_unlock(&dwc->lock);
-       ret = dwc->gadget_driver->setup(dwc->gadget, ctrl);
-       spin_lock(&dwc->lock);
+       if (dwc->async_callbacks) {
+               spin_unlock(&dwc->lock);
+               ret = dwc->gadget_driver->setup(dwc->gadget, ctrl);
+               spin_lock(&dwc->lock);
+       }
        return ret;
 }
 
index af6d7f1..45f2bc0 100644 (file)
@@ -2585,6 +2585,16 @@ static int dwc3_gadget_vbus_draw(struct usb_gadget *g, unsigned int mA)
        return ret;
 }
 
+static void dwc3_gadget_async_callbacks(struct usb_gadget *g, bool enable)
+{
+       struct dwc3             *dwc = gadget_to_dwc(g);
+       unsigned long           flags;
+
+       spin_lock_irqsave(&dwc->lock, flags);
+       dwc->async_callbacks = enable;
+       spin_unlock_irqrestore(&dwc->lock, flags);
+}
+
 static const struct usb_gadget_ops dwc3_gadget_ops = {
        .get_frame              = dwc3_gadget_get_frame,
        .wakeup                 = dwc3_gadget_wakeup,
@@ -2596,6 +2606,7 @@ static const struct usb_gadget_ops dwc3_gadget_ops = {
        .udc_set_ssp_rate       = dwc3_gadget_set_ssp_rate,
        .get_config_params      = dwc3_gadget_config_params,
        .vbus_draw              = dwc3_gadget_vbus_draw,
+       .udc_async_callbacks    = dwc3_gadget_async_callbacks,
 };
 
 /* -------------------------------------------------------------------------- */
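
The async_callbacks bit acts as a gate: the event-handling paths only call into the gadget driver once the UDC core has enabled callbacks through ->udc_async_callbacks(). A simplified userspace model of that gate (all names hypothetical):

#include <stdio.h>

struct gadget {
	int async_callbacks;		/* flipped by the core, as above */
	void (*disconnect)(void);
};

static void on_disconnect(void)
{
	puts("disconnect delivered");
}

/* Event paths consult the gate first, so a callback can never race
 * with a gadget driver that is still binding. */
static void notify_disconnect(struct gadget *g)
{
	if (g->async_callbacks && g->disconnect)
		g->disconnect();
}

int main(void)
{
	struct gadget g = { .async_callbacks = 0, .disconnect = on_disconnect };

	notify_disconnect(&g);		/* suppressed: gate still closed */
	g.async_callbacks = 1;
	notify_disconnect(&g);		/* delivered */
	return 0;
}
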
@@ -3231,7 +3242,7 @@ static void dwc3_endpoint_interrupt(struct dwc3 *dwc,
 
 static void dwc3_disconnect_gadget(struct dwc3 *dwc)
 {
-       if (dwc->gadget_driver && dwc->gadget_driver->disconnect) {
+       if (dwc->async_callbacks && dwc->gadget_driver->disconnect) {
                spin_unlock(&dwc->lock);
                dwc->gadget_driver->disconnect(dwc->gadget);
                spin_lock(&dwc->lock);
@@ -3240,7 +3251,7 @@ static void dwc3_disconnect_gadget(struct dwc3 *dwc)
 
 static void dwc3_suspend_gadget(struct dwc3 *dwc)
 {
-       if (dwc->gadget_driver && dwc->gadget_driver->suspend) {
+       if (dwc->async_callbacks && dwc->gadget_driver->suspend) {
                spin_unlock(&dwc->lock);
                dwc->gadget_driver->suspend(dwc->gadget);
                spin_lock(&dwc->lock);
@@ -3249,7 +3260,7 @@ static void dwc3_suspend_gadget(struct dwc3 *dwc)
 
 static void dwc3_resume_gadget(struct dwc3 *dwc)
 {
-       if (dwc->gadget_driver && dwc->gadget_driver->resume) {
+       if (dwc->async_callbacks && dwc->gadget_driver->resume) {
                spin_unlock(&dwc->lock);
                dwc->gadget_driver->resume(dwc->gadget);
                spin_lock(&dwc->lock);
@@ -3261,7 +3272,7 @@ static void dwc3_reset_gadget(struct dwc3 *dwc)
        if (!dwc->gadget_driver)
                return;
 
-       if (dwc->gadget->speed != USB_SPEED_UNKNOWN) {
+       if (dwc->async_callbacks && dwc->gadget->speed != USB_SPEED_UNKNOWN) {
                spin_unlock(&dwc->lock);
                usb_gadget_udc_reset(dwc->gadget, dwc->gadget_driver);
                spin_lock(&dwc->lock);
@@ -3585,7 +3596,7 @@ static void dwc3_gadget_wakeup_interrupt(struct dwc3 *dwc)
         * implemented.
         */
 
-       if (dwc->gadget_driver && dwc->gadget_driver->resume) {
+       if (dwc->async_callbacks && dwc->gadget_driver->resume) {
                spin_unlock(&dwc->lock);
                dwc->gadget_driver->resume(dwc->gadget);
                spin_lock(&dwc->lock);
index bffef8e..281ca76 100644 (file)
@@ -1198,7 +1198,7 @@ void gserial_free_line(unsigned char port_num)
        struct gs_port  *port;
 
        mutex_lock(&ports[port_num].lock);
-       if (WARN_ON(!ports[port_num].port)) {
+       if (!ports[port_num].port) {
                mutex_unlock(&ports[port_num].lock);
                return;
        }
index a54d1ce..c0ca714 100644 (file)
@@ -3853,6 +3853,7 @@ static int tegra_xudc_probe(struct platform_device *pdev)
        return 0;
 
 free_eps:
+       pm_runtime_disable(&pdev->dev);
        tegra_xudc_free_eps(xudc);
 free_event_ring:
        tegra_xudc_free_event_ring(xudc);
index 36f5bf6..10b0365 100644 (file)
@@ -703,24 +703,28 @@ EXPORT_SYMBOL_GPL(ehci_setup);
 static irqreturn_t ehci_irq (struct usb_hcd *hcd)
 {
        struct ehci_hcd         *ehci = hcd_to_ehci (hcd);
-       u32                     status, masked_status, pcd_status = 0, cmd;
+       u32                     status, current_status, masked_status, pcd_status = 0;
+       u32                     cmd;
        int                     bh;
 
        spin_lock(&ehci->lock);
 
-       status = ehci_readl(ehci, &ehci->regs->status);
+       status = 0;
+       current_status = ehci_readl(ehci, &ehci->regs->status);
+restart:
 
        /* e.g. cardbus physical eject */
-       if (status == ~(u32) 0) {
+       if (current_status == ~(u32) 0) {
                ehci_dbg (ehci, "device removed\n");
                goto dead;
        }
+       status |= current_status;
 
        /*
         * We don't use STS_FLR, but some controllers don't like it to
         * remain on, so mask it out along with the other status bits.
         */
-       masked_status = status & (INTR_MASK | STS_FLR);
+       masked_status = current_status & (INTR_MASK | STS_FLR);
 
        /* Shared IRQ? */
        if (!masked_status || unlikely(ehci->rh_state == EHCI_RH_HALTED)) {
@@ -730,6 +734,12 @@ static irqreturn_t ehci_irq (struct usb_hcd *hcd)
 
        /* clear (just) interrupts */
        ehci_writel(ehci, masked_status, &ehci->regs->status);
+
+       /* For edge interrupts, don't race with an interrupt bit being raised */
+       current_status = ehci_readl(ehci, &ehci->regs->status);
+       if (current_status & INTR_MASK)
+               goto restart;
+
        cmd = ehci_readl(ehci, &ehci->regs->command);
        bh = 0;
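
The restart loop guards against edge-triggered interrupt lines: after acking the bits just read, the status register is read again, and any freshly raised bit restarts processing rather than being lost until a next edge that may never come. A userspace model with a mocked register (values hypothetical):

#include <stdio.h>

#define INTR_MASK 0x7

static unsigned int fake_sts[] = { 0x3, 0x4, 0x0 };	/* successive reads */
static int idx;

static unsigned int read_sts(void)
{
	return fake_sts[idx++];
}

int main(void)
{
	unsigned int status = 0, current_status = read_sts();

	/* Accumulate and re-read until nothing is pending, so a bit that
	 * was raised while acking the previous ones is still serviced. */
	do {
		status |= current_status;
		/* ...ack (current_status & INTR_MASK) here... */
		current_status = read_sts();
	} while (current_status & INTR_MASK);

	printf("accumulated status = 0x%x\n", status);	/* 0x7 */
	return 0;
}
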
 
index e7a8e06..59cc1bc 100644 (file)
@@ -153,8 +153,6 @@ struct max3421_hcd {
         */
        struct urb *curr_urb;
        enum scheduling_pass sched_pass;
-       struct usb_device *loaded_dev;  /* dev that's loaded into the chip */
-       int loaded_epnum;               /* epnum whose toggles are loaded */
        int urb_done;                   /* > 0 -> no errors, < 0: errno */
        size_t curr_len;
        u8 hien;
@@ -492,39 +490,17 @@ max3421_set_speed(struct usb_hcd *hcd, struct usb_device *dev)
  * Caller must NOT hold HCD spinlock.
  */
 static void
-max3421_set_address(struct usb_hcd *hcd, struct usb_device *dev, int epnum,
-                   int force_toggles)
+max3421_set_address(struct usb_hcd *hcd, struct usb_device *dev, int epnum)
 {
-       struct max3421_hcd *max3421_hcd = hcd_to_max3421(hcd);
-       int old_epnum, same_ep, rcvtog, sndtog;
-       struct usb_device *old_dev;
+       int rcvtog, sndtog;
        u8 hctl;
 
-       old_dev = max3421_hcd->loaded_dev;
-       old_epnum = max3421_hcd->loaded_epnum;
-
-       same_ep = (dev == old_dev && epnum == old_epnum);
-       if (same_ep && !force_toggles)
-               return;
-
-       if (old_dev && !same_ep) {
-               /* save the old end-points toggles: */
-               u8 hrsl = spi_rd8(hcd, MAX3421_REG_HRSL);
-
-               rcvtog = (hrsl >> MAX3421_HRSL_RCVTOGRD_BIT) & 1;
-               sndtog = (hrsl >> MAX3421_HRSL_SNDTOGRD_BIT) & 1;
-
-               /* no locking: HCD (i.e., we) own toggles, don't we? */
-               usb_settoggle(old_dev, old_epnum, 0, rcvtog);
-               usb_settoggle(old_dev, old_epnum, 1, sndtog);
-       }
        /* setup new endpoint's toggle bits: */
        rcvtog = usb_gettoggle(dev, epnum, 0);
        sndtog = usb_gettoggle(dev, epnum, 1);
        hctl = (BIT(rcvtog + MAX3421_HCTL_RCVTOG0_BIT) |
                BIT(sndtog + MAX3421_HCTL_SNDTOG0_BIT));
 
-       max3421_hcd->loaded_epnum = epnum;
        spi_wr8(hcd, MAX3421_REG_HCTL, hctl);
 
        /*
@@ -532,7 +508,6 @@ max3421_set_address(struct usb_hcd *hcd, struct usb_device *dev, int epnum,
         * address-assignment so it's best to just always load the
         * address whenever the end-point changed/was forced.
         */
-       max3421_hcd->loaded_dev = dev;
        spi_wr8(hcd, MAX3421_REG_PERADDR, dev->devnum);
 }
 
@@ -667,7 +642,7 @@ max3421_select_and_start_urb(struct usb_hcd *hcd)
        struct max3421_hcd *max3421_hcd = hcd_to_max3421(hcd);
        struct urb *urb, *curr_urb = NULL;
        struct max3421_ep *max3421_ep;
-       int epnum, force_toggles = 0;
+       int epnum;
        struct usb_host_endpoint *ep;
        struct list_head *pos;
        unsigned long flags;
@@ -777,7 +752,6 @@ done:
                        usb_settoggle(urb->dev, epnum, 0, 1);
                        usb_settoggle(urb->dev, epnum, 1, 1);
                        max3421_ep->pkt_state = PKT_STATE_SETUP;
-                       force_toggles = 1;
                } else
                        max3421_ep->pkt_state = PKT_STATE_TRANSFER;
        }
@@ -785,7 +759,7 @@ done:
        spin_unlock_irqrestore(&max3421_hcd->lock, flags);
 
        max3421_ep->last_active = max3421_hcd->frame_number;
-       max3421_set_address(hcd, urb->dev, epnum, force_toggles);
+       max3421_set_address(hcd, urb->dev, epnum);
        max3421_set_speed(hcd, urb->dev);
        max3421_next_transfer(hcd, 0);
        return 1;
@@ -1379,6 +1353,16 @@ max3421_urb_done(struct usb_hcd *hcd)
                status = 0;
        urb = max3421_hcd->curr_urb;
        if (urb) {
+               /* save the old end-points toggles: */
+               u8 hrsl = spi_rd8(hcd, MAX3421_REG_HRSL);
+               int rcvtog = (hrsl >> MAX3421_HRSL_RCVTOGRD_BIT) & 1;
+               int sndtog = (hrsl >> MAX3421_HRSL_SNDTOGRD_BIT) & 1;
+               int epnum = usb_endpoint_num(&urb->ep->desc);
+
+               /* no locking: HCD (i.e., we) own toggles, don't we? */
+               usb_settoggle(urb->dev, epnum, 0, rcvtog);
+               usb_settoggle(urb->dev, epnum, 1, sndtog);
+
                max3421_hcd->curr_urb = NULL;
                spin_lock_irqsave(&max3421_hcd->lock, flags);
                usb_hcd_unlink_urb_from_ep(hcd, urb);
index e9b18fc..151e93c 100644 (file)
@@ -1638,11 +1638,12 @@ int xhci_hub_status_data(struct usb_hcd *hcd, char *buf)
         * Inform the usbcore about resume-in-progress by returning
         * a non-zero value even if there are no status changes.
         */
+       spin_lock_irqsave(&xhci->lock, flags);
+
        status = bus_state->resuming_ports;
 
        mask = PORT_CSC | PORT_PEC | PORT_OCC | PORT_PLC | PORT_WRC | PORT_CEC;
 
-       spin_lock_irqsave(&xhci->lock, flags);
        /* For each port, did anything change?  If so, set that bit in buf. */
        for (i = 0; i < max_ports; i++) {
                temp = readl(ports[i]->addr);
index 1da6479..5923844 100644 (file)
@@ -207,8 +207,7 @@ static int renesas_check_rom_state(struct pci_dev *pdev)
                        return 0;
 
                case RENESAS_ROM_STATUS_NO_RESULT: /* No result yet */
-                       dev_dbg(&pdev->dev, "Unknown ROM status ...\n");
-                       break;
+                       return 0;
 
                case RENESAS_ROM_STATUS_ERROR: /* Error State */
                default: /* All other states are marked as "Reserved states" */
@@ -225,12 +224,13 @@ static int renesas_fw_check_running(struct pci_dev *pdev)
        u8 fw_state;
        int err;
 
-       /*
-        * Only if device has ROM and loaded FW we can skip loading and
-        * return success. Otherwise (even unknown state), attempt to load FW.
-        */
-       if (renesas_check_rom(pdev) && !renesas_check_rom_state(pdev))
-               return 0;
+       /* Check if the device has a ROM with firmware already loaded; if so, skip everything */
+       err = renesas_check_rom(pdev);
+       if (err) { /* we have a ROM */
+               err = renesas_check_rom_state(pdev);
+               if (!err)
+                       return err;
+       }
 
        /*
         * Test if the device actually needs the firmware. As most
index 18c2bbd..1c9a795 100644 (file)
@@ -636,7 +636,14 @@ static const struct pci_device_id pci_ids[] = {
        { /* end: all zeroes */ }
 };
 MODULE_DEVICE_TABLE(pci, pci_ids);
+
+/*
+ * Without CONFIG_USB_XHCI_PCI_RENESAS renesas_xhci_check_request_fw() won't
+ * load firmware, so don't encumber the xhci-pci driver with it.
+ */
+#if IS_ENABLED(CONFIG_USB_XHCI_PCI_RENESAS)
 MODULE_FIRMWARE("renesas_usb_fw.mem");
+#endif
 
 /* pci driver glue; this is a "new style" PCI driver module */
 static struct pci_driver xhci_pci_driver = {
index 83ed508..1b24492 100644 (file)
@@ -86,10 +86,10 @@ static struct usb_phy *__device_to_usb_phy(struct device *dev)
 
        list_for_each_entry(usb_phy, &phy_list, head) {
                if (usb_phy->dev == dev)
-                       break;
+                       return usb_phy;
        }
 
-       return usb_phy;
+       return NULL;
 }
 
 static void usb_phy_set_default_current(struct usb_phy *usb_phy)
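
The rewrite matters because list_for_each_entry() leaves its cursor pointing at a bogus container of the list head when the walk finishes without a match; returning the entry from inside the loop and NULL afterwards is the safe shape, and the uevent caller below can then detect the miss. A generic userspace sketch of the pattern:

#include <stddef.h>
#include <stdio.h>

struct node {
	int key;
	struct node *next;
};

static struct node *find(struct node *head, int key)
{
	for (struct node *n = head; n; n = n->next)
		if (n->key == key)
			return n;	/* return the match directly... */
	return NULL;			/* ...so a miss is detectable */
}

int main(void)
{
	struct node b = { 2, NULL };
	struct node a = { 1, &b };

	printf("%p\n", (void *)find(&a, 3));	/* (nil): no false hit */
	return 0;
}
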
@@ -150,8 +150,14 @@ static int usb_phy_uevent(struct device *dev, struct kobj_uevent_env *env)
        struct usb_phy *usb_phy;
        char uchger_state[50] = { 0 };
        char uchger_type[50] = { 0 };
+       unsigned long flags;
 
+       spin_lock_irqsave(&phy_lock, flags);
        usb_phy = __device_to_usb_phy(dev);
+       spin_unlock_irqrestore(&phy_lock, flags);
+
+       if (!usb_phy)
+               return -ENODEV;
 
        snprintf(uchger_state, ARRAY_SIZE(uchger_state),
                 "USB_CHARGER_STATE=%s", usb_chger_state[usb_phy->chg_state]);
index b5e7991..a3c2b01 100644 (file)
@@ -101,6 +101,8 @@ static struct dma_chan *usbhsf_dma_chan_get(struct usbhs_fifo *fifo,
 #define usbhsf_dma_map(p)      __usbhsf_dma_map_ctrl(p, 1)
 #define usbhsf_dma_unmap(p)    __usbhsf_dma_map_ctrl(p, 0)
 static int __usbhsf_dma_map_ctrl(struct usbhs_pkt *pkt, int map);
+static void usbhsf_tx_irq_ctrl(struct usbhs_pipe *pipe, int enable);
+static void usbhsf_rx_irq_ctrl(struct usbhs_pipe *pipe, int enable);
 struct usbhs_pkt *usbhs_pkt_pop(struct usbhs_pipe *pipe, struct usbhs_pkt *pkt)
 {
        struct usbhs_priv *priv = usbhs_pipe_to_priv(pipe);
@@ -123,6 +125,11 @@ struct usbhs_pkt *usbhs_pkt_pop(struct usbhs_pipe *pipe, struct usbhs_pkt *pkt)
                if (chan) {
                        dmaengine_terminate_all(chan);
                        usbhsf_dma_unmap(pkt);
+               } else {
+                       if (usbhs_pipe_is_dir_in(pipe))
+                               usbhsf_rx_irq_ctrl(pipe, 0);
+                       else
+                               usbhsf_tx_irq_ctrl(pipe, 0);
                }
 
                usbhs_pipe_clear_without_sequence(pipe, 0, 0);
index 09b845d..3c80bfb 100644 (file)
@@ -155,6 +155,7 @@ static const struct usb_device_id id_table[] = {
        { USB_DEVICE(0x10C4, 0x89A4) }, /* CESINEL FTBC Flexible Thyristor Bridge Controller */
        { USB_DEVICE(0x10C4, 0x89FB) }, /* Qivicon ZigBee USB Radio Stick */
        { USB_DEVICE(0x10C4, 0x8A2A) }, /* HubZ dual ZigBee and Z-Wave dongle */
+       { USB_DEVICE(0x10C4, 0x8A5B) }, /* CEL EM3588 ZigBee USB Stick */
        { USB_DEVICE(0x10C4, 0x8A5E) }, /* CEL EM3588 ZigBee USB Stick Long Range */
        { USB_DEVICE(0x10C4, 0x8B34) }, /* Qivicon ZigBee USB Radio Stick */
        { USB_DEVICE(0x10C4, 0xEA60) }, /* Silicon Labs factory default */
@@ -202,8 +203,8 @@ static const struct usb_device_id id_table[] = {
        { USB_DEVICE(0x1901, 0x0194) }, /* GE Healthcare Remote Alarm Box */
        { USB_DEVICE(0x1901, 0x0195) }, /* GE B850/B650/B450 CP2104 DP UART interface */
        { USB_DEVICE(0x1901, 0x0196) }, /* GE B850 CP2105 DP UART interface */
-       { USB_DEVICE(0x1901, 0x0197) }, /* GE CS1000 Display serial interface */
-       { USB_DEVICE(0x1901, 0x0198) }, /* GE CS1000 M.2 Key E serial interface */
+       { USB_DEVICE(0x1901, 0x0197) }, /* GE CS1000 M.2 Key E serial interface */
+       { USB_DEVICE(0x1901, 0x0198) }, /* GE CS1000 Display serial interface */
        { USB_DEVICE(0x199B, 0xBA30) }, /* LORD WSDA-200-USB */
        { USB_DEVICE(0x19CF, 0x3000) }, /* Parrot NMEA GPS Flight Recorder */
        { USB_DEVICE(0x1ADB, 0x0001) }, /* Schweitzer Engineering C662 Cable */
index 7608584..0fbe253 100644 (file)
@@ -238,6 +238,7 @@ static void option_instat_callback(struct urb *urb);
 #define QUECTEL_PRODUCT_UC15                   0x9090
 /* These u-blox products use Qualcomm's vendor ID */
 #define UBLOX_PRODUCT_R410M                    0x90b2
+#define UBLOX_PRODUCT_R6XX                     0x90fa
 /* These Yuga products use Qualcomm's vendor ID */
 #define YUGA_PRODUCT_CLM920_NC5                        0x9625
 
@@ -1101,6 +1102,8 @@ static const struct usb_device_id option_ids[] = {
        /* u-blox products using Qualcomm vendor ID */
        { USB_DEVICE(QUALCOMM_VENDOR_ID, UBLOX_PRODUCT_R410M),
          .driver_info = RSVD(1) | RSVD(3) },
+       { USB_DEVICE(QUALCOMM_VENDOR_ID, UBLOX_PRODUCT_R6XX),
+         .driver_info = RSVD(3) },
        /* Quectel products using Quectel vendor ID */
        { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC21, 0xff, 0xff, 0xff),
          .driver_info = NUMEP2 },
index f9677a5..c35a6db 100644 (file)
@@ -45,6 +45,13 @@ UNUSUAL_DEV(0x059f, 0x105f, 0x0000, 0x9999,
                USB_SC_DEVICE, USB_PR_DEVICE, NULL,
                US_FL_NO_REPORT_OPCODES | US_FL_NO_SAME),
 
+/* Reported-by: Julian Sikorski <belegdol@gmail.com> */
+UNUSUAL_DEV(0x059f, 0x1061, 0x0000, 0x9999,
+               "LaCie",
+               "Rugged USB3-FW",
+               USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+               US_FL_IGNORE_UAS),
+
 /*
  * Apricorn USB3 dongle sometimes returns "USBSUSBSUSBS" in response to SCSI
  * commands in UAS mode.  Observed with the 1.28 firmware; are there others?
index 6eaeba9..e7745d1 100644 (file)
@@ -685,6 +685,15 @@ static int stusb160x_probe(struct i2c_client *client)
        if (!fwnode)
                return -ENODEV;
 
+       /*
+        * This fwnode has a "compatible" property, but is never populated as a
+        * struct device. Instead we simply parse it to read the properties.
+        * This breaks fw_devlink=on. To maintain backward compatibility
+        * with existing DT files, we work around this by deleting any
+        * fwnode_links to/from this fwnode.
+        */
+       fw_devlink_purge_absent_suppliers(fwnode);
+
        /*
         * When both VDD and VSYS power supplies are present, the low power
         * supply VSYS is selected when VSYS voltage is above 3.1 V.
@@ -739,10 +748,6 @@ static int stusb160x_probe(struct i2c_client *client)
        typec_set_pwr_opmode(chip->port, chip->pwr_opmode);
 
        if (client->irq) {
-               ret = stusb160x_irq_init(chip, client->irq);
-               if (ret)
-                       goto port_unregister;
-
                chip->role_sw = fwnode_usb_role_switch_get(fwnode);
                if (IS_ERR(chip->role_sw)) {
                        ret = PTR_ERR(chip->role_sw);
@@ -752,6 +757,10 @@ static int stusb160x_probe(struct i2c_client *client)
                                        ret);
                        goto port_unregister;
                }
+
+               ret = stusb160x_irq_init(chip, client->irq);
+               if (ret)
+                       goto role_sw_put;
        } else {
                /*
                 * If Source or Dual power role, need to enable VDD supply
@@ -775,6 +784,9 @@ static int stusb160x_probe(struct i2c_client *client)
 
        return 0;
 
+role_sw_put:
+       if (chip->role_sw)
+               usb_role_switch_put(chip->role_sw);
 port_unregister:
        typec_unregister_port(chip->port);
 all_reg_disable:
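
The reordering follows the usual acquire-in-order, unwind-in-reverse shape: the interrupt is requested last because it may fire as soon as it is registered, and its handler can already need the role switch. A compact userspace model of the goto-unwind idiom (all functions hypothetical):

#include <stdio.h>

static int get_role_switch(void)   { puts("role_sw get");  return 0; }
static void put_role_switch(void)  { puts("role_sw put"); }
static int init_irq(void)          { puts("irq init");     return -1; }
static void unregister_port(void)  { puts("port unregister"); }

int main(void)
{
	int ret;

	ret = get_role_switch();	/* acquire A */
	if (ret)
		goto port_unregister;

	ret = init_irq();		/* acquire B last: may fire at once */
	if (ret)
		goto role_sw_put;	/* unwind in reverse order */

	return 0;

role_sw_put:
	put_role_switch();
port_unregister:
	unregister_port();
	return ret;
}
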
index 938219b..21b3ae2 100644 (file)
@@ -629,6 +629,15 @@ static int tps6598x_probe(struct i2c_client *client)
        if (!fwnode)
                return -ENODEV;
 
+       /*
+        * This fwnode has a "compatible" property, but is never populated as a
+        * struct device. Instead we simply parse it to read the properties.
+        * This breaks fw_devlink=on. To maintain backward compatibility
+        * with existing DT files, we work around this by deleting any
+        * fwnode_links to/from this fwnode.
+        */
+       fw_devlink_purge_absent_suppliers(fwnode);
+
        tps->role_sw = fwnode_usb_role_switch_get(fwnode);
        if (IS_ERR(tps->role_sw)) {
                ret = PTR_ERR(tps->role_sw);
index 2a31467..de8e8a1 100644 (file)
@@ -573,7 +573,7 @@ static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent)
        cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
        MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent));
        MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index);
-       MLX5_SET(cqc, cqc, c_eqn, eqn);
+       MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
        MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma);
 
        err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out));
index 06fb7a9..4d5ae61 100644 (file)
@@ -168,21 +168,6 @@ config OSF4_COMPAT
          with v4 shared libraries freely available from Compaq. If you're
          going to use shared libraries from Tru64 version 5.0 or later, say N.
 
-config BINFMT_EM86
-       tristate "Kernel support for Linux/Intel ELF binaries"
-       depends on ALPHA
-       help
-         Say Y here if you want to be able to execute Linux/Intel ELF
-         binaries just like native Alpha binaries on your Alpha machine. For
-         this to work, you need to have the emulator /usr/bin/em86 in place.
-
-         You can get the same functionality by saying N here and saying Y to
-         "Kernel support for MISC binaries".
-
-         You may answer M to compile the emulation support as a module and
-         later load the module when you want to use a Linux/Intel binary. The
-         module will be called binfmt_em86. If unsure, say Y.
-
 config BINFMT_MISC
        tristate "Kernel support for MISC binaries"
        help
index 9c708e1..f98f3e6 100644 (file)
@@ -39,7 +39,6 @@ obj-$(CONFIG_FS_ENCRYPTION)   += crypto/
 obj-$(CONFIG_FS_VERITY)                += verity/
 obj-$(CONFIG_FILE_LOCKING)      += locks.o
 obj-$(CONFIG_BINFMT_AOUT)      += binfmt_aout.o
-obj-$(CONFIG_BINFMT_EM86)      += binfmt_em86.o
 obj-$(CONFIG_BINFMT_MISC)      += binfmt_misc.o
 obj-$(CONFIG_BINFMT_SCRIPT)    += binfmt_script.o
 obj-$(CONFIG_BINFMT_ELF)       += binfmt_elf.o
diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c
deleted file mode 100644 (file)
index 06b9b9f..0000000
+++ /dev/null
@@ -1,110 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- *  linux/fs/binfmt_em86.c
- *
- *  Based on linux/fs/binfmt_script.c
- *  Copyright (C) 1996  Martin von Löwis
- *  original #!-checking implemented by tytso.
- *
- *  em86 changes Copyright (C) 1997  Jim Paradis
- */
-
-#include <linux/module.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/binfmts.h>
-#include <linux/elf.h>
-#include <linux/init.h>
-#include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/errno.h>
-
-
-#define EM86_INTERP    "/usr/bin/em86"
-#define EM86_I_NAME    "em86"
-
-static int load_em86(struct linux_binprm *bprm)
-{
-       const char *i_name, *i_arg;
-       char *interp;
-       struct file * file;
-       int retval;
-       struct elfhdr   elf_ex;
-
-       /* Make sure this is a Linux/Intel ELF executable... */
-       elf_ex = *((struct elfhdr *)bprm->buf);
-
-       if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
-               return  -ENOEXEC;
-
-       /* First of all, some simple consistency checks */
-       if ((elf_ex.e_type != ET_EXEC && elf_ex.e_type != ET_DYN) ||
-               (!((elf_ex.e_machine == EM_386) || (elf_ex.e_machine == EM_486))) ||
-               !bprm->file->f_op->mmap) {
-                       return -ENOEXEC;
-       }
-
-       /* Need to be able to load the file after exec */
-       if (bprm->interp_flags & BINPRM_FLAGS_PATH_INACCESSIBLE)
-               return -ENOENT;
-
-       /* Unlike in the script case, we don't have to do any hairy
-        * parsing to find our interpreter... it's hardcoded!
-        */
-       interp = EM86_INTERP;
-       i_name = EM86_I_NAME;
-       i_arg = NULL;           /* We reserve the right to add an arg later */
-
-       /*
-        * Splice in (1) the interpreter's name for argv[0]
-        *           (2) (optional) argument to interpreter
-        *           (3) filename of emulated file (replace argv[0])
-        *
-        * This is done in reverse order, because of how the
-        * user environment and arguments are stored.
-        */
-       remove_arg_zero(bprm);
-       retval = copy_string_kernel(bprm->filename, bprm);
-       if (retval < 0) return retval; 
-       bprm->argc++;
-       if (i_arg) {
-               retval = copy_string_kernel(i_arg, bprm);
-               if (retval < 0) return retval; 
-               bprm->argc++;
-       }
-       retval = copy_string_kernel(i_name, bprm);
-       if (retval < 0) return retval;
-       bprm->argc++;
-
-       /*
-        * OK, now restart the process with the interpreter's inode.
-        * Note that we use open_exec() as the name is now in kernel
-        * space, and we don't need to copy it.
-        */
-       file = open_exec(interp);
-       if (IS_ERR(file))
-               return PTR_ERR(file);
-
-       bprm->interpreter = file;
-       return 0;
-}
-
-static struct linux_binfmt em86_format = {
-       .module         = THIS_MODULE,
-       .load_binary    = load_em86,
-};
-
-static int __init init_em86_binfmt(void)
-{
-       register_binfmt(&em86_format);
-       return 0;
-}
-
-static void __exit exit_em86_binfmt(void)
-{
-       unregister_binfmt(&em86_format);
-}
-
-core_initcall(init_em86_binfmt);
-module_exit(exit_em86_binfmt);
-MODULE_LICENSE("GPL");
index 0c424a0..9ef4f1f 100644 (file)
@@ -812,6 +812,8 @@ static void bdev_free_inode(struct inode *inode)
        free_percpu(bdev->bd_stats);
        kfree(bdev->bd_meta_info);
 
+       if (!bdev_is_partition(bdev))
+               kfree(bdev->bd_disk);
        kmem_cache_free(bdev_cachep, BDEV_I(inode));
 }
 
index 7a8a2fc..78b202d 100644 (file)
@@ -1488,15 +1488,15 @@ static int btrfs_find_all_roots_safe(struct btrfs_trans_handle *trans,
 int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
                         struct btrfs_fs_info *fs_info, u64 bytenr,
                         u64 time_seq, struct ulist **roots,
-                        bool ignore_offset)
+                        bool ignore_offset, bool skip_commit_root_sem)
 {
        int ret;
 
-       if (!trans)
+       if (!trans && !skip_commit_root_sem)
                down_read(&fs_info->commit_root_sem);
        ret = btrfs_find_all_roots_safe(trans, fs_info, bytenr,
                                        time_seq, roots, ignore_offset);
-       if (!trans)
+       if (!trans && !skip_commit_root_sem)
                up_read(&fs_info->commit_root_sem);
        return ret;
 }
index 17abde7..ff5f07f 100644 (file)
@@ -47,7 +47,8 @@ int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
                         const u64 *extent_item_pos, bool ignore_offset);
 int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
                         struct btrfs_fs_info *fs_info, u64 bytenr,
-                        u64 time_seq, struct ulist **roots, bool ignore_offset);
+                        u64 time_seq, struct ulist **roots, bool ignore_offset,
+                        bool skip_commit_root_sem);
 char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
                        u32 name_len, unsigned long name_off,
                        struct extent_buffer *eb_in, u64 parent,
index 9a023ae..30d82cd 100644 (file)
@@ -352,7 +352,7 @@ static void end_compressed_bio_write(struct bio *bio)
        btrfs_record_physical_zoned(inode, cb->start, bio);
        btrfs_writepage_endio_finish_ordered(BTRFS_I(inode), NULL,
                        cb->start, cb->start + cb->len - 1,
-                       bio->bi_status == BLK_STS_OK);
+                       !cb->errors);
 
        end_compressed_writeback(inode, cb);
        /* note, our inode could be gone now */
index 06bc842..ca848b1 100644 (file)
@@ -974,7 +974,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
                kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
 
        if (qrecord_inserted)
-               btrfs_qgroup_trace_extent_post(fs_info, record);
+               btrfs_qgroup_trace_extent_post(trans, record);
 
        return 0;
 }
@@ -1069,7 +1069,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
 
 
        if (qrecord_inserted)
-               return btrfs_qgroup_trace_extent_post(fs_info, record);
+               return btrfs_qgroup_trace_extent_post(trans, record);
        return 0;
 }
 
index b117dd3..a59ab7b 100644 (file)
@@ -209,7 +209,7 @@ void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb,
 static void csum_tree_block(struct extent_buffer *buf, u8 *result)
 {
        struct btrfs_fs_info *fs_info = buf->fs_info;
-       const int num_pages = fs_info->nodesize >> PAGE_SHIFT;
+       const int num_pages = num_extent_pages(buf);
        const int first_page_part = min_t(u32, PAGE_SIZE, fs_info->nodesize);
        SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
        char *kaddr;
index d296483..268ce58 100644 (file)
@@ -6019,6 +6019,9 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
        mutex_lock(&fs_info->fs_devices->device_list_mutex);
        devices = &fs_info->fs_devices->devices;
        list_for_each_entry(device, devices, dev_list) {
+               if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
+                       continue;
+
                ret = btrfs_trim_free_extents(device, &group_trimmed);
                if (ret) {
                        dev_failed++;
index 8f60314..0117d86 100644 (file)
@@ -2992,7 +2992,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
                goto out;
        }
 
-       if (ordered_extent->disk)
+       if (ordered_extent->bdev)
                btrfs_rewrite_logical_zoned(ordered_extent);
 
        btrfs_free_io_failure_record(inode, start, end);
index 6eb41b7..5c0f848 100644 (file)
@@ -190,8 +190,6 @@ static int __btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset
        entry->truncated_len = (u64)-1;
        entry->qgroup_rsv = ret;
        entry->physical = (u64)-1;
-       entry->disk = NULL;
-       entry->partno = (u8)-1;
 
        ASSERT(type == BTRFS_ORDERED_REGULAR ||
               type == BTRFS_ORDERED_NOCOW ||
index 5664720..b2d88ab 100644 (file)
@@ -145,8 +145,7 @@ struct btrfs_ordered_extent {
         * command in a workqueue context
         */
        u64 physical;
-       struct gendisk *disk;
-       u8 partno;
+       struct block_device *bdev;
 };
 
 /*
index 07ec06d..0fa1211 100644 (file)
@@ -1704,17 +1704,39 @@ int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info,
        return 0;
 }
 
-int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
+int btrfs_qgroup_trace_extent_post(struct btrfs_trans_handle *trans,
                                   struct btrfs_qgroup_extent_record *qrecord)
 {
        struct ulist *old_root;
        u64 bytenr = qrecord->bytenr;
        int ret;
 
-       ret = btrfs_find_all_roots(NULL, fs_info, bytenr, 0, &old_root, false);
+       /*
+        * We are always called in a context where we are already holding a
+        * transaction handle. Often we are called when adding a data delayed
+        * reference from btrfs_truncate_inode_items() (truncating or unlinking),
+        * in which case we will be holding a write lock on extent buffer from a
+        * subvolume tree. In this case we can't allow btrfs_find_all_roots() to
+        * acquire fs_info->commit_root_sem, because that is a higher level lock
+        * that must be acquired before locking any extent buffers.
+        *
+        * So we want btrfs_find_all_roots() to not acquire the commit_root_sem
+        * but we can't pass it a non-NULL transaction handle, because otherwise
+        * it would not use commit roots and would lock extent buffers, causing
+        * a deadlock if it ends up trying to read lock the same extent buffer
+        * that was previously write locked at btrfs_truncate_inode_items().
+        *
+        * So pass a NULL transaction handle to btrfs_find_all_roots() and
+        * explicitly tell it to not acquire the commit_root_sem - if we are
+        * holding a transaction handle we don't need its protection.
+        */
+       ASSERT(trans != NULL);
+
+       ret = btrfs_find_all_roots(NULL, trans->fs_info, bytenr, 0, &old_root,
+                                  false, true);
        if (ret < 0) {
-               fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
-               btrfs_warn(fs_info,
+               trans->fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
+               btrfs_warn(trans->fs_info,
 "error accounting new delayed refs extent (err code: %d), quota inconsistent",
                        ret);
                return 0;
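
The long comment above is a lock-ordering argument: commit_root_sem ranks above extent buffer locks, so a path already holding an extent buffer write lock must never take commit_root_sem afterwards. A toy pthreads model of the permitted order (lock names borrowed for illustration only):

#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t commit_root_sem = PTHREAD_RWLOCK_INITIALIZER;
static pthread_rwlock_t extent_buffer_lock = PTHREAD_RWLOCK_INITIALIZER;

int main(void)
{
	/* Permitted order: commit_root_sem first, buffer lock second. */
	pthread_rwlock_rdlock(&commit_root_sem);
	pthread_rwlock_wrlock(&extent_buffer_lock);
	pthread_rwlock_unlock(&extent_buffer_lock);
	pthread_rwlock_unlock(&commit_root_sem);

	/* The qgroup path already holds an extent buffer write lock, so
	 * taking commit_root_sem afterwards would invert this order;
	 * hence the skip_commit_root_sem flag. */
	puts("lock order respected");
	return 0;
}
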
@@ -1758,7 +1780,7 @@ int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, u64 bytenr,
                kfree(record);
                return 0;
        }
-       return btrfs_qgroup_trace_extent_post(fs_info, record);
+       return btrfs_qgroup_trace_extent_post(trans, record);
 }
 
 int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
@@ -2629,7 +2651,7 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans)
                                /* Search commit root to find old_roots */
                                ret = btrfs_find_all_roots(NULL, fs_info,
                                                record->bytenr, 0,
-                                               &record->old_roots, false);
+                                               &record->old_roots, false, false);
                                if (ret < 0)
                                        goto cleanup;
                        }
@@ -2645,7 +2667,7 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans)
                         * current root. It's safe inside commit_transaction().
                         */
                        ret = btrfs_find_all_roots(trans, fs_info,
-                               record->bytenr, BTRFS_SEQ_LAST, &new_roots, false);
+                          record->bytenr, BTRFS_SEQ_LAST, &new_roots, false, false);
                        if (ret < 0)
                                goto cleanup;
                        if (qgroup_to_skip) {
@@ -3179,7 +3201,7 @@ static int qgroup_rescan_leaf(struct btrfs_trans_handle *trans,
                        num_bytes = found.offset;
 
                ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0,
-                                          &roots, false);
+                                          &roots, false, false);
                if (ret < 0)
                        goto out;
                /* For rescan, just pass old_roots as NULL */
index 7283e4f..880e9df 100644 (file)
@@ -298,7 +298,7 @@ int btrfs_qgroup_trace_extent_nolock(
  * using current root, then we can move all expensive backref walk out of
  * transaction committing, but not now as qgroup accounting will be wrong again.
  */
-int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
+int btrfs_qgroup_trace_extent_post(struct btrfs_trans_handle *trans,
                                   struct btrfs_qgroup_extent_record *qrecord);
 
 /*
index f313728..98b5aab 100644 (file)
@@ -224,7 +224,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
         * quota.
         */
        ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
-                       false);
+                       false, false);
        if (ret) {
                ulist_free(old_roots);
                test_err("couldn't find old roots: %d", ret);
@@ -237,7 +237,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
                return ret;
 
        ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
-                       false);
+                       false, false);
        if (ret) {
                ulist_free(old_roots);
                ulist_free(new_roots);
@@ -261,7 +261,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
        new_roots = NULL;
 
        ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
-                       false);
+                       false, false);
        if (ret) {
                ulist_free(old_roots);
                test_err("couldn't find old roots: %d", ret);
@@ -273,7 +273,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
                return -EINVAL;
 
        ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
-                       false);
+                       false, false);
        if (ret) {
                ulist_free(old_roots);
                ulist_free(new_roots);
@@ -325,7 +325,7 @@ static int test_multiple_refs(struct btrfs_root *root,
        }
 
        ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
-                       false);
+                       false, false);
        if (ret) {
                ulist_free(old_roots);
                test_err("couldn't find old roots: %d", ret);
@@ -338,7 +338,7 @@ static int test_multiple_refs(struct btrfs_root *root,
                return ret;
 
        ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
-                       false);
+                       false, false);
        if (ret) {
                ulist_free(old_roots);
                ulist_free(new_roots);
@@ -360,7 +360,7 @@ static int test_multiple_refs(struct btrfs_root *root,
        }
 
        ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
-                       false);
+                       false, false);
        if (ret) {
                ulist_free(old_roots);
                test_err("couldn't find old roots: %d", ret);
@@ -373,7 +373,7 @@ static int test_multiple_refs(struct btrfs_root *root,
                return ret;
 
        ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
-                       false);
+                       false, false);
        if (ret) {
                ulist_free(old_roots);
                ulist_free(new_roots);
@@ -401,7 +401,7 @@ static int test_multiple_refs(struct btrfs_root *root,
        }
 
        ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
-                       false);
+                       false, false);
        if (ret) {
                ulist_free(old_roots);
                test_err("couldn't find old roots: %d", ret);
@@ -414,7 +414,7 @@ static int test_multiple_refs(struct btrfs_root *root,
                return ret;
 
        ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
-                       false);
+                       false, false);
        if (ret) {
                ulist_free(old_roots);
                ulist_free(new_roots);
index dc6eb08..e6430ac 100644 (file)
@@ -5526,16 +5526,29 @@ log_extents:
                spin_lock(&inode->lock);
                inode->logged_trans = trans->transid;
                /*
-                * Don't update last_log_commit if we logged that an inode exists
-                * after it was loaded to memory (full_sync bit set).
-                * This is to prevent data loss when we do a write to the inode,
-                * then the inode gets evicted after all delalloc was flushed,
-                * then we log it exists (due to a rename for example) and then
-                * fsync it. This last fsync would do nothing (not logging the
-                * extents previously written).
+                * Don't update last_log_commit if we logged that an inode exists.
+                * We do this for two reasons:
+                *
+                * 1) We might have had buffered writes to this inode that were
+                *    flushed and had their ordered extents completed in this
+                *    transaction, but we did not previously log the inode with
+                *    LOG_INODE_ALL. Later the inode was evicted and after that
+                *    it was loaded again and this LOG_INODE_EXISTS log operation
+                *    happened. We must make sure that if an explicit fsync against
+                *    the inode is performed later, it logs the new extents, an
+                *    updated inode item, etc, and syncs the log. The same logic
+                *    applies to direct IO writes instead of buffered writes.
+                *
+                * 2) When we log the inode with LOG_INODE_EXISTS, its inode item
+                *    is logged with an i_size of 0 or whatever value was logged
+                *    before. If later the i_size of the inode is increased by a
+                *    truncate operation, the log is synced through an fsync of
+                *    some other inode and then finally an explicit fsync against
+                *    this inode is made, we must make sure this fsync logs the
+                *    inode with the new i_size, the hole between old i_size and
+                *    the new i_size, and syncs the log.
                 */
-               if (inode_only != LOG_INODE_EXISTS ||
-                   !test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags))
+               if (inode_only != LOG_INODE_EXISTS)
                        inode->last_log_commit = inode->last_sub_trans;
                spin_unlock(&inode->lock);
        }
@@ -6490,8 +6503,8 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
         * if this inode hasn't been logged and directory we're renaming it
         * from hasn't been logged, we don't need to log it
         */
-       if (inode->logged_trans < trans->transid &&
-           (!old_dir || old_dir->logged_trans < trans->transid))
+       if (!inode_logged(trans, inode) &&
+           (!old_dir || !inode_logged(trans, old_dir)))
                return;
 
        /*
index 1e4d43f..70f94b7 100644 (file)
@@ -1078,6 +1078,7 @@ static void __btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices,
                if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
                        list_del_init(&device->dev_alloc_list);
                        clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
+                       fs_devices->rw_devices--;
                }
                list_del_init(&device->dev_list);
                fs_devices->num_devices--;
index 297c0b1..907c2cc 100644 (file)
@@ -1349,8 +1349,7 @@ void btrfs_record_physical_zoned(struct inode *inode, u64 file_offset,
                return;
 
        ordered->physical = physical;
-       ordered->disk = bio->bi_bdev->bd_disk;
-       ordered->partno = bio->bi_bdev->bd_partno;
+       ordered->bdev = bio->bi_bdev;
 
        btrfs_put_ordered_extent(ordered);
 }
@@ -1362,18 +1361,16 @@ void btrfs_rewrite_logical_zoned(struct btrfs_ordered_extent *ordered)
        struct extent_map_tree *em_tree;
        struct extent_map *em;
        struct btrfs_ordered_sum *sum;
-       struct block_device *bdev;
        u64 orig_logical = ordered->disk_bytenr;
        u64 *logical = NULL;
        int nr, stripe_len;
 
        /* Zoned devices should not have partitions. So, we can assume it is 0 */
-       ASSERT(ordered->partno == 0);
-       bdev = bdgrab(ordered->disk->part0);
-       if (WARN_ON(!bdev))
+       ASSERT(!bdev_is_partition(ordered->bdev));
+       if (WARN_ON(!ordered->bdev))
                return;
 
-       if (WARN_ON(btrfs_rmap_block(fs_info, orig_logical, bdev,
+       if (WARN_ON(btrfs_rmap_block(fs_info, orig_logical, ordered->bdev,
                                     ordered->physical, &logical, &nr,
                                     &stripe_len)))
                goto out;
@@ -1402,7 +1399,6 @@ void btrfs_rewrite_logical_zoned(struct btrfs_ordered_extent *ordered)
 
 out:
        kfree(logical);
-       bdput(bdev);
 }
 
 bool btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info,
index a818213..9db1b39 100644 (file)
@@ -4456,7 +4456,7 @@ bool check_session_state(struct ceph_mds_session *s)
                break;
        case CEPH_MDS_SESSION_CLOSING:
                /* Should never reach this when we're unmounting */
-               WARN_ON_ONCE(true);
+               WARN_ON_ONCE(s->s_ttl);
                fallthrough;
        case CEPH_MDS_SESSION_NEW:
        case CEPH_MDS_SESSION_RESTARTING:
index f72e3b3..65d1a65 100644 (file)
@@ -873,8 +873,11 @@ PsxDelete:
                                InformationLevel) - 4;
        offset = param_offset + params;
 
-       /* Setup pointer to Request Data (inode type) */
-       pRqD = (struct unlink_psx_rq *)(((char *)&pSMB->hdr.Protocol) + offset);
+       /* Setup pointer to Request Data (inode type).
+        * Note that SMB offsets are from the beginning of SMB which is 4 bytes
+        * in, after RFC1001 field
+        */
+       pRqD = (struct unlink_psx_rq *)((char *)(pSMB) + offset + 4);
        pRqD->type = cpu_to_le16(type);
        pSMB->ParameterOffset = cpu_to_le16(param_offset);
        pSMB->DataOffset = cpu_to_le16(offset);
@@ -1081,7 +1084,8 @@ PsxCreat:
        param_offset = offsetof(struct smb_com_transaction2_spi_req,
                                InformationLevel) - 4;
        offset = param_offset + params;
-       pdata = (OPEN_PSX_REQ *)(((char *)&pSMB->hdr.Protocol) + offset);
+       /* SMB offsets are from the beginning of SMB which is 4 bytes in, after RFC1001 field */
+       pdata = (OPEN_PSX_REQ *)((char *)(pSMB) + offset + 4);
        pdata->Level = cpu_to_le16(SMB_QUERY_FILE_UNIX_BASIC);
        pdata->Permissions = cpu_to_le64(mode);
        pdata->PosixOpenFlags = cpu_to_le32(posix_flags);
index 1b04d6e..3781eee 100644 (file)
@@ -220,7 +220,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
 #ifdef CONFIG_CIFS_DFS_UPCALL
        struct super_block *sb = NULL;
        struct cifs_sb_info *cifs_sb = NULL;
-       struct dfs_cache_tgt_list tgt_list = {0};
+       struct dfs_cache_tgt_list tgt_list = DFS_CACHE_TGT_LIST_INIT(tgt_list);
        struct dfs_cache_tgt_iterator *tgt_it = NULL;
 #endif
 
@@ -3130,7 +3130,7 @@ static int do_dfs_failover(const char *path, const char *full_path, struct cifs_
 {
        int rc;
        char *npath = NULL;
-       struct dfs_cache_tgt_list tgt_list = {0};
+       struct dfs_cache_tgt_list tgt_list = DFS_CACHE_TGT_LIST_INIT(tgt_list);
        struct dfs_cache_tgt_iterator *tgt_it = NULL;
        struct smb3_fs_context tmp_ctx = {NULL};
 
index 7c17697..2837455 100644 (file)
@@ -19,6 +19,7 @@
 #include "cifs_debug.h"
 #include "cifs_unicode.h"
 #include "smb2glob.h"
+#include "dns_resolve.h"
 
 #include "dfs_cache.h"
 
@@ -911,6 +912,7 @@ static int get_targets(struct cache_entry *ce, struct dfs_cache_tgt_list *tl)
 
 err_free_it:
        list_for_each_entry_safe(it, nit, head, it_list) {
+               list_del(&it->it_list);
                kfree(it->it_name);
                kfree(it);
        }
@@ -1293,6 +1295,194 @@ int dfs_cache_get_tgt_share(char *path, const struct dfs_cache_tgt_iterator *it,
        return 0;
 }
 
+static bool target_share_equal(struct TCP_Server_Info *server, const char *s1, const char *s2)
+{
+       char unc[sizeof("\\\\") + SERVER_NAME_LENGTH] = {0};
+       const char *host;
+       size_t hostlen;
+       char *ip = NULL;
+       struct sockaddr sa;
+       bool match;
+       int rc;
+
+       if (strcasecmp(s1, s2))
+               return false;
+
+       /*
+        * Resolve the share's hostname and check if the server address matches.  Otherwise just
+        * ignore it, as we either could not upcall to resolve the hostname or failed to convert
+        * the ip address.
+        */
+       match = true;
+       extract_unc_hostname(s1, &host, &hostlen);
+       scnprintf(unc, sizeof(unc), "\\\\%.*s", (int)hostlen, host);
+
+       rc = dns_resolve_server_name_to_ip(unc, &ip, NULL);
+       if (rc < 0) {
+               cifs_dbg(FYI, "%s: could not resolve %.*s. assuming server address matches.\n",
+                        __func__, (int)hostlen, host);
+               return true;
+       }
+
+       if (!cifs_convert_address(&sa, ip, strlen(ip))) {
+               cifs_dbg(VFS, "%s: failed to convert address \'%s\'. skip address matching.\n",
+                        __func__, ip);
+       } else {
+               mutex_lock(&server->srv_mutex);
+               match = cifs_match_ipaddr((struct sockaddr *)&server->dstaddr, &sa);
+               mutex_unlock(&server->srv_mutex);
+       }
+
+       kfree(ip);
+       return match;
+}
+
+/*
+ * Mark dfs tcon for reconnecting when the currently connected tcon does not match any of the new
+ * target shares in @refs.
+ */
+static void mark_for_reconnect_if_needed(struct cifs_tcon *tcon, struct dfs_cache_tgt_list *tl,
+                                        const struct dfs_info3_param *refs, int numrefs)
+{
+       struct dfs_cache_tgt_iterator *it;
+       int i;
+
+       for (it = dfs_cache_get_tgt_iterator(tl); it; it = dfs_cache_get_next_tgt(tl, it)) {
+               for (i = 0; i < numrefs; i++) {
+                       if (target_share_equal(tcon->ses->server, dfs_cache_get_tgt_name(it),
+                                              refs[i].node_name))
+                               return;
+               }
+       }
+
+       cifs_dbg(FYI, "%s: no cached or matched targets. mark dfs share for reconnect.\n", __func__);
+       for (i = 0; i < tcon->ses->chan_count; i++) {
+               spin_lock(&GlobalMid_Lock);
+               if (tcon->ses->chans[i].server->tcpStatus != CifsExiting)
+                       tcon->ses->chans[i].server->tcpStatus = CifsNeedReconnect;
+               spin_unlock(&GlobalMid_Lock);
+       }
+}
+
+/* Refresh dfs referral of tcon and mark it for reconnect if needed */
+static int refresh_tcon(struct cifs_ses **sessions, struct cifs_tcon *tcon, bool force_refresh)
+{
+       const char *path = tcon->dfs_path + 1;
+       struct cifs_ses *ses;
+       struct cache_entry *ce;
+       struct dfs_info3_param *refs = NULL;
+       int numrefs = 0;
+       bool needs_refresh = false;
+       struct dfs_cache_tgt_list tl = DFS_CACHE_TGT_LIST_INIT(tl);
+       int rc = 0;
+       unsigned int xid;
+
+       ses = find_ipc_from_server_path(sessions, path);
+       if (IS_ERR(ses)) {
+               cifs_dbg(FYI, "%s: could not find ipc session\n", __func__);
+               return PTR_ERR(ses);
+       }
+
+       down_read(&htable_rw_lock);
+       ce = lookup_cache_entry(path);
+       needs_refresh = force_refresh || IS_ERR(ce) || cache_entry_expired(ce);
+       if (!IS_ERR(ce)) {
+               rc = get_targets(ce, &tl);
+               if (rc)
+                       cifs_dbg(FYI, "%s: could not get dfs targets: %d\n", __func__, rc);
+       }
+       up_read(&htable_rw_lock);
+
+       if (!needs_refresh) {
+               rc = 0;
+               goto out;
+       }
+
+       xid = get_xid();
+       rc = get_dfs_referral(xid, ses, path, &refs, &numrefs);
+       free_xid(xid);
+
+       /* Create or update a cache entry with the new referral */
+       if (!rc) {
+               dump_refs(refs, numrefs);
+
+               down_write(&htable_rw_lock);
+               ce = lookup_cache_entry(path);
+               if (IS_ERR(ce))
+                       add_cache_entry_locked(refs, numrefs);
+               else if (force_refresh || cache_entry_expired(ce))
+                       update_cache_entry_locked(ce, refs, numrefs);
+               up_write(&htable_rw_lock);
+
+               mark_for_reconnect_if_needed(tcon, &tl, refs, numrefs);
+       }
+
+out:
+       dfs_cache_free_tgts(&tl);
+       free_dfs_info_array(refs, numrefs);
+       return rc;
+}
+
+/**
+ * dfs_cache_remount_fs - remount a DFS share
+ *
+ * Reconfigure the dfs mount by forcing a new DFS referral and, if the currently cached targets
+ * do not match any of the new targets, mark it for reconnect.
+ *
+ * @cifs_sb: cifs superblock.
+ *
+ * Return zero if remounted, otherwise non-zero.
+ */
+int dfs_cache_remount_fs(struct cifs_sb_info *cifs_sb)
+{
+       struct cifs_tcon *tcon;
+       struct mount_group *mg;
+       struct cifs_ses *sessions[CACHE_MAX_ENTRIES + 1] = {NULL};
+       int rc;
+
+       if (!cifs_sb || !cifs_sb->master_tlink)
+               return -EINVAL;
+
+       tcon = cifs_sb_master_tcon(cifs_sb);
+       if (!tcon->dfs_path) {
+               cifs_dbg(FYI, "%s: not a dfs tcon\n", __func__);
+               return 0;
+       }
+
+       if (uuid_is_null(&cifs_sb->dfs_mount_id)) {
+               cifs_dbg(FYI, "%s: tcon has no dfs mount group id\n", __func__);
+               return -EINVAL;
+       }
+
+       mutex_lock(&mount_group_list_lock);
+       mg = find_mount_group_locked(&cifs_sb->dfs_mount_id);
+       if (IS_ERR(mg)) {
+               mutex_unlock(&mount_group_list_lock);
+               cifs_dbg(FYI, "%s: tcon has ipc session to refresh referral\n", __func__);
+               return PTR_ERR(mg);
+       }
+       kref_get(&mg->refcount);
+       mutex_unlock(&mount_group_list_lock);
+
+       spin_lock(&mg->lock);
+       memcpy(&sessions, mg->sessions, mg->num_sessions * sizeof(mg->sessions[0]));
+       spin_unlock(&mg->lock);
+
+       /*
+        * After reconnecting to a different server, unique ids won't match anymore, so we disable
+        * serverino. This prevents dentry revalidation from thinking the dentries are stale (ESTALE).
+        */
+       cifs_autodisable_serverino(cifs_sb);
+       /*
+        * Force the use of prefix path to support failover on DFS paths that resolve to targets
+        * that have different prefix paths.
+        */
+       cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH;
+       rc = refresh_tcon(sessions, tcon, true);
+
+       kref_put(&mg->refcount, mount_group_release);
+       return rc;
+}
+
 /*
  * Refresh all active dfs mounts regardless of whether they are in cache or not.
  * (cache can be cleared)
@@ -1303,7 +1493,6 @@ static void refresh_mounts(struct cifs_ses **sessions)
        struct cifs_ses *ses;
        struct cifs_tcon *tcon, *ntcon;
        struct list_head tcons;
-       unsigned int xid;
 
        INIT_LIST_HEAD(&tcons);
 
@@ -1321,44 +1510,8 @@ static void refresh_mounts(struct cifs_ses **sessions)
        spin_unlock(&cifs_tcp_ses_lock);
 
        list_for_each_entry_safe(tcon, ntcon, &tcons, ulist) {
-               const char *path = tcon->dfs_path + 1;
-               struct cache_entry *ce;
-               struct dfs_info3_param *refs = NULL;
-               int numrefs = 0;
-               bool needs_refresh = false;
-               int rc = 0;
-
                list_del_init(&tcon->ulist);
-
-               ses = find_ipc_from_server_path(sessions, path);
-               if (IS_ERR(ses))
-                       goto next_tcon;
-
-               down_read(&htable_rw_lock);
-               ce = lookup_cache_entry(path);
-               needs_refresh = IS_ERR(ce) || cache_entry_expired(ce);
-               up_read(&htable_rw_lock);
-
-               if (!needs_refresh)
-                       goto next_tcon;
-
-               xid = get_xid();
-               rc = get_dfs_referral(xid, ses, path, &refs, &numrefs);
-               free_xid(xid);
-
-               /* Create or update a cache entry with the new referral */
-               if (!rc) {
-                       down_write(&htable_rw_lock);
-                       ce = lookup_cache_entry(path);
-                       if (IS_ERR(ce))
-                               add_cache_entry_locked(refs, numrefs);
-                       else if (cache_entry_expired(ce))
-                               update_cache_entry_locked(ce, refs, numrefs);
-                       up_write(&htable_rw_lock);
-               }
-
-next_tcon:
-               free_dfs_info_array(refs, numrefs);
+               refresh_tcon(sessions, tcon, false);
                cifs_put_tcon(tcon);
        }
 }
index b29d3ae..52070d1 100644 (file)
@@ -13,6 +13,8 @@
 #include <linux/uuid.h>
 #include "cifsglob.h"
 
+#define DFS_CACHE_TGT_LIST_INIT(var) { .tl_numtgts = 0, .tl_list = LIST_HEAD_INIT((var).tl_list), }
+
 struct dfs_cache_tgt_list {
        int tl_numtgts;
        struct list_head tl_list;
@@ -44,6 +46,7 @@ int dfs_cache_get_tgt_share(char *path, const struct dfs_cache_tgt_iterator *it,
 void dfs_cache_put_refsrv_sessions(const uuid_t *mount_id);
 void dfs_cache_add_refsrv_session(const uuid_t *mount_id, struct cifs_ses *ses);
 char *dfs_cache_canonical_path(const char *path, const struct nls_table *cp, int remap);
+int dfs_cache_remount_fs(struct cifs_sb_info *cifs_sb);
 
 static inline struct dfs_cache_tgt_iterator *
 dfs_cache_get_next_tgt(struct dfs_cache_tgt_list *tl,
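
The DFS_CACHE_TGT_LIST_INIT() macro exists because the "{0}" initializers it replaces above left tl_list as a zeroed list_head rather than an empty list; a list head must point at itself. A minimal usage sketch:

	/* correct: the list head references itself */
	struct dfs_cache_tgt_list tl = DFS_CACHE_TGT_LIST_INIT(tl);

	/* equivalent open-coded form */
	struct dfs_cache_tgt_list tl2 = {
		.tl_numtgts = 0,
		.tl_list = LIST_HEAD_INIT(tl2.tl_list),
	};
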
index cd10860..0a72840 100644 (file)
@@ -4619,7 +4619,7 @@ read_complete:
 
 static int cifs_readpage(struct file *file, struct page *page)
 {
-       loff_t offset = (loff_t)page->index << PAGE_SHIFT;
+       loff_t offset = page_file_offset(page);
        int rc = -EACCES;
        unsigned int xid;
 
index 553adfb..eed59bc 100644 (file)
@@ -13,6 +13,9 @@
 #include <linux/magic.h>
 #include <linux/security.h>
 #include <net/net_namespace.h>
+#ifdef CONFIG_CIFS_DFS_UPCALL
+#include "dfs_cache.h"
+#endif
 */
 
 #include <linux/ctype.h>
@@ -779,6 +782,10 @@ static int smb3_reconfigure(struct fs_context *fc)
        smb3_cleanup_fs_context_contents(cifs_sb->ctx);
        rc = smb3_fs_context_dup(cifs_sb->ctx, ctx);
        smb3_update_mnt_flags(cifs_sb);
+#ifdef CONFIG_CIFS_DFS_UPCALL
+       if (!rc)
+               rc = dfs_cache_remount_fs(cifs_sb);
+#endif
 
        return rc;
 }
@@ -918,6 +925,13 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
                ctx->cred_uid = uid;
                ctx->cruid_specified = true;
                break;
+       case Opt_backupuid:
+               uid = make_kuid(current_user_ns(), result.uint_32);
+               if (!uid_valid(uid))
+                       goto cifs_parse_mount_err;
+               ctx->backupuid = uid;
+               ctx->backupuid_specified = true;
+               break;
        case Opt_backupgid:
                gid = make_kgid(current_user_ns(), result.uint_32);
                if (!gid_valid(gid))
index ba3c58e..2dfd0d8 100644 (file)
@@ -3618,6 +3618,7 @@ static int smb3_simple_fallocate_write_range(unsigned int xid,
 {
        struct cifs_io_parms io_parms = {0};
        int nbytes;
+       int rc = 0;
        struct kvec iov[2];
 
        io_parms.netfid = cfile->fid.netfid;
@@ -3625,13 +3626,25 @@ static int smb3_simple_fallocate_write_range(unsigned int xid,
        io_parms.tcon = tcon;
        io_parms.persistent_fid = cfile->fid.persistent_fid;
        io_parms.volatile_fid = cfile->fid.volatile_fid;
-       io_parms.offset = off;
-       io_parms.length = len;
 
-       /* iov[0] is reserved for smb header */
-       iov[1].iov_base = buf;
-       iov[1].iov_len = io_parms.length;
-       return SMB2_write(xid, &io_parms, &nbytes, iov, 1);
+       while (len) {
+               io_parms.offset = off;
+               io_parms.length = len;
+               if (io_parms.length > SMB2_MAX_BUFFER_SIZE)
+                       io_parms.length = SMB2_MAX_BUFFER_SIZE;
+               /* iov[0] is reserved for smb header */
+               iov[1].iov_base = buf;
+               iov[1].iov_len = io_parms.length;
+               rc = SMB2_write(xid, &io_parms, &nbytes, iov, 1);
+               if (rc)
+                       break;
+               if (nbytes > len)
+                       return -EINVAL;
+               buf += nbytes;
+               off += nbytes;
+               len -= nbytes;
+       }
+       return rc;
 }
 
 static int smb3_simple_fallocate_range(unsigned int xid,
@@ -3655,11 +3668,6 @@ static int smb3_simple_fallocate_range(unsigned int xid,
                        (char **)&out_data, &out_data_len);
        if (rc)
                goto out;
-       /*
-        * It is already all allocated
-        */
-       if (out_data_len == 0)
-               goto out;
 
        buf = kzalloc(1024 * 1024, GFP_KERNEL);
        if (buf == NULL) {
@@ -3782,6 +3790,24 @@ static long smb3_simple_falloc(struct file *file, struct cifs_tcon *tcon,
                goto out;
        }
 
+       if (keep_size == true) {
+               /*
+                * We can not preallocate pages beyond the end of the file
+                * in SMB2
+                */
+               if (off >= i_size_read(inode)) {
+                       rc = 0;
+                       goto out;
+               }
+               /*
+                * For fallocates that are partially beyond the end of file,
+                * clamp len so we only fallocate up to the end of file.
+                */
+               if (off + len > i_size_read(inode)) {
+                       len = i_size_read(inode) - off;
+               }
+       }
+
        if ((keep_size == true) || (i_size_read(inode) >= off + len)) {
                /*
                 * At this point, we are trying to fallocate an internal
index 14292db..2c2f179 100644 (file)
@@ -106,12 +106,11 @@ static int ext2_commit_chunk(struct page *page, loff_t pos, unsigned len)
        return err;
 }
 
-static bool ext2_check_page(struct page *page, int quiet)
+static bool ext2_check_page(struct page *page, int quiet, char *kaddr)
 {
        struct inode *dir = page->mapping->host;
        struct super_block *sb = dir->i_sb;
        unsigned chunk_size = ext2_chunk_size(dir);
-       char *kaddr = page_address(page);
        u32 max_inumber = le32_to_cpu(EXT2_SB(sb)->s_es->s_inodes_count);
        unsigned offs, rec_len;
        unsigned limit = PAGE_SIZE;
@@ -205,7 +204,8 @@ static struct page * ext2_get_page(struct inode *dir, unsigned long n,
        if (!IS_ERR(page)) {
                *page_addr = kmap_local_page(page);
                if (unlikely(!PageChecked(page))) {
-                       if (PageError(page) || !ext2_check_page(page, quiet))
+                       if (PageError(page) || !ext2_check_page(page, quiet,
+                                                               *page_addr))
                                goto fail;
                }
        }
@@ -584,10 +584,10 @@ out_unlock:
  * ext2_delete_entry deletes a directory entry by merging it with the
  * previous entry. Page is up-to-date.
  */
-int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page )
+int ext2_delete_entry (struct ext2_dir_entry_2 *dir, struct page *page,
+                       char *kaddr)
 {
        struct inode *inode = page->mapping->host;
-       char *kaddr = page_address(page);
        unsigned from = ((char*)dir - kaddr) & ~(ext2_chunk_size(inode)-1);
        unsigned to = ((char *)dir - kaddr) +
                                ext2_rec_len_from_disk(dir->rec_len);
@@ -607,7 +607,7 @@ int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page )
                de = ext2_next_entry(de);
        }
        if (pde)
-               from = (char*)pde - (char*)page_address(page);
+               from = (char *)pde - kaddr;
        pos = page_offset(page) + from;
        lock_page(page);
        err = ext2_prepare_chunk(page, pos, to - from);
index b0a6948..e512630 100644 (file)
@@ -740,7 +740,8 @@ extern int ext2_inode_by_name(struct inode *dir,
 extern int ext2_make_empty(struct inode *, struct inode *);
 extern struct ext2_dir_entry_2 *ext2_find_entry(struct inode *, const struct qstr *,
                                                struct page **, void **res_page_addr);
-extern int ext2_delete_entry (struct ext2_dir_entry_2 *, struct page *);
+extern int ext2_delete_entry(struct ext2_dir_entry_2 *dir, struct page *page,
+                            char *kaddr);
 extern int ext2_empty_dir (struct inode *);
 extern struct ext2_dir_entry_2 *ext2_dotdot(struct inode *dir, struct page **p, void **pa);
 extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, void *,
index 1f69b81..5f6b756 100644 (file)
@@ -293,7 +293,7 @@ static int ext2_unlink(struct inode * dir, struct dentry *dentry)
                goto out;
        }
 
-       err = ext2_delete_entry (de, page);
+       err = ext2_delete_entry (de, page, page_addr);
        ext2_put_page(page, page_addr);
        if (err)
                goto out;
@@ -397,7 +397,7 @@ static int ext2_rename (struct user_namespace * mnt_userns,
        old_inode->i_ctime = current_time(old_inode);
        mark_inode_dirty(old_inode);
 
-       ext2_delete_entry(old_de, old_page);
+       ext2_delete_entry(old_de, old_page, old_page_addr);
 
        if (dir_de) {
                if (old_dir != new_dir)
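
The ext2 hunks all follow one pattern: the caller already holds a kmap_local_page() mapping, so the mapped address is threaded down to the callee instead of letting it call page_address(), which does not see temporary kmap_local mappings on highmem configurations. A sketch of the calling convention (illustrative; in the real code ext2_put_page() drops the mapping):

	void *kaddr = kmap_local_page(page);
	/* callee works on kaddr instead of calling page_address(page) */
	err = ext2_delete_entry(de, page, kaddr);
	kunmap_local(kaddr);
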
index 06d04a7..4c33705 100644 (file)
@@ -521,6 +521,9 @@ static bool inode_prepare_wbs_switch(struct inode *inode,
         */
        smp_mb();
 
+       if (IS_DAX(inode))
+               return false;
+
        /* while holding I_WB_SWITCH, no one else can update the association */
        spin_lock(&inode->i_lock);
        if (!(inode->i_sb->s_flags & SB_ACTIVE) ||
index 926eeb9..cdfb1ae 100644 (file)
@@ -77,7 +77,7 @@ enum hugetlb_param {
 static const struct fs_parameter_spec hugetlb_fs_parameters[] = {
        fsparam_u32   ("gid",           Opt_gid),
        fsparam_string("min_size",      Opt_min_size),
-       fsparam_u32   ("mode",          Opt_mode),
+       fsparam_u32oct("mode",          Opt_mode),
        fsparam_string("nr_inodes",     Opt_nr_inodes),
        fsparam_string("pagesize",      Opt_pagesize),
        fsparam_string("size",          Opt_size),
index 3ce8edb..82e8eb3 100644 (file)
@@ -61,7 +61,6 @@ extern void __init chrdev_init(void);
  */
 extern const struct fs_context_operations legacy_fs_context_ops;
 extern int parse_monolithic_mount_data(struct fs_context *, void *);
-extern void fc_drop_locked(struct fs_context *);
 extern void vfs_clean_context(struct fs_context *fc);
 extern int finish_clean_context(struct fs_context *fc);
 
index 843d4a7..cf086b0 100644 (file)
@@ -731,7 +731,12 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
        int work_flags;
        unsigned long flags;
 
-       if (test_bit(IO_WQ_BIT_EXIT, &wqe->wq->state)) {
+       /*
+        * If io-wq is exiting for this task, or if the request has explicitly
+        * been marked as one that should not get executed, cancel it here.
+        */
+       if (test_bit(IO_WQ_BIT_EXIT, &wqe->wq->state) ||
+           (work->flags & IO_WQ_WORK_CANCEL)) {
                io_run_cancel(work, wqe);
                return;
        }
index 0cac361..bf548af 100644 (file)
@@ -1279,8 +1279,17 @@ static void io_prep_async_link(struct io_kiocb *req)
 {
        struct io_kiocb *cur;
 
-       io_for_each_link(cur, req)
-               io_prep_async_work(cur);
+       if (req->flags & REQ_F_LINK_TIMEOUT) {
+               struct io_ring_ctx *ctx = req->ctx;
+
+               spin_lock_irq(&ctx->completion_lock);
+               io_for_each_link(cur, req)
+                       io_prep_async_work(cur);
+               spin_unlock_irq(&ctx->completion_lock);
+       } else {
+               io_for_each_link(cur, req)
+                       io_prep_async_work(cur);
+       }
 }
 
 static void io_queue_async_work(struct io_kiocb *req)
@@ -1294,6 +1303,17 @@ static void io_queue_async_work(struct io_kiocb *req)
 
        /* init ->work of the whole link before punting */
        io_prep_async_link(req);
+
+       /*
+        * Not expected to happen, but if we do have a bug where this _can_
+        * happen, catch it here and ensure the request is marked as
+        * canceled. That will make io-wq go through the usual work cancel
+        * procedure rather than attempt to run this request (or create a new
+        * worker for it).
+        */
+       if (WARN_ON_ONCE(!same_thread_group(req->task, current)))
+               req->work.flags |= IO_WQ_WORK_CANCEL;
+
        trace_io_uring_queue_async_work(ctx, io_wq_is_hashed(&req->work), req,
                                        &req->work, req->flags);
        io_wq_enqueue(tctx->io_wq, &req->work);
@@ -1939,9 +1959,13 @@ static void tctx_task_work(struct callback_head *cb)
                        node = next;
                }
                if (wq_list_empty(&tctx->task_list)) {
+                       spin_lock_irq(&tctx->task_lock);
                        clear_bit(0, &tctx->task_state);
-                       if (wq_list_empty(&tctx->task_list))
+                       if (wq_list_empty(&tctx->task_list)) {
+                               spin_unlock_irq(&tctx->task_lock);
                                break;
+                       }
+                       spin_unlock_irq(&tctx->task_lock);
                        /* another tctx_task_work() is enqueued, yield */
                        if (test_and_set_bit(0, &tctx->task_state))
                                break;
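
The tctx_task_work() hunk closes a race between the consumer clearing task_state and producers appending to task_list: both steps must observe a consistent view under task_lock. A generic sketch of the pattern, condensed from the hunk above (hypothetical, not the verbatim control flow):

	spin_lock_irq(&tctx->task_lock);
	clear_bit(0, &tctx->task_state);
	empty = wq_list_empty(&tctx->task_list);
	spin_unlock_irq(&tctx->task_lock);
	if (empty)
		break;	/* nothing slipped in while the bit was still set */
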
@@ -2036,6 +2060,12 @@ static void io_req_task_queue(struct io_kiocb *req)
        io_req_task_work_add(req);
 }
 
+static void io_req_task_queue_reissue(struct io_kiocb *req)
+{
+       req->io_task_work.func = io_queue_async_work;
+       io_req_task_work_add(req);
+}
+
 static inline void io_queue_next(struct io_kiocb *req)
 {
        struct io_kiocb *nxt = io_req_find_next(req);
@@ -2205,7 +2235,7 @@ static inline bool io_run_task_work(void)
  * Find and free completed poll iocbs
  */
 static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
-                              struct list_head *done)
+                              struct list_head *done, bool resubmit)
 {
        struct req_batch rb;
        struct io_kiocb *req;
@@ -2220,11 +2250,11 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
                req = list_first_entry(done, struct io_kiocb, inflight_entry);
                list_del(&req->inflight_entry);
 
-               if (READ_ONCE(req->result) == -EAGAIN &&
+               if (READ_ONCE(req->result) == -EAGAIN && resubmit &&
                    !(req->flags & REQ_F_DONT_REISSUE)) {
                        req->iopoll_completed = 0;
                        req_ref_get(req);
-                       io_queue_async_work(req);
+                       io_req_task_queue_reissue(req);
                        continue;
                }
 
@@ -2244,7 +2274,7 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
 }
 
 static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
-                       long min)
+                       long min, bool resubmit)
 {
        struct io_kiocb *req, *tmp;
        LIST_HEAD(done);
@@ -2287,7 +2317,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
        }
 
        if (!list_empty(&done))
-               io_iopoll_complete(ctx, nr_events, &done);
+               io_iopoll_complete(ctx, nr_events, &done, resubmit);
 
        return ret;
 }
@@ -2305,7 +2335,7 @@ static void io_iopoll_try_reap_events(struct io_ring_ctx *ctx)
        while (!list_empty(&ctx->iopoll_list)) {
                unsigned int nr_events = 0;
 
-               io_do_iopoll(ctx, &nr_events, 0);
+               io_do_iopoll(ctx, &nr_events, 0, false);
 
                /* let it sleep and repeat later if can't complete a request */
                if (nr_events == 0)
@@ -2367,7 +2397,7 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
                            list_empty(&ctx->iopoll_list))
                                break;
                }
-               ret = io_do_iopoll(ctx, &nr_events, min);
+               ret = io_do_iopoll(ctx, &nr_events, min, true);
        } while (!ret && nr_events < min && !need_resched());
 out:
        mutex_unlock(&ctx->uring_lock);
@@ -2417,6 +2447,12 @@ static bool io_rw_should_reissue(struct io_kiocb *req)
         */
        if (percpu_ref_is_dying(&ctx->refs))
                return false;
+       /*
+        * Play it safe and assume not safe to re-import and reissue if we're
+        * not in the original thread group (or in task context).
+        */
+       if (!same_thread_group(req->task, current) || !in_task())
+               return false;
        return true;
 }
 #else
@@ -2747,7 +2783,7 @@ static void kiocb_done(struct kiocb *kiocb, ssize_t ret,
                req->flags &= ~REQ_F_REISSUE;
                if (io_resubmit_prep(req)) {
                        req_ref_get(req);
-                       io_queue_async_work(req);
+                       io_req_task_queue_reissue(req);
                } else {
                        int cflags = 0;
 
@@ -4802,6 +4838,7 @@ IO_NETOP_FN(recv);
 struct io_poll_table {
        struct poll_table_struct pt;
        struct io_kiocb *req;
+       int nr_entries;
        int error;
 };
 
@@ -4902,7 +4939,6 @@ static bool io_poll_complete(struct io_kiocb *req, __poll_t mask)
        if (req->poll.events & EPOLLONESHOT)
                flags = 0;
        if (!io_cqring_fill_event(ctx, req->user_data, error, flags)) {
-               io_poll_remove_waitqs(req);
                req->poll.done = true;
                flags = 0;
        }
@@ -4925,6 +4961,7 @@ static void io_poll_task_func(struct io_kiocb *req)
 
                done = io_poll_complete(req, req->result);
                if (done) {
+                       io_poll_remove_double(req);
                        hash_del(&req->hash_node);
                } else {
                        req->result = 0;
@@ -4995,11 +5032,11 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
        struct io_kiocb *req = pt->req;
 
        /*
-        * If poll->head is already set, it's because the file being polled
-        * uses multiple waitqueues for poll handling (eg one for read, one
-        * for write). Setup a separate io_poll_iocb if this happens.
+        * The file being polled uses multiple waitqueues for poll handling
+        * (e.g. one for read, one for write). Setup a separate io_poll_iocb
+        * if this happens.
         */
-       if (unlikely(poll->head)) {
+       if (unlikely(pt->nr_entries)) {
                struct io_poll_iocb *poll_one = poll;
 
                /* already have a 2nd entry, fail a third attempt */
@@ -5027,7 +5064,7 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
                *poll_ptr = poll;
        }
 
-       pt->error = 0;
+       pt->nr_entries++;
        poll->head = head;
 
        if (poll->events & EPOLLEXCLUSIVE)
@@ -5104,11 +5141,16 @@ static __poll_t __io_arm_poll_handler(struct io_kiocb *req,
 
        ipt->pt._key = mask;
        ipt->req = req;
-       ipt->error = -EINVAL;
+       ipt->error = 0;
+       ipt->nr_entries = 0;
 
        mask = vfs_poll(req->file, &ipt->pt) & poll->events;
+       if (unlikely(!ipt->nr_entries) && !ipt->error)
+               ipt->error = -EINVAL;
 
        spin_lock_irq(&ctx->completion_lock);
+       if (ipt->error || (mask && (poll->events & EPOLLONESHOT)))
+               io_poll_remove_double(req);
        if (likely(poll->head)) {
                spin_lock(&poll->head->lock);
                if (unlikely(list_empty(&poll->wait.entry))) {
@@ -5179,7 +5221,6 @@ static int io_arm_poll_handler(struct io_kiocb *req)
        ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask,
                                        io_async_wake);
        if (ret || ipt.error) {
-               io_poll_remove_double(req);
                spin_unlock_irq(&ctx->completion_lock);
                if (ret)
                        return IO_APOLL_READY;
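
Taken together, the poll hunks replace "poll->head != NULL" bookkeeping with an explicit nr_entries counter and move the -EINVAL default out of the table callback. A condensed view of the flow spread across the hunks above (sketch):

	ipt->error = 0;
	ipt->nr_entries = 0;			/* bumped once per waitqueue attached */
	mask = vfs_poll(req->file, &ipt->pt) & poll->events;
	if (unlikely(!ipt->nr_entries) && !ipt->error)
		ipt->error = -EINVAL;		/* vfs_poll() attached no waitqueue at all */
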
@@ -6792,7 +6833,7 @@ static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries)
 
                mutex_lock(&ctx->uring_lock);
                if (!list_empty(&ctx->iopoll_list))
-                       io_do_iopoll(ctx, &nr_events, 0);
+                       io_do_iopoll(ctx, &nr_events, 0, true);
 
                /*
                 * Don't submit if refs are dying, good for io_uring_register(),
@@ -7899,15 +7940,19 @@ static struct io_wq *io_init_wq_offload(struct io_ring_ctx *ctx,
        struct io_wq_data data;
        unsigned int concurrency;
 
+       mutex_lock(&ctx->uring_lock);
        hash = ctx->hash_map;
        if (!hash) {
                hash = kzalloc(sizeof(*hash), GFP_KERNEL);
-               if (!hash)
+               if (!hash) {
+                       mutex_unlock(&ctx->uring_lock);
                        return ERR_PTR(-ENOMEM);
+               }
                refcount_set(&hash->refs, 1);
                init_waitqueue_head(&hash->wait);
                ctx->hash_map = hash;
        }
+       mutex_unlock(&ctx->uring_lock);
 
        data.hash = hash;
        data.task = task;
@@ -7981,9 +8026,11 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx,
                f = fdget(p->wq_fd);
                if (!f.file)
                        return -ENXIO;
-               fdput(f);
-               if (f.file->f_op != &io_uring_fops)
+               if (f.file->f_op != &io_uring_fops) {
+                       fdput(f);
                        return -EINVAL;
+               }
+               fdput(f);
        }
        if (ctx->flags & IORING_SETUP_SQPOLL) {
                struct task_struct *tsk;
index 7756579..54d7843 100644 (file)
@@ -1529,6 +1529,45 @@ static void ocfs2_truncate_cluster_pages(struct inode *inode, u64 byte_start,
        }
 }
 
+/*
+ * zero out partial blocks of one cluster.
+ *
+ * start: file offset where zero starts, will be made upper block aligned.
+ * len: it will be trimmed to the end of current cluster if "start + len"
+ *      is bigger than it.
+ */
+static int ocfs2_zeroout_partial_cluster(struct inode *inode,
+                                       u64 start, u64 len)
+{
+       int ret;
+       u64 start_block, end_block, nr_blocks;
+       u64 p_block, offset;
+       u32 cluster, p_cluster, nr_clusters;
+       struct super_block *sb = inode->i_sb;
+       u64 end = ocfs2_align_bytes_to_clusters(sb, start);
+
+       if (start + len < end)
+               end = start + len;
+
+       start_block = ocfs2_blocks_for_bytes(sb, start);
+       end_block = ocfs2_blocks_for_bytes(sb, end);
+       nr_blocks = end_block - start_block;
+       if (!nr_blocks)
+               return 0;
+
+       cluster = ocfs2_bytes_to_clusters(sb, start);
+       ret = ocfs2_get_clusters(inode, cluster, &p_cluster,
+                               &nr_clusters, NULL);
+       if (ret)
+               return ret;
+       if (!p_cluster)
+               return 0;
+
+       offset = start_block - ocfs2_clusters_to_blocks(sb, cluster);
+       p_block = ocfs2_clusters_to_blocks(sb, p_cluster) + offset;
+       return sb_issue_zeroout(sb, p_block, nr_blocks, GFP_NOFS);
+}
+
 static int ocfs2_zero_partial_clusters(struct inode *inode,
                                       u64 start, u64 len)
 {
@@ -1538,6 +1577,7 @@ static int ocfs2_zero_partial_clusters(struct inode *inode,
        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
        unsigned int csize = osb->s_clustersize;
        handle_t *handle;
+       loff_t isize = i_size_read(inode);
 
        /*
         * The "start" and "end" values are NOT necessarily part of
@@ -1558,6 +1598,26 @@ static int ocfs2_zero_partial_clusters(struct inode *inode,
        if ((start & (csize - 1)) == 0 && (end & (csize - 1)) == 0)
                goto out;
 
+       /* No page cache for EOF blocks, issue zero out to disk. */
+       if (end > isize) {
+               /*
+                * Zero out eof blocks in the last cluster starting from
+                * "isize", even when "start" > "isize", because zeroing
+                * out exactly at "start" is complicated: "start" may not
+                * be aligned with the block size, a buffered write would
+                * be required to do that, and buffered writes beyond eof
+                * are not supported.
+                */
+               ret = ocfs2_zeroout_partial_cluster(inode, isize,
+                                       end - isize);
+               if (ret) {
+                       mlog_errno(ret);
+                       goto out;
+               }
+               if (start >= isize)
+                       goto out;
+               end = isize;
+       }
        handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
        if (IS_ERR(handle)) {
                ret = PTR_ERR(handle);
@@ -1855,45 +1915,6 @@ out:
        return ret;
 }
 
-/*
- * zero out partial blocks of one cluster.
- *
- * start: file offset where zero starts, will be made upper block aligned.
- * len: it will be trimmed to the end of current cluster if "start + len"
- *      is bigger than it.
- */
-static int ocfs2_zeroout_partial_cluster(struct inode *inode,
-                                       u64 start, u64 len)
-{
-       int ret;
-       u64 start_block, end_block, nr_blocks;
-       u64 p_block, offset;
-       u32 cluster, p_cluster, nr_clusters;
-       struct super_block *sb = inode->i_sb;
-       u64 end = ocfs2_align_bytes_to_clusters(sb, start);
-
-       if (start + len < end)
-               end = start + len;
-
-       start_block = ocfs2_blocks_for_bytes(sb, start);
-       end_block = ocfs2_blocks_for_bytes(sb, end);
-       nr_blocks = end_block - start_block;
-       if (!nr_blocks)
-               return 0;
-
-       cluster = ocfs2_bytes_to_clusters(sb, start);
-       ret = ocfs2_get_clusters(inode, cluster, &p_cluster,
-                               &nr_clusters, NULL);
-       if (ret)
-               return ret;
-       if (!p_cluster)
-               return 0;
-
-       offset = start_block - ocfs2_clusters_to_blocks(sb, cluster);
-       p_block = ocfs2_clusters_to_blocks(sb, p_cluster) + offset;
-       return sb_issue_zeroout(sb, p_block, nr_blocks, GFP_NOFS);
-}
-
 /*
  * Parts of this function taken from xfs_change_file_space()
  */
@@ -1935,7 +1956,6 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
                goto out_inode_unlock;
        }
 
-       orig_isize = i_size_read(inode);
        switch (sr->l_whence) {
        case 0: /*SEEK_SET*/
                break;
@@ -1943,7 +1963,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
                sr->l_start += f_pos;
                break;
        case 2: /*SEEK_END*/
-               sr->l_start += orig_isize;
+               sr->l_start += i_size_read(inode);
                break;
        default:
                ret = -EINVAL;
@@ -1998,6 +2018,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
                ret = -EINVAL;
        }
 
+       orig_isize = i_size_read(inode);
        /* zeroout eof blocks in the cluster. */
        if (!ret && change_size && orig_isize < size) {
                ret = ocfs2_zeroout_partial_cluster(inode, orig_isize,
index bfd946a..8e6ef62 100644 (file)
--- a/fs/pipe.c
+++ b/fs/pipe.c
 
 #include "internal.h"
 
+/*
+ * New pipe buffers will be restricted to this size while the user is exceeding
+ * their pipe buffer quota. The general pipe use case needs at least two
+ * buffers: one for data yet to be read, and one for new data. If this is less
+ * than two, then a write to a non-empty pipe may block even if the pipe is not
+ * full. This can occur with GNU make jobserver or similar uses of pipes as
+ * semaphores: multiple processes may be waiting to write tokens back to the
+ * pipe before reading tokens: https://lore.kernel.org/lkml/1628086770.5rn8p04n6j.none@localhost/.
+ *
+ * Users can reduce their pipe buffers with F_SETPIPE_SZ below this at their
+ * own risk, namely: pipe writes to non-full pipes may block until the pipe is
+ * emptied.
+ */
+#define PIPE_MIN_DEF_BUFFERS 2
+
 /*
  * The max size that a non-root user is allowed to grow the pipe. Can
  * be set by root in /proc/sys/fs/pipe-max-size
@@ -429,20 +444,20 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
 #endif
 
        /*
-        * Only wake up if the pipe started out empty, since
-        * otherwise there should be no readers waiting.
+        * Epoll nonsensically wants a wakeup whether the pipe
+        * was already empty or not.
         *
         * If it wasn't empty we try to merge new data into
         * the last buffer.
         *
         * That naturally merges small writes, but it also
-        * page-aligs the rest of the writes for large writes
+        * page-aligns the rest of the writes for large writes
         * spanning multiple pages.
         */
        head = pipe->head;
-       was_empty = pipe_empty(head, pipe->tail);
+       was_empty = true;
        chars = total_len & (PAGE_SIZE-1);
-       if (chars && !was_empty) {
+       if (chars && !pipe_empty(head, pipe->tail)) {
                unsigned int mask = pipe->ring_size - 1;
                struct pipe_buffer *buf = &pipe->bufs[(head - 1) & mask];
                int offset = buf->offset + buf->len;
@@ -781,8 +796,8 @@ struct pipe_inode_info *alloc_pipe_info(void)
        user_bufs = account_pipe_buffers(user, 0, pipe_bufs);
 
        if (too_many_pipe_buffers_soft(user_bufs) && pipe_is_unprivileged_user()) {
-               user_bufs = account_pipe_buffers(user, pipe_bufs, 1);
-               pipe_bufs = 1;
+               user_bufs = account_pipe_buffers(user, pipe_bufs, PIPE_MIN_DEF_BUFFERS);
+               pipe_bufs = PIPE_MIN_DEF_BUFFERS;
        }
 
        if (too_many_pipe_buffers_hard(user_bufs) && pipe_is_unprivileged_user())
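
The PIPE_MIN_DEF_BUFFERS comment describes pipes used as counting semaphores. A minimal user-space sketch of that jobserver pattern (illustrative only, not kernel code):

	#include <unistd.h>

	static int fds[2];

	static void jobserver_setup(int jobs)
	{
		pipe(fds);
		for (int i = 0; i < jobs; i++)
			write(fds[1], "+", 1);	/* one token per job slot */
	}

	static void jobserver_worker(void)
	{
		char tok;

		read(fds[0], &tok, 1);		/* acquire a slot */
		/* ... run the job ... */
		write(fds[1], &tok, 1);		/* release: with a 1-buffer pipe
						 * this write could block on a
						 * non-full pipe */
	}
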
index 476a7ff..ef42729 100644 (file)
@@ -387,6 +387,24 @@ void pathrelse(struct treepath *search_path)
        search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
 }
 
+static int has_valid_deh_location(struct buffer_head *bh, struct item_head *ih)
+{
+       struct reiserfs_de_head *deh;
+       int i;
+
+       deh = B_I_DEH(bh, ih);
+       for (i = 0; i < ih_entry_count(ih); i++) {
+               if (deh_location(&deh[i]) > ih_item_len(ih)) {
+                       reiserfs_warning(NULL, "reiserfs-5094",
+                                        "directory entry location seems wrong %h",
+                                        &deh[i]);
+                       return 0;
+               }
+       }
+
+       return 1;
+}
+
 static int is_leaf(char *buf, int blocksize, struct buffer_head *bh)
 {
        struct block_head *blkh;
@@ -454,11 +472,14 @@ static int is_leaf(char *buf, int blocksize, struct buffer_head *bh)
                                         "(second one): %h", ih);
                        return 0;
                }
-               if (is_direntry_le_ih(ih) && (ih_item_len(ih) < (ih_entry_count(ih) * IH_SIZE))) {
-                       reiserfs_warning(NULL, "reiserfs-5093",
-                                        "item entry count seems wrong %h",
-                                        ih);
-                       return 0;
+               if (is_direntry_le_ih(ih)) {
+                       if (ih_item_len(ih) < (ih_entry_count(ih) * IH_SIZE)) {
+                               reiserfs_warning(NULL, "reiserfs-5093",
+                                                "item entry count seems wrong %h",
+                                                ih);
+                               return 0;
+                       }
+                       return has_valid_deh_location(bh, ih);
                }
                prev_location = ih_location(ih);
        }
index 3ffafc7..58481f8 100644 (file)
@@ -2082,6 +2082,14 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
                unlock_new_inode(root_inode);
        }
 
+       if (!S_ISDIR(root_inode->i_mode) || !inode_get_bytes(root_inode) ||
+           !root_inode->i_size) {
+               SWARN(silent, s, "", "corrupt root inode, run fsck");
+               iput(root_inode);
+               errval = -EUCLEAN;
+               goto error;
+       }
+
        s->s_root = d_make_root(root_inode);
        if (!s->s_root)
                goto error;
index f6e0f0c..5c2d806 100644 (file)
@@ -1236,23 +1236,21 @@ static __always_inline void wake_userfault(struct userfaultfd_ctx *ctx,
 }
 
 static __always_inline int validate_range(struct mm_struct *mm,
-                                         __u64 *start, __u64 len)
+                                         __u64 start, __u64 len)
 {
        __u64 task_size = mm->task_size;
 
-       *start = untagged_addr(*start);
-
-       if (*start & ~PAGE_MASK)
+       if (start & ~PAGE_MASK)
                return -EINVAL;
        if (len & ~PAGE_MASK)
                return -EINVAL;
        if (!len)
                return -EINVAL;
-       if (*start < mmap_min_addr)
+       if (start < mmap_min_addr)
                return -EINVAL;
-       if (*start >= task_size)
+       if (start >= task_size)
                return -EINVAL;
-       if (len > task_size - *start)
+       if (len > task_size - start)
                return -EINVAL;
        return 0;
 }
@@ -1316,7 +1314,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
                vm_flags |= VM_UFFD_MINOR;
        }
 
-       ret = validate_range(mm, &uffdio_register.range.start,
+       ret = validate_range(mm, uffdio_register.range.start,
                             uffdio_register.range.len);
        if (ret)
                goto out;
@@ -1522,7 +1520,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
        if (copy_from_user(&uffdio_unregister, buf, sizeof(uffdio_unregister)))
                goto out;
 
-       ret = validate_range(mm, &uffdio_unregister.start,
+       ret = validate_range(mm, uffdio_unregister.start,
                             uffdio_unregister.len);
        if (ret)
                goto out;
@@ -1671,7 +1669,7 @@ static int userfaultfd_wake(struct userfaultfd_ctx *ctx,
        if (copy_from_user(&uffdio_wake, buf, sizeof(uffdio_wake)))
                goto out;
 
-       ret = validate_range(ctx->mm, &uffdio_wake.start, uffdio_wake.len);
+       ret = validate_range(ctx->mm, uffdio_wake.start, uffdio_wake.len);
        if (ret)
                goto out;
 
@@ -1711,7 +1709,7 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx,
                           sizeof(uffdio_copy)-sizeof(__s64)))
                goto out;
 
-       ret = validate_range(ctx->mm, &uffdio_copy.dst, uffdio_copy.len);
+       ret = validate_range(ctx->mm, uffdio_copy.dst, uffdio_copy.len);
        if (ret)
                goto out;
        /*
@@ -1768,7 +1766,7 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx,
                           sizeof(uffdio_zeropage)-sizeof(__s64)))
                goto out;
 
-       ret = validate_range(ctx->mm, &uffdio_zeropage.range.start,
+       ret = validate_range(ctx->mm, uffdio_zeropage.range.start,
                             uffdio_zeropage.range.len);
        if (ret)
                goto out;
@@ -1818,7 +1816,7 @@ static int userfaultfd_writeprotect(struct userfaultfd_ctx *ctx,
                           sizeof(struct uffdio_writeprotect)))
                return -EFAULT;
 
-       ret = validate_range(ctx->mm, &uffdio_wp.range.start,
+       ret = validate_range(ctx->mm, uffdio_wp.range.start,
                             uffdio_wp.range.len);
        if (ret)
                return ret;
@@ -1866,7 +1864,7 @@ static int userfaultfd_continue(struct userfaultfd_ctx *ctx, unsigned long arg)
                           sizeof(uffdio_continue) - (sizeof(__s64))))
                goto out;
 
-       ret = validate_range(ctx->mm, &uffdio_continue.range.start,
+       ret = validate_range(ctx->mm, uffdio_continue.range.start,
                             uffdio_continue.range.len);
        if (ret)
                goto out;
index d548ea4..2c5bcbc 100644 (file)
@@ -411,7 +411,16 @@ struct xfs_log_dinode {
        /* start of the extended dinode, writable fields */
        uint32_t        di_crc;         /* CRC of the inode */
        uint64_t        di_changecount; /* number of attribute changes */
-       xfs_lsn_t       di_lsn;         /* flush sequence */
+
+       /*
+        * The LSN we write to this field during formatting is not a reflection
+        * of the current on-disk LSN. It should never be used for recovery
+        * sequencing, nor should it be recovered into the on-disk inode at all.
+        * See xlog_recover_inode_commit_pass2() and xfs_log_dinode_to_disk()
+        * for details.
+        */
+       xfs_lsn_t       di_lsn;
+
        uint64_t        di_flags2;      /* more random flags */
        uint32_t        di_cowextsize;  /* basic cow extent size for file */
        uint8_t         di_pad2[12];    /* more padding for future expansion */
index d44e8b4..4775485 100644 (file)
@@ -698,7 +698,8 @@ xlog_recover_do_inode_buffer(
 static xfs_lsn_t
 xlog_recover_get_buf_lsn(
        struct xfs_mount        *mp,
-       struct xfs_buf          *bp)
+       struct xfs_buf          *bp,
+       struct xfs_buf_log_format *buf_f)
 {
        uint32_t                magic32;
        uint16_t                magic16;
@@ -706,11 +707,20 @@ xlog_recover_get_buf_lsn(
        void                    *blk = bp->b_addr;
        uuid_t                  *uuid;
        xfs_lsn_t               lsn = -1;
+       uint16_t                blft;
 
        /* v4 filesystems always recover immediately */
        if (!xfs_sb_version_hascrc(&mp->m_sb))
                goto recover_immediately;
 
+       /*
+        * realtime bitmap and summary file blocks do not have magic numbers or
+        * UUIDs, so we must recover them immediately.
+        */
+       blft = xfs_blft_from_flags(buf_f);
+       if (blft == XFS_BLFT_RTBITMAP_BUF || blft == XFS_BLFT_RTSUMMARY_BUF)
+               goto recover_immediately;
+
        magic32 = be32_to_cpu(*(__be32 *)blk);
        switch (magic32) {
        case XFS_ABTB_CRC_MAGIC:
@@ -796,6 +806,7 @@ xlog_recover_get_buf_lsn(
        switch (magicda) {
        case XFS_DIR3_LEAF1_MAGIC:
        case XFS_DIR3_LEAFN_MAGIC:
+       case XFS_ATTR3_LEAF_MAGIC:
        case XFS_DA3_NODE_MAGIC:
                lsn = be64_to_cpu(((struct xfs_da3_blkinfo *)blk)->lsn);
                uuid = &((struct xfs_da3_blkinfo *)blk)->uuid;
@@ -919,7 +930,7 @@ xlog_recover_buf_commit_pass2(
         * the verifier will be reset to match whatever recover turns that
         * buffer into.
         */
-       lsn = xlog_recover_get_buf_lsn(mp, bp);
+       lsn = xlog_recover_get_buf_lsn(mp, bp, buf_f);
        if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
                trace_xfs_log_recover_buf_skip(log, buf_f);
                xlog_recover_validate_buf_type(mp, bp, buf_f, NULLCOMMITLSN);
index 7b79518..e0072a6 100644 (file)
@@ -145,7 +145,8 @@ xfs_log_dinode_to_disk_ts(
 STATIC void
 xfs_log_dinode_to_disk(
        struct xfs_log_dinode   *from,
-       struct xfs_dinode       *to)
+       struct xfs_dinode       *to,
+       xfs_lsn_t               lsn)
 {
        to->di_magic = cpu_to_be16(from->di_magic);
        to->di_mode = cpu_to_be16(from->di_mode);
@@ -182,7 +183,7 @@ xfs_log_dinode_to_disk(
                to->di_flags2 = cpu_to_be64(from->di_flags2);
                to->di_cowextsize = cpu_to_be32(from->di_cowextsize);
                to->di_ino = cpu_to_be64(from->di_ino);
-               to->di_lsn = cpu_to_be64(from->di_lsn);
+               to->di_lsn = cpu_to_be64(lsn);
                memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
                uuid_copy(&to->di_uuid, &from->di_uuid);
                to->di_flushiter = 0;
@@ -261,16 +262,25 @@ xlog_recover_inode_commit_pass2(
        }
 
        /*
-        * If the inode has an LSN in it, recover the inode only if it's less
-        * than the lsn of the transaction we are replaying. Note: we still
-        * need to replay an owner change even though the inode is more recent
-        * than the transaction as there is no guarantee that all the btree
-        * blocks are more recent than this transaction, too.
+        * If the inode has an LSN in it, recover the inode only if the on-disk
+        * inode's LSN is older than the lsn of the transaction we are
+        * replaying. We can have multiple checkpoints with the same start LSN,
+        * so the current LSN being equal to the on-disk LSN doesn't necessarily
+        * mean that the on-disk inode is more recent than the change being
+        * replayed.
+        *
+        * We must check the current_lsn against the on-disk inode here
+        * because we can't trust the log dinode to contain a valid LSN
+        * (see comment below before replaying the log dinode for details).
+        *
+        * Note: we still need to replay an owner change even though the inode
+        * is more recent than the transaction as there is no guarantee that all
+        * the btree blocks are more recent than this transaction, too.
         */
        if (dip->di_version >= 3) {
                xfs_lsn_t       lsn = be64_to_cpu(dip->di_lsn);
 
-               if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
+               if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) > 0) {
                        trace_xfs_log_recover_inode_skip(log, in_f);
                        error = 0;
                        goto out_owner_change;
@@ -368,8 +378,17 @@ xlog_recover_inode_commit_pass2(
                goto out_release;
        }
 
-       /* recover the log dinode inode into the on disk inode */
-       xfs_log_dinode_to_disk(ldip, dip);
+       /*
+        * Recover the log dinode inode into the on disk inode.
+        *
+        * The LSN in the log dinode is garbage - it can be zero or reflect
+        * stale in-memory runtime state that isn't coherent with the changes
+        * logged in this transaction or the changes written to the on-disk
+        * inode.  Hence we write the current LSN into the inode because that
+        * matches what xfs_iflush() would write into the inode when flushing
+        * the changes in this transaction.
+        */
+       xfs_log_dinode_to_disk(ldip, dip, current_lsn);
 
        fields = in_f->ilf_fields;
        if (fields & XFS_ILOG_DEV)
index 36fa265..60ac5fd 100644 (file)
@@ -78,13 +78,12 @@ xlog_verify_iclog(
 STATIC void
 xlog_verify_tail_lsn(
        struct xlog             *log,
-       struct xlog_in_core     *iclog,
-       xfs_lsn_t               tail_lsn);
+       struct xlog_in_core     *iclog);
 #else
 #define xlog_verify_dest_ptr(a,b)
 #define xlog_verify_grant_tail(a)
 #define xlog_verify_iclog(a,b,c)
-#define xlog_verify_tail_lsn(a,b,c)
+#define xlog_verify_tail_lsn(a,b)
 #endif
 
 STATIC int
@@ -487,51 +486,80 @@ out_error:
        return error;
 }
 
-static bool
-__xlog_state_release_iclog(
-       struct xlog             *log,
-       struct xlog_in_core     *iclog)
-{
-       lockdep_assert_held(&log->l_icloglock);
-
-       if (iclog->ic_state == XLOG_STATE_WANT_SYNC) {
-               /* update tail before writing to iclog */
-               xfs_lsn_t tail_lsn = xlog_assign_tail_lsn(log->l_mp);
-
-               iclog->ic_state = XLOG_STATE_SYNCING;
-               iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn);
-               xlog_verify_tail_lsn(log, iclog, tail_lsn);
-               /* cycle incremented when incrementing curr_block */
-               trace_xlog_iclog_syncing(iclog, _RET_IP_);
-               return true;
-       }
-
-       ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE);
-       return false;
-}
-
 /*
  * Flush iclog to disk if this is the last reference to the given iclog and
  * it is in the WANT_SYNC state.
+ *
+ * If the caller passes in a non-zero @old_tail_lsn and the current log tail
+ * does not match, there may be metadata on disk that must be persisted before
+ * this iclog is written.  To satisfy that requirement, set the
+ * XLOG_ICL_NEED_FLUSH flag as a condition for writing this iclog with the new
+ * log tail value.
+ *
+ * If XLOG_ICL_NEED_FUA is already set on the iclog, we need to ensure that the
+ * log tail is updated correctly. NEED_FUA indicates that the iclog will be
+ * written to stable storage, and implies that a commit record is contained
+ * within the iclog. We need to ensure that the log tail does not move beyond
+ * the tail that the first commit record in the iclog ordered against, otherwise
+ * correct recovery of that checkpoint becomes dependent on future operations
+ * performed on this iclog.
+ *
+ * Hence if NEED_FUA is set and the current iclog tail lsn is empty, write the
+ * current tail into the iclog. Once the iclog tail is set, future operations must
+ * not modify it, otherwise they potentially violate ordering constraints for
+ * the checkpoint commit that wrote the initial tail lsn value. The tail lsn in
+ * the iclog will get zeroed on activation of the iclog after sync, so we
+ * always capture the tail lsn on the iclog on the first NEED_FUA release
+ * regardless of the number of active reference counts on this iclog.
  */
 int
 xlog_state_release_iclog(
        struct xlog             *log,
-       struct xlog_in_core     *iclog)
+       struct xlog_in_core     *iclog,
+       xfs_lsn_t               old_tail_lsn)
 {
+       xfs_lsn_t               tail_lsn;
+
        lockdep_assert_held(&log->l_icloglock);
 
        trace_xlog_iclog_release(iclog, _RET_IP_);
        if (iclog->ic_state == XLOG_STATE_IOERROR)
                return -EIO;
 
-       if (atomic_dec_and_test(&iclog->ic_refcnt) &&
-           __xlog_state_release_iclog(log, iclog)) {
-               spin_unlock(&log->l_icloglock);
-               xlog_sync(log, iclog);
-               spin_lock(&log->l_icloglock);
+       /*
+        * Grabbing the current log tail needs to be atomic w.r.t. the writing
+        * of the tail LSN into the iclog so we guarantee that the log tail does
+        * not move between deciding if a cache flush is required and writing
+        * the LSN into the iclog below.
+        */
+       if (old_tail_lsn || iclog->ic_state == XLOG_STATE_WANT_SYNC) {
+               tail_lsn = xlog_assign_tail_lsn(log->l_mp);
+
+               if (old_tail_lsn && tail_lsn != old_tail_lsn)
+                       iclog->ic_flags |= XLOG_ICL_NEED_FLUSH;
+
+               if ((iclog->ic_flags & XLOG_ICL_NEED_FUA) &&
+                   !iclog->ic_header.h_tail_lsn)
+                       iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn);
        }
 
+       if (!atomic_dec_and_test(&iclog->ic_refcnt))
+               return 0;
+
+       if (iclog->ic_state != XLOG_STATE_WANT_SYNC) {
+               ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE);
+               return 0;
+       }
+
+       iclog->ic_state = XLOG_STATE_SYNCING;
+       if (!iclog->ic_header.h_tail_lsn)
+               iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn);
+       xlog_verify_tail_lsn(log, iclog);
+       trace_xlog_iclog_syncing(iclog, _RET_IP_);
+
+       spin_unlock(&log->l_icloglock);
+       xlog_sync(log, iclog);
+       spin_lock(&log->l_icloglock);
        return 0;
 }
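The flag and stamping decisions in the release path above reduce to a few lines; a stand-alone model of just that logic (hypothetical names; the locking, reference counting and state transitions are omitted):

    #include <stdint.h>

    #define NEED_FLUSH      (1u << 0)
    #define NEED_FUA        (1u << 1)

    struct iclog_model {
            unsigned int    flags;
            uint64_t        tail_lsn;       /* 0 == not yet stamped */
    };

    /*
     * If the tail moved since @old_tail was sampled, metadata covered by the
     * new tail may not be on stable storage yet, so demand a cache flush.
     * Stamp the tail into the iclog at most once, on the first NEED_FUA
     * release, so later releases cannot move it and break the ordering of
     * the first commit record in the iclog.
     */
    static void release_tail_logic(struct iclog_model *ic, uint64_t old_tail,
                                   uint64_t cur_tail)
    {
            if (old_tail && cur_tail != old_tail)
                    ic->flags |= NEED_FLUSH;
            if ((ic->flags & NEED_FUA) && !ic->tail_lsn)
                    ic->tail_lsn = cur_tail;
    }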
 
@@ -773,6 +801,21 @@ xfs_log_mount_cancel(
        xfs_log_unmount(mp);
 }
 
+/*
+ * Flush out the iclog to disk ensuring that device caches are flushed and
+ * the iclog hits stable storage before any completion waiters are woken.
+ */
+static inline int
+xlog_force_iclog(
+       struct xlog_in_core     *iclog)
+{
+       atomic_inc(&iclog->ic_refcnt);
+       iclog->ic_flags |= XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA;
+       if (iclog->ic_state == XLOG_STATE_ACTIVE)
+               xlog_state_switch_iclogs(iclog->ic_log, iclog, 0);
+       return xlog_state_release_iclog(iclog->ic_log, iclog, 0);
+}
+
 /*
  * Wait for the iclog and all prior iclogs to be written to disk as required by
  * log force state machine. Waiting on ic_force_wait ensures iclog completions
@@ -827,13 +870,6 @@ xlog_write_unmount_record(
        /* account for space used by record data */
        ticket->t_curr_res -= sizeof(ulf);
 
-       /*
-        * For external log devices, we need to flush the data device cache
-        * first to ensure all metadata writeback is on stable storage before we
-        * stamp the tail LSN into the unmount record.
-        */
-       if (log->l_targ != log->l_mp->m_ddev_targp)
-               blkdev_issue_flush(log->l_targ->bt_bdev);
        return xlog_write(log, &vec, ticket, NULL, NULL, XLOG_UNMOUNT_TRANS);
 }
 
@@ -865,18 +901,7 @@ out_err:
 
        spin_lock(&log->l_icloglock);
        iclog = log->l_iclog;
-       atomic_inc(&iclog->ic_refcnt);
-       if (iclog->ic_state == XLOG_STATE_ACTIVE)
-               xlog_state_switch_iclogs(log, iclog, 0);
-       else
-               ASSERT(iclog->ic_state == XLOG_STATE_WANT_SYNC ||
-                      iclog->ic_state == XLOG_STATE_IOERROR);
-       /*
-        * Ensure the journal is fully flushed and on stable storage once the
-        * iclog containing the unmount record is written.
-        */
-       iclog->ic_flags |= (XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA);
-       error = xlog_state_release_iclog(log, iclog);
+       error = xlog_force_iclog(iclog);
        xlog_wait_on_iclog(iclog);
 
        if (tic) {
@@ -1796,10 +1821,20 @@ xlog_write_iclog(
         * metadata writeback and causing priority inversions.
         */
        iclog->ic_bio.bi_opf = REQ_OP_WRITE | REQ_META | REQ_SYNC | REQ_IDLE;
-       if (iclog->ic_flags & XLOG_ICL_NEED_FLUSH)
+       if (iclog->ic_flags & XLOG_ICL_NEED_FLUSH) {
                iclog->ic_bio.bi_opf |= REQ_PREFLUSH;
+               /*
+                * For external log devices, we also need to flush the data
+                * device cache first to ensure all metadata writeback covered
+                * by the LSN in this iclog is on stable storage. This is slow,
+                * but it *must* complete before we issue the external log IO.
+                */
+               if (log->l_targ != log->l_mp->m_ddev_targp)
+                       blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev);
+       }
        if (iclog->ic_flags & XLOG_ICL_NEED_FUA)
                iclog->ic_bio.bi_opf |= REQ_FUA;
+
        iclog->ic_flags &= ~(XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA);
 
        if (xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, count)) {
@@ -2310,7 +2345,7 @@ xlog_write_copy_finish(
        return 0;
 
 release_iclog:
-       error = xlog_state_release_iclog(log, iclog);
+       error = xlog_state_release_iclog(log, iclog, 0);
        spin_unlock(&log->l_icloglock);
        return error;
 }
@@ -2529,7 +2564,7 @@ next_lv:
                ASSERT(optype & XLOG_COMMIT_TRANS);
                *commit_iclog = iclog;
        } else {
-               error = xlog_state_release_iclog(log, iclog);
+               error = xlog_state_release_iclog(log, iclog, 0);
        }
        spin_unlock(&log->l_icloglock);
 
@@ -2567,6 +2602,7 @@ xlog_state_activate_iclog(
        memset(iclog->ic_header.h_cycle_data, 0,
                sizeof(iclog->ic_header.h_cycle_data));
        iclog->ic_header.h_lsn = 0;
+       iclog->ic_header.h_tail_lsn = 0;
 }
 
 /*
@@ -2967,7 +3003,7 @@ restart:
                 * reference to the iclog.
                 */
                if (!atomic_add_unless(&iclog->ic_refcnt, -1, 1))
-                       error = xlog_state_release_iclog(log, iclog);
+                       error = xlog_state_release_iclog(log, iclog, 0);
                spin_unlock(&log->l_icloglock);
                if (error)
                        return error;
@@ -3131,6 +3167,35 @@ xlog_state_switch_iclogs(
        log->l_iclog = iclog->ic_next;
 }
 
+/*
+ * Force the iclog to disk and check if the iclog has been completed before
+ * xlog_force_iclog() returns. This can happen on synchronous (e.g.
+ * pmem) or fast async storage because we drop the icloglock to issue the IO.
+ * If completion has already occurred, tell the caller so that it can avoid an
+ * unnecessary wait on the iclog.
+ */
+static int
+xlog_force_and_check_iclog(
+       struct xlog_in_core     *iclog,
+       bool                    *completed)
+{
+       xfs_lsn_t               lsn = be64_to_cpu(iclog->ic_header.h_lsn);
+       int                     error;
+
+       *completed = false;
+       error = xlog_force_iclog(iclog);
+       if (error)
+               return error;
+
+       /*
+        * If the iclog has already been completed and reused, the header LSN
+        * will have been rewritten by completion.
+        */
+       if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn)
+               *completed = true;
+       return 0;
+}
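Both forcing paths below consume this helper the same way; for reference, the caller pattern (as it appears in the xfs_log_force() hunk further down) is:

    bool    completed;

    if (xlog_force_and_check_iclog(iclog, &completed))
            goto out_error;
    if (completed)
            goto out_unlock;        /* already on disk, no need to wait */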
+
 /*
  * Write out all data in the in-core log as of this exact moment in time.
  *
@@ -3165,7 +3230,6 @@ xfs_log_force(
 {
        struct xlog             *log = mp->m_log;
        struct xlog_in_core     *iclog;
-       xfs_lsn_t               lsn;
 
        XFS_STATS_INC(mp, xs_log_force);
        trace_xfs_log_force(mp, 0, _RET_IP_);
@@ -3193,39 +3257,33 @@ xfs_log_force(
                iclog = iclog->ic_prev;
        } else if (iclog->ic_state == XLOG_STATE_ACTIVE) {
                if (atomic_read(&iclog->ic_refcnt) == 0) {
-                       /*
-                        * We are the only one with access to this iclog.
-                        *
-                        * Flush it out now.  There should be a roundoff of zero
-                        * to show that someone has already taken care of the
-                        * roundoff from the previous sync.
-                        */
-                       atomic_inc(&iclog->ic_refcnt);
-                       lsn = be64_to_cpu(iclog->ic_header.h_lsn);
-                       xlog_state_switch_iclogs(log, iclog, 0);
-                       if (xlog_state_release_iclog(log, iclog))
+                       /* We have exclusive access to this iclog. */
+                       bool    completed;
+
+                       if (xlog_force_and_check_iclog(iclog, &completed))
                                goto out_error;
 
-                       if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn)
+                       if (completed)
                                goto out_unlock;
                } else {
                        /*
-                        * Someone else is writing to this iclog.
-                        *
-                        * Use its call to flush out the data.  However, the
-                        * other thread may not force out this LR, so we mark
-                        * it WANT_SYNC.
+                        * Someone else is still writing to this iclog, so we
+                        * need to ensure that when they release the iclog it
+                        * gets synced immediately as we may be waiting on it.
                         */
                        xlog_state_switch_iclogs(log, iclog, 0);
                }
-       } else {
-               /*
-                * If the head iclog is not active nor dirty, we just attach
-                * ourselves to the head and go to sleep if necessary.
-                */
-               ;
        }
 
+       /*
+        * The iclog we are about to wait on may contain the checkpoint pushed
+        * by the above xlog_cil_force() call, but it may not have been pushed
+        * to disk yet. Like the ACTIVE case above, we need to make sure caches
+        * are flushed when this iclog is written.
+        */
+       if (iclog->ic_state == XLOG_STATE_WANT_SYNC)
+               iclog->ic_flags |= XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA;
+
        if (flags & XFS_LOG_SYNC)
                return xlog_wait_on_iclog(iclog);
 out_unlock:
@@ -3245,6 +3303,7 @@ xlog_force_lsn(
        bool                    already_slept)
 {
        struct xlog_in_core     *iclog;
+       bool                    completed;
 
        spin_lock(&log->l_icloglock);
        iclog = log->l_iclog;
@@ -3258,7 +3317,8 @@ xlog_force_lsn(
                        goto out_unlock;
        }
 
-       if (iclog->ic_state == XLOG_STATE_ACTIVE) {
+       switch (iclog->ic_state) {
+       case XLOG_STATE_ACTIVE:
                /*
                 * We sleep here if we haven't already slept (e.g. this is the
                 * first time we've looked at the correct iclog buf) and the
@@ -3281,12 +3341,31 @@ xlog_force_lsn(
                                        &log->l_icloglock);
                        return -EAGAIN;
                }
-               atomic_inc(&iclog->ic_refcnt);
-               xlog_state_switch_iclogs(log, iclog, 0);
-               if (xlog_state_release_iclog(log, iclog))
+               if (xlog_force_and_check_iclog(iclog, &completed))
                        goto out_error;
                if (log_flushed)
                        *log_flushed = 1;
+               if (completed)
+                       goto out_unlock;
+               break;
+       case XLOG_STATE_WANT_SYNC:
+               /*
+                * This iclog may contain the checkpoint pushed by the
+                * xlog_cil_force_seq() call, but there are other writers still
+                * accessing it so it hasn't been pushed to disk yet. Like the
+                * ACTIVE case above, we need to make sure caches are flushed
+                * when this iclog is written.
+                */
+               iclog->ic_flags |= XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA;
+               break;
+       default:
+               /*
+                * The entire checkpoint was written by the CIL force and is on
+                * its way to disk already. It will be stable when it
+                * completes, so we don't need to manipulate caches here at all.
+                * We just need to wait for completion if necessary.
+                */
+               break;
        }
 
        if (flags & XFS_LOG_SYNC)
@@ -3559,10 +3638,10 @@ xlog_verify_grant_tail(
 STATIC void
 xlog_verify_tail_lsn(
        struct xlog             *log,
-       struct xlog_in_core     *iclog,
-       xfs_lsn_t               tail_lsn)
+       struct xlog_in_core     *iclog)
 {
-    int blocks;
+       xfs_lsn_t       tail_lsn = be64_to_cpu(iclog->ic_header.h_tail_lsn);
+       int             blocks;
 
     if (CYCLE_LSN(tail_lsn) == log->l_prev_cycle) {
        blocks =
index b128aaa..4c44bc3 100644 (file)
@@ -654,8 +654,9 @@ xlog_cil_push_work(
        struct xfs_trans_header thdr;
        struct xfs_log_iovec    lhdr;
        struct xfs_log_vec      lvhdr = { NULL };
+       xfs_lsn_t               preflush_tail_lsn;
        xfs_lsn_t               commit_lsn;
-       xfs_lsn_t               push_seq;
+       xfs_csn_t               push_seq;
        struct bio              bio;
        DECLARE_COMPLETION_ONSTACK(bdev_flush);
 
@@ -730,7 +731,15 @@ xlog_cil_push_work(
         * because we hold the flush lock exclusively. Hence we can now issue
         * a cache flush to ensure all the completed metadata in the journal we
         * are about to overwrite is on stable storage.
+        *
+        * Because we are issuing this cache flush before we've written the
+        * tail lsn to the iclog, we can have metadata IO completions move the
+        * tail forwards between the completion of this flush and the iclog
+        * being written. In this case, we need to re-issue the cache flush
+        * before the iclog write. To detect whether the log tail moves, sample
+        * the tail LSN *before* we issue the flush.
         */
+       preflush_tail_lsn = atomic64_read(&log->l_tail_lsn);
        xfs_flush_bdev_async(&bio, log->l_mp->m_ddev_targp->bt_bdev,
                                &bdev_flush);
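Laid out as a sequence, the window the comment above describes looks like this (a sketch with hypothetical helper names, not the real call chain):

    preflush_tail_lsn = read_log_tail();    /* sample BEFORE flushing */
    flush_data_device();                    /* the async cache flush above */
    /* ... metadata IO completions may move the log tail here ... */
    write_checkpoint_to_iclog();
    release_iclog(iclog, preflush_tail_lsn);
    /* the release path re-samples the tail; if it moved, it sets
       NEED_FLUSH so the iclog write issues a fresh cache flush */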
 
@@ -941,7 +950,7 @@ restart:
         * storage.
         */
        commit_iclog->ic_flags |= XLOG_ICL_NEED_FUA;
-       xlog_state_release_iclog(log, commit_iclog);
+       xlog_state_release_iclog(log, commit_iclog, preflush_tail_lsn);
        spin_unlock(&log->l_icloglock);
        return;
 
index 4c41bbf..f3e79a4 100644 (file)
@@ -59,6 +59,16 @@ enum xlog_iclog_state {
        { XLOG_STATE_DIRTY,     "XLOG_STATE_DIRTY" }, \
        { XLOG_STATE_IOERROR,   "XLOG_STATE_IOERROR" }
 
+/*
+ * In core log flags
+ */
+#define XLOG_ICL_NEED_FLUSH    (1 << 0)        /* iclog needs REQ_PREFLUSH */
+#define XLOG_ICL_NEED_FUA      (1 << 1)        /* iclog needs REQ_FUA */
+
+#define XLOG_ICL_STRINGS \
+       { XLOG_ICL_NEED_FLUSH,  "XLOG_ICL_NEED_FLUSH" }, \
+       { XLOG_ICL_NEED_FUA,    "XLOG_ICL_NEED_FUA" }
+
 
 /*
  * Log ticket flags
@@ -143,9 +153,6 @@ enum xlog_iclog_state {
 
 #define XLOG_COVER_OPS         5
 
-#define XLOG_ICL_NEED_FLUSH    (1 << 0)        /* iclog needs REQ_PREFLUSH */
-#define XLOG_ICL_NEED_FUA      (1 << 1)        /* iclog needs REQ_FUA */
-
 /* Ticket reservation region accounting */ 
 #define XLOG_TIC_LEN_MAX       15
 
@@ -497,7 +504,8 @@ int xlog_commit_record(struct xlog *log, struct xlog_ticket *ticket,
 void   xfs_log_ticket_ungrant(struct xlog *log, struct xlog_ticket *ticket);
 void   xfs_log_ticket_regrant(struct xlog *log, struct xlog_ticket *ticket);
 
-int xlog_state_release_iclog(struct xlog *log, struct xlog_in_core *iclog);
+int xlog_state_release_iclog(struct xlog *log, struct xlog_in_core *iclog,
+               xfs_lsn_t log_tail_lsn);
 
 /*
  * When we crack an atomic LSN, we sample it first so that the value will not
index f9d8d60..1926029 100644 (file)
@@ -3944,6 +3944,7 @@ DECLARE_EVENT_CLASS(xlog_iclog_class,
                __field(uint32_t, state)
                __field(int32_t, refcount)
                __field(uint32_t, offset)
+               __field(uint32_t, flags)
                __field(unsigned long long, lsn)
                __field(unsigned long, caller_ip)
        ),
@@ -3952,15 +3953,17 @@ DECLARE_EVENT_CLASS(xlog_iclog_class,
                __entry->state = iclog->ic_state;
                __entry->refcount = atomic_read(&iclog->ic_refcnt);
                __entry->offset = iclog->ic_offset;
+               __entry->flags = iclog->ic_flags;
                __entry->lsn = be64_to_cpu(iclog->ic_header.h_lsn);
                __entry->caller_ip = caller_ip;
        ),
-       TP_printk("dev %d:%d state %s refcnt %d offset %u lsn 0x%llx caller %pS",
+       TP_printk("dev %d:%d state %s refcnt %d offset %u lsn 0x%llx flags %s caller %pS",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  __print_symbolic(__entry->state, XLOG_STATE_STRINGS),
                  __entry->refcount,
                  __entry->offset,
                  __entry->lsn,
+                 __print_flags(__entry->flags, "|", XLOG_ICL_STRINGS),
                  (char *)__entry->caller_ip)
 
 );
index 1ae993f..13d9337 100644 (file)
@@ -707,11 +707,6 @@ acpi_dev_get_first_match_dev(const char *hid, const char *uid, s64 hrv);
  * @hrv: Hardware Revision of the device, pass -1 to not check _HRV
  *
  * The caller is responsible for invoking acpi_dev_put() on the returned device.
- *
- * FIXME: Due to above requirement there is a window that may invalidate @adev
- * and next iteration will use a dangling pointer, e.g. in the case of a
- * hotplug event. That said, the caller should ensure that this will never
- * happen.
  */
 #define for_each_acpi_dev_match(adev, hid, uid, hrv)                   \
        for (adev = acpi_dev_get_first_match_dev(hid, uid, hrv);        \
@@ -725,7 +720,8 @@ static inline struct acpi_device *acpi_dev_get(struct acpi_device *adev)
 
 static inline void acpi_dev_put(struct acpi_device *adev)
 {
-       put_device(&adev->dev);
+       if (adev)
+               put_device(&adev->dev);
 }
 
 struct acpi_device *acpi_bus_get_acpi_device(acpi_handle handle);
index 10100a4..afb27cb 100644 (file)
@@ -68,6 +68,7 @@ typedef int drm_ioctl_compat_t(struct file *filp, unsigned int cmd,
                               unsigned long arg);
 
 #define DRM_IOCTL_NR(n)                _IOC_NR(n)
+#define DRM_IOCTL_TYPE(n)              _IOC_TYPE(n)
 #define DRM_MAJOR       226
 
 /**
index 3177181..d3afea4 100644 (file)
@@ -57,7 +57,7 @@ struct blk_keyslot_manager;
  * Maximum number of blkcg policies allowed to be registered concurrently.
  * Defined here to simplify include dependency.
  */
-#define BLKCG_MAX_POLS         5
+#define BLKCG_MAX_POLS         6
 
 typedef void (rq_end_io_fn)(struct request *, blk_status_t);
 
index 8b77d08..a74cd1c 100644 (file)
@@ -27,19 +27,6 @@ struct task_struct;
 extern struct static_key_false cgroup_bpf_enabled_key[MAX_BPF_ATTACH_TYPE];
 #define cgroup_bpf_enabled(type) static_branch_unlikely(&cgroup_bpf_enabled_key[type])
 
-#define BPF_CGROUP_STORAGE_NEST_MAX    8
-
-struct bpf_cgroup_storage_info {
-       struct task_struct *task;
-       struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE];
-};
-
-/* For each cpu, permit maximum BPF_CGROUP_STORAGE_NEST_MAX number of tasks
- * to use bpf cgroup storage simultaneously.
- */
-DECLARE_PER_CPU(struct bpf_cgroup_storage_info,
-               bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]);
-
 #define for_each_cgroup_storage_type(stype) \
        for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++)
 
@@ -172,44 +159,6 @@ static inline enum bpf_cgroup_storage_type cgroup_storage_type(
        return BPF_CGROUP_STORAGE_SHARED;
 }
 
-static inline int bpf_cgroup_storage_set(struct bpf_cgroup_storage
-                                        *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
-{
-       enum bpf_cgroup_storage_type stype;
-       int i, err = 0;
-
-       preempt_disable();
-       for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) {
-               if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != NULL))
-                       continue;
-
-               this_cpu_write(bpf_cgroup_storage_info[i].task, current);
-               for_each_cgroup_storage_type(stype)
-                       this_cpu_write(bpf_cgroup_storage_info[i].storage[stype],
-                                      storage[stype]);
-               goto out;
-       }
-       err = -EBUSY;
-       WARN_ON_ONCE(1);
-
-out:
-       preempt_enable();
-       return err;
-}
-
-static inline void bpf_cgroup_storage_unset(void)
-{
-       int i;
-
-       for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) {
-               if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current))
-                       continue;
-
-               this_cpu_write(bpf_cgroup_storage_info[i].task, NULL);
-               return;
-       }
-}
-
 struct bpf_cgroup_storage *
 cgroup_storage_lookup(struct bpf_cgroup_storage_map *map,
                      void *key, bool locked);
@@ -487,9 +436,6 @@ static inline int cgroup_bpf_prog_query(const union bpf_attr *attr,
        return -EINVAL;
 }
 
-static inline int bpf_cgroup_storage_set(
-       struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) { return 0; }
-static inline void bpf_cgroup_storage_unset(void) {}
 static inline int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux,
                                            struct bpf_map *map) { return 0; }
 static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(
index 0edff8f..c8cc090 100644 (file)
@@ -1142,38 +1142,40 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
                        struct bpf_prog *include_prog,
                        struct bpf_prog_array **new_array);
 
+struct bpf_run_ctx {};
+
+struct bpf_cg_run_ctx {
+       struct bpf_run_ctx run_ctx;
+       struct bpf_prog_array_item *prog_item;
+};
+
 /* BPF program asks to bypass CAP_NET_BIND_SERVICE in bind. */
 #define BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE                   (1 << 0)
 /* BPF program asks to set CN on the packet. */
 #define BPF_RET_SET_CN                                         (1 << 0)
 
-/* For BPF_PROG_RUN_ARRAY_FLAGS and __BPF_PROG_RUN_ARRAY,
- * if bpf_cgroup_storage_set() failed, the rest of programs
- * will not execute. This should be a really rare scenario
- * as it requires BPF_CGROUP_STORAGE_NEST_MAX number of
- * preemptions all between bpf_cgroup_storage_set() and
- * bpf_cgroup_storage_unset() on the same cpu.
- */
 #define BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, ret_flags)          \
        ({                                                              \
                struct bpf_prog_array_item *_item;                      \
                struct bpf_prog *_prog;                                 \
                struct bpf_prog_array *_array;                          \
+               struct bpf_run_ctx *old_run_ctx;                        \
+               struct bpf_cg_run_ctx run_ctx;                          \
                u32 _ret = 1;                                           \
                u32 func_ret;                                           \
                migrate_disable();                                      \
                rcu_read_lock();                                        \
                _array = rcu_dereference(array);                        \
                _item = &_array->items[0];                              \
+               old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);        \
                while ((_prog = READ_ONCE(_item->prog))) {              \
-                       if (unlikely(bpf_cgroup_storage_set(_item->cgroup_storage)))    \
-                               break;                                  \
+                       run_ctx.prog_item = _item;                      \
                        func_ret = func(_prog, ctx);                    \
                        _ret &= (func_ret & 1);                         \
-                       *(ret_flags) |= (func_ret >> 1);                        \
-                       bpf_cgroup_storage_unset();                     \
+                       *(ret_flags) |= (func_ret >> 1);                \
                        _item++;                                        \
                }                                                       \
+               bpf_reset_run_ctx(old_run_ctx);                         \
                rcu_read_unlock();                                      \
                migrate_enable();                                       \
                _ret;                                                   \
@@ -1184,6 +1186,8 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
                struct bpf_prog_array_item *_item;      \
                struct bpf_prog *_prog;                 \
                struct bpf_prog_array *_array;          \
+               struct bpf_run_ctx *old_run_ctx;        \
+               struct bpf_cg_run_ctx run_ctx;          \
                u32 _ret = 1;                           \
                migrate_disable();                      \
                rcu_read_lock();                        \
@@ -1191,17 +1195,13 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
                if (unlikely(check_non_null && !_array))\
                        goto _out;                      \
                _item = &_array->items[0];              \
-               while ((_prog = READ_ONCE(_item->prog))) {              \
-                       if (!set_cg_storage) {                  \
-                               _ret &= func(_prog, ctx);       \
-                       } else {                                \
-                               if (unlikely(bpf_cgroup_storage_set(_item->cgroup_storage)))    \
-                                       break;                  \
-                               _ret &= func(_prog, ctx);       \
-                               bpf_cgroup_storage_unset();     \
-                       }                               \
+               old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);\
+               while ((_prog = READ_ONCE(_item->prog))) {      \
+                       run_ctx.prog_item = _item;      \
+                       _ret &= func(_prog, ctx);       \
                        _item++;                        \
                }                                       \
+               bpf_reset_run_ctx(old_run_ctx);         \
 _out:                                                  \
                rcu_read_unlock();                      \
                migrate_enable();                       \
@@ -1284,6 +1284,20 @@ static inline void bpf_enable_instrumentation(void)
        migrate_enable();
 }
 
+static inline struct bpf_run_ctx *bpf_set_run_ctx(struct bpf_run_ctx *new_ctx)
+{
+       struct bpf_run_ctx *old_ctx;
+
+       old_ctx = current->bpf_ctx;
+       current->bpf_ctx = new_ctx;
+       return old_ctx;
+}
+
+static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx)
+{
+       current->bpf_ctx = old_ctx;
+}
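The pair implements a plain save/restore discipline on current->bpf_ctx, which the run-array macros above rely on to publish the current prog_item to helpers. A stand-alone model (hypothetical types and helpers; the kernel versions hang the pointer off the current task rather than a thread-local):

    struct run_ctx { const void *prog_item; };

    static __thread struct run_ctx *cur_ctx;  /* stands in for current->bpf_ctx */

    static struct run_ctx *set_run_ctx(struct run_ctx *new_ctx)
    {
            struct run_ctx *old = cur_ctx;

            cur_ctx = new_ctx;
            return old;
    }

    static void reset_run_ctx(struct run_ctx *old)
    {
            cur_ctx = old;
    }

    static void run_array(const void **items, int n)
    {
            struct run_ctx ctx;
            struct run_ctx *old = set_run_ctx(&ctx);  /* save caller's ctx */
            int i;

            for (i = 0; i < n; i++) {
                    ctx.prog_item = items[i];  /* helpers locate the item here */
                    /* run_prog(items[i]); */
            }
            reset_run_ctx(old);                /* restore on the way out */
    }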
+
 extern const struct file_operations bpf_map_fops;
 extern const struct file_operations bpf_prog_fops;
 extern const struct file_operations bpf_iter_fops;
@@ -1428,6 +1442,9 @@ typedef void (*bpf_iter_show_fdinfo_t) (const struct bpf_iter_aux_info *aux,
                                        struct seq_file *seq);
 typedef int (*bpf_iter_fill_link_info_t)(const struct bpf_iter_aux_info *aux,
                                         struct bpf_link_info *info);
+typedef const struct bpf_func_proto *
+(*bpf_iter_get_func_proto_t)(enum bpf_func_id func_id,
+                            const struct bpf_prog *prog);
 
 enum bpf_iter_feature {
        BPF_ITER_RESCHED        = BIT(0),
@@ -1440,6 +1457,7 @@ struct bpf_iter_reg {
        bpf_iter_detach_target_t detach_target;
        bpf_iter_show_fdinfo_t show_fdinfo;
        bpf_iter_fill_link_info_t fill_link_info;
+       bpf_iter_get_func_proto_t get_func_proto;
        u32 ctx_arg_info_size;
        u32 feature;
        struct bpf_ctx_arg_aux ctx_arg_info[BPF_ITER_CTX_ARG_MAX];
@@ -1462,6 +1480,8 @@ struct bpf_iter__bpf_map_elem {
 int bpf_iter_reg_target(const struct bpf_iter_reg *reg_info);
 void bpf_iter_unreg_target(const struct bpf_iter_reg *reg_info);
 bool bpf_iter_prog_supported(struct bpf_prog *prog);
+const struct bpf_func_proto *
+bpf_iter_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog);
 int bpf_iter_link_attach(const union bpf_attr *attr, bpfptr_t uattr, struct bpf_prog *prog);
 int bpf_iter_new_fd(struct bpf_link *link);
 bool bpf_link_is_iter(struct bpf_link *link);
@@ -2036,6 +2056,8 @@ extern const struct bpf_func_proto bpf_task_storage_get_proto;
 extern const struct bpf_func_proto bpf_task_storage_delete_proto;
 extern const struct bpf_func_proto bpf_for_each_map_elem_proto;
 extern const struct bpf_func_proto bpf_btf_find_by_name_kind_proto;
+extern const struct bpf_func_proto bpf_sk_setsockopt_proto;
+extern const struct bpf_func_proto bpf_sk_getsockopt_proto;
 
 const struct bpf_func_proto *bpf_tracing_func_proto(
        enum bpf_func_id func_id, const struct bpf_prog *prog);
index a9db1ea..ae3ac3a 100644 (file)
@@ -134,4 +134,5 @@ BPF_LINK_TYPE(BPF_LINK_TYPE_CGROUP, cgroup)
 BPF_LINK_TYPE(BPF_LINK_TYPE_ITER, iter)
 #ifdef CONFIG_NET
 BPF_LINK_TYPE(BPF_LINK_TYPE_NETNS, netns)
+BPF_LINK_TYPE(BPF_LINK_TYPE_XDP, xdp)
 #endif
index b847e1c..5424124 100644 (file)
@@ -354,8 +354,8 @@ struct bpf_insn_aux_data {
        };
        u64 map_key_state; /* constant (32 bit) key tracking for maps */
        int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
-       int sanitize_stack_off; /* stack slot to be cleared */
        u32 seen; /* this insn was processed by the verifier at env->pass_cnt */
+       bool sanitize_stack_spill; /* subject to Spectre v4 sanitation */
        bool zext_dst; /* this insn zero extends dst reg */
        u8 alu_state; /* used in combination with alu_limit */
 
@@ -429,6 +429,7 @@ struct bpf_verifier_env {
        u32 used_map_cnt;               /* number of used maps */
        u32 used_btf_cnt;               /* number of used BTF objects */
        u32 id_gen;                     /* used to generate unique reg IDs */
+       bool explore_alu_limits;
        bool allow_ptr_leaks;
        bool allow_uninit_stack;
        bool allow_ptr_to_map_access;
index ba36989..1797e85 100644 (file)
@@ -73,6 +73,11 @@ struct ctl_table_header;
 /* unused opcode to mark call to interpreter with arguments */
 #define BPF_CALL_ARGS  0xe0
 
+/* unused opcode to mark speculation barrier for mitigating
+ * Speculative Store Bypass
+ */
+#define BPF_NOSPEC     0xc0
+
 /* As per nm, we expose JITed images as text (code) section for
  * kallsyms. That way, tools like perf can find it to match
  * addresses.
@@ -390,6 +395,16 @@ static inline bool insn_is_zext(const struct bpf_insn *insn)
                .off   = 0,                                     \
                .imm   = 0 })
 
+/* Speculation barrier */
+
+#define BPF_ST_NOSPEC()                                                \
+       ((struct bpf_insn) {                                    \
+               .code  = BPF_ST | BPF_NOSPEC,                   \
+               .dst_reg = 0,                                   \
+               .src_reg = 0,                                   \
+               .off   = 0,                                     \
+               .imm   = 0 })
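One hypothetical use, shown only as a sketch: a rewrite pass could prepend the barrier to a stack store it wants fenced, using BPF_STX_MEM, the existing store macro from this header (the surrounding instruction-buffer management is omitted):

    struct bpf_insn patch[] = {
            BPF_ST_NOSPEC(),        /* speculation barrier before the store */
            BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
    };

The interpreter and JITs presumably map BPF_ST | BPF_NOSPEC to an architecture-appropriate barrier, or to nothing on architectures where Speculative Store Bypass is not a concern.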
+
 /* Internal classic blocks for direct assignment */
 
 #define __BPF_STMT(CODE, K)                                    \
@@ -761,6 +776,10 @@ static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog,
 
 DECLARE_BPF_DISPATCHER(xdp)
 
+DECLARE_STATIC_KEY_FALSE(bpf_master_redirect_enabled_key);
+
+u32 xdp_master_redirect(struct xdp_buff *xdp);
+
 static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog,
                                            struct xdp_buff *xdp)
 {
@@ -768,7 +787,14 @@ static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog,
         * under local_bh_disable(), which provides the needed RCU protection
         * for accessing map entries.
         */
-       return __BPF_PROG_RUN(prog, xdp, BPF_DISPATCHER_FUNC(xdp));
+       u32 act = __BPF_PROG_RUN(prog, xdp, BPF_DISPATCHER_FUNC(xdp));
+
+       if (static_branch_unlikely(&bpf_master_redirect_enabled_key)) {
+               if (act == XDP_TX && netif_is_bond_slave(xdp->rxq->dev))
+                       act = xdp_master_redirect(xdp);
+       }
+
+       return act;
 }
 
 void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog);
index e2bc163..6b54982 100644 (file)
@@ -141,6 +141,7 @@ extern int vfs_get_tree(struct fs_context *fc);
 extern void put_fs_context(struct fs_context *fc);
 extern int vfs_parse_fs_param_source(struct fs_context *fc,
                                     struct fs_parameter *param);
+extern void fc_drop_locked(struct fs_context *fc);
 
 /*
  * sget() wrappers to be called from the ->get_tree() op.
index 63b56ab..30ece3a 100644 (file)
@@ -423,7 +423,8 @@ int __must_check fsl_mc_allocate_irqs(struct fsl_mc_device *mc_dev);
 
 void fsl_mc_free_irqs(struct fsl_mc_device *mc_dev);
 
-struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev);
+struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev,
+                                         u16 if_id);
 
 extern struct bus_type fsl_mc_bus_type;
 
index 8c6e8e9..d9a606a 100644 (file)
@@ -318,14 +318,16 @@ static inline void memcpy_to_page(struct page *page, size_t offset,
 
        VM_BUG_ON(offset + len > PAGE_SIZE);
        memcpy(to + offset, from, len);
+       flush_dcache_page(page);
        kunmap_local(to);
 }
 
 static inline void memzero_page(struct page *page, size_t offset, size_t len)
 {
-       char *addr = kmap_atomic(page);
+       char *addr = kmap_local_page(page);
        memset(addr + offset, 0, len);
-       kunmap_atomic(addr);
+       flush_dcache_page(page);
+       kunmap_local(addr);
 }
 
 #endif /* _LINUX_HIGHMEM_H */
index 21daed1..509e18c 100644 (file)
@@ -190,39 +190,4 @@ static inline clock_t br_get_ageing_time(const struct net_device *br_dev)
 }
 #endif
 
-#if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_NET_SWITCHDEV)
-
-int switchdev_bridge_port_offload(struct net_device *brport_dev,
-                                 struct net_device *dev, const void *ctx,
-                                 struct notifier_block *atomic_nb,
-                                 struct notifier_block *blocking_nb,
-                                 bool tx_fwd_offload,
-                                 struct netlink_ext_ack *extack);
-void switchdev_bridge_port_unoffload(struct net_device *brport_dev,
-                                    const void *ctx,
-                                    struct notifier_block *atomic_nb,
-                                    struct notifier_block *blocking_nb);
-
-#else
-
-static inline int
-switchdev_bridge_port_offload(struct net_device *brport_dev,
-                             struct net_device *dev, const void *ctx,
-                             struct notifier_block *atomic_nb,
-                             struct notifier_block *blocking_nb,
-                             bool tx_fwd_offload,
-                             struct netlink_ext_ack *extack)
-{
-       return -EINVAL;
-}
-
-static inline void
-switchdev_bridge_port_unoffload(struct net_device *brport_dev,
-                               const void *ctx,
-                               struct notifier_block *atomic_nb,
-                               struct notifier_block *blocking_nb)
-{
-}
-#endif
-
 #endif
index 64ce8cd..93c262e 100644 (file)
@@ -41,9 +41,6 @@ struct ip_sf_socklist {
        __be32                  sl_addr[];
 };
 
-#define IP_SFLSIZE(count)      (sizeof(struct ip_sf_socklist) + \
-       (count) * sizeof(__be32))
-
 #define IP_SFBLOCK     10      /* allocate this many at once */
 
 /* ip_mc_socklist is real list now. Speed is not argument;
index 25e2b4e..aee8ff4 100644 (file)
@@ -81,6 +81,8 @@ int ishtp_register_event_cb(struct ishtp_cl_device *device,
 
 /* Get the device * from ishtp device instance */
 struct device *ishtp_device(struct ishtp_cl_device *cl_device);
+/* wait for IPC resume */
+bool ishtp_wait_resume(struct ishtp_device *dev);
 /* Trace interface for clients */
 ishtp_print_log ishtp_trace_callback(struct ishtp_cl_device *cl_device);
 /* Get device pointer of PCI device for DMA access */
index cbf46f5..4a53c3c 100644 (file)
@@ -209,7 +209,7 @@ static inline void __next_physmem_range(u64 *idx, struct memblock_type *type,
  */
 #define for_each_mem_range(i, p_start, p_end) \
        __for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE,   \
-                            MEMBLOCK_NONE, p_start, p_end, NULL)
+                            MEMBLOCK_HOTPLUG, p_start, p_end, NULL)
 
 /**
  * for_each_mem_range_rev - reverse iterate through memblock areas from
@@ -220,7 +220,7 @@ static inline void __next_physmem_range(u64 *idx, struct memblock_type *type,
  */
 #define for_each_mem_range_rev(i, p_start, p_end)                      \
        __for_each_mem_range_rev(i, &memblock.memory, NULL, NUMA_NO_NODE, \
-                                MEMBLOCK_NONE, p_start, p_end, NULL)
+                                MEMBLOCK_HOTPLUG, p_start, p_end, NULL)
 
 /**
  * for_each_reserved_mem_range - iterate over all reserved memblock areas
index beb9183..c493a80 100644 (file)
@@ -721,8 +721,13 @@ void mhi_device_put(struct mhi_device *mhi_dev);
  *                            host and device execution environments match and
  *                            channels are in a DISABLED state.
  * @mhi_dev: Device associated with the channels
+ * @flags: MHI channel flags
  */
-int mhi_prepare_for_transfer(struct mhi_device *mhi_dev);
+int mhi_prepare_for_transfer(struct mhi_device *mhi_dev,
+                            unsigned int flags);
+
+/* Automatically allocate and queue inbound buffers */
+#define MHI_CH_INBOUND_ALLOC_BUFS BIT(0)
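Existing callers gain a flags argument: clients that manage their own inbound buffers pass 0, while clients that want the core to pre-allocate and queue RX buffers pass the new flag (a sketch; ret and mhi_dev as in any client driver):

    ret = mhi_prepare_for_transfer(mhi_dev, 0);
    /* or */
    ret = mhi_prepare_for_transfer(mhi_dev, MHI_CH_INBOUND_ALLOC_BUFS);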
 
 /**
  * mhi_unprepare_from_transfer - Reset UL and DL channels for data transfer.
index 1efe374..af4dd6e 100644 (file)
@@ -1138,6 +1138,8 @@ bool mlx5_lag_is_roce(struct mlx5_core_dev *dev);
 bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev);
 bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev);
 bool mlx5_lag_is_active(struct mlx5_core_dev *dev);
+bool mlx5_lag_is_master(struct mlx5_core_dev *dev);
+bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev);
 struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev);
 u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
                           struct net_device *slave);
@@ -1145,6 +1147,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
                                 u64 *values,
                                 int num_counters,
                                 size_t *offsets);
+struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev);
 struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev);
 void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up);
 int mlx5_dm_sw_icm_alloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type,
index bc7db2e..4ab5c1f 100644 (file)
@@ -29,11 +29,20 @@ enum {
        REP_LOADED,
 };
 
+enum mlx5_switchdev_event {
+       MLX5_SWITCHDEV_EVENT_PAIR,
+       MLX5_SWITCHDEV_EVENT_UNPAIR,
+};
+
 struct mlx5_eswitch_rep;
 struct mlx5_eswitch_rep_ops {
        int (*load)(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep);
        void (*unload)(struct mlx5_eswitch_rep *rep);
        void *(*get_proto_dev)(struct mlx5_eswitch_rep *rep);
+       int (*event)(struct mlx5_eswitch *esw,
+                    struct mlx5_eswitch_rep *rep,
+                    enum mlx5_switchdev_event event,
+                    void *data);
 };
 
 struct mlx5_eswitch_rep_data {
@@ -63,6 +72,7 @@ struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw,
 void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type);
 struct mlx5_flow_handle *
 mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw,
+                                   struct mlx5_eswitch *from_esw,
                                    struct mlx5_eswitch_rep *rep, u32 sqn);
 
 #ifdef CONFIG_MLX5_ESWITCH
@@ -128,6 +138,7 @@ u32 mlx5_eswitch_get_vport_metadata_for_set(struct mlx5_eswitch *esw,
 
 u8 mlx5_eswitch_mode(struct mlx5_core_dev *dev);
 u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev);
+struct mlx5_core_dev *mlx5_eswitch_get_core_dev(struct mlx5_eswitch *esw);
 
 #else  /* CONFIG_MLX5_ESWITCH */
 
@@ -171,6 +182,11 @@ static inline u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev)
        return 0;
 }
 
+static inline struct mlx5_core_dev *mlx5_eswitch_get_core_dev(struct mlx5_eswitch *esw)
+{
+       return NULL;
+}
+
 #endif /* CONFIG_MLX5_ESWITCH */
 
 static inline bool is_mdev_switchdev_mode(struct mlx5_core_dev *dev)
index 77746f7..0106c67 100644 (file)
@@ -38,6 +38,8 @@
 
 #define MLX5_FS_DEFAULT_FLOW_TAG 0x0
 
+#define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v)
+
 enum {
        MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO  = 1 << 16,
        MLX5_FLOW_CONTEXT_ACTION_ENCRYPT        = 1 << 17,
index 6bbae0c..fce3cba 100644 (file)
@@ -1652,7 +1652,13 @@ struct mlx5_ifc_cmd_hca_cap_bits {
        u8         max_geneve_tlv_option_data_len[0x5];
        u8         reserved_at_570[0x10];
 
-       u8         reserved_at_580[0x33];
+       u8         reserved_at_580[0xb];
+       u8         log_max_dci_stream_channels[0x5];
+       u8         reserved_at_590[0x3];
+       u8         log_max_dci_errored_streams[0x5];
+       u8         reserved_at_598[0x8];
+
+       u8         reserved_at_5a0[0x13];
        u8         log_max_dek[0x5];
        u8         reserved_at_5b8[0x4];
        u8         mini_cqe_resp_stride_index[0x1];
@@ -3021,10 +3027,12 @@ struct mlx5_ifc_qpc_bits {
        u8         reserved_at_3c0[0x8];
        u8         next_send_psn[0x18];
 
-       u8         reserved_at_3e0[0x8];
+       u8         reserved_at_3e0[0x3];
+       u8         log_num_dci_stream_channels[0x5];
        u8         cqn_snd[0x18];
 
-       u8         reserved_at_400[0x8];
+       u8         reserved_at_400[0x3];
+       u8         log_num_dci_errored_streams[0x5];
        u8         deth_sqpn[0x18];
 
        u8         reserved_at_420[0x20];
@@ -3912,7 +3920,7 @@ struct mlx5_ifc_cqc_bits {
        u8         status[0x4];
        u8         reserved_at_4[0x2];
        u8         dbr_umem_valid[0x1];
-       u8         apu_thread_cq[0x1];
+       u8         apu_cq[0x1];
        u8         cqe_sz[0x3];
        u8         cc[0x1];
        u8         reserved_at_c[0x1];
@@ -3938,8 +3946,7 @@ struct mlx5_ifc_cqc_bits {
        u8         cq_period[0xc];
        u8         cq_max_count[0x10];
 
-       u8         reserved_at_a0[0x18];
-       u8         c_eqn[0x8];
+       u8         c_eqn_or_apu_element[0x20];
 
        u8         reserved_at_c0[0x3];
        u8         log_page_size[0x5];
index 52bbd2b..7f8ee09 100644 (file)
@@ -103,11 +103,19 @@ struct page {
                        unsigned long pp_magic;
                        struct page_pool *pp;
                        unsigned long _pp_mapping_pad;
-                       /**
-                        * @dma_addr: might require a 64-bit value on
-                        * 32-bit architectures.
-                        */
-                       unsigned long dma_addr[2];
+                       unsigned long dma_addr;
+                       union {
+                               /**
+                                * dma_addr_upper: might require a 64-bit
+                                * value on 32-bit architectures.
+                                */
+                               unsigned long dma_addr_upper;
+                               /**
+                                * For frag page support; not supported on
+                                * 32-bit architectures with 64-bit DMA.
+                                */
+                               atomic_long_t pp_frag_count;
+                       };
                };
                struct {        /* slab, slob and slub */
                        union {
index d63a94e..bd8d5b8 100644 (file)
@@ -295,18 +295,6 @@ enum netdev_state_t {
 };
 
 
-/*
- * This structure holds boot-time configured netdevice settings. They
- * are then used in the device probing.
- */
-struct netdev_boot_setup {
-       char name[IFNAMSIZ];
-       struct ifmap map;
-};
-#define NETDEV_BOOT_SETUP_MAX 8
-
-int __init netdev_boot_setup(char *str);
-
 struct gro_list {
        struct list_head        list;
        int                     count;
@@ -1330,6 +1318,9 @@ struct netdev_net_notifier {
  *     that got dropped are freed/returned via xdp_return_frame().
  *     Returns negative number, means general error invoking ndo, meaning
  *     no frames were xmit'ed and core-caller will free all frames.
+ * struct net_device *(*ndo_xdp_get_xmit_slave)(struct net_device *dev,
+ *                                             struct xdp_buff *xdp);
+ *      Get the xmit slave of the master device based on the xdp_buff.
  * int (*ndo_xsk_wakeup)(struct net_device *dev, u32 queue_id, u32 flags);
  *      This function is used to wake up the softirq, ksoftirqd or kthread
  *     responsible for sending and/or receiving packets on a specific
@@ -1557,6 +1548,8 @@ struct net_device_ops {
        int                     (*ndo_xdp_xmit)(struct net_device *dev, int n,
                                                struct xdp_frame **xdp,
                                                u32 flags);
+       struct net_device *     (*ndo_xdp_get_xmit_slave)(struct net_device *dev,
+                                                         struct xdp_buff *xdp);
        int                     (*ndo_xsk_wakeup)(struct net_device *dev,
                                                  u32 queue_id, u32 flags);
        struct devlink_port *   (*ndo_get_devlink_port)(struct net_device *dev);
@@ -2939,7 +2932,6 @@ static inline struct net_device *first_net_device_rcu(struct net *net)
 }
 
 int netdev_boot_setup_check(struct net_device *dev);
-unsigned long netdev_boot_base(const char *prefix, int unit);
 struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type,
                                       const char *hwaddr);
 struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type);
@@ -3929,6 +3921,8 @@ static inline int netif_set_real_num_rx_queues(struct net_device *dev,
        return 0;
 }
 #endif
+int netif_set_real_num_queues(struct net_device *dev,
+                             unsigned int txq, unsigned int rxq);
 
 static inline struct netdev_rx_queue *
 __netif_get_rx_queue(struct net_device *dev, unsigned int rxq)
@@ -4087,6 +4081,7 @@ typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf);
 int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
                      int fd, int expected_fd, u32 flags);
 int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
+u8 dev_xdp_prog_count(struct net_device *dev);
 u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode);
 
 int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
@@ -4154,11 +4149,13 @@ void netdev_run_todo(void);
  */
 static inline void dev_put(struct net_device *dev)
 {
+       if (dev) {
 #ifdef CONFIG_PCPU_DEV_REFCNT
-       this_cpu_dec(*dev->pcpu_refcnt);
+               this_cpu_dec(*dev->pcpu_refcnt);
 #else
-       refcount_dec(&dev->dev_refcnt);
+               refcount_dec(&dev->dev_refcnt);
 #endif
+       }
 }
 
 /**
@@ -4169,11 +4166,13 @@ static inline void dev_put(struct net_device *dev)
  */
 static inline void dev_hold(struct net_device *dev)
 {
+       if (dev) {
 #ifdef CONFIG_PCPU_DEV_REFCNT
-       this_cpu_inc(*dev->pcpu_refcnt);
+               this_cpu_inc(*dev->pcpu_refcnt);
 #else
-       refcount_inc(&dev->dev_refcnt);
+               refcount_inc(&dev->dev_refcnt);
 #endif
+       }
 }
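With the NULL checks folded into dev_put() and dev_hold(), callers can drop their own guards; the simplification this enables looks like:

    /* before: callers had to guard the refcount ops themselves */
    if (dev)
            dev_hold(dev);
    /* ... use dev ... */
    if (dev)
            dev_put(dev);

    /* after: both are safe no-ops on a NULL device */
    dev_hold(dev);
    /* ... use dev ... */
    dev_put(dev);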
 
 /* Carrier loss detection, dial on demand. The functions netif_carrier_on
index ec8d07d..c64119a 100644 (file)
@@ -42,6 +42,7 @@ struct backing_dev_info;
 struct bio_list;
 struct blk_plug;
 struct bpf_local_storage;
+struct bpf_run_ctx;
 struct capture_control;
 struct cfs_rq;
 struct fs_struct;
@@ -1379,6 +1380,8 @@ struct task_struct {
 #ifdef CONFIG_BPF_SYSCALL
        /* Used by BPF task local storage */
        struct bpf_local_storage __rcu  *bpf_storage;
+       /* Used for BPF run context */
+       struct bpf_run_ctx              *bpf_ctx;
 #endif
 
 #ifdef CONFIG_GCC_PLUGIN_STACKLEAK
index 2bcdc8c..6bdb0db 100644 (file)
@@ -1183,6 +1183,7 @@ static inline struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom,
 int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, gfp_t gfp_mask);
 struct sk_buff *skb_realloc_headroom(struct sk_buff *skb,
                                     unsigned int headroom);
+struct sk_buff *skb_expand_head(struct sk_buff *skb, unsigned int headroom);
 struct sk_buff *skb_copy_expand(const struct sk_buff *skb, int newheadroom,
                                int newtailroom, gfp_t priority);
 int __must_check skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg,
@@ -4711,11 +4712,9 @@ static inline u64 skb_get_kcov_handle(struct sk_buff *skb)
 }
 
 #ifdef CONFIG_PAGE_POOL
-static inline void skb_mark_for_recycle(struct sk_buff *skb, struct page *page,
-                                       struct page_pool *pp)
+static inline void skb_mark_for_recycle(struct sk_buff *skb)
 {
        skb->pp_recycle = 1;
-       page_pool_store_mem_info(page, pp);
 }
 #endif
 
index 96f3190..14ab0c0 100644 (file)
@@ -285,11 +285,45 @@ static inline struct sk_psock *sk_psock(const struct sock *sk)
        return rcu_dereference_sk_user_data(sk);
 }
 
+static inline void sk_psock_set_state(struct sk_psock *psock,
+                                     enum sk_psock_state_bits bit)
+{
+       set_bit(bit, &psock->state);
+}
+
+static inline void sk_psock_clear_state(struct sk_psock *psock,
+                                       enum sk_psock_state_bits bit)
+{
+       clear_bit(bit, &psock->state);
+}
+
+static inline bool sk_psock_test_state(const struct sk_psock *psock,
+                                      enum sk_psock_state_bits bit)
+{
+       return test_bit(bit, &psock->state);
+}
+
+static inline void sock_drop(struct sock *sk, struct sk_buff *skb)
+{
+       sk_drops_add(sk, skb);
+       kfree_skb(skb);
+}
+
+static inline void drop_sk_msg(struct sk_psock *psock, struct sk_msg *msg)
+{
+       if (msg->skb)
+               sock_drop(psock->sk, msg->skb);
+       kfree(msg);
+}
+
 static inline void sk_psock_queue_msg(struct sk_psock *psock,
                                      struct sk_msg *msg)
 {
        spin_lock_bh(&psock->ingress_lock);
-       list_add_tail(&msg->list, &psock->ingress_msg);
+       if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
+               list_add_tail(&msg->list, &psock->ingress_msg);
+       else
+               drop_sk_msg(psock, msg);
        spin_unlock_bh(&psock->ingress_lock);
 }
 
@@ -406,24 +440,6 @@ static inline void sk_psock_restore_proto(struct sock *sk,
                psock->psock_update_sk_prot(sk, psock, true);
 }
 
-static inline void sk_psock_set_state(struct sk_psock *psock,
-                                     enum sk_psock_state_bits bit)
-{
-       set_bit(bit, &psock->state);
-}
-
-static inline void sk_psock_clear_state(struct sk_psock *psock,
-                                       enum sk_psock_state_bits bit)
-{
-       clear_bit(bit, &psock->state);
-}
-
-static inline bool sk_psock_test_state(const struct sk_psock *psock,
-                                      enum sk_psock_state_bits bit)
-{
-       return test_bit(bit, &psock->state);
-}
-
 static inline struct sk_psock *sk_psock_get(struct sock *sk)
 {
        struct sk_psock *psock;
index 9cce0d8..08ca9ce 100644 (file)
@@ -8,23 +8,13 @@ struct net_device *ultra_probe(int unit);
 struct net_device *wd_probe(int unit);
 struct net_device *ne_probe(int unit);
 struct net_device *fmv18x_probe(int unit);
-struct net_device *i82596_probe(int unit);
 struct net_device *ni65_probe(int unit);
 struct net_device *sonic_probe(int unit);
 struct net_device *smc_init(int unit);
-struct net_device *atarilance_probe(int unit);
-struct net_device *sun3lance_probe(int unit);
-struct net_device *sun3_82586_probe(int unit);
-struct net_device *apne_probe(int unit);
 struct net_device *cs89x0_probe(int unit);
-struct net_device *mvme147lance_probe(int unit);
 struct net_device *tc515_probe(int unit);
 struct net_device *lance_probe(int unit);
 struct net_device *cops_probe(int unit);
-struct net_device *ltpc_probe(void);
 
 /* Fibre Channel adapters */
 int iph5526_probe(struct net_device *dev);
-
-/* SBNI adapters */
-int sbni_probe(int unit);
index 086b291..f19f7f4 100644 (file)
@@ -58,6 +58,14 @@ struct tc_action {
 #define TCA_ACT_HW_STATS_ANY (TCA_ACT_HW_STATS_IMMEDIATE | \
                              TCA_ACT_HW_STATS_DELAYED)
 
+/* Reserve 16 bits for user-space. See TCA_ACT_FLAGS_NO_PERCPU_STATS. */
+#define TCA_ACT_FLAGS_USER_BITS 16
+#define TCA_ACT_FLAGS_USER_MASK 0xffff
+#define TCA_ACT_FLAGS_POLICE   (1U << TCA_ACT_FLAGS_USER_BITS)
+#define TCA_ACT_FLAGS_BIND     (1U << (TCA_ACT_FLAGS_USER_BITS + 1))
+#define TCA_ACT_FLAGS_REPLACE  (1U << (TCA_ACT_FLAGS_USER_BITS + 2))
+#define TCA_ACT_FLAGS_NO_RTNL  (1U << (TCA_ACT_FLAGS_USER_BITS + 3))
+
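User-supplied flags stay confined to the low 16 bits while kernel-internal state is packed above them, so both can travel in a single word; a sketch of the combination (variable names hypothetical):

    u32 flags = userspace_flags & TCA_ACT_FLAGS_USER_MASK; /* low 16 bits */

    if (bind)
            flags |= TCA_ACT_FLAGS_BIND;    /* kernel-internal bits above */
    if (replace)
            flags |= TCA_ACT_FLAGS_REPLACE;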
 /* Update lastuse only if needed, to avoid dirtying a cache line.
  * We use a temp variable to avoid fetching jiffies twice.
  */
@@ -99,8 +107,8 @@ struct tc_action_ops {
        void    (*cleanup)(struct tc_action *);
        int     (*lookup)(struct net *net, struct tc_action **a, u32 index);
        int     (*init)(struct net *net, struct nlattr *nla,
-                       struct nlattr *est, struct tc_action **act, int ovr,
-                       int bind, bool rtnl_held, struct tcf_proto *tp,
+                       struct nlattr *est, struct tc_action **act,
+                       struct tcf_proto *tp,
                        u32 flags, struct netlink_ext_ack *extack);
        int     (*walk)(struct net *, struct sk_buff *,
                        struct netlink_callback *, int,
@@ -179,18 +187,16 @@ int tcf_action_destroy(struct tc_action *actions[], int bind);
 int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
                    int nr_actions, struct tcf_result *res);
 int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
-                   struct nlattr *est, char *name, int ovr, int bind,
+                   struct nlattr *est,
                    struct tc_action *actions[], int init_res[], size_t *attr_size,
-                   bool rtnl_held, struct netlink_ext_ack *extack);
-struct tc_action_ops *tc_action_load_ops(char *name, struct nlattr *nla,
+                   u32 flags, struct netlink_ext_ack *extack);
+struct tc_action_ops *tc_action_load_ops(struct nlattr *nla, bool police,
                                         bool rtnl_held,
                                         struct netlink_ext_ack *extack);
 struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
                                    struct nlattr *nla, struct nlattr *est,
-                                   char *name, int ovr, int bind,
                                    struct tc_action_ops *a_o, int *init_res,
-                                   bool rtnl_held,
-                                   struct netlink_ext_ack *extack);
+                                   u32 flags, struct netlink_ext_ack *extack);
 int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[], int bind,
                    int ref, bool terse);
 int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int);
index 435a2c3..4757d7f 100644 (file)
@@ -70,6 +70,9 @@ struct unix_sock {
        struct socket_wq        peer_wq;
        wait_queue_entry_t      peer_wake;
        struct scm_stat         scm_stat;
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+       struct sk_buff          *oob_skb;
+#endif
 };
 
 static inline struct unix_sock *unix_sk(const struct sock *sk)
index aa52b2e..2ed23a3 100644 (file)
@@ -38,4 +38,7 @@ struct ax_plat_data {
        int (*check_irq)(struct platform_device *pdev);
 };
 
+/* exported from ax88796.c for xsurf100.c */
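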
+extern void ax_NS8390_reinit(struct net_device *dev);
+
 #endif /* __NET_AX88796_PLAT_H */
index a53e944..db4312e 100644 (file)
@@ -1230,6 +1230,7 @@ struct hci_dev *hci_alloc_dev(void);
 void hci_free_dev(struct hci_dev *hdev);
 int hci_register_dev(struct hci_dev *hdev);
 void hci_unregister_dev(struct hci_dev *hdev);
+void hci_cleanup_dev(struct hci_dev *hdev);
 int hci_suspend_dev(struct hci_dev *hdev);
 int hci_resume_dev(struct hci_dev *hdev);
 int hci_reset_dev(struct hci_dev *hdev);
index c8696a2..38785d4 100644 (file)
@@ -303,6 +303,7 @@ int  __bond_3ad_get_active_agg_info(struct bonding *bond,
 int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond,
                         struct slave *slave);
 int bond_3ad_set_carrier(struct bonding *bond);
+void bond_3ad_update_lacp_active(struct bonding *bond);
 void bond_3ad_update_lacp_rate(struct bonding *bond);
 void bond_3ad_update_ad_actor_settings(struct bonding *bond);
 int bond_3ad_stats_fill(struct sk_buff *skb, struct bond_3ad_stats *stats);
index 9d382f2..e64833a 100644 (file)
@@ -64,6 +64,7 @@ enum {
        BOND_OPT_AD_USER_PORT_KEY,
        BOND_OPT_NUM_PEER_NOTIF_ALIAS,
        BOND_OPT_PEER_NOTIF_DELAY,
+       BOND_OPT_LACP_ACTIVE,
        BOND_OPT_LAST
 };
 
index 625d9c7..9f3fdc1 100644 (file)
@@ -129,6 +129,7 @@ struct bond_params {
        int updelay;
        int downdelay;
        int peer_notif_delay;
+       int lacp_active;
        int lacp_fast;
        unsigned int min_links;
        int ad_select;
@@ -258,6 +259,7 @@ struct bonding {
        /* protecting ipsec_list */
        spinlock_t ipsec_lock;
 #endif /* CONFIG_XFRM_OFFLOAD */
+       struct bpf_prog *xdp_prog;
 };
 
 #define bond_slave_get_rcu(dev) \
index 84805bd..595fee0 100644 (file)
@@ -71,13 +71,26 @@ struct compat_group_source_req {
 } __packed;
 
 struct compat_group_filter {
-       __u32                            gf_interface;
-       struct __kernel_sockaddr_storage gf_group
-               __aligned(4);
-       __u32                            gf_fmode;
-       __u32                            gf_numsrc;
-       struct __kernel_sockaddr_storage gf_slist[1]
-               __aligned(4);
+       union {
+               struct {
+                       __u32                            gf_interface_aux;
+                       struct __kernel_sockaddr_storage gf_group_aux
+                               __aligned(4);
+                       __u32                            gf_fmode_aux;
+                       __u32                            gf_numsrc_aux;
+                       struct __kernel_sockaddr_storage gf_slist[1]
+                               __aligned(4);
+               } __packed;
+               struct {
+                       __u32                            gf_interface;
+                       struct __kernel_sockaddr_storage gf_group
+                               __aligned(4);
+                       __u32                            gf_fmode;
+                       __u32                            gf_numsrc;
+                       struct __kernel_sockaddr_storage gf_slist_flex[]
+                               __aligned(4);
+               } __packed;
+       };
 } __packed;
 
 #endif /* NET_COMPAT_H */
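
The union above keeps the legacy gf_slist[1] layout (and therefore sizeof and every offset) bit-identical for existing binaries while adding a proper flexible-array view for new code. Sizing through the flex member then replaces the old "array of one" arithmetic; the helper below is an illustration, not part of the patch:

    /* Illustrative: size a filter for numsrc sources via the flex view. */
    static inline size_t example_compat_gf_size(unsigned int numsrc)
    {
            return offsetof(struct compat_group_filter, gf_slist_flex) +
                   numsrc * sizeof(struct __kernel_sockaddr_storage);
    }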
index 08f4c61..0236c77 100644 (file)
@@ -1396,8 +1396,8 @@ struct devlink_ops {
         *
         * Note: @extack can be NULL when port notifier queries the port function.
         */
-       int (*port_function_hw_addr_get)(struct devlink *devlink, struct devlink_port *port,
-                                        u8 *hw_addr, int *hw_addr_len,
+       int (*port_function_hw_addr_get)(struct devlink_port *port, u8 *hw_addr,
+                                        int *hw_addr_len,
                                         struct netlink_ext_ack *extack);
        /**
         * @port_function_hw_addr_set: Port function's hardware address set function.
@@ -1406,7 +1406,7 @@ struct devlink_ops {
         * by the devlink port. Driver should return -EOPNOTSUPP if it doesn't support port
         * function handling for a particular port.
         */
-       int (*port_function_hw_addr_set)(struct devlink *devlink, struct devlink_port *port,
+       int (*port_function_hw_addr_set)(struct devlink_port *port,
                                         const u8 *hw_addr, int hw_addr_len,
                                         struct netlink_ext_ack *extack);
        /**
@@ -1462,8 +1462,7 @@ struct devlink_ops {
         *
         * Return: 0 on success, negative value otherwise.
         */
-       int (*port_fn_state_get)(struct devlink *devlink,
-                                struct devlink_port *port,
+       int (*port_fn_state_get)(struct devlink_port *port,
                                 enum devlink_port_fn_state *state,
                                 enum devlink_port_fn_opstate *opstate,
                                 struct netlink_ext_ack *extack);
@@ -1478,8 +1477,7 @@ struct devlink_ops {
         *
         * Return: 0 on success, negative value otherwise.
         */
-       int (*port_fn_state_set)(struct devlink *devlink,
-                                struct devlink_port *port,
+       int (*port_fn_state_set)(struct devlink_port *port,
                                 enum devlink_port_fn_state state,
                                 struct netlink_ext_ack *extack);
 
@@ -1546,13 +1544,15 @@ struct net *devlink_net(const struct devlink *devlink);
  * Drivers that operate on real HW must use devlink_alloc() instead.
  */
 struct devlink *devlink_alloc_ns(const struct devlink_ops *ops,
-                                size_t priv_size, struct net *net);
+                                size_t priv_size, struct net *net,
+                                struct device *dev);
 static inline struct devlink *devlink_alloc(const struct devlink_ops *ops,
-                                           size_t priv_size)
+                                           size_t priv_size,
+                                           struct device *dev)
 {
-       return devlink_alloc_ns(ops, priv_size, &init_net);
+       return devlink_alloc_ns(ops, priv_size, &init_net, dev);
 }
-int devlink_register(struct devlink *devlink, struct device *dev);
+int devlink_register(struct devlink *devlink);
 void devlink_unregister(struct devlink *devlink);
 void devlink_reload_enable(struct devlink *devlink);
 void devlink_reload_disable(struct devlink *devlink);
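
With the struct device now bound at allocation time, a driver's probe path under the new API looks roughly as below; the ops, priv and function names are hypothetical:

    /* Hypothetical probe: the dev argument moves from devlink_register()
     * to devlink_alloc(); registration takes only the devlink instance.
     */
    static int example_probe(struct pci_dev *pdev)
    {
            struct devlink *devlink;
            int err;

            devlink = devlink_alloc(&example_devlink_ops,
                                    sizeof(struct example_priv), &pdev->dev);
            if (!devlink)
                    return -ENOMEM;

            err = devlink_register(devlink);
            if (err) {
                    devlink_free(devlink);
                    return err;
            }
            return 0;
    }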
index 2af6ee2..0c2cba4 100644 (file)
@@ -79,13 +79,11 @@ enum dsa_tag_protocol {
        DSA_TAG_PROTO_SJA1110           = DSA_TAG_PROTO_SJA1110_VALUE,
 };
 
-struct packet_type;
 struct dsa_switch;
 
 struct dsa_device_ops {
        struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev);
-       struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev,
-                              struct packet_type *pt);
+       struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev);
        void (*flow_dissect)(const struct sk_buff *skb, __be16 *proto,
                             int *offset);
        unsigned int needed_headroom;
@@ -239,8 +237,7 @@ struct dsa_port {
 
        /* Copies for faster access in master receive hot path */
        struct dsa_switch_tree *dst;
-       struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev,
-                              struct packet_type *pt);
+       struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev);
 
        enum {
                DSA_PORT_TYPE_UNUSED = 0,
@@ -257,6 +254,8 @@ struct dsa_port {
        struct device_node      *dn;
        unsigned int            ageing_time;
        bool                    vlan_filtering;
+       /* Managed by DSA on user ports and by drivers on CPU and DSA ports */
+       bool                    learning;
        u8                      stp_state;
        struct net_device       *bridge_dev;
        int                     bridge_num;
@@ -717,8 +716,6 @@ struct dsa_switch_ops {
        int     (*port_bridge_flags)(struct dsa_switch *ds, int port,
                                     struct switchdev_brport_flags flags,
                                     struct netlink_ext_ack *extack);
-       int     (*port_set_mrouter)(struct dsa_switch *ds, int port, bool mrouter,
-                                   struct netlink_ext_ack *extack);
 
        /*
         * VLAN support
index 69c9eab..f3c2841 100644 (file)
@@ -293,7 +293,7 @@ static inline bool flow_action_has_entries(const struct flow_action *action)
 }
 
 /**
- * flow_action_has_one_action() - check if exactly one action is present
+ * flow_offload_has_one_action() - check if exactly one action is present
  * @action: tc filter flow offload action
  *
  * Returns true if exactly one action is present.
index 71bb4cc..42235c1 100644 (file)
@@ -82,9 +82,6 @@ struct ip6_sf_socklist {
        struct in6_addr         sl_addr[];
 };
 
-#define IP6_SFLSIZE(count)     (sizeof(struct ip6_sf_socklist) + \
-       (count) * sizeof(struct in6_addr))
-
 #define IP6_SFBLOCK    10      /* allocate this many at once */
 
 struct ipv6_mc_socklist {
index ca6a3ea..f72ec11 100644 (file)
@@ -160,6 +160,12 @@ struct inet_hashinfo {
                                        ____cacheline_aligned_in_smp;
 };
 
+#define inet_lhash2_for_each_icsk_continue(__icsk) \
+       hlist_for_each_entry_continue(__icsk, icsk_listen_portaddr_node)
+
+#define inet_lhash2_for_each_icsk(__icsk, list) \
+       hlist_for_each_entry(__icsk, list, icsk_listen_portaddr_node)
+
 #define inet_lhash2_for_each_icsk_rcu(__icsk, list) \
        hlist_for_each_entry_rcu(__icsk, list, icsk_listen_portaddr_node)
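
The two new non-RCU variants complement the existing _rcu iterator for callers that already hold the bucket lock; usage mirrors the RCU form. A sketch with a hypothetical callback:

    /* Illustrative: walk one lhash2 bucket under its lock. */
    static void example_walk_bucket(struct inet_listen_hashbucket *ilb2)
    {
            struct inet_connection_sock *icsk;

            inet_lhash2_for_each_icsk(icsk, &ilb2->head)
                    example_visit_listener((struct sock *)icsk);
    }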
 
index 820eae3..5efd0b7 100644 (file)
@@ -265,7 +265,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
 
 static inline unsigned int ip6_skb_dst_mtu(struct sk_buff *skb)
 {
-       int mtu;
+       unsigned int mtu;
 
        struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
                                inet6_sk(skb->sk) : NULL;
index c0f0a13..49aa79c 100644 (file)
 #include <linux/if_ether.h>
 
 /* Lengths of frame formats */
-#define LLC_PDU_LEN_I  4       /* header and 2 control bytes */
-#define LLC_PDU_LEN_S  4
-#define LLC_PDU_LEN_U  3       /* header and 1 control byte */
+#define LLC_PDU_LEN_I          4       /* header and 2 control bytes */
+#define LLC_PDU_LEN_S          4
+#define LLC_PDU_LEN_U          3       /* header and 1 control byte */
+/* header and 1 control byte and XID info */
+#define LLC_PDU_LEN_U_XID      (LLC_PDU_LEN_U + sizeof(struct llc_xid_info))
 /* Known SAP addresses */
 #define LLC_GLOBAL_SAP 0xFF
 #define LLC_NULL_SAP   0x00    /* not network-layer visible */
 #define LLC_PDU_TYPE_U_MASK    0x03    /* 8-bit control field */
 #define LLC_PDU_TYPE_MASK      0x03
 
-#define LLC_PDU_TYPE_I 0       /* first bit */
-#define LLC_PDU_TYPE_S 1       /* first two bits */
-#define LLC_PDU_TYPE_U 3       /* first two bits */
+#define LLC_PDU_TYPE_I         0       /* first bit */
+#define LLC_PDU_TYPE_S         1       /* first two bits */
+#define LLC_PDU_TYPE_U         3       /* first two bits */
+#define LLC_PDU_TYPE_U_XID     4       /* private type for detecting XID commands */
 
 #define LLC_PDU_TYPE_IS_I(pdu) \
        ((!(pdu->ctrl_1 & LLC_PDU_TYPE_I_MASK)) ? 1 : 0)
@@ -230,9 +233,18 @@ static inline struct llc_pdu_un *llc_pdu_un_hdr(struct sk_buff *skb)
 static inline void llc_pdu_header_init(struct sk_buff *skb, u8 type,
                                       u8 ssap, u8 dsap, u8 cr)
 {
-       const int hlen = type == LLC_PDU_TYPE_U ? 3 : 4;
+       int hlen = 4; /* default value for I and S types */
        struct llc_pdu_un *pdu;
 
+       switch (type) {
+       case LLC_PDU_TYPE_U:
+               hlen = 3;
+               break;
+       case LLC_PDU_TYPE_U_XID:
+               hlen = 6;
+               break;
+       }
+
        skb_push(skb, hlen);
        skb_reset_network_header(skb);
        pdu = llc_pdu_un_hdr(skb);
@@ -374,7 +386,10 @@ static inline void llc_pdu_init_as_xid_cmd(struct sk_buff *skb,
        xid_info->fmt_id = LLC_XID_FMT_ID;      /* 0x81 */
        xid_info->type   = svcs_supported;
        xid_info->rw     = rx_window << 1;      /* size of receive window */
-       skb_put(skb, sizeof(struct llc_xid_info));
+
+       /* no need to push/put since llc_pdu_header_init() has already
+        * pushed 3 + 3 bytes
+        */
 }
 
 /**
index e946366..1f4e181 100644 (file)
@@ -75,6 +75,7 @@ struct netns_xfrm {
 #endif
        spinlock_t              xfrm_state_lock;
        seqcount_spinlock_t     xfrm_state_hash_generation;
+       seqcount_spinlock_t     xfrm_policy_hash_generation;
 
        spinlock_t xfrm_policy_lock;
        struct mutex xfrm_cfg_mutex;
index 4770a81..a964dae 100644 (file)
@@ -276,8 +276,8 @@ int nci_register_device(struct nci_dev *ndev);
 void nci_unregister_device(struct nci_dev *ndev);
 int nci_request(struct nci_dev *ndev,
                void (*req)(struct nci_dev *ndev,
-                           unsigned long opt),
-               unsigned long opt, __u32 timeout);
+                           const void *opt),
+               const void *opt, __u32 timeout);
 int nci_prop_cmd(struct nci_dev *ndev, __u8 oid, size_t len,
                 const __u8 *payload);
 int nci_core_cmd(struct nci_dev *ndev, __u16 opcode, size_t len,
index 3dd62dd..a408240 100644 (file)
                                        * Please note DMA-sync-for-CPU is still
                                        * device driver responsibility
                                        */
-#define PP_FLAG_ALL            (PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV)
+#define PP_FLAG_PAGE_FRAG      BIT(2) /* for page frag feature */
+#define PP_FLAG_ALL            (PP_FLAG_DMA_MAP |\
+                                PP_FLAG_DMA_SYNC_DEV |\
+                                PP_FLAG_PAGE_FRAG)
 
 /*
  * Fast allocation side cache array/stack
@@ -88,6 +91,9 @@ struct page_pool {
        unsigned long defer_warn;
 
        u32 pages_state_hold_cnt;
+       unsigned int frag_offset;
+       struct page *frag_page;
+       long frag_users;
 
        /*
         * Data structure for allocation side
@@ -137,6 +143,18 @@ static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool)
        return page_pool_alloc_pages(pool, gfp);
 }
 
+struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset,
+                                 unsigned int size, gfp_t gfp);
+
+static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool,
+                                                   unsigned int *offset,
+                                                   unsigned int size)
+{
+       gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN);
+
+       return page_pool_alloc_frag(pool, offset, size, gfp);
+}
+
 /* get the stored dma direction. A driver might decide to treat this locally and
  * avoid the extra cache line from page_pool to determine the direction
  */
@@ -198,19 +216,48 @@ static inline void page_pool_recycle_direct(struct page_pool *pool,
        page_pool_put_full_page(pool, page, true);
 }
 
+#define PAGE_POOL_DMA_USE_PP_FRAG_COUNT        \
+               (sizeof(dma_addr_t) > sizeof(unsigned long))
+
 static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
 {
-       dma_addr_t ret = page->dma_addr[0];
-       if (sizeof(dma_addr_t) > sizeof(unsigned long))
-               ret |= (dma_addr_t)page->dma_addr[1] << 16 << 16;
+       dma_addr_t ret = page->dma_addr;
+
+       if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT)
+               ret |= (dma_addr_t)page->dma_addr_upper << 16 << 16;
+
        return ret;
 }
 
 static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
 {
-       page->dma_addr[0] = addr;
-       if (sizeof(dma_addr_t) > sizeof(unsigned long))
-               page->dma_addr[1] = upper_32_bits(addr);
+       page->dma_addr = addr;
+       if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT)
+               page->dma_addr_upper = upper_32_bits(addr);
+}
+
+static inline void page_pool_set_frag_count(struct page *page, long nr)
+{
+       atomic_long_set(&page->pp_frag_count, nr);
+}
+
+static inline long page_pool_atomic_sub_frag_count_return(struct page *page,
+                                                         long nr)
+{
+       long ret;
+
+       /* As suggested by Alexander, atomic_long_read() may cover up the
+        * reference count errors, so avoid calling atomic_long_read() in
+        * the cases of freeing or draining the page_frags, where we would
+        * not expect it to match or that are slowpath anyway.
+        */
+       if (__builtin_constant_p(nr) &&
+           atomic_long_read(&page->pp_frag_count) == nr)
+               return 0;
+
+       ret = atomic_long_sub_return(nr, &page->pp_frag_count);
+       WARN_ON(ret < 0);
+       return ret;
 }
 
 static inline bool is_page_pool_compiled_in(void)
@@ -253,11 +300,4 @@ static inline void page_pool_ring_unlock(struct page_pool *pool)
                spin_unlock_bh(&pool->ring.producer_lock);
 }
 
-/* Store mem_info on struct page and use it while recycling skb frags */
-static inline
-void page_pool_store_mem_info(struct page *page, struct page_pool *pp)
-{
-       page->pp = pp;
-}
-
 #endif /* _NET_PAGE_POOL_H */
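
Taken together, PP_FLAG_PAGE_FRAG plus the frag allocator let a driver carve several RX buffers out of a single pooled page instead of spending a full page per buffer. A hedged driver-side sketch (names hypothetical; the pool must be created with PP_FLAG_PAGE_FRAG set in page_pool_params.flags):

    /* Illustrative RX-buffer refill using the new frag API. */
    static void *example_rx_buf_alloc(struct page_pool *pool,
                                      unsigned int len, unsigned int *offset)
    {
            struct page *page;

            page = page_pool_dev_alloc_frag(pool, offset, len);
            if (!page)
                    return NULL;

            return page_address(page) + *offset;
    }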
index dc28fcb..8fb47fc 100644 (file)
@@ -319,7 +319,7 @@ tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts,
 
 int tcf_exts_validate(struct net *net, struct tcf_proto *tp,
                      struct nlattr **tb, struct nlattr *rate_tlv,
-                     struct tcf_exts *exts, bool ovr, bool rtnl_held,
+                     struct tcf_exts *exts, u32 flags,
                      struct netlink_ext_ack *extack);
 void tcf_exts_destroy(struct tcf_exts *exts);
 void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src);
@@ -329,6 +329,9 @@ int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts);
 
 /**
  * struct tcf_pkt_info - packet information
+ *
+ * @ptr: start of the pkt data
+ * @nexthdr: offset of the next header
  */
 struct tcf_pkt_info {
        unsigned char *         ptr;
@@ -347,6 +350,7 @@ struct tcf_ematch_ops;
  * @ops: the operations lookup table of the corresponding ematch module
  * @datalen: length of the ematch specific configuration data
  * @data: ematch specific data
+ * @net: the network namespace
  */
 struct tcf_ematch {
        struct tcf_ematch_ops * ops;
index 384e800..9f48733 100644 (file)
@@ -153,7 +153,8 @@ struct rtnl_af_ops {
                                                    u32 ext_filter_mask);
 
        int                     (*validate_link_af)(const struct net_device *dev,
-                                                   const struct nlattr *attr);
+                                                   const struct nlattr *attr,
+                                                   struct netlink_ext_ack *extack);
        int                     (*set_link_af)(struct net_device *dev,
                                               const struct nlattr *attr,
                                               struct netlink_ext_ack *extack);
index 9ed33e6..c0069ac 100644 (file)
@@ -357,7 +357,7 @@ struct tcf_proto_ops {
        int                     (*change)(struct net *net, struct sk_buff *,
                                        struct tcf_proto*, unsigned long,
                                        u32 handle, struct nlattr **,
-                                       void **, bool, bool,
+                                       void **, u32,
                                        struct netlink_ext_ack *);
        int                     (*delete)(struct tcf_proto *tp, void *arg,
                                          bool *last, bool rtnl_held,
index 32fc4a3..651bba6 100644 (file)
@@ -984,6 +984,7 @@ struct sctp_transport {
        } cacc;
 
        struct {
+               __u32 last_rtx_chunks;
                __u16 pmtu;
                __u16 probe_size;
                __u16 probe_high;
@@ -1024,8 +1025,8 @@ bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu);
 void sctp_transport_immediate_rtx(struct sctp_transport *);
 void sctp_transport_dst_release(struct sctp_transport *t);
 void sctp_transport_dst_confirm(struct sctp_transport *t);
-void sctp_transport_pl_send(struct sctp_transport *t);
-void sctp_transport_pl_recv(struct sctp_transport *t);
+bool sctp_transport_pl_send(struct sctp_transport *t);
+bool sctp_transport_pl_recv(struct sctp_transport *t);
 
 
 /* This is the structure we use to queue packets as they come into
index ff1be7e..6e76145 100644 (file)
@@ -68,6 +68,7 @@
 #include <net/tcp_states.h>
 #include <linux/net_tstamp.h>
 #include <net/l3mdev.h>
+#include <uapi/linux/socket.h>
 
 /*
  * This structure really needs to be cleaned up.
@@ -1438,8 +1439,6 @@ static inline int __sk_prot_rehash(struct sock *sk)
 #define RCV_SHUTDOWN   1
 #define SEND_SHUTDOWN  2
 
-#define SOCK_SNDBUF_LOCK       1
-#define SOCK_RCVBUF_LOCK       2
 #define SOCK_BINDADDR_LOCK     4
 #define SOCK_BINDPORT_LOCK     8
 
index 66468ff..60d806b 100644 (file)
@@ -180,6 +180,14 @@ struct switchdev_obj_in_state_mrp {
 
 typedef int switchdev_obj_dump_cb_t(struct switchdev_obj *obj);
 
+struct switchdev_brport {
+       struct net_device *dev;
+       const void *ctx;
+       struct notifier_block *atomic_nb;
+       struct notifier_block *blocking_nb;
+       bool tx_fwd_offload;
+};
+
 enum switchdev_notifier_type {
        SWITCHDEV_FDB_ADD_TO_BRIDGE = 1,
        SWITCHDEV_FDB_DEL_TO_BRIDGE,
@@ -197,6 +205,9 @@ enum switchdev_notifier_type {
        SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE,
        SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE,
        SWITCHDEV_VXLAN_FDB_OFFLOADED,
+
+       SWITCHDEV_BRPORT_OFFLOADED,
+       SWITCHDEV_BRPORT_UNOFFLOADED,
 };
 
 struct switchdev_notifier_info {
@@ -226,6 +237,11 @@ struct switchdev_notifier_port_attr_info {
        bool handled;
 };
 
+struct switchdev_notifier_brport_info {
+       struct switchdev_notifier_info info; /* must be first */
+       const struct switchdev_brport brport;
+};
+
 static inline struct net_device *
 switchdev_notifier_info_to_dev(const struct switchdev_notifier_info *info)
 {
@@ -246,6 +262,17 @@ switchdev_fdb_is_dynamically_learned(const struct switchdev_notifier_fdb_info *f
 
 #ifdef CONFIG_NET_SWITCHDEV
 
+int switchdev_bridge_port_offload(struct net_device *brport_dev,
+                                 struct net_device *dev, const void *ctx,
+                                 struct notifier_block *atomic_nb,
+                                 struct notifier_block *blocking_nb,
+                                 bool tx_fwd_offload,
+                                 struct netlink_ext_ack *extack);
+void switchdev_bridge_port_unoffload(struct net_device *brport_dev,
+                                    const void *ctx,
+                                    struct notifier_block *atomic_nb,
+                                    struct notifier_block *blocking_nb);
+
 void switchdev_deferred_process(void);
 int switchdev_port_attr_set(struct net_device *dev,
                            const struct switchdev_attr *attr,
@@ -316,6 +343,25 @@ int switchdev_handle_port_attr_set(struct net_device *dev,
                                      struct netlink_ext_ack *extack));
 #else
 
+static inline int
+switchdev_bridge_port_offload(struct net_device *brport_dev,
+                             struct net_device *dev, const void *ctx,
+                             struct notifier_block *atomic_nb,
+                             struct notifier_block *blocking_nb,
+                             bool tx_fwd_offload,
+                             struct netlink_ext_ack *extack)
+{
+       return -EOPNOTSUPP;
+}
+
+static inline void
+switchdev_bridge_port_unoffload(struct net_device *brport_dev,
+                               const void *ctx,
+                               struct notifier_block *atomic_nb,
+                               struct notifier_block *blocking_nb)
+{
+}
+
 static inline void switchdev_deferred_process(void)
 {
 }
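
A switchdev driver is expected to announce offload from its bridge-join path and tear it down again on leave; a sketch of the join side, with the driver objects and notifier blocks as hypothetical names:

    /* Illustrative: announce that this port's FDB/attrs are offloaded. */
    static int example_port_bridge_join(struct example_port *port,
                                        struct netlink_ext_ack *extack)
    {
            return switchdev_bridge_port_offload(port->netdev, port->netdev,
                                                 port,
                                                 &example_switchdev_nb,
                                                 &example_switchdev_blocking_nb,
                                                 false, extack);
    }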
index 784d5c3..3166dc1 100644 (file)
@@ -1958,7 +1958,6 @@ struct tcp_iter_state {
        struct seq_net_private  p;
        enum tcp_seq_states     state;
        struct sock             *syn_wait_sk;
-       struct tcp_seq_afinfo   *bpf_seq_afinfo;
        int                     bucket, offset, sbucket, num;
        loff_t                  last_pos;
 };
index 675849d..8e6dd8a 100644 (file)
@@ -712,6 +712,12 @@ struct snd_soc_dai_link {
        /* Do not create a PCM for this DAI link (Backend link) */
        unsigned int ignore:1;
 
+       /* This flag reorders the stop sequence. When enabled, the DMA
+        * controller stop sequence is invoked first, followed by the
+        * CPU DAI driver stop sequence.
+        */
+       unsigned int stop_dma_first:1;
+
 #ifdef CONFIG_SND_SOC_TOPOLOGY
        struct snd_soc_dobj dobj; /* For topology */
 #endif
index d588c24..1f0a2b4 100644 (file)
 
 #define SO_NETNS_COOKIE                71
 
+#define SO_BUF_LOCK            72
+
 #if !defined(__KERNEL__)
 
 #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))
index df6e821..3893646 100644 (file)
@@ -78,11 +78,20 @@ enum {
 enum {
        J1939_NLA_PAD,
        J1939_NLA_BYTES_ACKED,
+       J1939_NLA_TOTAL_SIZE,
+       J1939_NLA_PGN,
+       J1939_NLA_SRC_NAME,
+       J1939_NLA_DEST_NAME,
+       J1939_NLA_SRC_ADDR,
+       J1939_NLA_DEST_ADDR,
 };
 
 enum {
        J1939_EE_INFO_NONE,
        J1939_EE_INFO_TX_ABORT,
+       J1939_EE_INFO_RX_RTS,
+       J1939_EE_INFO_RX_DPO,
+       J1939_EE_INFO_RX_ABORT,
 };
 
 struct j1939_filter {
index e33997b..edc346a 100644 (file)
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* SPDX-License-Identifier: LGPL-2.1 WITH Linux-syscall-note */
 /* Copyright(c) 2019 Intel Corporation. All rights rsvd. */
 #ifndef _USR_IDXD_H_
 #define _USR_IDXD_H_
index 49b22af..5310003 100644 (file)
@@ -855,6 +855,7 @@ enum {
        IFLA_BOND_AD_ACTOR_SYSTEM,
        IFLA_BOND_TLB_DYNAMIC_LB,
        IFLA_BOND_PEER_NOTIF_DELAY,
+       IFLA_BOND_AD_LACP_ACTIVE,
        __IFLA_BOND_MAX,
 };
 
index d1b3270..1416822 100644 (file)
@@ -188,11 +188,22 @@ struct ip_mreq_source {
 };
 
 struct ip_msfilter {
-       __be32          imsf_multiaddr;
-       __be32          imsf_interface;
-       __u32           imsf_fmode;
-       __u32           imsf_numsrc;
-       __be32          imsf_slist[1];
+       union {
+               struct {
+                       __be32          imsf_multiaddr_aux;
+                       __be32          imsf_interface_aux;
+                       __u32           imsf_fmode_aux;
+                       __u32           imsf_numsrc_aux;
+                       __be32          imsf_slist[1];
+               };
+               struct {
+                       __be32          imsf_multiaddr;
+                       __be32          imsf_interface;
+                       __u32           imsf_fmode;
+                       __u32           imsf_numsrc;
+                       __be32          imsf_slist_flex[];
+               };
+       };
 };
 
 #define IP_MSFILTER_SIZE(numsrc) \
@@ -211,11 +222,22 @@ struct group_source_req {
 };
 
 struct group_filter {
-       __u32                            gf_interface;  /* interface index */
-       struct __kernel_sockaddr_storage gf_group;      /* multicast address */
-       __u32                            gf_fmode;      /* filter mode */
-       __u32                            gf_numsrc;     /* number of sources */
-       struct __kernel_sockaddr_storage gf_slist[1];   /* interface index */
+       union {
+               struct {
+                       __u32                            gf_interface_aux; /* interface index */
+                       struct __kernel_sockaddr_storage gf_group_aux;     /* multicast address */
+                       __u32                            gf_fmode_aux;     /* filter mode */
+                       __u32                            gf_numsrc_aux;    /* number of sources */
+                       struct __kernel_sockaddr_storage gf_slist[1];      /* interface index */
+               };
+               struct {
+                       __u32                            gf_interface;    /* interface index */
+                       struct __kernel_sockaddr_storage gf_group;        /* multicast address */
+                       __u32                            gf_fmode;        /* filter mode */
+                       __u32                            gf_numsrc;       /* number of sources */
+                       struct __kernel_sockaddr_storage gf_slist_flex[]; /* interface index */
+               };
+       };
 };
 
 #define GROUP_FILTER_SIZE(numsrc) \
index 025c40f..6836ccb 100644 (file)
@@ -22,6 +22,7 @@ enum {
        __TCA_ACT_MAX
 };
 
+/* See other TCA_ACT_FLAGS_* flags in include/net/act_api.h. */
 #define TCA_ACT_FLAGS_NO_PERCPU_STATS 1 /* Don't use percpu allocator for
                                         * actions stats.
                                         */
index c3409c8..eb0a9a5 100644 (file)
@@ -26,4 +26,9 @@ struct __kernel_sockaddr_storage {
        };
 };
 
+#define SOCK_SNDBUF_LOCK       1
+#define SOCK_RCVBUF_LOCK       2
+
+#define SOCK_BUF_LOCK_MASK (SOCK_SNDBUF_LOCK | SOCK_RCVBUF_LOCK)
+
 #endif /* _UAPI_LINUX_SOCKET_H */
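
Moving SOCK_SNDBUF_LOCK/SOCK_RCVBUF_LOCK into UAPI pairs with the new SO_BUF_LOCK socket option (see the asm/socket.h hunk above). A user-space sketch, assuming the option takes the lock mask as an int and that the system headers already carry these definitions:

    #include <sys/socket.h>
    #include <linux/socket.h>

    /* Pin both buffer sizes against kernel auto-tuning. */
    static int example_lock_bufs(int fd)
    {
            int lock = SOCK_SNDBUF_LOCK | SOCK_RCVBUF_LOCK;

            return setsockopt(fd, SOL_SOCKET, SO_BUF_LOCK,
                              &lock, sizeof(lock));
    }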
index 26b638a..a7085e0 100644 (file)
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */
+/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB */
 /*
  * Copyright (c) 2006 - 2021 Intel Corporation.  All rights reserved.
  * Copyright (c) 2005 Topspin Communications.  All rights reserved.
index f5b8246..11cbbec 100644 (file)
@@ -1221,7 +1221,7 @@ trace_initcall_start_cb(void *data, initcall_t fn)
 {
        ktime_t *calltime = (ktime_t *)data;
 
-       printk(KERN_DEBUG "calling  %pS @ %i\n", fn, task_pid_nr(current));
+       printk(KERN_DEBUG "calling  %pS @ %i irqs_disabled() %d\n", fn, task_pid_nr(current), irqs_disabled());
        *calltime = ktime_get();
 }
 
@@ -1235,8 +1235,8 @@ trace_initcall_finish_cb(void *data, initcall_t fn, int ret)
        rettime = ktime_get();
        delta = ktime_sub(rettime, *calltime);
        duration = (unsigned long long) ktime_to_ns(delta) >> 10;
-       printk(KERN_DEBUG "initcall %pS returned %d after %lld usecs\n",
-                fn, ret, duration);
+       printk(KERN_DEBUG "initcall %pS returned %d after %lld usecs, irqs_disabled() %d\n",
+                fn, ret, duration, irqs_disabled());
 }
 
 static ktime_t initcall_calltime;
index 2d4fbdb..2e9d47b 100644 (file)
@@ -360,6 +360,28 @@ bool bpf_iter_prog_supported(struct bpf_prog *prog)
        return supported;
 }
 
+const struct bpf_func_proto *
+bpf_iter_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+       const struct bpf_iter_target_info *tinfo;
+       const struct bpf_func_proto *fn = NULL;
+
+       mutex_lock(&targets_mutex);
+       list_for_each_entry(tinfo, &targets, list) {
+               if (tinfo->btf_id == prog->aux->attach_btf_id) {
+                       const struct bpf_iter_reg *reg_info;
+
+                       reg_info = tinfo->reg_info;
+                       if (reg_info->get_func_proto)
+                               fn = reg_info->get_func_proto(func_id, prog);
+                       break;
+               }
+       }
+       mutex_unlock(&targets_mutex);
+
+       return fn;
+}
+
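On the registration side, an iterator target opts in by filling the new get_func_proto member of its bpf_iter_reg; anything it does not handle falls back to the default proto lookup. A sketch with hypothetical names:

    /* Illustrative: expose one extra helper to programs attached to
     * this target only; example_setsockopt_proto is hypothetical.
     */
    static const struct bpf_func_proto *
    example_iter_get_func_proto(enum bpf_func_id func_id,
                                const struct bpf_prog *prog)
    {
            switch (func_id) {
            case BPF_FUNC_setsockopt:
                    return &example_setsockopt_proto;
            default:
                    return NULL;
            }
    }

    static const struct bpf_iter_reg example_iter_reg = {
            .target         = "example",
            .get_func_proto = example_iter_get_func_proto,
    };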
 static void bpf_iter_link_release(struct bpf_link *link)
 {
        struct bpf_iter_link *iter_link =
index 7780131..c395024 100644 (file)
@@ -4825,6 +4825,11 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
                const struct bpf_ctx_arg_aux *ctx_arg_info = &prog->aux->ctx_arg_info[i];
 
                if (ctx_arg_info->offset == off) {
+                       if (!ctx_arg_info->btf_id) {
+                               bpf_log(log, "invalid btf_id for context argument offset %u\n", off);
+                               return false;
+                       }
+
                        info->reg_type = ctx_arg_info->reg_type;
                        info->btf = btf_vmlinux;
                        info->btf_id = ctx_arg_info->btf_id;
index 9b15774..fe807b2 100644 (file)
@@ -32,6 +32,8 @@
 #include <linux/perf_event.h>
 #include <linux/extable.h>
 #include <linux/log2.h>
+
+#include <asm/barrier.h>
 #include <asm/unaligned.h>
 
 /* Registers */
@@ -1377,6 +1379,7 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn)
                /* Non-UAPI available opcodes. */
                [BPF_JMP | BPF_CALL_ARGS] = &&JMP_CALL_ARGS,
                [BPF_JMP | BPF_TAIL_CALL] = &&JMP_TAIL_CALL,
+               [BPF_ST  | BPF_NOSPEC] = &&ST_NOSPEC,
                [BPF_LDX | BPF_PROBE_MEM | BPF_B] = &&LDX_PROBE_MEM_B,
                [BPF_LDX | BPF_PROBE_MEM | BPF_H] = &&LDX_PROBE_MEM_H,
                [BPF_LDX | BPF_PROBE_MEM | BPF_W] = &&LDX_PROBE_MEM_W,
@@ -1559,7 +1562,7 @@ select_insn:
 
                if (unlikely(index >= array->map.max_entries))
                        goto out;
-               if (unlikely(tail_call_cnt > MAX_TAIL_CALL_CNT))
+               if (unlikely(tail_call_cnt >= MAX_TAIL_CALL_CNT))
                        goto out;
 
                tail_call_cnt++;
@@ -1621,7 +1624,21 @@ out:
        COND_JMP(s, JSGE, >=)
        COND_JMP(s, JSLE, <=)
 #undef COND_JMP
-       /* STX and ST and LDX*/
+       /* ST, STX and LDX */
+       ST_NOSPEC:
+               /* Speculation barrier for mitigating Speculative Store Bypass.
+                * In case of arm64, we rely on the firmware mitigation as
+                * controlled via the ssbd kernel parameter. Whenever the
+                * mitigation is enabled, it works for all of the kernel code
+                * with no need to provide any additional instructions here.
+                * In case of x86, we use 'lfence' insn for mitigation. We
+                * reuse preexisting logic from Spectre v1 mitigation that
+                * happens to produce the required code on x86 for v4 as well.
+                */
+#ifdef CONFIG_X86
+               barrier_nospec();
+#endif
+               CONT;
 #define LDST(SIZEOP, SIZE)                                             \
        STX_MEM_##SIZEOP:                                               \
                *(SIZE *)(unsigned long) (DST + insn->off) = SRC;       \
index 542e94f..f02d045 100644 (file)
@@ -534,10 +534,9 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
        return __xdp_enqueue(dev, xdp, dev_rx, dst->xdp_prog);
 }
 
-static bool is_valid_dst(struct bpf_dtab_netdev *obj, struct xdp_buff *xdp,
-                        int exclude_ifindex)
+static bool is_valid_dst(struct bpf_dtab_netdev *obj, struct xdp_buff *xdp)
 {
-       if (!obj || obj->dev->ifindex == exclude_ifindex ||
+       if (!obj ||
            !obj->dev->netdev_ops->ndo_xdp_xmit)
                return false;
 
@@ -562,17 +561,48 @@ static int dev_map_enqueue_clone(struct bpf_dtab_netdev *obj,
        return 0;
 }
 
+static inline bool is_ifindex_excluded(int *excluded, int num_excluded, int ifindex)
+{
+       while (num_excluded--) {
+               if (ifindex == excluded[num_excluded])
+                       return true;
+       }
+       return false;
+}
+
+/* Get ifindex of each upper device. 'indexes' must be able to hold at
+ * least MAX_NEST_DEV elements.
+ * Returns the number of ifindexes added.
+ */
+static int get_upper_ifindexes(struct net_device *dev, int *indexes)
+{
+       struct net_device *upper;
+       struct list_head *iter;
+       int n = 0;
+
+       netdev_for_each_upper_dev_rcu(dev, upper, iter) {
+               indexes[n++] = upper->ifindex;
+       }
+       return n;
+}
+
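On the BPF program side nothing changes: exclusion is still requested with BPF_F_EXCLUDE_INGRESS, which after this patch also filters the ingress device's upper devices (e.g. a bond master). A minimal XDP sketch:

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    struct {
            __uint(type, BPF_MAP_TYPE_DEVMAP_HASH);
            __uint(max_entries, 32);
            __uint(key_size, sizeof(int));
            __uint(value_size, sizeof(int));
    } tx_ports SEC(".maps");

    SEC("xdp")
    int xdp_broadcast(struct xdp_md *ctx)
    {
            /* Broadcast to every devmap entry except the ingress device
             * and, with this change, its uppers.
             */
            return bpf_redirect_map(&tx_ports, 0,
                                    BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
    }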
 int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
                          struct bpf_map *map, bool exclude_ingress)
 {
        struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
-       int exclude_ifindex = exclude_ingress ? dev_rx->ifindex : 0;
        struct bpf_dtab_netdev *dst, *last_dst = NULL;
+       int excluded_devices[1 + MAX_NEST_DEV];
        struct hlist_head *head;
        struct xdp_frame *xdpf;
+       int num_excluded = 0;
        unsigned int i;
        int err;
 
+       if (exclude_ingress) {
+               num_excluded = get_upper_ifindexes(dev_rx, excluded_devices);
+               excluded_devices[num_excluded++] = dev_rx->ifindex;
+       }
+
        xdpf = xdp_convert_buff_to_frame(xdp);
        if (unlikely(!xdpf))
                return -EOVERFLOW;
@@ -581,7 +611,10 @@ int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
                for (i = 0; i < map->max_entries; i++) {
                        dst = rcu_dereference_check(dtab->netdev_map[i],
                                                    rcu_read_lock_bh_held());
-                       if (!is_valid_dst(dst, xdp, exclude_ifindex))
+                       if (!is_valid_dst(dst, xdp))
+                               continue;
+
+                       if (is_ifindex_excluded(excluded_devices, num_excluded, dst->dev->ifindex))
                                continue;
 
                        /* we only need n-1 clones; last_dst enqueued below */
@@ -601,7 +634,11 @@ int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
                        head = dev_map_index_hash(dtab, i);
                        hlist_for_each_entry_rcu(dst, head, index_hlist,
                                                 lockdep_is_held(&dtab->index_lock)) {
-                               if (!is_valid_dst(dst, xdp, exclude_ifindex))
+                               if (!is_valid_dst(dst, xdp))
+                                       continue;
+
+                               if (is_ifindex_excluded(excluded_devices, num_excluded,
+                                                       dst->dev->ifindex))
                                        continue;
 
                                /* we only need n-1 clones; last_dst enqueued below */
@@ -675,18 +712,27 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
                           bool exclude_ingress)
 {
        struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
-       int exclude_ifindex = exclude_ingress ? dev->ifindex : 0;
        struct bpf_dtab_netdev *dst, *last_dst = NULL;
+       int excluded_devices[1 + MAX_NEST_DEV];
        struct hlist_head *head;
        struct hlist_node *next;
+       int num_excluded = 0;
        unsigned int i;
        int err;
 
+       if (exclude_ingress) {
+               num_excluded = get_upper_ifindexes(dev, excluded_devices);
+               excluded_devices[num_excluded++] = dev->ifindex;
+       }
+
        if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
                for (i = 0; i < map->max_entries; i++) {
                        dst = rcu_dereference_check(dtab->netdev_map[i],
                                                    rcu_read_lock_bh_held());
-                       if (!dst || dst->dev->ifindex == exclude_ifindex)
+                       if (!dst)
+                               continue;
+
+                       if (is_ifindex_excluded(excluded_devices, num_excluded, dst->dev->ifindex))
                                continue;
 
                        /* we only need n-1 clones; last_dst enqueued below */
@@ -700,12 +746,17 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
                                return err;
 
                        last_dst = dst;
+
                }
        } else { /* BPF_MAP_TYPE_DEVMAP_HASH */
                for (i = 0; i < dtab->n_buckets; i++) {
                        head = dev_map_index_hash(dtab, i);
                        hlist_for_each_entry_safe(dst, next, head, index_hlist) {
-                               if (!dst || dst->dev->ifindex == exclude_ifindex)
+                               if (!dst)
+                                       continue;
+
+                               if (is_ifindex_excluded(excluded_devices, num_excluded,
+                                                       dst->dev->ifindex))
                                        continue;
 
                                /* we only need n-1 clones; last_dst enqueued below */
index bbfc6bb..ca3cd9a 100644 (file)
@@ -206,15 +206,17 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
                        verbose(cbs->private_data, "BUG_%02x\n", insn->code);
                }
        } else if (class == BPF_ST) {
-               if (BPF_MODE(insn->code) != BPF_MEM) {
+               if (BPF_MODE(insn->code) == BPF_MEM) {
+                       verbose(cbs->private_data, "(%02x) *(%s *)(r%d %+d) = %d\n",
+                               insn->code,
+                               bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
+                               insn->dst_reg,
+                               insn->off, insn->imm);
+               } else if (BPF_MODE(insn->code) == 0xc0 /* BPF_NOSPEC, no UAPI */) {
+                       verbose(cbs->private_data, "(%02x) nospec\n", insn->code);
+               } else {
                        verbose(cbs->private_data, "BUG_st_%02x\n", insn->code);
-                       return;
                }
-               verbose(cbs->private_data, "(%02x) *(%s *)(r%d %+d) = %d\n",
-                       insn->code,
-                       bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
-                       insn->dst_reg,
-                       insn->off, insn->imm);
        } else if (class == BPF_LDX) {
                if (BPF_MODE(insn->code) != BPF_MEM) {
                        verbose(cbs->private_data, "BUG_ldx_%02x\n", insn->code);
index 9fe846e..15746f7 100644 (file)
@@ -393,8 +393,6 @@ const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto = {
 };
 
 #ifdef CONFIG_CGROUP_BPF
-DECLARE_PER_CPU(struct bpf_cgroup_storage_info,
-               bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]);
 
 BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
 {
@@ -403,17 +401,13 @@ BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
         * verifier checks that its value is correct.
         */
        enum bpf_cgroup_storage_type stype = cgroup_storage_type(map);
-       struct bpf_cgroup_storage *storage = NULL;
+       struct bpf_cgroup_storage *storage;
+       struct bpf_cg_run_ctx *ctx;
        void *ptr;
-       int i;
 
-       for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) {
-               if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current))
-                       continue;
-
-               storage = this_cpu_read(bpf_cgroup_storage_info[i].storage[stype]);
-               break;
-       }
+       /* get current cgroup storage from BPF run context */
+       ctx = container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx);
+       storage = ctx->prog_item->cgroup_storage[stype];
 
        if (stype == BPF_CGROUP_STORAGE_SHARED)
                ptr = &READ_ONCE(storage->buf)->data[0];
index 95d70a0..035e9e3 100644 (file)
@@ -1,6 +1,7 @@
 //SPDX-License-Identifier: GPL-2.0
 #include <linux/bpf-cgroup.h>
 #include <linux/bpf.h>
+#include <linux/bpf_local_storage.h>
 #include <linux/btf.h>
 #include <linux/bug.h>
 #include <linux/filter.h>
@@ -11,9 +12,6 @@
 
 #ifdef CONFIG_CGROUP_BPF
 
-DEFINE_PER_CPU(struct bpf_cgroup_storage_info,
-              bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]);
-
 #include "../cgroup/cgroup-internal.h"
 
 #define LOCAL_STORAGE_CREATE_FLAG_MASK                                 \
@@ -286,9 +284,17 @@ enoent:
 
 static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
 {
+       __u32 max_value_size = BPF_LOCAL_STORAGE_MAX_VALUE_SIZE;
        int numa_node = bpf_map_attr_numa_node(attr);
        struct bpf_cgroup_storage_map *map;
 
+       /* percpu is bound by PCPU_MIN_UNIT_SIZE, non-percpu
+        * is the same as other local storages.
+        */
+       if (attr->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
+               max_value_size = min_t(__u32, max_value_size,
+                                      PCPU_MIN_UNIT_SIZE);
+
        if (attr->key_size != sizeof(struct bpf_cgroup_storage_key) &&
            attr->key_size != sizeof(__u64))
                return ERR_PTR(-EINVAL);
@@ -296,7 +302,7 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
        if (attr->value_size == 0)
                return ERR_PTR(-EINVAL);
 
-       if (attr->value_size > PAGE_SIZE)
+       if (attr->value_size > max_value_size)
                return ERR_PTR(-E2BIG);
 
        if (attr->map_flags & ~LOCAL_STORAGE_CREATE_FLAG_MASK ||
@@ -409,7 +415,7 @@ static int cgroup_storage_check_btf(const struct bpf_map *map,
 static void cgroup_storage_seq_show_elem(struct bpf_map *map, void *key,
                                         struct seq_file *m)
 {
-       enum bpf_cgroup_storage_type stype = cgroup_storage_type(map);
+       enum bpf_cgroup_storage_type stype;
        struct bpf_cgroup_storage *storage;
        int cpu;
 
index 475c28e..5ea2238 100644 (file)
@@ -2667,6 +2667,19 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
        cur = env->cur_state->frame[env->cur_state->curframe];
        if (value_regno >= 0)
                reg = &cur->regs[value_regno];
+       if (!env->bypass_spec_v4) {
+               bool sanitize = reg && is_spillable_regtype(reg->type);
+
+               for (i = 0; i < size; i++) {
+                       if (state->stack[spi].slot_type[i] == STACK_INVALID) {
+                               sanitize = true;
+                               break;
+                       }
+               }
+
+               if (sanitize)
+                       env->insn_aux_data[insn_idx].sanitize_stack_spill = true;
+       }
 
        if (reg && size == BPF_REG_SIZE && register_is_bounded(reg) &&
            !register_is_null(reg) && env->bpf_capable) {
@@ -2689,47 +2702,10 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
                        verbose(env, "invalid size of register spill\n");
                        return -EACCES;
                }
-
                if (state != cur && reg->type == PTR_TO_STACK) {
                        verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
                        return -EINVAL;
                }
-
-               if (!env->bypass_spec_v4) {
-                       bool sanitize = false;
-
-                       if (state->stack[spi].slot_type[0] == STACK_SPILL &&
-                           register_is_const(&state->stack[spi].spilled_ptr))
-                               sanitize = true;
-                       for (i = 0; i < BPF_REG_SIZE; i++)
-                               if (state->stack[spi].slot_type[i] == STACK_MISC) {
-                                       sanitize = true;
-                                       break;
-                               }
-                       if (sanitize) {
-                               int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off;
-                               int soff = (-spi - 1) * BPF_REG_SIZE;
-
-                               /* detected reuse of integer stack slot with a pointer
-                                * which means either llvm is reusing stack slot or
-                                * an attacker is trying to exploit CVE-2018-3639
-                                * (speculative store bypass)
-                                * Have to sanitize that slot with preemptive
-                                * store of zero.
-                                */
-                               if (*poff && *poff != soff) {
-                                       /* disallow programs where single insn stores
-                                        * into two different stack slots, since verifier
-                                        * cannot sanitize them
-                                        */
-                                       verbose(env,
-                                               "insn %d cannot access two stack slots fp%d and fp%d",
-                                               insn_idx, *poff, soff);
-                                       return -EINVAL;
-                               }
-                               *poff = soff;
-                       }
-               }
                save_register_state(state, spi, reg);
        } else {
                u8 type = STACK_MISC;
@@ -6804,6 +6780,12 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env,
                alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0;
                alu_state |= ptr_is_dst_reg ?
                             BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
+
+               /* Limit pruning on unknown scalars to enable deep search for
+                * potential masking differences from other program paths.
+                */
+               if (!off_is_imm)
+                       env->explore_alu_limits = true;
        }
 
        err = update_alu_sanitation_state(aux, alu_state, alu_limit);
@@ -10207,8 +10189,8 @@ next:
 }
 
 /* Returns true if (rold safe implies rcur safe) */
-static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
-                   struct bpf_id_pair *idmap)
+static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
+                   struct bpf_reg_state *rcur, struct bpf_id_pair *idmap)
 {
        bool equal;
 
@@ -10234,6 +10216,8 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
                return false;
        switch (rold->type) {
        case SCALAR_VALUE:
+               if (env->explore_alu_limits)
+                       return false;
                if (rcur->type == SCALAR_VALUE) {
                        if (!rold->precise && !rcur->precise)
                                return true;
@@ -10324,9 +10308,8 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
        return false;
 }
 
-static bool stacksafe(struct bpf_func_state *old,
-                     struct bpf_func_state *cur,
-                     struct bpf_id_pair *idmap)
+static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
+                     struct bpf_func_state *cur, struct bpf_id_pair *idmap)
 {
        int i, spi;
 
@@ -10371,9 +10354,8 @@ static bool stacksafe(struct bpf_func_state *old,
                        continue;
                if (old->stack[spi].slot_type[0] != STACK_SPILL)
                        continue;
-               if (!regsafe(&old->stack[spi].spilled_ptr,
-                            &cur->stack[spi].spilled_ptr,
-                            idmap))
+               if (!regsafe(env, &old->stack[spi].spilled_ptr,
+                            &cur->stack[spi].spilled_ptr, idmap))
                        /* when explored and current stack slot are both storing
                         * spilled registers, check that stored pointers types
                         * are the same as well.
@@ -10430,10 +10412,11 @@ static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_stat
 
        memset(env->idmap_scratch, 0, sizeof(env->idmap_scratch));
        for (i = 0; i < MAX_BPF_REG; i++)
-               if (!regsafe(&old->regs[i], &cur->regs[i], env->idmap_scratch))
+               if (!regsafe(env, &old->regs[i], &cur->regs[i],
+                            env->idmap_scratch))
                        return false;
 
-       if (!stacksafe(old, cur, env->idmap_scratch))
+       if (!stacksafe(env, old, cur, env->idmap_scratch))
                return false;
 
        if (!refsafe(old, cur))
@@ -12198,35 +12181,33 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
 
        for (i = 0; i < insn_cnt; i++, insn++) {
                bpf_convert_ctx_access_t convert_ctx_access;
+               bool ctx_access;
 
                if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
                    insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
                    insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
-                   insn->code == (BPF_LDX | BPF_MEM | BPF_DW))
+                   insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) {
                        type = BPF_READ;
-               else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
-                        insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
-                        insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
-                        insn->code == (BPF_STX | BPF_MEM | BPF_DW))
+                       ctx_access = true;
+               } else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
+                          insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
+                          insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
+                          insn->code == (BPF_STX | BPF_MEM | BPF_DW) ||
+                          insn->code == (BPF_ST | BPF_MEM | BPF_B) ||
+                          insn->code == (BPF_ST | BPF_MEM | BPF_H) ||
+                          insn->code == (BPF_ST | BPF_MEM | BPF_W) ||
+                          insn->code == (BPF_ST | BPF_MEM | BPF_DW)) {
                        type = BPF_WRITE;
-               else
+                       ctx_access = BPF_CLASS(insn->code) == BPF_STX;
+               } else {
                        continue;
+               }
 
                if (type == BPF_WRITE &&
-                   env->insn_aux_data[i + delta].sanitize_stack_off) {
+                   env->insn_aux_data[i + delta].sanitize_stack_spill) {
                        struct bpf_insn patch[] = {
-                               /* Sanitize suspicious stack slot with zero.
-                                * There are no memory dependencies for this store,
-                                * since it's only using frame pointer and immediate
-                                * constant of zero
-                                */
-                               BPF_ST_MEM(BPF_DW, BPF_REG_FP,
-                                          env->insn_aux_data[i + delta].sanitize_stack_off,
-                                          0),
-                               /* the original STX instruction will immediately
-                                * overwrite the same stack slot with appropriate value
-                                */
                                *insn,
+                               BPF_ST_NOSPEC(),
                        };
 
                        cnt = ARRAY_SIZE(patch);
@@ -12240,6 +12221,9 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
                        continue;
                }
 
+               if (!ctx_access)
+                       continue;
+
                switch (env->insn_aux_data[i + delta].ptr_type) {
                case PTR_TO_CTX:
                        if (!ops->convert_ctx_access)
@@ -13093,37 +13077,6 @@ static void free_states(struct bpf_verifier_env *env)
        }
 }
 
-/* The verifier is using insn_aux_data[] to store temporary data during
- * verification and to store information for passes that run after the
- * verification like dead code sanitization. do_check_common() for subprogram N
- * may analyze many other subprograms. sanitize_insn_aux_data() clears all
- * temporary data after do_check_common() finds that subprogram N cannot be
- * verified independently. pass_cnt counts the number of times
- * do_check_common() was run and insn->aux->seen tells the pass number
- * insn_aux_data was touched. These variables are compared to clear temporary
- * data from failed pass. For testing and experiments do_check_common() can be
- * run multiple times even when prior attempt to verify is unsuccessful.
- *
- * Note that special handling is needed on !env->bypass_spec_v1 if this is
- * ever called outside of error path with subsequent program rejection.
- */
-static void sanitize_insn_aux_data(struct bpf_verifier_env *env)
-{
-       struct bpf_insn *insn = env->prog->insnsi;
-       struct bpf_insn_aux_data *aux;
-       int i, class;
-
-       for (i = 0; i < env->prog->len; i++) {
-               class = BPF_CLASS(insn[i].code);
-               if (class != BPF_LDX && class != BPF_STX)
-                       continue;
-               aux = &env->insn_aux_data[i];
-               if (aux->seen != env->pass_cnt)
-                       continue;
-               memset(aux, 0, offsetof(typeof(*aux), orig_idx));
-       }
-}
-
 static int do_check_common(struct bpf_verifier_env *env, int subprog)
 {
        bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
@@ -13200,9 +13153,6 @@ out:
        if (!ret && pop_log)
                bpf_vlog_reset(&env->log, 0);
        free_states(env);
-       if (ret)
-               /* clean aux data in case subprog was rejected */
-               sanitize_insn_aux_data(env);
        return ret;
 }
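
The verifier change above swaps the Spectre v4 mitigation strategy: instead of pre-zeroing a suspicious stack slot and letting the original store overwrite it, the store is kept as-is and followed by the new BPF_ST_NOSPEC barrier instruction. A minimal sketch of how a JIT might lower that opcode, assuming a hypothetical emit_lfence() helper standing in for the architecture's speculation barrier (the real per-architecture emitters differ):

        /* Hedged sketch: lowering BPF_ST | BPF_NOSPEC inside a JIT's
         * instruction-translation switch; emit_lfence() is assumed,
         * not an actual kernel helper.
         */
        case BPF_ST | BPF_NOSPEC:
                /* Fence speculation so a speculative-store-bypass
                 * gadget cannot observe stale slot contents from
                 * before the preceding store.
                 */
                emit_lfence(&prog);
                break;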
 
index 8d6bf56..de2c432 100644
@@ -1221,9 +1221,7 @@ int cgroup1_get_tree(struct fs_context *fc)
                ret = cgroup_do_get_tree(fc);
 
        if (!ret && percpu_ref_is_dying(&ctx->root->cgrp.self.refcnt)) {
-               struct super_block *sb = fc->root->d_sb;
-               dput(fc->root);
-               deactivate_locked_super(sb);
+               fc_drop_locked(fc);
                ret = 1;
        }
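
The cgroup1 hunk above folds an open-coded root drop into the fc_drop_locked() helper. Reconstructed purely from the two removed lines, such a helper would look roughly like this sketch (the real definition lives in the VFS; clearing fc->root here is an assumption):

        /* Hedged sketch of fc_drop_locked(), inferred from the lines
         * it replaces above.
         */
        void fc_drop_locked(struct fs_context *fc)
        {
                struct super_block *sb = fc->root->d_sb;

                dput(fc->root);
                fc->root = NULL;   /* assumed: helper clears the root */
                deactivate_locked_super(sb);
        }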
 
index 910ae69..af4a6ef 100644
@@ -5,6 +5,13 @@
  */
 #include <linux/dma-map-ops.h>
 
+static struct page *dma_common_vaddr_to_page(void *cpu_addr)
+{
+       if (is_vmalloc_addr(cpu_addr))
+               return vmalloc_to_page(cpu_addr);
+       return virt_to_page(cpu_addr);
+}
+
 /*
  * Create scatter-list for the already allocated DMA buffer.
  */
@@ -12,7 +19,7 @@ int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt,
                 void *cpu_addr, dma_addr_t dma_addr, size_t size,
                 unsigned long attrs)
 {
-       struct page *page = virt_to_page(cpu_addr);
+       struct page *page = dma_common_vaddr_to_page(cpu_addr);
        int ret;
 
        ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
@@ -32,6 +39,7 @@ int dma_common_mmap(struct device *dev, struct vm_area_struct *vma,
        unsigned long user_count = vma_pages(vma);
        unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
        unsigned long off = vma->vm_pgoff;
+       struct page *page = dma_common_vaddr_to_page(cpu_addr);
        int ret = -ENXIO;
 
        vma->vm_page_prot = dma_pgprot(dev, vma->vm_page_prot, attrs);
@@ -43,7 +51,7 @@ int dma_common_mmap(struct device *dev, struct vm_area_struct *vma,
                return -ENXIO;
 
        return remap_pfn_range(vma, vma->vm_start,
-                       page_to_pfn(virt_to_page(cpu_addr)) + vma->vm_pgoff,
+                       page_to_pfn(page) + vma->vm_pgoff,
                        user_count << PAGE_SHIFT, vma->vm_page_prot);
 #else
        return -ENXIO;
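
virt_to_page() is plain address arithmetic on the kernel linear map, so handing it a vmalloc-area address (which some dma_alloc_* paths return) produces a bogus struct page. The new helper dispatches on the address range first; a hedged restatement with the reasoning spelled out:

        struct page *page;

        if (is_vmalloc_addr(cpu_addr))
                page = vmalloc_to_page(cpu_addr);  /* walks the page tables */
        else
                page = virt_to_page(cpu_addr);     /* linear-map arithmetic */
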
index bc94b2c..e8b41e2 100644
@@ -2083,6 +2083,7 @@ static __latent_entropy struct task_struct *copy_process(
 #endif
 #ifdef CONFIG_BPF_SYSCALL
        RCU_INIT_POINTER(p->bpf_storage, NULL);
+       p->bpf_ctx = NULL;
 #endif
 
        /* Perform scheduler related setup. Assign this task to a CPU. */
index e416304..cf6acab 100644
@@ -47,7 +47,7 @@ void __init idle_thread_set_boot_cpu(void)
  *
  * Creates the thread if it does not exist.
  */
-static inline void idle_init(unsigned int cpu)
+static __always_inline void idle_init(unsigned int cpu)
 {
        struct task_struct *tsk = per_cpu(idle_threads, cpu);
 
index 29a5e54..517be7f 100644
@@ -991,6 +991,11 @@ static void posix_cpu_timer_rearm(struct k_itimer *timer)
        if (!p)
                goto out;
 
+       /* Protect timer list r/w in arm_timer() */
+       sighand = lock_task_sighand(p, &flags);
+       if (unlikely(sighand == NULL))
+               goto out;
+
        /*
         * Fetch the current sample and update the timer's expiry time.
         */
@@ -1001,11 +1006,6 @@ static void posix_cpu_timer_rearm(struct k_itimer *timer)
 
        bump_cpu_timer(timer, now);
 
-       /* Protect timer list r/w in arm_timer() */
-       sighand = lock_task_sighand(p, &flags);
-       if (unlikely(sighand == NULL))
-               goto out;
-
        /*
         * Now re-arm for the new expiry time.
         */
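
The rearm fix above is purely an ordering change: the expiry sample and the re-arm must both happen under sighand->siglock, otherwise the timer can be sampled against state that a concurrent exit or timer deletion is tearing down. A hedged sketch of the resulting shape of posix_cpu_timer_rearm(), where fetch_sample() is a hypothetical stand-in for the clock-specific sampling:

        sighand = lock_task_sighand(p, &flags);   /* take the lock first */
        if (unlikely(sighand == NULL))
                goto out;

        now = fetch_sample(timer, p);             /* hypothetical stand-in */
        bump_cpu_timer(timer, now);               /* sampled under the lock */
        arm_timer(timer, p);                      /* timer list write is safe */

        unlock_task_sighand(p, &flags);
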
index 3fadb58..9eb11c2 100644
@@ -207,6 +207,7 @@ struct timer_base {
        unsigned int            cpu;
        bool                    next_expiry_recalc;
        bool                    is_idle;
+       bool                    timers_pending;
        DECLARE_BITMAP(pending_map, WHEEL_SIZE);
        struct hlist_head       vectors[WHEEL_SIZE];
 } ____cacheline_aligned;
@@ -595,6 +596,7 @@ static void enqueue_timer(struct timer_base *base, struct timer_list *timer,
                 * can reevaluate the wheel:
                 */
                base->next_expiry = bucket_expiry;
+               base->timers_pending = true;
                base->next_expiry_recalc = false;
                trigger_dyntick_cpu(base, timer);
        }
@@ -1582,6 +1584,7 @@ static unsigned long __next_timer_interrupt(struct timer_base *base)
        }
 
        base->next_expiry_recalc = false;
+       base->timers_pending = !(next == base->clk + NEXT_TIMER_MAX_DELTA);
 
        return next;
 }
@@ -1633,7 +1636,6 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
        struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
        u64 expires = KTIME_MAX;
        unsigned long nextevt;
-       bool is_max_delta;
 
        /*
         * Pretend that there is no timer pending if the cpu is offline.
@@ -1646,7 +1648,6 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
        if (base->next_expiry_recalc)
                base->next_expiry = __next_timer_interrupt(base);
        nextevt = base->next_expiry;
-       is_max_delta = (nextevt == base->clk + NEXT_TIMER_MAX_DELTA);
 
        /*
         * We have a fresh next event. Check whether we can forward the
@@ -1664,7 +1665,7 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
                expires = basem;
                base->is_idle = false;
        } else {
-               if (!is_max_delta)
+               if (base->timers_pending)
                        expires = basem + (u64)(nextevt - basej) * TICK_NSEC;
                /*
                 * If we expect to sleep more than a tick, mark the base idle.
@@ -1947,6 +1948,7 @@ int timers_prepare_cpu(unsigned int cpu)
                base = per_cpu_ptr(&timer_bases[b], cpu);
                base->clk = jiffies;
                base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA;
+               base->timers_pending = false;
                base->is_idle = false;
        }
        return 0;
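
The new timers_pending flag replaces the is_max_delta test: deriving "nothing pending" from nextevt == base->clk + NEXT_TIMER_MAX_DELTA is fragile because base->clk can be stale on an idle base, whereas the flag is maintained explicitly at every enqueue and every recalculation. The consumer side then reduces to this hedged sketch of the logic in get_next_timer_interrupt():

        if (time_before_eq(nextevt, basej)) {
                expires = basem;        /* a timer has already expired */
        } else if (base->timers_pending) {
                expires = basem + (u64)(nextevt - basej) * TICK_NSEC;
        }
        /* else: nothing is queued; expires stays KTIME_MAX and the
         * CPU may sleep indefinitely.
         */
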
index 0890600..c5e0b6a 100644
@@ -965,7 +965,7 @@ BPF_CALL_1(bpf_get_func_ip_kprobe, struct pt_regs *, regs)
 {
        struct kprobe *kp = kprobe_running();
 
-       return kp ? (u64) kp->addr : 0;
+       return kp ? (uintptr_t)kp->addr : 0;
 }
 
 static const struct bpf_func_proto bpf_get_func_ip_proto_kprobe = {
@@ -1461,6 +1461,8 @@ raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 const struct bpf_func_proto *
 tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
+       const struct bpf_func_proto *fn;
+
        switch (func_id) {
 #ifdef CONFIG_NET
        case BPF_FUNC_skb_output:
@@ -1501,7 +1503,10 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
        case BPF_FUNC_d_path:
                return &bpf_d_path_proto;
        default:
-               return raw_tp_prog_func_proto(func_id, prog);
+               fn = raw_tp_prog_func_proto(func_id, prog);
+               if (!fn && prog->expected_attach_type == BPF_TRACE_ITER)
+                       fn = bpf_iter_get_func_proto(func_id, prog);
+               return fn;
        }
 }
 
index e6fb3e6..7b180f6 100644
@@ -5985,7 +5985,8 @@ ftrace_graph_release(struct inode *inode, struct file *file)
                 * infrastructure to do the synchronization, thus we must do it
                 * ourselves.
                 */
-               synchronize_rcu_tasks_rude();
+               if (old_hash != EMPTY_HASH)
+                       synchronize_rcu_tasks_rude();
 
                free_ftrace_hash(old_hash);
        }
@@ -7544,7 +7545,7 @@ int ftrace_is_dead(void)
  */
 int register_ftrace_function(struct ftrace_ops *ops)
 {
-       int ret = -1;
+       int ret;
 
        ftrace_ops_init(ops);
 
index d1463ea..e592d1d 100644
@@ -3880,10 +3880,30 @@ static bool rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
        if (unlikely(!head))
                return true;
 
-       return reader->read == rb_page_commit(reader) &&
-               (commit == reader ||
-                (commit == head &&
-                 head->read == rb_page_commit(commit)));
+       /* Reader should exhaust content in reader page */
+       if (reader->read != rb_page_commit(reader))
+               return false;
+
+       /*
+        * If writers are committing on the reader page and all committed
+        * content has already been read, the ring buffer is empty.
+        */
+       if (commit == reader)
+               return true;
+
+       /*
+        * If writers are committing on a page other than the reader
+        * page and the head page, there should always be content to read.
+        */
+       if (commit != head)
+               return false;
+
+       /*
+        * Writers are committing on the head page; we just need to
+        * check whether any data has been committed there. The reader
+        * will swap the reader page with the head page when it goes
+        * to read.
+        */
+       return rb_page_commit(commit) == 0;
 }
 
 /**
index f8b80b5..33899a7 100644
@@ -5609,6 +5609,10 @@ static const char readme_msg[] =
        "\t            [:name=histname1]\n"
        "\t            [:<handler>.<action>]\n"
        "\t            [if <filter>]\n\n"
+       "\t    Note, special fields can be used as well:\n"
+       "\t            common_timestamp - to record current timestamp\n"
+       "\t            common_cpu - to record the CPU the event happened on\n"
+       "\n"
        "\t    When a matching event is hit, an entry is added to a hash\n"
        "\t    table using the key(s) and value(s) named, and the value of a\n"
        "\t    sum called 'hitcount' is incremented.  Keys and values\n"
@@ -9131,8 +9135,10 @@ static int trace_array_create_dir(struct trace_array *tr)
                return -EINVAL;
 
        ret = event_trace_add_tracer(tr->dir, tr);
-       if (ret)
+       if (ret) {
                tracefs_remove(tr->dir);
+               return ret;
+       }
 
        init_tracer_tracefs(tr, tr->dir);
        __update_tracer_options(tr);
index 16a9dfc..949ef09 100644
@@ -65,7 +65,8 @@
        C(INVALID_SORT_MODIFIER,"Invalid sort modifier"),               \
        C(EMPTY_SORT_FIELD,     "Empty sort field"),                    \
        C(TOO_MANY_SORT_FIELDS, "Too many sort fields (Max = 2)"),      \
-       C(INVALID_SORT_FIELD,   "Sort field must be a key or a val"),
+       C(INVALID_SORT_FIELD,   "Sort field must be a key or a val"),   \
+       C(INVALID_STR_OPERAND,  "String type cannot be an operand in an expression"),
 
 #undef C
 #define C(a, b)                HIST_ERR_##a
@@ -1111,7 +1112,7 @@ static const char *hist_field_name(struct hist_field *field,
                 field->flags & HIST_FIELD_FL_ALIAS)
                field_name = hist_field_name(field->operands[0], ++level);
        else if (field->flags & HIST_FIELD_FL_CPU)
-               field_name = "cpu";
+               field_name = "common_cpu";
        else if (field->flags & HIST_FIELD_FL_EXPR ||
                 field->flags & HIST_FIELD_FL_VAR_REF) {
                if (field->system) {
@@ -1991,14 +1992,24 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file,
                hist_data->enable_timestamps = true;
                if (*flags & HIST_FIELD_FL_TIMESTAMP_USECS)
                        hist_data->attrs->ts_in_usecs = true;
-       } else if (strcmp(field_name, "cpu") == 0)
+       } else if (strcmp(field_name, "common_cpu") == 0)
                *flags |= HIST_FIELD_FL_CPU;
        else {
                field = trace_find_event_field(file->event_call, field_name);
                if (!field || !field->size) {
-                       hist_err(tr, HIST_ERR_FIELD_NOT_FOUND, errpos(field_name));
-                       field = ERR_PTR(-EINVAL);
-                       goto out;
+                       /*
+                        * For backward compatibility, if field_name
+                        * was "cpu", then we treat this the same as
+                        * common_cpu.
+                        */
+                       if (strcmp(field_name, "cpu") == 0) {
+                               *flags |= HIST_FIELD_FL_CPU;
+                       } else {
+                               hist_err(tr, HIST_ERR_FIELD_NOT_FOUND,
+                                        errpos(field_name));
+                               field = ERR_PTR(-EINVAL);
+                               goto out;
+                       }
                }
        }
  out:
@@ -2146,6 +2157,13 @@ static struct hist_field *parse_unary(struct hist_trigger_data *hist_data,
                ret = PTR_ERR(operand1);
                goto free;
        }
+       if (operand1->flags & HIST_FIELD_FL_STRING) {
+               /* A string cannot be the operand of a unary operator. */
+               hist_err(file->tr, HIST_ERR_INVALID_STR_OPERAND, errpos(str));
+               destroy_hist_field(operand1, 0);
+               ret = -EINVAL;
+               goto free;
+       }
 
        expr->flags |= operand1->flags &
                (HIST_FIELD_FL_TIMESTAMP | HIST_FIELD_FL_TIMESTAMP_USECS);
@@ -2247,6 +2265,11 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data,
                operand1 = NULL;
                goto free;
        }
+       if (operand1->flags & HIST_FIELD_FL_STRING) {
+               hist_err(file->tr, HIST_ERR_INVALID_STR_OPERAND, errpos(operand1_str));
+               ret = -EINVAL;
+               goto free;
+       }
 
        /* rest of string could be another expression e.g. b+c in a+b+c */
        operand_flags = 0;
@@ -2256,6 +2279,11 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data,
                operand2 = NULL;
                goto free;
        }
+       if (operand2->flags & HIST_FIELD_FL_STRING) {
+               hist_err(file->tr, HIST_ERR_INVALID_STR_OPERAND, errpos(str));
+               ret = -EINVAL;
+               goto free;
+       }
 
        ret = check_expr_operands(file->tr, operand1, operand2);
        if (ret)
@@ -2277,6 +2305,10 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data,
 
        expr->operands[0] = operand1;
        expr->operands[1] = operand2;
+
+       /* The operand sizes should be the same, so just pick one */
+       expr->size = operand1->size;
+
        expr->operator = field_op;
        expr->name = expr_str(expr, 0);
        expr->type = kstrdup(operand1->type, GFP_KERNEL);
@@ -5085,7 +5117,7 @@ static void hist_field_print(struct seq_file *m, struct hist_field *hist_field)
                seq_printf(m, "%s=", hist_field->var.name);
 
        if (hist_field->flags & HIST_FIELD_FL_CPU)
-               seq_puts(m, "cpu");
+               seq_puts(m, "common_cpu");
        else if (field_name) {
                if (hist_field->flags & HIST_FIELD_FL_VAR_REF ||
                    hist_field->flags & HIST_FIELD_FL_ALIAS)
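
With this rename, histogram triggers refer to the CPU with the same common_ prefix as the other synthetic fields, while a plain "cpu" keyword is still accepted when the event defines no field of that name. A hypothetical trigger (event path illustrative, relative to the tracefs events directory):

        echo 'hist:keys=common_cpu:vals=hitcount' > sched/sched_switch/trigger
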
index 2ac75eb..9315fc0 100644
@@ -893,15 +893,13 @@ static struct synth_event *alloc_synth_event(const char *name, int n_fields,
        dyn_event_init(&event->devent, &synth_event_ops);
 
        for (i = 0, j = 0; i < n_fields; i++) {
+               fields[i]->field_pos = i;
                event->fields[i] = fields[i];
 
-               if (fields[i]->is_dynamic) {
-                       event->dynamic_fields[j] = fields[i];
-                       event->dynamic_fields[j]->field_pos = i;
+               if (fields[i]->is_dynamic)
                        event->dynamic_fields[j++] = fields[i];
-                       event->n_dynamic_fields++;
-               }
        }
+       event->n_dynamic_fields = j;
        event->n_fields = n_fields;
  out:
        return event;
index a6c0cda..14f46aa 100644
@@ -327,7 +327,7 @@ static void move_to_next_cpu(void)
 
        get_online_cpus();
        cpumask_and(current_mask, cpu_online_mask, tr->tracing_cpumask);
-       next_cpu = cpumask_next(smp_processor_id(), current_mask);
+       next_cpu = cpumask_next(raw_smp_processor_id(), current_mask);
        put_online_cpus();
 
        if (next_cpu >= nr_cpu_ids)
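
smp_processor_id() carries a debug check that fires when the caller is preemptible, since the returned CPU may be stale by the time it is used. move_to_next_cpu() runs preemptible here (get_online_cpus() does not disable preemption), and the value only seeds a cpumask search, so a momentarily stale CPU id is harmless:

        /* raw_smp_processor_id() reads the same value without the
         * CONFIG_DEBUG_PREEMPT check; acceptable whenever a stale
         * answer cannot cause harm, as for a search starting point.
         */
        next_cpu = cpumask_next(raw_smp_processor_id(), current_mask);
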
index 6e146b9..4007fe9 100644
@@ -14,10 +14,10 @@ struct synth_field {
        char *name;
        size_t size;
        unsigned int offset;
+       unsigned int field_pos;
        bool is_signed;
        bool is_string;
        bool is_dynamic;
-       bool field_pos;
 };
 
 struct synth_event {
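
The header change is the substantive fix: field_pos was declared bool, so every nonzero position collapsed to 1 and all dynamic fields past the first appeared to sit at position 1. A small self-contained illustration of that truncation (standalone C, not kernel code):

        #include <stdbool.h>
        #include <stdio.h>

        int main(void)
        {
                bool pos_as_bool = 3;   /* any nonzero value becomes 1 */
                unsigned int pos = 3;   /* keeps the actual index */

                printf("%d %u\n", pos_as_bool, pos);   /* prints "1 3" */
                return 0;
        }
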
index 976bf8c..fc32821 100644
@@ -299,8 +299,8 @@ static int tracepoint_add_func(struct tracepoint *tp,
         * a pointer to it.  This array is referenced by __DO_TRACE from
         * include/linux/tracepoint.h using rcu_dereference_sched().
         */
-       rcu_assign_pointer(tp->funcs, tp_funcs);
        tracepoint_update_call(tp, tp_funcs, false);
+       rcu_assign_pointer(tp->funcs, tp_funcs);
        static_key_enable(&tp->key);
 
        release_probes(old);
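
The two swapped lines implement the classic RCU publish rule: make all state a reader may dereference fully consistent before publishing the pointer, because rcu_assign_pointer() only orders stores issued before it. In generic form (a hedged sketch, not the tracepoint code):

        struct cfg {
                int threshold;
                int limit;
        };

        static struct cfg __rcu *live_cfg;

        static void publish_cfg(struct cfg *c)
        {
                c->threshold = 10;                 /* initialize everything... */
                c->limit = 20;
                rcu_assign_pointer(live_cfg, c);   /* ...then publish last */
        }

        /* A reader doing rcu_dereference(live_cfg) under rcu_read_lock()
         * that sees the new pointer is guaranteed to see fully
         * initialized fields, never a half-built object.
         */
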
index 87799e2..77be3bb 100644
@@ -160,6 +160,7 @@ struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid)
 {
        struct hlist_head *hashent = ucounts_hashentry(ns, uid);
        struct ucounts *ucounts, *new;
+       long overflow;
 
        spin_lock_irq(&ucounts_lock);
        ucounts = find_ucounts(ns, uid, hashent);
@@ -184,8 +185,12 @@ struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid)
                        return new;
                }
        }
+       overflow = atomic_add_negative(1, &ucounts->count);
        spin_unlock_irq(&ucounts_lock);
-       ucounts = get_ucounts(ucounts);
+       if (overflow) {
+               put_ucounts(ucounts);
+               return NULL;
+       }
        return ucounts;
 }
 
@@ -193,8 +198,7 @@ void put_ucounts(struct ucounts *ucounts)
 {
        unsigned long flags;
 
-       if (atomic_dec_and_test(&ucounts->count)) {
-               spin_lock_irqsave(&ucounts_lock, flags);
+       if (atomic_dec_and_lock_irqsave(&ucounts->count, &ucounts_lock, flags)) {
                hlist_del_init(&ucounts->node);
                spin_unlock_irqrestore(&ucounts_lock, flags);
                kfree(ucounts);
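
Two idioms are at work in this file: on the get side, atomic_add_negative() both takes the new reference and detects that the counter wrapped past INT_MAX; on the put side, atomic_dec_and_lock_irqsave() takes the spinlock only when the count actually reaches zero, so lookup and teardown cannot race. A hedged sketch of the put-side pattern for a generic hashed object (obj, table_lock and the list node are hypothetical):

        static void put_obj(struct obj *o)
        {
                unsigned long flags;

                /* Cheap atomic decrement; the lock is taken only when
                 * this was the last reference.
                 */
                if (!atomic_dec_and_lock_irqsave(&o->count, &table_lock, flags))
                        return;

                hlist_del_init(&o->node);  /* unpublish under the lock */
                spin_unlock_irqrestore(&table_lock, flags);
                kfree(o);
        }
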
index 50142fc..f148eac 100644
@@ -3676,15 +3676,21 @@ static void pwq_unbound_release_workfn(struct work_struct *work)
                                                  unbound_release_work);
        struct workqueue_struct *wq = pwq->wq;
        struct worker_pool *pool = pwq->pool;
-       bool is_last;
+       bool is_last = false;
 
-       if (WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND)))
-               return;
+       /*
+        * When @pwq is not linked, it holds no reference to @wq, and
+        * @wq must not be accessed.
+        */
+       if (!list_empty(&pwq->pwqs_node)) {
+               if (WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND)))
+                       return;
 
-       mutex_lock(&wq->mutex);
-       list_del_rcu(&pwq->pwqs_node);
-       is_last = list_empty(&wq->pwqs);
-       mutex_unlock(&wq->mutex);
+               mutex_lock(&wq->mutex);
+               list_del_rcu(&pwq->pwqs_node);
+               is_last = list_empty(&wq->pwqs);
+               mutex_unlock(&wq->mutex);
+       }
 
        mutex_lock(&wq_pool_mutex);
        put_unbound_pool(pool);
index d241fe4..5c9c068 100644
@@ -683,9 +683,6 @@ config PARMAN
 config OBJAGG
        tristate "objagg" if COMPILE_TEST
 
-config STRING_SELFTEST
-       tristate "Test string functions"
-
 endmenu
 
 config GENERIC_IOREMAP
index 8312127..5ddd575 100644
@@ -2180,6 +2180,9 @@ config ASYNC_RAID6_TEST
 config TEST_HEXDUMP
        tristate "Test functions located in the hexdump module at runtime"
 
+config STRING_SELFTEST
+       tristate "Test string functions at runtime"
+
 config TEST_STRING_HELPERS
        tristate "Test functions located in the string_helpers module at runtime"
 
index d500320..44d8197 100644
@@ -461,6 +461,41 @@ static int bpf_fill_stxdw(struct bpf_test *self)
        return __bpf_fill_stxdw(self, BPF_DW);
 }
 
+static int bpf_fill_long_jmp(struct bpf_test *self)
+{
+       unsigned int len = BPF_MAXINSNS;
+       struct bpf_insn *insn;
+       int i;
+
+       insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL);
+       if (!insn)
+               return -ENOMEM;
+
+       insn[0] = BPF_ALU64_IMM(BPF_MOV, R0, 1);
+       insn[1] = BPF_JMP_IMM(BPF_JEQ, R0, 1, len - 2 - 1);
+
+       /*
+        * Fill with a complex 64-bit operation that expands to a lot of
+        * instructions on 32-bit JITs. The large jump offset can then
+        * overflow the conditional branch field size, triggering a branch
+        * conversion mechanism in some JITs.
+        *
+        * Note: BPF_MAXINSNS ALU64 MUL instructions are enough to trigger
+        * such branch conversion on the 32-bit MIPS JIT. For other JITs, the
+        * count and/or operation may need to be modified to trigger the
+        * branch conversion.
+        */
+       for (i = 2; i < len - 1; i++)
+               insn[i] = BPF_ALU64_IMM(BPF_MUL, R0, (i << 16) + i);
+
+       insn[len - 1] = BPF_EXIT_INSN();
+
+       self->u.ptr.insns = insn;
+       self->u.ptr.len = len;
+
+       return 0;
+}
+
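
As a rough, illustrative calculation of why this works (the figures are not measured): with BPF_MAXINSNS = 4096 and an ALU64 multiply expanding to on the order of ten native instructions on a 32-bit JIT, the conditional branch at the top must skip roughly 40k native instructions, about 160 KB of code at 4 bytes each. That exceeds, for example, the +/-128 KB reach of a MIPS conditional branch's 16-bit word-shifted displacement, forcing the JIT to convert the branch into its long-jump form.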
 static struct bpf_test tests[] = {
        {
                "TAX",
@@ -1916,6 +1951,163 @@ static struct bpf_test tests[] = {
                { },
                { { 0, -1 } }
        },
+       {
+               /*
+                * Register (non-)clobbering test, in the case where a 32-bit
+                * JIT implements complex ALU64 operations via function calls.
+                * If so, the function call must be invisible in the eBPF
+                * registers. The JIT must then save and restore relevant
+                * registers during the call. The following tests check that
+                * the eBPF registers retain their values after such a call.
+                */
+               "INT: Register clobbering, R1 updated",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 123456789),
+                       BPF_ALU32_IMM(BPF_MOV, R2, 2),
+                       BPF_ALU32_IMM(BPF_MOV, R3, 3),
+                       BPF_ALU32_IMM(BPF_MOV, R4, 4),
+                       BPF_ALU32_IMM(BPF_MOV, R5, 5),
+                       BPF_ALU32_IMM(BPF_MOV, R6, 6),
+                       BPF_ALU32_IMM(BPF_MOV, R7, 7),
+                       BPF_ALU32_IMM(BPF_MOV, R8, 8),
+                       BPF_ALU32_IMM(BPF_MOV, R9, 9),
+                       BPF_ALU64_IMM(BPF_DIV, R1, 123456789),
+                       BPF_JMP_IMM(BPF_JNE, R0, 0, 10),
+                       BPF_JMP_IMM(BPF_JNE, R1, 1, 9),
+                       BPF_JMP_IMM(BPF_JNE, R2, 2, 8),
+                       BPF_JMP_IMM(BPF_JNE, R3, 3, 7),
+                       BPF_JMP_IMM(BPF_JNE, R4, 4, 6),
+                       BPF_JMP_IMM(BPF_JNE, R5, 5, 5),
+                       BPF_JMP_IMM(BPF_JNE, R6, 6, 4),
+                       BPF_JMP_IMM(BPF_JNE, R7, 7, 3),
+                       BPF_JMP_IMM(BPF_JNE, R8, 8, 2),
+                       BPF_JMP_IMM(BPF_JNE, R9, 9, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } }
+       },
+       {
+               "INT: Register clobbering, R2 updated",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R2, 2 * 123456789),
+                       BPF_ALU32_IMM(BPF_MOV, R3, 3),
+                       BPF_ALU32_IMM(BPF_MOV, R4, 4),
+                       BPF_ALU32_IMM(BPF_MOV, R5, 5),
+                       BPF_ALU32_IMM(BPF_MOV, R6, 6),
+                       BPF_ALU32_IMM(BPF_MOV, R7, 7),
+                       BPF_ALU32_IMM(BPF_MOV, R8, 8),
+                       BPF_ALU32_IMM(BPF_MOV, R9, 9),
+                       BPF_ALU64_IMM(BPF_DIV, R2, 123456789),
+                       BPF_JMP_IMM(BPF_JNE, R0, 0, 10),
+                       BPF_JMP_IMM(BPF_JNE, R1, 1, 9),
+                       BPF_JMP_IMM(BPF_JNE, R2, 2, 8),
+                       BPF_JMP_IMM(BPF_JNE, R3, 3, 7),
+                       BPF_JMP_IMM(BPF_JNE, R4, 4, 6),
+                       BPF_JMP_IMM(BPF_JNE, R5, 5, 5),
+                       BPF_JMP_IMM(BPF_JNE, R6, 6, 4),
+                       BPF_JMP_IMM(BPF_JNE, R7, 7, 3),
+                       BPF_JMP_IMM(BPF_JNE, R8, 8, 2),
+                       BPF_JMP_IMM(BPF_JNE, R9, 9, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } }
+       },
+       {
+               /*
+                * Test 32-bit JITs that implement complex ALU64 operations as
+                * function calls R0 = f(R1, R2) and must therefore rearrange
+                * the operands to fit that calling convention.
+                */
+#define NUMER 0xfedcba9876543210ULL
+#define DENOM 0x0123456789abcdefULL
+               "ALU64_DIV X: Operand register permutations",
+               .u.insns_int = {
+                       /* R0 / R2 */
+                       BPF_LD_IMM64(R0, NUMER),
+                       BPF_LD_IMM64(R2, DENOM),
+                       BPF_ALU64_REG(BPF_DIV, R0, R2),
+                       BPF_JMP_IMM(BPF_JEQ, R0, NUMER / DENOM, 1),
+                       BPF_EXIT_INSN(),
+                       /* R1 / R0 */
+                       BPF_LD_IMM64(R1, NUMER),
+                       BPF_LD_IMM64(R0, DENOM),
+                       BPF_ALU64_REG(BPF_DIV, R1, R0),
+                       BPF_JMP_IMM(BPF_JEQ, R1, NUMER / DENOM, 1),
+                       BPF_EXIT_INSN(),
+                       /* R0 / R1 */
+                       BPF_LD_IMM64(R0, NUMER),
+                       BPF_LD_IMM64(R1, DENOM),
+                       BPF_ALU64_REG(BPF_DIV, R0, R1),
+                       BPF_JMP_IMM(BPF_JEQ, R0, NUMER / DENOM, 1),
+                       BPF_EXIT_INSN(),
+                       /* R2 / R0 */
+                       BPF_LD_IMM64(R2, NUMER),
+                       BPF_LD_IMM64(R0, DENOM),
+                       BPF_ALU64_REG(BPF_DIV, R2, R0),
+                       BPF_JMP_IMM(BPF_JEQ, R2, NUMER / DENOM, 1),
+                       BPF_EXIT_INSN(),
+                       /* R2 / R1 */
+                       BPF_LD_IMM64(R2, NUMER),
+                       BPF_LD_IMM64(R1, DENOM),
+                       BPF_ALU64_REG(BPF_DIV, R2, R1),
+                       BPF_JMP_IMM(BPF_JEQ, R2, NUMER / DENOM, 1),
+                       BPF_EXIT_INSN(),
+                       /* R1 / R2 */
+                       BPF_LD_IMM64(R1, NUMER),
+                       BPF_LD_IMM64(R2, DENOM),
+                       BPF_ALU64_REG(BPF_DIV, R1, R2),
+                       BPF_JMP_IMM(BPF_JEQ, R1, NUMER / DENOM, 1),
+                       BPF_EXIT_INSN(),
+                       /* R1 / R1 */
+                       BPF_LD_IMM64(R1, NUMER),
+                       BPF_ALU64_REG(BPF_DIV, R1, R1),
+                       BPF_JMP_IMM(BPF_JEQ, R1, 1, 1),
+                       BPF_EXIT_INSN(),
+                       /* R2 / R2 */
+                       BPF_LD_IMM64(R2, DENOM),
+                       BPF_ALU64_REG(BPF_DIV, R2, R2),
+                       BPF_JMP_IMM(BPF_JEQ, R2, 1, 1),
+                       BPF_EXIT_INSN(),
+                       /* R3 / R4 */
+                       BPF_LD_IMM64(R3, NUMER),
+                       BPF_LD_IMM64(R4, DENOM),
+                       BPF_ALU64_REG(BPF_DIV, R3, R4),
+                       BPF_JMP_IMM(BPF_JEQ, R3, NUMER / DENOM, 1),
+                       BPF_EXIT_INSN(),
+                       /* Successful return */
+                       BPF_LD_IMM64(R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } },
+#undef NUMER
+#undef DENOM
+       },
+#ifdef CONFIG_32BIT
+       {
+               "INT: 32-bit context pointer word order and zero-extension",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_JMP32_IMM(BPF_JEQ, R1, 0, 3),
+                       BPF_ALU64_IMM(BPF_RSH, R1, 32),
+                       BPF_JMP32_IMM(BPF_JNE, R1, 0, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } }
+       },
+#endif
        {
                "check: missing ret",
                .u.insns = {
@@ -2360,6 +2552,48 @@ static struct bpf_test tests[] = {
                { },
                { { 0, 0x1 } },
        },
+       {
+               "ALU_MOV_K: small negative",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, -123),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, -123 } }
+       },
+       {
+               "ALU_MOV_K: small negative zero extension",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, -123),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0 } }
+       },
+       {
+               "ALU_MOV_K: large negative",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, -123456789),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, -123456789 } }
+       },
+       {
+               "ALU_MOV_K: large negative zero extension",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, -123456789),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0 } }
+       },
        {
                "ALU64_MOV_K: dst = 2",
                .u.insns_int = {
@@ -2412,6 +2646,48 @@ static struct bpf_test tests[] = {
                { },
                { { 0, 0x1 } },
        },
+       {
+               "ALU64_MOV_K: small negative",
+               .u.insns_int = {
+                       BPF_ALU64_IMM(BPF_MOV, R0, -123),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, -123 } }
+       },
+       {
+               "ALU64_MOV_K: small negative sign extension",
+               .u.insns_int = {
+                       BPF_ALU64_IMM(BPF_MOV, R0, -123),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xffffffff } }
+       },
+       {
+               "ALU64_MOV_K: large negative",
+               .u.insns_int = {
+                       BPF_ALU64_IMM(BPF_MOV, R0, -123456789),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, -123456789 } }
+       },
+       {
+               "ALU64_MOV_K: large negative sign extension",
+               .u.insns_int = {
+                       BPF_ALU64_IMM(BPF_MOV, R0, -123456789),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xffffffff } }
+       },
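
Taken together, the four pairs of MOV tests pin down the eBPF immediate-extension rules: a 32-bit ALU MOV zero-extends its result into the upper register half, while a 64-bit ALU MOV sign-extends its 32-bit immediate. Worked out for -123, which is 0xffffff85 as a 32-bit two's-complement value:

        BPF_ALU32_IMM(BPF_MOV, R0, -123)   /* R0 = 0x00000000ffffff85 */
        BPF_ALU64_IMM(BPF_MOV, R0, -123)   /* R0 = 0xffffffffffffff85 */

        /* Hence the expectations above: shifting right by 32 yields 0
         * in the first case and 0xffffffff in the second.
         */
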
        /* BPF_ALU | BPF_ADD | BPF_X */
        {
                "ALU_ADD_X: 1 + 2 = 3",
@@ -2967,6 +3243,31 @@ static struct bpf_test tests[] = {
                { },
                { { 0, 2147483647 } },
        },
+       {
+               "ALU64_MUL_X: 64x64 multiply, low word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0fedcba987654321LL),
+                       BPF_LD_IMM64(R1, 0x123456789abcdef0LL),
+                       BPF_ALU64_REG(BPF_MUL, R0, R1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xe5618cf0 } }
+       },
+       {
+               "ALU64_MUL_X: 64x64 multiply, high word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0fedcba987654321LL),
+                       BPF_LD_IMM64(R1, 0x123456789abcdef0LL),
+                       BPF_ALU64_REG(BPF_MUL, R0, R1),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x2236d88f } }
+       },
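
These two tests exercise exactly the case where a 32-bit JIT must assemble a 64x64 multiply from 32x32 partial products. A standalone sketch of that decomposition (plain C; the expected value is simply the concatenation of the high and low words checked by the two tests above):

        #include <stdint.h>

        /* Low 64 bits of a 64x64 multiply, built the way a 32-bit JIT
         * would: one widening low product plus two truncated cross
         * products feeding the high word.
         */
        static uint64_t mul64(uint64_t a, uint64_t b)
        {
                uint32_t a_lo = (uint32_t)a, a_hi = (uint32_t)(a >> 32);
                uint32_t b_lo = (uint32_t)b, b_hi = (uint32_t)(b >> 32);
                uint64_t lo = (uint64_t)a_lo * b_lo;
                uint32_t hi = (uint32_t)(lo >> 32) + a_hi * b_lo + a_lo * b_hi;

                return ((uint64_t)hi << 32) | (uint32_t)lo;
        }

        int main(void)
        {
                return mul64(0x0fedcba987654321ULL, 0x123456789abcdef0ULL)
                        == 0x2236d88fe5618cf0ULL ? 0 : 1;
        }
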
        /* BPF_ALU | BPF_MUL | BPF_K */
        {
                "ALU_MUL_K: 2 * 3 = 6",
@@ -3077,6 +3378,29 @@ static struct bpf_test tests[] = {
                { },
                { { 0, 0x1 } },
        },
+       {
+               "ALU64_MUL_K: 64x32 multiply, low word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_MUL, R0, 0x12345678),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xe242d208 } }
+       },
+       {
+               "ALU64_MUL_K: 64x32 multiply, high word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_MUL, R0, 0x12345678),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xc28f5c28 } }
+       },
        /* BPF_ALU | BPF_DIV | BPF_X */
        {
                "ALU_DIV_X: 6 / 2 = 3",
@@ -3430,6 +3754,44 @@ static struct bpf_test tests[] = {
                { },
                { { 0, 0xffffffff } },
        },
+       {
+               "ALU_AND_K: Small immediate",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0x01020304),
+                       BPF_ALU32_IMM(BPF_AND, R0, 15),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 4 } }
+       },
+       {
+               "ALU_AND_K: Large immediate",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0xf1f2f3f4),
+                       BPF_ALU32_IMM(BPF_AND, R0, 0xafbfcfdf),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xa1b2c3d4 } }
+       },
+       {
+               "ALU_AND_K: Zero extension",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_LD_IMM64(R1, 0x0000000080a0c0e0LL),
+                       BPF_ALU32_IMM(BPF_AND, R0, 0xf0f0f0f0),
+                       BPF_JMP_REG(BPF_JEQ, R0, R1, 2),
+                       BPF_MOV32_IMM(R0, 2),
+                       BPF_EXIT_INSN(),
+                       BPF_MOV32_IMM(R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } }
+       },
        {
                "ALU64_AND_K: 3 & 2 = 2",
                .u.insns_int = {
@@ -3453,7 +3815,7 @@ static struct bpf_test tests[] = {
                { { 0, 0xffffffff } },
        },
        {
-               "ALU64_AND_K: 0x0000ffffffff0000 & 0x0 = 0x0000ffff00000000",
+               "ALU64_AND_K: 0x0000ffffffff0000 & 0x0 = 0x0000000000000000",
                .u.insns_int = {
                        BPF_LD_IMM64(R2, 0x0000ffffffff0000LL),
                        BPF_LD_IMM64(R3, 0x0000000000000000LL),
@@ -3469,7 +3831,7 @@ static struct bpf_test tests[] = {
                { { 0, 0x1 } },
        },
        {
-               "ALU64_AND_K: 0x0000ffffffff0000 & -1 = 0x0000ffffffffffff",
+               "ALU64_AND_K: 0x0000ffffffff0000 & -1 = 0x0000ffffffff0000",
                .u.insns_int = {
                        BPF_LD_IMM64(R2, 0x0000ffffffff0000LL),
                        BPF_LD_IMM64(R3, 0x0000ffffffff0000LL),
@@ -3500,6 +3862,38 @@ static struct bpf_test tests[] = {
                { },
                { { 0, 0x1 } },
        },
+       {
+               "ALU64_AND_K: Sign extension 1",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_LD_IMM64(R1, 0x00000000090b0d0fLL),
+                       BPF_ALU64_IMM(BPF_AND, R0, 0x0f0f0f0f),
+                       BPF_JMP_REG(BPF_JEQ, R0, R1, 2),
+                       BPF_MOV32_IMM(R0, 2),
+                       BPF_EXIT_INSN(),
+                       BPF_MOV32_IMM(R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } }
+       },
+       {
+               "ALU64_AND_K: Sign extension 2",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_LD_IMM64(R1, 0x0123456780a0c0e0LL),
+                       BPF_ALU64_IMM(BPF_AND, R0, 0xf0f0f0f0),
+                       BPF_JMP_REG(BPF_JEQ, R0, R1, 2),
+                       BPF_MOV32_IMM(R0, 2),
+                       BPF_EXIT_INSN(),
+                       BPF_MOV32_IMM(R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } }
+       },
        /* BPF_ALU | BPF_OR | BPF_X */
        {
                "ALU_OR_X: 1 | 2 = 3",
@@ -3572,6 +3966,44 @@ static struct bpf_test tests[] = {
                { },
                { { 0, 0xffffffff } },
        },
+       {
+               "ALU_OR_K: Small immediate",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0x01020304),
+                       BPF_ALU32_IMM(BPF_OR, R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x01020305 } }
+       },
+       {
+               "ALU_OR_K: Large immediate",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0x01020304),
+                       BPF_ALU32_IMM(BPF_OR, R0, 0xa0b0c0d0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xa1b2c3d4 } }
+       },
+       {
+               "ALU_OR_K: Zero extension",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_LD_IMM64(R1, 0x00000000f9fbfdffLL),
+                       BPF_ALU32_IMM(BPF_OR, R0, 0xf0f0f0f0),
+                       BPF_JMP_REG(BPF_JEQ, R0, R1, 2),
+                       BPF_MOV32_IMM(R0, 2),
+                       BPF_EXIT_INSN(),
+                       BPF_MOV32_IMM(R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } }
+       },
        {
                "ALU64_OR_K: 1 | 2 = 3",
                .u.insns_int = {
@@ -3595,7 +4027,7 @@ static struct bpf_test tests[] = {
                { { 0, 0xffffffff } },
        },
        {
-               "ALU64_OR_K: 0x0000ffffffff0000 | 0x0 = 0x0000ffff00000000",
+               "ALU64_OR_K: 0x0000ffffffff0000 | 0x0 = 0x0000ffffffff0000",
                .u.insns_int = {
                        BPF_LD_IMM64(R2, 0x0000ffffffff0000LL),
                        BPF_LD_IMM64(R3, 0x0000ffffffff0000LL),
@@ -3642,9 +4074,41 @@ static struct bpf_test tests[] = {
                { },
                { { 0, 0x1 } },
        },
-       /* BPF_ALU | BPF_XOR | BPF_X */
        {
-               "ALU_XOR_X: 5 ^ 6 = 3",
+               "ALU64_OR_K: Sign extension 1",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_LD_IMM64(R1, 0x012345678fafcfefLL),
+                       BPF_ALU64_IMM(BPF_OR, R0, 0x0f0f0f0f),
+                       BPF_JMP_REG(BPF_JEQ, R0, R1, 2),
+                       BPF_MOV32_IMM(R0, 2),
+                       BPF_EXIT_INSN(),
+                       BPF_MOV32_IMM(R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } }
+       },
+       {
+               "ALU64_OR_K: Sign extension 2",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_LD_IMM64(R1, 0xfffffffff9fbfdffLL),
+                       BPF_ALU64_IMM(BPF_OR, R0, 0xf0f0f0f0),
+                       BPF_JMP_REG(BPF_JEQ, R0, R1, 2),
+                       BPF_MOV32_IMM(R0, 2),
+                       BPF_EXIT_INSN(),
+                       BPF_MOV32_IMM(R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } }
+       },
+       /* BPF_ALU | BPF_XOR | BPF_X */
+       {
+               "ALU_XOR_X: 5 ^ 6 = 3",
                .u.insns_int = {
                        BPF_LD_IMM64(R0, 5),
                        BPF_ALU32_IMM(BPF_MOV, R1, 6),
@@ -3714,6 +4178,44 @@ static struct bpf_test tests[] = {
                { },
                { { 0, 0xfffffffe } },
        },
+       {
+               "ALU_XOR_K: Small immediate",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0x01020304),
+                       BPF_ALU32_IMM(BPF_XOR, R0, 15),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x0102030b } }
+       },
+       {
+               "ALU_XOR_K: Large immediate",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0xf1f2f3f4),
+                       BPF_ALU32_IMM(BPF_XOR, R0, 0xafbfcfdf),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x5e4d3c2b } }
+       },
+       {
+               "ALU_XOR_K: Zero extension",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_LD_IMM64(R1, 0x00000000795b3d1fLL),
+                       BPF_ALU32_IMM(BPF_XOR, R0, 0xf0f0f0f0),
+                       BPF_JMP_REG(BPF_JEQ, R0, R1, 2),
+                       BPF_MOV32_IMM(R0, 2),
+                       BPF_EXIT_INSN(),
+                       BPF_MOV32_IMM(R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } }
+       },
        {
                "ALU64_XOR_K: 5 ^ 6 = 3",
                .u.insns_int = {
@@ -3726,7 +4228,7 @@ static struct bpf_test tests[] = {
                { { 0, 3 } },
        },
        {
-               "ALU64_XOR_K: 1 & 0xffffffff = 0xfffffffe",
+               "ALU64_XOR_K: 1 ^ 0xffffffff = 0xfffffffe",
                .u.insns_int = {
                        BPF_LD_IMM64(R0, 1),
                        BPF_ALU64_IMM(BPF_XOR, R0, 0xffffffff),
@@ -3784,6 +4286,38 @@ static struct bpf_test tests[] = {
                { },
                { { 0, 0x1 } },
        },
+       {
+               "ALU64_XOR_K: Sign extension 1",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_LD_IMM64(R1, 0x0123456786a4c2e0LL),
+                       BPF_ALU64_IMM(BPF_XOR, R0, 0x0f0f0f0f),
+                       BPF_JMP_REG(BPF_JEQ, R0, R1, 2),
+                       BPF_MOV32_IMM(R0, 2),
+                       BPF_EXIT_INSN(),
+                       BPF_MOV32_IMM(R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } }
+       },
+       {
+               "ALU64_XOR_K: Sign extension 2",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_LD_IMM64(R1, 0xfedcba98795b3d1fLL),
+                       BPF_ALU64_IMM(BPF_XOR, R0, 0xf0f0f0f0),
+                       BPF_JMP_REG(BPF_JEQ, R0, R1, 2),
+                       BPF_MOV32_IMM(R0, 2),
+                       BPF_EXIT_INSN(),
+                       BPF_MOV32_IMM(R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } }
+       },
        /* BPF_ALU | BPF_LSH | BPF_X */
        {
                "ALU_LSH_X: 1 << 1 = 2",
@@ -3809,6 +4343,18 @@ static struct bpf_test tests[] = {
                { },
                { { 0, 0x80000000 } },
        },
+       {
+               "ALU_LSH_X: 0x12345678 << 12 = 0x45678000",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0x12345678),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 12),
+                       BPF_ALU32_REG(BPF_LSH, R0, R1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x45678000 } }
+       },
        {
                "ALU64_LSH_X: 1 << 1 = 2",
                .u.insns_int = {
@@ -3833,570 +4379,1993 @@ static struct bpf_test tests[] = {
                { },
                { { 0, 0x80000000 } },
        },
-       /* BPF_ALU | BPF_LSH | BPF_K */
        {
-               "ALU_LSH_K: 1 << 1 = 2",
+               "ALU64_LSH_X: Shift < 32, low word",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 1),
-                       BPF_ALU32_IMM(BPF_LSH, R0, 1),
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 12),
+                       BPF_ALU64_REG(BPF_LSH, R0, R1),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 2 } },
+               { { 0, 0xbcdef000 } }
        },
        {
-               "ALU_LSH_K: 1 << 31 = 0x80000000",
+               "ALU64_LSH_X: Shift < 32, high word",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 1),
-                       BPF_ALU32_IMM(BPF_LSH, R0, 31),
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 12),
+                       BPF_ALU64_REG(BPF_LSH, R0, R1),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0x80000000 } },
+               { { 0, 0x3456789a } }
        },
        {
-               "ALU64_LSH_K: 1 << 1 = 2",
+               "ALU64_LSH_X: Shift > 32, low word",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 1),
-                       BPF_ALU64_IMM(BPF_LSH, R0, 1),
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 36),
+                       BPF_ALU64_REG(BPF_LSH, R0, R1),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 2 } },
+               { { 0, 0 } }
        },
        {
-               "ALU64_LSH_K: 1 << 31 = 0x80000000",
+               "ALU64_LSH_X: Shift > 32, high word",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 1),
-                       BPF_ALU64_IMM(BPF_LSH, R0, 31),
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 36),
+                       BPF_ALU64_REG(BPF_LSH, R0, R1),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0x80000000 } },
+               { { 0, 0x9abcdef0 } }
        },
-       /* BPF_ALU | BPF_RSH | BPF_X */
        {
-               "ALU_RSH_X: 2 >> 1 = 1",
+               "ALU64_LSH_X: Shift == 32, low word",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 2),
-                       BPF_ALU32_IMM(BPF_MOV, R1, 1),
-                       BPF_ALU32_REG(BPF_RSH, R0, R1),
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 32),
+                       BPF_ALU64_REG(BPF_LSH, R0, R1),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 1 } },
+               { { 0, 0 } }
        },
        {
-               "ALU_RSH_X: 0x80000000 >> 31 = 1",
+               "ALU64_LSH_X: Shift == 32, high word",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 0x80000000),
-                       BPF_ALU32_IMM(BPF_MOV, R1, 31),
-                       BPF_ALU32_REG(BPF_RSH, R0, R1),
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 32),
+                       BPF_ALU64_REG(BPF_LSH, R0, R1),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 1 } },
+               { { 0, 0x89abcdef } }
        },
        {
-               "ALU64_RSH_X: 2 >> 1 = 1",
+               "ALU64_LSH_X: Zero shift, low word",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 2),
-                       BPF_ALU32_IMM(BPF_MOV, R1, 1),
-                       BPF_ALU64_REG(BPF_RSH, R0, R1),
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 0),
+                       BPF_ALU64_REG(BPF_LSH, R0, R1),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 1 } },
+               { { 0, 0x89abcdef } }
        },
        {
-               "ALU64_RSH_X: 0x80000000 >> 31 = 1",
+               "ALU64_LSH_X: Zero shift, high word",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 0x80000000),
-                       BPF_ALU32_IMM(BPF_MOV, R1, 31),
-                       BPF_ALU64_REG(BPF_RSH, R0, R1),
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 0),
+                       BPF_ALU64_REG(BPF_LSH, R0, R1),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 1 } },
+               { { 0, 0x01234567 } }
        },
-       /* BPF_ALU | BPF_RSH | BPF_K */
+       /* BPF_ALU | BPF_LSH | BPF_K */
        {
-               "ALU_RSH_K: 2 >> 1 = 1",
+               "ALU_LSH_K: 1 << 1 = 2",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 2),
-                       BPF_ALU32_IMM(BPF_RSH, R0, 1),
+                       BPF_LD_IMM64(R0, 1),
+                       BPF_ALU32_IMM(BPF_LSH, R0, 1),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 1 } },
+               { { 0, 2 } },
        },
        {
-               "ALU_RSH_K: 0x80000000 >> 31 = 1",
+               "ALU_LSH_K: 1 << 31 = 0x80000000",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 0x80000000),
-                       BPF_ALU32_IMM(BPF_RSH, R0, 31),
+                       BPF_LD_IMM64(R0, 1),
+                       BPF_ALU32_IMM(BPF_LSH, R0, 31),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 1 } },
+               { { 0, 0x80000000 } },
        },
        {
-               "ALU64_RSH_K: 2 >> 1 = 1",
+               "ALU_LSH_K: 0x12345678 << 12 = 0x45678000",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 2),
-                       BPF_ALU64_IMM(BPF_RSH, R0, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0x12345678),
+                       BPF_ALU32_IMM(BPF_LSH, R0, 12),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 1 } },
+               { { 0, 0x45678000 } }
        },
        {
-               "ALU64_RSH_K: 0x80000000 >> 31 = 1",
+               "ALU_LSH_K: 0x12345678 << 0 = 0x12345678",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 0x80000000),
-                       BPF_ALU64_IMM(BPF_RSH, R0, 31),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0x12345678),
+                       BPF_ALU32_IMM(BPF_LSH, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 1 } },
+               { { 0, 0x12345678 } }
        },
-       /* BPF_ALU | BPF_ARSH | BPF_X */
        {
-               "ALU_ARSH_X: 0xff00ff0000000000 >> 40 = 0xffffffffffff00ff",
+               "ALU64_LSH_K: 1 << 1 = 2",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 0xff00ff0000000000LL),
-                       BPF_ALU32_IMM(BPF_MOV, R1, 40),
-                       BPF_ALU64_REG(BPF_ARSH, R0, R1),
+                       BPF_LD_IMM64(R0, 1),
+                       BPF_ALU64_IMM(BPF_LSH, R0, 1),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0xffff00ff } },
+               { { 0, 2 } },
        },
-       /* BPF_ALU | BPF_ARSH | BPF_K */
        {
-               "ALU_ARSH_K: 0xff00ff0000000000 >> 40 = 0xffffffffffff00ff",
+               "ALU64_LSH_K: 1 << 31 = 0x80000000",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 0xff00ff0000000000LL),
-                       BPF_ALU64_IMM(BPF_ARSH, R0, 40),
+                       BPF_LD_IMM64(R0, 1),
+                       BPF_ALU64_IMM(BPF_LSH, R0, 31),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0xffff00ff } },
+               { { 0, 0x80000000 } },
        },
-       /* BPF_ALU | BPF_NEG */
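+       /*
+        * The "low word"/"high word" test pairs below check each half
+        * of a 64-bit result separately: the high-word variant shifts
+        * the result right by 32 so that the upper half ends up in the
+        * 32-bit value verified by the test harness.
+        */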
        {
-               "ALU_NEG: -(3) = -3",
+               "ALU64_LSH_K: Shift < 32, low word",
                .u.insns_int = {
-                       BPF_ALU32_IMM(BPF_MOV, R0, 3),
-                       BPF_ALU32_IMM(BPF_NEG, R0, 0),
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_LSH, R0, 12),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, -3 } },
+               { { 0, 0xbcdef000 } }
        },
        {
-               "ALU_NEG: -(-3) = 3",
+               "ALU64_LSH_K: Shift < 32, high word",
                .u.insns_int = {
-                       BPF_ALU32_IMM(BPF_MOV, R0, -3),
-                       BPF_ALU32_IMM(BPF_NEG, R0, 0),
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_LSH, R0, 12),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 3 } },
+               { { 0, 0x3456789a } }
        },
        {
-               "ALU64_NEG: -(3) = -3",
+               "ALU64_LSH_K: Shift > 32, low word",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 3),
-                       BPF_ALU64_IMM(BPF_NEG, R0, 0),
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_LSH, R0, 36),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, -3 } },
+               { { 0, 0 } }
        },
        {
-               "ALU64_NEG: -(-3) = 3",
+               "ALU64_LSH_K: Shift > 32, high word",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, -3),
-                       BPF_ALU64_IMM(BPF_NEG, R0, 0),
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_LSH, R0, 36),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 3 } },
+               { { 0, 0x9abcdef0 } }
        },
-       /* BPF_ALU | BPF_END | BPF_FROM_BE */
        {
-               "ALU_END_FROM_BE 16: 0x0123456789abcdef -> 0xcdef",
+               "ALU64_LSH_K: Shift == 32, low word",
                .u.insns_int = {
                        BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
-                       BPF_ENDIAN(BPF_FROM_BE, R0, 16),
+                       BPF_ALU64_IMM(BPF_LSH, R0, 32),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0,  cpu_to_be16(0xcdef) } },
+               { { 0, 0 } }
        },
        {
-               "ALU_END_FROM_BE 32: 0x0123456789abcdef -> 0x89abcdef",
+               "ALU64_LSH_K: Shift == 32, high word",
                .u.insns_int = {
                        BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
-                       BPF_ENDIAN(BPF_FROM_BE, R0, 32),
-                       BPF_ALU64_REG(BPF_MOV, R1, R0),
-                       BPF_ALU64_IMM(BPF_RSH, R1, 32),
-                       BPF_ALU32_REG(BPF_ADD, R0, R1), /* R1 = 0 */
+                       BPF_ALU64_IMM(BPF_LSH, R0, 32),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, cpu_to_be32(0x89abcdef) } },
+               { { 0, 0x89abcdef } }
        },
        {
-               "ALU_END_FROM_BE 64: 0x0123456789abcdef -> 0x89abcdef",
+               "ALU64_LSH_K: Zero shift",
                .u.insns_int = {
                        BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
-                       BPF_ENDIAN(BPF_FROM_BE, R0, 64),
+                       BPF_ALU64_IMM(BPF_LSH, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, (u32) cpu_to_be64(0x0123456789abcdefLL) } },
+               { { 0, 0x89abcdef } }
        },
-       /* BPF_ALU | BPF_END | BPF_FROM_LE */
+       /* BPF_ALU | BPF_RSH | BPF_X */
        {
-               "ALU_END_FROM_LE 16: 0x0123456789abcdef -> 0xefcd",
+               "ALU_RSH_X: 2 >> 1 = 1",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
-                       BPF_ENDIAN(BPF_FROM_LE, R0, 16),
+                       BPF_LD_IMM64(R0, 2),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 1),
+                       BPF_ALU32_REG(BPF_RSH, R0, R1),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, cpu_to_le16(0xcdef) } },
+               { { 0, 1 } },
        },
        {
-               "ALU_END_FROM_LE 32: 0x0123456789abcdef -> 0xefcdab89",
+               "ALU_RSH_X: 0x80000000 >> 31 = 1",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
-                       BPF_ENDIAN(BPF_FROM_LE, R0, 32),
-                       BPF_ALU64_REG(BPF_MOV, R1, R0),
-                       BPF_ALU64_IMM(BPF_RSH, R1, 32),
-                       BPF_ALU32_REG(BPF_ADD, R0, R1), /* R1 = 0 */
+                       BPF_LD_IMM64(R0, 0x80000000),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 31),
+                       BPF_ALU32_REG(BPF_RSH, R0, R1),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, cpu_to_le32(0x89abcdef) } },
+               { { 0, 1 } },
        },
        {
-               "ALU_END_FROM_LE 64: 0x0123456789abcdef -> 0x67452301",
+               "ALU_RSH_X: 0x12345678 >> 20 = 0x123",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
-                       BPF_ENDIAN(BPF_FROM_LE, R0, 64),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0x12345678),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 20),
+                       BPF_ALU32_REG(BPF_RSH, R0, R1),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, (u32) cpu_to_le64(0x0123456789abcdefLL) } },
+               { { 0, 0x123 } }
        },
-       /* BPF_ST(X) | BPF_MEM | BPF_B/H/W/DW */
        {
-               "ST_MEM_B: Store/Load byte: max negative",
+               "ALU64_RSH_X: 2 >> 1 = 1",
                .u.insns_int = {
-                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
-                       BPF_ST_MEM(BPF_B, R10, -40, 0xff),
-                       BPF_LDX_MEM(BPF_B, R0, R10, -40),
+                       BPF_LD_IMM64(R0, 2),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 1),
+                       BPF_ALU64_REG(BPF_RSH, R0, R1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } },
+       },
+       {
+               "ALU64_RSH_X: 0x80000000 >> 31 = 1",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x80000000),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 31),
+                       BPF_ALU64_REG(BPF_RSH, R0, R1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } },
+       },
+       {
+               "ALU64_RSH_X: Shift < 32, low word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 12),
+                       BPF_ALU64_REG(BPF_RSH, R0, R1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x56789abc } }
+       },
+       {
+               "ALU64_RSH_X: Shift < 32, high word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 12),
+                       BPF_ALU64_REG(BPF_RSH, R0, R1),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x00081234 } }
+       },
+       {
+               "ALU64_RSH_X: Shift > 32, low word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 36),
+                       BPF_ALU64_REG(BPF_RSH, R0, R1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x08123456 } }
+       },
+       {
+               "ALU64_RSH_X: Shift > 32, high word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 36),
+                       BPF_ALU64_REG(BPF_RSH, R0, R1),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0 } }
+       },
+       {
+               "ALU64_RSH_X: Shift == 32, low word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 32),
+                       BPF_ALU64_REG(BPF_RSH, R0, R1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x81234567 } }
+       },
+       {
+               "ALU64_RSH_X: Shift == 32, high word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 32),
+                       BPF_ALU64_REG(BPF_RSH, R0, R1),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0 } }
+       },
+       {
+               "ALU64_RSH_X: Zero shift, low word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 0),
+                       BPF_ALU64_REG(BPF_RSH, R0, R1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x89abcdef } }
+       },
+       {
+               "ALU64_RSH_X: Zero shift, high word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 0),
+                       BPF_ALU64_REG(BPF_RSH, R0, R1),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x81234567 } }
+       },
+       /* BPF_ALU | BPF_RSH | BPF_K */
+       {
+               "ALU_RSH_K: 2 >> 1 = 1",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 2),
+                       BPF_ALU32_IMM(BPF_RSH, R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } },
+       },
+       {
+               "ALU_RSH_K: 0x80000000 >> 31 = 1",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x80000000),
+                       BPF_ALU32_IMM(BPF_RSH, R0, 31),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } },
+       },
+       {
+               "ALU_RSH_K: 0x12345678 >> 20 = 0x123",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0x12345678),
+                       BPF_ALU32_IMM(BPF_RSH, R0, 20),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x123 } }
+       },
+       {
+               "ALU_RSH_K: 0x12345678 >> 0 = 0x12345678",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0x12345678),
+                       BPF_ALU32_IMM(BPF_RSH, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x12345678 } }
+       },
+       {
+               "ALU64_RSH_K: 2 >> 1 = 1",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 2),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } },
+       },
+       {
+               "ALU64_RSH_K: 0x80000000 >> 31 = 1",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x80000000),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 31),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } },
+       },
+       {
+               "ALU64_RSH_K: Shift < 32, low word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 12),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x56789abc } }
+       },
+       {
+               "ALU64_RSH_K: Shift < 32, high word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 12),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x00081234 } }
+       },
+       {
+               "ALU64_RSH_K: Shift > 32, low word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 36),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x08123456 } }
+       },
+       {
+               "ALU64_RSH_K: Shift > 32, high word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 36),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0 } }
+       },
+       {
+               "ALU64_RSH_K: Shift == 32, low word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x81234567 } }
+       },
+       {
+               "ALU64_RSH_K: Shift == 32, high word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0 } }
+       },
+       {
+               "ALU64_RSH_K: Zero shift",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x89abcdef } }
+       },
+       /* BPF_ALU | BPF_ARSH | BPF_X */
+       {
+               "ALU32_ARSH_X: -1234 >> 7 = -10",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, -1234),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 7),
+                       BPF_ALU32_REG(BPF_ARSH, R0, R1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, -10 } }
+       },
+       {
+               "ALU64_ARSH_X: 0xff00ff0000000000 >> 40 = 0xffffffffffff00ff",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0xff00ff0000000000LL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 40),
+                       BPF_ALU64_REG(BPF_ARSH, R0, R1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xffff00ff } },
+       },
+       {
+               "ALU64_ARSH_X: Shift < 32, low word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 12),
+                       BPF_ALU64_REG(BPF_ARSH, R0, R1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x56789abc } }
+       },
+       {
+               "ALU64_ARSH_X: Shift < 32, high word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 12),
+                       BPF_ALU64_REG(BPF_ARSH, R0, R1),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xfff81234 } }
+       },
+       {
+               "ALU64_ARSH_X: Shift > 32, low word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 36),
+                       BPF_ALU64_REG(BPF_ARSH, R0, R1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xf8123456 } }
+       },
+       {
+               "ALU64_ARSH_X: Shift > 32, high word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 36),
+                       BPF_ALU64_REG(BPF_ARSH, R0, R1),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, -1 } }
+       },
+       {
+               "ALU64_ARSH_X: Shift == 32, low word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 32),
+                       BPF_ALU64_REG(BPF_ARSH, R0, R1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x81234567 } }
+       },
+       {
+               "ALU64_ARSH_X: Shift == 32, high word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 32),
+                       BPF_ALU64_REG(BPF_ARSH, R0, R1),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, -1 } }
+       },
+       {
+               "ALU64_ARSH_X: Zero shift, low word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 0),
+                       BPF_ALU64_REG(BPF_ARSH, R0, R1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x89abcdef } }
+       },
+       {
+               "ALU64_ARSH_X: Zero shift, high word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 0),
+                       BPF_ALU64_REG(BPF_ARSH, R0, R1),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x81234567 } }
+       },
+       /* BPF_ALU | BPF_ARSH | BPF_K */
+       {
+               "ALU32_ARSH_K: -1234 >> 7 = -10",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, -1234),
+                       BPF_ALU32_IMM(BPF_ARSH, R0, 7),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, -10 } }
+       },
+       {
+               "ALU32_ARSH_K: -1234 >> 0 = -1234",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, -1234),
+                       BPF_ALU32_IMM(BPF_ARSH, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, -1234 } }
+       },
+       {
+               "ALU64_ARSH_K: 0xff00ff0000000000 >> 40 = 0xffffffffffff00ff",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0xff00ff0000000000LL),
+                       BPF_ALU64_IMM(BPF_ARSH, R0, 40),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xffff00ff } },
+       },
+       {
+               "ALU64_ARSH_K: Shift < 32, low word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_ARSH, R0, 12),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x56789abc } }
+       },
+       {
+               "ALU64_ARSH_K: Shift < 32, high word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_ARSH, R0, 12),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xfff81234 } }
+       },
+       {
+               "ALU64_ARSH_K: Shift > 32, low word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_ARSH, R0, 36),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xf8123456 } }
+       },
+       {
+               "ALU64_ARSH_K: Shift > 32, high word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0xf123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_ARSH, R0, 36),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, -1 } }
+       },
+       {
+               "ALU64_ARSH_K: Shift == 32, low word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_ARSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x81234567 } }
+       },
+       {
+               "ALU64_ARSH_K: Shift == 32, high word",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_ARSH, R0, 32),
+                       BPF_ALU64_IMM(BPF_RSH, R0, 32),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, -1 } }
+       },
+       {
+               "ALU64_ARSH_K: Zero shoft",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x8123456789abcdefLL),
+                       BPF_ALU64_IMM(BPF_ARSH, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x89abcdef } }
+       },
+       /* BPF_ALU | BPF_NEG */
+       {
+               "ALU_NEG: -(3) = -3",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 3),
+                       BPF_ALU32_IMM(BPF_NEG, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, -3 } },
+       },
+       {
+               "ALU_NEG: -(-3) = 3",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, -3),
+                       BPF_ALU32_IMM(BPF_NEG, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 3 } },
+       },
+       {
+               "ALU64_NEG: -(3) = -3",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 3),
+                       BPF_ALU64_IMM(BPF_NEG, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, -3 } },
+       },
+       {
+               "ALU64_NEG: -(-3) = 3",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, -3),
+                       BPF_ALU64_IMM(BPF_NEG, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 3 } },
+       },
+       /* BPF_ALU | BPF_END | BPF_FROM_BE */
+       {
+               "ALU_END_FROM_BE 16: 0x0123456789abcdef -> 0xcdef",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ENDIAN(BPF_FROM_BE, R0, 16),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, cpu_to_be16(0xcdef) } },
+       },
+       {
+               "ALU_END_FROM_BE 32: 0x0123456789abcdef -> 0x89abcdef",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ENDIAN(BPF_FROM_BE, R0, 32),
+                       BPF_ALU64_REG(BPF_MOV, R1, R0),
+                       BPF_ALU64_IMM(BPF_RSH, R1, 32),
+                       BPF_ALU32_REG(BPF_ADD, R0, R1), /* R1 = 0 if upper half was cleared */
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, cpu_to_be32(0x89abcdef) } },
+       },
+       {
+               "ALU_END_FROM_BE 64: 0x0123456789abcdef -> 0x89abcdef",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ENDIAN(BPF_FROM_BE, R0, 64),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, (u32) cpu_to_be64(0x0123456789abcdefLL) } },
+       },
+       /* BPF_ALU | BPF_END | BPF_FROM_LE */
+       {
+               "ALU_END_FROM_LE 16: 0x0123456789abcdef -> 0xefcd",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ENDIAN(BPF_FROM_LE, R0, 16),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, cpu_to_le16(0xcdef) } },
+       },
+       {
+               "ALU_END_FROM_LE 32: 0x0123456789abcdef -> 0xefcdab89",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ENDIAN(BPF_FROM_LE, R0, 32),
+                       BPF_ALU64_REG(BPF_MOV, R1, R0),
+                       BPF_ALU64_IMM(BPF_RSH, R1, 32),
+                       BPF_ALU32_REG(BPF_ADD, R0, R1), /* R1 = 0 if upper half was cleared */
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, cpu_to_le32(0x89abcdef) } },
+       },
+       {
+               "ALU_END_FROM_LE 64: 0x0123456789abcdef -> 0x67452301",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
+                       BPF_ENDIAN(BPF_FROM_LE, R0, 64),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, (u32) cpu_to_le64(0x0123456789abcdefLL) } },
+       },
+       /* BPF_ST(X) | BPF_MEM | BPF_B/H/W/DW */
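+       /*
+        * ST_MEM stores an immediate, while STX_MEM stores a register.
+        * Each test writes a value to the stack through R10 and loads
+        * it back to verify the store.
+        */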
+       {
+               "ST_MEM_B: Store/Load byte: max negative",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
+                       BPF_ST_MEM(BPF_B, R10, -40, 0xff),
+                       BPF_LDX_MEM(BPF_B, R0, R10, -40),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xff } },
+               .stack_depth = 40,
+       },
+       {
+               "ST_MEM_B: Store/Load byte: max positive",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
+                       BPF_ST_MEM(BPF_B, R10, -40, 0x7f),
+                       BPF_LDX_MEM(BPF_B, R0, R10, -40),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x7f } },
+               .stack_depth = 40,
+       },
+       {
+               "STX_MEM_B: Store/Load byte: max negative",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0),
+                       BPF_LD_IMM64(R1, 0xffLL),
+                       BPF_STX_MEM(BPF_B, R10, R1, -40),
+                       BPF_LDX_MEM(BPF_B, R0, R10, -40),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xff } },
+               .stack_depth = 40,
+       },
+       {
+               "ST_MEM_H: Store/Load half word: max negative",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
+                       BPF_ST_MEM(BPF_H, R10, -40, 0xffff),
+                       BPF_LDX_MEM(BPF_H, R0, R10, -40),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xffff } },
+               .stack_depth = 40,
+       },
+       {
+               "ST_MEM_H: Store/Load half word: max positive",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
+                       BPF_ST_MEM(BPF_H, R10, -40, 0x7fff),
+                       BPF_LDX_MEM(BPF_H, R0, R10, -40),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x7fff } },
+               .stack_depth = 40,
+       },
+       {
+               "STX_MEM_H: Store/Load half word: max negative",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0),
+                       BPF_LD_IMM64(R1, 0xffffLL),
+                       BPF_STX_MEM(BPF_H, R10, R1, -40),
+                       BPF_LDX_MEM(BPF_H, R0, R10, -40),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xffff } },
+               .stack_depth = 40,
+       },
+       {
+               "ST_MEM_W: Store/Load word: max negative",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
+                       BPF_ST_MEM(BPF_W, R10, -40, 0xffffffff),
+                       BPF_LDX_MEM(BPF_W, R0, R10, -40),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xffffffff } },
+               .stack_depth = 40,
+       },
+       {
+               "ST_MEM_W: Store/Load word: max positive",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
+                       BPF_ST_MEM(BPF_W, R10, -40, 0x7fffffff),
+                       BPF_LDX_MEM(BPF_W, R0, R10, -40),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x7fffffff } },
+               .stack_depth = 40,
+       },
+       {
+               "STX_MEM_W: Store/Load word: max negative",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0),
+                       BPF_LD_IMM64(R1, 0xffffffffLL),
+                       BPF_STX_MEM(BPF_W, R10, R1, -40),
+                       BPF_LDX_MEM(BPF_W, R0, R10, -40),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xffffffff } },
+               .stack_depth = 40,
+       },
+       {
+               "ST_MEM_DW: Store/Load double word: max negative",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
+                       BPF_ST_MEM(BPF_DW, R10, -40, 0xffffffff),
+                       BPF_LDX_MEM(BPF_DW, R0, R10, -40),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xffffffff } },
+               .stack_depth = 40,
+       },
+       {
+               "ST_MEM_DW: Store/Load double word: max negative 2",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R2, 0xffff00000000ffffLL),
+                       BPF_LD_IMM64(R3, 0xffffffffffffffffLL),
+                       BPF_ST_MEM(BPF_DW, R10, -40, 0xffffffff),
+                       BPF_LDX_MEM(BPF_DW, R2, R10, -40),
+                       BPF_JMP_REG(BPF_JEQ, R2, R3, 2),
+                       BPF_MOV32_IMM(R0, 2),
+                       BPF_EXIT_INSN(),
+                       BPF_MOV32_IMM(R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x1 } },
+               .stack_depth = 40,
+       },
+       {
+               "ST_MEM_DW: Store/Load double word: max positive",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
+                       BPF_ST_MEM(BPF_DW, R10, -40, 0x7fffffff),
+                       BPF_LDX_MEM(BPF_DW, R0, R10, -40),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x7fffffff } },
+               .stack_depth = 40,
+       },
+       {
+               "STX_MEM_DW: Store/Load double word: max negative",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0),
+                       BPF_LD_IMM64(R1, 0xffffffffffffffffLL),
+                       BPF_STX_MEM(BPF_DW, R10, R1, -40),
+                       BPF_LDX_MEM(BPF_DW, R0, R10, -40),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xffffffff } },
+               .stack_depth = 40,
+       },
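+       /*
+        * The order of the two words within a stored double word
+        * depends on host endianness, hence the #ifdef'ed expectations
+        * in the two tests below.
+        */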
+       {
+               "STX_MEM_DW: Store double word: first word in memory",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0),
+                       BPF_LD_IMM64(R1, 0x0123456789abcdefLL),
+                       BPF_STX_MEM(BPF_DW, R10, R1, -40),
+                       BPF_LDX_MEM(BPF_W, R0, R10, -40),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+#ifdef __BIG_ENDIAN
+               { { 0, 0x01234567 } },
+#else
+               { { 0, 0x89abcdef } },
+#endif
+               .stack_depth = 40,
+       },
+       {
+               "STX_MEM_DW: Store double word: second word in memory",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R0, 0),
+                       BPF_LD_IMM64(R1, 0x0123456789abcdefLL),
+                       BPF_STX_MEM(BPF_DW, R10, R1, -40),
+                       BPF_LDX_MEM(BPF_W, R0, R10, -36),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+#ifdef __BIG_ENDIAN
+               { { 0, 0x89abcdef } },
+#else
+               { { 0, 0x01234567 } },
+#endif
+               .stack_depth = 40,
+       },
+       /* BPF_STX | BPF_ATOMIC | BPF_W/DW */
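+       /*
+        * These two programs are generated at run time by the
+        * bpf_fill_stxw/bpf_fill_stxdw helpers, which are expected to
+        * emit a long sequence of atomic adds to a single stack slot.
+        */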
+       {
+               "STX_XADD_W: X + 1 + 1 + 1 + ...",
+               { },
+               INTERNAL,
+               { },
+               { { 0, 4134 } },
+               .fill_helper = bpf_fill_stxw,
+       },
+       {
+               "STX_XADD_DW: X + 1 + 1 + 1 + ...",
+               { },
+               INTERNAL,
+               { },
+               { { 0, 4134 } },
+               .fill_helper = bpf_fill_stxdw,
+       },
+       /*
+        * Exhaustive tests of atomic operation variants.
+        * Individual tests are expanded from template macros for all
+        * combinations of ALU operation, word size and fetching.
+        */
+#define BPF_ATOMIC_OP_TEST1(width, op, logic, old, update, result)     \
+{                                                                      \
+       "BPF_ATOMIC | " #width ", " #op ": Test: "                      \
+               #old " " #logic " " #update " = " #result,              \
+       .u.insns_int = {                                                \
+               BPF_ALU32_IMM(BPF_MOV, R5, update),                     \
+               BPF_ST_MEM(width, R10, -40, old),                       \
+               BPF_ATOMIC_OP(width, op, R10, R5, -40),                 \
+               BPF_LDX_MEM(width, R0, R10, -40),                       \
+               BPF_EXIT_INSN(),                                        \
+       },                                                              \
+       INTERNAL,                                                       \
+       { },                                                            \
+       { { 0, result } },                                              \
+       .stack_depth = 40,                                              \
+}
+#define BPF_ATOMIC_OP_TEST2(width, op, logic, old, update, result)     \
+{                                                                      \
+       "BPF_ATOMIC | " #width ", " #op ": Test side effects, r10: "    \
+               #old " " #logic " " #update " = " #result,              \
+       .u.insns_int = {                                                \
+               BPF_ALU64_REG(BPF_MOV, R1, R10),                        \
+               BPF_ALU32_IMM(BPF_MOV, R0, update),                     \
+               BPF_ST_MEM(width, R10, -40, old),                       \
+               BPF_ATOMIC_OP(width, op, R10, R0, -40),                 \
+               BPF_ALU64_REG(BPF_MOV, R0, R10),                        \
+               BPF_ALU64_REG(BPF_SUB, R0, R1),                         \
+               BPF_EXIT_INSN(),                                        \
+       },                                                              \
+       INTERNAL,                                                       \
+       { },                                                            \
+       { { 0, 0 } },                                                   \
+       .stack_depth = 40,                                              \
+}
+#define BPF_ATOMIC_OP_TEST3(width, op, logic, old, update, result)     \
+{                                                                      \
+       "BPF_ATOMIC | " #width ", " #op ": Test side effects, r0: "     \
+               #old " " #logic " " #update " = " #result,              \
+       .u.insns_int = {                                                \
+               BPF_ALU64_REG(BPF_MOV, R0, R10),                        \
+               BPF_ALU32_IMM(BPF_MOV, R1, update),                     \
+               BPF_ST_MEM(width, R10, -40, old),                       \
+               BPF_ATOMIC_OP(width, op, R10, R1, -40),                 \
+               BPF_ALU64_REG(BPF_SUB, R0, R10),                        \
+               BPF_EXIT_INSN(),                                        \
+       },                                                              \
+       INTERNAL,                                                       \
+       { },                                                            \
+       { { 0, 0 } },                                                   \
+       .stack_depth = 40,                                              \
+}
+#define BPF_ATOMIC_OP_TEST4(width, op, logic, old, update, result)     \
+{                                                                      \
+       "BPF_ATOMIC | " #width ", " #op ": Test fetch: "                \
+               #old " " #logic " " #update " = " #result,              \
+       .u.insns_int = {                                                \
+               BPF_ALU32_IMM(BPF_MOV, R3, update),                     \
+               BPF_ST_MEM(width, R10, -40, old),                       \
+               BPF_ATOMIC_OP(width, op, R10, R3, -40),                 \
+               BPF_ALU64_REG(BPF_MOV, R0, R3),                         \
+               BPF_EXIT_INSN(),                                        \
+       },                                                              \
+       INTERNAL,                                                       \
+       { },                                                            \
+       { { 0, (op) & BPF_FETCH ? old : update } },                     \
+       .stack_depth = 40,                                              \
+}
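+       /*
+        * For example, BPF_ATOMIC_OP_TEST1(BPF_W, BPF_ADD, +, 0x12, 0xab, 0xbd)
+        * expands to a test named
+        * "BPF_ATOMIC | BPF_W, BPF_ADD: Test: 0x12 + 0xab = 0xbd".
+        * BPF_ATOMIC_OP_TEST4 verifies the fetch semantics: with
+        * BPF_FETCH the source register receives the old memory value,
+        * otherwise it is left holding the update operand.
+        */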
+       /* BPF_ATOMIC | BPF_W: BPF_ADD */
+       BPF_ATOMIC_OP_TEST1(BPF_W, BPF_ADD, +, 0x12, 0xab, 0xbd),
+       BPF_ATOMIC_OP_TEST2(BPF_W, BPF_ADD, +, 0x12, 0xab, 0xbd),
+       BPF_ATOMIC_OP_TEST3(BPF_W, BPF_ADD, +, 0x12, 0xab, 0xbd),
+       BPF_ATOMIC_OP_TEST4(BPF_W, BPF_ADD, +, 0x12, 0xab, 0xbd),
+       /* BPF_ATOMIC | BPF_W: BPF_ADD | BPF_FETCH */
+       BPF_ATOMIC_OP_TEST1(BPF_W, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd),
+       BPF_ATOMIC_OP_TEST2(BPF_W, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd),
+       BPF_ATOMIC_OP_TEST3(BPF_W, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd),
+       BPF_ATOMIC_OP_TEST4(BPF_W, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd),
+       /* BPF_ATOMIC | BPF_DW: BPF_ADD */
+       BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_ADD, +, 0x12, 0xab, 0xbd),
+       BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_ADD, +, 0x12, 0xab, 0xbd),
+       BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_ADD, +, 0x12, 0xab, 0xbd),
+       BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_ADD, +, 0x12, 0xab, 0xbd),
+       /* BPF_ATOMIC | BPF_DW: BPF_ADD | BPF_FETCH */
+       BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd),
+       BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd),
+       BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd),
+       BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd),
+       /* BPF_ATOMIC | BPF_W: BPF_AND */
+       BPF_ATOMIC_OP_TEST1(BPF_W, BPF_AND, &, 0x12, 0xab, 0x02),
+       BPF_ATOMIC_OP_TEST2(BPF_W, BPF_AND, &, 0x12, 0xab, 0x02),
+       BPF_ATOMIC_OP_TEST3(BPF_W, BPF_AND, &, 0x12, 0xab, 0x02),
+       BPF_ATOMIC_OP_TEST4(BPF_W, BPF_AND, &, 0x12, 0xab, 0x02),
+       /* BPF_ATOMIC | BPF_W: BPF_AND | BPF_FETCH */
+       BPF_ATOMIC_OP_TEST1(BPF_W, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02),
+       BPF_ATOMIC_OP_TEST2(BPF_W, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02),
+       BPF_ATOMIC_OP_TEST3(BPF_W, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02),
+       BPF_ATOMIC_OP_TEST4(BPF_W, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02),
+       /* BPF_ATOMIC | BPF_DW: BPF_AND */
+       BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_AND, &, 0x12, 0xab, 0x02),
+       BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_AND, &, 0x12, 0xab, 0x02),
+       BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_AND, &, 0x12, 0xab, 0x02),
+       BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_AND, &, 0x12, 0xab, 0x02),
+       /* BPF_ATOMIC | BPF_DW: BPF_AND | BPF_FETCH */
+       BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02),
+       BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02),
+       BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02),
+       BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02),
+       /* BPF_ATOMIC | BPF_W: BPF_OR */
+       BPF_ATOMIC_OP_TEST1(BPF_W, BPF_OR, |, 0x12, 0xab, 0xbb),
+       BPF_ATOMIC_OP_TEST2(BPF_W, BPF_OR, |, 0x12, 0xab, 0xbb),
+       BPF_ATOMIC_OP_TEST3(BPF_W, BPF_OR, |, 0x12, 0xab, 0xbb),
+       BPF_ATOMIC_OP_TEST4(BPF_W, BPF_OR, |, 0x12, 0xab, 0xbb),
+       /* BPF_ATOMIC | BPF_W: BPF_OR | BPF_FETCH */
+       BPF_ATOMIC_OP_TEST1(BPF_W, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb),
+       BPF_ATOMIC_OP_TEST2(BPF_W, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb),
+       BPF_ATOMIC_OP_TEST3(BPF_W, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb),
+       BPF_ATOMIC_OP_TEST4(BPF_W, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb),
+       /* BPF_ATOMIC | BPF_DW: BPF_OR */
+       BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_OR, |, 0x12, 0xab, 0xbb),
+       BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_OR, |, 0x12, 0xab, 0xbb),
+       BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_OR, |, 0x12, 0xab, 0xbb),
+       BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_OR, |, 0x12, 0xab, 0xbb),
+       /* BPF_ATOMIC | BPF_DW: BPF_OR | BPF_FETCH */
+       BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb),
+       BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb),
+       BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb),
+       BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb),
+       /* BPF_ATOMIC | BPF_W: BPF_XOR */
+       BPF_ATOMIC_OP_TEST1(BPF_W, BPF_XOR, ^, 0x12, 0xab, 0xb9),
+       BPF_ATOMIC_OP_TEST2(BPF_W, BPF_XOR, ^, 0x12, 0xab, 0xb9),
+       BPF_ATOMIC_OP_TEST3(BPF_W, BPF_XOR, ^, 0x12, 0xab, 0xb9),
+       BPF_ATOMIC_OP_TEST4(BPF_W, BPF_XOR, ^, 0x12, 0xab, 0xb9),
+       /* BPF_ATOMIC | BPF_W: BPF_XOR | BPF_FETCH */
+       BPF_ATOMIC_OP_TEST1(BPF_W, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9),
+       BPF_ATOMIC_OP_TEST2(BPF_W, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9),
+       BPF_ATOMIC_OP_TEST3(BPF_W, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9),
+       BPF_ATOMIC_OP_TEST4(BPF_W, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9),
+       /* BPF_ATOMIC | BPF_DW: BPF_XOR */
+       BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_XOR, ^, 0x12, 0xab, 0xb9),
+       BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_XOR, ^, 0x12, 0xab, 0xb9),
+       BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_XOR, ^, 0x12, 0xab, 0xb9),
+       BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_XOR, ^, 0x12, 0xab, 0xb9),
+       /* BPF_ATOMIC | BPF_DW: BPF_XOR | BPF_FETCH */
+       BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9),
+       BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9),
+       BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9),
+       BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9),
+       /* BPF_ATOMIC | BPF_W: BPF_XCHG */
+       BPF_ATOMIC_OP_TEST1(BPF_W, BPF_XCHG, xchg, 0x12, 0xab, 0xab),
+       BPF_ATOMIC_OP_TEST2(BPF_W, BPF_XCHG, xchg, 0x12, 0xab, 0xab),
+       BPF_ATOMIC_OP_TEST3(BPF_W, BPF_XCHG, xchg, 0x12, 0xab, 0xab),
+       BPF_ATOMIC_OP_TEST4(BPF_W, BPF_XCHG, xchg, 0x12, 0xab, 0xab),
+       /* BPF_ATOMIC | BPF_DW: BPF_XCHG */
+       BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_XCHG, xchg, 0x12, 0xab, 0xab),
+       BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_XCHG, xchg, 0x12, 0xab, 0xab),
+       BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_XCHG, xchg, 0x12, 0xab, 0xab),
+       BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_XCHG, xchg, 0x12, 0xab, 0xab),
+#undef BPF_ATOMIC_OP_TEST1
+#undef BPF_ATOMIC_OP_TEST2
+#undef BPF_ATOMIC_OP_TEST3
+#undef BPF_ATOMIC_OP_TEST4
+       /* BPF_ATOMIC | BPF_W, BPF_CMPXCHG */
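+       /*
+        * BPF_CMPXCHG compares R0 against the memory operand and, on a
+        * match, stores the source register there. The old memory value
+        * is loaded into R0 in either case, which is what the return
+        * value tests below rely on.
+        */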
+       {
+               "BPF_ATOMIC | BPF_W, BPF_CMPXCHG: Test successful return",
+               .u.insns_int = {
+                       BPF_ST_MEM(BPF_W, R10, -40, 0x01234567),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0x01234567),
+                       BPF_ALU32_IMM(BPF_MOV, R3, 0x89abcdef),
+                       BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, R10, R3, -40),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x01234567 } },
+               .stack_depth = 40,
+       },
+       {
+               "BPF_ATOMIC | BPF_W, BPF_CMPXCHG: Test successful store",
+               .u.insns_int = {
+                       BPF_ST_MEM(BPF_W, R10, -40, 0x01234567),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0x01234567),
+                       BPF_ALU32_IMM(BPF_MOV, R3, 0x89abcdef),
+                       BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, R10, R3, -40),
+                       BPF_LDX_MEM(BPF_W, R0, R10, -40),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x89abcdef } },
+               .stack_depth = 40,
+       },
+       {
+               "BPF_ATOMIC | BPF_W, BPF_CMPXCHG: Test failure return",
+               .u.insns_int = {
+                       BPF_ST_MEM(BPF_W, R10, -40, 0x01234567),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0x76543210),
+                       BPF_ALU32_IMM(BPF_MOV, R3, 0x89abcdef),
+                       BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, R10, R3, -40),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x01234567 } },
+               .stack_depth = 40,
+       },
+       {
+               "BPF_ATOMIC | BPF_W, BPF_CMPXCHG: Test failure store",
+               .u.insns_int = {
+                       BPF_ST_MEM(BPF_W, R10, -40, 0x01234567),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0x76543210),
+                       BPF_ALU32_IMM(BPF_MOV, R3, 0x89abcdef),
+                       BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, R10, R3, -40),
+                       BPF_LDX_MEM(BPF_W, R0, R10, -40),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x01234567 } },
+               .stack_depth = 40,
+       },
+       {
+               "BPF_ATOMIC | BPF_W, BPF_CMPXCHG: Test side effects",
+               .u.insns_int = {
+                       BPF_ST_MEM(BPF_W, R10, -40, 0x01234567),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0x01234567),
+                       BPF_ALU32_IMM(BPF_MOV, R3, 0x89abcdef),
+                       BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, R10, R3, -40),
+                       BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, R10, R3, -40),
+                       BPF_ALU32_REG(BPF_MOV, R0, R3),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x89abcdef } },
+               .stack_depth = 40,
+       },
+       /* BPF_ATOMIC | BPF_DW, BPF_CMPXCHG */
+       {
+               "BPF_ATOMIC | BPF_DW, BPF_CMPXCHG: Test successful return",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R1, 0x0123456789abcdefULL),
+                       BPF_LD_IMM64(R2, 0xfecdba9876543210ULL),
+                       BPF_ALU64_REG(BPF_MOV, R0, R1),
+                       BPF_STX_MEM(BPF_DW, R10, R1, -40),
+                       BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, R10, R2, -40),
+                       BPF_JMP_REG(BPF_JNE, R0, R1, 1),
+                       BPF_ALU64_REG(BPF_SUB, R0, R1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0 } },
+               .stack_depth = 40,
+       },
+       {
+               "BPF_ATOMIC | BPF_DW, BPF_CMPXCHG: Test successful store",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R1, 0x0123456789abcdefULL),
+                       BPF_LD_IMM64(R2, 0xfecdba9876543210ULL),
+                       BPF_ALU64_REG(BPF_MOV, R0, R1),
+                       BPF_STX_MEM(BPF_DW, R10, R0, -40),
+                       BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, R10, R2, -40),
+                       BPF_LDX_MEM(BPF_DW, R0, R10, -40),
+                       BPF_JMP_REG(BPF_JNE, R0, R2, 1),
+                       BPF_ALU64_REG(BPF_SUB, R0, R2),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0 } },
+               .stack_depth = 40,
+       },
+       {
+               "BPF_ATOMIC | BPF_DW, BPF_CMPXCHG: Test failure return",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R1, 0x0123456789abcdefULL),
+                       BPF_LD_IMM64(R2, 0xfecdba9876543210ULL),
+                       BPF_ALU64_REG(BPF_MOV, R0, R1),
+                       BPF_ALU64_IMM(BPF_ADD, R0, 1),
+                       BPF_STX_MEM(BPF_DW, R10, R1, -40),
+                       BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, R10, R2, -40),
+                       BPF_JMP_REG(BPF_JNE, R0, R1, 1),
+                       BPF_ALU64_REG(BPF_SUB, R0, R1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0 } },
+               .stack_depth = 40,
+       },
+       {
+               "BPF_ATOMIC | BPF_DW, BPF_CMPXCHG: Test failure store",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R1, 0x0123456789abcdefULL),
+                       BPF_LD_IMM64(R2, 0xfecdba9876543210ULL),
+                       BPF_ALU64_REG(BPF_MOV, R0, R1),
+                       BPF_ALU64_IMM(BPF_ADD, R0, 1),
+                       BPF_STX_MEM(BPF_DW, R10, R1, -40),
+                       BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, R10, R2, -40),
+                       BPF_LDX_MEM(BPF_DW, R0, R10, -40),
+                       BPF_JMP_REG(BPF_JNE, R0, R1, 1),
+                       BPF_ALU64_REG(BPF_SUB, R0, R1),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0 } },
+               .stack_depth = 40,
+       },
+       {
+               "BPF_ATOMIC | BPF_DW, BPF_CMPXCHG: Test side effects",
+               .u.insns_int = {
+                       BPF_LD_IMM64(R1, 0x0123456789abcdefULL),
+                       BPF_LD_IMM64(R2, 0xfecdba9876543210ULL),
+                       BPF_ALU64_REG(BPF_MOV, R0, R1),
+                       BPF_STX_MEM(BPF_DW, R10, R1, -40),
+                       BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, R10, R2, -40),
+                       BPF_LD_IMM64(R0, 0xfecdba9876543210ULL),
+                       BPF_JMP_REG(BPF_JNE, R0, R2, 1),
+                       BPF_ALU64_REG(BPF_SUB, R0, R2),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0 } },
+               .stack_depth = 40,
+       },
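+       /*
+        * The BPF_JMP32 variants compare only the low 32 bits of their
+        * operands. Each test takes one branch that must not be taken
+        * and one that must, leaving the original value in R0.
+        */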
+       /* BPF_JMP32 | BPF_JEQ | BPF_K */
+       {
+               "JMP32_JEQ_K: Small immediate",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 123),
+                       BPF_JMP32_IMM(BPF_JEQ, R0, 321, 1),
+                       BPF_JMP32_IMM(BPF_JEQ, R0, 123, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 123 } }
+       },
+       {
+               "JMP32_JEQ_K: Large immediate",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 12345678),
+                       BPF_JMP32_IMM(BPF_JEQ, R0, 12345678 & 0xffff, 1),
+                       BPF_JMP32_IMM(BPF_JEQ, R0, 12345678, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 12345678 } }
+       },
+       {
+               "JMP32_JEQ_K: negative immediate",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, -123),
+                       BPF_JMP32_IMM(BPF_JEQ, R0,  123, 1),
+                       BPF_JMP32_IMM(BPF_JEQ, R0, -123, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, -123 } }
+       },
+       /* BPF_JMP32 | BPF_JEQ | BPF_X */
+       {
+               "JMP32_JEQ_X",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 1234),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 4321),
+                       BPF_JMP32_REG(BPF_JEQ, R0, R1, 2),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 1234),
+                       BPF_JMP32_REG(BPF_JEQ, R0, R1, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1234 } }
+       },
+       /* BPF_JMP32 | BPF_JNE | BPF_K */
+       {
+               "JMP32_JNE_K: Small immediate",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 123),
+                       BPF_JMP32_IMM(BPF_JNE, R0, 123, 1),
+                       BPF_JMP32_IMM(BPF_JNE, R0, 321, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 123 } }
+       },
+       {
+               "JMP32_JNE_K: Large immediate",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 12345678),
+                       BPF_JMP32_IMM(BPF_JNE, R0, 12345678, 1),
+                       BPF_JMP32_IMM(BPF_JNE, R0, 12345678 & 0xffff, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 12345678 } }
+       },
+       {
+               "JMP32_JNE_K: negative immediate",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, -123),
+                       BPF_JMP32_IMM(BPF_JNE, R0, -123, 1),
+                       BPF_JMP32_IMM(BPF_JNE, R0,  123, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, -123 } }
+       },
+       /* BPF_JMP32 | BPF_JNE | BPF_X */
+       {
+               "JMP32_JNE_X",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 1234),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 1234),
+                       BPF_JMP32_REG(BPF_JNE, R0, R1, 2),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 4321),
+                       BPF_JMP32_REG(BPF_JNE, R0, R1, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1234 } }
+       },
+       /* BPF_JMP32 | BPF_JSET | BPF_K */
+       {
+               "JMP32_JSET_K: Small immediate",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
+                       BPF_JMP32_IMM(BPF_JSET, R0, 2, 1),
+                       BPF_JMP32_IMM(BPF_JSET, R0, 3, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 1 } }
+       },
+       {
+               "JMP32_JSET_K: Large immediate",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0x40000000),
+                       BPF_JMP32_IMM(BPF_JSET, R0, 0x3fffffff, 1),
+                       BPF_JMP32_IMM(BPF_JSET, R0, 0x60000000, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0x40000000 } }
+       },
+       {
+               "JMP32_JSET_K: negative immediate",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, -123),
+                       BPF_JMP32_IMM(BPF_JSET, R0, -1, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, -123 } }
+       },
+       /* BPF_JMP32 | BPF_JSET | BPF_X */
+       {
+               "JMP32_JSET_X",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 8),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 7),
+                       BPF_JMP32_REG(BPF_JSET, R0, R1, 2),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 8 | 2),
+                       BPF_JMP32_REG(BPF_JNE, R0, R1, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 8 } }
+       },
+       /* BPF_JMP32 | BPF_JGT | BPF_K */
+       {
+               "JMP32_JGT_K: Small immediate",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 123),
+                       BPF_JMP32_IMM(BPF_JGT, R0, 123, 1),
+                       BPF_JMP32_IMM(BPF_JGT, R0, 122, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 123 } }
+       },
+       {
+               "JMP32_JGT_K: Large immediate",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe),
+                       BPF_JMP32_IMM(BPF_JGT, R0, 0xffffffff, 1),
+                       BPF_JMP32_IMM(BPF_JGT, R0, 0xfffffffd, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xfffffffe } }
+       },
+       /* BPF_JMP32 | BPF_JGT | BPF_X */
+       {
+               "JMP32_JGT_X",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 0xffffffff),
+                       BPF_JMP32_REG(BPF_JGT, R0, R1, 2),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 0xfffffffd),
+                       BPF_JMP32_REG(BPF_JGT, R0, R1, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               INTERNAL,
+               { },
+               { { 0, 0xfffffffe } }
+       },
+       /* BPF_JMP32 | BPF_JGE | BPF_K */
+       {
+               "JMP32_JGE_K: Small immediate",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, 123),
+                       BPF_JMP32_IMM(BPF_JGE, R0, 124, 1),
+                       BPF_JMP32_IMM(BPF_JGE, R0, 123, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0xff } },
-               .stack_depth = 40,
+               { { 0, 123 } }
        },
        {
-               "ST_MEM_B: Store/Load byte: max positive",
+               "JMP32_JGE_K: Large immediate",
                .u.insns_int = {
-                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
-                       BPF_ST_MEM(BPF_H, R10, -40, 0x7f),
-                       BPF_LDX_MEM(BPF_H, R0, R10, -40),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe),
+                       BPF_JMP32_IMM(BPF_JGE, R0, 0xffffffff, 1),
+                       BPF_JMP32_IMM(BPF_JGE, R0, 0xfffffffe, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0x7f } },
-               .stack_depth = 40,
+               { { 0, 0xfffffffe } }
        },
+       /* BPF_JMP32 | BPF_JGE | BPF_X */
        {
-               "STX_MEM_B: Store/Load byte: max negative",
+               "JMP32_JGE_X",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 0),
-                       BPF_LD_IMM64(R1, 0xffLL),
-                       BPF_STX_MEM(BPF_B, R10, R1, -40),
-                       BPF_LDX_MEM(BPF_B, R0, R10, -40),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 0xffffffff),
+                       BPF_JMP32_REG(BPF_JGE, R0, R1, 2),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 0xfffffffe),
+                       BPF_JMP32_REG(BPF_JGE, R0, R1, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0xff } },
-               .stack_depth = 40,
+               { { 0, 0xfffffffe } }
        },
+       /* BPF_JMP32 | BPF_JLT | BPF_K */
        {
-               "ST_MEM_H: Store/Load half word: max negative",
+               "JMP32_JLT_K: Small immediate",
                .u.insns_int = {
-                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
-                       BPF_ST_MEM(BPF_H, R10, -40, 0xffff),
-                       BPF_LDX_MEM(BPF_H, R0, R10, -40),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 123),
+                       BPF_JMP32_IMM(BPF_JLT, R0, 123, 1),
+                       BPF_JMP32_IMM(BPF_JLT, R0, 124, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0xffff } },
-               .stack_depth = 40,
+               { { 0, 123 } }
        },
        {
-               "ST_MEM_H: Store/Load half word: max positive",
+               "JMP32_JLT_K: Large immediate",
                .u.insns_int = {
-                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
-                       BPF_ST_MEM(BPF_H, R10, -40, 0x7fff),
-                       BPF_LDX_MEM(BPF_H, R0, R10, -40),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe),
+                       BPF_JMP32_IMM(BPF_JLT, R0, 0xfffffffd, 1),
+                       BPF_JMP32_IMM(BPF_JLT, R0, 0xffffffff, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0x7fff } },
-               .stack_depth = 40,
+               { { 0, 0xfffffffe } }
        },
+       /* BPF_JMP32 | BPF_JLT | BPF_X */
        {
-               "STX_MEM_H: Store/Load half word: max negative",
+               "JMP32_JLT_X",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 0),
-                       BPF_LD_IMM64(R1, 0xffffLL),
-                       BPF_STX_MEM(BPF_H, R10, R1, -40),
-                       BPF_LDX_MEM(BPF_H, R0, R10, -40),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 0xfffffffd),
+                       BPF_JMP32_REG(BPF_JLT, R0, R1, 2),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 0xffffffff),
+                       BPF_JMP32_REG(BPF_JLT, R0, R1, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0xffff } },
-               .stack_depth = 40,
+               { { 0, 0xfffffffe } }
        },
+       /* BPF_JMP32 | BPF_JLE | BPF_K */
        {
-               "ST_MEM_W: Store/Load word: max negative",
+               "JMP32_JLE_K: Small immediate",
                .u.insns_int = {
-                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
-                       BPF_ST_MEM(BPF_W, R10, -40, 0xffffffff),
-                       BPF_LDX_MEM(BPF_W, R0, R10, -40),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 123),
+                       BPF_JMP32_IMM(BPF_JLE, R0, 122, 1),
+                       BPF_JMP32_IMM(BPF_JLE, R0, 123, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0xffffffff } },
-               .stack_depth = 40,
+               { { 0, 123 } }
        },
        {
-               "ST_MEM_W: Store/Load word: max positive",
+               "JMP32_JLE_K: Large immediate",
                .u.insns_int = {
-                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
-                       BPF_ST_MEM(BPF_W, R10, -40, 0x7fffffff),
-                       BPF_LDX_MEM(BPF_W, R0, R10, -40),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe),
+                       BPF_JMP32_IMM(BPF_JLE, R0, 0xfffffffd, 1),
+                       BPF_JMP32_IMM(BPF_JLE, R0, 0xfffffffe, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0x7fffffff } },
-               .stack_depth = 40,
+               { { 0, 0xfffffffe } }
        },
+       /* BPF_JMP32 | BPF_JLE | BPF_X */
        {
-               "STX_MEM_W: Store/Load word: max negative",
+               "JMP32_JLE_X",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 0),
-                       BPF_LD_IMM64(R1, 0xffffffffLL),
-                       BPF_STX_MEM(BPF_W, R10, R1, -40),
-                       BPF_LDX_MEM(BPF_W, R0, R10, -40),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 0xfffffffd),
+                       BPF_JMP32_REG(BPF_JLE, R0, R1, 2),
+                       BPF_ALU32_IMM(BPF_MOV, R1, 0xfffffffe),
+                       BPF_JMP32_REG(BPF_JLE, R0, R1, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0xffffffff } },
-               .stack_depth = 40,
+               { { 0, 0xfffffffe } }
        },
+       /* BPF_JMP32 | BPF_JSGT | BPF_K */
        {
-               "ST_MEM_DW: Store/Load double word: max negative",
+               "JMP32_JSGT_K: Small immediate",
                .u.insns_int = {
-                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
-                       BPF_ST_MEM(BPF_DW, R10, -40, 0xffffffff),
-                       BPF_LDX_MEM(BPF_DW, R0, R10, -40),
+                       BPF_ALU32_IMM(BPF_MOV, R0, -123),
+                       BPF_JMP32_IMM(BPF_JSGT, R0, -123, 1),
+                       BPF_JMP32_IMM(BPF_JSGT, R0, -124, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0xffffffff } },
-               .stack_depth = 40,
+               { { 0, -123 } }
        },
        {
-               "ST_MEM_DW: Store/Load double word: max negative 2",
+               "JMP32_JSGT_K: Large immediate",
                .u.insns_int = {
-                       BPF_LD_IMM64(R2, 0xffff00000000ffffLL),
-                       BPF_LD_IMM64(R3, 0xffffffffffffffffLL),
-                       BPF_ST_MEM(BPF_DW, R10, -40, 0xffffffff),
-                       BPF_LDX_MEM(BPF_DW, R2, R10, -40),
-                       BPF_JMP_REG(BPF_JEQ, R2, R3, 2),
-                       BPF_MOV32_IMM(R0, 2),
-                       BPF_EXIT_INSN(),
-                       BPF_MOV32_IMM(R0, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, -12345678),
+                       BPF_JMP32_IMM(BPF_JSGT, R0, -12345678, 1),
+                       BPF_JMP32_IMM(BPF_JSGT, R0, -12345679, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0x1 } },
-               .stack_depth = 40,
+               { { 0, -12345678 } }
        },
+       /* BPF_JMP32 | BPF_JSGT | BPF_X */
        {
-               "ST_MEM_DW: Store/Load double word: max positive",
+               "JMP32_JSGT_X",
                .u.insns_int = {
-                       BPF_ALU32_IMM(BPF_MOV, R0, 1),
-                       BPF_ST_MEM(BPF_DW, R10, -40, 0x7fffffff),
-                       BPF_LDX_MEM(BPF_DW, R0, R10, -40),
+                       BPF_ALU32_IMM(BPF_MOV, R0, -12345678),
+                       BPF_ALU32_IMM(BPF_MOV, R1, -12345678),
+                       BPF_JMP32_REG(BPF_JSGT, R0, R1, 2),
+                       BPF_ALU32_IMM(BPF_MOV, R1, -12345679),
+                       BPF_JMP32_REG(BPF_JSGT, R0, R1, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0x7fffffff } },
-               .stack_depth = 40,
+               { { 0, -12345678 } }
        },
+       /* BPF_JMP32 | BPF_JSGE | BPF_K */
        {
-               "STX_MEM_DW: Store/Load double word: max negative",
+               "JMP32_JSGE_K: Small immediate",
                .u.insns_int = {
-                       BPF_LD_IMM64(R0, 0),
-                       BPF_LD_IMM64(R1, 0xffffffffffffffffLL),
-                       BPF_STX_MEM(BPF_W, R10, R1, -40),
-                       BPF_LDX_MEM(BPF_W, R0, R10, -40),
+                       BPF_ALU32_IMM(BPF_MOV, R0, -123),
+                       BPF_JMP32_IMM(BPF_JSGE, R0, -122, 1),
+                       BPF_JMP32_IMM(BPF_JSGE, R0, -123, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0xffffffff } },
-               .stack_depth = 40,
+               { { 0, -123 } }
        },
-       /* BPF_STX | BPF_ATOMIC | BPF_W/DW */
        {
-               "STX_XADD_W: Test: 0x12 + 0x10 = 0x22",
+               "JMP32_JSGE_K: Large immediate",
                .u.insns_int = {
-                       BPF_ALU32_IMM(BPF_MOV, R0, 0x12),
-                       BPF_ST_MEM(BPF_W, R10, -40, 0x10),
-                       BPF_ATOMIC_OP(BPF_W, BPF_ADD, R10, R0, -40),
-                       BPF_LDX_MEM(BPF_W, R0, R10, -40),
+                       BPF_ALU32_IMM(BPF_MOV, R0, -12345678),
+                       BPF_JMP32_IMM(BPF_JSGE, R0, -12345677, 1),
+                       BPF_JMP32_IMM(BPF_JSGE, R0, -12345678, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0x22 } },
-               .stack_depth = 40,
+               { { 0, -12345678 } }
        },
+       /* BPF_JMP32 | BPF_JSGE | BPF_X */
        {
-               "STX_XADD_W: Test side-effects, r10: 0x12 + 0x10 = 0x22",
+               "JMP32_JSGE_X",
                .u.insns_int = {
-                       BPF_ALU64_REG(BPF_MOV, R1, R10),
-                       BPF_ALU32_IMM(BPF_MOV, R0, 0x12),
-                       BPF_ST_MEM(BPF_W, R10, -40, 0x10),
-                       BPF_ATOMIC_OP(BPF_W, BPF_ADD, R10, R0, -40),
-                       BPF_ALU64_REG(BPF_MOV, R0, R10),
-                       BPF_ALU64_REG(BPF_SUB, R0, R1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, -12345678),
+                       BPF_ALU32_IMM(BPF_MOV, R1, -12345677),
+                       BPF_JMP32_REG(BPF_JSGE, R0, R1, 2),
+                       BPF_ALU32_IMM(BPF_MOV, R1, -12345678),
+                       BPF_JMP32_REG(BPF_JSGE, R0, R1, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0 } },
-               .stack_depth = 40,
+               { { 0, -12345678 } }
        },
+       /* BPF_JMP32 | BPF_JSLT | BPF_K */
        {
-               "STX_XADD_W: Test side-effects, r0: 0x12 + 0x10 = 0x22",
+               "JMP32_JSLT_K: Small immediate",
                .u.insns_int = {
-                       BPF_ALU32_IMM(BPF_MOV, R0, 0x12),
-                       BPF_ST_MEM(BPF_W, R10, -40, 0x10),
-                       BPF_ATOMIC_OP(BPF_W, BPF_ADD, R10, R0, -40),
+                       BPF_ALU32_IMM(BPF_MOV, R0, -123),
+                       BPF_JMP32_IMM(BPF_JSLT, R0, -123, 1),
+                       BPF_JMP32_IMM(BPF_JSLT, R0, -122, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0x12 } },
-               .stack_depth = 40,
+               { { 0, -123 } }
        },
        {
-               "STX_XADD_W: X + 1 + 1 + 1 + ...",
-               { },
+               "JMP32_JSLT_K: Large immediate",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, -12345678),
+                       BPF_JMP32_IMM(BPF_JSLT, R0, -12345678, 1),
+                       BPF_JMP32_IMM(BPF_JSLT, R0, -12345677, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
                INTERNAL,
                { },
-               { { 0, 4134 } },
-               .fill_helper = bpf_fill_stxw,
+               { { 0, -12345678 } }
        },
+       /* BPF_JMP32 | BPF_JSLT | BPF_X */
        {
-               "STX_XADD_DW: Test: 0x12 + 0x10 = 0x22",
+               "JMP32_JSLT_X",
                .u.insns_int = {
-                       BPF_ALU32_IMM(BPF_MOV, R0, 0x12),
-                       BPF_ST_MEM(BPF_DW, R10, -40, 0x10),
-                       BPF_ATOMIC_OP(BPF_DW, BPF_ADD, R10, R0, -40),
-                       BPF_LDX_MEM(BPF_DW, R0, R10, -40),
+                       BPF_ALU32_IMM(BPF_MOV, R0, -12345678),
+                       BPF_ALU32_IMM(BPF_MOV, R1, -12345678),
+                       BPF_JMP32_REG(BPF_JSLT, R0, R1, 2),
+                       BPF_ALU32_IMM(BPF_MOV, R1, -12345677),
+                       BPF_JMP32_REG(BPF_JSLT, R0, R1, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0x22 } },
-               .stack_depth = 40,
+               { { 0, -12345678 } }
        },
+       /* BPF_JMP32 | BPF_JSLE | BPF_K */
        {
-               "STX_XADD_DW: Test side-effects, r10: 0x12 + 0x10 = 0x22",
+               "JMP32_JSLE_K: Small immediate",
                .u.insns_int = {
-                       BPF_ALU64_REG(BPF_MOV, R1, R10),
-                       BPF_ALU32_IMM(BPF_MOV, R0, 0x12),
-                       BPF_ST_MEM(BPF_DW, R10, -40, 0x10),
-                       BPF_ATOMIC_OP(BPF_DW, BPF_ADD, R10, R0, -40),
-                       BPF_ALU64_REG(BPF_MOV, R0, R10),
-                       BPF_ALU64_REG(BPF_SUB, R0, R1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, -123),
+                       BPF_JMP32_IMM(BPF_JSLE, R0, -124, 1),
+                       BPF_JMP32_IMM(BPF_JSLE, R0, -123, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0 } },
-               .stack_depth = 40,
+               { { 0, -123 } }
        },
        {
-               "STX_XADD_DW: Test side-effects, r0: 0x12 + 0x10 = 0x22",
+               "JMP32_JSLE_K: Large immediate",
                .u.insns_int = {
-                       BPF_ALU32_IMM(BPF_MOV, R0, 0x12),
-                       BPF_ST_MEM(BPF_DW, R10, -40, 0x10),
-                       BPF_ATOMIC_OP(BPF_DW, BPF_ADD, R10, R0, -40),
+                       BPF_ALU32_IMM(BPF_MOV, R0, -12345678),
+                       BPF_JMP32_IMM(BPF_JSLE, R0, -12345679, 1),
+                       BPF_JMP32_IMM(BPF_JSLE, R0, -12345678, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
                { },
-               { { 0, 0x12 } },
-               .stack_depth = 40,
+               { { 0, -12345678 } }
        },
+       /* BPF_JMP32 | BPF_JSLE | BPF_X */
        {
-               "STX_XADD_DW: X + 1 + 1 + 1 + ...",
-               { },
+               "JMP32_JSLE_X",
+               .u.insns_int = {
+                       BPF_ALU32_IMM(BPF_MOV, R0, -12345678),
+                       BPF_ALU32_IMM(BPF_MOV, R1, -12345679),
+                       BPF_JMP32_REG(BPF_JSLE, R0, R1, 2),
+                       BPF_ALU32_IMM(BPF_MOV, R1, -12345678),
+                       BPF_JMP32_REG(BPF_JSLE, R0, R1, 1),
+                       BPF_ALU32_IMM(BPF_MOV, R0, 0),
+                       BPF_EXIT_INSN(),
+               },
                INTERNAL,
                { },
-               { { 0, 4134 } },
-               .fill_helper = bpf_fill_stxdw,
+               { { 0, -12345678 } }
        },
        /* BPF_JMP | BPF_EXIT */
        {
@@ -5223,6 +7192,14 @@ static struct bpf_test tests[] = {
                { },
                { { 0, 1 } },
        },
+       {       /* Mainly checking JIT here. */
+               "BPF_MAXINSNS: Very long conditional jump",
+               { },
+               INTERNAL | FLAG_NO_DATA,
+               { },
+               { { 0, 1 } },
+               .fill_helper = bpf_fill_long_jmp,
+       },
        {
                "JMP_JA: Jump, gap, jump, ...",
                { },
@@ -6659,7 +8636,14 @@ static int run_one(const struct bpf_prog *fp, struct bpf_test *test)
                u64 duration;
                u32 ret;
 
-               if (test->test[i].data_size == 0 &&
+               /*
+                * NOTE: Several sub-tests may be present, in which case
+                * a zero {data_size, result} tuple indicates the end of
+                * the sub-test array. The first test is always run,
+                * even if both data_size and result happen to be zero.
+                */
+               if (i > 0 &&
+                   test->test[i].data_size == 0 &&
                    test->test[i].result == 0)
                        break;
 
@@ -7005,8 +8989,248 @@ static __init int test_bpf(void)
        return err_cnt ? -EINVAL : 0;
 }
 
+struct tail_call_test {
+       const char *descr;
+       struct bpf_insn insns[MAX_INSNS];
+       int result;
+       int stack_depth;
+};
+
+/*
+ * Magic marker used in test snippets for tail calls below.
+ * The BPF_LD to R2 and the BPF_MOV to R3 carrying this immediate value
+ * are replaced with the proper values by the test runner.
+ */
+#define TAIL_CALL_MARKER 0x7a11ca11
+
+/* Special offset to indicate a NULL call target */
+#define TAIL_CALL_NULL 0x7fff
+
+/* Special offset to indicate an out-of-range index */
+#define TAIL_CALL_INVALID 0x7ffe
+
+#define TAIL_CALL(offset)                             \
+       BPF_LD_IMM64(R2, TAIL_CALL_MARKER),            \
+       BPF_RAW_INSN(BPF_ALU | BPF_MOV | BPF_K, R3, 0, \
+                    offset, TAIL_CALL_MARKER),        \
+       BPF_JMP_IMM(BPF_TAIL_CALL, 0, 0, 0)
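+
+/*
+ * For illustration: TAIL_CALL(-1) expands to a 64-bit load of
+ * TAIL_CALL_MARKER into R2, a 32-bit move into R3 that stashes the
+ * relative offset -1 in the instruction's off field (with the marker
+ * as its immediate), and a BPF_TAIL_CALL instruction. The runner in
+ * prepare_tail_call_tests() below rewrites R2 with the address of the
+ * program array and R3 with the absolute index of the target program.
+ */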
+
+/*
+ * Tail call tests. Each test case may call any other test in the table,
+ * including itself; the callee is given as a relative index offset from
+ * the calling test. The index TAIL_CALL_NULL can be used for a NULL target
+ * function to test the JIT error path. Similarly, the index TAIL_CALL_INVALID
+ * results in a target index that is out of range.
+ */
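+
+/*
+ * For illustration, one worked expected result: programs here run with a
+ * NULL context, so R1 enters as 0. "Tail call 2" adds 2 to R1 and
+ * tail-calls the preceding entry, "Tail call leaf", which returns R1 + 1,
+ * hence .result = 3. "Tail call 4" chains through tests 3 and 2 to the
+ * leaf: 4 + 3 + 2 + 1 = 10.
+ */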
+static struct tail_call_test tail_call_tests[] = {
+       {
+               "Tail call leaf",
+               .insns = {
+                       BPF_ALU64_REG(BPF_MOV, R0, R1),
+                       BPF_ALU64_IMM(BPF_ADD, R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               .result = 1,
+       },
+       {
+               "Tail call 2",
+               .insns = {
+                       BPF_ALU64_IMM(BPF_ADD, R1, 2),
+                       TAIL_CALL(-1),
+                       BPF_ALU64_IMM(BPF_MOV, R0, -1),
+                       BPF_EXIT_INSN(),
+               },
+               .result = 3,
+       },
+       {
+               "Tail call 3",
+               .insns = {
+                       BPF_ALU64_IMM(BPF_ADD, R1, 3),
+                       TAIL_CALL(-1),
+                       BPF_ALU64_IMM(BPF_MOV, R0, -1),
+                       BPF_EXIT_INSN(),
+               },
+               .result = 6,
+       },
+       {
+               "Tail call 4",
+               .insns = {
+                       BPF_ALU64_IMM(BPF_ADD, R1, 4),
+                       TAIL_CALL(-1),
+                       BPF_ALU64_IMM(BPF_MOV, R0, -1),
+                       BPF_EXIT_INSN(),
+               },
+               .result = 10,
+       },
+       {
+               "Tail call error path, max count reached",
+               .insns = {
+                       BPF_ALU64_IMM(BPF_ADD, R1, 1),
+                       BPF_ALU64_REG(BPF_MOV, R0, R1),
+                       TAIL_CALL(0),
+                       BPF_EXIT_INSN(),
+               },
+               .result = MAX_TAIL_CALL_CNT + 1,
+       },
+       {
+               "Tail call error path, NULL target",
+               .insns = {
+                       BPF_ALU64_IMM(BPF_MOV, R0, -1),
+                       TAIL_CALL(TAIL_CALL_NULL),
+                       BPF_ALU64_IMM(BPF_MOV, R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               .result = 1,
+       },
+       {
+               "Tail call error path, index out of range",
+               .insns = {
+                       BPF_ALU64_IMM(BPF_MOV, R0, -1),
+                       TAIL_CALL(TAIL_CALL_INVALID),
+                       BPF_ALU64_IMM(BPF_MOV, R0, 1),
+                       BPF_EXIT_INSN(),
+               },
+               .result = 1,
+       },
+};
+
+static void __init destroy_tail_call_tests(struct bpf_array *progs)
+{
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(tail_call_tests); i++)
+               if (progs->ptrs[i])
+                       bpf_prog_free(progs->ptrs[i]);
+       kfree(progs);
+}
+
+static __init int prepare_tail_call_tests(struct bpf_array **pprogs)
+{
+       int ntests = ARRAY_SIZE(tail_call_tests);
+       struct bpf_array *progs;
+       int which, err;
+
+       /* Allocate the table of programs to be used for tail calls */
+       progs = kzalloc(sizeof(*progs) + (ntests + 1) * sizeof(progs->ptrs[0]),
+                       GFP_KERNEL);
+       if (!progs)
+               goto out_nomem;
+
+       /* Create all eBPF programs and populate the table */
+       for (which = 0; which < ntests; which++) {
+               struct tail_call_test *test = &tail_call_tests[which];
+               struct bpf_prog *fp;
+               int len, i;
+
+               /* Compute the number of program instructions */
+               for (len = 0; len < MAX_INSNS; len++) {
+                       struct bpf_insn *insn = &test->insns[len];
+
+                       if (len < MAX_INSNS - 1 &&
+                           insn->code == (BPF_LD | BPF_DW | BPF_IMM))
+                               len++;
+                       if (insn->code == 0)
+                               break;
+               }
+
+               /* Allocate and initialize the program */
+               fp = bpf_prog_alloc(bpf_prog_size(len), 0);
+               if (!fp)
+                       goto out_nomem;
+
+               fp->len = len;
+               fp->type = BPF_PROG_TYPE_SOCKET_FILTER;
+               fp->aux->stack_depth = test->stack_depth;
+               memcpy(fp->insnsi, test->insns, len * sizeof(struct bpf_insn));
+
+               /* Relocate runtime tail call offsets and addresses */
+               for (i = 0; i < len; i++) {
+                       struct bpf_insn *insn = &fp->insnsi[i];
+
+                       if (insn->imm != TAIL_CALL_MARKER)
+                               continue;
+
+                       switch (insn->code) {
+                       case BPF_LD | BPF_DW | BPF_IMM:
+                               insn[0].imm = (u32)(long)progs;
+                               insn[1].imm = ((u64)(long)progs) >> 32;
+                               break;
+
+                       case BPF_ALU | BPF_MOV | BPF_K:
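+                               /*
+                                * Slot ntests is the trailing NULL entry in
+                                * the program array; index ntests + 1 equals
+                                * map.max_entries and is thus out of range.
+                                */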
+                               if (insn->off == TAIL_CALL_NULL)
+                                       insn->imm = ntests;
+                               else if (insn->off == TAIL_CALL_INVALID)
+                                       insn->imm = ntests + 1;
+                               else
+                                       insn->imm = which + insn->off;
+                               insn->off = 0;
+                       }
+               }
+
+               fp = bpf_prog_select_runtime(fp, &err);
+               if (err)
+                       goto out_err;
+
+               progs->ptrs[which] = fp;
+       }
+
+       /* The last entry contains a NULL program pointer */
+       progs->map.max_entries = ntests + 1;
+       *pprogs = progs;
+       return 0;
+
+out_nomem:
+       err = -ENOMEM;
+
+out_err:
+       if (progs)
+               destroy_tail_call_tests(progs);
+       return err;
+}
+
+static __init int test_tail_calls(struct bpf_array *progs)
+{
+       int i, err_cnt = 0, pass_cnt = 0;
+       int jit_cnt = 0, run_cnt = 0;
+
+       for (i = 0; i < ARRAY_SIZE(tail_call_tests); i++) {
+               struct tail_call_test *test = &tail_call_tests[i];
+               struct bpf_prog *fp = progs->ptrs[i];
+               u64 duration;
+               int ret;
+
+               cond_resched();
+
+               pr_info("#%d %s ", i, test->descr);
+               if (!fp) {
+                       err_cnt++;
+                       continue;
+               }
+               pr_cont("jited:%u ", fp->jited);
+
+               run_cnt++;
+               if (fp->jited)
+                       jit_cnt++;
+
+               ret = __run_one(fp, NULL, MAX_TESTRUNS, &duration);
+               if (ret == test->result) {
+                       pr_cont("%lld PASS", duration);
+                       pass_cnt++;
+               } else {
+                       pr_cont("ret %d != %d FAIL", ret, test->result);
+                       err_cnt++;
+               }
+       }
+
+       pr_info("%s: Summary: %d PASSED, %d FAILED, [%d/%d JIT'ed]\n",
+               __func__, pass_cnt, err_cnt, jit_cnt, run_cnt);
+
+       return err_cnt ? -EINVAL : 0;
+}
+
 static int __init test_bpf_init(void)
 {
+       struct bpf_array *progs = NULL;
        int ret;
 
        ret = prepare_bpf_tests();
@@ -7018,6 +9242,14 @@ static int __init test_bpf_init(void)
        if (ret)
                return ret;
 
+       ret = prepare_tail_call_tests(&progs);
+       if (ret)
+               return ret;
+       ret = test_tail_calls(progs);
+       destroy_tail_call_tests(progs);
+       if (ret)
+               return ret;
+
        return test_skb_segment();
 }
 
index 271f2ca..f5561ea 100644 (file)
@@ -398,12 +398,12 @@ static void cgwb_release_workfn(struct work_struct *work)
        blkcg_unpin_online(blkcg);
 
        fprop_local_destroy_percpu(&wb->memcg_completions);
-       percpu_ref_exit(&wb->refcnt);
 
        spin_lock_irq(&cgwb_lock);
        list_del(&wb->offline_node);
        spin_unlock_irq(&cgwb_lock);
 
+       percpu_ref_exit(&wb->refcnt);
        wb_exit(wb);
        WARN_ON_ONCE(!list_empty(&wb->b_attached));
        kfree_rcu(wb, rcu);
index d7666ac..575c685 100644 (file)
@@ -733,6 +733,22 @@ void kfence_shutdown_cache(struct kmem_cache *s)
 
 void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags)
 {
+       /*
+        * Perform size check before switching kfence_allocation_gate, so that
+        * we don't disable KFENCE without making an allocation.
+        */
+       if (size > PAGE_SIZE)
+               return NULL;
+
+       /*
+        * Skip allocations from non-default zones, including DMA. We cannot
+        * guarantee that pages in the KFENCE pool will have the requested
+        * properties (e.g. reside in DMAable memory).
+        */
+       if ((flags & GFP_ZONEMASK) ||
+           (s->flags & (SLAB_CACHE_DMA | SLAB_CACHE_DMA32)))
+               return NULL;
+
        /*
         * allocation_gate only needs to become non-zero, so it doesn't make
         * sense to continue writing to it and pay the associated contention
@@ -757,9 +773,6 @@ void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags)
        if (!READ_ONCE(kfence_enabled))
                return NULL;
 
-       if (size > PAGE_SIZE)
-               return NULL;
-
        return kfence_guarded_alloc(s, size, flags);
 }
 
index 7f24b9b..942cbc1 100644 (file)
@@ -852,7 +852,7 @@ static void kfence_test_exit(void)
        tracepoint_synchronize_unregister();
 }
 
-late_initcall(kfence_test_init);
+late_initcall_sync(kfence_test_init);
 module_exit(kfence_test_exit);
 
 MODULE_LICENSE("GPL v2");
index 0041ff6..de7b553 100644 (file)
@@ -947,7 +947,8 @@ static bool should_skip_region(struct memblock_type *type,
                return true;
 
        /* skip hotpluggable memory regions if needed */
-       if (movable_node_is_enabled() && memblock_is_hotpluggable(m))
+       if (movable_node_is_enabled() && memblock_is_hotpluggable(m) &&
+           !(flags & MEMBLOCK_HOTPLUG))
                return true;
 
        /* if we want mirror memory skip non-mirror memory regions */
index 1bbf239..8ef06f9 100644 (file)
@@ -3574,7 +3574,8 @@ static unsigned long mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
        unsigned long val;
 
        if (mem_cgroup_is_root(memcg)) {
-               cgroup_rstat_flush(memcg->css.cgroup);
+               /* mem_cgroup_threshold() can get here from irqsafe context */
+               cgroup_rstat_flush_irqsafe(memcg->css.cgroup);
                val = memcg_page_state(memcg, NR_FILE_PAGES) +
                        memcg_page_state(memcg, NR_ANON_MAPPED);
                if (swap)
index 747a01d..25fc46e 100644 (file)
@@ -4026,8 +4026,17 @@ vm_fault_t finish_fault(struct vm_fault *vmf)
                                return ret;
                }
 
-               if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd)))
+               if (vmf->prealloc_pte) {
+                       vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
+                       if (likely(pmd_none(*vmf->pmd))) {
+                               mm_inc_nr_ptes(vma->vm_mm);
+                               pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte);
+                               vmf->prealloc_pte = NULL;
+                       }
+                       spin_unlock(vmf->ptl);
+               } else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) {
                        return VM_FAULT_OOM;
+               }
        }
 
        /* See comment in handle_pte_fault() */
index 34a9ad3..7e24043 100644 (file)
@@ -2068,7 +2068,7 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
        LIST_HEAD(migratepages);
        new_page_t *new;
        bool compound;
-       unsigned int nr_pages = thp_nr_pages(page);
+       int nr_pages = thp_nr_pages(page);
 
        /*
         * PTE mapped THP or HugeTLB page can't reach here so the page could
index f5852a0..1854850 100644 (file)
@@ -156,14 +156,14 @@ static inline void put_memcg_path_buf(void)
 #define TRACE_MMAP_LOCK_EVENT(type, mm, ...)                                   \
        do {                                                                   \
                const char *memcg_path;                                        \
-               preempt_disable();                                             \
+               local_lock(&memcg_paths.lock);                                 \
                memcg_path = get_mm_memcg_path(mm);                            \
                trace_mmap_lock_##type(mm,                                     \
                                       memcg_path != NULL ? memcg_path : "",   \
                                       ##__VA_ARGS__);                         \
                if (likely(memcg_path != NULL))                                \
                        put_memcg_path_buf();                                  \
-               preempt_enable();                                              \
+               local_unlock(&memcg_paths.lock);                               \
        } while (0)
 
 #else /* !CONFIG_MEMCG */
index 3e97e68..856b175 100644 (file)
@@ -840,21 +840,24 @@ void init_mem_debugging_and_hardening(void)
        }
 #endif
 
-       if (_init_on_alloc_enabled_early) {
-               if (page_poisoning_requested)
-                       pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, "
-                               "will take precedence over init_on_alloc\n");
-               else
-                       static_branch_enable(&init_on_alloc);
-       }
-       if (_init_on_free_enabled_early) {
-               if (page_poisoning_requested)
-                       pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, "
-                               "will take precedence over init_on_free\n");
-               else
-                       static_branch_enable(&init_on_free);
+       if ((_init_on_alloc_enabled_early || _init_on_free_enabled_early) &&
+           page_poisoning_requested) {
+               pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, "
+                       "will take precedence over init_on_alloc and init_on_free\n");
+               _init_on_alloc_enabled_early = false;
+               _init_on_free_enabled_early = false;
        }
 
+       if (_init_on_alloc_enabled_early)
+               static_branch_enable(&init_on_alloc);
+       else
+               static_branch_disable(&init_on_alloc);
+
+       if (_init_on_free_enabled_early)
+               static_branch_enable(&init_on_free);
+       else
+               static_branch_disable(&init_on_free);
+
 #ifdef CONFIG_DEBUG_PAGEALLOC
        if (!debug_pagealloc_enabled())
                return;
index f77d254..030f02d 100644 (file)
@@ -152,6 +152,7 @@ static void secretmem_freepage(struct page *page)
 }
 
 const struct address_space_operations secretmem_aops = {
+       .set_page_dirty = __set_page_dirty_no_writeback,
        .freepage       = secretmem_freepage,
        .migratepage    = secretmem_migratepage,
        .isolate_page   = secretmem_isolate_page,
index f997fd5..58c01a3 100644 (file)
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -346,7 +346,7 @@ static inline void memcg_slab_free_hook(struct kmem_cache *s_orig,
                        continue;
 
                page = virt_to_head_page(p[i]);
-               objcgs = page_objcgs(page);
+               objcgs = page_objcgs_check(page);
                if (!objcgs)
                        continue;
 
index 090fa14..af984e4 100644 (file)
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3236,6 +3236,16 @@ struct detached_freelist {
        struct kmem_cache *s;
 };
 
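+/* Free a compound page backing a large (non-slab) kmalloc allocation */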
+static inline void free_nonslab_page(struct page *page)
+{
+       unsigned int order = compound_order(page);
+
+       VM_BUG_ON_PAGE(!PageCompound(page), page);
+       kfree_hook(page_address(page));
+       mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B, -(PAGE_SIZE << order));
+       __free_pages(page, order);
+}
+
 /*
  * This function progressively scans the array with free objects (with
  * a limited look ahead) and extract objects belonging to the same
@@ -3272,9 +3282,7 @@ int build_detached_freelist(struct kmem_cache *s, size_t size,
        if (!s) {
                /* Handle kalloc'ed objects */
                if (unlikely(!PageSlab(page))) {
-                       BUG_ON(!PageCompound(page));
-                       kfree_hook(object);
-                       __free_pages(page, compound_order(page));
+                       free_nonslab_page(page);
                        p[size] = NULL; /* mark object processed */
                        return size;
                }
@@ -4250,13 +4258,7 @@ void kfree(const void *x)
 
        page = virt_to_head_page(x);
        if (unlikely(!PageSlab(page))) {
-               unsigned int order = compound_order(page);
-
-               BUG_ON(!PageCompound(page));
-               kfree_hook(object);
-               mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
-                                     -(PAGE_SIZE << order));
-               __free_pages(page, order);
+               free_nonslab_page(page);
                return;
        }
        slab_free(page->slab_cache, page, object, NULL, 1, _RET_IP_);
index e4f63dd..3624977 100644 (file)
@@ -193,10 +193,8 @@ netdev_tx_t ax25_ip_xmit(struct sk_buff *skb)
        skb_pull(skb, AX25_KISS_HEADER_LEN);
 
        if (digipeat != NULL) {
-               if ((ourskb = ax25_rt_build_path(skb, src, dst, route->digipeat)) == NULL) {
-                       kfree_skb(skb);
+               if ((ourskb = ax25_rt_build_path(skb, src, dst, route->digipeat)) == NULL)
                        goto put;
-               }
 
                skb = ourskb;
        }
index f53751b..22f2f66 100644 (file)
@@ -325,7 +325,6 @@ void ax25_kick(ax25_cb *ax25)
 
 void ax25_transmit_buffer(ax25_cb *ax25, struct sk_buff *skb, int type)
 {
-       struct sk_buff *skbn;
        unsigned char *ptr;
        int headroom;
 
@@ -336,18 +335,12 @@ void ax25_transmit_buffer(ax25_cb *ax25, struct sk_buff *skb, int type)
 
        headroom = ax25_addr_size(ax25->digipeat);
 
-       if (skb_headroom(skb) < headroom) {
-               if ((skbn = skb_realloc_headroom(skb, headroom)) == NULL) {
+       if (unlikely(skb_headroom(skb) < headroom)) {
+               skb = skb_expand_head(skb, headroom);
+               if (!skb) {
                        printk(KERN_CRIT "AX.25: ax25_transmit_buffer - out of memory\n");
-                       kfree_skb(skb);
                        return;
                }
-
-               if (skb->sk != NULL)
-                       skb_set_owner_w(skbn, skb->sk);
-
-               consume_skb(skb);
-               skb = skbn;
        }
 
        ptr = skb_push(skb, headroom);
index b40e0bc..d0b2e09 100644 (file)
@@ -441,24 +441,17 @@ put:
 struct sk_buff *ax25_rt_build_path(struct sk_buff *skb, ax25_address *src,
        ax25_address *dest, ax25_digi *digi)
 {
-       struct sk_buff *skbn;
        unsigned char *bp;
        int len;
 
        len = digi->ndigi * AX25_ADDR_LEN;
 
-       if (skb_headroom(skb) < len) {
-               if ((skbn = skb_realloc_headroom(skb, len)) == NULL) {
+       if (unlikely(skb_headroom(skb) < len)) {
+               skb = skb_expand_head(skb, len);
+               if (!skb) {
                        printk(KERN_CRIT "AX.25: ax25_dg_build_path - out of memory\n");
                        return NULL;
                }
-
-               if (skb->sk != NULL)
-                       skb_set_owner_w(skbn, skb->sk);
-
-               consume_skb(skb);
-
-               skb = skbn;
        }
 
        bp = skb_push(skb, len);
index 63d42dc..2b639c8 100644 (file)
@@ -2274,8 +2274,7 @@ out:
        if (primary_if)
                batadv_hardif_put(primary_if);
 
-       if (soft_iface)
-               dev_put(soft_iface);
+       dev_put(soft_iface);
 
        return ret;
 }
@@ -2446,8 +2445,7 @@ out:
        if (primary_if)
                batadv_hardif_put(primary_if);
 
-       if (soft_iface)
-               dev_put(soft_iface);
+       dev_put(soft_iface);
 
        return ret;
 }
index 8c95a11..7976a04 100644 (file)
@@ -984,8 +984,7 @@ out:
        if (primary_if)
                batadv_hardif_put(primary_if);
 
-       if (soft_iface)
-               dev_put(soft_iface);
+       dev_put(soft_iface);
 
        return ret;
 }
index 007f282..36a98d3 100644 (file)
@@ -557,8 +557,7 @@ int batadv_gw_dump(struct sk_buff *msg, struct netlink_callback *cb)
 out:
        if (primary_if)
                batadv_hardif_put(primary_if);
-       if (soft_iface)
-               dev_put(soft_iface);
+       dev_put(soft_iface);
 
        return ret;
 }
index 923e219..0158f26 100644 (file)
@@ -91,8 +91,7 @@ static struct net_device *batadv_mcast_get_bridge(struct net_device *soft_iface)
                upper = netdev_master_upper_dev_get_rcu(upper);
        } while (upper && !(upper->priv_flags & IFF_EBRIDGE));
 
-       if (upper)
-               dev_hold(upper);
+       dev_hold(upper);
        rcu_read_unlock();
 
        return upper;
@@ -509,8 +508,7 @@ batadv_mcast_mla_softif_get(struct net_device *dev,
        }
 
 out:
-       if (bridge)
-               dev_put(bridge);
+       dev_put(bridge);
 
        return ret4 + ret6;
 }
@@ -2239,8 +2237,7 @@ batadv_mcast_netlink_get_primary(struct netlink_callback *cb,
        }
 
 out:
-       if (soft_iface)
-               dev_put(soft_iface);
+       dev_put(soft_iface);
 
        if (!ret && primary_if)
                *primary_if = hard_iface;
index da72494..6a4d3f4 100644 (file)
@@ -799,12 +799,10 @@ int batadv_hardif_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb)
  out:
        if (hardif)
                batadv_hardif_put(hardif);
-       if (hard_iface)
-               dev_put(hard_iface);
+       dev_put(hard_iface);
        if (primary_if)
                batadv_hardif_put(primary_if);
-       if (soft_iface)
-               dev_put(soft_iface);
+       dev_put(soft_iface);
 
        return ret;
 }
@@ -1412,12 +1410,10 @@ int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb)
  out:
        if (hardif)
                batadv_hardif_put(hardif);
-       if (hard_iface)
-               dev_put(hard_iface);
+       dev_put(hard_iface);
        if (primary_if)
                batadv_hardif_put(primary_if);
-       if (soft_iface)
-               dev_put(soft_iface);
+       dev_put(soft_iface);
 
        return ret;
 }
index 434b4f0..711fe5a 100644 (file)
@@ -820,8 +820,7 @@ check_roaming:
 out:
        if (in_hardif)
                batadv_hardif_put(in_hardif);
-       if (in_dev)
-               dev_put(in_dev);
+       dev_put(in_dev);
        if (tt_local)
                batadv_tt_local_entry_put(tt_local);
        if (tt_global)
@@ -1217,8 +1216,7 @@ int batadv_tt_local_dump(struct sk_buff *msg, struct netlink_callback *cb)
  out:
        if (primary_if)
                batadv_hardif_put(primary_if);
-       if (soft_iface)
-               dev_put(soft_iface);
+       dev_put(soft_iface);
 
        cb->args[0] = bucket;
        cb->args[1] = idx;
@@ -2005,8 +2003,7 @@ int batadv_tt_global_dump(struct sk_buff *msg, struct netlink_callback *cb)
  out:
        if (primary_if)
                batadv_hardif_put(primary_if);
-       if (soft_iface)
-               dev_put(soft_iface);
+       dev_put(soft_iface);
 
        cb->args[0] = bucket;
        cb->args[1] = idx;
index 2560ed2..e1a545c 100644 (file)
@@ -3996,14 +3996,10 @@ EXPORT_SYMBOL(hci_register_dev);
 /* Unregister HCI device */
 void hci_unregister_dev(struct hci_dev *hdev)
 {
-       int id;
-
        BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus);
 
        hci_dev_set_flag(hdev, HCI_UNREGISTER);
 
-       id = hdev->id;
-
        write_lock(&hci_dev_list_lock);
        list_del(&hdev->list);
        write_unlock(&hci_dev_list_lock);
@@ -4038,7 +4034,14 @@ void hci_unregister_dev(struct hci_dev *hdev)
        }
 
        device_del(&hdev->dev);
+       /* Actual cleanup is deferred until hci_cleanup_dev(). */
+       hci_dev_put(hdev);
+}
+EXPORT_SYMBOL(hci_unregister_dev);
 
+/* Cleanup HCI device */
+void hci_cleanup_dev(struct hci_dev *hdev)
+{
        debugfs_remove_recursive(hdev->debugfs);
        kfree_const(hdev->hw_info);
        kfree_const(hdev->fw_info);
@@ -4063,11 +4066,8 @@ void hci_unregister_dev(struct hci_dev *hdev)
        hci_blocked_keys_clear(hdev);
        hci_dev_unlock(hdev);
 
-       hci_dev_put(hdev);
-
-       ida_simple_remove(&hci_index_ida, id);
+       ida_simple_remove(&hci_index_ida, hdev->id);
 }
-EXPORT_SYMBOL(hci_unregister_dev);
 
 /* Suspend HCI device */
 int hci_suspend_dev(struct hci_dev *hdev)
index b04a5a0..f1128c2 100644 (file)
@@ -59,6 +59,17 @@ struct hci_pinfo {
        char              comm[TASK_COMM_LEN];
 };
 
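+/* Resolve the socket's bound hdev, rejecting a device being unregistered */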
+static struct hci_dev *hci_hdev_from_sock(struct sock *sk)
+{
+       struct hci_dev *hdev = hci_pi(sk)->hdev;
+
+       if (!hdev)
+               return ERR_PTR(-EBADFD);
+       if (hci_dev_test_flag(hdev, HCI_UNREGISTER))
+               return ERR_PTR(-EPIPE);
+       return hdev;
+}
+
 void hci_sock_set_flag(struct sock *sk, int nr)
 {
        set_bit(nr, &hci_pi(sk)->flags);
@@ -759,19 +770,13 @@ void hci_sock_dev_event(struct hci_dev *hdev, int event)
        if (event == HCI_DEV_UNREG) {
                struct sock *sk;
 
-               /* Detach sockets from device */
+               /* Wake up sockets using this dead device */
                read_lock(&hci_sk_list.lock);
                sk_for_each(sk, &hci_sk_list.head) {
-                       lock_sock(sk);
                        if (hci_pi(sk)->hdev == hdev) {
-                               hci_pi(sk)->hdev = NULL;
                                sk->sk_err = EPIPE;
-                               sk->sk_state = BT_OPEN;
                                sk->sk_state_change(sk);
-
-                               hci_dev_put(hdev);
                        }
-                       release_sock(sk);
                }
                read_unlock(&hci_sk_list.lock);
        }
@@ -930,10 +935,10 @@ static int hci_sock_reject_list_del(struct hci_dev *hdev, void __user *arg)
 static int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd,
                                unsigned long arg)
 {
-       struct hci_dev *hdev = hci_pi(sk)->hdev;
+       struct hci_dev *hdev = hci_hdev_from_sock(sk);
 
-       if (!hdev)
-               return -EBADFD;
+       if (IS_ERR(hdev))
+               return PTR_ERR(hdev);
 
        if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL))
                return -EBUSY;
@@ -1103,6 +1108,18 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr,
 
        lock_sock(sk);
 
+       /* Allow detaching from a dead device and attaching to a live device,
+        * if the caller wants to re-bind (instead of close) this socket in
+        * response to the hci_sock_dev_event(HCI_DEV_UNREG) notification.
+        */
+       hdev = hci_pi(sk)->hdev;
+       if (hdev && hci_dev_test_flag(hdev, HCI_UNREGISTER)) {
+               hci_pi(sk)->hdev = NULL;
+               sk->sk_state = BT_OPEN;
+               hci_dev_put(hdev);
+       }
+       hdev = NULL;
+
        if (sk->sk_state == BT_BOUND) {
                err = -EALREADY;
                goto done;
@@ -1379,9 +1396,9 @@ static int hci_sock_getname(struct socket *sock, struct sockaddr *addr,
 
        lock_sock(sk);
 
-       hdev = hci_pi(sk)->hdev;
-       if (!hdev) {
-               err = -EBADFD;
+       hdev = hci_hdev_from_sock(sk);
+       if (IS_ERR(hdev)) {
+               err = PTR_ERR(hdev);
                goto done;
        }
 
@@ -1743,9 +1760,9 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg,
                goto done;
        }
 
-       hdev = hci_pi(sk)->hdev;
-       if (!hdev) {
-               err = -EBADFD;
+       hdev = hci_hdev_from_sock(sk);
+       if (IS_ERR(hdev)) {
+               err = PTR_ERR(hdev);
                goto done;
        }
 
index 9874844..b69d88b 100644 (file)
@@ -83,6 +83,9 @@ void hci_conn_del_sysfs(struct hci_conn *conn)
 static void bt_host_release(struct device *dev)
 {
        struct hci_dev *hdev = to_hci_dev(dev);
+
+       if (hci_dev_test_flag(hdev, HCI_UNREGISTER))
+               hci_cleanup_dev(hdev);
        kfree(hdev);
        module_put(THIS_MODULE);
 }
index b488e27..335e1d8 100644 (file)
@@ -88,17 +88,19 @@ reset:
 static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
                        u32 *retval, u32 *time, bool xdp)
 {
-       struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { NULL };
+       struct bpf_prog_array_item item = {.prog = prog};
+       struct bpf_run_ctx *old_ctx;
+       struct bpf_cg_run_ctx run_ctx;
        struct bpf_test_timer t = { NO_MIGRATE };
        enum bpf_cgroup_storage_type stype;
        int ret;
 
        for_each_cgroup_storage_type(stype) {
-               storage[stype] = bpf_cgroup_storage_alloc(prog, stype);
-               if (IS_ERR(storage[stype])) {
-                       storage[stype] = NULL;
+               item.cgroup_storage[stype] = bpf_cgroup_storage_alloc(prog, stype);
+               if (IS_ERR(item.cgroup_storage[stype])) {
+                       item.cgroup_storage[stype] = NULL;
                        for_each_cgroup_storage_type(stype)
-                               bpf_cgroup_storage_free(storage[stype]);
+                               bpf_cgroup_storage_free(item.cgroup_storage[stype]);
                        return -ENOMEM;
                }
        }
@@ -107,22 +109,19 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
                repeat = 1;
 
        bpf_test_timer_enter(&t);
+       old_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
        do {
-               ret = bpf_cgroup_storage_set(storage);
-               if (ret)
-                       break;
-
+               run_ctx.prog_item = &item;
                if (xdp)
                        *retval = bpf_prog_run_xdp(prog, ctx);
                else
                        *retval = BPF_PROG_RUN(prog, ctx);
-
-               bpf_cgroup_storage_unset();
        } while (bpf_test_timer_continue(&t, repeat, &ret, time));
+       bpf_reset_run_ctx(old_ctx);
        bpf_test_timer_leave(&t);
 
        for_each_cgroup_storage_type(stype)
-               bpf_cgroup_storage_free(storage[stype]);
+               bpf_cgroup_storage_free(item.cgroup_storage[stype]);
 
        return ret;
 }
@@ -764,8 +763,7 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
        if (prog->expected_attach_type == BPF_XDP_DEVMAP ||
            prog->expected_attach_type == BPF_XDP_CPUMAP)
                return -EINVAL;
-       if (kattr->test.ctx_in || kattr->test.ctx_out)
-               return -EINVAL;
+
        ctx = bpf_ctx_init(kattr, sizeof(struct xdp_md));
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);
index 8fb5dca..c8ae823 100644 (file)
@@ -166,7 +166,8 @@ static int br_switchdev_event(struct notifier_block *unused,
        case SWITCHDEV_FDB_ADD_TO_BRIDGE:
                fdb_info = ptr;
                err = br_fdb_external_learn_add(br, p, fdb_info->addr,
-                                               fdb_info->vid, false);
+                                               fdb_info->vid,
+                                               fdb_info->is_local, false);
                if (err) {
                        err = notifier_from_errno(err);
                        break;
@@ -201,6 +202,48 @@ static struct notifier_block br_switchdev_notifier = {
        .notifier_call = br_switchdev_event,
 };
 
+/* called under rtnl_mutex */
+static int br_switchdev_blocking_event(struct notifier_block *nb,
+                                      unsigned long event, void *ptr)
+{
+       struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(ptr);
+       struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
+       struct switchdev_notifier_brport_info *brport_info;
+       const struct switchdev_brport *b;
+       struct net_bridge_port *p;
+       int err = NOTIFY_DONE;
+
+       p = br_port_get_rtnl(dev);
+       if (!p)
+               goto out;
+
+       switch (event) {
+       case SWITCHDEV_BRPORT_OFFLOADED:
+               brport_info = ptr;
+               b = &brport_info->brport;
+
+               err = br_switchdev_port_offload(p, b->dev, b->ctx,
+                                               b->atomic_nb, b->blocking_nb,
+                                               b->tx_fwd_offload, extack);
+               err = notifier_from_errno(err);
+               break;
+       case SWITCHDEV_BRPORT_UNOFFLOADED:
+               brport_info = ptr;
+               b = &brport_info->brport;
+
+               br_switchdev_port_unoffload(p, b->ctx, b->atomic_nb,
+                                           b->blocking_nb);
+               break;
+       }
+
+out:
+       return err;
+}
+
+static struct notifier_block br_switchdev_blocking_notifier = {
+       .notifier_call = br_switchdev_blocking_event,
+};
+
 /* br_boolopt_toggle - change user-controlled boolean option
  *
  * @br: bridge device
@@ -355,10 +398,14 @@ static int __init br_init(void)
        if (err)
                goto err_out4;
 
-       err = br_netlink_init();
+       err = register_switchdev_blocking_notifier(&br_switchdev_blocking_notifier);
        if (err)
                goto err_out5;
 
+       err = br_netlink_init();
+       if (err)
+               goto err_out6;
+
        brioctl_set(br_ioctl_stub);
 
 #if IS_ENABLED(CONFIG_ATM_LANE)
@@ -373,6 +420,8 @@ static int __init br_init(void)
 
        return 0;
 
+err_out6:
+       unregister_switchdev_blocking_notifier(&br_switchdev_blocking_notifier);
 err_out5:
        unregister_switchdev_notifier(&br_switchdev_notifier);
 err_out4:
@@ -392,6 +441,7 @@ static void __exit br_deinit(void)
 {
        stp_proto_unregister(&br_stp_proto);
        br_netlink_fini();
+       unregister_switchdev_blocking_notifier(&br_switchdev_blocking_notifier);
        unregister_switchdev_notifier(&br_switchdev_notifier);
        unregister_netdevice_notifier(&br_device_notifier);
        brioctl_set(NULL);
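
With the blocking notifier in place, switchdev drivers stop calling into the bridge directly: they go through the net/switchdev wrapper, which packs the arguments into a switchdev_notifier_brport_info and raises SWITCHDEV_BRPORT_OFFLOADED, handled above by br_switchdev_blocking_event(). A hedged driver-side sketch; the foo_* names are hypothetical and the two notifier blocks are assumed to be defined elsewhere in the driver:

static int foo_port_bridge_join(struct net_device *brport_dev,
                                struct net_device *dev,
                                struct foo_priv *priv,
                                struct netlink_ext_ack *extack)
{
        /* Fires SWITCHDEV_BRPORT_OFFLOADED on the blocking chain; the
         * bridge handler turns it into br_switchdev_port_offload().
         */
        return switchdev_bridge_port_offload(brport_dev, dev, priv,
                                             &foo_switchdev_nb,
                                             &foo_switchdev_blocking_nb,
                                             false, extack);
}
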
index 4ff8c67..ddd09f5 100644 (file)
@@ -745,7 +745,7 @@ static int br_fdb_replay_one(struct net_bridge *br, struct notifier_block *nb,
        item.added_by_user = test_bit(BR_FDB_ADDED_BY_USER, &fdb->flags);
        item.offloaded = test_bit(BR_FDB_OFFLOADED, &fdb->flags);
        item.is_local = test_bit(BR_FDB_LOCAL, &fdb->flags);
-       item.info.dev = item.is_local ? br->dev : p->dev;
+       item.info.dev = (!p || item.is_local) ? br->dev : p->dev;
        item.info.ctx = ctx;
 
        err = nb->notifier_call(nb, action, &item);
@@ -1011,7 +1011,8 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
 
 static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br,
                        struct net_bridge_port *p, const unsigned char *addr,
-                       u16 nlh_flags, u16 vid, struct nlattr *nfea_tb[])
+                       u16 nlh_flags, u16 vid, struct nlattr *nfea_tb[],
+                       struct netlink_ext_ack *extack)
 {
        int err = 0;
 
@@ -1030,7 +1031,15 @@ static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br,
                rcu_read_unlock();
                local_bh_enable();
        } else if (ndm->ndm_flags & NTF_EXT_LEARNED) {
-               err = br_fdb_external_learn_add(br, p, addr, vid, true);
+               if (!p && !(ndm->ndm_state & NUD_PERMANENT)) {
+                       NL_SET_ERR_MSG_MOD(extack,
+                                          "FDB entry towards bridge must be permanent");
+                       return -EINVAL;
+               }
+
+               err = br_fdb_external_learn_add(br, p, addr, vid,
+                                               ndm->ndm_state & NUD_PERMANENT,
+                                               true);
        } else {
                spin_lock_bh(&br->hash_lock);
                err = fdb_add_entry(br, p, addr, ndm, nlh_flags, vid, nfea_tb);
@@ -1102,9 +1111,11 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
                }
 
                /* VID was specified, so use it. */
-               err = __br_fdb_add(ndm, br, p, addr, nlh_flags, vid, nfea_tb);
+               err = __br_fdb_add(ndm, br, p, addr, nlh_flags, vid, nfea_tb,
+                                  extack);
        } else {
-               err = __br_fdb_add(ndm, br, p, addr, nlh_flags, 0, nfea_tb);
+               err = __br_fdb_add(ndm, br, p, addr, nlh_flags, 0, nfea_tb,
+                                  extack);
                if (err || !vg || !vg->num_vlans)
                        goto out;
 
@@ -1116,7 +1127,7 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
                        if (!br_vlan_should_use(v))
                                continue;
                        err = __br_fdb_add(ndm, br, p, addr, nlh_flags, v->vid,
-                                          nfea_tb);
+                                          nfea_tb, extack);
                        if (err)
                                goto out;
                }
@@ -1256,7 +1267,7 @@ void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p)
 }
 
 int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
-                             const unsigned char *addr, u16 vid,
+                             const unsigned char *addr, u16 vid, bool is_local,
                              bool swdev_notify)
 {
        struct net_bridge_fdb_entry *fdb;
@@ -1273,6 +1284,10 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
 
                if (swdev_notify)
                        flags |= BIT(BR_FDB_ADDED_BY_USER);
+
+               if (is_local)
+                       flags |= BIT(BR_FDB_LOCAL);
+
                fdb = fdb_create(br, p, addr, vid, flags);
                if (!fdb) {
                        err = -ENOMEM;
@@ -1299,6 +1314,9 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
                if (swdev_notify)
                        set_bit(BR_FDB_ADDED_BY_USER, &fdb->flags);
 
+               if (is_local)
+                       set_bit(BR_FDB_LOCAL, &fdb->flags);
+
                if (modified)
                        fdb_notify(br, fdb, RTM_NEWNEIGH, swdev_notify);
        }
index 86f6d7e..67c6024 100644 (file)
@@ -456,7 +456,7 @@ int br_add_bridge(struct net *net, const char *name)
        dev_net_set(dev, net);
        dev->rtnl_link_ops = &br_link_ops;
 
-       res = register_netdev(dev);
+       res = register_netdevice(dev);
        if (res)
                free_netdev(dev);
        return res;
@@ -467,7 +467,6 @@ int br_del_bridge(struct net *net, const char *name)
        struct net_device *dev;
        int ret = 0;
 
-       rtnl_lock();
        dev = __dev_get_by_name(net, name);
        if (dev == NULL)
                ret =  -ENXIO;  /* Could not find device */
@@ -485,7 +484,6 @@ int br_del_bridge(struct net *net, const char *name)
        else
                br_dev_delete(dev, NULL);
 
-       rtnl_unlock();
        return ret;
 }
 
index 46a24c2..793b0db 100644 (file)
@@ -351,7 +351,7 @@ static int old_deviceless(struct net *net, void __user *uarg)
                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
                        return -EPERM;
 
-               if (copy_from_user(buf, uarg, IFNAMSIZ))
+               if (copy_from_user(buf, (void __user *)args[1], IFNAMSIZ))
                        return -EFAULT;
 
                buf[IFNAMSIZ-1] = 0;
@@ -369,33 +369,44 @@ static int old_deviceless(struct net *net, void __user *uarg)
 int br_ioctl_stub(struct net *net, struct net_bridge *br, unsigned int cmd,
                  struct ifreq *ifr, void __user *uarg)
 {
+       int ret = -EOPNOTSUPP;
+
+       rtnl_lock();
+
        switch (cmd) {
        case SIOCGIFBR:
        case SIOCSIFBR:
-               return old_deviceless(net, uarg);
-
+               ret = old_deviceless(net, uarg);
+               break;
        case SIOCBRADDBR:
        case SIOCBRDELBR:
        {
                char buf[IFNAMSIZ];
 
-               if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
-                       return -EPERM;
+               if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) {
+                       ret = -EPERM;
+                       break;
+               }
 
-               if (copy_from_user(buf, uarg, IFNAMSIZ))
-                       return -EFAULT;
+               if (copy_from_user(buf, uarg, IFNAMSIZ)) {
+                       ret = -EFAULT;
+                       break;
+               }
 
                buf[IFNAMSIZ-1] = 0;
                if (cmd == SIOCBRADDBR)
-                       return br_add_bridge(net, buf);
-
-               return br_del_bridge(net, buf);
+                       ret = br_add_bridge(net, buf);
+               else
+                       ret = br_del_bridge(net, buf);
        }
-
+               break;
        case SIOCBRADDIF:
        case SIOCBRDELIF:
-               return add_del_if(br, ifr->ifr_ifindex, cmd == SIOCBRADDIF);
-
+               ret = add_del_if(br, ifr->ifr_ifindex, cmd == SIOCBRADDIF);
+               break;
        }
-       return -EOPNOTSUPP;
+
+       rtnl_unlock();
+
+       return ret;
 }
index c939631..51991f1 100644 (file)
@@ -770,7 +770,7 @@ int br_fdb_get(struct sk_buff *skb, struct nlattr *tb[], struct net_device *dev,
 int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p);
 void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p);
 int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
-                             const unsigned char *addr, u16 vid,
+                             const unsigned char *addr, u16 vid, bool is_local,
                              bool swdev_notify);
 int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p,
                              const unsigned char *addr, u16 vid,
@@ -1880,6 +1880,17 @@ static inline void br_sysfs_delbr(struct net_device *dev) { return; }
 
 /* br_switchdev.c */
 #ifdef CONFIG_NET_SWITCHDEV
+int br_switchdev_port_offload(struct net_bridge_port *p,
+                             struct net_device *dev, const void *ctx,
+                             struct notifier_block *atomic_nb,
+                             struct notifier_block *blocking_nb,
+                             bool tx_fwd_offload,
+                             struct netlink_ext_ack *extack);
+
+void br_switchdev_port_unoffload(struct net_bridge_port *p, const void *ctx,
+                                struct notifier_block *atomic_nb,
+                                struct notifier_block *blocking_nb);
+
 bool br_switchdev_frame_uses_tx_fwd_offload(struct sk_buff *skb);
 
 void br_switchdev_frame_set_offload_fwd_mark(struct sk_buff *skb);
@@ -1908,6 +1919,24 @@ static inline void br_switchdev_frame_unmark(struct sk_buff *skb)
        skb->offload_fwd_mark = 0;
 }
 #else
+static inline int
+br_switchdev_port_offload(struct net_bridge_port *p,
+                         struct net_device *dev, const void *ctx,
+                         struct notifier_block *atomic_nb,
+                         struct notifier_block *blocking_nb,
+                         bool tx_fwd_offload,
+                         struct netlink_ext_ack *extack)
+{
+       return -EOPNOTSUPP;
+}
+
+static inline void
+br_switchdev_port_unoffload(struct net_bridge_port *p, const void *ctx,
+                           struct notifier_block *atomic_nb,
+                           struct notifier_block *blocking_nb)
+{
+}
+
 static inline bool br_switchdev_frame_uses_tx_fwd_offload(struct sk_buff *skb)
 {
        return false;
index 023de0e..6bf518d 100644 (file)
@@ -134,7 +134,7 @@ br_switchdev_fdb_notify(struct net_bridge *br,
                .is_local = test_bit(BR_FDB_LOCAL, &fdb->flags),
                .offloaded = test_bit(BR_FDB_OFFLOADED, &fdb->flags),
        };
-       struct net_device *dev = info.is_local ? br->dev : dst->dev;
+       struct net_device *dev = (!dst || info.is_local) ? br->dev : dst->dev;
 
        switch (type) {
        case RTM_DELNEIGH:
@@ -312,23 +312,16 @@ static void nbp_switchdev_unsync_objs(struct net_bridge_port *p,
 /* Let the bridge know that this port is offloaded, so that it can assign a
  * switchdev hardware domain to it.
  */
-int switchdev_bridge_port_offload(struct net_device *brport_dev,
-                                 struct net_device *dev, const void *ctx,
-                                 struct notifier_block *atomic_nb,
-                                 struct notifier_block *blocking_nb,
-                                 bool tx_fwd_offload,
-                                 struct netlink_ext_ack *extack)
+int br_switchdev_port_offload(struct net_bridge_port *p,
+                             struct net_device *dev, const void *ctx,
+                             struct notifier_block *atomic_nb,
+                             struct notifier_block *blocking_nb,
+                             bool tx_fwd_offload,
+                             struct netlink_ext_ack *extack)
 {
        struct netdev_phys_item_id ppid;
-       struct net_bridge_port *p;
        int err;
 
-       ASSERT_RTNL();
-
-       p = br_port_get_rtnl(brport_dev);
-       if (!p)
-               return -ENODEV;
-
        err = dev_get_port_parent_id(dev, &ppid, false);
        if (err)
                return err;
@@ -348,23 +341,12 @@ out_switchdev_del:
 
        return err;
 }
-EXPORT_SYMBOL_GPL(switchdev_bridge_port_offload);
 
-void switchdev_bridge_port_unoffload(struct net_device *brport_dev,
-                                    const void *ctx,
-                                    struct notifier_block *atomic_nb,
-                                    struct notifier_block *blocking_nb)
+void br_switchdev_port_unoffload(struct net_bridge_port *p, const void *ctx,
+                                struct notifier_block *atomic_nb,
+                                struct notifier_block *blocking_nb)
 {
-       struct net_bridge_port *p;
-
-       ASSERT_RTNL();
-
-       p = br_port_get_rtnl(brport_dev);
-       if (!p)
-               return;
-
        nbp_switchdev_unsync_objs(p, ctx, atomic_nb, blocking_nb);
 
        nbp_switchdev_del(p);
 }
-EXPORT_SYMBOL_GPL(switchdev_bridge_port_unoffload);
index 12369b6..f6df208 100644 (file)
 
 struct j1939_session;
 enum j1939_sk_errqueue_type {
-       J1939_ERRQUEUE_ACK,
-       J1939_ERRQUEUE_SCHED,
-       J1939_ERRQUEUE_ABORT,
+       J1939_ERRQUEUE_TX_ACK,
+       J1939_ERRQUEUE_TX_SCHED,
+       J1939_ERRQUEUE_TX_ABORT,
+       J1939_ERRQUEUE_RX_RTS,
+       J1939_ERRQUEUE_RX_DPO,
+       J1939_ERRQUEUE_RX_ABORT,
 };
 
 /* j1939 devices */
@@ -87,6 +90,7 @@ struct j1939_priv {
        struct list_head j1939_socks;
 
        struct kref rx_kref;
+       u32 rx_tskey;
 };
 
 void j1939_ecu_put(struct j1939_ecu *ecu);
index b904c06..6dff451 100644 (file)
@@ -905,20 +905,33 @@ failure:
        return NULL;
 }
 
-static size_t j1939_sk_opt_stats_get_size(void)
+static size_t j1939_sk_opt_stats_get_size(enum j1939_sk_errqueue_type type)
 {
-       return
-               nla_total_size(sizeof(u32)) + /* J1939_NLA_BYTES_ACKED */
-               0;
+       switch (type) {
+       case J1939_ERRQUEUE_RX_RTS:
+               return
+                       nla_total_size(sizeof(u32)) + /* J1939_NLA_TOTAL_SIZE */
+                       nla_total_size(sizeof(u32)) + /* J1939_NLA_PGN */
+                       nla_total_size(sizeof(u64)) + /* J1939_NLA_SRC_NAME */
+                       nla_total_size(sizeof(u64)) + /* J1939_NLA_DEST_NAME */
+                       nla_total_size(sizeof(u8)) +  /* J1939_NLA_SRC_ADDR */
+                       nla_total_size(sizeof(u8)) +  /* J1939_NLA_DEST_ADDR */
+                       0;
+       default:
+               return
+                       nla_total_size(sizeof(u32)) + /* J1939_NLA_BYTES_ACKED */
+                       0;
+       }
 }
 
 static struct sk_buff *
-j1939_sk_get_timestamping_opt_stats(struct j1939_session *session)
+j1939_sk_get_timestamping_opt_stats(struct j1939_session *session,
+                                   enum j1939_sk_errqueue_type type)
 {
        struct sk_buff *stats;
        u32 size;
 
-       stats = alloc_skb(j1939_sk_opt_stats_get_size(), GFP_ATOMIC);
+       stats = alloc_skb(j1939_sk_opt_stats_get_size(type), GFP_ATOMIC);
        if (!stats)
                return NULL;
 
@@ -928,32 +941,67 @@ j1939_sk_get_timestamping_opt_stats(struct j1939_session *session)
                size = min(session->pkt.tx_acked * 7,
                           session->total_message_size);
 
-       nla_put_u32(stats, J1939_NLA_BYTES_ACKED, size);
+       switch (type) {
+       case J1939_ERRQUEUE_RX_RTS:
+               nla_put_u32(stats, J1939_NLA_TOTAL_SIZE,
+                           session->total_message_size);
+               nla_put_u32(stats, J1939_NLA_PGN,
+                           session->skcb.addr.pgn);
+               nla_put_u64_64bit(stats, J1939_NLA_SRC_NAME,
+                                 session->skcb.addr.src_name, J1939_NLA_PAD);
+               nla_put_u64_64bit(stats, J1939_NLA_DEST_NAME,
+                                 session->skcb.addr.dst_name, J1939_NLA_PAD);
+               nla_put_u8(stats, J1939_NLA_SRC_ADDR,
+                          session->skcb.addr.sa);
+               nla_put_u8(stats, J1939_NLA_DEST_ADDR,
+                          session->skcb.addr.da);
+               break;
+       default:
+               nla_put_u32(stats, J1939_NLA_BYTES_ACKED, size);
+       }
 
        return stats;
 }
 
-void j1939_sk_errqueue(struct j1939_session *session,
-                      enum j1939_sk_errqueue_type type)
+static void __j1939_sk_errqueue(struct j1939_session *session, struct sock *sk,
+                               enum j1939_sk_errqueue_type type)
 {
        struct j1939_priv *priv = session->priv;
-       struct sock *sk = session->sk;
        struct j1939_sock *jsk;
        struct sock_exterr_skb *serr;
        struct sk_buff *skb;
        char *state = "UNK";
        int err;
 
-       /* currently we have no sk for the RX session */
-       if (!sk)
-               return;
-
        jsk = j1939_sk(sk);
 
        if (!(jsk->state & J1939_SOCK_ERRQUEUE))
                return;
 
-       skb = j1939_sk_get_timestamping_opt_stats(session);
+       switch (type) {
+       case J1939_ERRQUEUE_TX_ACK:
+               if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK))
+                       return;
+               break;
+       case J1939_ERRQUEUE_TX_SCHED:
+               if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED))
+                       return;
+               break;
+       case J1939_ERRQUEUE_TX_ABORT:
+               break;
+       case J1939_ERRQUEUE_RX_RTS:
+               fallthrough;
+       case J1939_ERRQUEUE_RX_DPO:
+               fallthrough;
+       case J1939_ERRQUEUE_RX_ABORT:
+               if (!(sk->sk_tsflags & SOF_TIMESTAMPING_RX_SOFTWARE))
+                       return;
+               break;
+       default:
+               netdev_err(priv->ndev, "Unknown errqueue type %i\n", type);
+       }
+
+       skb = j1939_sk_get_timestamping_opt_stats(session, type);
        if (!skb)
                return;
 
@@ -964,36 +1012,42 @@ void j1939_sk_errqueue(struct j1939_session *session,
        serr = SKB_EXT_ERR(skb);
        memset(serr, 0, sizeof(*serr));
        switch (type) {
-       case J1939_ERRQUEUE_ACK:
-               if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK)) {
-                       kfree_skb(skb);
-                       return;
-               }
-
+       case J1939_ERRQUEUE_TX_ACK:
                serr->ee.ee_errno = ENOMSG;
                serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
                serr->ee.ee_info = SCM_TSTAMP_ACK;
-               state = "ACK";
+               state = "TX ACK";
                break;
-       case J1939_ERRQUEUE_SCHED:
-               if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED)) {
-                       kfree_skb(skb);
-                       return;
-               }
-
+       case J1939_ERRQUEUE_TX_SCHED:
                serr->ee.ee_errno = ENOMSG;
                serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
                serr->ee.ee_info = SCM_TSTAMP_SCHED;
-               state = "SCH";
+               state = "TX SCH";
                break;
-       case J1939_ERRQUEUE_ABORT:
+       case J1939_ERRQUEUE_TX_ABORT:
                serr->ee.ee_errno = session->err;
                serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL;
                serr->ee.ee_info = J1939_EE_INFO_TX_ABORT;
-               state = "ABT";
+               state = "TX ABT";
+               break;
+       case J1939_ERRQUEUE_RX_RTS:
+               serr->ee.ee_errno = ENOMSG;
+               serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL;
+               serr->ee.ee_info = J1939_EE_INFO_RX_RTS;
+               state = "RX RTS";
+               break;
+       case J1939_ERRQUEUE_RX_DPO:
+               serr->ee.ee_errno = ENOMSG;
+               serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL;
+               serr->ee.ee_info = J1939_EE_INFO_RX_DPO;
+               state = "RX DPO";
+               break;
+       case J1939_ERRQUEUE_RX_ABORT:
+               serr->ee.ee_errno = session->err;
+               serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL;
+               serr->ee.ee_info = J1939_EE_INFO_RX_ABORT;
+               state = "RX ABT";
                break;
-       default:
-               netdev_err(priv->ndev, "Unknown errqueue type %i\n", type);
        }
 
        serr->opt_stats = true;
@@ -1008,6 +1062,27 @@ void j1939_sk_errqueue(struct j1939_session *session,
                kfree_skb(skb);
 };
 
+void j1939_sk_errqueue(struct j1939_session *session,
+                      enum j1939_sk_errqueue_type type)
+{
+       struct j1939_priv *priv = session->priv;
+       struct j1939_sock *jsk;
+
+       if (session->sk) {
+               /* send TX notifications to the socket of origin  */
+               __j1939_sk_errqueue(session, session->sk, type);
+               return;
+       }
+
+       /* spread RX notifications to all sockets subscribed to this session */
+       spin_lock_bh(&priv->j1939_socks_lock);
+       list_for_each_entry(jsk, &priv->j1939_socks, list) {
+               if (j1939_sk_recv_match_one(jsk, &session->skcb))
+                       __j1939_sk_errqueue(session, &jsk->sk, type);
+       }
+       spin_unlock_bh(&priv->j1939_socks_lock);
+};
+
 void j1939_sk_send_loop_abort(struct sock *sk, int err)
 {
        sk->sk_err = err;
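
Because RX sessions have no socket of their own, the new RX_RTS/RX_DPO/RX_ABORT events are fanned out to every socket whose filters match the session. A minimal receiver-side sketch, assuming a bound SOL_CAN_J1939 socket that has enabled SO_J1939_ERRQUEUE and SOF_TIMESTAMPING_RX_SOFTWARE:

#include <linux/can/j1939.h>
#include <linux/errqueue.h>
#include <sys/socket.h>
#include <sys/uio.h>

static void drain_one_notification(int fd)
{
        char buf[64], cbuf[256];
        struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
        struct msghdr msg = {
                .msg_iov = &iov, .msg_iovlen = 1,
                .msg_control = cbuf, .msg_controllen = sizeof(cbuf),
        };
        struct cmsghdr *cm;

        if (recvmsg(fd, &msg, MSG_ERRQUEUE) < 0)
                return;

        for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
                struct sock_extended_err *err;

                if (cm->cmsg_level != SOL_CAN_J1939 ||
                    cm->cmsg_type != SCM_J1939_ERRQUEUE)
                        continue;
                err = (struct sock_extended_err *)CMSG_DATA(cm);
                if (err->ee_info == J1939_EE_INFO_RX_RTS)
                        /* a new incoming session was announced; the RTS
                         * opt_stats carry PGN, total size and addresses
                         */
                        ;
        }
}
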
index efdf79d..bb5c4b8 100644 (file)
@@ -260,10 +260,14 @@ static void __j1939_session_drop(struct j1939_session *session)
 
 static void j1939_session_destroy(struct j1939_session *session)
 {
-       if (session->err)
-               j1939_sk_errqueue(session, J1939_ERRQUEUE_ABORT);
-       else
-               j1939_sk_errqueue(session, J1939_ERRQUEUE_ACK);
+       if (session->transmission) {
+               if (session->err)
+                       j1939_sk_errqueue(session, J1939_ERRQUEUE_TX_ABORT);
+               else
+                       j1939_sk_errqueue(session, J1939_ERRQUEUE_TX_ACK);
+       } else if (session->err) {
+                       j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_ABORT);
+       }
 
        netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session);
 
@@ -822,7 +826,7 @@ static int j1939_session_tx_dat(struct j1939_session *session)
                memcpy(&dat[1], &tpdat[offset], len);
                ret = j1939_tp_tx_dat(session, dat, len + 1);
                if (ret < 0) {
-                       /* ENOBUS == CAN interface TX queue is full */
+                       /* ENOBUFS == CAN interface TX queue is full */
                        if (ret != -ENOBUFS)
                                netdev_alert(priv->ndev,
                                             "%s: 0x%p: queue data error: %i\n",
@@ -1044,7 +1048,7 @@ static int j1939_simple_txnext(struct j1939_session *session)
        if (ret)
                goto out_free;
 
-       j1939_sk_errqueue(session, J1939_ERRQUEUE_SCHED);
+       j1939_sk_errqueue(session, J1939_ERRQUEUE_TX_SCHED);
        j1939_sk_queue_activate_next(session);
 
  out_free:
@@ -1076,11 +1080,16 @@ static bool j1939_session_deactivate_locked(struct j1939_session *session)
 
 static bool j1939_session_deactivate(struct j1939_session *session)
 {
+       struct j1939_priv *priv = session->priv;
        bool active;
 
-       j1939_session_list_lock(session->priv);
+       j1939_session_list_lock(priv);
+       /* This function should be called with a session ref-count of at
+        * least 2.
+        */
+       WARN_ON_ONCE(kref_read(&session->kref) < 2);
        active = j1939_session_deactivate_locked(session);
-       j1939_session_list_unlock(session->priv);
+       j1939_session_list_unlock(priv);
 
        return active;
 }
@@ -1111,6 +1120,8 @@ static void __j1939_session_cancel(struct j1939_session *session,
 
        if (session->sk)
                j1939_sk_send_loop_abort(session->sk, session->err);
+       else
+               j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_ABORT);
 }
 
 static void j1939_session_cancel(struct j1939_session *session,
@@ -1325,6 +1336,8 @@ static void j1939_xtp_rx_abort_one(struct j1939_priv *priv, struct sk_buff *skb,
        session->err = j1939_xtp_abort_to_errno(priv, abort);
        if (session->sk)
                j1939_sk_send_loop_abort(session->sk, session->err);
+       else
+               j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_ABORT);
        j1939_session_deactivate_activate_next(session);
 
 abort_put:
@@ -1433,7 +1446,7 @@ j1939_xtp_rx_cts_one(struct j1939_session *session, struct sk_buff *skb)
                if (session->transmission) {
                        if (session->pkt.tx_acked)
                                j1939_sk_errqueue(session,
-                                                 J1939_ERRQUEUE_SCHED);
+                                                 J1939_ERRQUEUE_TX_SCHED);
                        j1939_session_txtimer_cancel(session);
                        j1939_tp_schedule_txtimer(session, 0);
                }
@@ -1625,6 +1638,9 @@ j1939_session *j1939_xtp_rx_rts_session_new(struct j1939_priv *priv,
        session->pkt.rx = 0;
        session->pkt.tx = 0;
 
+       session->tskey = priv->rx_tskey++;
+       j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_RTS);
+
        WARN_ON_ONCE(j1939_session_activate(session));
 
        return session;
@@ -1747,6 +1763,9 @@ static void j1939_xtp_rx_dpo_one(struct j1939_session *session,
        session->pkt.dpo = j1939_etp_ctl_to_packet(skb->data);
        session->last_cmd = dat[0];
        j1939_tp_set_rxtimeout(session, 750);
+
+       if (!session->transmission)
+               j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_DPO);
 }
 
 static void j1939_xtp_rx_dpo(struct j1939_priv *priv, struct sk_buff *skb,
@@ -1874,7 +1893,7 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session,
                if (!session->transmission)
                        j1939_tp_schedule_txtimer(session, 0);
        } else {
-               j1939_tp_set_rxtimeout(session, 250);
+               j1939_tp_set_rxtimeout(session, 750);
        }
        session->last_cmd = 0xff;
        consume_skb(se_skb);
index ed4fcb7..7105fa4 100644 (file)
@@ -546,10 +546,18 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
                                return -EFAULT;
                }
 
+               rtnl_lock();
                lock_sock(sk);
 
-               if (ro->bound && ro->ifindex)
+               if (ro->bound && ro->ifindex) {
                        dev = dev_get_by_index(sock_net(sk), ro->ifindex);
+                       if (!dev) {
+                               if (count > 1)
+                                       kfree(filter);
+                               err = -ENODEV;
+                               goto out_fil;
+                       }
+               }
 
                if (ro->bound) {
                        /* (try to) register the new filters */
@@ -584,10 +592,9 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
                ro->count  = count;
 
  out_fil:
-               if (dev)
-                       dev_put(dev);
-
+               dev_put(dev);
                release_sock(sk);
+               rtnl_unlock();
 
                break;
 
@@ -600,10 +607,16 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
 
                err_mask &= CAN_ERR_MASK;
 
+               rtnl_lock();
                lock_sock(sk);
 
-               if (ro->bound && ro->ifindex)
+               if (ro->bound && ro->ifindex) {
                        dev = dev_get_by_index(sock_net(sk), ro->ifindex);
+                       if (!dev) {
+                               err = -ENODEV;
+                               goto out_err;
+                       }
+               }
 
                /* remove current error mask */
                if (ro->bound) {
@@ -623,10 +636,9 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
                ro->err_mask = err_mask;
 
  out_err:
-               if (dev)
-                       dev_put(dev);
-
+               dev_put(dev);
                release_sock(sk);
+               rtnl_unlock();
 
                break;
 
index 64e1a5f..8865079 100644 (file)
@@ -676,131 +676,6 @@ void dev_remove_offload(struct packet_offload *po)
 }
 EXPORT_SYMBOL(dev_remove_offload);
 
-/******************************************************************************
- *
- *                   Device Boot-time Settings Routines
- *
- ******************************************************************************/
-
-/* Boot time configuration table */
-static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
-
-/**
- *     netdev_boot_setup_add   - add new setup entry
- *     @name: name of the device
- *     @map: configured settings for the device
- *
- *     Adds new setup entry to the dev_boot_setup list.  The function
- *     returns 0 on error and 1 on success.  This is a generic routine to
- *     all netdevices.
- */
-static int netdev_boot_setup_add(char *name, struct ifmap *map)
-{
-       struct netdev_boot_setup *s;
-       int i;
-
-       s = dev_boot_setup;
-       for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
-               if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
-                       memset(s[i].name, 0, sizeof(s[i].name));
-                       strlcpy(s[i].name, name, IFNAMSIZ);
-                       memcpy(&s[i].map, map, sizeof(s[i].map));
-                       break;
-               }
-       }
-
-       return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
-}
-
-/**
- * netdev_boot_setup_check     - check boot time settings
- * @dev: the netdevice
- *
- * Check boot time settings for the device.
- * The found settings are set for the device to be used
- * later in the device probing.
- * Returns 0 if no settings found, 1 if they are.
- */
-int netdev_boot_setup_check(struct net_device *dev)
-{
-       struct netdev_boot_setup *s = dev_boot_setup;
-       int i;
-
-       for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
-               if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
-                   !strcmp(dev->name, s[i].name)) {
-                       dev->irq = s[i].map.irq;
-                       dev->base_addr = s[i].map.base_addr;
-                       dev->mem_start = s[i].map.mem_start;
-                       dev->mem_end = s[i].map.mem_end;
-                       return 1;
-               }
-       }
-       return 0;
-}
-EXPORT_SYMBOL(netdev_boot_setup_check);
-
-
-/**
- * netdev_boot_base    - get address from boot time settings
- * @prefix: prefix for network device
- * @unit: id for network device
- *
- * Check boot time settings for the base address of device.
- * The found settings are set for the device to be used
- * later in the device probing.
- * Returns 0 if no settings found.
- */
-unsigned long netdev_boot_base(const char *prefix, int unit)
-{
-       const struct netdev_boot_setup *s = dev_boot_setup;
-       char name[IFNAMSIZ];
-       int i;
-
-       sprintf(name, "%s%d", prefix, unit);
-
-       /*
-        * If device already registered then return base of 1
-        * to indicate not to probe for this interface
-        */
-       if (__dev_get_by_name(&init_net, name))
-               return 1;
-
-       for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
-               if (!strcmp(name, s[i].name))
-                       return s[i].map.base_addr;
-       return 0;
-}
-
-/*
- * Saves at boot time configured settings for any netdevice.
- */
-int __init netdev_boot_setup(char *str)
-{
-       int ints[5];
-       struct ifmap map;
-
-       str = get_options(str, ARRAY_SIZE(ints), ints);
-       if (!str || !*str)
-               return 0;
-
-       /* Save settings */
-       memset(&map, 0, sizeof(map));
-       if (ints[0] > 0)
-               map.irq = ints[1];
-       if (ints[0] > 1)
-               map.base_addr = ints[2];
-       if (ints[0] > 2)
-               map.mem_start = ints[3];
-       if (ints[0] > 3)
-               map.mem_end = ints[4];
-
-       /* Add new entry to the list */
-       return netdev_boot_setup_add(str, &map);
-}
-
-__setup("netdev=", netdev_boot_setup);
-
 /*******************************************************************************
  *
  *                         Device Interface Subroutines
@@ -956,8 +831,7 @@ struct net_device *dev_get_by_name(struct net *net, const char *name)
 
        rcu_read_lock();
        dev = dev_get_by_name_rcu(net, name);
-       if (dev)
-               dev_hold(dev);
+       dev_hold(dev);
        rcu_read_unlock();
        return dev;
 }
@@ -1030,8 +904,7 @@ struct net_device *dev_get_by_index(struct net *net, int ifindex)
 
        rcu_read_lock();
        dev = dev_get_by_index_rcu(net, ifindex);
-       if (dev)
-               dev_hold(dev);
+       dev_hold(dev);
        rcu_read_unlock();
        return dev;
 }
@@ -3098,6 +2971,50 @@ int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq)
 EXPORT_SYMBOL(netif_set_real_num_rx_queues);
 #endif
 
+/**
+ *     netif_set_real_num_queues - set actual number of RX and TX queues used
+ *     @dev: Network device
+ *     @txq: Actual number of TX queues
+ *     @rxq: Actual number of RX queues
+ *
+ *     Set the real number of both TX and RX queues.
+ *     Does nothing if the number of queues is already correct.
+ */
+int netif_set_real_num_queues(struct net_device *dev,
+                             unsigned int txq, unsigned int rxq)
+{
+       unsigned int old_rxq = dev->real_num_rx_queues;
+       int err;
+
+       if (txq < 1 || txq > dev->num_tx_queues ||
+           rxq < 1 || rxq > dev->num_rx_queues)
+               return -EINVAL;
+
+       /* Start from increases, so the error path only does decreases -
+        * decreases can't fail.
+        */
+       if (rxq > dev->real_num_rx_queues) {
+               err = netif_set_real_num_rx_queues(dev, rxq);
+               if (err)
+                       return err;
+       }
+       if (txq > dev->real_num_tx_queues) {
+               err = netif_set_real_num_tx_queues(dev, txq);
+               if (err)
+                       goto undo_rx;
+       }
+       if (rxq < dev->real_num_rx_queues)
+               WARN_ON(netif_set_real_num_rx_queues(dev, rxq));
+       if (txq < dev->real_num_tx_queues)
+               WARN_ON(netif_set_real_num_tx_queues(dev, txq));
+
+       return 0;
+undo_rx:
+       WARN_ON(netif_set_real_num_rx_queues(dev, old_rxq));
+       return err;
+}
+EXPORT_SYMBOL(netif_set_real_num_queues);
+
 /**
  * netif_get_num_default_rss_queues - default number of RSS queues
  *
@@ -5878,7 +5795,7 @@ static void flush_all_backlogs(void)
         */
        ASSERT_RTNL();
 
-       get_online_cpus();
+       cpus_read_lock();
 
        cpumask_clear(&flush_cpus);
        for_each_online_cpu(cpu) {
@@ -5896,7 +5813,7 @@ static void flush_all_backlogs(void)
        for_each_cpu(cpu, &flush_cpus)
                flush_work(per_cpu_ptr(&flush_works, cpu));
 
-       put_online_cpus();
+       cpus_read_unlock();
 }
 
 /* Pass the currently batched GRO_NORMAL SKBs up to the stack. */
@@ -7615,7 +7532,7 @@ void *netdev_lower_get_next_private_rcu(struct net_device *dev,
 {
        struct netdev_adjacent *lower;
 
-       WARN_ON_ONCE(!rcu_read_lock_held());
+       WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
 
        lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
 
@@ -9380,7 +9297,7 @@ static struct bpf_prog *dev_xdp_prog(struct net_device *dev,
        return dev->xdp_state[mode].prog;
 }
 
-static u8 dev_xdp_prog_count(struct net_device *dev)
+u8 dev_xdp_prog_count(struct net_device *dev)
 {
        u8 count = 0;
        int i;
@@ -9390,6 +9307,7 @@ static u8 dev_xdp_prog_count(struct net_device *dev)
                        count++;
        return count;
 }
+EXPORT_SYMBOL_GPL(dev_xdp_prog_count);
 
 u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode)
 {
@@ -9483,6 +9401,8 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack
 {
        unsigned int num_modes = hweight32(flags & XDP_FLAGS_MODES);
        struct bpf_prog *cur_prog;
+       struct net_device *upper;
+       struct list_head *iter;
        enum bpf_xdp_mode mode;
        bpf_op_t bpf_op;
        int err;
@@ -9521,6 +9441,14 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack
                return -EBUSY;
        }
 
+       /* don't allow if an upper device already has a program */
+       netdev_for_each_upper_dev_rcu(dev, upper, iter) {
+               if (dev_xdp_prog_count(upper) > 0) {
+                       NL_SET_ERR_MSG(extack, "Cannot attach when an upper device already has a program");
+                       return -EEXIST;
+               }
+       }
+
        cur_prog = dev_xdp_prog(dev, mode);
        /* can't replace attached prog with link */
        if (link && cur_prog) {
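
netif_set_real_num_queues() saves drivers from hand-ordering the TX and RX resize calls and from unwinding a partial failure themselves. A hedged sketch of how a driver's open path might use it; the foo_* names are hypothetical:

static int foo_open(struct net_device *dev)
{
        struct foo_priv *priv = netdev_priv(dev);

        /* Applies whichever counts increase first, shrinks second, and
         * rolls back the RX increase if the TX resize fails.
         */
        return netif_set_real_num_queues(dev, priv->num_tx_rings,
                                         priv->num_rx_rings);
}
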
index 4035bce..0e87237 100644 (file)
@@ -379,7 +379,14 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, void __user *data,
        case SIOCBRDELIF:
                if (!netif_device_present(dev))
                        return -ENODEV;
-               return br_ioctl_call(net, netdev_priv(dev), cmd, ifr, NULL);
+               if (!netif_is_bridge_master(dev))
+                       return -EOPNOTSUPP;
+               dev_hold(dev);
+               rtnl_unlock();
+               err = br_ioctl_call(net, netdev_priv(dev), cmd, ifr, NULL);
+               dev_put(dev);
+               rtnl_lock();
+               return err;
 
        case SIOCSHWTSTAMP:
                err = net_hwtstamp_validate(ifr);
index fd2fc2b..b02d54a 100644 (file)
@@ -804,10 +804,11 @@ static int devlink_nl_port_attrs_put(struct sk_buff *msg,
        return 0;
 }
 
-static int
-devlink_port_fn_hw_addr_fill(struct devlink *devlink, const struct devlink_ops *ops,
-                            struct devlink_port *port, struct sk_buff *msg,
-                            struct netlink_ext_ack *extack, bool *msg_updated)
+static int devlink_port_fn_hw_addr_fill(const struct devlink_ops *ops,
+                                       struct devlink_port *port,
+                                       struct sk_buff *msg,
+                                       struct netlink_ext_ack *extack,
+                                       bool *msg_updated)
 {
        u8 hw_addr[MAX_ADDR_LEN];
        int hw_addr_len;
@@ -816,7 +817,8 @@ devlink_port_fn_hw_addr_fill(struct devlink *devlink, const struct devlink_ops *
        if (!ops->port_function_hw_addr_get)
                return 0;
 
-       err = ops->port_function_hw_addr_get(devlink, port, hw_addr, &hw_addr_len, extack);
+       err = ops->port_function_hw_addr_get(port, hw_addr, &hw_addr_len,
+                                            extack);
        if (err) {
                if (err == -EOPNOTSUPP)
                        return 0;
@@ -893,12 +895,11 @@ devlink_port_fn_opstate_valid(enum devlink_port_fn_opstate opstate)
               opstate == DEVLINK_PORT_FN_OPSTATE_ATTACHED;
 }
 
-static int
-devlink_port_fn_state_fill(struct devlink *devlink,
-                          const struct devlink_ops *ops,
-                          struct devlink_port *port, struct sk_buff *msg,
-                          struct netlink_ext_ack *extack,
-                          bool *msg_updated)
+static int devlink_port_fn_state_fill(const struct devlink_ops *ops,
+                                     struct devlink_port *port,
+                                     struct sk_buff *msg,
+                                     struct netlink_ext_ack *extack,
+                                     bool *msg_updated)
 {
        enum devlink_port_fn_opstate opstate;
        enum devlink_port_fn_state state;
@@ -907,7 +908,7 @@ devlink_port_fn_state_fill(struct devlink *devlink,
        if (!ops->port_fn_state_get)
                return 0;
 
-       err = ops->port_fn_state_get(devlink, port, &state, &opstate, extack);
+       err = ops->port_fn_state_get(port, &state, &opstate, extack);
        if (err) {
                if (err == -EOPNOTSUPP)
                        return 0;
@@ -935,7 +936,6 @@ static int
 devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *port,
                                   struct netlink_ext_ack *extack)
 {
-       struct devlink *devlink = port->devlink;
        const struct devlink_ops *ops;
        struct nlattr *function_attr;
        bool msg_updated = false;
@@ -945,13 +945,12 @@ devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *por
        if (!function_attr)
                return -EMSGSIZE;
 
-       ops = devlink->ops;
-       err = devlink_port_fn_hw_addr_fill(devlink, ops, port, msg,
-                                          extack, &msg_updated);
+       ops = port->devlink->ops;
+       err = devlink_port_fn_hw_addr_fill(ops, port, msg, extack,
+                                          &msg_updated);
        if (err)
                goto out;
-       err = devlink_port_fn_state_fill(devlink, ops, port, msg, extack,
-                                        &msg_updated);
+       err = devlink_port_fn_state_fill(ops, port, msg, extack, &msg_updated);
 out:
        if (err || !msg_updated)
                nla_nest_cancel(msg, function_attr);
@@ -1269,31 +1268,33 @@ out:
        return msg->len;
 }
 
-static int devlink_port_type_set(struct devlink *devlink,
-                                struct devlink_port *devlink_port,
+static int devlink_port_type_set(struct devlink_port *devlink_port,
                                 enum devlink_port_type port_type)
 
 {
        int err;
 
-       if (devlink->ops->port_type_set) {
-               if (port_type == devlink_port->type)
-                       return 0;
-               err = devlink->ops->port_type_set(devlink_port, port_type);
-               if (err)
-                       return err;
-               devlink_port->desired_type = port_type;
-               devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
+       if (!devlink_port->devlink->ops->port_type_set)
+               return -EOPNOTSUPP;
+
+       if (port_type == devlink_port->type)
                return 0;
-       }
-       return -EOPNOTSUPP;
+
+       err = devlink_port->devlink->ops->port_type_set(devlink_port,
+                                                       port_type);
+       if (err)
+               return err;
+
+       devlink_port->desired_type = port_type;
+       devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
+       return 0;
 }
 
-static int
-devlink_port_function_hw_addr_set(struct devlink *devlink, struct devlink_port *port,
-                                 const struct nlattr *attr, struct netlink_ext_ack *extack)
+static int devlink_port_function_hw_addr_set(struct devlink_port *port,
+                                            const struct nlattr *attr,
+                                            struct netlink_ext_ack *extack)
 {
-       const struct devlink_ops *ops;
+       const struct devlink_ops *ops = port->devlink->ops;
        const u8 *hw_addr;
        int hw_addr_len;
 
@@ -1314,17 +1315,16 @@ devlink_port_function_hw_addr_set(struct devlink *devlink, struct devlink_port *
                }
        }
 
-       ops = devlink->ops;
        if (!ops->port_function_hw_addr_set) {
                NL_SET_ERR_MSG_MOD(extack, "Port doesn't support function attributes");
                return -EOPNOTSUPP;
        }
 
-       return ops->port_function_hw_addr_set(devlink, port, hw_addr, hw_addr_len, extack);
+       return ops->port_function_hw_addr_set(port, hw_addr, hw_addr_len,
+                                             extack);
 }
 
-static int devlink_port_fn_state_set(struct devlink *devlink,
-                                    struct devlink_port *port,
+static int devlink_port_fn_state_set(struct devlink_port *port,
                                     const struct nlattr *attr,
                                     struct netlink_ext_ack *extack)
 {
@@ -1332,18 +1332,18 @@ static int devlink_port_fn_state_set(struct devlink *devlink,
        const struct devlink_ops *ops;
 
        state = nla_get_u8(attr);
-       ops = devlink->ops;
+       ops = port->devlink->ops;
        if (!ops->port_fn_state_set) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "Function does not support state setting");
                return -EOPNOTSUPP;
        }
-       return ops->port_fn_state_set(devlink, port, state, extack);
+       return ops->port_fn_state_set(port, state, extack);
 }
 
-static int
-devlink_port_function_set(struct devlink *devlink, struct devlink_port *port,
-                         const struct nlattr *attr, struct netlink_ext_ack *extack)
+static int devlink_port_function_set(struct devlink_port *port,
+                                    const struct nlattr *attr,
+                                    struct netlink_ext_ack *extack)
 {
        struct nlattr *tb[DEVLINK_PORT_FUNCTION_ATTR_MAX + 1];
        int err;
@@ -1357,7 +1357,7 @@ devlink_port_function_set(struct devlink *devlink, struct devlink_port *port,
 
        attr = tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR];
        if (attr) {
-               err = devlink_port_function_hw_addr_set(devlink, port, attr, extack);
+               err = devlink_port_function_hw_addr_set(port, attr, extack);
                if (err)
                        return err;
        }
@@ -1367,7 +1367,7 @@ devlink_port_function_set(struct devlink *devlink, struct devlink_port *port,
         */
        attr = tb[DEVLINK_PORT_FN_ATTR_STATE];
        if (attr)
-               err = devlink_port_fn_state_set(devlink, port, attr, extack);
+               err = devlink_port_fn_state_set(port, attr, extack);
 
        if (!err)
                devlink_port_notify(port, DEVLINK_CMD_PORT_NEW);
@@ -1378,14 +1378,13 @@ static int devlink_nl_cmd_port_set_doit(struct sk_buff *skb,
                                        struct genl_info *info)
 {
        struct devlink_port *devlink_port = info->user_ptr[1];
-       struct devlink *devlink = devlink_port->devlink;
        int err;
 
        if (info->attrs[DEVLINK_ATTR_PORT_TYPE]) {
                enum devlink_port_type port_type;
 
                port_type = nla_get_u16(info->attrs[DEVLINK_ATTR_PORT_TYPE]);
-               err = devlink_port_type_set(devlink, devlink_port, port_type);
+               err = devlink_port_type_set(devlink_port, port_type);
                if (err)
                        return err;
        }
@@ -1394,7 +1393,7 @@ static int devlink_nl_cmd_port_set_doit(struct sk_buff *skb,
                struct nlattr *attr = info->attrs[DEVLINK_ATTR_PORT_FUNCTION];
                struct netlink_ext_ack *extack = info->extack;
 
-               err = devlink_port_function_set(devlink, devlink_port, attr, extack);
+               err = devlink_port_function_set(devlink_port, attr, extack);
                if (err)
                        return err;
        }
@@ -8769,24 +8768,26 @@ static bool devlink_reload_actions_valid(const struct devlink_ops *ops)
  *     @ops: ops
  *     @priv_size: size of user private data
  *     @net: net namespace
+ *     @dev: parent device
  *
  *     Allocate new devlink instance resources, including devlink index
  *     and name.
  */
 struct devlink *devlink_alloc_ns(const struct devlink_ops *ops,
-                                size_t priv_size, struct net *net)
+                                size_t priv_size, struct net *net,
+                                struct device *dev)
 {
        struct devlink *devlink;
 
-       if (WARN_ON(!ops))
-               return NULL;
-
+       WARN_ON(!ops || !dev);
        if (!devlink_reload_actions_valid(ops))
                return NULL;
 
        devlink = kzalloc(sizeof(*devlink) + priv_size, GFP_KERNEL);
        if (!devlink)
                return NULL;
+
+       devlink->dev = dev;
        devlink->ops = ops;
        xa_init_flags(&devlink->snapshot_ids, XA_FLAGS_ALLOC);
        write_pnet(&devlink->_net, net);
@@ -8811,12 +8812,9 @@ EXPORT_SYMBOL_GPL(devlink_alloc_ns);
  *     devlink_register - Register devlink instance
  *
  *     @devlink: devlink
- *     @dev: parent device
  */
-int devlink_register(struct devlink *devlink, struct device *dev)
+int devlink_register(struct devlink *devlink)
 {
-       WARN_ON(devlink->dev);
-       devlink->dev = dev;
        mutex_lock(&devlink_mutex);
        list_add_tail(&devlink->list, &devlink_list);
        devlink_notify(devlink, DEVLINK_CMD_NEW);
@@ -9327,18 +9325,10 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port,
 
        switch (attrs->flavour) {
        case DEVLINK_PORT_FLAVOUR_PHYSICAL:
-       case DEVLINK_PORT_FLAVOUR_VIRTUAL:
                n = snprintf(name, len, "p%u", attrs->phys.port_number);
                if (n < len && attrs->split)
                        n += snprintf(name + n, len - n, "s%u",
                                      attrs->phys.split_subport_number);
-               if (!attrs->split)
-                       n = snprintf(name, len, "p%u", attrs->phys.port_number);
-               else
-                       n = snprintf(name, len, "p%us%u",
-                                    attrs->phys.port_number,
-                                    attrs->phys.split_subport_number);
-
                break;
        case DEVLINK_PORT_FLAVOUR_CPU:
        case DEVLINK_PORT_FLAVOUR_DSA:
@@ -9380,6 +9370,8 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port,
                n = snprintf(name, len, "pf%usf%u", attrs->pci_sf.pf,
                             attrs->pci_sf.sf);
                break;
+       case DEVLINK_PORT_FLAVOUR_VIRTUAL:
+               return -EOPNOTSUPP;
        }
 
        if (n >= len)
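
The parent device now has to be supplied when the devlink instance is allocated, and devlink_register() loses its dev argument. A sketch of the updated probe sequence, with hypothetical foo_* names; devlink_alloc() here is assumed to be the init_net convenience wrapper around the devlink_alloc_ns() shown above:

static int foo_probe(struct pci_dev *pdev)
{
        struct devlink *devlink;

        devlink = devlink_alloc(&foo_devlink_ops,
                                sizeof(struct foo_priv), &pdev->dev);
        if (!devlink)
                return -ENOMEM;

        return devlink_register(devlink);
}
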
index ead2a8a..49442ca 100644 (file)
@@ -850,8 +850,7 @@ net_dm_hw_metadata_copy(const struct devlink_trap_metadata *metadata)
        }
 
        hw_metadata->input_dev = metadata->input_dev;
-       if (hw_metadata->input_dev)
-               dev_hold(hw_metadata->input_dev);
+       dev_hold(hw_metadata->input_dev);
 
        return hw_metadata;
 
@@ -867,8 +866,7 @@ free_hw_metadata:
 static void
 net_dm_hw_metadata_free(const struct devlink_trap_metadata *hw_metadata)
 {
-       if (hw_metadata->input_dev)
-               dev_put(hw_metadata->input_dev);
+       dev_put(hw_metadata->input_dev);
        kfree(hw_metadata->fa_cookie);
        kfree(hw_metadata->trap_name);
        kfree(hw_metadata->trap_group_name);
index fb3bcba..497ef9b 100644 (file)
@@ -49,8 +49,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops,
              unsigned short flags)
 {
        dst->dev = dev;
-       if (dev)
-               dev_hold(dev);
+       dev_hold(dev);
        dst->ops = ops;
        dst_init_metrics(dst, dst_default_metrics.metrics, true);
        dst->expires = 0UL;
@@ -118,8 +117,7 @@ struct dst_entry *dst_destroy(struct dst_entry * dst)
 
        if (dst->ops->destroy)
                dst->ops->destroy(dst);
-       if (dst->dev)
-               dev_put(dst->dev);
+       dev_put(dst->dev);
 
        lwtstate_put(dst->lwtstate);
 
index 3b4986e..3aca07c 100644 (file)
@@ -2180,17 +2180,9 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb,
        skb->tstamp = 0;
 
        if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
-               struct sk_buff *skb2;
-
-               skb2 = skb_realloc_headroom(skb, hh_len);
-               if (unlikely(!skb2)) {
-                       kfree_skb(skb);
+               skb = skb_expand_head(skb, hh_len);
+               if (!skb)
                        return -ENOMEM;
-               }
-               if (skb->sk)
-                       skb_set_owner_w(skb2, skb->sk);
-               consume_skb(skb);
-               skb = skb2;
        }
 
        rcu_read_lock_bh();
@@ -2214,8 +2206,7 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb,
        }
        rcu_read_unlock_bh();
        if (dst)
-               IP6_INC_STATS(dev_net(dst->dev),
-                             ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
+               IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
 out_drop:
        kfree_skb(skb);
        return -ENETDOWN;
@@ -2287,17 +2278,9 @@ static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb,
        skb->tstamp = 0;
 
        if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
-               struct sk_buff *skb2;
-
-               skb2 = skb_realloc_headroom(skb, hh_len);
-               if (unlikely(!skb2)) {
-                       kfree_skb(skb);
+               skb = skb_expand_head(skb, hh_len);
+               if (!skb)
                        return -ENOMEM;
-               }
-               if (skb->sk)
-                       skb_set_owner_w(skb2, skb->sk);
-               consume_skb(skb);
-               skb = skb2;
        }
 
        rcu_read_lock_bh();
@@ -3950,6 +3933,31 @@ void bpf_clear_redirect_map(struct bpf_map *map)
        }
 }
 
+DEFINE_STATIC_KEY_FALSE(bpf_master_redirect_enabled_key);
+EXPORT_SYMBOL_GPL(bpf_master_redirect_enabled_key);
+
+u32 xdp_master_redirect(struct xdp_buff *xdp)
+{
+       struct net_device *master, *slave;
+       struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+
+       master = netdev_master_upper_dev_get_rcu(xdp->rxq->dev);
+       slave = master->netdev_ops->ndo_xdp_get_xmit_slave(master, xdp);
+       if (slave && slave != xdp->rxq->dev) {
+       if (slave && slave != xdp->rxq->dev) {
+               /* The target device differs from the receiving device, so
+                * redirect the packet to the new device.
+                * Using XDP_REDIRECT gets the correct behaviour from
+                * XDP-enabled drivers, which then unmap the packet from
+                * their rx ring.
+                */
+               ri->tgt_index = slave->ifindex;
+               ri->map_id = INT_MAX;
+               ri->map_type = BPF_MAP_TYPE_UNSPEC;
+               return XDP_REDIRECT;
+       }
+       return XDP_TX;
+}
+EXPORT_SYMBOL_GPL(xdp_master_redirect);
+
 int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
                    struct bpf_prog *xdp_prog)
 {
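
For context: drivers do not call xdp_master_redirect() directly; the core invokes it when a bonding master runs the XDP program on behalf of a slave. A hedged reconstruction of the glue in bpf_prog_run_xdp() (the exact body lives in include/linux/filter.h and may differ in detail):

	u32 act = __bpf_prog_run(prog, xdp, BPF_DISPATCHER_FUNC(xdp));

	/* Only pay for the check once bonding has enabled the static key. */
	if (static_branch_unlikely(&bpf_master_redirect_enabled_key) &&
	    act == XDP_TX && netif_is_bond_slave(xdp->rxq->dev))
		act = xdp_master_redirect(xdp);

	return act;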
@@ -5016,6 +5024,40 @@ err_clear:
        return -EINVAL;
 }
 
+BPF_CALL_5(bpf_sk_setsockopt, struct sock *, sk, int, level,
+          int, optname, char *, optval, int, optlen)
+{
+       return _bpf_setsockopt(sk, level, optname, optval, optlen);
+}
+
+const struct bpf_func_proto bpf_sk_setsockopt_proto = {
+       .func           = bpf_sk_setsockopt,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
+       .arg2_type      = ARG_ANYTHING,
+       .arg3_type      = ARG_ANYTHING,
+       .arg4_type      = ARG_PTR_TO_MEM,
+       .arg5_type      = ARG_CONST_SIZE,
+};
+
+BPF_CALL_5(bpf_sk_getsockopt, struct sock *, sk, int, level,
+          int, optname, char *, optval, int, optlen)
+{
+       return _bpf_getsockopt(sk, level, optname, optval, optlen);
+}
+
+const struct bpf_func_proto bpf_sk_getsockopt_proto = {
+       .func           = bpf_sk_getsockopt,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
+       .arg2_type      = ARG_ANYTHING,
+       .arg3_type      = ARG_ANYTHING,
+       .arg4_type      = ARG_PTR_TO_UNINIT_MEM,
+       .arg5_type      = ARG_CONST_SIZE,
+};
+
 BPF_CALL_5(bpf_sock_addr_setsockopt, struct bpf_sock_addr_kern *, ctx,
           int, level, int, optname, char *, optval, int, optlen)
 {
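
These two protos expose the existing _bpf_setsockopt()/_bpf_getsockopt() machinery to program types that hold a sock-common pointer, such as TCP socket iterators. A hedged sketch of a consumer in the selftest style; the section name, helper availability and constant values are assumptions, not part of this hunk:

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

/* uapi values, repeated here because vmlinux.h carries no macros */
#define SOL_TCP		6
#define TCP_CONGESTION	13

char _license[] SEC("license") = "GPL";

SEC("iter/tcp")
int set_cubic(struct bpf_iter__tcp *ctx)
{
	struct sock_common *skc = ctx->sk_common;
	struct tcp_sock *tp;
	char cc[] = "cubic";

	if (!skc)
		return 0;

	/* Assumed: the cast helper yields a pointer acceptable to the
	 * ARG_PTR_TO_BTF_ID_SOCK_COMMON argument type above.
	 */
	tp = bpf_skc_to_tcp_sock(skc);
	if (!tp)
		return 0;

	/* From the program side this is the familiar bpf_setsockopt(). */
	bpf_setsockopt(tp, SOL_TCP, TCP_CONGESTION, cc, sizeof(cc));
	return 0;
}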
index 39d7be0..bac0184 100644 (file)
@@ -1508,7 +1508,7 @@ __be32 flow_get_u32_dst(const struct flow_keys *flow)
 }
 EXPORT_SYMBOL(flow_get_u32_dst);
 
-/* Sort the source and destination IP (and the ports if the IP are the same),
+/* Sort the source and destination IPs and, independently, the ports,
  * to have consistent hash within the two directions
  */
 static inline void __flow_hash_consistentify(struct flow_keys *keys)
@@ -1519,11 +1519,11 @@ static inline void __flow_hash_consistentify(struct flow_keys *keys)
        case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
                addr_diff = (__force u32)keys->addrs.v4addrs.dst -
                            (__force u32)keys->addrs.v4addrs.src;
-               if ((addr_diff < 0) ||
-                   (addr_diff == 0 &&
-                    ((__force u16)keys->ports.dst <
-                     (__force u16)keys->ports.src))) {
+               if (addr_diff < 0)
                        swap(keys->addrs.v4addrs.src, keys->addrs.v4addrs.dst);
+
+               if ((__force u16)keys->ports.dst <
+                   (__force u16)keys->ports.src) {
                        swap(keys->ports.src, keys->ports.dst);
                }
                break;
@@ -1531,13 +1531,13 @@ static inline void __flow_hash_consistentify(struct flow_keys *keys)
                addr_diff = memcmp(&keys->addrs.v6addrs.dst,
                                   &keys->addrs.v6addrs.src,
                                   sizeof(keys->addrs.v6addrs.dst));
-               if ((addr_diff < 0) ||
-                   (addr_diff == 0 &&
-                    ((__force u16)keys->ports.dst <
-                     (__force u16)keys->ports.src))) {
+               if (addr_diff < 0) {
                        for (i = 0; i < 4; i++)
                                swap(keys->addrs.v6addrs.src.s6_addr32[i],
                                     keys->addrs.v6addrs.dst.s6_addr32[i]);
+               }
+               if ((__force u16)keys->ports.dst <
+                   (__force u16)keys->ports.src) {
                        swap(keys->ports.src, keys->ports.dst);
                }
                break;
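
Net effect of the rewrite: addresses and ports are now canonicalized independently (previously the port swap was tied to the address comparison), so both directions of a flow always map to the same canonical tuple. A hedged standalone illustration of the v4 logic in plain C:

#include <stdint.h>
#include <stdio.h>

static void consistentify(uint32_t *saddr, uint32_t *daddr,
			  uint16_t *sport, uint16_t *dport)
{
	uint32_t t32;
	uint16_t t16;

	/* Canonical order for the addresses... */
	if ((int32_t)(*daddr - *saddr) < 0) {
		t32 = *saddr; *saddr = *daddr; *daddr = t32;
	}
	/* ...and, independently, for the ports. */
	if (*dport < *sport) {
		t16 = *sport; *sport = *dport; *dport = t16;
	}
}

int main(void)
{
	uint32_t s = 42, d = 42;	/* same address both ways */
	uint16_t sp = 8080, dp = 443;

	consistentify(&s, &d, &sp, &dp);
	printf("%u %u\n", sp, dp);	/* 443 8080 in both directions */
	return 0;
}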
index 53e85c7..2d5bc3a 100644 (file)
@@ -741,12 +741,10 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
        write_pnet(&n->net, net);
        memcpy(n->key, pkey, key_len);
        n->dev = dev;
-       if (dev)
-               dev_hold(dev);
+       dev_hold(dev);
 
        if (tbl->pconstructor && tbl->pconstructor(n)) {
-               if (dev)
-                       dev_put(dev);
+               dev_put(dev);
                kfree(n);
                n = NULL;
                goto out;
@@ -778,8 +776,7 @@ int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
                        write_unlock_bh(&tbl->lock);
                        if (tbl->pdestructor)
                                tbl->pdestructor(n);
-                       if (n->dev)
-                               dev_put(n->dev);
+                       dev_put(n->dev);
                        kfree(n);
                        return 0;
                }
@@ -812,8 +809,7 @@ static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
                n->next = NULL;
                if (tbl->pdestructor)
                        tbl->pdestructor(n);
-               if (n->dev)
-                       dev_put(n->dev);
+               dev_put(n->dev);
                kfree(n);
        }
        return -ENOENT;
@@ -1662,8 +1658,7 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
        list_del(&parms->list);
        parms->dead = 1;
        write_unlock_bh(&tbl->lock);
-       if (parms->dev)
-               dev_put(parms->dev);
+       dev_put(parms->dev);
        call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
 }
 EXPORT_SYMBOL(neigh_parms_release);
@@ -2533,6 +2528,13 @@ static bool neigh_master_filtered(struct net_device *dev, int master_idx)
                return false;
 
        master = dev ? netdev_master_upper_dev_get(dev) : NULL;
+
+       /* 0 is already used to denote that NDA_MASTER wasn't passed, so we
+        * need another invalid ifindex value to denote "no master".
+        */
+       if (master_idx == -1)
+               return !!master;
+
        if (!master || master->ifindex != master_idx)
                return true;
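
The link dump filter in rtnetlink (below) gains the same special case. Restated as a hedged standalone predicate, where "keep" means the entry survives filtering:

/* master_idx == 0  : NDA_MASTER/IFLA_MASTER absent, no filtering
 * master_idx == -1 : keep only entries without a master
 * otherwise        : keep only entries whose master ifindex matches
 */
static bool keep_entry(int master_idx, const struct net_device *master)
{
	if (!master_idx)
		return true;
	if (master_idx == -1)
		return !master;
	return master && master->ifindex == master_idx;
}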
 
@@ -3315,12 +3317,13 @@ static int neigh_stat_seq_show(struct seq_file *seq, void *v)
        struct neigh_statistics *st = v;
 
        if (v == SEQ_START_TOKEN) {
-               seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
+               seq_puts(seq, "entries  allocs   destroys hash_grows lookups  hits     res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
                return 0;
        }
 
-       seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
-                       "%08lx %08lx  %08lx %08lx %08lx %08lx\n",
+       seq_printf(seq, "%08x %08lx %08lx %08lx   %08lx %08lx %08lx   "
+                       "%08lx         %08lx         %08lx         "
+                       "%08lx       %08lx            %08lx\n",
                   atomic_read(&tbl->entries),
 
                   st->allocs,
index 5e4eb45..ac11604 100644 (file)
@@ -24,6 +24,8 @@
 #define DEFER_TIME (msecs_to_jiffies(1000))
 #define DEFER_WARN_INTERVAL (60 * HZ)
 
+#define BIAS_MAX       LONG_MAX
+
 static int page_pool_init(struct page_pool *pool,
                          const struct page_pool_params *params)
 {
@@ -67,6 +69,10 @@ static int page_pool_init(struct page_pool *pool,
                 */
        }
 
+       if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT &&
+           pool->p.flags & PP_FLAG_PAGE_FRAG)
+               return -EINVAL;
+
        if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0)
                return -ENOMEM;
 
@@ -206,6 +212,19 @@ static bool page_pool_dma_map(struct page_pool *pool, struct page *page)
        return true;
 }
 
+static void page_pool_set_pp_info(struct page_pool *pool,
+                                 struct page *page)
+{
+       page->pp = pool;
+       page->pp_magic |= PP_SIGNATURE;
+}
+
+static void page_pool_clear_pp_info(struct page *page)
+{
+       page->pp_magic = 0;
+       page->pp = NULL;
+}
+
 static struct page *__page_pool_alloc_page_order(struct page_pool *pool,
                                                 gfp_t gfp)
 {
@@ -222,7 +241,7 @@ static struct page *__page_pool_alloc_page_order(struct page_pool *pool,
                return NULL;
        }
 
-       page->pp_magic |= PP_SIGNATURE;
+       page_pool_set_pp_info(pool, page);
 
        /* Track how many pages are held 'in-flight' */
        pool->pages_state_hold_cnt++;
@@ -266,7 +285,8 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
                        put_page(page);
                        continue;
                }
-               page->pp_magic |= PP_SIGNATURE;
+
+               page_pool_set_pp_info(pool, page);
                pool->alloc.cache[pool->alloc.count++] = page;
                /* Track how many pages are held 'in-flight' */
                pool->pages_state_hold_cnt++;
@@ -345,7 +365,7 @@ void page_pool_release_page(struct page_pool *pool, struct page *page)
                             DMA_ATTR_SKIP_CPU_SYNC);
        page_pool_set_dma_addr(page, 0);
 skip_dma_unmap:
-       page->pp_magic = 0;
+       page_pool_clear_pp_info(page);
 
        /* This may be the last page returned, releasing the pool, so
         * it is not safe to reference pool afterwards.
@@ -405,6 +425,11 @@ static __always_inline struct page *
 __page_pool_put_page(struct page_pool *pool, struct page *page,
                     unsigned int dma_sync_size, bool allow_direct)
 {
+       /* In the page frag case, bail out if this is not the last user of the page */
+       if (pool->p.flags & PP_FLAG_PAGE_FRAG &&
+           page_pool_atomic_sub_frag_count_return(page, 1))
+               return NULL;
+
        /* This allocator is optimized for the XDP mode that uses
         * one-frame-per-page, but have fallbacks that act like the
         * regular page allocator APIs.
@@ -497,6 +522,84 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data,
 }
 EXPORT_SYMBOL(page_pool_put_page_bulk);
 
+static struct page *page_pool_drain_frag(struct page_pool *pool,
+                                        struct page *page)
+{
+       long drain_count = BIAS_MAX - pool->frag_users;
+
+       /* Some user is still using the page frag */
+       if (likely(page_pool_atomic_sub_frag_count_return(page,
+                                                         drain_count)))
+               return NULL;
+
+       if (page_ref_count(page) == 1 && !page_is_pfmemalloc(page)) {
+               if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
+                       page_pool_dma_sync_for_device(pool, page, -1);
+
+               return page;
+       }
+
+       page_pool_return_page(pool, page);
+       return NULL;
+}
+
+static void page_pool_free_frag(struct page_pool *pool)
+{
+       long drain_count = BIAS_MAX - pool->frag_users;
+       struct page *page = pool->frag_page;
+
+       pool->frag_page = NULL;
+
+       if (!page ||
+           page_pool_atomic_sub_frag_count_return(page, drain_count))
+               return;
+
+       page_pool_return_page(pool, page);
+}
+
+struct page *page_pool_alloc_frag(struct page_pool *pool,
+                                 unsigned int *offset,
+                                 unsigned int size, gfp_t gfp)
+{
+       unsigned int max_size = PAGE_SIZE << pool->p.order;
+       struct page *page = pool->frag_page;
+
+       if (WARN_ON(!(pool->p.flags & PP_FLAG_PAGE_FRAG) ||
+                   size > max_size))
+               return NULL;
+
+       size = ALIGN(size, dma_get_cache_alignment());
+       *offset = pool->frag_offset;
+
+       if (page && *offset + size > max_size) {
+               page = page_pool_drain_frag(pool, page);
+               if (page)
+                       goto frag_reset;
+       }
+
+       if (!page) {
+               page = page_pool_alloc_pages(pool, gfp);
+               if (unlikely(!page)) {
+                       pool->frag_page = NULL;
+                       return NULL;
+               }
+
+               pool->frag_page = page;
+
+frag_reset:
+               pool->frag_users = 1;
+               *offset = 0;
+               pool->frag_offset = size;
+               page_pool_set_frag_count(page, BIAS_MAX);
+               return page;
+       }
+
+       pool->frag_users++;
+       pool->frag_offset = *offset + size;
+       return page;
+}
+EXPORT_SYMBOL(page_pool_alloc_frag);
+
 static void page_pool_empty_ring(struct page_pool *pool)
 {
        struct page *page;
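
A hedged sketch of how an Rx driver might consume the new frag allocator; the pool must be created with PP_FLAG_PAGE_FRAG set (and, per the init check above, PAGE_POOL_DMA_USE_PP_FRAG_COUNT builds reject that flag). The function name is hypothetical:

/* Hypothetical Rx refill helper: sub-allocate 'size' bytes from a
 * (possibly shared) pool page.
 */
static void *rx_refill_one(struct page_pool *pool, unsigned int size,
			   dma_addr_t *dma)
{
	unsigned int offset;
	struct page *page;

	page = page_pool_alloc_frag(pool, &offset, size, GFP_ATOMIC);
	if (!page)
		return NULL;

	/* 'offset' locates our slice within the page. */
	*dma = page_pool_get_dma_addr(page) + offset;
	return page_address(page) + offset;
}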
@@ -602,6 +705,8 @@ void page_pool_destroy(struct page_pool *pool)
        if (!page_pool_put(pool))
                return;
 
+       page_pool_free_frag(pool);
+
        if (!page_pool_release(pool))
                return;
 
@@ -644,7 +749,6 @@ bool page_pool_return_skb_page(struct page *page)
         * The page will be returned to the pool here regardless of the
         * 'flipped' fragment being in use or not.
         */
-       page->pp = NULL;
        page_pool_put_full_page(pp, page, false);
 
        return true;
index 7e258d2..314f97a 100644 (file)
@@ -1190,11 +1190,6 @@ static ssize_t pktgen_if_write(struct file *file,
                         * pktgen_xmit() is called
                         */
                        pkt_dev->last_ok = 1;
-
-                       /* override clone_skb if user passed default value
-                        * at module loading time
-                        */
-                       pkt_dev->clone_skb = 0;
                } else if (strcmp(f, "queue_xmit") == 0) {
                        pkt_dev->xmit_mode = M_QUEUE_XMIT;
                        pkt_dev->last_ok = 1;
index e79aaf1..2dcf1c0 100644 (file)
@@ -1959,6 +1959,13 @@ static bool link_master_filtered(struct net_device *dev, int master_idx)
                return false;
 
        master = netdev_master_upper_dev_get(dev);
+
+       /* 0 is already used to denote that IFLA_MASTER wasn't passed, so we
+        * need another invalid ifindex value to denote "no master".
+        */
+       if (master_idx == -1)
+               return !!master;
+
        if (!master || master->ifindex != master_idx)
                return true;
 
@@ -2257,7 +2264,8 @@ invalid_attr:
        return -EINVAL;
 }
 
-static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
+static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[],
+                           struct netlink_ext_ack *extack)
 {
        if (dev) {
                if (tb[IFLA_ADDRESS] &&
@@ -2284,7 +2292,7 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
                                return -EOPNOTSUPP;
 
                        if (af_ops->validate_link_af) {
-                               err = af_ops->validate_link_af(dev, af);
+                               err = af_ops->validate_link_af(dev, af, extack);
                                if (err < 0)
                                        return err;
                        }
@@ -2592,7 +2600,7 @@ static int do_setlink(const struct sk_buff *skb,
        const struct net_device_ops *ops = dev->netdev_ops;
        int err;
 
-       err = validate_linkmsg(dev, tb);
+       err = validate_linkmsg(dev, tb, extack);
        if (err < 0)
                return err;
 
@@ -3290,7 +3298,7 @@ replay:
                        m_ops = master_dev->rtnl_link_ops;
        }
 
-       err = validate_linkmsg(dev, tb);
+       err = validate_linkmsg(dev, tb, extack);
        if (err < 0)
                return err;
 
index fcbd977..9240af2 100644 (file)
@@ -1789,6 +1789,48 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
 }
 EXPORT_SYMBOL(skb_realloc_headroom);
 
+/**
+ *     skb_expand_head - reallocate header of &sk_buff
+ *     @skb: buffer to reallocate
+ *     @headroom: needed headroom
+ *
+ *     Unlike skb_realloc_headroom, this one does not allocate a new skb
+ *     if possible; it copies skb->sk to the new skb when one has to be
+ *     allocated, and frees the original skb on failure.
+ *
+ *     It expects an increased headroom and generates a warning otherwise.
+ */
+
+struct sk_buff *skb_expand_head(struct sk_buff *skb, unsigned int headroom)
+{
+       int delta = headroom - skb_headroom(skb);
+
+       if (WARN_ONCE(delta <= 0,
+                     "%s is expecting an increase in the headroom", __func__))
+               return skb;
+
+       /* pskb_expand_head() might crash, if skb is shared */
+       if (skb_shared(skb)) {
+               struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
+
+               if (likely(nskb)) {
+                       if (skb->sk)
+                               skb_set_owner_w(nskb, skb->sk);
+                       consume_skb(skb);
+               } else {
+                       kfree_skb(skb);
+               }
+               skb = nskb;
+       }
+       if (skb &&
+           pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) {
+               kfree_skb(skb);
+               skb = NULL;
+       }
+       return skb;
+}
+EXPORT_SYMBOL(skb_expand_head);
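
The bpf_out_neigh_v{4,6} hunks earlier in this series are the intended first users. A hedged sketch of the caller pattern the helper enables; the key point is that on failure the original skb has already been freed:

	if (unlikely(skb_headroom(skb) < hh_len)) {
		skb = skb_expand_head(skb, hh_len);
		if (!skb)
			return -ENOMEM;	/* old skb already freed */
	}
	/* skb now has at least hh_len bytes of headroom */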
+
 /**
  *     skb_copy_expand -       copy and expand sk_buff
  *     @skb: buffer to copy
@@ -4327,7 +4369,7 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
                memcpy(frag + 1, skbinfo->frags, sizeof(*frag) * skbinfo->nr_frags);
                /* We dont need to clear skbinfo->nr_frags here */
 
-               new_truesize = SKB_TRUESIZE(sizeof(struct sk_buff));
+               new_truesize = SKB_DATA_ALIGN(sizeof(struct sk_buff));
                delta_truesize = skb->truesize - new_truesize;
                skb->truesize = new_truesize;
                NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE_STOLEN_HEAD;
index 15d7128..2d6249b 100644 (file)
@@ -584,29 +584,42 @@ static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
        return sk_psock_skb_ingress(psock, skb);
 }
 
-static void sock_drop(struct sock *sk, struct sk_buff *skb)
+static void sk_psock_skb_state(struct sk_psock *psock,
+                              struct sk_psock_work_state *state,
+                              struct sk_buff *skb,
+                              int len, int off)
 {
-       sk_drops_add(sk, skb);
-       kfree_skb(skb);
+       spin_lock_bh(&psock->ingress_lock);
+       if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
+               state->skb = skb;
+               state->len = len;
+               state->off = off;
+       } else {
+               sock_drop(psock->sk, skb);
+       }
+       spin_unlock_bh(&psock->ingress_lock);
 }
 
 static void sk_psock_backlog(struct work_struct *work)
 {
        struct sk_psock *psock = container_of(work, struct sk_psock, work);
        struct sk_psock_work_state *state = &psock->work_state;
-       struct sk_buff *skb;
+       struct sk_buff *skb = NULL;
        bool ingress;
        u32 len, off;
        int ret;
 
        mutex_lock(&psock->work_mutex);
-       if (state->skb) {
+       if (unlikely(state->skb)) {
+               spin_lock_bh(&psock->ingress_lock);
                skb = state->skb;
                len = state->len;
                off = state->off;
                state->skb = NULL;
-               goto start;
+               spin_unlock_bh(&psock->ingress_lock);
        }
+       if (skb)
+               goto start;
 
        while ((skb = skb_dequeue(&psock->ingress_skb))) {
                len = skb->len;
@@ -621,9 +634,8 @@ start:
                                                          len, ingress);
                        if (ret <= 0) {
                                if (ret == -EAGAIN) {
-                                       state->skb = skb;
-                                       state->len = len;
-                                       state->off = off;
+                                       sk_psock_skb_state(psock, state, skb,
+                                                          len, off);
                                        goto end;
                                }
                                /* Hard errors break pipe and stop xmit. */
@@ -722,6 +734,11 @@ static void __sk_psock_zap_ingress(struct sk_psock *psock)
                skb_bpf_redirect_clear(skb);
                sock_drop(psock->sk, skb);
        }
+       kfree_skb(psock->work_state.skb);
+       /* We null the skb here to ensure that calls to sk_psock_backlog
+        * do not pick up the freed skb.
+        */
+       psock->work_state.skb = NULL;
        __sk_psock_purge_ingress_msg(psock);
 }
 
@@ -773,8 +790,6 @@ static void sk_psock_destroy(struct work_struct *work)
 
 void sk_psock_drop(struct sock *sk, struct sk_psock *psock)
 {
-       sk_psock_stop(psock, false);
-
        write_lock_bh(&sk->sk_callback_lock);
        sk_psock_restore_proto(sk, psock);
        rcu_assign_sk_user_data(sk, NULL);
@@ -784,6 +799,8 @@ void sk_psock_drop(struct sock *sk, struct sk_psock *psock)
                sk_psock_stop_verdict(sk, psock);
        write_unlock_bh(&sk->sk_callback_lock);
 
+       sk_psock_stop(psock, false);
+
        INIT_RCU_WORK(&psock->rwork, sk_psock_destroy);
        queue_rcu_work(system_wq, &psock->rwork);
 }
index 9671c32..aada649 100644 (file)
@@ -1358,6 +1358,15 @@ set_sndbuf:
                ret = sock_bindtoindex_locked(sk, val);
                break;
 
+       case SO_BUF_LOCK:
+               if (val & ~SOCK_BUF_LOCK_MASK) {
+                       ret = -EINVAL;
+                       break;
+               }
+               sk->sk_userlocks = val | (sk->sk_userlocks &
+                                         ~SOCK_BUF_LOCK_MASK);
+               break;
+
        default:
                ret = -ENOPROTOOPT;
                break;
@@ -1720,6 +1729,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
                v.val64 = sock_net(sk)->net_cookie;
                break;
 
+       case SO_BUF_LOCK:
+               v.val = sk->sk_userlocks & SOCK_BUF_LOCK_MASK;
+               break;
+
        default:
                /* We implement the SO_SNDLOWAT etc to not be settable
                 * (1003.1g 7).
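
A hedged userspace sketch of the new socket option; the constants mirror the uapi additions from this cycle (SO_BUF_LOCK = 72, SOCK_SNDBUF_LOCK = 1, SOCK_RCVBUF_LOCK = 2) but are repeated behind an #ifndef guard in case the toolchain headers predate them:

#include <sys/socket.h>

#ifndef SO_BUF_LOCK
#define SO_BUF_LOCK		72	/* assumed uapi value */
#define SOCK_SNDBUF_LOCK	1
#define SOCK_RCVBUF_LOCK	2
#endif

/* Pin both buffer sizes so kernel autotuning leaves them alone. */
static int lock_bufs(int fd)
{
	int val = SOCK_SNDBUF_LOCK | SOCK_RCVBUF_LOCK;
	socklen_t len = sizeof(val);

	if (setsockopt(fd, SOL_SOCKET, SO_BUF_LOCK, &val, sizeof(val)))
		return -1;

	/* Read back the active lock mask. */
	if (getsockopt(fd, SOL_SOCKET, SO_BUF_LOCK, &val, &len))
		return -1;
	return val;
}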
index d1c50a4..0ee7d4c 100644 (file)
@@ -521,8 +521,7 @@ int dn_dev_set_default(struct net_device *dev, int force)
        }
        spin_unlock(&dndev_lock);
 
-       if (old)
-               dev_put(old);
+       dev_put(old);
        return rv;
 }
 
@@ -536,8 +535,7 @@ static void dn_dev_check_default(struct net_device *dev)
        }
        spin_unlock(&dndev_lock);
 
-       if (dev)
-               dev_put(dev);
+       dev_put(dev);
 }
 
 /*
index 387a7e8..269c029 100644 (file)
@@ -92,8 +92,7 @@ void dn_fib_free_info(struct dn_fib_info *fi)
        }
 
        change_nexthops(fi) {
-               if (nh->nh_dev)
-                       dev_put(nh->nh_dev);
+               dev_put(nh->nh_dev);
                nh->nh_dev = NULL;
        } endfor_nexthops(fi);
        kfree(fi);
@@ -389,7 +388,7 @@ link_it:
                return ofi;
        }
 
-       refcount_inc(&fi->fib_treeref);
+       refcount_set(&fi->fib_treeref, 1);
        refcount_set(&fi->fib_clntref, 1);
        spin_lock(&dn_fib_info_lock);
        fi->fib_next = dn_fib_info_list;
index 729d3de..7e85f2a 100644 (file)
@@ -1026,8 +1026,7 @@ source_ok:
        if (!fld.daddr) {
                fld.daddr = fld.saddr;
 
-               if (dev_out)
-                       dev_put(dev_out);
+               dev_put(dev_out);
                err = -EINVAL;
                dev_out = init_net.loopback_dev;
                if (!dev_out->dn_ptr)
@@ -1084,8 +1083,7 @@ source_ok:
                                        neigh_release(neigh);
                                        neigh = NULL;
                                } else {
-                                       if (dev_out)
-                                               dev_put(dev_out);
+                                       dev_put(dev_out);
                                        if (dn_dev_islocal(neigh->dev, fld.daddr)) {
                                                dev_out = init_net.loopback_dev;
                                                res.type = RTN_LOCAL;
@@ -1144,8 +1142,7 @@ select_source:
        if (res.type == RTN_LOCAL) {
                if (!fld.saddr)
                        fld.saddr = fld.daddr;
-               if (dev_out)
-                       dev_put(dev_out);
+               dev_put(dev_out);
                dev_out = init_net.loopback_dev;
                dev_hold(dev_out);
                if (!dev_out->dn_ptr)
@@ -1168,8 +1165,7 @@ select_source:
        if (!fld.saddr)
                fld.saddr = DN_FIB_RES_PREFSRC(res);
 
-       if (dev_out)
-               dev_put(dev_out);
+       dev_put(dev_out);
        dev_out = DN_FIB_RES_DEV(res);
        dev_hold(dev_out);
        fld.flowidn_oif = dev_out->ifindex;
@@ -1222,8 +1218,7 @@ done:
                neigh_release(neigh);
        if (free_res)
                dn_fib_res_put(&res);
-       if (dev_out)
-               dev_put(dev_out);
+       dev_put(dev_out);
 out:
        return err;
 
@@ -1503,8 +1498,7 @@ done:
        if (free_res)
                dn_fib_res_put(&res);
        dev_put(in_dev);
-       if (out_dev)
-               dev_put(out_dev);
+       dev_put(out_dev);
 out:
        return err;
 
index bca1b5d..970906e 100644 (file)
@@ -138,6 +138,7 @@ config NET_DSA_TAG_LAN9303
 
 config NET_DSA_TAG_SJA1105
        tristate "Tag driver for NXP SJA1105 switches"
+       depends on (NET_DSA_SJA1105 && NET_DSA_SJA1105_PTP) || !NET_DSA_SJA1105 || !NET_DSA_SJA1105_PTP
        select PACKING
        help
          Say Y or M if you want to enable support for tagging frames with the
index 84cad1b..1dc45e4 100644 (file)
@@ -238,7 +238,7 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
        if (!skb)
                return 0;
 
-       nskb = cpu_dp->rcv(skb, dev, pt);
+       nskb = cpu_dp->rcv(skb, dev);
        if (!nskb) {
                kfree_skb(skb);
                return 0;
index c7fa85f..8150e16 100644 (file)
@@ -311,6 +311,9 @@ static struct dsa_port *dsa_tree_find_first_cpu(struct dsa_switch_tree *dst)
        return NULL;
 }
 
+/* Assign the default CPU port (the first one in the tree) to all ports of the
+ * fabric which don't already have one as part of their own switch.
+ */
 static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst)
 {
        struct dsa_port *cpu_dp, *dp;
@@ -321,15 +324,48 @@ static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst)
                return -EINVAL;
        }
 
-       /* Assign the default CPU port to all ports of the fabric */
-       list_for_each_entry(dp, &dst->ports, list)
+       list_for_each_entry(dp, &dst->ports, list) {
+               if (dp->cpu_dp)
+                       continue;
+
                if (dsa_port_is_user(dp) || dsa_port_is_dsa(dp))
                        dp->cpu_dp = cpu_dp;
+       }
 
        return 0;
 }
 
-static void dsa_tree_teardown_default_cpu(struct dsa_switch_tree *dst)
+/* Perform initial assignment of CPU ports to user ports and DSA links in the
+ * fabric, giving preference to CPU ports local to each switch. Default to
+ * using the first CPU port in the switch tree if the port does not have a CPU
+ * port local to this switch.
+ */
+static int dsa_tree_setup_cpu_ports(struct dsa_switch_tree *dst)
+{
+       struct dsa_port *cpu_dp, *dp;
+
+       list_for_each_entry(cpu_dp, &dst->ports, list) {
+               if (!dsa_port_is_cpu(cpu_dp))
+                       continue;
+
+               list_for_each_entry(dp, &dst->ports, list) {
+                       /* Prefer a local CPU port */
+                       if (dp->ds != cpu_dp->ds)
+                               continue;
+
+                       /* Prefer the first local CPU port found */
+                       if (dp->cpu_dp)
+                               continue;
+
+                       if (dsa_port_is_user(dp) || dsa_port_is_dsa(dp))
+                               dp->cpu_dp = cpu_dp;
+               }
+       }
+
+       return dsa_tree_setup_default_cpu(dst);
+}
+
+static void dsa_tree_teardown_cpu_ports(struct dsa_switch_tree *dst)
 {
        struct dsa_port *dp;
 
@@ -710,13 +746,14 @@ static int dsa_switch_setup(struct dsa_switch *ds)
        /* Add the switch to devlink before calling setup, so that setup can
         * add dpipe tables
         */
-       ds->devlink = devlink_alloc(&dsa_devlink_ops, sizeof(*dl_priv));
+       ds->devlink =
+               devlink_alloc(&dsa_devlink_ops, sizeof(*dl_priv), ds->dev);
        if (!ds->devlink)
                return -ENOMEM;
        dl_priv = devlink_priv(ds->devlink);
        dl_priv->ds = ds;
 
-       err = devlink_register(ds->devlink, ds->dev);
+       err = devlink_register(ds->devlink);
        if (err)
                goto free_devlink;
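
The devlink API change visible here, with the backing device supplied at allocation time and devlink_register() taking no arguments, was applied tree-wide in this cycle. A hedged sketch of the new driver-side sequence; the ops, priv and pdev names are hypothetical:

	devlink = devlink_alloc(&my_devlink_ops, sizeof(struct my_priv),
				&pdev->dev);
	if (!devlink)
		return -ENOMEM;

	priv = devlink_priv(devlink);
	/* ... initialize priv ... */

	err = devlink_register(devlink);	/* no dev argument anymore */
	if (err)
		devlink_free(devlink);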
 
@@ -921,13 +958,13 @@ static int dsa_tree_setup(struct dsa_switch_tree *dst)
        if (!complete)
                return 0;
 
-       err = dsa_tree_setup_default_cpu(dst);
+       err = dsa_tree_setup_cpu_ports(dst);
        if (err)
                return err;
 
        err = dsa_tree_setup_switches(dst);
        if (err)
-               goto teardown_default_cpu;
+               goto teardown_cpu_ports;
 
        err = dsa_tree_setup_master(dst);
        if (err)
@@ -947,8 +984,8 @@ teardown_master:
        dsa_tree_teardown_master(dst);
 teardown_switches:
        dsa_tree_teardown_switches(dst);
-teardown_default_cpu:
-       dsa_tree_teardown_default_cpu(dst);
+teardown_cpu_ports:
+       dsa_tree_teardown_cpu_ports(dst);
 
        return err;
 }
@@ -966,7 +1003,7 @@ static void dsa_tree_teardown(struct dsa_switch_tree *dst)
 
        dsa_tree_teardown_switches(dst);
 
-       dsa_tree_teardown_default_cpu(dst);
+       dsa_tree_teardown_cpu_ports(dst);
 
        list_for_each_entry_safe(dl, next, &dst->rtable, list) {
                list_del(&dl->list);
index e43c5dc..9575cab 100644 (file)
@@ -199,7 +199,7 @@ static inline struct net_device *dsa_master_find_slave(struct net_device *dev,
 /* port.c */
 void dsa_port_set_tag_protocol(struct dsa_port *cpu_dp,
                               const struct dsa_device_ops *tag_ops);
-int dsa_port_set_state(struct dsa_port *dp, u8 state);
+int dsa_port_set_state(struct dsa_port *dp, u8 state, bool do_fast_age);
 int dsa_port_enable_rt(struct dsa_port *dp, struct phy_device *phy);
 int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy);
 void dsa_port_disable_rt(struct dsa_port *dp);
@@ -241,11 +241,9 @@ int dsa_port_host_mdb_del(const struct dsa_port *dp,
 int dsa_port_pre_bridge_flags(const struct dsa_port *dp,
                              struct switchdev_brport_flags flags,
                              struct netlink_ext_ack *extack);
-int dsa_port_bridge_flags(const struct dsa_port *dp,
+int dsa_port_bridge_flags(struct dsa_port *dp,
                          struct switchdev_brport_flags flags,
                          struct netlink_ext_ack *extack);
-int dsa_port_mrouter(struct dsa_port *dp, bool mrouter,
-                    struct netlink_ext_ack *extack);
 int dsa_port_vlan_add(struct dsa_port *dp,
                      const struct switchdev_obj_port_vlan *vlan,
                      struct netlink_ext_ack *extack);
index b927d94..831d50d 100644 (file)
@@ -30,7 +30,52 @@ static int dsa_port_notify(const struct dsa_port *dp, unsigned long e, void *v)
        return dsa_tree_notify(dp->ds->dst, e, v);
 }
 
-int dsa_port_set_state(struct dsa_port *dp, u8 state)
+static void dsa_port_notify_bridge_fdb_flush(const struct dsa_port *dp)
+{
+       struct net_device *brport_dev = dsa_port_to_bridge_port(dp);
+       struct switchdev_notifier_fdb_info info = {
+               /* flush all VLANs */
+               .vid = 0,
+       };
+
+       /* When the port becomes standalone it has already left the bridge.
+        * Don't notify the bridge in that case.
+        */
+       if (!brport_dev)
+               return;
+
+       call_switchdev_notifiers(SWITCHDEV_FDB_FLUSH_TO_BRIDGE,
+                                brport_dev, &info.info, NULL);
+}
+
+static void dsa_port_fast_age(const struct dsa_port *dp)
+{
+       struct dsa_switch *ds = dp->ds;
+
+       if (!ds->ops->port_fast_age)
+               return;
+
+       ds->ops->port_fast_age(ds, dp->index);
+
+       dsa_port_notify_bridge_fdb_flush(dp);
+}
+
+static bool dsa_port_can_configure_learning(struct dsa_port *dp)
+{
+       struct switchdev_brport_flags flags = {
+               .mask = BR_LEARNING,
+       };
+       struct dsa_switch *ds = dp->ds;
+       int err;
+
+       if (!ds->ops->port_bridge_flags || !ds->ops->port_pre_bridge_flags)
+               return false;
+
+       err = ds->ops->port_pre_bridge_flags(ds, dp->index, flags, NULL);
+       return !err;
+}
+
+int dsa_port_set_state(struct dsa_port *dp, u8 state, bool do_fast_age)
 {
        struct dsa_switch *ds = dp->ds;
        int port = dp->index;
@@ -40,10 +85,14 @@ int dsa_port_set_state(struct dsa_port *dp, u8 state)
 
        ds->ops->port_stp_state_set(ds, port, state);
 
-       if (ds->ops->port_fast_age) {
+       if (!dsa_port_can_configure_learning(dp) ||
+           (do_fast_age && dp->learning)) {
                /* Fast age FDB entries or flush appropriate forwarding database
                 * for the given port, if we are moving it from Learning or
                 * Forwarding state, to Disabled or Blocking or Listening state.
+                * Ports that were standalone before the STP state change don't
+                * need to fast age the FDB, since address learning is off in
+                * standalone mode.
                 */
 
                if ((dp->stp_state == BR_STATE_LEARNING ||
@@ -51,7 +100,7 @@ int dsa_port_set_state(struct dsa_port *dp, u8 state)
                    (state == BR_STATE_DISABLED ||
                     state == BR_STATE_BLOCKING ||
                     state == BR_STATE_LISTENING))
-                       ds->ops->port_fast_age(ds, port);
+                       dsa_port_fast_age(dp);
        }
 
        dp->stp_state = state;
@@ -59,11 +108,12 @@ int dsa_port_set_state(struct dsa_port *dp, u8 state)
        return 0;
 }
 
-static void dsa_port_set_state_now(struct dsa_port *dp, u8 state)
+static void dsa_port_set_state_now(struct dsa_port *dp, u8 state,
+                                  bool do_fast_age)
 {
        int err;
 
-       err = dsa_port_set_state(dp, state);
+       err = dsa_port_set_state(dp, state, do_fast_age);
        if (err)
                pr_err("DSA: failed to set STP state %u (%d)\n", state, err);
 }
@@ -81,7 +131,7 @@ int dsa_port_enable_rt(struct dsa_port *dp, struct phy_device *phy)
        }
 
        if (!dp->bridge_dev)
-               dsa_port_set_state_now(dp, BR_STATE_FORWARDING);
+               dsa_port_set_state_now(dp, BR_STATE_FORWARDING, false);
 
        if (dp->pl)
                phylink_start(dp->pl);
@@ -109,7 +159,7 @@ void dsa_port_disable_rt(struct dsa_port *dp)
                phylink_stop(dp->pl);
 
        if (!dp->bridge_dev)
-               dsa_port_set_state_now(dp, BR_STATE_DISABLED);
+               dsa_port_set_state_now(dp, BR_STATE_DISABLED, false);
 
        if (ds->ops->port_disable)
                ds->ops->port_disable(ds, port);
@@ -178,7 +228,7 @@ static int dsa_port_switchdev_sync_attrs(struct dsa_port *dp,
        if (err)
                return err;
 
-       err = dsa_port_set_state(dp, br_port_get_stp_state(brport_dev));
+       err = dsa_port_set_state(dp, br_port_get_stp_state(brport_dev), false);
        if (err && err != -EOPNOTSUPP)
                return err;
 
@@ -186,10 +236,6 @@ static int dsa_port_switchdev_sync_attrs(struct dsa_port *dp,
        if (err && err != -EOPNOTSUPP)
                return err;
 
-       err = dsa_port_mrouter(dp->cpu_dp, br_multicast_router(br), extack);
-       if (err && err != -EOPNOTSUPP)
-               return err;
-
        err = dsa_port_ageing_time(dp, br_get_ageing_time(br));
        if (err && err != -EOPNOTSUPP)
                return err;
@@ -215,16 +261,10 @@ static void dsa_port_switchdev_unsync_attrs(struct dsa_port *dp)
        /* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer,
         * so allow it to be in BR_STATE_FORWARDING to be kept functional
         */
-       dsa_port_set_state_now(dp, BR_STATE_FORWARDING);
+       dsa_port_set_state_now(dp, BR_STATE_FORWARDING, true);
 
        /* VLAN filtering is handled by dsa_switch_bridge_leave */
 
-       /* Some drivers treat the notification for having a local multicast
-        * router by allowing multicast to be flooded to the CPU, so we should
-        * allow this in standalone mode too.
-        */
-       dsa_port_mrouter(dp->cpu_dp, true, NULL);
-
        /* Ageing time may be global to the switch chip, so don't change it
         * here because we have no good reason (or value) to change it to.
         */
@@ -639,27 +679,35 @@ int dsa_port_pre_bridge_flags(const struct dsa_port *dp,
        return ds->ops->port_pre_bridge_flags(ds, dp->index, flags, extack);
 }
 
-int dsa_port_bridge_flags(const struct dsa_port *dp,
+int dsa_port_bridge_flags(struct dsa_port *dp,
                          struct switchdev_brport_flags flags,
                          struct netlink_ext_ack *extack)
 {
        struct dsa_switch *ds = dp->ds;
+       int err;
 
        if (!ds->ops->port_bridge_flags)
                return -EOPNOTSUPP;
 
-       return ds->ops->port_bridge_flags(ds, dp->index, flags, extack);
-}
+       err = ds->ops->port_bridge_flags(ds, dp->index, flags, extack);
+       if (err)
+               return err;
 
-int dsa_port_mrouter(struct dsa_port *dp, bool mrouter,
-                    struct netlink_ext_ack *extack)
-{
-       struct dsa_switch *ds = dp->ds;
+       if (flags.mask & BR_LEARNING) {
+               bool learning = flags.val & BR_LEARNING;
 
-       if (!ds->ops->port_set_mrouter)
-               return -EOPNOTSUPP;
+               if (learning == dp->learning)
+                       return 0;
+
+               if ((dp->learning && !learning) &&
+                   (dp->stp_state == BR_STATE_LEARNING ||
+                    dp->stp_state == BR_STATE_FORWARDING))
+                       dsa_port_fast_age(dp);
 
-       return ds->ops->port_set_mrouter(ds, dp->index, mrouter, extack);
+               dp->learning = learning;
+       }
+
+       return 0;
 }
 
 int dsa_port_mtu_change(struct dsa_port *dp, int new_mtu,
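
Because dp->learning is only tracked when dsa_port_can_configure_learning() says so, a switch driver must provide both port_pre_bridge_flags and port_bridge_flags to opt in. A hedged minimal driver-side pair; all foo_* names are hypothetical:

static int foo_port_pre_bridge_flags(struct dsa_switch *ds, int port,
				     struct switchdev_brport_flags flags,
				     struct netlink_ext_ack *extack)
{
	/* Advertise only what the hardware can actually toggle. */
	if (flags.mask & ~BR_LEARNING)
		return -EINVAL;
	return 0;
}

static int foo_port_bridge_flags(struct dsa_switch *ds, int port,
				 struct switchdev_brport_flags flags,
				 struct netlink_ext_ack *extack)
{
	if (flags.mask & BR_LEARNING) {
		/* foo_set_learning() stands in for the hardware accessor. */
		foo_set_learning(ds, port, !!(flags.val & BR_LEARNING));
	}
	return 0;
}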
index 6e1135d..acf73db 100644 (file)
@@ -286,7 +286,7 @@ static int dsa_slave_port_attr_set(struct net_device *dev, const void *ctx,
                if (!dsa_port_offloads_bridge_port(dp, attr->orig_dev))
                        return -EOPNOTSUPP;
 
-               ret = dsa_port_set_state(dp, attr->u.stp_state);
+               ret = dsa_port_set_state(dp, attr->u.stp_state, true);
                break;
        case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING:
                if (!dsa_port_offloads_bridge(dp, attr->orig_dev))
@@ -314,12 +314,6 @@ static int dsa_slave_port_attr_set(struct net_device *dev, const void *ctx,
 
                ret = dsa_port_bridge_flags(dp, attr->u.brport_flags, extack);
                break;
-       case SWITCHDEV_ATTR_ID_BRIDGE_MROUTER:
-               if (!dsa_port_offloads_bridge(dp, attr->orig_dev))
-                       return -EOPNOTSUPP;
-
-               ret = dsa_port_mrouter(dp->cpu_dp, attr->u.mrouter, extack);
-               break;
        default:
                ret = -EOPNOTSUPP;
                break;
index 0efae1a..8a02ac4 100644 (file)
@@ -44,8 +44,7 @@ static struct sk_buff *ar9331_tag_xmit(struct sk_buff *skb,
 }
 
 static struct sk_buff *ar9331_tag_rcv(struct sk_buff *skb,
-                                     struct net_device *ndev,
-                                     struct packet_type *pt)
+                                     struct net_device *ndev)
 {
        u8 ver, port;
        u16 hdr;
index a27f509..96e93b5 100644 (file)
@@ -136,7 +136,6 @@ static struct sk_buff *brcm_tag_xmit_ll(struct sk_buff *skb,
  */
 static struct sk_buff *brcm_tag_rcv_ll(struct sk_buff *skb,
                                       struct net_device *dev,
-                                      struct packet_type *pt,
                                       unsigned int offset)
 {
        int source_port;
@@ -182,13 +181,12 @@ static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb,
 }
 
 
-static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
-                                   struct packet_type *pt)
+static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev)
 {
        struct sk_buff *nskb;
 
        /* skb->data points to the EtherType, the tag is right before it */
-       nskb = brcm_tag_rcv_ll(skb, dev, pt, 2);
+       nskb = brcm_tag_rcv_ll(skb, dev, 2);
        if (!nskb)
                return nskb;
 
@@ -251,8 +249,7 @@ static struct sk_buff *brcm_leg_tag_xmit(struct sk_buff *skb,
 }
 
 static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb,
-                                       struct net_device *dev,
-                                       struct packet_type *pt)
+                                       struct net_device *dev)
 {
        int source_port;
        u8 *brcm_tag;
@@ -302,11 +299,10 @@ static struct sk_buff *brcm_tag_xmit_prepend(struct sk_buff *skb,
 }
 
 static struct sk_buff *brcm_tag_rcv_prepend(struct sk_buff *skb,
-                                           struct net_device *dev,
-                                           struct packet_type *pt)
+                                           struct net_device *dev)
 {
        /* tag is prepended to the packet */
-       return brcm_tag_rcv_ll(skb, dev, pt, ETH_HLEN);
+       return brcm_tag_rcv_ll(skb, dev, ETH_HLEN);
 }
 
 static const struct dsa_device_ops brcm_prepend_netdev_ops = {
index 3607499..e32f816 100644 (file)
@@ -332,8 +332,7 @@ static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev)
        return dsa_xmit_ll(skb, dev, 0);
 }
 
-static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev,
-                              struct packet_type *pt)
+static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev)
 {
        if (unlikely(!pskb_may_pull(skb, DSA_HLEN)))
                return NULL;
@@ -373,8 +372,7 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev)
        return skb;
 }
 
-static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev,
-                               struct packet_type *pt)
+static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev)
 {
        if (unlikely(!pskb_may_pull(skb, EDSA_HLEN)))
                return NULL;
index 5985dab..df71409 100644 (file)
@@ -75,8 +75,7 @@ static struct sk_buff *gswip_tag_xmit(struct sk_buff *skb,
 }
 
 static struct sk_buff *gswip_tag_rcv(struct sk_buff *skb,
-                                    struct net_device *dev,
-                                    struct packet_type *pt)
+                                    struct net_device *dev)
 {
        int port;
        u8 *gswip_tag;
index c41208c..f64b805 100644 (file)
@@ -29,8 +29,7 @@ static struct sk_buff *hellcreek_xmit(struct sk_buff *skb,
 }
 
 static struct sk_buff *hellcreek_rcv(struct sk_buff *skb,
-                                    struct net_device *dev,
-                                    struct packet_type *pt)
+                                    struct net_device *dev)
 {
        /* Tag decoding */
        u8 *tag = skb_tail_pointer(skb) - HELLCREEK_TAG_LEN;
index 1c2dfa8..fa1d60d 100644 (file)
@@ -67,8 +67,7 @@ static struct sk_buff *ksz8795_xmit(struct sk_buff *skb, struct net_device *dev)
        return skb;
 }
 
-static struct sk_buff *ksz8795_rcv(struct sk_buff *skb, struct net_device *dev,
-                                 struct packet_type *pt)
+static struct sk_buff *ksz8795_rcv(struct sk_buff *skb, struct net_device *dev)
 {
        u8 *tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN;
 
@@ -134,8 +133,7 @@ static struct sk_buff *ksz9477_xmit(struct sk_buff *skb,
        return skb;
 }
 
-static struct sk_buff *ksz9477_rcv(struct sk_buff *skb, struct net_device *dev,
-                                  struct packet_type *pt)
+static struct sk_buff *ksz9477_rcv(struct sk_buff *skb, struct net_device *dev)
 {
        /* Tag decoding */
        u8 *tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN;
index cf7cf2f..58d3a0e 100644 (file)
@@ -74,8 +74,7 @@ static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev)
        return skb;
 }
 
-static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev,
-                                  struct packet_type *pt)
+static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev)
 {
        __be16 *lan9303_tag;
        u16 lan9303_tag1;
index 3fb80e4..bbf37c0 100644 (file)
@@ -61,8 +61,7 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
        return skb;
 }
 
-static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev,
-                                  struct packet_type *pt)
+static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev)
 {
        u16 hdr;
        int port;
index 3252634..d37ab98 100644 (file)
@@ -55,8 +55,7 @@ static struct sk_buff *seville_xmit(struct sk_buff *skb,
 }
 
 static struct sk_buff *ocelot_rcv(struct sk_buff *skb,
-                                 struct net_device *netdev,
-                                 struct packet_type *pt)
+                                 struct net_device *netdev)
 {
        u64 src_port, qos_class;
        u64 vlan_tci, tag_type;
index c95de71..3038a25 100644 (file)
@@ -38,8 +38,7 @@ static struct sk_buff *ocelot_xmit(struct sk_buff *skb,
 }
 
 static struct sk_buff *ocelot_rcv(struct sk_buff *skb,
-                                 struct net_device *netdev,
-                                 struct packet_type *pt)
+                                 struct net_device *netdev)
 {
        int src_port, switch_id;
 
index 693bda0..6e31369 100644 (file)
@@ -48,8 +48,7 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev)
        return skb;
 }
 
-static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev,
-                                  struct packet_type *pt)
+static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev)
 {
        u8 ver;
        u16  hdr;
index f6b63aa..aaddca3 100644 (file)
@@ -64,8 +64,7 @@ static struct sk_buff *rtl4a_tag_xmit(struct sk_buff *skb,
 }
 
 static struct sk_buff *rtl4a_tag_rcv(struct sk_buff *skb,
-                                    struct net_device *dev,
-                                    struct packet_type *pt)
+                                    struct net_device *dev)
 {
        u16 protport;
        __be16 *p;
index 664cb80..38b2792 100644 (file)
@@ -391,8 +391,7 @@ static void sja1105_vlan_rcv(struct sk_buff *skb, int *source_port,
 }
 
 static struct sk_buff *sja1105_rcv(struct sk_buff *skb,
-                                  struct net_device *netdev,
-                                  struct packet_type *pt)
+                                  struct net_device *netdev)
 {
        int source_port = -1, switch_id = -1;
        struct sja1105_meta meta = {0};
@@ -546,12 +545,11 @@ static struct sk_buff *sja1110_rcv_inband_control_extension(struct sk_buff *skb,
 }
 
 static struct sk_buff *sja1110_rcv(struct sk_buff *skb,
-                                  struct net_device *netdev,
-                                  struct packet_type *pt)
+                                  struct net_device *netdev)
 {
        int source_port = -1, switch_id = -1;
        bool host_only = false;
-       u16 vid;
+       u16 vid = 0;
 
        if (sja1110_skb_has_inband_control_extension(skb)) {
                skb = sja1110_rcv_inband_control_extension(skb, &source_port,
index ba73804..5749ba8 100644 (file)
@@ -24,8 +24,7 @@ static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev)
        return skb;
 }
 
-static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev,
-                                  struct packet_type *pt)
+static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev)
 {
        u8 *trailer;
        int source_port;
index da231c1..ff442b8 100644 (file)
@@ -25,8 +25,7 @@ static struct sk_buff *xrs700x_xmit(struct sk_buff *skb, struct net_device *dev)
        return skb;
 }
 
-static struct sk_buff *xrs700x_rcv(struct sk_buff *skb, struct net_device *dev,
-                                  struct packet_type *pt)
+static struct sk_buff *xrs700x_rcv(struct sk_buff *skb, struct net_device *dev)
 {
        int source_port;
        u8 *trailer;
index 171ba75..73fce94 100644 (file)
@@ -62,8 +62,6 @@
 #include <linux/uaccess.h>
 #include <net/pkt_sched.h>
 
-__setup("ether=", netdev_boot_setup);
-
 /**
  * eth_header - create the Ethernet header
  * @skb:       buffer to alter
index b0fa2b0..81fa36a 100644 (file)
@@ -24,6 +24,7 @@
 #include <linux/rtnetlink.h>
 #include <linux/sched/signal.h>
 #include <linux/net.h>
+#include <linux/pm_runtime.h>
 #include <net/devlink.h>
 #include <net/xdp_sock_drv.h>
 #include <net/flow_offload.h>
@@ -2692,7 +2693,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr, void __user *useraddr)
        int rc;
        netdev_features_t old_features;
 
-       if (!dev || !netif_device_present(dev))
+       if (!dev)
                return -ENODEV;
 
        if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd)))
@@ -2748,10 +2749,18 @@ int dev_ethtool(struct net *net, struct ifreq *ifr, void __user *useraddr)
                        return -EPERM;
        }
 
+       if (dev->dev.parent)
+               pm_runtime_get_sync(dev->dev.parent);
+
+       if (!netif_device_present(dev)) {
+               rc = -ENODEV;
+               goto out;
+       }
+
        if (dev->ethtool_ops->begin) {
                rc = dev->ethtool_ops->begin(dev);
-               if (rc  < 0)
-                       return rc;
+               if (rc < 0)
+                       goto out;
        }
        old_features = dev->features;
 
@@ -2970,6 +2979,9 @@ int dev_ethtool(struct net *net, struct ifreq *ifr, void __user *useraddr)
 
        if (old_features != dev->features)
                netdev_features_change(dev);
+out:
+       if (dev->dev.parent)
+               pm_runtime_put(dev->dev.parent);
 
        return rc;
 }
index 73e0f5b..1797a0a 100644 (file)
@@ -2,6 +2,7 @@
 
 #include <net/sock.h>
 #include <linux/ethtool_netlink.h>
+#include <linux/pm_runtime.h>
 #include "netlink.h"
 
 static struct genl_family ethtool_genl_family;
@@ -29,6 +30,44 @@ const struct nla_policy ethnl_header_policy_stats[] = {
                                                          ETHTOOL_FLAGS_STATS),
 };
 
+int ethnl_ops_begin(struct net_device *dev)
+{
+       int ret;
+
+       if (!dev)
+               return -ENODEV;
+
+       if (dev->dev.parent)
+               pm_runtime_get_sync(dev->dev.parent);
+
+       if (!netif_device_present(dev)) {
+               ret = -ENODEV;
+               goto err;
+       }
+
+       if (dev->ethtool_ops->begin) {
+               ret = dev->ethtool_ops->begin(dev);
+               if (ret)
+                       goto err;
+       }
+
+       return 0;
+err:
+       if (dev->dev.parent)
+               pm_runtime_put(dev->dev.parent);
+
+       return ret;
+}
+
+void ethnl_ops_complete(struct net_device *dev)
+{
+       if (dev->ethtool_ops->complete)
+               dev->ethtool_ops->complete(dev);
+
+       if (dev->dev.parent)
+               pm_runtime_put(dev->dev.parent);
+}
+
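
Every ethtool-netlink request handler is expected to bracket its work with this pair; the runtime-PM get/put guarantees the device cannot suspend while its ethtool_ops run, which is why the netif_device_present() check moves in here from the header parser below. A hedged sketch of the calling convention:

	ret = ethnl_ops_begin(dev);	/* resumes the device if needed */
	if (ret < 0)
		return ret;

	/* ... dev->ethtool_ops handlers may be called safely here ... */

	ethnl_ops_complete(dev);	/* drops the runtime-PM reference */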
 /**
  * ethnl_parse_header_dev_get() - parse request header
  * @req_info:    structure to put results into
@@ -101,12 +140,6 @@ int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info,
                return -EINVAL;
        }
 
-       if (dev && !netif_device_present(dev)) {
-               dev_put(dev);
-               NL_SET_ERR_MSG(extack, "device not present");
-               return -ENODEV;
-       }
-
        req_info->dev = dev;
        req_info->flags = flags;
        return 0;
@@ -365,8 +398,7 @@ static int ethnl_default_doit(struct sk_buff *skb, struct genl_info *info)
                ops->cleanup_data(reply_data);
 
        genlmsg_end(rskb, reply_payload);
-       if (req_info->dev)
-               dev_put(req_info->dev);
+       dev_put(req_info->dev);
        kfree(reply_data);
        kfree(req_info);
        return genlmsg_reply(rskb, info);
@@ -378,8 +410,7 @@ err_cleanup:
        if (ops->cleanup_data)
                ops->cleanup_data(reply_data);
 err_dev:
-       if (req_info->dev)
-               dev_put(req_info->dev);
+       dev_put(req_info->dev);
        kfree(reply_data);
        kfree(req_info);
        return ret;
index 3fc395c..077aac3 100644 (file)
@@ -247,19 +247,8 @@ struct ethnl_reply_data {
        struct net_device               *dev;
 };
 
-static inline int ethnl_ops_begin(struct net_device *dev)
-{
-       if (dev && dev->ethtool_ops->begin)
-               return dev->ethtool_ops->begin(dev);
-       else
-               return 0;
-}
-
-static inline void ethnl_ops_complete(struct net_device *dev)
-{
-       if (dev && dev->ethtool_ops->complete)
-               dev->ethtool_ops->complete(dev);
-}
+int ethnl_ops_begin(struct net_device *dev);
+void ethnl_ops_complete(struct net_device *dev);
 
 /**
  * struct ethnl_request_ops - unified handling of GET requests
index 88215b5..dd5a45f 100644 (file)
@@ -340,8 +340,7 @@ nla_put_failure:
 out_dev:
        wpan_phy_put(phy);
 out:
-       if (dev)
-               dev_put(dev);
+       dev_put(dev);
 
        return rc;
 }
index 0cf2374..277124f 100644 (file)
@@ -2226,8 +2226,7 @@ static void nl802154_post_doit(const struct genl_ops *ops, struct sk_buff *skb,
                if (ops->internal_flags & NL802154_FLAG_NEED_WPAN_DEV) {
                        struct wpan_dev *wpan_dev = info->user_ptr[1];
 
-                       if (wpan_dev->netdev)
-                               dev_put(wpan_dev->netdev);
+                       dev_put(wpan_dev->netdev);
                } else {
                        dev_put(info->user_ptr[1]);
                }
index f5077de..90233ef 100644 (file)
@@ -41,8 +41,7 @@ ieee802154_get_dev(struct net *net, const struct ieee802154_addr *addr)
                ieee802154_devaddr_to_raw(hwaddr, addr->extended_addr);
                rcu_read_lock();
                dev = dev_getbyhwaddr_rcu(net, ARPHRD_IEEE802154, hwaddr);
-               if (dev)
-                       dev_hold(dev);
+               dev_hold(dev);
                rcu_read_unlock();
                break;
        case IEEE802154_ADDR_SHORT:
index c82aded..f446898 100644 (file)
@@ -1950,7 +1950,8 @@ static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
 };
 
 static int inet_validate_link_af(const struct net_device *dev,
-                                const struct nlattr *nla)
+                                const struct nlattr *nla,
+                                struct netlink_ext_ack *extack)
 {
        struct nlattr *a, *tb[IFLA_INET_MAX+1];
        int err, rem;
@@ -1959,7 +1960,7 @@ static int inet_validate_link_af(const struct net_device *dev,
                return -EAFNOSUPPORT;
 
        err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla,
-                                         inet_af_policy, NULL);
+                                         inet_af_policy, extack);
        if (err < 0)
                return err;
 
index fa19f4c..b42c429 100644 (file)
@@ -208,9 +208,7 @@ static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp)
 
 void fib_nh_common_release(struct fib_nh_common *nhc)
 {
-       if (nhc->nhc_dev)
-               dev_put(nhc->nhc_dev);
-
+       dev_put(nhc->nhc_dev);
        lwtstate_put(nhc->nhc_lwtstate);
        rt_fibinfo_free_cpus(nhc->nhc_pcpu_rth_output);
        rt_fibinfo_free(&nhc->nhc_rth_input);
@@ -1551,7 +1549,7 @@ link_it:
                return ofi;
        }
 
-       refcount_inc(&fi->fib_treeref);
+       refcount_set(&fi->fib_treeref, 1);
        refcount_set(&fi->fib_clntref, 1);
        spin_lock_bh(&fib_info_lock);
        hlist_add_head(&fi->fib_hash,
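
The fib_treeref line uses refcount_set(..., 1) rather than refcount_inc() because this is where the first reference comes into existence: refcount_t deliberately WARNs when incremented from zero, since that pattern usually signals a use-after-free. The general rule:

	refcount_t ref;		/* zero after a zeroing allocation */

	refcount_set(&ref, 1);	/* create the first reference */
	refcount_inc(&ref);	/* take additional references */
	/* refcount_inc() from 0 WARNs and saturates the counter */
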
index c695d29..8b30cad 100644 (file)
@@ -1095,8 +1095,7 @@ bool icmp_build_probe(struct sk_buff *skb, struct icmphdr *icmphdr)
                                         sizeof(struct in6_addr))
                                goto send_mal_query;
                        dev = ipv6_stub->ipv6_dev_find(net, &iio->ident.addr.ip_addr.ipv6_addr, dev);
-                       if (dev)
-                               dev_hold(dev);
+                       dev_hold(dev);
                        break;
 #endif
                default:
index 03589a0..7e50727 100644 (file)
@@ -2233,7 +2233,7 @@ static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml,
                        iml->sfmode, psf->sl_count, psf->sl_addr, 0);
        RCU_INIT_POINTER(iml->sflist, NULL);
        /* decrease mem now to avoid the memleak warning */
-       atomic_sub(IP_SFLSIZE(psf->sl_max), &sk->sk_omem_alloc);
+       atomic_sub(struct_size(psf, sl_addr, psf->sl_max), &sk->sk_omem_alloc);
        kfree_rcu(psf, rcu);
        return err;
 }
@@ -2382,7 +2382,8 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
 
                if (psl)
                        count += psl->sl_max;
-               newpsl = sock_kmalloc(sk, IP_SFLSIZE(count), GFP_KERNEL);
+               newpsl = sock_kmalloc(sk, struct_size(newpsl, sl_addr, count),
+                                     GFP_KERNEL);
                if (!newpsl) {
                        err = -ENOBUFS;
                        goto done;
@@ -2393,7 +2394,8 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
                        for (i = 0; i < psl->sl_count; i++)
                                newpsl->sl_addr[i] = psl->sl_addr[i];
                        /* decrease mem now to avoid the memleak warning */
-                       atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
+                       atomic_sub(struct_size(psl, sl_addr, psl->sl_max),
+                                  &sk->sk_omem_alloc);
                        kfree_rcu(psl, rcu);
                }
                rcu_assign_pointer(pmc->sflist, newpsl);
@@ -2468,19 +2470,22 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
                goto done;
        }
        if (msf->imsf_numsrc) {
-               newpsl = sock_kmalloc(sk, IP_SFLSIZE(msf->imsf_numsrc),
-                                                          GFP_KERNEL);
+               newpsl = sock_kmalloc(sk, struct_size(newpsl, sl_addr,
+                                                     msf->imsf_numsrc),
+                                     GFP_KERNEL);
                if (!newpsl) {
                        err = -ENOBUFS;
                        goto done;
                }
                newpsl->sl_max = newpsl->sl_count = msf->imsf_numsrc;
-               memcpy(newpsl->sl_addr, msf->imsf_slist,
-                       msf->imsf_numsrc * sizeof(msf->imsf_slist[0]));
+               memcpy(newpsl->sl_addr, msf->imsf_slist_flex,
+                      flex_array_size(msf, imsf_slist_flex, msf->imsf_numsrc));
                err = ip_mc_add_src(in_dev, &msf->imsf_multiaddr,
                        msf->imsf_fmode, newpsl->sl_count, newpsl->sl_addr, 0);
                if (err) {
-                       sock_kfree_s(sk, newpsl, IP_SFLSIZE(newpsl->sl_max));
+                       sock_kfree_s(sk, newpsl,
+                                    struct_size(newpsl, sl_addr,
+                                                newpsl->sl_max));
                        goto done;
                }
        } else {
@@ -2493,7 +2498,8 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
                (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode,
                        psl->sl_count, psl->sl_addr, 0);
                /* decrease mem now to avoid the memleak warning */
-               atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
+               atomic_sub(struct_size(psl, sl_addr, psl->sl_max),
+                          &sk->sk_omem_alloc);
                kfree_rcu(psl, rcu);
        } else
                (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode,
@@ -2551,14 +2557,14 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf,
                count = psl->sl_count;
        }
        copycount = count < msf->imsf_numsrc ? count : msf->imsf_numsrc;
-       len = copycount * sizeof(psl->sl_addr[0]);
+       len = flex_array_size(psl, sl_addr, copycount);
        msf->imsf_numsrc = count;
        if (put_user(IP_MSFILTER_SIZE(copycount), optlen) ||
            copy_to_user(optval, msf, IP_MSFILTER_SIZE(0))) {
                return -EFAULT;
        }
        if (len &&
-           copy_to_user(&optval->imsf_slist[0], psl->sl_addr, len))
+           copy_to_user(&optval->imsf_slist_flex[0], psl->sl_addr, len))
                return -EFAULT;
        return 0;
 done:
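
The igmp.c hunks above replace the open-coded IP_SFLSIZE() arithmetic with the overflow-checked helpers from <linux/overflow.h>. For a structure ending in a flexible array member (layout abbreviated, field names as in the diff):

	struct ip_sf_socklist {
		unsigned int	sl_max;
		unsigned int	sl_count;
		struct rcu_head	rcu;
		__be32		sl_addr[];	/* flexible array member */
	};

	/* struct plus n sl_addr entries, saturating instead of wrapping */
	size_t total = struct_size(psl, sl_addr, n);

	/* just the n array entries */
	size_t array = flex_array_size(psl, sl_addr, n);
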
index a202dce..6b04a88 100644 (file)
@@ -198,19 +198,10 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s
        } else if (rt->rt_type == RTN_BROADCAST)
                IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTBCAST, skb->len);
 
-       /* Be paranoid, rather than too clever. */
        if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
-               struct sk_buff *skb2;
-
-               skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev));
-               if (!skb2) {
-                       kfree_skb(skb);
+               skb = skb_expand_head(skb, hh_len);
+               if (!skb)
                        return -ENOMEM;
-               }
-               if (skb->sk)
-                       skb_set_owner_w(skb2, skb->sk);
-               consume_skb(skb);
-               skb = skb2;
        }
 
        if (lwtunnel_xmit_redirect(dst->lwtstate)) {
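
The ip_finish_output2() hunk collapses the old reallocate-and-reown sequence into skb_expand_head(), which grows the headroom, preserves socket ownership, and frees the original skb on failure. Before and after, side by side (sketch):

	/* before: manual reallocation */
	skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev));
	if (!skb2) {
		kfree_skb(skb);
		return -ENOMEM;
	}
	if (skb->sk)
		skb_set_owner_w(skb2, skb->sk);
	consume_skb(skb);
	skb = skb2;

	/* after: one helper; NULL on failure, old skb already freed */
	skb = skb_expand_head(skb, hh_len);
	if (!skb)
		return -ENOMEM;
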
index ec60367..b297bb2 100644 (file)
@@ -663,12 +663,11 @@ static int set_mcast_msfilter(struct sock *sk, int ifindex,
                              struct sockaddr_storage *group,
                              struct sockaddr_storage *list)
 {
-       int msize = IP_MSFILTER_SIZE(numsrc);
        struct ip_msfilter *msf;
        struct sockaddr_in *psin;
        int err, i;
 
-       msf = kmalloc(msize, GFP_KERNEL);
+       msf = kmalloc(IP_MSFILTER_SIZE(numsrc), GFP_KERNEL);
        if (!msf)
                return -ENOBUFS;
 
@@ -684,7 +683,7 @@ static int set_mcast_msfilter(struct sock *sk, int ifindex,
 
                if (psin->sin_family != AF_INET)
                        goto Eaddrnotavail;
-               msf->imsf_slist[i] = psin->sin_addr.s_addr;
+               msf->imsf_slist_flex[i] = psin->sin_addr.s_addr;
        }
        err = ip_mc_msfilter(sk, msf, ifindex);
        kfree(msf);
@@ -791,7 +790,8 @@ static int ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen)
                goto out_free_gsf;
 
        err = set_mcast_msfilter(sk, gsf->gf_interface, gsf->gf_numsrc,
-                                gsf->gf_fmode, &gsf->gf_group, gsf->gf_slist);
+                                gsf->gf_fmode, &gsf->gf_group,
+                                gsf->gf_slist_flex);
 out_free_gsf:
        kfree(gsf);
        return err;
@@ -800,7 +800,7 @@ out_free_gsf:
 static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
                int optlen)
 {
-       const int size0 = offsetof(struct compat_group_filter, gf_slist);
+       const int size0 = offsetof(struct compat_group_filter, gf_slist_flex);
        struct compat_group_filter *gf32;
        unsigned int n;
        void *p;
@@ -814,7 +814,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
        p = kmalloc(optlen + 4, GFP_KERNEL);
        if (!p)
                return -ENOMEM;
-       gf32 = p + 4; /* we want ->gf_group and ->gf_slist aligned */
+       gf32 = p + 4; /* we want ->gf_group and ->gf_slist_flex aligned */
 
        err = -EFAULT;
        if (copy_from_sockptr(gf32, optval, optlen))
@@ -827,7 +827,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
                goto out_free_gsf;
 
        err = -EINVAL;
-       if (offsetof(struct compat_group_filter, gf_slist[n]) > optlen)
+       if (offsetof(struct compat_group_filter, gf_slist_flex[n]) > optlen)
                goto out_free_gsf;
 
        /* numsrc >= (4G-140)/128 overflow in 32 bits */
@@ -835,7 +835,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
        if (n > sock_net(sk)->ipv4.sysctl_igmp_max_msf)
                goto out_free_gsf;
        err = set_mcast_msfilter(sk, gf32->gf_interface, n, gf32->gf_fmode,
-                                &gf32->gf_group, gf32->gf_slist);
+                                &gf32->gf_group, gf32->gf_slist_flex);
 out_free_gsf:
        kfree(p);
        return err;
@@ -1456,7 +1456,7 @@ static bool getsockopt_needs_rtnl(int optname)
 static int ip_get_mcast_msfilter(struct sock *sk, void __user *optval,
                int __user *optlen, int len)
 {
-       const int size0 = offsetof(struct group_filter, gf_slist);
+       const int size0 = offsetof(struct group_filter, gf_slist_flex);
        struct group_filter __user *p = optval;
        struct group_filter gsf;
        int num;
@@ -1468,7 +1468,7 @@ static int ip_get_mcast_msfilter(struct sock *sk, void __user *optval,
                return -EFAULT;
 
        num = gsf.gf_numsrc;
-       err = ip_mc_gsfget(sk, &gsf, p->gf_slist);
+       err = ip_mc_gsfget(sk, &gsf, p->gf_slist_flex);
        if (err)
                return err;
        if (gsf.gf_numsrc < num)
@@ -1482,7 +1482,7 @@ static int ip_get_mcast_msfilter(struct sock *sk, void __user *optval,
 static int compat_ip_get_mcast_msfilter(struct sock *sk, void __user *optval,
                int __user *optlen, int len)
 {
-       const int size0 = offsetof(struct compat_group_filter, gf_slist);
+       const int size0 = offsetof(struct compat_group_filter, gf_slist_flex);
        struct compat_group_filter __user *p = optval;
        struct compat_group_filter gf32;
        struct group_filter gf;
@@ -1499,7 +1499,7 @@ static int compat_ip_get_mcast_msfilter(struct sock *sk, void __user *optval,
        num = gf.gf_numsrc = gf32.gf_numsrc;
        gf.gf_group = gf32.gf_group;
 
-       err = ip_mc_gsfget(sk, &gf, p->gf_slist);
+       err = ip_mc_gsfget(sk, &gf, p->gf_slist_flex);
        if (err)
                return err;
        if (gf.gf_numsrc < num)
index 7f0e810..fe9101d 100644 (file)
@@ -390,7 +390,7 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
                tunnel->i_seqno = ntohl(tpi->seq) + 1;
        }
 
-       skb_reset_network_header(skb);
+       skb_set_network_header(skb, (tunnel->dev->type == ARPHRD_ETHER) ? ETH_HLEN : 0);
 
        err = IP_ECN_decapsulate(iph, skb);
        if (unlikely(err)) {
index 04754d5..b181773 100644 (file)
@@ -276,12 +276,13 @@ static int rt_cpu_seq_show(struct seq_file *seq, void *v)
        struct rt_cache_stat *st = v;
 
        if (v == SEQ_START_TOKEN) {
-               seq_printf(seq, "entries  in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src  out_hit out_slow_tot out_slow_mc  gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n");
+               seq_puts(seq, "entries  in_hit   in_slow_tot in_slow_mc in_no_route in_brd   in_martian_dst in_martian_src out_hit  out_slow_tot out_slow_mc gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n");
                return 0;
        }
 
-       seq_printf(seq,"%08x  %08x %08x %08x %08x %08x %08x %08x "
-                  " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
+       seq_printf(seq, "%08x %08x %08x    %08x   %08x    %08x %08x       "
+                       "%08x       %08x %08x     %08x    %08x %08x   "
+                       "%08x     %08x        %08x        %08x\n",
                   dst_entries_get_slow(&ipv4_dst_ops),
                   0, /* st->in_hit */
                   st->in_slow_tot,
@@ -2812,8 +2813,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
                new->output = dst_discard_out;
 
                new->dev = net->loopback_dev;
-               if (new->dev)
-                       dev_hold(new->dev);
+               dev_hold(new->dev);
 
                rt->rt_is_input = ort->rt_is_input;
                rt->rt_iif = ort->rt_iif;
index 84db1c9..2e62e0d 100644 (file)
@@ -2277,51 +2277,72 @@ EXPORT_SYMBOL(tcp_v4_destroy_sock);
 #ifdef CONFIG_PROC_FS
 /* Proc filesystem TCP sock list dumping. */
 
-/*
- * Get next listener socket follow cur.  If cur is NULL, get first socket
- * starting from bucket given in st->bucket; when st->bucket is zero the
- * very first socket in the hash table is returned.
+static unsigned short seq_file_family(const struct seq_file *seq);
+
+static bool seq_sk_match(struct seq_file *seq, const struct sock *sk)
+{
+       unsigned short family = seq_file_family(seq);
+
+       /* AF_UNSPEC is used as a match all */
+       return ((family == AF_UNSPEC || family == sk->sk_family) &&
+               net_eq(sock_net(sk), seq_file_net(seq)));
+}
+
+/* Find a non-empty bucket (starting from st->bucket)
+ * and return the first sk from it.
  */
-static void *listening_get_next(struct seq_file *seq, void *cur)
+static void *listening_get_first(struct seq_file *seq)
 {
-       struct tcp_seq_afinfo *afinfo;
        struct tcp_iter_state *st = seq->private;
-       struct net *net = seq_file_net(seq);
-       struct inet_listen_hashbucket *ilb;
-       struct hlist_nulls_node *node;
-       struct sock *sk = cur;
 
-       if (st->bpf_seq_afinfo)
-               afinfo = st->bpf_seq_afinfo;
-       else
-               afinfo = PDE_DATA(file_inode(seq->file));
+       st->offset = 0;
+       for (; st->bucket <= tcp_hashinfo.lhash2_mask; st->bucket++) {
+               struct inet_listen_hashbucket *ilb2;
+               struct inet_connection_sock *icsk;
+               struct sock *sk;
 
-       if (!sk) {
-get_head:
-               ilb = &tcp_hashinfo.listening_hash[st->bucket];
-               spin_lock(&ilb->lock);
-               sk = sk_nulls_head(&ilb->nulls_head);
-               st->offset = 0;
-               goto get_sk;
+               ilb2 = &tcp_hashinfo.lhash2[st->bucket];
+               if (hlist_empty(&ilb2->head))
+                       continue;
+
+               spin_lock(&ilb2->lock);
+               inet_lhash2_for_each_icsk(icsk, &ilb2->head) {
+                       sk = (struct sock *)icsk;
+                       if (seq_sk_match(seq, sk))
+                               return sk;
+               }
+               spin_unlock(&ilb2->lock);
        }
-       ilb = &tcp_hashinfo.listening_hash[st->bucket];
+
+       return NULL;
+}
+
+/* Find the next sk of "cur" within the same bucket (i.e. st->bucket).
+ * If "cur" is the last one in the st->bucket,
+ * call listening_get_first() to return the first sk of the next
+ * non-empty bucket.
+ */
+static void *listening_get_next(struct seq_file *seq, void *cur)
+{
+       struct tcp_iter_state *st = seq->private;
+       struct inet_listen_hashbucket *ilb2;
+       struct inet_connection_sock *icsk;
+       struct sock *sk = cur;
+
        ++st->num;
        ++st->offset;
 
-       sk = sk_nulls_next(sk);
-get_sk:
-       sk_nulls_for_each_from(sk, node) {
-               if (!net_eq(sock_net(sk), net))
-                       continue;
-               if (afinfo->family == AF_UNSPEC ||
-                   sk->sk_family == afinfo->family)
+       icsk = inet_csk(sk);
+       inet_lhash2_for_each_icsk_continue(icsk) {
+               sk = (struct sock *)icsk;
+               if (seq_sk_match(seq, sk))
                        return sk;
        }
-       spin_unlock(&ilb->lock);
-       st->offset = 0;
-       if (++st->bucket < INET_LHTABLE_SIZE)
-               goto get_head;
-       return NULL;
+
+       ilb2 = &tcp_hashinfo.lhash2[st->bucket];
+       spin_unlock(&ilb2->lock);
+       ++st->bucket;
+       return listening_get_first(seq);
 }
 
 static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
@@ -2331,7 +2352,7 @@ static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
 
        st->bucket = 0;
        st->offset = 0;
-       rc = listening_get_next(seq, NULL);
+       rc = listening_get_first(seq);
 
        while (rc && *pos) {
                rc = listening_get_next(seq, rc);
@@ -2351,15 +2372,7 @@ static inline bool empty_bucket(const struct tcp_iter_state *st)
  */
 static void *established_get_first(struct seq_file *seq)
 {
-       struct tcp_seq_afinfo *afinfo;
        struct tcp_iter_state *st = seq->private;
-       struct net *net = seq_file_net(seq);
-       void *rc = NULL;
-
-       if (st->bpf_seq_afinfo)
-               afinfo = st->bpf_seq_afinfo;
-       else
-               afinfo = PDE_DATA(file_inode(seq->file));
 
        st->offset = 0;
        for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
@@ -2373,32 +2386,20 @@ static void *established_get_first(struct seq_file *seq)
 
                spin_lock_bh(lock);
                sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
-                       if ((afinfo->family != AF_UNSPEC &&
-                            sk->sk_family != afinfo->family) ||
-                           !net_eq(sock_net(sk), net)) {
-                               continue;
-                       }
-                       rc = sk;
-                       goto out;
+                       if (seq_sk_match(seq, sk))
+                               return sk;
                }
                spin_unlock_bh(lock);
        }
-out:
-       return rc;
+
+       return NULL;
 }
 
 static void *established_get_next(struct seq_file *seq, void *cur)
 {
-       struct tcp_seq_afinfo *afinfo;
        struct sock *sk = cur;
        struct hlist_nulls_node *node;
        struct tcp_iter_state *st = seq->private;
-       struct net *net = seq_file_net(seq);
-
-       if (st->bpf_seq_afinfo)
-               afinfo = st->bpf_seq_afinfo;
-       else
-               afinfo = PDE_DATA(file_inode(seq->file));
 
        ++st->num;
        ++st->offset;
@@ -2406,9 +2407,7 @@ static void *established_get_next(struct seq_file *seq, void *cur)
        sk = sk_nulls_next(sk);
 
        sk_nulls_for_each_from(sk, node) {
-               if ((afinfo->family == AF_UNSPEC ||
-                    sk->sk_family == afinfo->family) &&
-                   net_eq(sock_net(sk), net))
+               if (seq_sk_match(seq, sk))
                        return sk;
        }
 
@@ -2451,17 +2450,18 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
 static void *tcp_seek_last_pos(struct seq_file *seq)
 {
        struct tcp_iter_state *st = seq->private;
+       int bucket = st->bucket;
        int offset = st->offset;
        int orig_num = st->num;
        void *rc = NULL;
 
        switch (st->state) {
        case TCP_SEQ_STATE_LISTENING:
-               if (st->bucket >= INET_LHTABLE_SIZE)
+               if (st->bucket > tcp_hashinfo.lhash2_mask)
                        break;
                st->state = TCP_SEQ_STATE_LISTENING;
-               rc = listening_get_next(seq, NULL);
-               while (offset-- && rc)
+               rc = listening_get_first(seq);
+               while (offset-- && rc && bucket == st->bucket)
                        rc = listening_get_next(seq, rc);
                if (rc)
                        break;
@@ -2472,7 +2472,7 @@ static void *tcp_seek_last_pos(struct seq_file *seq)
                if (st->bucket > tcp_hashinfo.ehash_mask)
                        break;
                rc = established_get_first(seq);
-               while (offset-- && rc)
+               while (offset-- && rc && bucket == st->bucket)
                        rc = established_get_next(seq, rc);
        }
 
@@ -2542,7 +2542,7 @@ void tcp_seq_stop(struct seq_file *seq, void *v)
        switch (st->state) {
        case TCP_SEQ_STATE_LISTENING:
                if (v != SEQ_START_TOKEN)
-                       spin_unlock(&tcp_hashinfo.listening_hash[st->bucket].lock);
+                       spin_unlock(&tcp_hashinfo.lhash2[st->bucket].lock);
                break;
        case TCP_SEQ_STATE_ESTABLISHED:
                if (v)
@@ -2687,6 +2687,15 @@ out:
 }
 
 #ifdef CONFIG_BPF_SYSCALL
+struct bpf_tcp_iter_state {
+       struct tcp_iter_state state;
+       unsigned int cur_sk;
+       unsigned int end_sk;
+       unsigned int max_sk;
+       struct sock **batch;
+       bool st_bucket_done;
+};
+
 struct bpf_iter__tcp {
        __bpf_md_ptr(struct bpf_iter_meta *, meta);
        __bpf_md_ptr(struct sock_common *, sk_common);
@@ -2705,16 +2714,204 @@ static int tcp_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
        return bpf_iter_run_prog(prog, &ctx);
 }
 
+static void bpf_iter_tcp_put_batch(struct bpf_tcp_iter_state *iter)
+{
+       while (iter->cur_sk < iter->end_sk)
+               sock_put(iter->batch[iter->cur_sk++]);
+}
+
+static int bpf_iter_tcp_realloc_batch(struct bpf_tcp_iter_state *iter,
+                                     unsigned int new_batch_sz)
+{
+       struct sock **new_batch;
+
+       new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz,
+                            GFP_USER | __GFP_NOWARN);
+       if (!new_batch)
+               return -ENOMEM;
+
+       bpf_iter_tcp_put_batch(iter);
+       kvfree(iter->batch);
+       iter->batch = new_batch;
+       iter->max_sk = new_batch_sz;
+
+       return 0;
+}
+
+static unsigned int bpf_iter_tcp_listening_batch(struct seq_file *seq,
+                                                struct sock *start_sk)
+{
+       struct bpf_tcp_iter_state *iter = seq->private;
+       struct tcp_iter_state *st = &iter->state;
+       struct inet_connection_sock *icsk;
+       unsigned int expected = 1;
+       struct sock *sk;
+
+       sock_hold(start_sk);
+       iter->batch[iter->end_sk++] = start_sk;
+
+       icsk = inet_csk(start_sk);
+       inet_lhash2_for_each_icsk_continue(icsk) {
+               sk = (struct sock *)icsk;
+               if (seq_sk_match(seq, sk)) {
+                       if (iter->end_sk < iter->max_sk) {
+                               sock_hold(sk);
+                               iter->batch[iter->end_sk++] = sk;
+                       }
+                       expected++;
+               }
+       }
+       spin_unlock(&tcp_hashinfo.lhash2[st->bucket].lock);
+
+       return expected;
+}
+
+static unsigned int bpf_iter_tcp_established_batch(struct seq_file *seq,
+                                                  struct sock *start_sk)
+{
+       struct bpf_tcp_iter_state *iter = seq->private;
+       struct tcp_iter_state *st = &iter->state;
+       struct hlist_nulls_node *node;
+       unsigned int expected = 1;
+       struct sock *sk;
+
+       sock_hold(start_sk);
+       iter->batch[iter->end_sk++] = start_sk;
+
+       sk = sk_nulls_next(start_sk);
+       sk_nulls_for_each_from(sk, node) {
+               if (seq_sk_match(seq, sk)) {
+                       if (iter->end_sk < iter->max_sk) {
+                               sock_hold(sk);
+                               iter->batch[iter->end_sk++] = sk;
+                       }
+                       expected++;
+               }
+       }
+       spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
+
+       return expected;
+}
+
+static struct sock *bpf_iter_tcp_batch(struct seq_file *seq)
+{
+       struct bpf_tcp_iter_state *iter = seq->private;
+       struct tcp_iter_state *st = &iter->state;
+       unsigned int expected;
+       bool resized = false;
+       struct sock *sk;
+
+       /* The st->bucket is done.  Directly advance to the next
+        * bucket instead of having tcp_seek_last_pos() skip through
+        * the current bucket one by one only to find out it has
+        * to advance to the next bucket.
+        */
+       if (iter->st_bucket_done) {
+               st->offset = 0;
+               st->bucket++;
+               if (st->state == TCP_SEQ_STATE_LISTENING &&
+                   st->bucket > tcp_hashinfo.lhash2_mask) {
+                       st->state = TCP_SEQ_STATE_ESTABLISHED;
+                       st->bucket = 0;
+               }
+       }
+
+again:
+       /* Get a new batch */
+       iter->cur_sk = 0;
+       iter->end_sk = 0;
+       iter->st_bucket_done = false;
+
+       sk = tcp_seek_last_pos(seq);
+       if (!sk)
+               return NULL; /* Done */
+
+       if (st->state == TCP_SEQ_STATE_LISTENING)
+               expected = bpf_iter_tcp_listening_batch(seq, sk);
+       else
+               expected = bpf_iter_tcp_established_batch(seq, sk);
+
+       if (iter->end_sk == expected) {
+               iter->st_bucket_done = true;
+               return sk;
+       }
+
+       if (!resized && !bpf_iter_tcp_realloc_batch(iter, expected * 3 / 2)) {
+               resized = true;
+               goto again;
+       }
+
+       return sk;
+}
+
+static void *bpf_iter_tcp_seq_start(struct seq_file *seq, loff_t *pos)
+{
+       /* bpf iter does not support lseek, so it always
+        * continues from where it was stop()-ped.
+        */
+       if (*pos)
+               return bpf_iter_tcp_batch(seq);
+
+       return SEQ_START_TOKEN;
+}
+
+static void *bpf_iter_tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+       struct bpf_tcp_iter_state *iter = seq->private;
+       struct tcp_iter_state *st = &iter->state;
+       struct sock *sk;
+
+       /* Whenever seq_next() is called, the iter->cur_sk is
+        * done with seq_show(), so advance to the next sk in
+        * the batch.
+        */
+       if (iter->cur_sk < iter->end_sk) {
+               /* Keeping st->num consistent in tcp_iter_state.
+                * bpf_iter_tcp does not use st->num.
+                * meta.seq_num is used instead.
+                */
+               st->num++;
+               /* Move st->offset to the next sk in the bucket such that
+                * the future start() will resume at st->offset in
+                * st->bucket.  See tcp_seek_last_pos().
+                */
+               st->offset++;
+               sock_put(iter->batch[iter->cur_sk++]);
+       }
+
+       if (iter->cur_sk < iter->end_sk)
+               sk = iter->batch[iter->cur_sk];
+       else
+               sk = bpf_iter_tcp_batch(seq);
+
+       ++*pos;
+       /* Keeping st->last_pos consistent in tcp_iter_state.
+        * bpf iter does not do lseek, so st->last_pos always equals *pos.
+        */
+       st->last_pos = *pos;
+       return sk;
+}
+
 static int bpf_iter_tcp_seq_show(struct seq_file *seq, void *v)
 {
        struct bpf_iter_meta meta;
        struct bpf_prog *prog;
        struct sock *sk = v;
+       bool slow;
        uid_t uid;
+       int ret;
 
        if (v == SEQ_START_TOKEN)
                return 0;
 
+       if (sk_fullsock(sk))
+               slow = lock_sock_fast(sk);
+
+       if (unlikely(sk_unhashed(sk))) {
+               ret = SEQ_SKIP;
+               goto unlock;
+       }
+
        if (sk->sk_state == TCP_TIME_WAIT) {
                uid = 0;
        } else if (sk->sk_state == TCP_NEW_SYN_RECV) {
@@ -2728,11 +2925,18 @@ static int bpf_iter_tcp_seq_show(struct seq_file *seq, void *v)
 
        meta.seq = seq;
        prog = bpf_iter_get_info(&meta, false);
-       return tcp_prog_seq_show(prog, &meta, v, uid);
+       ret = tcp_prog_seq_show(prog, &meta, v, uid);
+
+unlock:
+       if (sk_fullsock(sk))
+               unlock_sock_fast(sk, slow);
+       return ret;
 }
 
 static void bpf_iter_tcp_seq_stop(struct seq_file *seq, void *v)
 {
+       struct bpf_tcp_iter_state *iter = seq->private;
        struct bpf_iter_meta meta;
        struct bpf_prog *prog;
 
@@ -2743,17 +2947,34 @@ static void bpf_iter_tcp_seq_stop(struct seq_file *seq, void *v)
                        (void)tcp_prog_seq_show(prog, &meta, v, 0);
        }
 
-       tcp_seq_stop(seq, v);
+       if (iter->cur_sk < iter->end_sk) {
+               bpf_iter_tcp_put_batch(iter);
+               iter->st_bucket_done = false;
+       }
 }
 
 static const struct seq_operations bpf_iter_tcp_seq_ops = {
        .show           = bpf_iter_tcp_seq_show,
-       .start          = tcp_seq_start,
-       .next           = tcp_seq_next,
+       .start          = bpf_iter_tcp_seq_start,
+       .next           = bpf_iter_tcp_seq_next,
        .stop           = bpf_iter_tcp_seq_stop,
 };
+#endif
+static unsigned short seq_file_family(const struct seq_file *seq)
+{
+       const struct tcp_seq_afinfo *afinfo;
+
+#ifdef CONFIG_BPF_SYSCALL
+       /* Iterated from bpf_iter.  Let the bpf prog filter instead. */
+       if (seq->op == &bpf_iter_tcp_seq_ops)
+               return AF_UNSPEC;
 #endif
 
+       /* Iterated from proc fs */
+       afinfo = PDE_DATA(file_inode(seq->file));
+       return afinfo->family;
+}
+
 static const struct seq_operations tcp4_seq_ops = {
        .show           = tcp4_seq_show,
        .start          = tcp_seq_start,
@@ -3002,39 +3223,55 @@ static struct pernet_operations __net_initdata tcp_sk_ops = {
 DEFINE_BPF_ITER_FUNC(tcp, struct bpf_iter_meta *meta,
                     struct sock_common *sk_common, uid_t uid)
 
+#define INIT_BATCH_SZ 16
+
 static int bpf_iter_init_tcp(void *priv_data, struct bpf_iter_aux_info *aux)
 {
-       struct tcp_iter_state *st = priv_data;
-       struct tcp_seq_afinfo *afinfo;
-       int ret;
+       struct bpf_tcp_iter_state *iter = priv_data;
+       int err;
 
-       afinfo = kmalloc(sizeof(*afinfo), GFP_USER | __GFP_NOWARN);
-       if (!afinfo)
-               return -ENOMEM;
+       err = bpf_iter_init_seq_net(priv_data, aux);
+       if (err)
+               return err;
 
-       afinfo->family = AF_UNSPEC;
-       st->bpf_seq_afinfo = afinfo;
-       ret = bpf_iter_init_seq_net(priv_data, aux);
-       if (ret)
-               kfree(afinfo);
-       return ret;
+       err = bpf_iter_tcp_realloc_batch(iter, INIT_BATCH_SZ);
+       if (err) {
+               bpf_iter_fini_seq_net(priv_data);
+               return err;
+       }
+
+       return 0;
 }
 
 static void bpf_iter_fini_tcp(void *priv_data)
 {
-       struct tcp_iter_state *st = priv_data;
+       struct bpf_tcp_iter_state *iter = priv_data;
 
-       kfree(st->bpf_seq_afinfo);
        bpf_iter_fini_seq_net(priv_data);
+       kvfree(iter->batch);
 }
 
 static const struct bpf_iter_seq_info tcp_seq_info = {
        .seq_ops                = &bpf_iter_tcp_seq_ops,
        .init_seq_private       = bpf_iter_init_tcp,
        .fini_seq_private       = bpf_iter_fini_tcp,
-       .seq_priv_size          = sizeof(struct tcp_iter_state),
+       .seq_priv_size          = sizeof(struct bpf_tcp_iter_state),
 };
 
+static const struct bpf_func_proto *
+bpf_iter_tcp_get_func_proto(enum bpf_func_id func_id,
+                           const struct bpf_prog *prog)
+{
+       switch (func_id) {
+       case BPF_FUNC_setsockopt:
+               return &bpf_sk_setsockopt_proto;
+       case BPF_FUNC_getsockopt:
+               return &bpf_sk_getsockopt_proto;
+       default:
+               return NULL;
+       }
+}
+
 static struct bpf_iter_reg tcp_reg_info = {
        .target                 = "tcp",
        .ctx_arg_info_size      = 1,
@@ -3042,6 +3279,7 @@ static struct bpf_iter_reg tcp_reg_info = {
                { offsetof(struct bpf_iter__tcp, sk_common),
                  PTR_TO_BTF_ID_OR_NULL },
        },
+       .get_func_proto         = bpf_iter_tcp_get_func_proto,
        .seq_info               = &tcp_seq_info,
 };
 
index e09147a..fc61cd3 100644 (file)
@@ -298,6 +298,9 @@ int tcp_gro_complete(struct sk_buff *skb)
        if (th->cwr)
                skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
 
+       if (skb->encapsulation)
+               skb->inner_transport_header = skb->transport_header;
+
        return 0;
 }
 EXPORT_SYMBOL(tcp_gro_complete);
index 9dde1e5..1380a6b 100644 (file)
@@ -624,6 +624,10 @@ static int udp_gro_complete_segment(struct sk_buff *skb)
 
        skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
        skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_L4;
+
+       if (skb->encapsulation)
+               skb->inner_transport_header = skb->transport_header;
+
        return 0;
 }
 
index db0a898..8381288 100644 (file)
@@ -701,8 +701,7 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
 errout:
        if (in6_dev)
                in6_dev_put(in6_dev);
-       if (dev)
-               dev_put(dev);
+       dev_put(dev);
        return err;
 }
 
@@ -5417,8 +5416,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 errout_ifa:
        in6_ifa_put(ifa);
 errout:
-       if (dev)
-               dev_put(dev);
+       dev_put(dev);
        if (fillargs.netnsid >= 0)
                put_net(tgt_net);
 
@@ -5792,7 +5790,8 @@ static int check_stable_privacy(struct inet6_dev *idev, struct net *net,
 }
 
 static int inet6_validate_link_af(const struct net_device *dev,
-                                 const struct nlattr *nla)
+                                 const struct nlattr *nla,
+                                 struct netlink_ext_ack *extack)
 {
        struct nlattr *tb[IFLA_INET6_MAX + 1];
        struct inet6_dev *idev = NULL;
@@ -5805,7 +5804,7 @@ static int inet6_validate_link_af(const struct net_device *dev,
        }
 
        err = nla_parse_nested_deprecated(tb, IFLA_INET6_MAX, nla,
-                                         inet6_af_policy, NULL);
+                                         inet6_af_policy, extack);
        if (err)
                return err;
 
index d897faa..3a871a0 100644 (file)
 
 #include <linux/uaccess.h>
 
-/*
- *     Parsing tlv encoded headers.
- *
- *     Parsing function "func" returns true, if parsing succeed
- *     and false, if it failed.
- *     It MUST NOT touch skb->h.
- */
-
-struct tlvtype_proc {
-       int     type;
-       bool    (*func)(struct sk_buff *skb, int offset);
-};
-
 /*********************
   Generic functions
  *********************/
@@ -112,16 +99,23 @@ drop:
        return false;
 }
 
+static bool ipv6_hop_ra(struct sk_buff *skb, int optoff);
+static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff);
+static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff);
+static bool ipv6_hop_calipso(struct sk_buff *skb, int optoff);
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+static bool ipv6_dest_hao(struct sk_buff *skb, int optoff);
+#endif
+
 /* Parse tlv encoded option header (hop-by-hop or destination) */
 
-static bool ip6_parse_tlv(const struct tlvtype_proc *procs,
+static bool ip6_parse_tlv(bool hopbyhop,
                          struct sk_buff *skb,
                          int max_count)
 {
        int len = (skb_transport_header(skb)[1] + 1) << 3;
        const unsigned char *nh = skb_network_header(skb);
        int off = skb_network_header_len(skb);
-       const struct tlvtype_proc *curr;
        bool disallow_unknowns = false;
        int tlv_count = 0;
        int padlen = 0;
@@ -176,20 +170,45 @@ static bool ip6_parse_tlv(const struct tlvtype_proc *procs,
                        if (tlv_count > max_count)
                                goto bad;
 
-                       for (curr = procs; curr->type >= 0; curr++) {
-                               if (curr->type == nh[off]) {
-                                       /* type specific length/alignment
-                                          checks will be performed in the
-                                          func(). */
-                                       if (curr->func(skb, off) == false)
+                       if (hopbyhop) {
+                               switch (nh[off]) {
+                               case IPV6_TLV_ROUTERALERT:
+                                       if (!ipv6_hop_ra(skb, off))
+                                               return false;
+                                       break;
+                               case IPV6_TLV_IOAM:
+                                       if (!ipv6_hop_ioam(skb, off))
+                                               return false;
+                                       break;
+                               case IPV6_TLV_JUMBO:
+                                       if (!ipv6_hop_jumbo(skb, off))
+                                               return false;
+                                       break;
+                               case IPV6_TLV_CALIPSO:
+                                       if (!ipv6_hop_calipso(skb, off))
+                                               return false;
+                                       break;
+                               default:
+                                       if (!ip6_tlvopt_unknown(skb, off,
+                                                               disallow_unknowns))
+                                               return false;
+                                       break;
+                               }
+                       } else {
+                               switch (nh[off]) {
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+                               case IPV6_TLV_HAO:
+                                       if (!ipv6_dest_hao(skb, off))
+                                               return false;
+                                       break;
+#endif
+                               default:
+                                       if (!ip6_tlvopt_unknown(skb, off,
+                                                               disallow_unknowns))
                                                return false;
                                        break;
                                }
                        }
-                       if (curr->type < 0 &&
-                           !ip6_tlvopt_unknown(skb, off, disallow_unknowns))
-                               return false;
-
                        padlen = 0;
                }
                off += optlen;
@@ -267,16 +286,6 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff)
 }
 #endif
 
-static const struct tlvtype_proc tlvprocdestopt_lst[] = {
-#if IS_ENABLED(CONFIG_IPV6_MIP6)
-       {
-               .type   = IPV6_TLV_HAO,
-               .func   = ipv6_dest_hao,
-       },
-#endif
-       {-1,                    NULL}
-};
-
 static int ipv6_destopt_rcv(struct sk_buff *skb)
 {
        struct inet6_dev *idev = __in6_dev_get(skb->dev);
@@ -307,8 +316,7 @@ fail_and_free:
        dstbuf = opt->dst1;
 #endif
 
-       if (ip6_parse_tlv(tlvprocdestopt_lst, skb,
-                         net->ipv6.sysctl.max_dst_opts_cnt)) {
+       if (ip6_parse_tlv(false, skb, net->ipv6.sysctl.max_dst_opts_cnt)) {
                skb->transport_header += extlen;
                opt = IP6CB(skb);
 #if IS_ENABLED(CONFIG_IPV6_MIP6)
@@ -1051,26 +1059,6 @@ drop:
        return false;
 }
 
-static const struct tlvtype_proc tlvprochopopt_lst[] = {
-       {
-               .type   = IPV6_TLV_ROUTERALERT,
-               .func   = ipv6_hop_ra,
-       },
-       {
-               .type   = IPV6_TLV_IOAM,
-               .func   = ipv6_hop_ioam,
-       },
-       {
-               .type   = IPV6_TLV_JUMBO,
-               .func   = ipv6_hop_jumbo,
-       },
-       {
-               .type   = IPV6_TLV_CALIPSO,
-               .func   = ipv6_hop_calipso,
-       },
-       { -1, }
-};
-
 int ipv6_parse_hopopts(struct sk_buff *skb)
 {
        struct inet6_skb_parm *opt = IP6CB(skb);
@@ -1096,8 +1084,7 @@ fail_and_free:
                goto fail_and_free;
 
        opt->flags |= IP6SKB_HOPBYHOP;
-       if (ip6_parse_tlv(tlvprochopopt_lst, skb,
-                         net->ipv6.sysctl.max_hbh_opts_cnt)) {
+       if (ip6_parse_tlv(true, skb, net->ipv6.sysctl.max_hbh_opts_cnt)) {
                skb->transport_header += extlen;
                opt = IP6CB(skb);
                opt->nhoff = sizeof(struct ipv6hdr);
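
The exthdrs.c rewrite drops the tlvtype_proc tables in favour of direct switch dispatch: with CONFIG_RETPOLINE each indirect curr->func() call is comparatively expensive in this per-option hot path. Schematically (the real code breaks out of a larger parsing loop rather than returning):

	/* before: table walk, one indirect call per matched option */
	for (curr = procs; curr->type >= 0; curr++)
		if (curr->type == nh[off])
			return curr->func(skb, off);

	/* after: direct calls the compiler can inline and predict */
	switch (nh[off]) {
	case IPV6_TLV_ROUTERALERT:
		return ipv6_hop_ra(skb, off);
	/* ... */
	default:
		return ip6_tlvopt_unknown(skb, off, disallow_unknowns);
	}
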
index b7b27d9..12f985f 100644 (file)
@@ -60,46 +60,29 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
 {
        struct dst_entry *dst = skb_dst(skb);
        struct net_device *dev = dst->dev;
+       struct inet6_dev *idev = ip6_dst_idev(dst);
        unsigned int hh_len = LL_RESERVED_SPACE(dev);
-       int delta = hh_len - skb_headroom(skb);
-       const struct in6_addr *nexthop;
+       const struct in6_addr *daddr, *nexthop;
+       struct ipv6hdr *hdr;
        struct neighbour *neigh;
        int ret;
 
        /* Be paranoid, rather than too clever. */
-       if (unlikely(delta > 0) && dev->header_ops) {
-               /* pskb_expand_head() might crash, if skb is shared */
-               if (skb_shared(skb)) {
-                       struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
-
-                       if (likely(nskb)) {
-                               if (skb->sk)
-                                       skb_set_owner_w(nskb, skb->sk);
-                               consume_skb(skb);
-                       } else {
-                               kfree_skb(skb);
-                       }
-                       skb = nskb;
-               }
-               if (skb &&
-                   pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) {
-                       kfree_skb(skb);
-                       skb = NULL;
-               }
+       if (unlikely(hh_len > skb_headroom(skb)) && dev->header_ops) {
+               skb = skb_expand_head(skb, hh_len);
                if (!skb) {
-                       IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
+                       IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
                        return -ENOMEM;
                }
        }
 
-       if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
-               struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
-
+       hdr = ipv6_hdr(skb);
+       daddr = &hdr->daddr;
+       if (ipv6_addr_is_multicast(daddr)) {
                if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
                    ((mroute6_is_socket(net, skb) &&
                     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
-                    ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
-                                        &ipv6_hdr(skb)->saddr))) {
+                    ipv6_chk_mcast_addr(dev, daddr, &hdr->saddr))) {
                        struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
 
                        /* Do not check for IFF_ALLMULTI; multicast routing
@@ -110,7 +93,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
                                        net, sk, newskb, NULL, newskb->dev,
                                        dev_loopback_xmit);
 
-                       if (ipv6_hdr(skb)->hop_limit == 0) {
+                       if (hdr->hop_limit == 0) {
                                IP6_INC_STATS(net, idev,
                                              IPSTATS_MIB_OUTDISCARDS);
                                kfree_skb(skb);
@@ -119,9 +102,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
                }
 
                IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
-
-               if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
-                   IPV6_ADDR_SCOPE_NODELOCAL &&
+               if (IPV6_ADDR_MC_SCOPE(daddr) <= IPV6_ADDR_SCOPE_NODELOCAL &&
                    !(dev->flags & IFF_LOOPBACK)) {
                        kfree_skb(skb);
                        return 0;
@@ -136,10 +117,10 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
        }
 
        rcu_read_lock_bh();
-       nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
-       neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
+       nexthop = rt6_nexthop((struct rt6_info *)dst, daddr);
+       neigh = __ipv6_neigh_lookup_noref(dev, nexthop);
        if (unlikely(!neigh))
-               neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
+               neigh = __neigh_create(&nd_tbl, nexthop, dev, false);
        if (!IS_ERR(neigh)) {
                sock_confirm_neigh(skb, neigh);
                ret = neigh_output(neigh, skb, false);
@@ -148,7 +129,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
        }
        rcu_read_unlock_bh();
 
-       IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
+       IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES);
        kfree_skb(skb);
        return -EINVAL;
 }
@@ -268,6 +249,8 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
        const struct ipv6_pinfo *np = inet6_sk(sk);
        struct in6_addr *first_hop = &fl6->daddr;
        struct dst_entry *dst = skb_dst(skb);
+       struct net_device *dev = dst->dev;
+       struct inet6_dev *idev = ip6_dst_idev(dst);
        unsigned int head_room;
        struct ipv6hdr *hdr;
        u8  proto = fl6->flowi6_proto;
@@ -275,22 +258,16 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
        int hlimit = -1;
        u32 mtu;
 
-       head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
+       head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dev);
        if (opt)
                head_room += opt->opt_nflen + opt->opt_flen;
 
-       if (unlikely(skb_headroom(skb) < head_room)) {
-               struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
-               if (!skb2) {
-                       IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
-                                     IPSTATS_MIB_OUTDISCARDS);
-                       kfree_skb(skb);
+       if (unlikely(head_room > skb_headroom(skb))) {
+               skb = skb_expand_head(skb, head_room);
+               if (!skb) {
+                       IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
                        return -ENOBUFS;
                }
-               if (skb->sk)
-                       skb_set_owner_w(skb2, skb->sk);
-               consume_skb(skb);
-               skb = skb2;
        }
 
        if (opt) {
@@ -332,8 +309,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
 
        mtu = dst_mtu(dst);
        if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
-               IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
-                             IPSTATS_MIB_OUT, skb->len);
+               IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
 
                /* if egress device is enslaved to an L3 master device pass the
                 * skb to its handler for processing
@@ -346,17 +322,17 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
                 * we promote our socket to non const
                 */
                return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
-                              net, (struct sock *)sk, skb, NULL, dst->dev,
+                              net, (struct sock *)sk, skb, NULL, dev,
                               dst_output);
        }
 
-       skb->dev = dst->dev;
+       skb->dev = dev;
        /* ipv6_local_error() does not require socket lock,
         * we promote our socket to non const
         */
        ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
 
-       IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
+       IP6_INC_STATS(net, idev, IPSTATS_MIB_FRAGFAILS);
        kfree_skb(skb);
        return -EMSGSIZE;
 }
@@ -549,9 +525,10 @@ int ip6_forward(struct sk_buff *skb)
        if (net->ipv6.devconf_all->proxy_ndp &&
            pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
                int proxied = ip6_forward_proxy_check(skb);
-               if (proxied > 0)
+               if (proxied > 0) {
+                       hdr->hop_limit--;
                        return ip6_input(skb);
-               else if (proxied < 0) {
+               else if (proxied < 0) {
                        __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
                        goto drop;
                }
index 06b0d2c..36ed9ef 100644 (file)
@@ -559,8 +559,7 @@ static int pim6_rcv(struct sk_buff *skb)
        read_lock(&mrt_lock);
        if (reg_vif_num >= 0)
                reg_dev = mrt->vif_table[reg_vif_num].dev;
-       if (reg_dev)
-               dev_hold(reg_dev);
+       dev_hold(reg_dev);
        read_unlock(&mrt_lock);
 
        if (!reg_dev)
index a6804a7..e4bdb09 100644 (file)
@@ -225,7 +225,7 @@ static int ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
        if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen)
                goto out_free_gsf;
 
-       ret = ip6_mc_msfilter(sk, gsf, gsf->gf_slist);
+       ret = ip6_mc_msfilter(sk, gsf, gsf->gf_slist_flex);
 out_free_gsf:
        kfree(gsf);
        return ret;
@@ -234,7 +234,7 @@ out_free_gsf:
 static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
                int optlen)
 {
-       const int size0 = offsetof(struct compat_group_filter, gf_slist);
+       const int size0 = offsetof(struct compat_group_filter, gf_slist_flex);
        struct compat_group_filter *gf32;
        void *p;
        int ret;
@@ -249,7 +249,7 @@ static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
        if (!p)
                return -ENOMEM;
 
-       gf32 = p + 4; /* we want ->gf_group and ->gf_slist aligned */
+       gf32 = p + 4; /* we want ->gf_group and ->gf_slist_flex aligned */
        ret = -EFAULT;
        if (copy_from_sockptr(gf32, optval, optlen))
                goto out_free_p;
@@ -261,14 +261,14 @@ static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
                goto out_free_p;
 
        ret = -EINVAL;
-       if (offsetof(struct compat_group_filter, gf_slist[n]) > optlen)
+       if (offsetof(struct compat_group_filter, gf_slist_flex[n]) > optlen)
                goto out_free_p;
 
        ret = ip6_mc_msfilter(sk, &(struct group_filter){
                        .gf_interface = gf32->gf_interface,
                        .gf_group = gf32->gf_group,
                        .gf_fmode = gf32->gf_fmode,
-                       .gf_numsrc = gf32->gf_numsrc}, gf32->gf_slist);
+                       .gf_numsrc = gf32->gf_numsrc}, gf32->gf_slist_flex);
 
 out_free_p:
        kfree(p);
@@ -1048,7 +1048,7 @@ static int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_txoptions *opt,
 static int ipv6_get_msfilter(struct sock *sk, void __user *optval,
                int __user *optlen, int len)
 {
-       const int size0 = offsetof(struct group_filter, gf_slist);
+       const int size0 = offsetof(struct group_filter, gf_slist_flex);
        struct group_filter __user *p = optval;
        struct group_filter gsf;
        int num;
@@ -1062,7 +1062,7 @@ static int ipv6_get_msfilter(struct sock *sk, void __user *optval,
                return -EADDRNOTAVAIL;
        num = gsf.gf_numsrc;
        lock_sock(sk);
-       err = ip6_mc_msfget(sk, &gsf, p->gf_slist);
+       err = ip6_mc_msfget(sk, &gsf, p->gf_slist_flex);
        if (!err) {
                if (num > gsf.gf_numsrc)
                        num = gsf.gf_numsrc;
@@ -1077,7 +1077,7 @@ static int ipv6_get_msfilter(struct sock *sk, void __user *optval,
 static int compat_ipv6_get_msfilter(struct sock *sk, void __user *optval,
                int __user *optlen)
 {
-       const int size0 = offsetof(struct compat_group_filter, gf_slist);
+       const int size0 = offsetof(struct compat_group_filter, gf_slist_flex);
        struct compat_group_filter __user *p = optval;
        struct compat_group_filter gf32;
        struct group_filter gf;
@@ -1100,7 +1100,7 @@ static int compat_ipv6_get_msfilter(struct sock *sk, void __user *optval,
                return -EADDRNOTAVAIL;
 
        lock_sock(sk);
-       err = ip6_mc_msfget(sk, &gf, p->gf_slist);
+       err = ip6_mc_msfget(sk, &gf, p->gf_slist_flex);
        release_sock(sk);
        if (err)
                return err;
index 54ec163..cd951fa 100644 (file)
@@ -447,7 +447,8 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
 
                if (psl)
                        count += psl->sl_max;
-               newpsl = sock_kmalloc(sk, IP6_SFLSIZE(count), GFP_KERNEL);
+               newpsl = sock_kmalloc(sk, struct_size(newpsl, sl_addr, count),
+                                     GFP_KERNEL);
                if (!newpsl) {
                        err = -ENOBUFS;
                        goto done;
@@ -457,7 +458,8 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
                if (psl) {
                        for (i = 0; i < psl->sl_count; i++)
                                newpsl->sl_addr[i] = psl->sl_addr[i];
-                       atomic_sub(IP6_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
+                       atomic_sub(struct_size(psl, sl_addr, psl->sl_max),
+                                  &sk->sk_omem_alloc);
                        kfree_rcu(psl, rcu);
                }
                psl = newpsl;
@@ -525,8 +527,9 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf,
                goto done;
        }
        if (gsf->gf_numsrc) {
-               newpsl = sock_kmalloc(sk, IP6_SFLSIZE(gsf->gf_numsrc),
-                                                         GFP_KERNEL);
+               newpsl = sock_kmalloc(sk, struct_size(newpsl, sl_addr,
+                                                     gsf->gf_numsrc),
+                                     GFP_KERNEL);
                if (!newpsl) {
                        err = -ENOBUFS;
                        goto done;
@@ -543,7 +546,8 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf,
                                     newpsl->sl_count, newpsl->sl_addr, 0);
                if (err) {
                        mutex_unlock(&idev->mc_lock);
-                       sock_kfree_s(sk, newpsl, IP6_SFLSIZE(newpsl->sl_max));
+                       sock_kfree_s(sk, newpsl, struct_size(newpsl, sl_addr,
+                                                            newpsl->sl_max));
                        goto done;
                }
                mutex_unlock(&idev->mc_lock);
@@ -559,7 +563,8 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf,
        if (psl) {
                ip6_mc_del_src(idev, group, pmc->sfmode,
                               psl->sl_count, psl->sl_addr, 0);
-               atomic_sub(IP6_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
+               atomic_sub(struct_size(psl, sl_addr, psl->sl_max),
+                          &sk->sk_omem_alloc);
                kfree_rcu(psl, rcu);
        } else {
                ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0);
@@ -2607,7 +2612,8 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
                err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode,
                                     psl->sl_count, psl->sl_addr, 0);
                RCU_INIT_POINTER(iml->sflist, NULL);
-               atomic_sub(IP6_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
+               atomic_sub(struct_size(psl, sl_addr, psl->sl_max),
+                          &sk->sk_omem_alloc);
                kfree_rcu(psl, rcu);
        }
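
Every IP6_SFLSIZE() above becomes struct_size(): the helper computes the size
of the header plus n trailing sl_addr elements and saturates on arithmetic
overflow, so an absurd source count makes the allocation fail instead of
quietly returning an undersized buffer. A runnable userspace model of that
behaviour, with stand-in types and a simplified helper:

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct in6_addr_stub { unsigned char s6_addr[16]; };

    struct ip6_sf_socklist_stub {
            unsigned int sl_max;
            unsigned int sl_count;
            struct in6_addr_stub sl_addr[];     /* flexible array member */
    };

    /* Simplified model of the kernel's struct_size(): the open-coded
     * IP6_SFLSIZE() multiplication could overflow silently; this
     * saturates to SIZE_MAX so the allocator rejects it. */
    static size_t struct_size_model(size_t hdr, size_t elem, size_t n)
    {
            if (n && elem > (SIZE_MAX - hdr) / n)
                    return SIZE_MAX;
            return hdr + elem * n;
    }

    int main(void)
    {
            size_t sz = struct_size_model(sizeof(struct ip6_sf_socklist_stub),
                                          sizeof(struct in6_addr_stub), 64);
            struct ip6_sf_socklist_stub *psl = malloc(sz);

            printf("allocated %zu bytes for 64 sources\n", sz);
            free(psl);
            return 0;
    }
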
 
index 6b80511..6cf4bb8 100644 (file)
@@ -3626,8 +3626,7 @@ out:
        if (err) {
                lwtstate_put(fib6_nh->fib_nh_lws);
                fib6_nh->fib_nh_lws = NULL;
-               if (dev)
-                       dev_put(dev);
+               dev_put(dev);
        }
 
        return err;
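
This hunk, and the many similar ones further down, depend on dev_put() and
dev_hold() having been made NULL-tolerant, so callers can drop their
"if (dev)" guards the way kfree() callers once dropped theirs. A stub model
of the contract (not the kernel implementation):

    #include <stdatomic.h>
    #include <stddef.h>

    struct net_device_stub { _Atomic int refcnt; };

    /* NULL-safe put: "if (dev) dev_put(dev);" collapses to dev_put(dev). */
    static void dev_put_model(struct net_device_stub *dev)
    {
            if (dev)
                    atomic_fetch_sub(&dev->refcnt, 1);
    }

    static void dev_hold_model(struct net_device_stub *dev)
    {
            if (dev)
                    atomic_fetch_add(&dev->refcnt, 1);
    }

    int main(void)
    {
            struct net_device_stub dev = { .refcnt = 1 };

            dev_hold_model(&dev);
            dev_put_model(&dev);
            dev_put_model(NULL);        /* no-op, no caller-side check */
            return atomic_load(&dev.refcnt) - 1;    /* 0 on balance */
    }
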
index 44453b3..18316ee 100644 (file)
@@ -1044,7 +1044,7 @@ static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg,
                        if (err == 0) {
                                atomic_dec(&iucv->skbs_in_xmit);
                                skb_unlink(skb, &iucv->send_skb_q);
-                               kfree_skb(skb);
+                               consume_skb(skb);
                        }
 
                        /* this error should never happen since the     */
@@ -1293,7 +1293,7 @@ static int iucv_sock_recvmsg(struct socket *sock, struct msghdr *msg,
                        }
                }
 
-               kfree_skb(skb);
+               consume_skb(skb);
                if (iucv->transport == AF_IUCV_TRANS_HIPER) {
                        atomic_inc(&iucv->msg_recv);
                        if (atomic_read(&iucv->msg_recv) > iucv->msglimit) {
@@ -1756,7 +1756,7 @@ static void iucv_callback_txdone(struct iucv_path *path,
        spin_unlock_irqrestore(&list->lock, flags);
 
        if (this) {
-               kfree_skb(this);
+               consume_skb(this);
                /* wake up any process waiting for sending */
                iucv_sock_wake_msglim(sk);
        }
@@ -1903,17 +1903,17 @@ static int afiucv_hs_callback_synack(struct sock *sk, struct sk_buff *skb)
 {
        struct iucv_sock *iucv = iucv_sk(sk);
 
-       if (!iucv)
-               goto out;
-       if (sk->sk_state != IUCV_BOUND)
-               goto out;
+       if (!iucv || sk->sk_state != IUCV_BOUND) {
+               kfree_skb(skb);
+               return NET_RX_SUCCESS;
+       }
+
        bh_lock_sock(sk);
        iucv->msglimit_peer = iucv_trans_hdr(skb)->window;
        sk->sk_state = IUCV_CONNECTED;
        sk->sk_state_change(sk);
        bh_unlock_sock(sk);
-out:
-       kfree_skb(skb);
+       consume_skb(skb);
        return NET_RX_SUCCESS;
 }
 
@@ -1924,16 +1924,16 @@ static int afiucv_hs_callback_synfin(struct sock *sk, struct sk_buff *skb)
 {
        struct iucv_sock *iucv = iucv_sk(sk);
 
-       if (!iucv)
-               goto out;
-       if (sk->sk_state != IUCV_BOUND)
-               goto out;
+       if (!iucv || sk->sk_state != IUCV_BOUND) {
+               kfree_skb(skb);
+               return NET_RX_SUCCESS;
+       }
+
        bh_lock_sock(sk);
        sk->sk_state = IUCV_DISCONN;
        sk->sk_state_change(sk);
        bh_unlock_sock(sk);
-out:
-       kfree_skb(skb);
+       consume_skb(skb);
        return NET_RX_SUCCESS;
 }
 
@@ -1945,16 +1945,18 @@ static int afiucv_hs_callback_fin(struct sock *sk, struct sk_buff *skb)
        struct iucv_sock *iucv = iucv_sk(sk);
 
        /* other end of connection closed */
-       if (!iucv)
-               goto out;
+       if (!iucv) {
+               kfree_skb(skb);
+               return NET_RX_SUCCESS;
+       }
+
        bh_lock_sock(sk);
        if (sk->sk_state == IUCV_CONNECTED) {
                sk->sk_state = IUCV_DISCONN;
                sk->sk_state_change(sk);
        }
        bh_unlock_sock(sk);
-out:
-       kfree_skb(skb);
+       consume_skb(skb);
        return NET_RX_SUCCESS;
 }
 
@@ -2107,7 +2109,7 @@ static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev,
        case (AF_IUCV_FLAG_WIN):
                err = afiucv_hs_callback_win(sk, skb);
                if (skb->len == sizeof(struct af_iucv_trans_hdr)) {
-                       kfree_skb(skb);
+                       consume_skb(skb);
                        break;
                }
                fallthrough;    /* and receive non-zero length data */
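
The kfree_skb() to consume_skb() conversions in this file change accounting,
not behaviour: both free the buffer, but kfree_skb() is traced as a packet
drop while consume_skb() marks a normal end of life, so skbs that were
delivered or transmitted successfully no longer show up in drop monitors. A
stub model of the convention:

    #include <stdbool.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct sk_buff_stub { int len; };

    static unsigned long drop_count;    /* what a drop monitor would see */

    /* consume_skb(): normal lifecycle, invisible to drop tracking */
    static void consume_skb_model(struct sk_buff_stub *skb) { free(skb); }

    /* kfree_skb(): the packet is being discarded; record the drop */
    static void kfree_skb_model(struct sk_buff_stub *skb)
    {
            drop_count++;
            free(skb);
    }

    static void finish_skb(struct sk_buff_stub *skb, bool delivered)
    {
            if (delivered)
                    consume_skb_model(skb);     /* success path */
            else
                    kfree_skb_model(skb);       /* genuine drop */
    }

    int main(void)
    {
            finish_skb(malloc(sizeof(struct sk_buff_stub)), true);
            finish_skb(malloc(sizeof(struct sk_buff_stub)), false);
            printf("drops seen by monitor: %lu\n", drop_count);  /* 1 */
            return 0;
    }
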
@@ -2262,21 +2264,11 @@ static struct packet_type iucv_packet_type = {
        .func = afiucv_hs_rcv,
 };
 
-static int afiucv_iucv_init(void)
-{
-       return pr_iucv->iucv_register(&af_iucv_handler, 0);
-}
-
-static void afiucv_iucv_exit(void)
-{
-       pr_iucv->iucv_unregister(&af_iucv_handler, 0);
-}
-
 static int __init afiucv_init(void)
 {
        int err;
 
-       if (MACHINE_IS_VM) {
+       if (MACHINE_IS_VM && IS_ENABLED(CONFIG_IUCV)) {
                cpcmd("QUERY USERID", iucv_userid, sizeof(iucv_userid), &err);
                if (unlikely(err)) {
                        WARN_ON(err);
@@ -2284,11 +2276,7 @@ static int __init afiucv_init(void)
                        goto out;
                }
 
-               pr_iucv = try_then_request_module(symbol_get(iucv_if), "iucv");
-               if (!pr_iucv) {
-                       printk(KERN_WARNING "iucv_if lookup failed\n");
-                       memset(&iucv_userid, 0, sizeof(iucv_userid));
-               }
+               pr_iucv = &iucv_if;
        } else {
                memset(&iucv_userid, 0, sizeof(iucv_userid));
                pr_iucv = NULL;
@@ -2302,7 +2290,7 @@ static int __init afiucv_init(void)
                goto out_proto;
 
        if (pr_iucv) {
-               err = afiucv_iucv_init();
+               err = pr_iucv->iucv_register(&af_iucv_handler, 0);
                if (err)
                        goto out_sock;
        }
@@ -2316,23 +2304,19 @@ static int __init afiucv_init(void)
 
 out_notifier:
        if (pr_iucv)
-               afiucv_iucv_exit();
+               pr_iucv->iucv_unregister(&af_iucv_handler, 0);
 out_sock:
        sock_unregister(PF_IUCV);
 out_proto:
        proto_unregister(&iucv_proto);
 out:
-       if (pr_iucv)
-               symbol_put(iucv_if);
        return err;
 }
 
 static void __exit afiucv_exit(void)
 {
-       if (pr_iucv) {
-               afiucv_iucv_exit();
-               symbol_put(iucv_if);
-       }
+       if (pr_iucv)
+               pr_iucv->iucv_unregister(&af_iucv_handler, 0);
 
        unregister_netdevice_notifier(&afiucv_netdev_notifier);
        dev_remove_pack(&iucv_packet_type);
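
The init/exit rework above drops the symbol_get()/symbol_put() dance and
references iucv_if directly, gated at compile time by IS_ENABLED(CONFIG_IUCV).
A stubbed userspace sketch of the shape of that simplification (all names
here are stand-ins):

    #include <stdio.h>

    struct iucv_if_stub {
            int  (*iucv_register)(void *handler, int smp);
            void (*iucv_unregister)(void *handler, int smp);
    };

    static int  reg_stub(void *h, int s)   { (void)h; (void)s; return 0; }
    static void unreg_stub(void *h, int s) { (void)h; (void)s; }

    /* resolved at link time, not via symbol_get() at runtime */
    static struct iucv_if_stub iucv_if_model = { reg_stub, unreg_stub };
    static struct iucv_if_stub *pr_iucv_model;

    int main(void)
    {
            int machine_is_vm = 1, config_iucv = 1;  /* IS_ENABLED() model */

            pr_iucv_model = (machine_is_vm && config_iucv) ? &iucv_if_model
                                                           : NULL;
            if (pr_iucv_model)
                    pr_iucv_model->iucv_register(NULL, 0);
            puts(pr_iucv_model ? "VM transport" : "HiperSockets only");
            return 0;
    }
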
index e6795d5..f3343a8 100644 (file)
@@ -286,19 +286,19 @@ static union iucv_param *iucv_param_irq[NR_CPUS];
  */
 static inline int __iucv_call_b2f0(int command, union iucv_param *parm)
 {
-       register unsigned long reg0 asm ("0");
-       register unsigned long reg1 asm ("1");
-       int ccode;
+       int cc;
 
-       reg0 = command;
-       reg1 = (unsigned long)parm;
        asm volatile(
-               "       .long 0xb2f01000\n"
-               "       ipm     %0\n"
-               "       srl     %0,28\n"
-               : "=d" (ccode), "=m" (*parm), "+d" (reg0), "+a" (reg1)
-               :  "m" (*parm) : "cc");
-       return ccode;
+               "       lgr     0,%[reg0]\n"
+               "       lgr     1,%[reg1]\n"
+               "       .long   0xb2f01000\n"
+               "       ipm     %[cc]\n"
+               "       srl     %[cc],28\n"
+               : [cc] "=&d" (cc), "+m" (*parm)
+               : [reg0] "d" ((unsigned long)command),
+                 [reg1] "d" ((unsigned long)parm)
+               : "cc", "0", "1");
+       return cc;
 }
 
 static inline int iucv_call_b2f0(int command, union iucv_param *parm)
@@ -319,19 +319,21 @@ static inline int iucv_call_b2f0(int command, union iucv_param *parm)
  */
 static int __iucv_query_maxconn(void *param, unsigned long *max_pathid)
 {
-       register unsigned long reg0 asm ("0");
-       register unsigned long reg1 asm ("1");
-       int ccode;
+       unsigned long reg1 = (unsigned long)param;
+       int cc;
 
-       reg0 = IUCV_QUERY;
-       reg1 = (unsigned long) param;
        asm volatile (
+               "       lghi    0,%[cmd]\n"
+               "       lgr     1,%[reg1]\n"
                "       .long   0xb2f01000\n"
-               "       ipm     %0\n"
-               "       srl     %0,28\n"
-               : "=d" (ccode), "+d" (reg0), "+d" (reg1) : : "cc");
+               "       ipm     %[cc]\n"
+               "       srl     %[cc],28\n"
+               "       lgr     %[reg1],1\n"
+               : [cc] "=&d" (cc), [reg1] "+&d" (reg1)
+               : [cmd] "K" (IUCV_QUERY)
+               : "cc", "0", "1");
        *max_pathid = reg1;
-       return ccode;
+       return cc;
 }
 
 static int iucv_query_maxconn(void)
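
Both asm rewrites above retire "register unsigned long reg0 asm("0")"
variables, which are fragile because the compiler does not guarantee the
value stays in the named register between its initialization and the asm
statement. The replacement loads the fixed registers inside the asm body and
declares them as clobbers. A condensed model of the new pattern (s390-only;
it mirrors the b2f0 hunk, with a blanket "memory" clobber where the real
code constrains *parm precisely):

    /* builds only with an s390x compiler; shown for the constraint shape */
    static inline int b2f0_model(int command, void *parm)
    {
            int cc;

            asm volatile(
                    "       lgr     0,%[reg0]\n"    /* load r0 in the asm */
                    "       lgr     1,%[reg1]\n"    /* load r1 in the asm */
                    "       .long   0xb2f01000\n"   /* IUCV instruction */
                    "       ipm     %[cc]\n"        /* fetch condition code */
                    "       srl     %[cc],28\n"
                    : [cc] "=&d" (cc)
                    : [reg0] "d" ((unsigned long)command),
                      [reg1] "d" ((unsigned long)parm)
                    : "cc", "0", "1", "memory");
            return cc;
    }
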
@@ -500,14 +502,14 @@ static void iucv_setmask_mp(void)
 {
        int cpu;
 
-       get_online_cpus();
+       cpus_read_lock();
        for_each_online_cpu(cpu)
                /* Enable all cpus with a declared buffer. */
                if (cpumask_test_cpu(cpu, &iucv_buffer_cpumask) &&
                    !cpumask_test_cpu(cpu, &iucv_irq_cpumask))
                        smp_call_function_single(cpu, iucv_allow_cpu,
                                                 NULL, 1);
-       put_online_cpus();
+       cpus_read_unlock();
 }
 
 /**
@@ -540,7 +542,7 @@ static int iucv_enable(void)
        size_t alloc_size;
        int cpu, rc;
 
-       get_online_cpus();
+       cpus_read_lock();
        rc = -ENOMEM;
        alloc_size = iucv_max_pathid * sizeof(struct iucv_path);
        iucv_path_table = kzalloc(alloc_size, GFP_KERNEL);
@@ -553,12 +555,12 @@ static int iucv_enable(void)
        if (cpumask_empty(&iucv_buffer_cpumask))
                /* No cpu could declare an iucv buffer. */
                goto out;
-       put_online_cpus();
+       cpus_read_unlock();
        return 0;
 out:
        kfree(iucv_path_table);
        iucv_path_table = NULL;
-       put_online_cpus();
+       cpus_read_unlock();
        return rc;
 }
 
@@ -571,11 +573,11 @@ out:
  */
 static void iucv_disable(void)
 {
-       get_online_cpus();
+       cpus_read_lock();
        on_each_cpu(iucv_retrieve_cpu, NULL, 1);
        kfree(iucv_path_table);
        iucv_path_table = NULL;
-       put_online_cpus();
+       cpus_read_unlock();
 }
 
 static int iucv_cpu_dead(unsigned int cpu)
@@ -784,7 +786,7 @@ static int iucv_reboot_event(struct notifier_block *this,
        if (cpumask_empty(&iucv_irq_cpumask))
                return NOTIFY_DONE;
 
-       get_online_cpus();
+       cpus_read_lock();
        on_each_cpu_mask(&iucv_irq_cpumask, iucv_block_cpu, NULL, 1);
        preempt_disable();
        for (i = 0; i < iucv_max_pathid; i++) {
@@ -792,7 +794,7 @@ static int iucv_reboot_event(struct notifier_block *this,
                        iucv_sever_pathid(i, NULL);
        }
        preempt_enable();
-       put_online_cpus();
+       cpus_read_unlock();
        iucv_disable();
        return NOTIFY_DONE;
 }
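
The get_online_cpus()/put_online_cpus() calls in this file become
cpus_read_lock()/cpus_read_unlock(), a rename that makes the read-side-lock
semantics explicit; the guarantee is unchanged: no CPU comes or goes while
the section runs. A stubbed model:

    #include <stdio.h>

    /* stand-ins for the kernel's CPU-hotplug read lock */
    static void cpus_read_lock_model(void)   { /* block CPU hotplug */ }
    static void cpus_read_unlock_model(void) { /* allow CPU hotplug */ }

    static void walk_online_cpus(void (*fn)(int cpu), int ncpus)
    {
            cpus_read_lock_model();         /* was: get_online_cpus() */
            for (int cpu = 0; cpu < ncpus; cpu++)
                    fn(cpu);                /* online mask is stable here */
            cpus_read_unlock_model();       /* was: put_online_cpus() */
    }

    static void say(int cpu) { printf("cpu %d\n", cpu); }

    int main(void)
    {
            walk_online_cpus(say, 2);
            return 0;
    }
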
index 7180979..3086f4a 100644 (file)
@@ -98,8 +98,16 @@ static inline u8 llc_ui_header_len(struct sock *sk, struct sockaddr_llc *addr)
 {
        u8 rc = LLC_PDU_LEN_U;
 
-       if (addr->sllc_test || addr->sllc_xid)
+       if (addr->sllc_test)
                rc = LLC_PDU_LEN_U;
+       else if (addr->sllc_xid)
+               /* We need to expand the header to sizeof(struct llc_xid_info),
+                * since llc_pdu_init_as_xid_cmd() writes bytes 4, 5 and 6 of the
+                * LLC header as the XID PDU. In llc_ui_sendmsg() we reserve the
+                * header size and fill the remaining space with user data; if we
+                * don't reserve these bytes, the XID info overwrites user data.
+                */
+               rc = LLC_PDU_LEN_U_XID;
        else if (sk->sk_type == SOCK_STREAM)
                rc = LLC_PDU_LEN_I;
        return rc;
@@ -216,8 +224,7 @@ static int llc_ui_release(struct socket *sock)
        } else {
                release_sock(sk);
        }
-       if (llc->dev)
-               dev_put(llc->dev);
+       dev_put(llc->dev);
        sock_put(sk);
        llc_sk_free(sk);
 out:
@@ -355,8 +362,7 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen)
        } else
                llc->dev = dev_getbyhwaddr_rcu(&init_net, addr->sllc_arphrd,
                                           addr->sllc_mac);
-       if (llc->dev)
-               dev_hold(llc->dev);
+       dev_hold(llc->dev);
        rcu_read_unlock();
        if (!llc->dev)
                goto out;
index b554f26..79d1cef 100644 (file)
@@ -79,7 +79,7 @@ int llc_sap_action_send_xid_c(struct llc_sap *sap, struct sk_buff *skb)
        struct llc_sap_state_ev *ev = llc_sap_ev(skb);
        int rc;
 
-       llc_pdu_header_init(skb, LLC_PDU_TYPE_U, ev->saddr.lsap,
+       llc_pdu_header_init(skb, LLC_PDU_TYPE_U_XID, ev->saddr.lsap,
                            ev->daddr.lsap, LLC_PDU_CMD);
        llc_pdu_init_as_xid_cmd(skb, LLC_XID_NULL_CLASS_2, 0);
        rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac);
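
The two llc hunks above are one fix: a U-format LLC header is 3 bytes
(LLC_PDU_LEN_U), but llc_pdu_init_as_xid_cmd() additionally writes a 3-byte
struct llc_xid_info directly behind it, so an XID socket that reserved only
3 bytes of headroom had the start of its payload overwritten. The new
LLC_PDU_LEN_U_XID reserves room for both. A small model of the arithmetic
(field names abbreviated):

    #include <assert.h>

    struct llc_xid_info_model { unsigned char fmt_id, type, rw; };

    #define LLC_PDU_LEN_U_MODEL     3
    #define LLC_PDU_LEN_U_XID_MODEL (LLC_PDU_LEN_U_MODEL + \
                                     sizeof(struct llc_xid_info_model))

    int main(void)
    {
            /* reserving only LLC_PDU_LEN_U leaves bytes 4..6 inside the
             * user payload, which the XID init then clobbers */
            assert(LLC_PDU_LEN_U_XID_MODEL == 6);
            return 0;
    }
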
index 84cc773..4e6f11e 100644 (file)
@@ -152,6 +152,8 @@ static int ieee80211_change_iface(struct wiphy *wiphy,
                                  struct vif_params *params)
 {
        struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+       struct ieee80211_local *local = sdata->local;
+       struct sta_info *sta;
        int ret;
 
        ret = ieee80211_if_change_type(sdata, type);
@@ -162,7 +164,24 @@ static int ieee80211_change_iface(struct wiphy *wiphy,
                RCU_INIT_POINTER(sdata->u.vlan.sta, NULL);
                ieee80211_check_fast_rx_iface(sdata);
        } else if (type == NL80211_IFTYPE_STATION && params->use_4addr >= 0) {
+               struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
+
+               if (params->use_4addr == ifmgd->use_4addr)
+                       return 0;
+
                sdata->u.mgd.use_4addr = params->use_4addr;
+               if (!ifmgd->associated)
+                       return 0;
+
+               mutex_lock(&local->sta_mtx);
+               sta = sta_info_get(sdata, ifmgd->bssid);
+               if (sta)
+                       drv_sta_set_4addr(local, sdata, &sta->sta,
+                                         params->use_4addr);
+               mutex_unlock(&local->sta_mtx);
+
+               if (params->use_4addr)
+                       ieee80211_send_4addr_nullfunc(local, sdata);
        }
 
        if (sdata->vif.type == NL80211_IFTYPE_MONITOR) {
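
Before this change, flipping use_4addr on an already-associated station
interface only updated the flag; neither the driver nor the AP learned of it
until the next association. The hunk rejects no-op updates early and, when
associated, pushes the new mode to the driver and announces it with a
4-address nullfunc. A stubbed sketch of that control flow:

    #include <stdbool.h>
    #include <stdio.h>

    struct sta_model { bool associated; bool use_4addr; };

    static void drv_sta_set_4addr_model(struct sta_model *s, bool on)
    { s->use_4addr = on; }              /* driver offload hook */

    static void send_4addr_nullfunc_model(void)
    { puts("announce 4-addr mode to AP"); }

    static int change_4addr(struct sta_model *s, bool use_4addr)
    {
            if (use_4addr == s->use_4addr)
                    return 0;                   /* nothing to do */

            s->use_4addr = use_4addr;
            if (!s->associated)
                    return 0;                   /* applied on association */

            drv_sta_set_4addr_model(s, use_4addr);
            if (use_4addr)
                    send_4addr_nullfunc_model();
            return 0;
    }

    int main(void)
    {
            struct sta_model s = { .associated = true, .use_4addr = false };

            change_4addr(&s, true);             /* driver + AP notified */
            change_4addr(&s, true);             /* no-op, early return */
            return 0;
    }
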
index 22549b9..30ce6d2 100644 (file)
@@ -2201,6 +2201,8 @@ void ieee80211_dynamic_ps_timer(struct timer_list *t);
 void ieee80211_send_nullfunc(struct ieee80211_local *local,
                             struct ieee80211_sub_if_data *sdata,
                             bool powersave);
+void ieee80211_send_4addr_nullfunc(struct ieee80211_local *local,
+                                  struct ieee80211_sub_if_data *sdata);
 void ieee80211_sta_tx_notify(struct ieee80211_sub_if_data *sdata,
                             struct ieee80211_hdr *hdr, bool ack, u16 tx_time);
 
index a00f11a..c0ea3b1 100644 (file)
@@ -1095,8 +1095,8 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local,
        ieee80211_tx_skb(sdata, skb);
 }
 
-static void ieee80211_send_4addr_nullfunc(struct ieee80211_local *local,
-                                         struct ieee80211_sub_if_data *sdata)
+void ieee80211_send_4addr_nullfunc(struct ieee80211_local *local,
+                                  struct ieee80211_sub_if_data *sdata)
 {
        struct sk_buff *skb;
        struct ieee80211_hdr *nullfunc;
index 771921c..2563473 100644 (file)
@@ -730,7 +730,8 @@ ieee80211_make_monitor_skb(struct ieee80211_local *local,
                 * Need to make a copy and possibly remove radiotap header
                 * and FCS from the original.
                 */
-               skb = skb_copy_expand(*origskb, needed_headroom, 0, GFP_ATOMIC);
+               skb = skb_copy_expand(*origskb, needed_headroom + NET_SKB_PAD,
+                                     0, GFP_ATOMIC);
 
                if (!skb)
                        return NULL;
index e969811..8509778 100644 (file)
@@ -1147,6 +1147,29 @@ static bool ieee80211_tx_prep_agg(struct ieee80211_tx_data *tx,
        return queued;
 }
 
+static void
+ieee80211_aggr_check(struct ieee80211_sub_if_data *sdata,
+                    struct sta_info *sta,
+                    struct sk_buff *skb)
+{
+       struct rate_control_ref *ref = sdata->local->rate_ctrl;
+       u16 tid;
+
+       if (!ref || !(ref->ops->capa & RATE_CTRL_CAPA_AMPDU_TRIGGER))
+               return;
+
+       if (!sta || !sta->sta.ht_cap.ht_supported ||
+           !sta->sta.wme || skb_get_queue_mapping(skb) == IEEE80211_AC_VO ||
+           skb->protocol == sdata->control_port_protocol)
+               return;
+
+       tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK;
+       if (likely(sta->ampdu_mlme.tid_tx[tid]))
+               return;
+
+       ieee80211_start_tx_ba_session(&sta->sta, tid, 0);
+}
+
 /*
  * initialises @tx
  * pass %NULL for the station if unknown, a valid pointer if known
@@ -1160,6 +1183,7 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata,
        struct ieee80211_local *local = sdata->local;
        struct ieee80211_hdr *hdr;
        struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+       bool aggr_check = false;
        int tid;
 
        memset(tx, 0, sizeof(*tx));
@@ -1188,8 +1212,10 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata,
                } else if (tx->sdata->control_port_protocol == tx->skb->protocol) {
                        tx->sta = sta_info_get_bss(sdata, hdr->addr1);
                }
-               if (!tx->sta && !is_multicast_ether_addr(hdr->addr1))
+               if (!tx->sta && !is_multicast_ether_addr(hdr->addr1)) {
                        tx->sta = sta_info_get(sdata, hdr->addr1);
+                       aggr_check = true;
+               }
        }
 
        if (tx->sta && ieee80211_is_data_qos(hdr->frame_control) &&
@@ -1199,8 +1225,12 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata,
                struct tid_ampdu_tx *tid_tx;
 
                tid = ieee80211_get_tid(hdr);
-
                tid_tx = rcu_dereference(tx->sta->ampdu_mlme.tid_tx[tid]);
+               if (!tid_tx && aggr_check) {
+                       ieee80211_aggr_check(sdata, tx->sta, skb);
+                       tid_tx = rcu_dereference(tx->sta->ampdu_mlme.tid_tx[tid]);
+               }
+
                if (tid_tx) {
                        bool queued;
 
@@ -4120,29 +4150,6 @@ void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac)
 }
 EXPORT_SYMBOL(ieee80211_txq_schedule_start);
 
-static void
-ieee80211_aggr_check(struct ieee80211_sub_if_data *sdata,
-                    struct sta_info *sta,
-                    struct sk_buff *skb)
-{
-       struct rate_control_ref *ref = sdata->local->rate_ctrl;
-       u16 tid;
-
-       if (!ref || !(ref->ops->capa & RATE_CTRL_CAPA_AMPDU_TRIGGER))
-               return;
-
-       if (!sta || !sta->sta.ht_cap.ht_supported ||
-           !sta->sta.wme || skb_get_queue_mapping(skb) == IEEE80211_AC_VO ||
-           skb->protocol == sdata->control_port_protocol)
-               return;
-
-       tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK;
-       if (likely(sta->ampdu_mlme.tid_tx[tid]))
-               return;
-
-       ieee80211_start_tx_ba_session(&sta->sta, tid, 0);
-}
-
 void __ieee80211_subif_start_xmit(struct sk_buff *skb,
                                  struct net_device *dev,
                                  u32 info_flags,
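
ieee80211_aggr_check() moves up so that ieee80211_tx_prepare() can call it:
if a QoS data frame finds no aggregation session for its TID and the station
was only resolved here (aggr_check), a BA session is kicked off and tid_tx
re-read, so aggregation also starts on transmit paths that never pass
through __ieee80211_subif_start_xmit(). A stubbed model of the added
lookup-retry (session setup is asynchronous in the real code):

    #include <stddef.h>

    struct tid_tx_model { int active; };

    static struct tid_tx_model *tid_tx_table[16];   /* per-TID sessions */

    static void start_ba_session(int tid) { (void)tid; /* async setup */ }

    static struct tid_tx_model *prepare_agg(int tid, int aggr_check)
    {
            struct tid_tx_model *tid_tx = tid_tx_table[tid];

            if (!tid_tx && aggr_check) {
                    start_ba_session(tid);      /* may create tid_tx */
                    tid_tx = tid_tx_table[tid]; /* re-read; RCU in-kernel */
            }
            return tid_tx;
    }

    int main(void)
    {
            return prepare_agg(5, 1) ? 1 : 0;   /* 0: setup is async */
    }
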
index 84f722d..a9526ac 100644 (file)
@@ -170,7 +170,6 @@ static int mctp_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
                /* TODO: expand mctp_skb_cb for header fields? */
                struct mctp_hdr *hdr = mctp_hdr(skb);
 
-               hdr = mctp_hdr(skb);
                addr = msg->msg_name;
                addr->smctp_family = AF_MCTP;
                addr->smctp_network = cb->net;
index d2591eb..56263c2 100644 (file)
@@ -27,7 +27,6 @@ struct mptcp_pm_addr_entry {
        struct mptcp_addr_info  addr;
        u8                      flags;
        int                     ifindex;
-       struct rcu_head         rcu;
        struct socket           *lsk;
 };
 
index 83c52df..5c03e51 100644 (file)
@@ -670,8 +670,13 @@ bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report)
                return false;
 
        tstamp = nf_conn_tstamp_find(ct);
-       if (tstamp && tstamp->stop == 0)
+       if (tstamp) {
+               s32 timeout = ct->timeout - nfct_time_stamp;
+
                tstamp->stop = ktime_get_real_ns();
+               if (timeout < 0)
+                       tstamp->stop -= jiffies_to_nsecs(-timeout);
+       }
 
        if (nf_conntrack_event_report(IPCT_DESTROY, ct,
                                    portid, report) < 0) {
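
The conntrack hunk fixes the recorded stop time for entries reaped after
their timeout already lapsed: ct->timeout - nfct_time_stamp going negative
means the flow really ended that many jiffies ago, so tstamp->stop is
backdated by that amount instead of being set to "now". A runnable model of
the arithmetic (the HZ value is illustrative):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            int64_t now_ns       = 1000000000;  /* ktime_get_real_ns() */
            int32_t timeout_left = -250;        /* jiffies past expiry */
            int64_t ns_per_jiffy = 4000000;     /* HZ=250 */

            int64_t stop_ns = now_ns;
            if (timeout_left < 0)
                    stop_ns -= (int64_t)(-timeout_left) * ns_per_jiffy;

            printf("recorded stop %lld ns, now %lld ns\n",
                   (long long)stop_ns, (long long)now_ns);
            return 0;
    }
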
index ec3dd1c..a106721 100644 (file)
@@ -321,7 +321,11 @@ EXPORT_SYMBOL_GPL(flow_offload_add);
 void flow_offload_refresh(struct nf_flowtable *flow_table,
                          struct flow_offload *flow)
 {
-       flow->timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
+       u32 timeout;
+
+       timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
+       if (READ_ONCE(flow->timeout) != timeout)
+               WRITE_ONCE(flow->timeout, timeout);
 
        if (likely(!nf_flowtable_hw_offload(flow_table)))
                return;
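
flow_offload_refresh() runs once per offloaded packet, so the refresh now
stores the timeout only when it actually changed: READ_ONCE/WRITE_ONCE
annotate the cross-CPU access, and skipping the redundant store keeps the
cache line clean instead of bouncing it between CPUs. A userspace model with
relaxed atomics standing in for the kernel macros:

    #include <stdatomic.h>

    static _Atomic unsigned int flow_timeout;

    static void refresh_timeout(unsigned int now, unsigned int delta)
    {
            unsigned int timeout = now + delta;

            if (atomic_load_explicit(&flow_timeout,
                                     memory_order_relaxed) != timeout)
                    atomic_store_explicit(&flow_timeout, timeout,
                                          memory_order_relaxed);
    }

    int main(void)
    {
            refresh_timeout(1000, 30);
            refresh_timeout(1000, 30);  /* unchanged: no store issued */
            return 0;
    }
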
index f92006c..2bfd9f1 100644 (file)
@@ -251,8 +251,7 @@ static int flow_offload_eth_src(struct net *net,
        flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 8,
                            &val, &mask);
 
-       if (dev)
-               dev_put(dev);
+       dev_put(dev);
 
        return 0;
 }
index 4903da8..6d12afa 100644 (file)
@@ -51,18 +51,14 @@ static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
        struct nf_hook_state *state = &entry->state;
 
        /* Release those devices we held, or Alexey will kill me. */
-       if (state->in)
-               dev_put(state->in);
-       if (state->out)
-               dev_put(state->out);
+       dev_put(state->in);
+       dev_put(state->out);
        if (state->sk)
                sock_put(state->sk);
 
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
-       if (entry->physin)
-               dev_put(entry->physin);
-       if (entry->physout)
-               dev_put(entry->physout);
+       dev_put(entry->physin);
+       dev_put(entry->physout);
 #endif
 }
 
@@ -95,18 +91,14 @@ void nf_queue_entry_get_refs(struct nf_queue_entry *entry)
 {
        struct nf_hook_state *state = &entry->state;
 
-       if (state->in)
-               dev_hold(state->in);
-       if (state->out)
-               dev_hold(state->out);
+       dev_hold(state->in);
+       dev_hold(state->out);
        if (state->sk)
                sock_hold(state->sk);
 
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
-       if (entry->physin)
-               dev_hold(entry->physin);
-       if (entry->physout)
-               dev_hold(entry->physout);
+       dev_hold(entry->physin);
+       dev_hold(entry->physout);
 #endif
 }
 EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs);
index de182d1..081437d 100644 (file)
@@ -8445,6 +8445,16 @@ static int nf_tables_commit_audit_alloc(struct list_head *adl,
        return 0;
 }
 
+static void nf_tables_commit_audit_free(struct list_head *adl)
+{
+       struct nft_audit_data *adp, *adn;
+
+       list_for_each_entry_safe(adp, adn, adl, list) {
+               list_del(&adp->list);
+               kfree(adp);
+       }
+}
+
 static void nf_tables_commit_audit_collect(struct list_head *adl,
                                           struct nft_table *table, u32 op)
 {
@@ -8509,6 +8519,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
                ret = nf_tables_commit_audit_alloc(&adl, trans->ctx.table);
                if (ret) {
                        nf_tables_commit_chain_prepare_cancel(net);
+                       nf_tables_commit_audit_free(&adl);
                        return ret;
                }
                if (trans->msg_type == NFT_MSG_NEWRULE ||
@@ -8518,6 +8529,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
                        ret = nf_tables_commit_chain_prepare(net, chain);
                        if (ret < 0) {
                                nf_tables_commit_chain_prepare_cancel(net);
+                               nf_tables_commit_audit_free(&adl);
                                return ret;
                        }
                }
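
nf_tables_commit_audit_alloc() accumulates one record per table on a local
list; the new nf_tables_commit_audit_free() drains that list on the two
early-return error paths above, where the records previously leaked. A
minimal model of the drain:

    #include <stdlib.h>

    struct audit_rec { struct audit_rec *next; };

    static void audit_list_free(struct audit_rec **head)
    {
            while (*head) {
                    struct audit_rec *rec = *head;

                    *head = rec->next;  /* list_del() equivalent */
                    free(rec);          /* kfree() equivalent */
            }
    }

    int main(void)
    {
            struct audit_rec *head = calloc(1, sizeof(*head));

            head->next = calloc(1, sizeof(*head));
            audit_list_free(&head);
            return head != NULL;        /* 0: fully drained */
    }
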
index 50b4e3c..202f57d 100644 (file)
@@ -174,7 +174,9 @@ static const struct nf_hook_entries *
 nfnl_hook_entries_head(u8 pf, unsigned int hook, struct net *net, const char *dev)
 {
        const struct nf_hook_entries *hook_head = NULL;
+#ifdef CONFIG_NETFILTER_INGRESS
        struct net_device *netdev;
+#endif
 
        switch (pf) {
        case NFPROTO_IPV4:
index 8088b99..304e33c 100644 (file)
@@ -48,24 +48,30 @@ static void nft_last_eval(const struct nft_expr *expr,
 {
        struct nft_last_priv *priv = nft_expr_priv(expr);
 
-       priv->last_jiffies = jiffies;
-       priv->last_set = 1;
+       if (READ_ONCE(priv->last_jiffies) != jiffies)
+               WRITE_ONCE(priv->last_jiffies, jiffies);
+       if (READ_ONCE(priv->last_set) == 0)
+               WRITE_ONCE(priv->last_set, 1);
 }
 
 static int nft_last_dump(struct sk_buff *skb, const struct nft_expr *expr)
 {
        struct nft_last_priv *priv = nft_expr_priv(expr);
+       unsigned long last_jiffies = READ_ONCE(priv->last_jiffies);
+       u32 last_set = READ_ONCE(priv->last_set);
        __be64 msecs;
 
-       if (time_before(jiffies, priv->last_jiffies))
-               priv->last_set = 0;
+       if (time_before(jiffies, last_jiffies)) {
+               WRITE_ONCE(priv->last_set, 0);
+               last_set = 0;
+       }
 
-       if (priv->last_set)
-               msecs = nf_jiffies64_to_msecs(jiffies - priv->last_jiffies);
+       if (last_set)
+               msecs = nf_jiffies64_to_msecs(jiffies - last_jiffies);
        else
                msecs = 0;
 
-       if (nla_put_be32(skb, NFTA_LAST_SET, htonl(priv->last_set)) ||
+       if (nla_put_be32(skb, NFTA_LAST_SET, htonl(last_set)) ||
            nla_put_be64(skb, NFTA_LAST_MSECS, msecs, NFTA_LAST_PAD))
                goto nla_put_failure;
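
nft_last's eval runs on the packet path while the dump runs from netlink, so
the dump now takes READ_ONCE snapshots of last_jiffies and last_set and
reasons only about the local copies, while eval avoids stores when nothing
changed. A userspace model of the snapshot-then-compute pattern, with
relaxed atomics in place of READ_ONCE/WRITE_ONCE:

    #include <stdatomic.h>
    #include <stdio.h>

    static _Atomic unsigned long last_jiffies;
    static _Atomic unsigned int  last_set;

    static void dump_last(unsigned long now)
    {
            unsigned long lj = atomic_load_explicit(&last_jiffies,
                                                    memory_order_relaxed);
            unsigned int  ls = atomic_load_explicit(&last_set,
                                                    memory_order_relaxed);

            if (now < lj) {             /* clock went backwards: reset */
                    atomic_store_explicit(&last_set, 0,
                                          memory_order_relaxed);
                    ls = 0;
            }
            printf("set=%u msecs=%lu\n", ls, ls ? now - lj : 0);
    }

    int main(void)
    {
            atomic_store(&last_jiffies, 10);
            atomic_store(&last_set, 1);
            dump_last(5);               /* prints set=0 msecs=0 */
            return 0;
    }
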
 
index 0840c63..be1595d 100644 (file)
@@ -201,7 +201,9 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
                alen = sizeof_field(struct nf_nat_range, min_addr.ip6);
                break;
        default:
-               return -EAFNOSUPPORT;
+               if (tb[NFTA_NAT_REG_ADDR_MIN])
+                       return -EAFNOSUPPORT;
+               break;
        }
        priv->family = family;
 
index 2483df0..566ba43 100644 (file)
@@ -492,8 +492,7 @@ static int netlbl_unlhsh_remove_addr4(struct net *net,
                netlbl_af4list_audit_addr(audit_buf, 1,
                                          (dev != NULL ? dev->name : NULL),
                                          addr->s_addr, mask->s_addr);
-               if (dev != NULL)
-                       dev_put(dev);
+               dev_put(dev);
                if (entry != NULL &&
                    security_secid_to_secctx(entry->secid,
                                             &secctx, &secctx_len) == 0) {
@@ -553,8 +552,7 @@ static int netlbl_unlhsh_remove_addr6(struct net *net,
                netlbl_af6list_audit_addr(audit_buf, 1,
                                          (dev != NULL ? dev->name : NULL),
                                          addr, mask);
-               if (dev != NULL)
-                       dev_put(dev);
+               dev_put(dev);
                if (entry != NULL &&
                    security_secid_to_secctx(entry->secid,
                                             &secctx, &secctx_len) == 0) {
index a880dd3..511819f 100644 (file)
@@ -59,8 +59,7 @@ static void nr_loopback_timer(struct timer_list *unused)
                if (dev == NULL || nr_rx_frame(skb, dev) == 0)
                        kfree_skb(skb);
 
-               if (dev != NULL)
-                       dev_put(dev);
+               dev_put(dev);
 
                if (!skb_queue_empty(&loopback_queue) && !nr_loopback_running())
                        mod_timer(&loopback_timer, jiffies + 10);
index de04560..ddd5cbd 100644 (file)
@@ -582,8 +582,7 @@ struct net_device *nr_dev_first(void)
                        if (first == NULL || strncmp(dev->name, first->name, 3) < 0)
                                first = dev;
        }
-       if (first)
-               dev_hold(first);
+       dev_hold(first);
        rcu_read_unlock();
 
        return first;
index 80a5c2a..82ab39d 100644 (file)
@@ -95,8 +95,8 @@ static void nci_req_cancel(struct nci_dev *ndev, int err)
 
 /* Execute request and wait for completion. */
 static int __nci_request(struct nci_dev *ndev,
-                        void (*req)(struct nci_dev *ndev, unsigned long opt),
-                        unsigned long opt, __u32 timeout)
+                        void (*req)(struct nci_dev *ndev, const void *opt),
+                        const void *opt, __u32 timeout)
 {
        int rc = 0;
        long completion_rc;
@@ -139,8 +139,8 @@ static int __nci_request(struct nci_dev *ndev,
 
 inline int nci_request(struct nci_dev *ndev,
                       void (*req)(struct nci_dev *ndev,
-                                  unsigned long opt),
-                      unsigned long opt, __u32 timeout)
+                                  const void *opt),
+                      const void *opt, __u32 timeout)
 {
        int rc;
 
@@ -155,7 +155,7 @@ inline int nci_request(struct nci_dev *ndev,
        return rc;
 }
 
-static void nci_reset_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_reset_req(struct nci_dev *ndev, const void *opt)
 {
        struct nci_core_reset_cmd cmd;
 
@@ -163,17 +163,17 @@ static void nci_reset_req(struct nci_dev *ndev, unsigned long opt)
        nci_send_cmd(ndev, NCI_OP_CORE_RESET_CMD, 1, &cmd);
 }
 
-static void nci_init_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_init_req(struct nci_dev *ndev, const void *opt)
 {
        u8 plen = 0;
 
        if (opt)
                plen = sizeof(struct nci_core_init_v2_cmd);
 
-       nci_send_cmd(ndev, NCI_OP_CORE_INIT_CMD, plen, (void *)opt);
+       nci_send_cmd(ndev, NCI_OP_CORE_INIT_CMD, plen, opt);
 }
 
-static void nci_init_complete_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_init_complete_req(struct nci_dev *ndev, const void *opt)
 {
        struct nci_rf_disc_map_cmd cmd;
        struct disc_map_config *cfg = cmd.mapping_configs;
@@ -215,10 +215,9 @@ struct nci_set_config_param {
        const __u8      *val;
 };
 
-static void nci_set_config_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_set_config_req(struct nci_dev *ndev, const void *opt)
 {
-       const struct nci_set_config_param *param =
-               (struct nci_set_config_param *)opt;
+       const struct nci_set_config_param *param = opt;
        struct nci_core_set_config_cmd cmd;
 
        BUG_ON(param->len > NCI_MAX_PARAM_LEN);
@@ -236,10 +235,9 @@ struct nci_rf_discover_param {
        __u32   tm_protocols;
 };
 
-static void nci_rf_discover_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_rf_discover_req(struct nci_dev *ndev, const void *opt)
 {
-       const struct nci_rf_discover_param *param =
-               (struct nci_rf_discover_param *)opt;
+       const struct nci_rf_discover_param *param = opt;
        struct nci_rf_disc_cmd cmd;
 
        cmd.num_disc_configs = 0;
@@ -302,10 +300,9 @@ struct nci_rf_discover_select_param {
        __u8    rf_protocol;
 };
 
-static void nci_rf_discover_select_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_rf_discover_select_req(struct nci_dev *ndev, const void *opt)
 {
-       const struct nci_rf_discover_select_param *param =
-               (struct nci_rf_discover_select_param *)opt;
+       const struct nci_rf_discover_select_param *param = opt;
        struct nci_rf_discover_select_cmd cmd;
 
        cmd.rf_discovery_id = param->rf_discovery_id;
@@ -329,11 +326,11 @@ static void nci_rf_discover_select_req(struct nci_dev *ndev, unsigned long opt)
                     sizeof(struct nci_rf_discover_select_cmd), &cmd);
 }
 
-static void nci_rf_deactivate_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_rf_deactivate_req(struct nci_dev *ndev, const void *opt)
 {
        struct nci_rf_deactivate_cmd cmd;
 
-       cmd.type = opt;
+       cmd.type = (unsigned long)opt;
 
        nci_send_cmd(ndev, NCI_OP_RF_DEACTIVATE_CMD,
                     sizeof(struct nci_rf_deactivate_cmd), &cmd);
@@ -345,10 +342,9 @@ struct nci_cmd_param {
        const __u8 *payload;
 };
 
-static void nci_generic_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_generic_req(struct nci_dev *ndev, const void *opt)
 {
-       const struct nci_cmd_param *param =
-               (struct nci_cmd_param *)opt;
+       const struct nci_cmd_param *param = opt;
 
        nci_send_cmd(ndev, param->opcode, param->len, param->payload);
 }
@@ -361,7 +357,7 @@ int nci_prop_cmd(struct nci_dev *ndev, __u8 oid, size_t len, const __u8 *payload
        param.len = len;
        param.payload = payload;
 
-       return __nci_request(ndev, nci_generic_req, (unsigned long)&param,
+       return __nci_request(ndev, nci_generic_req, &param,
                             msecs_to_jiffies(NCI_CMD_TIMEOUT));
 }
 EXPORT_SYMBOL(nci_prop_cmd);
@@ -375,21 +371,21 @@ int nci_core_cmd(struct nci_dev *ndev, __u16 opcode, size_t len,
        param.len = len;
        param.payload = payload;
 
-       return __nci_request(ndev, nci_generic_req, (unsigned long)&param,
+       return __nci_request(ndev, nci_generic_req, &param,
                             msecs_to_jiffies(NCI_CMD_TIMEOUT));
 }
 EXPORT_SYMBOL(nci_core_cmd);
 
 int nci_core_reset(struct nci_dev *ndev)
 {
-       return __nci_request(ndev, nci_reset_req, 0,
+       return __nci_request(ndev, nci_reset_req, (void *)0,
                             msecs_to_jiffies(NCI_RESET_TIMEOUT));
 }
 EXPORT_SYMBOL(nci_core_reset);
 
 int nci_core_init(struct nci_dev *ndev)
 {
-       return __nci_request(ndev, nci_init_req, 0,
+       return __nci_request(ndev, nci_init_req, (void *)0,
                             msecs_to_jiffies(NCI_INIT_TIMEOUT));
 }
 EXPORT_SYMBOL(nci_core_init);
@@ -399,9 +395,9 @@ struct nci_loopback_data {
        struct sk_buff *data;
 };
 
-static void nci_send_data_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_send_data_req(struct nci_dev *ndev, const void *opt)
 {
-       const struct nci_loopback_data *data = (struct nci_loopback_data *)opt;
+       const struct nci_loopback_data *data = opt;
 
        nci_send_data(ndev, data->conn_id, data->data);
 }
@@ -462,7 +458,7 @@ int nci_nfcc_loopback(struct nci_dev *ndev, const void *data, size_t data_len,
        loopback_data.data = skb;
 
        ndev->cur_conn_id = conn_id;
-       r = nci_request(ndev, nci_send_data_req, (unsigned long)&loopback_data,
+       r = nci_request(ndev, nci_send_data_req, &loopback_data,
                        msecs_to_jiffies(NCI_DATA_TIMEOUT));
        if (r == NCI_STATUS_OK && resp)
                *resp = conn_info->rx_skb;
@@ -495,7 +491,7 @@ static int nci_open_device(struct nci_dev *ndev)
                rc = ndev->ops->init(ndev);
 
        if (!rc) {
-               rc = __nci_request(ndev, nci_reset_req, 0,
+               rc = __nci_request(ndev, nci_reset_req, (void *)0,
                                   msecs_to_jiffies(NCI_RESET_TIMEOUT));
        }
 
@@ -508,10 +504,10 @@ static int nci_open_device(struct nci_dev *ndev)
                        .feature1 = NCI_FEATURE_DISABLE,
                        .feature2 = NCI_FEATURE_DISABLE
                };
-               unsigned long opt = 0;
+               const void *opt = NULL;
 
                if (ndev->nci_ver & NCI_VER_2_MASK)
-                       opt = (unsigned long)&nci_init_v2_cmd;
+                       opt = &nci_init_v2_cmd;
 
                rc = __nci_request(ndev, nci_init_req, opt,
                                   msecs_to_jiffies(NCI_INIT_TIMEOUT));
@@ -521,7 +517,7 @@ static int nci_open_device(struct nci_dev *ndev)
                rc = ndev->ops->post_setup(ndev);
 
        if (!rc) {
-               rc = __nci_request(ndev, nci_init_complete_req, 0,
+               rc = __nci_request(ndev, nci_init_complete_req, (void *)0,
                                   msecs_to_jiffies(NCI_INIT_TIMEOUT));
        }
 
@@ -571,7 +567,7 @@ static int nci_close_device(struct nci_dev *ndev)
        atomic_set(&ndev->cmd_cnt, 1);
 
        set_bit(NCI_INIT, &ndev->flags);
-       __nci_request(ndev, nci_reset_req, 0,
+       __nci_request(ndev, nci_reset_req, (void *)0,
                      msecs_to_jiffies(NCI_RESET_TIMEOUT));
 
        /* After this point our queues are empty
@@ -637,15 +633,15 @@ int nci_set_config(struct nci_dev *ndev, __u8 id, size_t len, const __u8 *val)
        param.len = len;
        param.val = val;
 
-       return __nci_request(ndev, nci_set_config_req, (unsigned long)&param,
+       return __nci_request(ndev, nci_set_config_req, &param,
                             msecs_to_jiffies(NCI_SET_CONFIG_TIMEOUT));
 }
 EXPORT_SYMBOL(nci_set_config);
 
-static void nci_nfcee_discover_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_nfcee_discover_req(struct nci_dev *ndev, const void *opt)
 {
        struct nci_nfcee_discover_cmd cmd;
-       __u8 action = opt;
+       __u8 action = (unsigned long)opt;
 
        cmd.discovery_action = action;
 
@@ -654,15 +650,16 @@ static void nci_nfcee_discover_req(struct nci_dev *ndev, unsigned long opt)
 
 int nci_nfcee_discover(struct nci_dev *ndev, u8 action)
 {
-       return __nci_request(ndev, nci_nfcee_discover_req, action,
+       unsigned long opt = action;
+
+       return __nci_request(ndev, nci_nfcee_discover_req, (void *)opt,
                                msecs_to_jiffies(NCI_CMD_TIMEOUT));
 }
 EXPORT_SYMBOL(nci_nfcee_discover);
 
-static void nci_nfcee_mode_set_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_nfcee_mode_set_req(struct nci_dev *ndev, const void *opt)
 {
-       const struct nci_nfcee_mode_set_cmd *cmd =
-                                       (struct nci_nfcee_mode_set_cmd *)opt;
+       const struct nci_nfcee_mode_set_cmd *cmd = opt;
 
        nci_send_cmd(ndev, NCI_OP_NFCEE_MODE_SET_CMD,
                     sizeof(struct nci_nfcee_mode_set_cmd), cmd);
@@ -675,16 +672,14 @@ int nci_nfcee_mode_set(struct nci_dev *ndev, u8 nfcee_id, u8 nfcee_mode)
        cmd.nfcee_id = nfcee_id;
        cmd.nfcee_mode = nfcee_mode;
 
-       return __nci_request(ndev, nci_nfcee_mode_set_req,
-                            (unsigned long)&cmd,
+       return __nci_request(ndev, nci_nfcee_mode_set_req, &cmd,
                             msecs_to_jiffies(NCI_CMD_TIMEOUT));
 }
 EXPORT_SYMBOL(nci_nfcee_mode_set);
 
-static void nci_core_conn_create_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_core_conn_create_req(struct nci_dev *ndev, const void *opt)
 {
-       const struct core_conn_create_data *data =
-                                       (struct core_conn_create_data *)opt;
+       const struct core_conn_create_data *data = opt;
 
        nci_send_cmd(ndev, NCI_OP_CORE_CONN_CREATE_CMD, data->length, data->cmd);
 }
@@ -721,24 +716,26 @@ int nci_core_conn_create(struct nci_dev *ndev, u8 destination_type,
        }
        ndev->cur_dest_type = destination_type;
 
-       r = __nci_request(ndev, nci_core_conn_create_req, (unsigned long)&data,
+       r = __nci_request(ndev, nci_core_conn_create_req, &data,
                          msecs_to_jiffies(NCI_CMD_TIMEOUT));
        kfree(cmd);
        return r;
 }
 EXPORT_SYMBOL(nci_core_conn_create);
 
-static void nci_core_conn_close_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_core_conn_close_req(struct nci_dev *ndev, const void *opt)
 {
-       __u8 conn_id = opt;
+       __u8 conn_id = (unsigned long)opt;
 
        nci_send_cmd(ndev, NCI_OP_CORE_CONN_CLOSE_CMD, 1, &conn_id);
 }
 
 int nci_core_conn_close(struct nci_dev *ndev, u8 conn_id)
 {
+       unsigned long opt = conn_id;
+
        ndev->cur_conn_id = conn_id;
-       return __nci_request(ndev, nci_core_conn_close_req, conn_id,
+       return __nci_request(ndev, nci_core_conn_close_req, (void *)opt,
                             msecs_to_jiffies(NCI_CMD_TIMEOUT));
 }
 EXPORT_SYMBOL(nci_core_conn_close);
@@ -758,14 +755,14 @@ static int nci_set_local_general_bytes(struct nfc_dev *nfc_dev)
 
        param.id = NCI_PN_ATR_REQ_GEN_BYTES;
 
-       rc = nci_request(ndev, nci_set_config_req, (unsigned long)&param,
+       rc = nci_request(ndev, nci_set_config_req, &param,
                         msecs_to_jiffies(NCI_SET_CONFIG_TIMEOUT));
        if (rc)
                return rc;
 
        param.id = NCI_LN_ATR_RES_GEN_BYTES;
 
-       return nci_request(ndev, nci_set_config_req, (unsigned long)&param,
+       return nci_request(ndev, nci_set_config_req, &param,
                           msecs_to_jiffies(NCI_SET_CONFIG_TIMEOUT));
 }
 
@@ -815,7 +812,7 @@ static int nci_start_poll(struct nfc_dev *nfc_dev,
                pr_debug("target active or w4 select, implicitly deactivate\n");
 
                rc = nci_request(ndev, nci_rf_deactivate_req,
-                                NCI_DEACTIVATE_TYPE_IDLE_MODE,
+                                (void *)NCI_DEACTIVATE_TYPE_IDLE_MODE,
                                 msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT));
                if (rc)
                        return -EBUSY;
@@ -837,7 +834,7 @@ static int nci_start_poll(struct nfc_dev *nfc_dev,
 
        param.im_protocols = im_protocols;
        param.tm_protocols = tm_protocols;
-       rc = nci_request(ndev, nci_rf_discover_req, (unsigned long)&param,
+       rc = nci_request(ndev, nci_rf_discover_req, &param,
                         msecs_to_jiffies(NCI_RF_DISC_TIMEOUT));
 
        if (!rc)
@@ -856,7 +853,8 @@ static void nci_stop_poll(struct nfc_dev *nfc_dev)
                return;
        }
 
-       nci_request(ndev, nci_rf_deactivate_req, NCI_DEACTIVATE_TYPE_IDLE_MODE,
+       nci_request(ndev, nci_rf_deactivate_req,
+                   (void *)NCI_DEACTIVATE_TYPE_IDLE_MODE,
                    msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT));
 }
 
@@ -915,8 +913,7 @@ static int nci_activate_target(struct nfc_dev *nfc_dev,
                else
                        param.rf_protocol = NCI_RF_PROTOCOL_NFC_DEP;
 
-               rc = nci_request(ndev, nci_rf_discover_select_req,
-                                (unsigned long)&param,
+               rc = nci_request(ndev, nci_rf_discover_select_req, &param,
                                 msecs_to_jiffies(NCI_RF_DISC_SELECT_TIMEOUT));
        }
 
@@ -931,7 +928,7 @@ static void nci_deactivate_target(struct nfc_dev *nfc_dev,
                                  __u8 mode)
 {
        struct nci_dev *ndev = nfc_get_drvdata(nfc_dev);
-       u8 nci_mode = NCI_DEACTIVATE_TYPE_IDLE_MODE;
+       unsigned long nci_mode = NCI_DEACTIVATE_TYPE_IDLE_MODE;
 
        pr_debug("entry\n");
 
@@ -949,7 +946,7 @@ static void nci_deactivate_target(struct nfc_dev *nfc_dev,
        }
 
        if (atomic_read(&ndev->state) == NCI_POLL_ACTIVE) {
-               nci_request(ndev, nci_rf_deactivate_req, nci_mode,
+               nci_request(ndev, nci_rf_deactivate_req, (void *)nci_mode,
                            msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT));
        }
 }
@@ -987,8 +984,8 @@ static int nci_dep_link_down(struct nfc_dev *nfc_dev)
        } else {
                if (atomic_read(&ndev->state) == NCI_LISTEN_ACTIVE ||
                    atomic_read(&ndev->state) == NCI_DISCOVERY) {
-                       nci_request(ndev, nci_rf_deactivate_req, 0,
-                               msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT));
+                       nci_request(ndev, nci_rf_deactivate_req, (void *)0,
+                                   msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT));
                }
 
                rc = nfc_tm_deactivated(nfc_dev);
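
The whole nci core diff is one mechanical conversion: request callbacks take
"const void *opt" instead of "unsigned long opt", which drops a pile of
casts for the common pointer payloads, while the few scalar payloads
(deactivate type, conn id, NFCEE action) round-trip through unsigned long
and a (void *) cast. A runnable model of both directions:

    #include <stdio.h>

    static void deactivate_req(const void *opt)
    {
            unsigned char type = (unsigned long)opt;    /* scalar out */

            printf("deactivate type %u\n", type);
    }

    static void config_req(const void *opt)
    {
            const char *param = opt;                    /* plain pointer */

            printf("config param %s\n", param);
    }

    int main(void)
    {
            unsigned long type = 3;     /* e.g. an IDLE-mode constant */

            deactivate_req((void *)type);   /* scalar smuggled in */
            config_req("rf-discover");
            return 0;
    }
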
index a8ff794..e199912 100644 (file)
@@ -195,9 +195,9 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe,
        return i;
 }
 
-static void nci_hci_send_data_req(struct nci_dev *ndev, unsigned long opt)
+static void nci_hci_send_data_req(struct nci_dev *ndev, const void *opt)
 {
-       const struct nci_data *data = (struct nci_data *)opt;
+       const struct nci_data *data = opt;
 
        nci_hci_send_data(ndev, data->pipe, data->cmd,
                          data->data, data->data_len);
@@ -240,7 +240,7 @@ int nci_hci_send_cmd(struct nci_dev *ndev, u8 gate, u8 cmd,
        data.data = param;
        data.data_len = param_len;
 
-       r = nci_request(ndev, nci_hci_send_data_req, (unsigned long)&data,
+       r = nci_request(ndev, nci_hci_send_data_req, &data,
                        msecs_to_jiffies(NCI_DATA_TIMEOUT));
        if (r == NCI_STATUS_OK) {
                message = (struct nci_hcp_message *)conn_info->rx_skb->data;
@@ -511,9 +511,8 @@ int nci_hci_open_pipe(struct nci_dev *ndev, u8 pipe)
        data.data = NULL;
        data.data_len = 0;
 
-       return nci_request(ndev, nci_hci_send_data_req,
-                       (unsigned long)&data,
-                       msecs_to_jiffies(NCI_DATA_TIMEOUT));
+       return nci_request(ndev, nci_hci_send_data_req, &data,
+                          msecs_to_jiffies(NCI_DATA_TIMEOUT));
 }
 EXPORT_SYMBOL(nci_hci_open_pipe);
 
@@ -587,8 +586,7 @@ int nci_hci_set_param(struct nci_dev *ndev, u8 gate, u8 idx,
        data.data = tmp;
        data.data_len = param_len + 1;
 
-       r = nci_request(ndev, nci_hci_send_data_req,
-                       (unsigned long)&data,
+       r = nci_request(ndev, nci_hci_send_data_req, &data,
                        msecs_to_jiffies(NCI_DATA_TIMEOUT));
        if (r == NCI_STATUS_OK) {
                message = (struct nci_hcp_message *)conn_info->rx_skb->data;
@@ -627,7 +625,7 @@ int nci_hci_get_param(struct nci_dev *ndev, u8 gate, u8 idx,
        data.data = &idx;
        data.data_len = 1;
 
-       r = nci_request(ndev, nci_hci_send_data_req, (unsigned long)&data,
+       r = nci_request(ndev, nci_hci_send_data_req, &data,
                        msecs_to_jiffies(NCI_DATA_TIMEOUT));
 
        if (r == NCI_STATUS_OK) {
index 57a1971..543365f 100644 (file)
@@ -250,8 +250,7 @@ static struct net_device *packet_cached_dev_get(struct packet_sock *po)
 
        rcu_read_lock();
        dev = rcu_dereference(po->cached_dev);
-       if (likely(dev))
-               dev_hold(dev);
+       dev_hold(dev);
        rcu_read_unlock();
 
        return dev;
@@ -3024,8 +3023,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
 out_free:
        kfree_skb(skb);
 out_unlock:
-       if (dev)
-               dev_put(dev);
+       dev_put(dev);
 out:
        return err;
 }
@@ -3158,8 +3156,7 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
                }
        }
 
-       if (dev)
-               dev_hold(dev);
+       dev_hold(dev);
 
        proto_curr = po->prot_hook.type;
        dev_curr = po->prot_hook.dev;
@@ -3196,8 +3193,7 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
                        packet_cached_dev_assign(po, dev);
                }
        }
-       if (dev_curr)
-               dev_put(dev_curr);
+       dev_put(dev_curr);
 
        if (proto == 0 || !need_rehook)
                goto out_unlock;
@@ -4109,8 +4105,7 @@ static int packet_notifier(struct notifier_block *this,
                                if (msg == NETDEV_UNREGISTER) {
                                        packet_cached_dev_reset(po);
                                        WRITE_ONCE(po->ifindex, -1);
-                                       if (po->prot_hook.dev)
-                                               dev_put(po->prot_hook.dev);
+                                       dev_put(po->prot_hook.dev);
                                        po->prot_hook.dev = NULL;
                                }
                                spin_unlock(&po->bind_lock);
index ca6ae4c..65218b7 100644 (file)
@@ -275,8 +275,7 @@ int pn_skb_send(struct sock *sk, struct sk_buff *skb,
 
 drop:
        kfree_skb(skb);
-       if (dev)
-               dev_put(dev);
+       dev_put(dev);
        return err;
 }
 EXPORT_SYMBOL(pn_skb_send);
index 876d0ae..cde671d 100644 (file)
@@ -122,8 +122,7 @@ struct net_device *phonet_device_get(struct net *net)
                        break;
                dev = NULL;
        }
-       if (dev)
-               dev_hold(dev);
+       dev_hold(dev);
        rcu_read_unlock();
        return dev;
 }
@@ -411,8 +410,7 @@ struct net_device *phonet_route_output(struct net *net, u8 daddr)
        daddr >>= 2;
        rcu_read_lock();
        dev = rcu_dereference(routes->table[daddr]);
-       if (dev)
-               dev_hold(dev);
+       dev_hold(dev);
        rcu_read_unlock();
 
        if (!dev)
index 2599235..71e2caf 100644 (file)
@@ -379,8 +379,7 @@ static int pn_socket_ioctl(struct socket *sock, unsigned int cmd,
                        saddr = PN_NO_ADDR;
                release_sock(sk);
 
-               if (dev)
-                       dev_put(dev);
+               dev_put(dev);
                if (saddr == PN_NO_ADDR)
                        return -EHOSTUNREACH;
 
index fa61167..1dc955c 100644 (file)
@@ -15,6 +15,7 @@ struct qrtr_mhi_dev {
        struct qrtr_endpoint ep;
        struct mhi_device *mhi_dev;
        struct device *dev;
+       struct completion ready;
 };
 
 /* From MHI to QRTR */
@@ -50,6 +51,10 @@ static int qcom_mhi_qrtr_send(struct qrtr_endpoint *ep, struct sk_buff *skb)
        struct qrtr_mhi_dev *qdev = container_of(ep, struct qrtr_mhi_dev, ep);
        int rc;
 
+       rc = wait_for_completion_interruptible(&qdev->ready);
+       if (rc)
+               goto free_skb;
+
        if (skb->sk)
                sock_hold(skb->sk);
 
@@ -79,7 +84,7 @@ static int qcom_mhi_qrtr_probe(struct mhi_device *mhi_dev,
        int rc;
 
        /* start channels */
-       rc = mhi_prepare_for_transfer(mhi_dev);
+       rc = mhi_prepare_for_transfer(mhi_dev, 0);
        if (rc)
                return rc;
 
@@ -96,6 +101,15 @@ static int qcom_mhi_qrtr_probe(struct mhi_device *mhi_dev,
        if (rc)
                return rc;
 
+       /* start channels */
+       rc = mhi_prepare_for_transfer(mhi_dev, MHI_CH_INBOUND_ALLOC_BUFS);
+       if (rc) {
+               qrtr_endpoint_unregister(&qdev->ep);
+               dev_set_drvdata(&mhi_dev->dev, NULL);
+               return rc;
+       }
+
+       complete_all(&qdev->ready);
        dev_dbg(qdev->dev, "Qualcomm MHI QRTR driver probed\n");
 
        return 0;
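
The qrtr/mhi change closes a window in which qcom_mhi_qrtr_send() could run
before the endpoint was usable: channel preparation is split into two steps
and senders now block on a completion that probe signals only once
qrtr_endpoint_register() has succeeded. A userspace model of the gate, with
a condition variable standing in for the kernel completion (link with
-lpthread):

    #include <pthread.h>
    #include <stdbool.h>

    struct ready_gate {
            pthread_mutex_t lock;
            pthread_cond_t  cond;
            bool            done;
    };

    static struct ready_gate gate = {
            PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, false
    };

    static void gate_wait(struct ready_gate *g)     /* send path */
    {
            pthread_mutex_lock(&g->lock);
            while (!g->done)
                    pthread_cond_wait(&g->cond, &g->lock);
            pthread_mutex_unlock(&g->lock);
    }

    static void gate_complete_all(struct ready_gate *g)  /* probe path */
    {
            pthread_mutex_lock(&g->lock);
            g->done = true;
            pthread_cond_broadcast(&g->cond);
            pthread_mutex_unlock(&g->lock);
    }

    int main(void)
    {
            gate_complete_all(&gate);   /* registration finished */
            gate_wait(&gate);           /* sender proceeds at once */
            return 0;
    }
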
index e718478..6c61b7b 100644 (file)
@@ -518,8 +518,10 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len)
                if (!ipc)
                        goto err;
 
-               if (sock_queue_rcv_skb(&ipc->sk, skb))
+               if (sock_queue_rcv_skb(&ipc->sk, skb)) {
+                       qrtr_port_put(ipc);
                        goto err;
+               }
 
                qrtr_port_put(ipc);
        }
@@ -839,6 +841,8 @@ static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb,
 
        ipc = qrtr_port_lookup(to->sq_port);
        if (!ipc || &ipc->sk == skb->sk) { /* do not send to self */
+               if (ipc)
+                       qrtr_port_put(ipc);
                kfree_skb(skb);
                return -ENODEV;
        }
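
Both qrtr hunks above are refcount-leak fixes of the same shape: a
successful qrtr_port_lookup() returns a referenced port, so every exit taken
after the lookup, including the error branches added here, must drop exactly
one reference. A small model of the balanced paths:

    struct port_model { int refcnt; };

    static void port_put(struct port_model *p) { p->refcnt--; }

    static int enqueue(struct port_model *ipc, int queue_ok, int is_self)
    {
            if (!ipc)
                    return -1;          /* lookup failed: nothing held */

            if (is_self || !queue_ok) {
                    port_put(ipc);      /* previously leaked here */
                    return -1;
            }
            port_put(ipc);              /* normal balance */
            return 0;
    }

    int main(void)
    {
            struct port_model p = { 1 };

            enqueue(&p, 0, 0);          /* failure path */
            return p.refcnt;            /* 0: reference released */
    }
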
index 998a237..7dd3a2d 100644 (file)
@@ -495,7 +495,7 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
        p->tcfa_tm.install = jiffies;
        p->tcfa_tm.lastuse = jiffies;
        p->tcfa_tm.firstuse = 0;
-       p->tcfa_flags = flags;
+       p->tcfa_flags = flags & TCA_ACT_FLAGS_USER_MASK;
        if (est) {
                err = gen_new_estimator(&p->tcfa_bstats, p->cpu_bstats,
                                        &p->tcfa_rate_est,
@@ -941,7 +941,7 @@ void tcf_idr_insert_many(struct tc_action *actions[])
        }
 }
 
-struct tc_action_ops *tc_action_load_ops(char *name, struct nlattr *nla,
+struct tc_action_ops *tc_action_load_ops(struct nlattr *nla, bool police,
                                         bool rtnl_held,
                                         struct netlink_ext_ack *extack)
 {
@@ -951,7 +951,7 @@ struct tc_action_ops *tc_action_load_ops(char *name, struct nlattr *nla,
        struct nlattr *kind;
        int err;
 
-       if (name == NULL) {
+       if (!police) {
                err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla,
                                                  tcf_action_policy, extack);
                if (err < 0)
@@ -967,7 +967,7 @@ struct tc_action_ops *tc_action_load_ops(char *name, struct nlattr *nla,
                        return ERR_PTR(err);
                }
        } else {
-               if (strlcpy(act_name, name, IFNAMSIZ) >= IFNAMSIZ) {
+               if (strlcpy(act_name, "police", IFNAMSIZ) >= IFNAMSIZ) {
                        NL_SET_ERR_MSG(extack, "TC action name too long");
                        return ERR_PTR(-EINVAL);
                }
@@ -1004,12 +1004,11 @@ struct tc_action_ops *tc_action_load_ops(char *name, struct nlattr *nla,
 
 struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
                                    struct nlattr *nla, struct nlattr *est,
-                                   char *name, int ovr, int bind,
                                    struct tc_action_ops *a_o, int *init_res,
-                                   bool rtnl_held,
-                                   struct netlink_ext_ack *extack)
+                                   u32 flags, struct netlink_ext_ack *extack)
 {
-       struct nla_bitfield32 flags = { 0, 0 };
+       bool police = flags & TCA_ACT_FLAGS_POLICE;
+       struct nla_bitfield32 userflags = { 0, 0 };
        u8 hw_stats = TCA_ACT_HW_STATS_ANY;
        struct nlattr *tb[TCA_ACT_MAX + 1];
        struct tc_cookie *cookie = NULL;
@@ -1017,7 +1016,7 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
        int err;
 
        /* backward compatibility for policer */
-       if (name == NULL) {
+       if (!police) {
                err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla,
                                                  tcf_action_policy, extack);
                if (err < 0)
@@ -1032,22 +1031,22 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
                }
                hw_stats = tcf_action_hw_stats_get(tb[TCA_ACT_HW_STATS]);
                if (tb[TCA_ACT_FLAGS])
-                       flags = nla_get_bitfield32(tb[TCA_ACT_FLAGS]);
+                       userflags = nla_get_bitfield32(tb[TCA_ACT_FLAGS]);
 
-               err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, ovr, bind,
-                               rtnl_held, tp, flags.value, extack);
+               err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, tp,
+                               userflags.value | flags, extack);
        } else {
-               err = a_o->init(net, nla, est, &a, ovr, bind, rtnl_held,
-                               tp, flags.value, extack);
+               err = a_o->init(net, nla, est, &a, tp, userflags.value | flags,
+                               extack);
        }
        if (err < 0)
                goto err_out;
        *init_res = err;
 
-       if (!name && tb[TCA_ACT_COOKIE])
+       if (!police && tb[TCA_ACT_COOKIE])
                tcf_set_action_cookie(&a->act_cookie, cookie);
 
-       if (!name)
+       if (!police)
                a->hw_stats = hw_stats;
 
        return a;
@@ -1063,9 +1062,9 @@ err_out:
 /* Returns numbers of initialized actions or negative error. */
 
 int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
-                   struct nlattr *est, char *name, int ovr, int bind,
-                   struct tc_action *actions[], int init_res[], size_t *attr_size,
-                   bool rtnl_held, struct netlink_ext_ack *extack)
+                   struct nlattr *est, struct tc_action *actions[],
+                   int init_res[], size_t *attr_size, u32 flags,
+                   struct netlink_ext_ack *extack)
 {
        struct tc_action_ops *ops[TCA_ACT_MAX_PRIO] = {};
        struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
@@ -1082,7 +1081,9 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
        for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
                struct tc_action_ops *a_o;
 
-               a_o = tc_action_load_ops(name, tb[i], rtnl_held, extack);
+               a_o = tc_action_load_ops(tb[i], flags & TCA_ACT_FLAGS_POLICE,
+                                        !(flags & TCA_ACT_FLAGS_NO_RTNL),
+                                        extack);
                if (IS_ERR(a_o)) {
                        err = PTR_ERR(a_o);
                        goto err_mod;
@@ -1091,9 +1092,8 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
        }
 
        for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
-               act = tcf_action_init_1(net, tp, tb[i], est, name, ovr, bind,
-                                       ops[i - 1], &init_res[i - 1], rtnl_held,
-                                       extack);
+               act = tcf_action_init_1(net, tp, tb[i], est, ops[i - 1],
+                                       &init_res[i - 1], flags, extack);
                if (IS_ERR(act)) {
                        err = PTR_ERR(act);
                        goto err;
@@ -1113,7 +1113,7 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
        goto err_mod;
 
 err:
-       tcf_action_destroy(actions, bind);
+       tcf_action_destroy(actions, flags & TCA_ACT_FLAGS_BIND);
 err_mod:
        for (i = 0; i < TCA_ACT_MAX_PRIO; i++) {
                if (ops[i])
@@ -1495,7 +1495,7 @@ tcf_add_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[],
 }
 
 static int tcf_action_add(struct net *net, struct nlattr *nla,
-                         struct nlmsghdr *n, u32 portid, int ovr,
+                         struct nlmsghdr *n, u32 portid, u32 flags,
                          struct netlink_ext_ack *extack)
 {
        size_t attr_size = 0;
@@ -1504,8 +1504,8 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
        int init_res[TCA_ACT_MAX_PRIO] = {};
 
        for (loop = 0; loop < 10; loop++) {
-               ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0,
-                                     actions, init_res, &attr_size, true, extack);
+               ret = tcf_action_init(net, NULL, nla, NULL, actions, init_res,
+                                     &attr_size, flags, extack);
                if (ret != -EAGAIN)
                        break;
        }
@@ -1535,7 +1535,8 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
        struct net *net = sock_net(skb->sk);
        struct nlattr *tca[TCA_ROOT_MAX + 1];
        u32 portid = NETLINK_CB(skb).portid;
-       int ret = 0, ovr = 0;
+       u32 flags = 0;
+       int ret = 0;
 
        if ((n->nlmsg_type != RTM_GETACTION) &&
            !netlink_capable(skb, CAP_NET_ADMIN))
@@ -1561,8 +1562,8 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
                 * is zero) then just set this
                 */
                if (n->nlmsg_flags & NLM_F_REPLACE)
-                       ovr = 1;
-               ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, ovr,
+                       flags = TCA_ACT_FLAGS_REPLACE;
+               ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, flags,
                                     extack);
                break;
        case RTM_DELACTION:
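The act_api.c hunks above collapse the old "name"/"ovr"/"bind"/"rtnl_held" arguments into a single u32 flags word: the low bits still carry the user-visible TCA_ACT_FLAGS bitfield32 value, and the kernel-internal controls (TCA_ACT_FLAGS_POLICE, _BIND, _REPLACE, _NO_RTNL) are ORed in above them. A minimal sketch of that layout and of the "existing index" pattern repeated in the per-action init paths below; the bit positions and the helper are illustrative, not the authoritative header definitions:

	#include <stdint.h>

	/* Hypothetical layout: 16 low bits for the userspace bitfield32,
	 * internal control bits stacked above them. */
	#define ACT_FLAGS_USER_BITS	16
	#define ACT_FLAGS_POLICE	(1U << (ACT_FLAGS_USER_BITS + 0))
	#define ACT_FLAGS_BIND		(1U << (ACT_FLAGS_USER_BITS + 1))
	#define ACT_FLAGS_REPLACE	(1U << (ACT_FLAGS_USER_BITS + 2))
	#define ACT_FLAGS_NO_RTNL	(1U << (ACT_FLAGS_USER_BITS + 3))

	/* What each tcf_*_init() below now does on finding an existing
	 * action index, instead of consulting separate ovr/bind ints. */
	static int existing_index_policy(uint32_t flags)
	{
		if (flags & ACT_FLAGS_BIND)
			return 0;	/* binding: keep current defaults */
		if (!(flags & ACT_FLAGS_REPLACE))
			return -17;	/* -EEXIST without NLM_F_REPLACE */
		return 1;		/* proceed with the replacement */
	}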
index e409a00..040807a 100644
@@ -275,11 +275,11 @@ static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog,
 
 static int tcf_bpf_init(struct net *net, struct nlattr *nla,
                        struct nlattr *est, struct tc_action **act,
-                       int replace, int bind, bool rtnl_held,
                        struct tcf_proto *tp, u32 flags,
                        struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, bpf_net_id);
+       bool bind = flags & TCA_ACT_FLAGS_BIND;
        struct nlattr *tb[TCA_ACT_BPF_MAX + 1];
        struct tcf_chain *goto_ch = NULL;
        struct tcf_bpf_cfg cfg, old;
@@ -317,7 +317,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
                if (bind)
                        return 0;
 
-               if (!replace) {
+               if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
                        tcf_idr_release(*act, bind);
                        return -EEXIST;
                }
index e19885d..94e78ac 100644
@@ -96,12 +96,12 @@ static const struct nla_policy connmark_policy[TCA_CONNMARK_MAX + 1] = {
 
 static int tcf_connmark_init(struct net *net, struct nlattr *nla,
                             struct nlattr *est, struct tc_action **a,
-                            int ovr, int bind, bool rtnl_held,
                             struct tcf_proto *tp, u32 flags,
                             struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, connmark_net_id);
        struct nlattr *tb[TCA_CONNMARK_MAX + 1];
+       bool bind = flags & TCA_ACT_FLAGS_BIND;
        struct tcf_chain *goto_ch = NULL;
        struct tcf_connmark_info *ci;
        struct tc_connmark *parm;
@@ -144,7 +144,7 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
                ci = to_connmark(*a);
                if (bind)
                        return 0;
-               if (!ovr) {
+               if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
                        tcf_idr_release(*a, bind);
                        return -EEXIST;
                }
index 4fa4fcb..a15ec95 100644
@@ -41,11 +41,12 @@ static unsigned int csum_net_id;
 static struct tc_action_ops act_csum_ops;
 
 static int tcf_csum_init(struct net *net, struct nlattr *nla,
-                        struct nlattr *est, struct tc_action **a, int ovr,
-                        int bind, bool rtnl_held, struct tcf_proto *tp,
+                        struct nlattr *est, struct tc_action **a,
+                        struct tcf_proto *tp,
                         u32 flags, struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, csum_net_id);
+       bool bind = flags & TCA_ACT_FLAGS_BIND;
        struct tcf_csum_params *params_new;
        struct nlattr *tb[TCA_CSUM_MAX + 1];
        struct tcf_chain *goto_ch = NULL;
@@ -78,7 +79,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
        } else if (err > 0) {
                if (bind)/* dont override defaults */
                        return 0;
-               if (!ovr) {
+               if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
                        tcf_idr_release(*a, bind);
                        return -EEXIST;
                }
index 1b4b351..ad9df0c 100644
@@ -1235,11 +1235,11 @@ static int tcf_ct_fill_params(struct net *net,
 
 static int tcf_ct_init(struct net *net, struct nlattr *nla,
                       struct nlattr *est, struct tc_action **a,
-                      int replace, int bind, bool rtnl_held,
                       struct tcf_proto *tp, u32 flags,
                       struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, ct_net_id);
+       bool bind = flags & TCA_ACT_FLAGS_BIND;
        struct tcf_ct_params *params = NULL;
        struct nlattr *tb[TCA_CT_MAX + 1];
        struct tcf_chain *goto_ch = NULL;
@@ -1279,7 +1279,7 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla,
                if (bind)
                        return 0;
 
-               if (!replace) {
+               if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
                        tcf_idr_release(*a, bind);
                        return -EEXIST;
                }
index b20c8ce..549374a 100644
@@ -154,11 +154,11 @@ static const struct nla_policy ctinfo_policy[TCA_CTINFO_MAX + 1] = {
 
 static int tcf_ctinfo_init(struct net *net, struct nlattr *nla,
                           struct nlattr *est, struct tc_action **a,
-                          int ovr, int bind, bool rtnl_held,
                           struct tcf_proto *tp, u32 flags,
                           struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, ctinfo_net_id);
+       bool bind = flags & TCA_ACT_FLAGS_BIND;
        u32 dscpmask = 0, dscpstatemask, index;
        struct nlattr *tb[TCA_CTINFO_MAX + 1];
        struct tcf_ctinfo_params *cp_new;
@@ -221,7 +221,7 @@ static int tcf_ctinfo_init(struct net *net, struct nlattr *nla,
        } else if (err > 0) {
                if (bind) /* don't override defaults */
                        return 0;
-               if (!ovr) {
+               if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
                        tcf_idr_release(*a, bind);
                        return -EEXIST;
                }
index 73c3926..d8dce17 100644
@@ -52,11 +52,11 @@ static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = {
 
 static int tcf_gact_init(struct net *net, struct nlattr *nla,
                         struct nlattr *est, struct tc_action **a,
-                        int ovr, int bind, bool rtnl_held,
                         struct tcf_proto *tp, u32 flags,
                         struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, gact_net_id);
+       bool bind = flags & TCA_ACT_FLAGS_BIND;
        struct nlattr *tb[TCA_GACT_MAX + 1];
        struct tcf_chain *goto_ch = NULL;
        struct tc_gact *parm;
@@ -109,7 +109,7 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
        } else if (err > 0) {
                if (bind)/* dont override defaults */
                        return 0;
-               if (!ovr) {
+               if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
                        tcf_idr_release(*a, bind);
                        return -EEXIST;
                }
index a78cb79..7df72a4 100644
@@ -295,12 +295,12 @@ static void gate_setup_timer(struct tcf_gate *gact, u64 basetime,
 
 static int tcf_gate_init(struct net *net, struct nlattr *nla,
                         struct nlattr *est, struct tc_action **a,
-                        int ovr, int bind, bool rtnl_held,
                         struct tcf_proto *tp, u32 flags,
                         struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, gate_net_id);
        enum tk_offsets tk_offset = TK_OFFS_TAI;
+       bool bind = flags & TCA_ACT_FLAGS_BIND;
        struct nlattr *tb[TCA_GATE_MAX + 1];
        struct tcf_chain *goto_ch = NULL;
        u64 cycletime = 0, basetime = 0;
@@ -364,7 +364,7 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
                }
 
                ret = ACT_P_CREATED;
-       } else if (!ovr) {
+       } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
                tcf_idr_release(*a, bind);
                return -EEXIST;
        }
index a2ddea0..7064a36 100644
@@ -479,11 +479,11 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb,
 
 static int tcf_ife_init(struct net *net, struct nlattr *nla,
                        struct nlattr *est, struct tc_action **a,
-                       int ovr, int bind, bool rtnl_held,
                        struct tcf_proto *tp, u32 flags,
                        struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, ife_net_id);
+       bool bind = flags & TCA_ACT_FLAGS_BIND;
        struct nlattr *tb[TCA_IFE_MAX + 1];
        struct nlattr *tb2[IFE_META_MAX + 1];
        struct tcf_chain *goto_ch = NULL;
@@ -532,7 +532,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
                        kfree(p);
                        return err;
                }
-               err = load_metalist(tb2, rtnl_held);
+               err = load_metalist(tb2, !(flags & TCA_ACT_FLAGS_NO_RTNL));
                if (err) {
                        kfree(p);
                        return err;
@@ -560,7 +560,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
                        return ret;
                }
                ret = ACT_P_CREATED;
-       } else if (!ovr) {
+       } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
                tcf_idr_release(*a, bind);
                kfree(p);
                return -EEXIST;
@@ -600,7 +600,8 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
        }
 
        if (tb[TCA_IFE_METALST]) {
-               err = populate_metalist(ife, tb2, exists, rtnl_held);
+               err = populate_metalist(ife, tb2, exists,
+                                       !(flags & TCA_ACT_FLAGS_NO_RTNL));
                if (err)
                        goto metadata_parse_err;
        } else {
index ac7297f..265b144 100644
@@ -94,10 +94,11 @@ static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = {
 
 static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
                          struct nlattr *est, struct tc_action **a,
-                         const struct tc_action_ops *ops, int ovr, int bind,
+                         const struct tc_action_ops *ops,
                          struct tcf_proto *tp, u32 flags)
 {
        struct tc_action_net *tn = net_generic(net, id);
+       bool bind = flags & TCA_ACT_FLAGS_BIND;
        struct nlattr *tb[TCA_IPT_MAX + 1];
        struct tcf_ipt *ipt;
        struct xt_entry_target *td, *t;
@@ -154,7 +155,7 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
                if (bind)/* dont override defaults */
                        return 0;
 
-               if (!ovr) {
+               if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
                        tcf_idr_release(*a, bind);
                        return -EEXIST;
                }
@@ -201,21 +202,21 @@ err1:
 }
 
 static int tcf_ipt_init(struct net *net, struct nlattr *nla,
-                       struct nlattr *est, struct tc_action **a, int ovr,
-                       int bind, bool rtnl_held, struct tcf_proto *tp,
+                       struct nlattr *est, struct tc_action **a,
+                       struct tcf_proto *tp,
                        u32 flags, struct netlink_ext_ack *extack)
 {
-       return __tcf_ipt_init(net, ipt_net_id, nla, est, a, &act_ipt_ops, ovr,
-                             bind, tp, flags);
+       return __tcf_ipt_init(net, ipt_net_id, nla, est, a, &act_ipt_ops,
+                             tp, flags);
 }
 
 static int tcf_xt_init(struct net *net, struct nlattr *nla,
-                      struct nlattr *est, struct tc_action **a, int ovr,
-                      int bind, bool unlocked, struct tcf_proto *tp,
+                      struct nlattr *est, struct tc_action **a,
+                      struct tcf_proto *tp,
                       u32 flags, struct netlink_ext_ack *extack)
 {
-       return __tcf_ipt_init(net, xt_net_id, nla, est, a, &act_xt_ops, ovr,
-                             bind, tp, flags);
+       return __tcf_ipt_init(net, xt_net_id, nla, est, a, &act_xt_ops,
+                             tp, flags);
 }
 
 static int tcf_ipt_act(struct sk_buff *skb, const struct tc_action *a,
index 7153c67..37f51d7 100644
@@ -78,8 +78,7 @@ static void tcf_mirred_release(struct tc_action *a)
 
        /* last reference to action, no need to lock */
        dev = rcu_dereference_protected(m->tcfm_dev, 1);
-       if (dev)
-               dev_put(dev);
+       dev_put(dev);
 }
 
 static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = {
@@ -91,11 +90,11 @@ static struct tc_action_ops act_mirred_ops;
 
 static int tcf_mirred_init(struct net *net, struct nlattr *nla,
                           struct nlattr *est, struct tc_action **a,
-                          int ovr, int bind, bool rtnl_held,
                           struct tcf_proto *tp,
                           u32 flags, struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, mirred_net_id);
+       bool bind = flags & TCA_ACT_FLAGS_BIND;
        struct nlattr *tb[TCA_MIRRED_MAX + 1];
        struct tcf_chain *goto_ch = NULL;
        bool mac_header_xmit = false;
@@ -155,7 +154,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
                        return ret;
                }
                ret = ACT_P_CREATED;
-       } else if (!ovr) {
+       } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
                tcf_idr_release(*a, bind);
                return -EEXIST;
        }
@@ -180,8 +179,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
                mac_header_xmit = dev_is_mac_header_xmit(dev);
                dev = rcu_replace_pointer(m->tcfm_dev, dev,
                                          lockdep_is_held(&m->tcf_lock));
-               if (dev)
-                       dev_put(dev);
+               dev_put(dev);
                m->tcfm_mac_header_xmit = mac_header_xmit;
        }
        goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
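Orthogonal to the flags conversion, the mirred hunks also drop the "if (dev)" guards around dev_put(): the netdevice refcount helpers were made NULL-tolerant earlier in this cycle, so the guard now lives in one place. A reduced sketch of the idea, using a stand-in type rather than the real struct net_device:

	#include <linux/refcount.h>

	struct sketch_dev {
		refcount_t refcnt;	/* stand-in for netdev refcounting */
	};

	/* NULL-safe put: callers need no guard of their own. */
	static inline void sketch_dev_put(struct sketch_dev *dev)
	{
		if (dev)
			refcount_dec(&dev->refcnt);
	}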
index d1486ea..e4529b4 100644
@@ -152,11 +152,11 @@ static const struct nla_policy mpls_policy[TCA_MPLS_MAX + 1] = {
 
 static int tcf_mpls_init(struct net *net, struct nlattr *nla,
                         struct nlattr *est, struct tc_action **a,
-                        int ovr, int bind, bool rtnl_held,
                         struct tcf_proto *tp, u32 flags,
                         struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, mpls_net_id);
+       bool bind = flags & TCA_ACT_FLAGS_BIND;
        struct nlattr *tb[TCA_MPLS_MAX + 1];
        struct tcf_chain *goto_ch = NULL;
        struct tcf_mpls_params *p;
@@ -255,7 +255,7 @@ static int tcf_mpls_init(struct net *net, struct nlattr *nla,
                }
 
                ret = ACT_P_CREATED;
-       } else if (!ovr) {
+       } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
                tcf_idr_release(*a, bind);
                return -EEXIST;
        }
index 1ebd2a8..7dd6b58 100644
@@ -34,11 +34,11 @@ static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = {
 };
 
 static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
-                       struct tc_action **a, int ovr, int bind,
-                       bool rtnl_held, struct tcf_proto *tp,
+                       struct tc_action **a, struct tcf_proto *tp,
                        u32 flags, struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, nat_net_id);
+       bool bind = flags & TCA_ACT_FLAGS_BIND;
        struct nlattr *tb[TCA_NAT_MAX + 1];
        struct tcf_chain *goto_ch = NULL;
        struct tc_nat *parm;
@@ -70,7 +70,7 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
        } else if (err > 0) {
                if (bind)
                        return 0;
-               if (!ovr) {
+               if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
                        tcf_idr_release(*a, bind);
                        return -EEXIST;
                }
index b453044..c6c862c 100644
@@ -136,11 +136,11 @@ nla_failure:
 
 static int tcf_pedit_init(struct net *net, struct nlattr *nla,
                          struct nlattr *est, struct tc_action **a,
-                         int ovr, int bind, bool rtnl_held,
                          struct tcf_proto *tp, u32 flags,
                          struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, pedit_net_id);
+       bool bind = flags & TCA_ACT_FLAGS_BIND;
        struct nlattr *tb[TCA_PEDIT_MAX + 1];
        struct tcf_chain *goto_ch = NULL;
        struct tc_pedit_key *keys = NULL;
@@ -198,7 +198,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
        } else if (err > 0) {
                if (bind)
                        goto out_free;
-               if (!ovr) {
+               if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
                        ret = -EEXIST;
                        goto out_release;
                }
index 0fab8de..832157a 100644
@@ -48,11 +48,11 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
 
 static int tcf_police_init(struct net *net, struct nlattr *nla,
                               struct nlattr *est, struct tc_action **a,
-                              int ovr, int bind, bool rtnl_held,
                               struct tcf_proto *tp, u32 flags,
                               struct netlink_ext_ack *extack)
 {
        int ret = 0, tcfp_result = TC_ACT_OK, err, size;
+       bool bind = flags & TCA_ACT_FLAGS_BIND;
        struct nlattr *tb[TCA_POLICE_MAX + 1];
        struct tcf_chain *goto_ch = NULL;
        struct tc_police *parm;
@@ -97,7 +97,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
                }
                ret = ACT_P_CREATED;
                spin_lock_init(&(to_police(*a)->tcfp_lock));
-       } else if (!ovr) {
+       } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
                tcf_idr_release(*a, bind);
                return -EEXIST;
        }
index 6a0c16e..230501e 100644
@@ -34,11 +34,12 @@ static const struct nla_policy sample_policy[TCA_SAMPLE_MAX + 1] = {
 };
 
 static int tcf_sample_init(struct net *net, struct nlattr *nla,
-                          struct nlattr *est, struct tc_action **a, int ovr,
-                          int bind, bool rtnl_held, struct tcf_proto *tp,
+                          struct nlattr *est, struct tc_action **a,
+                          struct tcf_proto *tp,
                           u32 flags, struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, sample_net_id);
+       bool bind = flags & TCA_ACT_FLAGS_BIND;
        struct nlattr *tb[TCA_SAMPLE_MAX + 1];
        struct psample_group *psample_group;
        u32 psample_group_num, rate, index;
@@ -75,7 +76,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
                        return ret;
                }
                ret = ACT_P_CREATED;
-       } else if (!ovr) {
+       } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
                tcf_idr_release(*a, bind);
                return -EEXIST;
        }
index 726cc95..cbbe186 100644
@@ -85,11 +85,11 @@ static const struct nla_policy simple_policy[TCA_DEF_MAX + 1] = {
 
 static int tcf_simp_init(struct net *net, struct nlattr *nla,
                         struct nlattr *est, struct tc_action **a,
-                        int ovr, int bind, bool rtnl_held,
                         struct tcf_proto *tp, u32 flags,
                         struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, simp_net_id);
+       bool bind = flags & TCA_ACT_FLAGS_BIND;
        struct nlattr *tb[TCA_DEF_MAX + 1];
        struct tcf_chain *goto_ch = NULL;
        struct tc_defact *parm;
@@ -147,7 +147,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
                tcf_action_set_ctrlact(*a, parm->action, goto_ch);
                ret = ACT_P_CREATED;
        } else {
-               if (!ovr) {
+               if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
                        err = -EEXIST;
                        goto release_idr;
                }
index e5f3fb8..6054185 100644
@@ -96,11 +96,11 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
 
 static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
                            struct nlattr *est, struct tc_action **a,
-                           int ovr, int bind, bool rtnl_held,
                            struct tcf_proto *tp, u32 act_flags,
                            struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, skbedit_net_id);
+       bool bind = act_flags & TCA_ACT_FLAGS_BIND;
        struct tcf_skbedit_params *params_new;
        struct nlattr *tb[TCA_SKBEDIT_MAX + 1];
        struct tcf_chain *goto_ch = NULL;
@@ -186,7 +186,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
                ret = ACT_P_CREATED;
        } else {
                d = to_skbedit(*a);
-               if (!ovr) {
+               if (!(act_flags & TCA_ACT_FLAGS_REPLACE)) {
                        tcf_idr_release(*a, bind);
                        return -EEXIST;
                }
index 762ceec..ecb9ee6 100644
@@ -100,11 +100,12 @@ static const struct nla_policy skbmod_policy[TCA_SKBMOD_MAX + 1] = {
 
 static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
                           struct nlattr *est, struct tc_action **a,
-                          int ovr, int bind, bool rtnl_held,
                           struct tcf_proto *tp, u32 flags,
                           struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, skbmod_net_id);
+       bool ovr = flags & TCA_ACT_FLAGS_REPLACE;
+       bool bind = flags & TCA_ACT_FLAGS_BIND;
        struct nlattr *tb[TCA_SKBMOD_MAX + 1];
        struct tcf_skbmod_params *p, *p_old;
        struct tcf_chain *goto_ch = NULL;
index 85c0d0d..d9cd174 100644
@@ -355,11 +355,11 @@ static void tunnel_key_release_params(struct tcf_tunnel_key_params *p)
 
 static int tunnel_key_init(struct net *net, struct nlattr *nla,
                           struct nlattr *est, struct tc_action **a,
-                          int ovr, int bind, bool rtnl_held,
                           struct tcf_proto *tp, u32 act_flags,
                           struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
+       bool bind = act_flags & TCA_ACT_FLAGS_BIND;
        struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1];
        struct tcf_tunnel_key_params *params_new;
        struct metadata_dst *metadata = NULL;
@@ -504,7 +504,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
                }
 
                ret = ACT_P_CREATED;
-       } else if (!ovr) {
+       } else if (!(act_flags & TCA_ACT_FLAGS_REPLACE)) {
                NL_SET_ERR_MSG(extack, "TC IDR already exists");
                ret = -EEXIST;
                goto release_tun_meta;
index 71f2015..e4dc5a5 100644
@@ -114,11 +114,11 @@ static const struct nla_policy vlan_policy[TCA_VLAN_MAX + 1] = {
 
 static int tcf_vlan_init(struct net *net, struct nlattr *nla,
                         struct nlattr *est, struct tc_action **a,
-                        int ovr, int bind, bool rtnl_held,
                         struct tcf_proto *tp, u32 flags,
                         struct netlink_ext_ack *extack)
 {
        struct tc_action_net *tn = net_generic(net, vlan_net_id);
+       bool bind = flags & TCA_ACT_FLAGS_BIND;
        struct nlattr *tb[TCA_VLAN_MAX + 1];
        struct tcf_chain *goto_ch = NULL;
        bool push_prio_exists = false;
@@ -223,7 +223,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
                }
 
                ret = ACT_P_CREATED;
-       } else if (!ovr) {
+       } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
                tcf_idr_release(*a, bind);
                return -EEXIST;
        }
index 7be5b9d..4a7043a 100644
@@ -1949,6 +1949,7 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
        int err;
        int tp_created;
        bool rtnl_held = false;
+       u32 flags;
 
        if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
                return -EPERM;
@@ -1969,6 +1970,7 @@ replay:
        tp = NULL;
        cl = 0;
        block = NULL;
+       flags = 0;
 
        if (prio == 0) {
                /* If no priority is provided by the user,
@@ -2112,9 +2114,12 @@ replay:
                goto errout;
        }
 
+       if (!(n->nlmsg_flags & NLM_F_CREATE))
+               flags |= TCA_ACT_FLAGS_REPLACE;
+       if (!rtnl_held)
+               flags |= TCA_ACT_FLAGS_NO_RTNL;
        err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
-                             n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE,
-                             rtnl_held, extack);
+                             flags, extack);
        if (err == 0) {
                tfilter_notify(net, skb, n, tp, block, q, parent, fh,
                               RTM_NEWTFILTER, false, rtnl_held);
@@ -3020,8 +3025,8 @@ void tcf_exts_destroy(struct tcf_exts *exts)
 EXPORT_SYMBOL(tcf_exts_destroy);
 
 int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
-                     struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr,
-                     bool rtnl_held, struct netlink_ext_ack *extack)
+                     struct nlattr *rate_tlv, struct tcf_exts *exts,
+                     u32 flags, struct netlink_ext_ack *extack)
 {
 #ifdef CONFIG_NET_CLS_ACT
        {
@@ -3032,13 +3037,15 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
                if (exts->police && tb[exts->police]) {
                        struct tc_action_ops *a_o;
 
-                       a_o = tc_action_load_ops("police", tb[exts->police], rtnl_held, extack);
+                       a_o = tc_action_load_ops(tb[exts->police], true,
+                                                !(flags & TCA_ACT_FLAGS_NO_RTNL),
+                                                extack);
                        if (IS_ERR(a_o))
                                return PTR_ERR(a_o);
+                       flags |= TCA_ACT_FLAGS_POLICE | TCA_ACT_FLAGS_BIND;
                        act = tcf_action_init_1(net, tp, tb[exts->police],
-                                               rate_tlv, "police", ovr,
-                                               TCA_ACT_BIND, a_o, init_res,
-                                               rtnl_held, extack);
+                                               rate_tlv, a_o, init_res, flags,
+                                               extack);
                        module_put(a_o->owner);
                        if (IS_ERR(act))
                                return PTR_ERR(act);
@@ -3050,10 +3057,10 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
                } else if (exts->action && tb[exts->action]) {
                        int err;
 
+                       flags |= TCA_ACT_FLAGS_BIND;
                        err = tcf_action_init(net, tp, tb[exts->action],
-                                             rate_tlv, NULL, ovr, TCA_ACT_BIND,
-                                             exts->actions, init_res,
-                                             &attr_size, rtnl_held, extack);
+                                             rate_tlv, exts->actions, init_res,
+                                             &attr_size, flags, extack);
                        if (err < 0)
                                return err;
                        exts->nr_actions = err;
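With tcf_exts_validate() taking the same word, classifiers stop threading "ovr"/"rtnl_held" through every layer: cls_api builds the flags once from the netlink request and the lock state (see tc_new_tfilter above), and the extension code ORs in TCA_ACT_FLAGS_BIND, plus TCA_ACT_FLAGS_POLICE on the legacy policer path. Condensed, reusing the illustrative names from the earlier sketch:

	uint32_t flags = 0;

	if (!(nlmsg_flags & 0x400 /* NLM_F_CREATE */))
		flags |= ACT_FLAGS_REPLACE;	/* no create: replace mode */
	if (!rtnl_held)
		flags |= ACT_FLAGS_NO_RTNL;	/* unlocked classifier path */

	/* inside tcf_exts_validate(): */
	flags |= ACT_FLAGS_BIND;		/* actions bound via a filter */
	/* ...and ACT_FLAGS_POLICE when parsing tb[exts->police] */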
index f256a7c..8158fc9 100644
@@ -145,12 +145,12 @@ static const struct nla_policy basic_policy[TCA_BASIC_MAX + 1] = {
 static int basic_set_parms(struct net *net, struct tcf_proto *tp,
                           struct basic_filter *f, unsigned long base,
                           struct nlattr **tb,
-                          struct nlattr *est, bool ovr,
+                          struct nlattr *est, u32 flags,
                           struct netlink_ext_ack *extack)
 {
        int err;
 
-       err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, true, extack);
+       err = tcf_exts_validate(net, tp, tb, est, &f->exts, flags, extack);
        if (err < 0)
                return err;
 
@@ -169,8 +169,8 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp,
 
 static int basic_change(struct net *net, struct sk_buff *in_skb,
                        struct tcf_proto *tp, unsigned long base, u32 handle,
-                       struct nlattr **tca, void **arg, bool ovr,
-                       bool rtnl_held, struct netlink_ext_ack *extack)
+                       struct nlattr **tca, void **arg,
+                       u32 flags, struct netlink_ext_ack *extack)
 {
        int err;
        struct basic_head *head = rtnl_dereference(tp->root);
@@ -216,7 +216,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
                goto errout;
        }
 
-       err = basic_set_parms(net, tp, fnew, base, tb, tca[TCA_RATE], ovr,
+       err = basic_set_parms(net, tp, fnew, base, tb, tca[TCA_RATE], flags,
                              extack);
        if (err < 0) {
                if (!fold)
index fa739ef..3b472ba 100644
@@ -404,7 +404,7 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog,
 
 static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp,
                             struct cls_bpf_prog *prog, unsigned long base,
-                            struct nlattr **tb, struct nlattr *est, bool ovr,
+                            struct nlattr **tb, struct nlattr *est, u32 flags,
                             struct netlink_ext_ack *extack)
 {
        bool is_bpf, is_ebpf, have_exts = false;
@@ -416,7 +416,7 @@ static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp,
        if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf))
                return -EINVAL;
 
-       ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, ovr, true,
+       ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, flags,
                                extack);
        if (ret < 0)
                return ret;
@@ -455,7 +455,7 @@ static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp,
 static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
                          struct tcf_proto *tp, unsigned long base,
                          u32 handle, struct nlattr **tca,
-                         void **arg, bool ovr, bool rtnl_held,
+                         void **arg, u32 flags,
                          struct netlink_ext_ack *extack)
 {
        struct cls_bpf_head *head = rtnl_dereference(tp->root);
@@ -500,7 +500,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
                goto errout;
        prog->handle = handle;
 
-       ret = cls_bpf_set_parms(net, tp, prog, base, tb, tca[TCA_RATE], ovr,
+       ret = cls_bpf_set_parms(net, tp, prog, base, tb, tca[TCA_RATE], flags,
                                extack);
        if (ret < 0)
                goto errout_idr;
index fb88114..ed00001 100644
@@ -76,7 +76,7 @@ static void cls_cgroup_destroy_work(struct work_struct *work)
 static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
                             struct tcf_proto *tp, unsigned long base,
                             u32 handle, struct nlattr **tca,
-                            void **arg, bool ovr, bool rtnl_held,
+                            void **arg, u32 flags,
                             struct netlink_ext_ack *extack)
 {
        struct nlattr *tb[TCA_CGROUP_MAX + 1];
@@ -108,8 +108,8 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
        if (err < 0)
                goto errout;
 
-       err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &new->exts, ovr,
-                               true, extack);
+       err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &new->exts, flags,
+                               extack);
        if (err < 0)
                goto errout;
 
index 87398af..972303a 100644
@@ -387,7 +387,7 @@ static void flow_destroy_filter_work(struct work_struct *work)
 static int flow_change(struct net *net, struct sk_buff *in_skb,
                       struct tcf_proto *tp, unsigned long base,
                       u32 handle, struct nlattr **tca,
-                      void **arg, bool ovr, bool rtnl_held,
+                      void **arg, u32 flags,
                       struct netlink_ext_ack *extack)
 {
        struct flow_head *head = rtnl_dereference(tp->root);
@@ -442,8 +442,8 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
        if (err < 0)
                goto err2;
 
-       err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &fnew->exts, ovr,
-                               true, extack);
+       err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &fnew->exts, flags,
+                               extack);
        if (err < 0)
                goto err2;
 
index d7869a9..23b2125 100644
@@ -1915,23 +1915,22 @@ errout_cleanup:
 static int fl_set_parms(struct net *net, struct tcf_proto *tp,
                        struct cls_fl_filter *f, struct fl_flow_mask *mask,
                        unsigned long base, struct nlattr **tb,
-                       struct nlattr *est, bool ovr,
-                       struct fl_flow_tmplt *tmplt, bool rtnl_held,
+                       struct nlattr *est,
+                       struct fl_flow_tmplt *tmplt, u32 flags,
                        struct netlink_ext_ack *extack)
 {
        int err;
 
-       err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, rtnl_held,
-                               extack);
+       err = tcf_exts_validate(net, tp, tb, est, &f->exts, flags, extack);
        if (err < 0)
                return err;
 
        if (tb[TCA_FLOWER_CLASSID]) {
                f->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]);
-               if (!rtnl_held)
+               if (flags & TCA_ACT_FLAGS_NO_RTNL)
                        rtnl_lock();
                tcf_bind_filter(tp, &f->res, base);
-               if (!rtnl_held)
+               if (flags & TCA_ACT_FLAGS_NO_RTNL)
                        rtnl_unlock();
        }
 
@@ -1975,10 +1974,11 @@ static int fl_ht_insert_unique(struct cls_fl_filter *fnew,
 static int fl_change(struct net *net, struct sk_buff *in_skb,
                     struct tcf_proto *tp, unsigned long base,
                     u32 handle, struct nlattr **tca,
-                    void **arg, bool ovr, bool rtnl_held,
+                    void **arg, u32 flags,
                     struct netlink_ext_ack *extack)
 {
        struct cls_fl_head *head = fl_head_dereference(tp);
+       bool rtnl_held = !(flags & TCA_ACT_FLAGS_NO_RTNL);
        struct cls_fl_filter *fold = *arg;
        struct cls_fl_filter *fnew;
        struct fl_flow_mask *mask;
@@ -2034,8 +2034,8 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
                }
        }
 
-       err = fl_set_parms(net, tp, fnew, mask, base, tb, tca[TCA_RATE], ovr,
-                          tp->chain->tmplt_priv, rtnl_held, extack);
+       err = fl_set_parms(net, tp, fnew, mask, base, tb, tca[TCA_RATE],
+                          tp->chain->tmplt_priv, flags, extack);
        if (err)
                goto errout;
 
index ec94529..8654b0c 100644
@@ -198,15 +198,15 @@ static const struct nla_policy fw_policy[TCA_FW_MAX + 1] = {
 
 static int fw_set_parms(struct net *net, struct tcf_proto *tp,
                        struct fw_filter *f, struct nlattr **tb,
-                       struct nlattr **tca, unsigned long base, bool ovr,
+                       struct nlattr **tca, unsigned long base, u32 flags,
                        struct netlink_ext_ack *extack)
 {
        struct fw_head *head = rtnl_dereference(tp->root);
        u32 mask;
        int err;
 
-       err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &f->exts, ovr,
-                               true, extack);
+       err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &f->exts, flags,
+                               extack);
        if (err < 0)
                return err;
 
@@ -237,8 +237,7 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp,
 static int fw_change(struct net *net, struct sk_buff *in_skb,
                     struct tcf_proto *tp, unsigned long base,
                     u32 handle, struct nlattr **tca, void **arg,
-                    bool ovr, bool rtnl_held,
-                    struct netlink_ext_ack *extack)
+                    u32 flags, struct netlink_ext_ack *extack)
 {
        struct fw_head *head = rtnl_dereference(tp->root);
        struct fw_filter *f = *arg;
@@ -277,7 +276,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
                        return err;
                }
 
-               err = fw_set_parms(net, tp, fnew, tb, tca, base, ovr, extack);
+               err = fw_set_parms(net, tp, fnew, tb, tca, base, flags, extack);
                if (err < 0) {
                        tcf_exts_destroy(&fnew->exts);
                        kfree(fnew);
@@ -326,7 +325,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
        f->id = handle;
        f->tp = tp;
 
-       err = fw_set_parms(net, tp, f, tb, tca, base, ovr, extack);
+       err = fw_set_parms(net, tp, f, tb, tca, base, flags, extack);
        if (err < 0)
                goto errout;
 
index cafb844..24f0046 100644
@@ -163,13 +163,12 @@ static const struct nla_policy mall_policy[TCA_MATCHALL_MAX + 1] = {
 static int mall_set_parms(struct net *net, struct tcf_proto *tp,
                          struct cls_mall_head *head,
                          unsigned long base, struct nlattr **tb,
-                         struct nlattr *est, bool ovr,
+                         struct nlattr *est, u32 flags,
                          struct netlink_ext_ack *extack)
 {
        int err;
 
-       err = tcf_exts_validate(net, tp, tb, est, &head->exts, ovr, true,
-                               extack);
+       err = tcf_exts_validate(net, tp, tb, est, &head->exts, flags, extack);
        if (err < 0)
                return err;
 
@@ -183,13 +182,13 @@ static int mall_set_parms(struct net *net, struct tcf_proto *tp,
 static int mall_change(struct net *net, struct sk_buff *in_skb,
                       struct tcf_proto *tp, unsigned long base,
                       u32 handle, struct nlattr **tca,
-                      void **arg, bool ovr, bool rtnl_held,
+                      void **arg, u32 flags,
                       struct netlink_ext_ack *extack)
 {
        struct cls_mall_head *head = rtnl_dereference(tp->root);
        struct nlattr *tb[TCA_MATCHALL_MAX + 1];
        struct cls_mall_head *new;
-       u32 flags = 0;
+       u32 userflags = 0;
        int err;
 
        if (!tca[TCA_OPTIONS])
@@ -204,8 +203,8 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
                return err;
 
        if (tb[TCA_MATCHALL_FLAGS]) {
-               flags = nla_get_u32(tb[TCA_MATCHALL_FLAGS]);
-               if (!tc_flags_valid(flags))
+               userflags = nla_get_u32(tb[TCA_MATCHALL_FLAGS]);
+               if (!tc_flags_valid(userflags))
                        return -EINVAL;
        }
 
@@ -220,14 +219,14 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
        if (!handle)
                handle = 1;
        new->handle = handle;
-       new->flags = flags;
+       new->flags = userflags;
        new->pf = alloc_percpu(struct tc_matchall_pcnt);
        if (!new->pf) {
                err = -ENOMEM;
                goto err_alloc_percpu;
        }
 
-       err = mall_set_parms(net, tp, new, base, tb, tca[TCA_RATE], ovr,
+       err = mall_set_parms(net, tp, new, base, tb, tca[TCA_RATE], flags,
                             extack);
        if (err)
                goto err_set_parms;
index 5efa3e7..a35ab8c 100644
@@ -382,7 +382,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
                            unsigned long base, struct route4_filter *f,
                            u32 handle, struct route4_head *head,
                            struct nlattr **tb, struct nlattr *est, int new,
-                           bool ovr, struct netlink_ext_ack *extack)
+                           u32 flags, struct netlink_ext_ack *extack)
 {
        u32 id = 0, to = 0, nhandle = 0x8000;
        struct route4_filter *fp;
@@ -390,7 +390,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
        struct route4_bucket *b;
        int err;
 
-       err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, true, extack);
+       err = tcf_exts_validate(net, tp, tb, est, &f->exts, flags, extack);
        if (err < 0)
                return err;
 
@@ -464,8 +464,8 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
 
 static int route4_change(struct net *net, struct sk_buff *in_skb,
                         struct tcf_proto *tp, unsigned long base, u32 handle,
-                        struct nlattr **tca, void **arg, bool ovr,
-                        bool rtnl_held, struct netlink_ext_ack *extack)
+                        struct nlattr **tca, void **arg, u32 flags,
+                        struct netlink_ext_ack *extack)
 {
        struct route4_head *head = rtnl_dereference(tp->root);
        struct route4_filter __rcu **fp;
@@ -510,7 +510,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
        }
 
        err = route4_set_parms(net, tp, base, f, handle, head, tb,
-                              tca[TCA_RATE], new, ovr, extack);
+                              tca[TCA_RATE], new, flags, extack);
        if (err < 0)
                goto errout;
 
index 27a4b6d..5cd9d6b 100644
@@ -470,9 +470,8 @@ static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = {
 
 static int rsvp_change(struct net *net, struct sk_buff *in_skb,
                       struct tcf_proto *tp, unsigned long base,
-                      u32 handle,
-                      struct nlattr **tca,
-                      void **arg, bool ovr, bool rtnl_held,
+                      u32 handle, struct nlattr **tca,
+                      void **arg, u32 flags,
                       struct netlink_ext_ack *extack)
 {
        struct rsvp_head *data = rtnl_dereference(tp->root);
@@ -499,7 +498,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
        err = tcf_exts_init(&e, net, TCA_RSVP_ACT, TCA_RSVP_POLICE);
        if (err < 0)
                return err;
-       err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr, true,
+       err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, flags,
                                extack);
        if (err < 0)
                goto errout2;
index e9a8a2c..742c7d4 100644
@@ -330,7 +330,7 @@ static int
 tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
                  u32 handle, struct tcindex_data *p,
                  struct tcindex_filter_result *r, struct nlattr **tb,
-                 struct nlattr *est, bool ovr, struct netlink_ext_ack *extack)
+                 struct nlattr *est, u32 flags, struct netlink_ext_ack *extack)
 {
        struct tcindex_filter_result new_filter_result, *old_r = r;
        struct tcindex_data *cp = NULL, *oldp;
@@ -342,7 +342,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
        err = tcf_exts_init(&e, net, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
        if (err < 0)
                return err;
-       err = tcf_exts_validate(net, tp, tb, est, &e, ovr, true, extack);
+       err = tcf_exts_validate(net, tp, tb, est, &e, flags, extack);
        if (err < 0)
                goto errout;
 
@@ -529,8 +529,8 @@ errout:
 static int
 tcindex_change(struct net *net, struct sk_buff *in_skb,
               struct tcf_proto *tp, unsigned long base, u32 handle,
-              struct nlattr **tca, void **arg, bool ovr,
-              bool rtnl_held, struct netlink_ext_ack *extack)
+              struct nlattr **tca, void **arg, u32 flags,
+              struct netlink_ext_ack *extack)
 {
        struct nlattr *opt = tca[TCA_OPTIONS];
        struct nlattr *tb[TCA_TCINDEX_MAX + 1];
@@ -551,7 +551,7 @@ tcindex_change(struct net *net, struct sk_buff *in_skb,
                return err;
 
        return tcindex_set_parms(net, tp, base, handle, p, r, tb,
-                                tca[TCA_RATE], ovr, extack);
+                                tca[TCA_RATE], flags, extack);
 }
 
 static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker,
index 6e1abe8..4272814 100644
@@ -709,12 +709,12 @@ static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
 static int u32_set_parms(struct net *net, struct tcf_proto *tp,
                         unsigned long base,
                         struct tc_u_knode *n, struct nlattr **tb,
-                        struct nlattr *est, bool ovr,
+                        struct nlattr *est, u32 flags,
                         struct netlink_ext_ack *extack)
 {
        int err;
 
-       err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr, true, extack);
+       err = tcf_exts_validate(net, tp, tb, est, &n->exts, flags, extack);
        if (err < 0)
                return err;
 
@@ -840,7 +840,7 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp,
 
 static int u32_change(struct net *net, struct sk_buff *in_skb,
                      struct tcf_proto *tp, unsigned long base, u32 handle,
-                     struct nlattr **tca, void **arg, bool ovr, bool rtnl_held,
+                     struct nlattr **tca, void **arg, u32 flags,
                      struct netlink_ext_ack *extack)
 {
        struct tc_u_common *tp_c = tp->data;
@@ -849,7 +849,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
        struct tc_u32_sel *s;
        struct nlattr *opt = tca[TCA_OPTIONS];
        struct nlattr *tb[TCA_U32_MAX + 1];
-       u32 htid, flags = 0;
+       u32 htid, userflags = 0;
        size_t sel_size;
        int err;
 
@@ -868,8 +868,8 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
                return err;
 
        if (tb[TCA_U32_FLAGS]) {
-               flags = nla_get_u32(tb[TCA_U32_FLAGS]);
-               if (!tc_flags_valid(flags)) {
+               userflags = nla_get_u32(tb[TCA_U32_FLAGS]);
+               if (!tc_flags_valid(userflags)) {
                        NL_SET_ERR_MSG_MOD(extack, "Invalid filter flags");
                        return -EINVAL;
                }
@@ -884,7 +884,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
                        return -EINVAL;
                }
 
-               if ((n->flags ^ flags) &
+               if ((n->flags ^ userflags) &
                    ~(TCA_CLS_FLAGS_IN_HW | TCA_CLS_FLAGS_NOT_IN_HW)) {
                        NL_SET_ERR_MSG_MOD(extack, "Key node flags do not match passed flags");
                        return -EINVAL;
@@ -895,7 +895,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
                        return -ENOMEM;
 
                err = u32_set_parms(net, tp, base, new, tb,
-                                   tca[TCA_RATE], ovr, extack);
+                                   tca[TCA_RATE], flags, extack);
 
                if (err) {
                        u32_destroy_key(new, false);
@@ -955,9 +955,9 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
                ht->handle = handle;
                ht->prio = tp->prio;
                idr_init(&ht->handle_idr);
-               ht->flags = flags;
+               ht->flags = userflags;
 
-               err = u32_replace_hw_hnode(tp, ht, flags, extack);
+               err = u32_replace_hw_hnode(tp, ht, userflags, extack);
                if (err) {
                        idr_remove(&tp_c->handle_idr, handle);
                        kfree(ht);
@@ -1038,7 +1038,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
        RCU_INIT_POINTER(n->ht_up, ht);
        n->handle = handle;
        n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0;
-       n->flags = flags;
+       n->flags = userflags;
 
        err = tcf_exts_init(&n->exts, net, TCA_U32_ACT, TCA_U32_POLICE);
        if (err < 0)
@@ -1060,7 +1060,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
        }
 #endif
 
-       err = u32_set_parms(net, tp, base, n, tb, tca[TCA_RATE], ovr,
+       err = u32_set_parms(net, tp, base, n, tb, tca[TCA_RATE], flags,
                            extack);
        if (err == 0) {
                struct tc_u_knode __rcu **ins;
index d9ac60f..a8dd06c 100644
@@ -913,7 +913,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
 
        /* seqlock has the same scope of busylock, for NOLOCK qdisc */
        spin_lock_init(&sch->seqlock);
-       lockdep_set_class(&sch->busylock,
+       lockdep_set_class(&sch->seqlock,
                          dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
 
        seqcount_init(&sch->running);
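The sch_generic.c one-liner fixes a copy-paste slip: qdisc_alloc() installed the per-device lockdep class on busylock twice, leaving the NOLOCK seqlock in its default class. With the fix, both locks share the device's tx_busylock key; condensed from the hunk and its surrounding context:

	spin_lock_init(&sch->busylock);
	lockdep_set_class(&sch->busylock,
			  dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);

	/* seqlock has the same scope as busylock, so the same class;
	 * this second call previously re-keyed busylock by mistake. */
	spin_lock_init(&sch->seqlock);
	lockdep_set_class(&sch->seqlock,
			  dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);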
index 07b30d0..9c79374 100644
@@ -1739,8 +1739,6 @@ static void taprio_attach(struct Qdisc *sch)
                if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
                        qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
                        old = dev_graft_qdisc(qdisc->dev_queue, qdisc);
-                       if (ntx < dev->real_num_tx_queues)
-                               qdisc_hash_add(qdisc, false);
                } else {
                        old = dev_graft_qdisc(qdisc->dev_queue, sch);
                        qdisc_refcount_inc(sch);
index fe74c5f..db6b737 100644
@@ -857,14 +857,18 @@ int sctp_auth_set_key(struct sctp_endpoint *ep,
        memcpy(key->data, &auth_key->sca_key[0], auth_key->sca_keylength);
        cur_key->key = key;
 
-       if (replace) {
-               list_del_init(&shkey->key_list);
-               sctp_auth_shkey_release(shkey);
-               if (asoc && asoc->active_key_id == auth_key->sca_keynumber)
-                       sctp_auth_asoc_init_active_key(asoc, GFP_KERNEL);
+       if (!replace) {
+               list_add(&cur_key->key_list, sh_keys);
+               return 0;
        }
+
+       list_del_init(&shkey->key_list);
+       sctp_auth_shkey_release(shkey);
        list_add(&cur_key->key_list, sh_keys);
 
+       if (asoc && asoc->active_key_id == auth_key->sca_keynumber)
+               sctp_auth_asoc_init_active_key(asoc, GFP_KERNEL);
+
        return 0;
 }
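The sctp_auth_set_key() rework does two things: the non-replace path returns early so the replace path reads straight down, and the active-key recomputation moves below list_add(), so it runs only once the replacement key is actually visible on sh_keys. A reduced ordering sketch, with hypothetical helper names standing in for the SCTP internals:

	list_del_init(&old_key->key_list);	/* 1. unlink the stale entry */
	put_shared_key(old_key);		/* 2. drop its ref (hypothetical) */
	list_add(&new_key->key_list, sh_keys);	/* 3. publish the new key */
	if (key_is_active(asoc, key_id))	/* 4. only now re-derive the */
		recompute_active_key(asoc);	/*    association's active key */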
 
index eb3c2a3..5ef86fd 100644
@@ -1203,7 +1203,7 @@ static struct sctp_association *__sctp_rcv_asconf_lookup(
        if (unlikely(!af))
                return NULL;
 
-       if (af->from_addr_param(&paddr, param, peer_port, 0))
+       if (!af->from_addr_param(&paddr, param, peer_port, 0))
                return NULL;
 
        return __sctp_lookup_association(net, laddr, &paddr, transportp);
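The one-character sctp/input.c change un-inverts a success test: from_addr_param() returns bool, true when the address parameter parses cleanly, so the ASCONF lookup must bail on failure rather than on success. The corrected pattern, as in the hunk:

	/* give up only when the parameter fails to parse */
	if (!af->from_addr_param(&paddr, param, peer_port, 0))
		return NULL;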
index e48dd90..470dbdc 100644
@@ -100,8 +100,9 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev,
                list_for_each_entry_safe(addr, temp,
                                        &net->sctp.local_addr_list, list) {
                        if (addr->a.sa.sa_family == AF_INET6 &&
-                                       ipv6_addr_equal(&addr->a.v6.sin6_addr,
-                                               &ifa->addr)) {
+                           ipv6_addr_equal(&addr->a.v6.sin6_addr,
+                                           &ifa->addr) &&
+                           addr->a.v6.sin6_scope_id == ifa->idev->dev->ifindex) {
                                sctp_addr_wq_mgmt(net, addr, SCTP_ADDR_DEL);
                                found = 1;
                                addr->valid = 0;
index 09a8f23..32df65f 100644
@@ -1109,12 +1109,12 @@ enum sctp_disposition sctp_sf_send_probe(struct net *net,
        if (!sctp_transport_pl_enabled(transport))
                return SCTP_DISPOSITION_CONSUME;
 
-       sctp_transport_pl_send(transport);
-
-       reply = sctp_make_heartbeat(asoc, transport, transport->pl.probe_size);
-       if (!reply)
-               return SCTP_DISPOSITION_NOMEM;
-       sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(reply));
+       if (sctp_transport_pl_send(transport)) {
+               reply = sctp_make_heartbeat(asoc, transport, transport->pl.probe_size);
+               if (!reply)
+                       return SCTP_DISPOSITION_NOMEM;
+               sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(reply));
+       }
        sctp_add_cmd_sf(commands, SCTP_CMD_PROBE_TIMER_UPDATE,
                        SCTP_TRANSPORT(transport));
 
@@ -1274,8 +1274,7 @@ enum sctp_disposition sctp_sf_backbeat_8_3(struct net *net,
                    !sctp_transport_pl_enabled(link))
                        return SCTP_DISPOSITION_DISCARD;
 
-               sctp_transport_pl_recv(link);
-               if (link->pl.state == SCTP_PL_COMPLETE)
+               if (sctp_transport_pl_recv(link))
                        return SCTP_DISPOSITION_CONSUME;
 
                return sctp_sf_send_probe(net, ep, asoc, type, link, commands);
index 397a624..a3d3ca6 100644
@@ -258,16 +258,13 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk)
        sctp_transport_pl_update(transport);
 }
 
-void sctp_transport_pl_send(struct sctp_transport *t)
+bool sctp_transport_pl_send(struct sctp_transport *t)
 {
-       pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, high: %d\n",
-                __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, t->pl.probe_high);
-
-       if (t->pl.probe_count < SCTP_MAX_PROBES) {
-               t->pl.probe_count++;
-               return;
-       }
+       if (t->pl.probe_count < SCTP_MAX_PROBES)
+               goto out;
 
+       t->pl.last_rtx_chunks = t->asoc->rtx_data_chunks;
+       t->pl.probe_count = 0;
        if (t->pl.state == SCTP_PL_BASE) {
                if (t->pl.probe_size == SCTP_BASE_PLPMTU) { /* BASE_PLPMTU Confirmation Failed */
                        t->pl.state = SCTP_PL_ERROR; /* Base -> Error */
@@ -299,14 +296,27 @@ void sctp_transport_pl_send(struct sctp_transport *t)
                        sctp_assoc_sync_pmtu(t->asoc);
                }
        }
-       t->pl.probe_count = 1;
+
+out:
+       if (t->pl.state == SCTP_PL_COMPLETE && t->pl.raise_count < 30 &&
+           !t->pl.probe_count && t->pl.last_rtx_chunks == t->asoc->rtx_data_chunks) {
+               t->pl.raise_count++;
+               return false;
+       }
+
+       pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, high: %d\n",
+                __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, t->pl.probe_high);
+
+       t->pl.probe_count++;
+       return true;
 }
 
-void sctp_transport_pl_recv(struct sctp_transport *t)
+bool sctp_transport_pl_recv(struct sctp_transport *t)
 {
        pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, high: %d\n",
                 __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, t->pl.probe_high);
 
+       t->pl.last_rtx_chunks = t->asoc->rtx_data_chunks;
        t->pl.pmtu = t->pl.probe_size;
        t->pl.probe_count = 0;
        if (t->pl.state == SCTP_PL_BASE) {
@@ -323,7 +333,7 @@ void sctp_transport_pl_recv(struct sctp_transport *t)
                if (!t->pl.probe_high) {
                        t->pl.probe_size = min(t->pl.probe_size + SCTP_PL_BIG_STEP,
                                               SCTP_MAX_PLPMTU);
-                       return;
+                       return false;
                }
                t->pl.probe_size += SCTP_PL_MIN_STEP;
                if (t->pl.probe_size >= t->pl.probe_high) {
@@ -335,14 +345,13 @@ void sctp_transport_pl_recv(struct sctp_transport *t)
                        t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t);
                        sctp_assoc_sync_pmtu(t->asoc);
                }
-       } else if (t->pl.state == SCTP_PL_COMPLETE) {
-               t->pl.raise_count++;
-               if (t->pl.raise_count == 30) {
-                       /* Raise probe_size again after 30 * interval in Search Complete */
-                       t->pl.state = SCTP_PL_SEARCH; /* Search Complete -> Search */
-                       t->pl.probe_size += SCTP_PL_MIN_STEP;
-               }
+       } else if (t->pl.state == SCTP_PL_COMPLETE && t->pl.raise_count == 30) {
+               /* Raise probe_size again after 30 * interval in Search Complete */
+               t->pl.state = SCTP_PL_SEARCH; /* Search Complete -> Search */
+               t->pl.probe_size += SCTP_PL_MIN_STEP;
        }
+
+       return t->pl.state == SCTP_PL_COMPLETE;
 }
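
With both helpers now returning bool, the sm_statefuns.c callers above decide whether a probe actually goes out. A sketch of the contract as read from these hunks (an interpretation, not authoritative documentation):

    /*
     * if (sctp_transport_pl_send(t))    true:  transmit a HEARTBEAT probe
     *         send probe;                      of t->pl.probe_size
     *                                   false: Search Complete quiet period,
     *                                          skip this interval's probe
     *
     * if (sctp_transport_pl_recv(t))    true:  search complete, consume
     *         consume;                  false: keep probing
     * else
     *         send next probe;
     */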
 
 static bool sctp_transport_pl_toobig(struct sctp_transport *t, u32 pmtu)
index cd0d7c9..edc8962 100644 (file)
@@ -1752,21 +1752,30 @@ out:
        return rc;
 }
 
-/* convert the RMB size into the compressed notation - minimum 16K.
+#define SMCD_DMBE_SIZES                6 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */
+#define SMCR_RMBE_SIZES                5 /* 0 -> 16KB, 1 -> 32KB, .. 5 -> 512KB */
+
+/* convert the RMB size into the compressed notation (minimum 16K, see
+ * SMCD/R_DMBE_SIZES).
  * In contrast to plain ilog2, this rounds towards the next power of 2,
  * so the socket application gets at least its desired sndbuf / rcvbuf size.
  */
-static u8 smc_compress_bufsize(int size)
+static u8 smc_compress_bufsize(int size, bool is_smcd, bool is_rmb)
 {
+       const unsigned int max_scat = SG_MAX_SINGLE_ALLOC * PAGE_SIZE;
        u8 compressed;
 
        if (size <= SMC_BUF_MIN_SIZE)
                return 0;
 
-       size = (size - 1) >> 14;
-       compressed = ilog2(size) + 1;
-       if (compressed >= SMC_RMBE_SIZES)
-               compressed = SMC_RMBE_SIZES - 1;
+       size = (size - 1) >> 14;  /* convert to 16K multiple */
+       compressed = min_t(u8, ilog2(size) + 1,
+                          is_smcd ? SMCD_DMBE_SIZES : SMCR_RMBE_SIZES);
+
+       if (!is_smcd && is_rmb)
+               /* RMBs are backed by & limited to max size of scatterlists */
+               compressed = min_t(u8, compressed, ilog2(max_scat >> 14));
+
        return compressed;
 }
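
Per the size comments above, compressed value c encodes a buffer of 16KB << c. A standalone sketch of the inverse mapping (the in-tree counterpart is smc_uncompress_bufsize(); this version is an illustration only):

    #include <stdint.h>

    /* compressed 0 -> 16KB, 1 -> 32KB, ..., n -> 16KB << n */
    int uncompress_bufsize(uint8_t compressed)
    {
            return 0x1 << (compressed + 14);   /* 2^14 bytes = 16KB base */
    }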
 
@@ -1982,17 +1991,12 @@ out:
        return rc;
 }
 
-#define SMCD_DMBE_SIZES                6 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */
-
 static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
                                                bool is_dmb, int bufsize)
 {
        struct smc_buf_desc *buf_desc;
        int rc;
 
-       if (smc_compress_bufsize(bufsize) > SMCD_DMBE_SIZES)
-               return ERR_PTR(-EAGAIN);
-
        /* try to alloc a new DMB */
        buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
        if (!buf_desc)
@@ -2041,9 +2045,8 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
                /* use socket send buffer size (w/o overhead) as start value */
                sk_buf_size = smc->sk.sk_sndbuf / 2;
 
-       for (bufsize_short = smc_compress_bufsize(sk_buf_size);
+       for (bufsize_short = smc_compress_bufsize(sk_buf_size, is_smcd, is_rmb);
             bufsize_short >= 0; bufsize_short--) {
-
                if (is_rmb) {
                        lock = &lgr->rmbs_lock;
                        buf_list = &lgr->rmbs[bufsize_short];
@@ -2052,8 +2055,6 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
                        buf_list = &lgr->sndbufs[bufsize_short];
                }
                bufsize = smc_uncompress_bufsize(bufsize_short);
-               if ((1 << get_order(bufsize)) > SG_MAX_SINGLE_ALLOC)
-                       continue;
 
                /* check for reusable slot in the link group */
                buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list);
index 7d7ba03..a884534 100644 (file)
@@ -753,8 +753,7 @@ void smc_ib_ndev_change(struct net_device *ndev, unsigned long event)
                        if (!libdev->ops.get_netdev)
                                continue;
                        lndev = libdev->ops.get_netdev(libdev, i + 1);
-                       if (lndev)
-                               dev_put(lndev);
+                       dev_put(lndev);
                        if (lndev != ndev)
                                continue;
                        if (event == NETDEV_REGISTER)
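
This hunk, like the smc_pnet, nl80211 and cfg80211 scan hunks below, relies on dev_hold() and dev_put() being NULL-tolerant, which makes the explicit guards redundant:

    /*
     *   if (ndev)               before: guard against NULL
     *           dev_put(ndev);
     *
     *   dev_put(ndev);          after: dev_put(NULL) is a no-op
     */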
index 6f6d33e..4a964e9 100644 (file)
@@ -394,8 +394,7 @@ static int smc_pnet_add_eth(struct smc_pnettable *pnettable, struct net *net,
        return 0;
 
 out_put:
-       if (ndev)
-               dev_put(ndev);
+       dev_put(ndev);
        return rc;
 }
 
index 0ae3478..0b2c18e 100644 (file)
@@ -809,3 +809,51 @@ int switchdev_handle_port_attr_set(struct net_device *dev,
        return err;
 }
 EXPORT_SYMBOL_GPL(switchdev_handle_port_attr_set);
+
+int switchdev_bridge_port_offload(struct net_device *brport_dev,
+                                 struct net_device *dev, const void *ctx,
+                                 struct notifier_block *atomic_nb,
+                                 struct notifier_block *blocking_nb,
+                                 bool tx_fwd_offload,
+                                 struct netlink_ext_ack *extack)
+{
+       struct switchdev_notifier_brport_info brport_info = {
+               .brport = {
+                       .dev = dev,
+                       .ctx = ctx,
+                       .atomic_nb = atomic_nb,
+                       .blocking_nb = blocking_nb,
+                       .tx_fwd_offload = tx_fwd_offload,
+               },
+       };
+       int err;
+
+       ASSERT_RTNL();
+
+       err = call_switchdev_blocking_notifiers(SWITCHDEV_BRPORT_OFFLOADED,
+                                               brport_dev, &brport_info.info,
+                                               extack);
+       return notifier_to_errno(err);
+}
+EXPORT_SYMBOL_GPL(switchdev_bridge_port_offload);
+
+void switchdev_bridge_port_unoffload(struct net_device *brport_dev,
+                                    const void *ctx,
+                                    struct notifier_block *atomic_nb,
+                                    struct notifier_block *blocking_nb)
+{
+       struct switchdev_notifier_brport_info brport_info = {
+               .brport = {
+                       .ctx = ctx,
+                       .atomic_nb = atomic_nb,
+                       .blocking_nb = blocking_nb,
+               },
+       };
+
+       ASSERT_RTNL();
+
+       call_switchdev_blocking_notifiers(SWITCHDEV_BRPORT_UNOFFLOADED,
+                                         brport_dev, &brport_info.info,
+                                         NULL);
+}
+EXPORT_SYMBOL_GPL(switchdev_bridge_port_unoffload);
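
A hypothetical driver-side use of the two new entry points; the foo_* names are assumptions for illustration and not part of this patch. A port driver would call these from its bridge join/leave paths:

    static int foo_port_bridge_join(struct foo_port *port,
                                    struct net_device *brport_dev,
                                    struct netlink_ext_ack *extack)
    {
            /* ctx identifies this port inside the shared notifier blocks */
            return switchdev_bridge_port_offload(brport_dev, port->netdev,
                                                 port, &foo_switchdev_nb,
                                                 &foo_switchdev_blocking_nb,
                                                 false, extack);
    }

    static void foo_port_bridge_leave(struct foo_port *port,
                                      struct net_device *brport_dev)
    {
            switchdev_bridge_port_unoffload(brport_dev, port,
                                            &foo_switchdev_nb,
                                            &foo_switchdev_blocking_nb);
    }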
index e5c43d4..c9391d3 100644 (file)
@@ -898,16 +898,10 @@ static int tipc_aead_decrypt(struct net *net, struct tipc_aead *aead,
        if (unlikely(!aead))
                return -ENOKEY;
 
-       /* Cow skb data if needed */
-       if (likely(!skb_cloned(skb) &&
-                  (!skb_is_nonlinear(skb) || !skb_has_frag_list(skb)))) {
-               nsg = 1 + skb_shinfo(skb)->nr_frags;
-       } else {
-               nsg = skb_cow_data(skb, 0, &unused);
-               if (unlikely(nsg < 0)) {
-                       pr_err("RX: skb_cow_data() returned %d\n", nsg);
-                       return nsg;
-               }
+       nsg = skb_cow_data(skb, 0, &unused);
+       if (unlikely(nsg < 0)) {
+               pr_err("RX: skb_cow_data() returned %d\n", nsg);
+               return nsg;
        }
 
        /* Allocate memory for the AEAD operation */
index b0dd183..b15b2b1 100644 (file)
@@ -158,6 +158,7 @@ static void tipc_sk_remove(struct tipc_sock *tsk);
 static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dsz);
 static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz);
 static void tipc_sk_push_backlog(struct tipc_sock *tsk, bool nagle_ack);
+static int tipc_wait_for_connect(struct socket *sock, long *timeo_p);
 
 static const struct proto_ops packet_ops;
 static const struct proto_ops stream_ops;
@@ -1515,8 +1516,13 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
                rc = 0;
        }
 
-       if (unlikely(syn && !rc))
+       if (unlikely(syn && !rc)) {
                tipc_set_sk_state(sk, TIPC_CONNECTING);
+               if (timeout) {
+                       timeout = msecs_to_jiffies(timeout);
+                       tipc_wait_for_connect(sock, &timeout);
+               }
+       }
 
        return rc ? rc : dlen;
 }
@@ -1564,7 +1570,7 @@ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen)
                return -EMSGSIZE;
 
        /* Handle implicit connection setup */
-       if (unlikely(dest)) {
+       if (unlikely(dest && sk->sk_state == TIPC_OPEN)) {
                rc = __tipc_sendmsg(sock, m, dlen);
                if (dlen && dlen == rc) {
                        tsk->peer_caps = tipc_node_get_capabilities(net, dnode);
@@ -2666,7 +2672,7 @@ static int tipc_listen(struct socket *sock, int len)
 static int tipc_wait_for_accept(struct socket *sock, long timeo)
 {
        struct sock *sk = sock->sk;
-       DEFINE_WAIT(wait);
+       DEFINE_WAIT_FUNC(wait, woken_wake_function);
        int err;
 
        /* True wake-one mechanism for incoming connections: only
@@ -2675,12 +2681,12 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo)
         * anymore, the common case will execute the loop only once.
        */
        for (;;) {
-               prepare_to_wait_exclusive(sk_sleep(sk), &wait,
-                                         TASK_INTERRUPTIBLE);
                if (timeo && skb_queue_empty(&sk->sk_receive_queue)) {
+                       add_wait_queue(sk_sleep(sk), &wait);
                        release_sock(sk);
-                       timeo = schedule_timeout(timeo);
+                       timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo);
                        lock_sock(sk);
+                       remove_wait_queue(sk_sleep(sk), &wait);
                }
                err = 0;
                if (!skb_queue_empty(&sk->sk_receive_queue))
@@ -2692,7 +2698,6 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo)
                if (signal_pending(current))
                        break;
        }
-       finish_wait(sk_sleep(sk), &wait);
        return err;
 }
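
The switch to DEFINE_WAIT_FUNC()/wait_woken() closes a missed-wakeup window: woken_wake_function() latches the wakeup in the wait entry's WQ_FLAG_WOKEN, so an event landing between release_sock() and the sleep is still observed. Sketched:

    /*
     * add_wait_queue(sk_sleep(sk), &wait);
     * release_sock(sk);            <- a wakeup may fire right here ...
     * wait_woken(&wait, TASK_INTERRUPTIBLE, timeo);
     *                              ... and wait_woken() returns immediately
     *                              because WQ_FLAG_WOKEN is already set
     * lock_sock(sk);
     * remove_wait_queue(sk_sleep(sk), &wait);
     */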
 
@@ -2709,9 +2714,10 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
                       bool kern)
 {
        struct sock *new_sk, *sk = sock->sk;
-       struct sk_buff *buf;
        struct tipc_sock *new_tsock;
+       struct msghdr m = {NULL,};
        struct tipc_msg *msg;
+       struct sk_buff *buf;
        long timeo;
        int res;
 
@@ -2757,19 +2763,17 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
        }
 
        /*
-        * Respond to 'SYN-' by discarding it & returning 'ACK'-.
-        * Respond to 'SYN+' by queuing it on new socket.
+        * Respond to 'SYN-' by discarding it & returning 'ACK'.
+        * Respond to 'SYN+' by queuing it on new socket & returning 'ACK'.
         */
        if (!msg_data_sz(msg)) {
-               struct msghdr m = {NULL,};
-
                tsk_advance_rx_queue(sk);
-               __tipc_sendstream(new_sock, &m, 0);
        } else {
                __skb_dequeue(&sk->sk_receive_queue);
                __skb_queue_head(&new_sk->sk_receive_queue, buf);
                skb_set_owner_r(buf, new_sk);
        }
+       __tipc_sendstream(new_sock, &m, 0);
        release_sock(new_sk);
 exit:
        release_sock(sk);
index b6c4282..b7f8112 100644 (file)
@@ -25,6 +25,11 @@ config UNIX_SCM
        depends on UNIX
        default y
 
+config AF_UNIX_OOB
+       bool
+       depends on UNIX
+       default y
+
 config UNIX_DIAG
        tristate "UNIX: socket monitoring interface"
        depends on UNIX
index 8992767..ec02e70 100644 (file)
@@ -503,6 +503,12 @@ static void unix_sock_destructor(struct sock *sk)
 
        skb_queue_purge(&sk->sk_receive_queue);
 
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+       if (u->oob_skb) {
+               kfree_skb(u->oob_skb);
+               u->oob_skb = NULL;
+       }
+#endif
        WARN_ON(refcount_read(&sk->sk_wmem_alloc));
        WARN_ON(!sk_unhashed(sk));
        WARN_ON(sk->sk_socket);
@@ -1543,6 +1549,53 @@ out:
        return err;
 }
 
+static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
+{
+       scm->fp = scm_fp_dup(UNIXCB(skb).fp);
+
+       /*
+        * Garbage collection of unix sockets starts by selecting a set of
+        * candidate sockets which have a reference only from being in flight
+        * (total_refs == inflight_refs).  This condition is checked once during
+        * the candidate collection phase, and candidates are marked as such, so
+        * that non-candidates can later be ignored.  While inflight_refs is
+        * protected by unix_gc_lock, total_refs (file count) is not, hence this
+        * is an instantaneous decision.
+        *
+        * Once a candidate, however, the socket must not be reinstalled into a
+        * file descriptor while the garbage collection is in progress.
+        *
+        * If the above conditions are met, then the directed graph of
+        * candidates (*) does not change while unix_gc_lock is held.
+        *
+        * Any operation that changes the file count through file descriptors
+        * (dup, close, sendmsg) does not change the graph since candidates are
+        * not installed in fds.
+        *
+        * Dequeuing a candidate via recvmsg would install it into an fd, but
+        * that takes unix_gc_lock to decrement the inflight count, so it's
+        * serialized with garbage collection.
+        *
+        * MSG_PEEK is special in that it does not change the inflight count,
+        * yet does install the socket into an fd.  The following lock/unlock
+        * pair is to ensure serialization with garbage collection.  It must be
+        * done between incrementing the file count and installing the file into
+        * an fd.
+        *
+        * If garbage collection starts after the barrier provided by the
+        * lock/unlock, then it will see the elevated refcount and not mark this
+        * as a candidate.  If a garbage collection is already in progress
+        * before the file count was incremented, then the lock/unlock pair will
+        * ensure that garbage collection is finished before progressing to
+        * installing the fd.
+        *
+        * (*) A -> B where B is on the queue of A or B is on the queue of C
+        * which is on the queue of listening socket A.
+        */
+       spin_lock(&unix_gc_lock);
+       spin_unlock(&unix_gc_lock);
+}
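
The resulting ordering on the MSG_PEEK receive path, in simplified sketch form:

    /*
     * scm.fp = scm_fp_dup(UNIXCB(skb).fp);   file count now elevated
     * spin_lock(&unix_gc_lock);              barrier: wait out any GC that
     * spin_unlock(&unix_gc_lock);            is already past the count check
     * ...
     * fd_install(...);                       safe: a GC starting now sees
     *                                        the elevated refcount
     */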
+
 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
 {
        int err = 0;
@@ -1842,6 +1895,46 @@ out:
  */
 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
 
+#if (IS_ENABLED(CONFIG_AF_UNIX_OOB))
+static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other)
+{
+       struct unix_sock *ousk = unix_sk(other);
+       struct sk_buff *skb;
+       int err = 0;
+
+       skb = sock_alloc_send_skb(sock->sk, 1, msg->msg_flags & MSG_DONTWAIT, &err);
+
+       if (!skb)
+               return err;
+
+       skb_put(skb, 1);
+       skb->len = 1;
+       err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1);
+
+       if (err) {
+               kfree_skb(skb);
+               return err;
+       }
+
+       unix_state_lock(other);
+       maybe_add_creds(skb, sock, other);
+       skb_get(skb);
+
+       if (ousk->oob_skb)
+               kfree_skb(ousk->oob_skb);
+
+       ousk->oob_skb = skb;
+
+       scm_stat_add(other, skb);
+       skb_queue_tail(&other->sk_receive_queue, skb);
+       sk_send_sigurg(other);
+       unix_state_unlock(other);
+       other->sk_data_ready(other);
+
+       return err;
+}
+#endif
+
 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
                               size_t len)
 {
@@ -1860,8 +1953,14 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
                return err;
 
        err = -EOPNOTSUPP;
-       if (msg->msg_flags&MSG_OOB)
-               goto out_err;
+       if (msg->msg_flags & MSG_OOB) {
+#if (IS_ENABLED(CONFIG_AF_UNIX_OOB))
+               if (len)
+                       len--;
+               else
+#endif
+                       goto out_err;
+       }
 
        if (msg->msg_namelen) {
                err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
@@ -1926,6 +2025,15 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
                sent += size;
        }
 
+#if (IS_ENABLED(CONFIG_AF_UNIX_OOB))
+       if (msg->msg_flags & MSG_OOB) {
+               err = queue_oob(sock, msg, other);
+               if (err)
+                       goto out_err;
+               sent++;
+       }
+#endif
+
        scm_destroy(&scm);
 
        return sent;
@@ -2192,7 +2300,7 @@ int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
                sk_peek_offset_fwd(sk, size);
 
                if (UNIXCB(skb).fp)
-                       scm.fp = scm_fp_dup(UNIXCB(skb).fp);
+                       unix_peek_fds(&scm, skb);
        }
        err = (flags & MSG_TRUNC) ? skb->len - skip : size;
 
@@ -2311,6 +2419,59 @@ struct unix_stream_read_state {
        unsigned int splice_flags;
 };
 
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+static int unix_stream_recv_urg(struct unix_stream_read_state *state)
+{
+       struct socket *sock = state->socket;
+       struct sock *sk = sock->sk;
+       struct unix_sock *u = unix_sk(sk);
+       int chunk = 1;
+
+       if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb)
+               return -EINVAL;
+
+       chunk = state->recv_actor(u->oob_skb, 0, chunk, state);
+       if (chunk < 0)
+               return -EFAULT;
+
+       if (!(state->flags & MSG_PEEK)) {
+               UNIXCB(u->oob_skb).consumed += 1;
+               kfree_skb(u->oob_skb);
+               u->oob_skb = NULL;
+       }
+       state->msg->msg_flags |= MSG_OOB;
+       return 1;
+}
+
+static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
+                                 int flags, int copied)
+{
+       struct unix_sock *u = unix_sk(sk);
+
+       if (!unix_skb_len(skb) && !(flags & MSG_PEEK)) {
+               skb_unlink(skb, &sk->sk_receive_queue);
+               consume_skb(skb);
+               skb = NULL;
+       } else {
+               if (skb == u->oob_skb) {
+                       if (copied) {
+                               skb = NULL;
+                       } else if (sock_flag(sk, SOCK_URGINLINE)) {
+                               if (!(flags & MSG_PEEK)) {
+                                       u->oob_skb = NULL;
+                                       consume_skb(skb);
+                               }
+                       } else if (!(flags & MSG_PEEK)) {
+                               skb_unlink(skb, &sk->sk_receive_queue);
+                               consume_skb(skb);
+                               skb = skb_peek(&sk->sk_receive_queue);
+                       }
+               }
+       }
+       return skb;
+}
+#endif
+
 static int unix_stream_read_generic(struct unix_stream_read_state *state,
                                    bool freezable)
 {
@@ -2336,6 +2497,15 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state,
 
        if (unlikely(flags & MSG_OOB)) {
                err = -EOPNOTSUPP;
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+               mutex_lock(&u->iolock);
+               unix_state_lock(sk);
+
+               err = unix_stream_recv_urg(state);
+
+               unix_state_unlock(sk);
+               mutex_unlock(&u->iolock);
+#endif
                goto out;
        }
 
@@ -2364,6 +2534,18 @@ redo:
                }
                last = skb = skb_peek(&sk->sk_receive_queue);
                last_len = last ? last->len : 0;
+
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+               if (skb) {
+                       skb = manage_oob(skb, sk, flags, copied);
+                       if (!skb) {
+                               unix_state_unlock(sk);
+                               if (copied)
+                                       break;
+                               goto redo;
+                       }
+               }
+#endif
 again:
                if (skb == NULL) {
                        if (copied >= target)
@@ -2482,7 +2664,7 @@ unlock:
                        /* It is questionable, see note in unix_dgram_recvmsg.
                         */
                        if (UNIXCB(skb).fp)
-                               scm.fp = scm_fp_dup(UNIXCB(skb).fp);
+                               unix_peek_fds(&scm, skb);
 
                        sk_peek_offset_fwd(sk, chunk);
 
@@ -2699,6 +2881,20 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
        case SIOCUNIXFILE:
                err = unix_open_file(sk);
                break;
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+       case SIOCATMARK:
+               {
+                       struct sk_buff *skb;
+                       struct unix_sock *u = unix_sk(sk);
+                       int answ = 0;
+
+                       skb = skb_peek(&sk->sk_receive_queue);
+                       if (skb && skb == u->oob_skb)
+                               answ = 1;
+                       err = put_user(answ, (int __user *)arg);
+               }
+               break;
+#endif
        default:
                err = -ENOIOCTLCMD;
                break;
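
Taken together, queue_oob(), manage_oob(), unix_stream_recv_urg() and the SIOCATMARK case give AF_UNIX stream sockets TCP-style urgent data. A minimal userspace sketch, assuming a kernel built with CONFIG_AF_UNIX_OOB=y:

    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <sys/socket.h>

    int main(void)
    {
            int fds[2], atmark = 0;
            char buf[2], c;

            if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds))
                    return 1;
            send(fds[0], "ab", 2, 0);            /* inline data */
            send(fds[0], "x", 1, MSG_OOB);       /* becomes oob_skb */
            recv(fds[1], buf, 2, 0);             /* reads "ab", stops at mark */
            ioctl(fds[1], SIOCATMARK, &atmark);  /* 1: next byte is the mark */
            recv(fds[1], &c, 1, MSG_OOB);        /* fetches 'x' out of band */
            printf("oob=%c atmark=%d\n", c, atmark);
            return 0;
    }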
index db0cda2..20f5357 100644 (file)
@@ -44,7 +44,7 @@ static int unix_dgram_bpf_recvmsg(struct sock *sk, struct msghdr *msg,
 {
        struct unix_sock *u = unix_sk(sk);
        struct sk_psock *psock;
-       int copied, ret;
+       int copied;
 
        psock = sk_psock_get(sk);
        if (unlikely(!psock))
@@ -53,8 +53,9 @@ static int unix_dgram_bpf_recvmsg(struct sock *sk, struct msghdr *msg,
        mutex_lock(&u->iolock);
        if (!skb_queue_empty(&sk->sk_receive_queue) &&
            sk_psock_queue_empty(psock)) {
-               ret = __unix_dgram_recvmsg(sk, msg, len, flags);
-               goto out;
+               mutex_unlock(&u->iolock);
+               sk_psock_put(sk, psock);
+               return __unix_dgram_recvmsg(sk, msg, len, flags);
        }
 
 msg_bytes_ready:
@@ -68,16 +69,15 @@ msg_bytes_ready:
                if (data) {
                        if (!sk_psock_queue_empty(psock))
                                goto msg_bytes_ready;
-                       ret = __unix_dgram_recvmsg(sk, msg, len, flags);
-                       goto out;
+                       mutex_unlock(&u->iolock);
+                       sk_psock_put(sk, psock);
+                       return __unix_dgram_recvmsg(sk, msg, len, flags);
                }
                copied = -EAGAIN;
        }
-       ret = copied;
-out:
        mutex_unlock(&u->iolock);
        sk_psock_put(sk, psock);
-       return ret;
+       return copied;
 }
 
 static struct proto *unix_prot_saved __read_mostly;
@@ -105,6 +105,9 @@ static void unix_bpf_check_needs_rebuild(struct proto *ops)
 
 int unix_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
 {
+       if (sk->sk_type != SOCK_DGRAM)
+               return -EOPNOTSUPP;
+
        if (restore) {
                sk->sk_write_space = psock->saved_write_space;
                WRITE_ONCE(sk->sk_prot, psock->sk_proto);
index 169ba8b..081e7ae 100644 (file)
@@ -1079,6 +1079,9 @@ virtio_transport_recv_connected(struct sock *sk,
                virtio_transport_recv_enqueue(vsk, pkt);
                sk->sk_data_ready(sk);
                return err;
+       case VIRTIO_VSOCK_OP_CREDIT_REQUEST:
+               virtio_transport_send_credit_update(vsk);
+               break;
        case VIRTIO_VSOCK_OP_CREDIT_UPDATE:
                sk->sk_write_space(sk);
                break;
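
A sketch of the credit exchange this case completes; the exact trigger on the sending side is an assumption from the general virtio-vsock flow-control scheme:

    /*
     * sender                                receiver
     *   tx until peer credit exhausted
     *   OP_CREDIT_REQUEST            ---->
     *                                <----  OP_CREDIT_UPDATE (this hunk)
     *   resume tx with fresh credit
     */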
index 50eb405..dceed5b 100644 (file)
@@ -2351,7 +2351,10 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev,
                        goto nla_put_failure;
 
                for (band = state->band_start;
-                    band < NUM_NL80211_BANDS; band++) {
+                    band < (state->split ?
+                               NUM_NL80211_BANDS :
+                               NL80211_BAND_60GHZ + 1);
+                    band++) {
                        struct ieee80211_supported_band *sband;
 
                        /* omit higher bands for ancient software */
@@ -6524,8 +6527,7 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
        err = rdev_change_station(rdev, dev, mac_addr, &params);
 
  out_put_vlan:
-       if (params.vlan)
-               dev_put(params.vlan);
+       dev_put(params.vlan);
 
        return err;
 }
@@ -6760,8 +6762,7 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
 
        err = rdev_add_station(rdev, dev, mac_addr, &params);
 
-       if (params.vlan)
-               dev_put(params.vlan);
+       dev_put(params.vlan);
        return err;
 }
 
@@ -8486,8 +8487,7 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
                goto out_free;
 
        nl80211_send_scan_start(rdev, wdev);
-       if (wdev->netdev)
-               dev_hold(wdev->netdev);
+       dev_hold(wdev->netdev);
 
        return 0;
 
@@ -14857,9 +14857,7 @@ static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb,
                        return -ENETDOWN;
                }
 
-               if (dev)
-                       dev_hold(dev);
-
+               dev_hold(dev);
                info->user_ptr[0] = rdev;
        }
 
@@ -14881,8 +14879,7 @@ static void nl80211_post_doit(const struct genl_ops *ops, struct sk_buff *skb,
                if (ops->internal_flags & NL80211_FLAG_NEED_WDEV) {
                        struct wireless_dev *wdev = info->user_ptr[1];
 
-                       if (wdev->netdev)
-                               dev_put(wdev->netdev);
+                       dev_put(wdev->netdev);
                } else {
                        dev_put(info->user_ptr[1]);
                }
index f03c7ac..11c68b1 100644 (file)
@@ -975,8 +975,7 @@ void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev,
        }
 #endif
 
-       if (wdev->netdev)
-               dev_put(wdev->netdev);
+       dev_put(wdev->netdev);
 
        kfree(rdev->int_scan_req);
        rdev->int_scan_req = NULL;
@@ -1754,16 +1753,14 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev,
                         * be grouped with this beacon for updates ...
                         */
                        if (!cfg80211_combine_bsses(rdev, new)) {
-                               kfree(new);
+                               bss_ref_put(rdev, new);
                                goto drop;
                        }
                }
 
                if (rdev->bss_entries >= bss_entries_limit &&
                    !cfg80211_bss_expire_oldest(rdev)) {
-                       if (!list_empty(&new->hidden_list))
-                               list_del(&new->hidden_list);
-                       kfree(new);
+                       bss_ref_put(rdev, new);
                        goto drop;
                }
 
index a20aec9..2bf2693 100644 (file)
@@ -298,8 +298,16 @@ static int xfrm_xlate64(struct sk_buff *dst, const struct nlmsghdr *nlh_src)
        len = nlmsg_attrlen(nlh_src, xfrm_msg_min[type]);
 
        nla_for_each_attr(nla, attrs, len, remaining) {
-               int err = xfrm_xlate64_attr(dst, nla);
+               int err;
 
+               switch (type) {
+               case XFRM_MSG_NEWSPDINFO:
+                       err = xfrm_nla_cpy(dst, nla, nla_len(nla));
+                       break;
+               default:
+                       err = xfrm_xlate64_attr(dst, nla);
+                       break;
+               }
                if (err)
                        return err;
        }
@@ -341,7 +349,8 @@ static int xfrm_alloc_compat(struct sk_buff *skb, const struct nlmsghdr *nlh_src
 
 /* Calculates len of translated 64-bit message. */
 static size_t xfrm_user_rcv_calculate_len64(const struct nlmsghdr *src,
-                                           struct nlattr *attrs[XFRMA_MAX+1])
+                                           struct nlattr *attrs[XFRMA_MAX + 1],
+                                           int maxtype)
 {
        size_t len = nlmsg_len(src);
 
@@ -358,10 +367,20 @@ static size_t xfrm_user_rcv_calculate_len64(const struct nlmsghdr *src,
        case XFRM_MSG_POLEXPIRE:
                len += 8;
                break;
+       case XFRM_MSG_NEWSPDINFO:
+       /* attributes are xfrm_spdattr_type_t, not xfrm_attr_type_t */
+               return len;
        default:
                break;
        }
 
+       /* Unexpected for anything but XFRM_MSG_NEWSPDINFO, please
+        * correct both 64=>32-bit and 32=>64-bit translators to copy
+        * new attributes.
+        */
+       if (WARN_ON_ONCE(maxtype))
+               return len;
+
        if (attrs[XFRMA_SA])
                len += 4;
        if (attrs[XFRMA_POLICY])
@@ -440,7 +459,8 @@ static int xfrm_xlate32_attr(void *dst, const struct nlattr *nla,
 
 static int xfrm_xlate32(struct nlmsghdr *dst, const struct nlmsghdr *src,
                        struct nlattr *attrs[XFRMA_MAX+1],
-                       size_t size, u8 type, struct netlink_ext_ack *extack)
+                       size_t size, u8 type, int maxtype,
+                       struct netlink_ext_ack *extack)
 {
        size_t pos;
        int i;
@@ -520,6 +540,25 @@ static int xfrm_xlate32(struct nlmsghdr *dst, const struct nlmsghdr *src,
        }
        pos = dst->nlmsg_len;
 
+       if (maxtype) {
+               /* attributes are xfrm_spdattr_type_t, not xfrm_attr_type_t */
+               WARN_ON_ONCE(src->nlmsg_type != XFRM_MSG_NEWSPDINFO);
+
+               for (i = 1; i <= maxtype; i++) {
+                       int err;
+
+                       if (!attrs[i])
+                               continue;
+
+                       /* just copy - no need for translation */
+                       err = xfrm_attr_cpy32(dst, &pos, attrs[i], size,
+                                       nla_len(attrs[i]), nla_len(attrs[i]));
+                       if (err)
+                               return err;
+               }
+               return 0;
+       }
+
        for (i = 1; i < XFRMA_MAX + 1; i++) {
                int err;
 
@@ -564,7 +603,7 @@ static struct nlmsghdr *xfrm_user_rcv_msg_compat(const struct nlmsghdr *h32,
        if (err < 0)
                return ERR_PTR(err);
 
-       len = xfrm_user_rcv_calculate_len64(h32, attrs);
+       len = xfrm_user_rcv_calculate_len64(h32, attrs, maxtype);
        /* The message doesn't need translation */
        if (len == nlmsg_len(h32))
                return NULL;
@@ -574,7 +613,7 @@ static struct nlmsghdr *xfrm_user_rcv_msg_compat(const struct nlmsghdr *h32,
        if (!h64)
                return ERR_PTR(-ENOMEM);
 
-       err = xfrm_xlate32(h64, h32, attrs, len, type, extack);
+       err = xfrm_xlate32(h64, h32, attrs, len, type, maxtype, extack);
        if (err < 0) {
                kvfree(h64);
                return ERR_PTR(err);
index 2e8afe0..cb40ff0 100644 (file)
@@ -241,7 +241,7 @@ static void ipcomp_free_tfms(struct crypto_comp * __percpu *tfms)
                        break;
        }
 
-       WARN_ON(!pos);
+       WARN_ON(list_entry_is_head(pos, &ipcomp_tfms_list, list));
 
        if (--pos->users)
                return;
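
The old WARN_ON(!pos) could never fire: when list_for_each_entry() finishes without a break, pos holds the head sentinel cast to the entry type, never NULL. A sketch of the semantics the new check relies on:

    /*
     * list_for_each_entry(pos, &ipcomp_tfms_list, list)
     *         if (match(pos))
     *                 break;
     *
     * no match  =>  pos == list_entry(&ipcomp_tfms_list, ...), non-NULL,
     * so only list_entry_is_head(pos, &ipcomp_tfms_list, list) detects it
     */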
index 827d842..7f881f5 100644 (file)
@@ -155,7 +155,6 @@ static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1]
                                                __read_mostly;
 
 static struct kmem_cache *xfrm_dst_cache __ro_after_init;
-static __read_mostly seqcount_mutex_t xfrm_policy_hash_generation;
 
 static struct rhashtable xfrm_policy_inexact_table;
 static const struct rhashtable_params xfrm_pol_inexact_params;
@@ -585,7 +584,7 @@ static void xfrm_bydst_resize(struct net *net, int dir)
                return;
 
        spin_lock_bh(&net->xfrm.xfrm_policy_lock);
-       write_seqcount_begin(&xfrm_policy_hash_generation);
+       write_seqcount_begin(&net->xfrm.xfrm_policy_hash_generation);
 
        odst = rcu_dereference_protected(net->xfrm.policy_bydst[dir].table,
                                lockdep_is_held(&net->xfrm.xfrm_policy_lock));
@@ -596,7 +595,7 @@ static void xfrm_bydst_resize(struct net *net, int dir)
        rcu_assign_pointer(net->xfrm.policy_bydst[dir].table, ndst);
        net->xfrm.policy_bydst[dir].hmask = nhashmask;
 
-       write_seqcount_end(&xfrm_policy_hash_generation);
+       write_seqcount_end(&net->xfrm.xfrm_policy_hash_generation);
        spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
 
        synchronize_rcu();
@@ -1245,7 +1244,7 @@ static void xfrm_hash_rebuild(struct work_struct *work)
        } while (read_seqretry(&net->xfrm.policy_hthresh.lock, seq));
 
        spin_lock_bh(&net->xfrm.xfrm_policy_lock);
-       write_seqcount_begin(&xfrm_policy_hash_generation);
+       write_seqcount_begin(&net->xfrm.xfrm_policy_hash_generation);
 
        /* make sure that we can insert the indirect policies again before
         * we start with destructive action.
@@ -1354,7 +1353,7 @@ static void xfrm_hash_rebuild(struct work_struct *work)
 
 out_unlock:
        __xfrm_policy_inexact_flush(net);
-       write_seqcount_end(&xfrm_policy_hash_generation);
+       write_seqcount_end(&net->xfrm.xfrm_policy_hash_generation);
        spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
 
        mutex_unlock(&hash_resize_mutex);
@@ -2091,15 +2090,12 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
        if (unlikely(!daddr || !saddr))
                return NULL;
 
- retry:
-       sequence = read_seqcount_begin(&xfrm_policy_hash_generation);
        rcu_read_lock();
-
-       chain = policy_hash_direct(net, daddr, saddr, family, dir);
-       if (read_seqcount_retry(&xfrm_policy_hash_generation, sequence)) {
-               rcu_read_unlock();
-               goto retry;
-       }
+ retry:
+       do {
+               sequence = read_seqcount_begin(&net->xfrm.xfrm_policy_hash_generation);
+               chain = policy_hash_direct(net, daddr, saddr, family, dir);
+       } while (read_seqcount_retry(&net->xfrm.xfrm_policy_hash_generation, sequence));
 
        ret = NULL;
        hlist_for_each_entry_rcu(pol, chain, bydst) {
@@ -2130,15 +2126,11 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
        }
 
 skip_inexact:
-       if (read_seqcount_retry(&xfrm_policy_hash_generation, sequence)) {
-               rcu_read_unlock();
+       if (read_seqcount_retry(&net->xfrm.xfrm_policy_hash_generation, sequence))
                goto retry;
-       }
 
-       if (ret && !xfrm_pol_hold_rcu(ret)) {
-               rcu_read_unlock();
+       if (ret && !xfrm_pol_hold_rcu(ret))
                goto retry;
-       }
 fail:
        rcu_read_unlock();
 
@@ -4089,6 +4081,7 @@ static int __net_init xfrm_net_init(struct net *net)
        /* Initialize the per-net locks here */
        spin_lock_init(&net->xfrm.xfrm_state_lock);
        spin_lock_init(&net->xfrm.xfrm_policy_lock);
+       seqcount_spinlock_init(&net->xfrm.xfrm_policy_hash_generation, &net->xfrm.xfrm_policy_lock);
        mutex_init(&net->xfrm.xfrm_cfg_mutex);
 
        rv = xfrm_statistics_init(net);
@@ -4133,7 +4126,6 @@ void __init xfrm_init(void)
 {
        register_pernet_subsys(&xfrm_net_ops);
        xfrm_dev_init();
-       seqcount_mutex_init(&xfrm_policy_hash_generation, &hash_resize_mutex);
        xfrm_input_init();
 
 #ifdef CONFIG_XFRM_ESPINTCP
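
With the generation counter now a per-netns seqcount_spinlock_t initialised against xfrm_policy_lock (see xfrm_net_init() above), lockdep can verify that writers hold that lock, and the lookup retries inside a single RCU section. The write-side pairing, sketched:

    /*
     * spin_lock_bh(&net->xfrm.xfrm_policy_lock);
     * write_seqcount_begin(&net->xfrm.xfrm_policy_hash_generation);
     * ... resize / rebuild the policy hash ...
     * write_seqcount_end(&net->xfrm.xfrm_policy_hash_generation);
     * spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
     */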
index b47d613..7aff641 100644 (file)
@@ -2811,6 +2811,16 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
 
        err = link->doit(skb, nlh, attrs);
 
+       /* We need to free skb allocated in xfrm_alloc_compat() before
+        * returning from this function, because consume_skb() won't take
+        * care of frag_list since netlink destructor sets
+        * skb->head to NULL (see netlink_skb_destructor()).
+        */
+       if (skb_has_frag_list(skb)) {
+               kfree_skb(skb_shinfo(skb)->frag_list);
+               skb_shinfo(skb)->frag_list = NULL;
+       }
+
 err:
        kvfree(nlh64);
        return err;
index 0b9548e..fcba217 100644 (file)
@@ -45,11 +45,13 @@ xdp_monitor
 xdp_redirect
 xdp_redirect_cpu
 xdp_redirect_map
+xdp_redirect_map_multi
 xdp_router_ipv4
 xdp_rxq_info
 xdp_sample_pkts
 xdp_tx_iptunnel
 xdpsock
+xdpsock_ctrl_proc
 xsk_fwd
 testfile.img
 hbm_out.log
index e68b9ee..35db26f 100755 (executable)
@@ -1,5 +1,6 @@
 #!/bin/bash
 
+rm -r tmpmnt
 rm -f testfile.img
 dd if=/dev/zero of=testfile.img bs=1M seek=1000 count=1
 DEVICE=$(losetup --show -f testfile.img)
index fdcd658..8be7ce1 100644 (file)
@@ -14,6 +14,11 @@ int main(int argc, char **argv)
        int ret = 0;
        FILE *f;
 
+       if (!argv[1]) {
+               fprintf(stderr, "ERROR: Run with the btrfs device argument!\n");
+               return 0;
+       }
+
        snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
        obj = bpf_object__open_file(filename, NULL);
        if (libbpf_get_error(obj)) {
index 34b6439..f0c5d95 100644 (file)
@@ -57,6 +57,7 @@ int xdp_prog1(struct xdp_md *ctx)
 
        h_proto = eth->h_proto;
 
+       /* Handle VLAN tagged packet */
        if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) {
                struct vlan_hdr *vhdr;
 
@@ -66,6 +67,7 @@ int xdp_prog1(struct xdp_md *ctx)
                        return rc;
                h_proto = vhdr->h_vlan_encapsulated_proto;
        }
+       /* Handle double VLAN tagged packet */
        if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) {
                struct vlan_hdr *vhdr;
 
index c787f4b..d8a64ab 100644 (file)
@@ -73,6 +73,7 @@ int xdp_prog1(struct xdp_md *ctx)
 
        h_proto = eth->h_proto;
 
+       /* Handle VLAN tagged packet */
        if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) {
                struct vlan_hdr *vhdr;
 
@@ -82,6 +83,7 @@ int xdp_prog1(struct xdp_md *ctx)
                        return rc;
                h_proto = vhdr->h_vlan_encapsulated_proto;
        }
+       /* Handle double VLAN tagged packet */
        if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) {
                struct vlan_hdr *vhdr;
 
index d3ecdc1..9e225c9 100644 (file)
@@ -841,7 +841,7 @@ int main(int argc, char **argv)
        memset(cpu, 0, n_cpus * sizeof(int));
 
        /* Parse commands line args */
-       while ((opt = getopt_long(argc, argv, "hSd:s:p:q:c:xzFf:e:r:m:",
+       while ((opt = getopt_long(argc, argv, "hSd:s:p:q:c:xzFf:e:r:m:n",
                                  long_options, &longindex)) != -1) {
                switch (opt) {
                case 'd':
index 33d0bde..49d7a6a 100644 (file)
@@ -1,12 +1,10 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright(c) 2017 - 2018 Intel Corporation. */
 
-#include <asm/barrier.h>
 #include <errno.h>
 #include <getopt.h>
 #include <libgen.h>
 #include <linux/bpf.h>
-#include <linux/compiler.h>
 #include <linux/if_link.h>
 #include <linux/if_xdp.h>
 #include <linux/if_ether.h>
@@ -653,17 +651,15 @@ out:
        return result;
 }
 
-__sum16 ip_fast_csum(const void *iph, unsigned int ihl);
-
 /*
  *     This is a version of ip_compute_csum() optimized for IP headers,
  *     which always checksum on 4 octet boundaries.
  *     This function code has been taken from
  *     Linux kernel lib/checksum.c
  */
-__sum16 ip_fast_csum(const void *iph, unsigned int ihl)
+static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
 {
-       return (__force __sum16)~do_csum(iph, ihl * 4);
+       return (__sum16)~do_csum(iph, ihl * 4);
 }
 
 /*
@@ -673,11 +669,11 @@ __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
  */
 static inline __sum16 csum_fold(__wsum csum)
 {
-       u32 sum = (__force u32)csum;
+       u32 sum = (u32)csum;
 
        sum = (sum & 0xffff) + (sum >> 16);
        sum = (sum & 0xffff) + (sum >> 16);
-       return (__force __sum16)~sum;
+       return (__sum16)~sum;
 }
 
 /*
@@ -703,16 +699,16 @@ __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
 __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
                          __u32 len, __u8 proto, __wsum sum)
 {
-       unsigned long long s = (__force u32)sum;
+       unsigned long long s = (u32)sum;
 
-       s += (__force u32)saddr;
-       s += (__force u32)daddr;
+       s += (u32)saddr;
+       s += (u32)daddr;
 #ifdef __BIG_ENDIAN__
        s += proto + len;
 #else
        s += (proto + len) << 8;
 #endif
-       return (__force __wsum)from64to32(s);
+       return (__wsum)from64to32(s);
 }
 
 /*
index c17e480..8f6b13a 100755 (executable)
@@ -173,39 +173,6 @@ my $mcount_regex;  # Find the call site to mcount (return offset)
 my $mcount_adjust;     # Address adjustment to mcount offset
 my $alignment;         # The .align value to use for $mcount_section
 my $section_type;      # Section header plus possible alignment command
-my $can_use_local = 0;         # If we can use local function references
-
-# Shut up recordmcount if user has older objcopy
-my $quiet_recordmcount = ".tmp_quiet_recordmcount";
-my $print_warning = 1;
-$print_warning = 0 if ( -f $quiet_recordmcount);
-
-##
-# check_objcopy - whether objcopy supports --globalize-symbols
-#
-#  --globalize-symbols came out in 2.17, we must test the version
-#  of objcopy, and if it is less than 2.17, then we can not
-#  record local functions.
-sub check_objcopy
-{
-    open (IN, "$objcopy --version |") or die "error running $objcopy";
-    while (<IN>) {
-       if (/objcopy.*\s(\d+)\.(\d+)/) {
-           $can_use_local = 1 if ($1 > 2 || ($1 == 2 && $2 >= 17));
-           last;
-       }
-    }
-    close (IN);
-
-    if (!$can_use_local && $print_warning) {
-       print STDERR "WARNING: could not find objcopy version or version " .
-           "is less than 2.17.\n" .
-           "\tLocal function references are disabled.\n";
-       open (QUIET, ">$quiet_recordmcount");
-       printf QUIET "Disables the warning from recordmcount.pl\n";
-       close QUIET;
-    }
-}
 
 if ($arch =~ /(x86(_64)?)|(i386)/) {
     if ($bits == 64) {
@@ -434,8 +401,6 @@ if ($filename =~ m,^(.*)(\.\S),) {
 my $mcount_s = $dirname . "/.tmp_mc_" . $prefix . ".s";
 my $mcount_o = $dirname . "/.tmp_mc_" . $prefix . ".o";
 
-check_objcopy();
-
 #
 # Step 1: find all the local (static functions) and weak symbols.
 #         't' is local, 'w/W' is weak
@@ -473,11 +438,6 @@ sub update_funcs
 
     # is this function static? If so, note this fact.
     if (defined $locals{$ref_func}) {
-
-       # only use locals if objcopy supports globalize-symbols
-       if (!$can_use_local) {
-           return;
-       }
        $convert{$ref_func} = 1;
     }
 
index 74f8aad..7011fbe 100755 (executable)
@@ -17,7 +17,7 @@ Usage:
        $ cat /sys/kernel/debug/tracing/trace_pipe > ~/raw_trace_func
        Wait some times but not too much, the script is a bit slow.
        Break the pipe (Ctrl + Z)
-       $ scripts/draw_functrace.py < raw_trace_func > draw_functrace
+       $ scripts/tracing/draw_functrace.py < ~/raw_trace_func > draw_functrace
        Then you have your drawn trace in draw_functrace
 """
 
@@ -103,10 +103,10 @@ def parseLine(line):
        line = line.strip()
        if line.startswith("#"):
                raise CommentLineException
-       m = re.match("[^]]+?\\] +([0-9.]+): (\\w+) <-(\\w+)", line)
+       m = re.match("[^]]+?\\] +([a-z.]+) +([0-9.]+): (\\w+) <-(\\w+)", line)
        if m is None:
                raise BrokenLineException
-       return (m.group(1), m.group(2), m.group(3))
+       return (m.group(2), m.group(3), m.group(4))
 
 
 def main():
index defc5ef..0ae1b71 100644 (file)
@@ -874,7 +874,7 @@ int policydb_load_isids(struct policydb *p, struct sidtab *s)
        rc = sidtab_init(s);
        if (rc) {
                pr_err("SELinux:  out of memory on SID table init\n");
-               goto out;
+               return rc;
        }
 
        head = p->ocontexts[OCON_ISID];
@@ -885,7 +885,7 @@ int policydb_load_isids(struct policydb *p, struct sidtab *s)
                if (sid == SECSID_NULL) {
                        pr_err("SELinux:  SID 0 was assigned a context.\n");
                        sidtab_destroy(s);
-                       goto out;
+                       return -EINVAL;
                }
 
                /* Ignore initial SIDs unused by this kernel. */
@@ -897,12 +897,10 @@ int policydb_load_isids(struct policydb *p, struct sidtab *s)
                        pr_err("SELinux:  unable to load initial SID %s.\n",
                               name);
                        sidtab_destroy(s);
-                       goto out;
+                       return rc;
                }
        }
-       rc = 0;
-out:
-       return rc;
+       return 0;
 }
 
 int policydb_class_isvalid(struct policydb *p, unsigned int class)
index 14e3282..6a2971a 100644 (file)
@@ -246,12 +246,18 @@ static bool hw_support_mmap(struct snd_pcm_substream *substream)
        if (!(substream->runtime->hw.info & SNDRV_PCM_INFO_MMAP))
                return false;
 
-       if (substream->ops->mmap ||
-           (substream->dma_buffer.dev.type != SNDRV_DMA_TYPE_DEV &&
-            substream->dma_buffer.dev.type != SNDRV_DMA_TYPE_DEV_UC))
+       if (substream->ops->mmap)
                return true;
 
-       return dma_can_mmap(substream->dma_buffer.dev.dev);
+       switch (substream->dma_buffer.dev.type) {
+       case SNDRV_DMA_TYPE_UNKNOWN:
+               return false;
+       case SNDRV_DMA_TYPE_CONTINUOUS:
+       case SNDRV_DMA_TYPE_VMALLOC:
+               return true;
+       default:
+               return dma_can_mmap(substream->dma_buffer.dev.dev);
+       }
 }
 
 static int constrain_mask_params(struct snd_pcm_substream *substream,
@@ -3063,9 +3069,14 @@ static int snd_pcm_ioctl_sync_ptr_compat(struct snd_pcm_substream *substream,
                boundary = 0x7fffffff;
        snd_pcm_stream_lock_irq(substream);
        /* FIXME: we should consider the boundary for the sync from app */
-       if (!(sflags & SNDRV_PCM_SYNC_PTR_APPL))
-               control->appl_ptr = scontrol.appl_ptr;
-       else
+       if (!(sflags & SNDRV_PCM_SYNC_PTR_APPL)) {
+               err = pcm_lib_apply_appl_ptr(substream,
+                               scontrol.appl_ptr);
+               if (err < 0) {
+                       snd_pcm_stream_unlock_irq(substream);
+                       return err;
+               }
+       } else
                scontrol.appl_ptr = control->appl_ptr % boundary;
        if (!(sflags & SNDRV_PCM_SYNC_PTR_AVAIL_MIN))
                control->avail_min = scontrol.avail_min;
@@ -3664,6 +3675,8 @@ static vm_fault_t snd_pcm_mmap_data_fault(struct vm_fault *vmf)
                return VM_FAULT_SIGBUS;
        if (substream->ops->page)
                page = substream->ops->page(substream, offset);
+       else if (!snd_pcm_get_dma_buf(substream))
+               page = virt_to_page(runtime->dma_area + offset);
        else
                page = snd_sgbuf_get_page(snd_pcm_get_dma_buf(substream), offset);
        if (!page)
index d8be146..c9d0ba3 100644 (file)
@@ -319,6 +319,10 @@ static const struct config_entry config_table[] = {
                .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC,
                .device = 0x4b55,
        },
+       {
+               .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC,
+               .device = 0x4b58,
+       },
 #endif
 
 /* Alder Lake */
index 5bbe669..7ad8c5f 100644 (file)
@@ -816,6 +816,7 @@ static int snd_sb_csp_start(struct snd_sb_csp * p, int sample_width, int channel
        mixR = snd_sbmixer_read(p->chip, SB_DSP4_PCM_DEV + 1);
        snd_sbmixer_write(p->chip, SB_DSP4_PCM_DEV, mixL & 0x7);
        snd_sbmixer_write(p->chip, SB_DSP4_PCM_DEV + 1, mixR & 0x7);
+       spin_unlock_irqrestore(&p->chip->mixer_lock, flags);
 
        spin_lock(&p->chip->reg_lock);
        set_mode_register(p->chip, 0xc0);       /* c0 = STOP */
@@ -855,6 +856,7 @@ static int snd_sb_csp_start(struct snd_sb_csp * p, int sample_width, int channel
        spin_unlock(&p->chip->reg_lock);
 
        /* restore PCM volume */
+       spin_lock_irqsave(&p->chip->mixer_lock, flags);
        snd_sbmixer_write(p->chip, SB_DSP4_PCM_DEV, mixL);
        snd_sbmixer_write(p->chip, SB_DSP4_PCM_DEV + 1, mixR);
        spin_unlock_irqrestore(&p->chip->mixer_lock, flags);
@@ -880,6 +882,7 @@ static int snd_sb_csp_stop(struct snd_sb_csp * p)
        mixR = snd_sbmixer_read(p->chip, SB_DSP4_PCM_DEV + 1);
        snd_sbmixer_write(p->chip, SB_DSP4_PCM_DEV, mixL & 0x7);
        snd_sbmixer_write(p->chip, SB_DSP4_PCM_DEV + 1, mixR & 0x7);
+       spin_unlock_irqrestore(&p->chip->mixer_lock, flags);
 
        spin_lock(&p->chip->reg_lock);
        if (p->running & SNDRV_SB_CSP_ST_QSOUND) {
@@ -894,6 +897,7 @@ static int snd_sb_csp_stop(struct snd_sb_csp * p)
        spin_unlock(&p->chip->reg_lock);
 
        /* restore PCM volume */
+       spin_lock_irqsave(&p->chip->mixer_lock, flags);
        snd_sbmixer_write(p->chip, SB_DSP4_PCM_DEV, mixL);
        snd_sbmixer_write(p->chip, SB_DSP4_PCM_DEV + 1, mixR);
        spin_unlock_irqrestore(&p->chip->mixer_lock, flags);
index 4b2cc8c..e143e69 100644 (file)
@@ -1940,6 +1940,8 @@ static int hdmi_add_cvt(struct hda_codec *codec, hda_nid_t cvt_nid)
 static const struct snd_pci_quirk force_connect_list[] = {
        SND_PCI_QUIRK(0x103c, 0x870f, "HP", 1),
        SND_PCI_QUIRK(0x103c, 0x871a, "HP", 1),
+       SND_PCI_QUIRK(0x1462, 0xec94, "MS-7C94", 1),
+       SND_PCI_QUIRK(0x8086, 0x2081, "Intel NUC 10", 1),
        {}
 };
 
index 1389cfd..caaf0e8 100644 (file)
@@ -8626,6 +8626,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x17aa, 0x3151, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC),
        SND_PCI_QUIRK(0x17aa, 0x3176, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC),
        SND_PCI_QUIRK(0x17aa, 0x3178, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC),
+       SND_PCI_QUIRK(0x17aa, 0x31af, "ThinkCentre Station", ALC623_FIXUP_LENOVO_THINKSTATION_P340),
        SND_PCI_QUIRK(0x17aa, 0x3818, "Lenovo C940", ALC298_FIXUP_LENOVO_SPK_VOLUME),
        SND_PCI_QUIRK(0x17aa, 0x3827, "Ideapad S740", ALC285_FIXUP_IDEAPAD_S740_COEF),
        SND_PCI_QUIRK(0x17aa, 0x3843, "Yoga 9i", ALC287_FIXUP_IDEAPAD_BASS_SPK_AMP),
index 84e3906..9449fb4 100644 (file)
@@ -576,6 +576,7 @@ static struct snd_soc_dai_link cz_dai_5682_98357[] = {
                                | SND_SOC_DAIFMT_CBM_CFM,
                .init = cz_rt5682_init,
                .dpcm_playback = 1,
+               .stop_dma_first = 1,
                .ops = &cz_rt5682_play_ops,
                SND_SOC_DAILINK_REG(designware1, rt5682, platform),
        },
@@ -585,6 +586,7 @@ static struct snd_soc_dai_link cz_dai_5682_98357[] = {
                .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF
                                | SND_SOC_DAIFMT_CBM_CFM,
                .dpcm_capture = 1,
+               .stop_dma_first = 1,
                .ops = &cz_rt5682_cap_ops,
                SND_SOC_DAILINK_REG(designware2, rt5682, platform),
        },
@@ -594,6 +596,7 @@ static struct snd_soc_dai_link cz_dai_5682_98357[] = {
                .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF
                                | SND_SOC_DAIFMT_CBM_CFM,
                .dpcm_playback = 1,
+               .stop_dma_first = 1,
                .ops = &cz_rt5682_max_play_ops,
                SND_SOC_DAILINK_REG(designware3, mx, platform),
        },
@@ -604,6 +607,7 @@ static struct snd_soc_dai_link cz_dai_5682_98357[] = {
                .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF
                                | SND_SOC_DAIFMT_CBM_CFM,
                .dpcm_capture = 1,
+               .stop_dma_first = 1,
                .ops = &cz_rt5682_dmic0_cap_ops,
                SND_SOC_DAILINK_REG(designware3, adau, platform),
        },
@@ -614,6 +618,7 @@ static struct snd_soc_dai_link cz_dai_5682_98357[] = {
                .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF
                                | SND_SOC_DAIFMT_CBM_CFM,
                .dpcm_capture = 1,
+               .stop_dma_first = 1,
                .ops = &cz_rt5682_dmic1_cap_ops,
                SND_SOC_DAILINK_REG(designware2, adau, platform),
        },
index 7ebae3f..a3b784e 100644 (file)
@@ -1325,7 +1325,7 @@ config SND_SOC_SSM2305
          high-efficiency mono Class-D audio power amplifiers.
 
 config SND_SOC_SSM2518
-       tristate
+       tristate "Analog Devices SSM2518 Class-D Amplifier"
        depends on I2C
 
 config SND_SOC_SSM2602
@@ -1557,6 +1557,7 @@ config SND_SOC_WCD934X
          Qualcomm SoCs like SDM845.
 
 config SND_SOC_WCD938X
+       depends on SND_SOC_WCD938X_SDW
        tristate
 
 config SND_SOC_WCD938X_SDW
@@ -1813,11 +1814,6 @@ config SND_SOC_ZL38060
          which consists of a Digital Signal Processor (DSP), several Digital
          Audio Interfaces (DAIs), analog outputs, and a block of 14 GPIOs.
 
-config SND_SOC_ZX_AUD96P22
-       tristate "ZTE ZX AUD96P22 CODEC"
-       depends on I2C
-       select REGMAP_I2C
-
 # Amp
 config SND_SOC_LM4857
        tristate
index 3000bc1..38356ea 100644 (file)
@@ -1695,6 +1695,8 @@ static const struct regmap_config rt5631_regmap_config = {
        .reg_defaults = rt5631_reg,
        .num_reg_defaults = ARRAY_SIZE(rt5631_reg),
        .cache_type = REGCACHE_RBTREE,
+       .use_single_read = true,
+       .use_single_write = true,
 };
 
 static int rt5631_i2c_probe(struct i2c_client *i2c,
index e4c9157..abcd6f4 100644 (file)
@@ -973,10 +973,14 @@ int rt5682_headset_detect(struct snd_soc_component *component, int jack_insert)
                rt5682_enable_push_button_irq(component, false);
                snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_1,
                        RT5682_TRIG_JD_MASK, RT5682_TRIG_JD_LOW);
-               if (!snd_soc_dapm_get_pin_status(dapm, "MICBIAS"))
+               if (!snd_soc_dapm_get_pin_status(dapm, "MICBIAS") &&
+                       !snd_soc_dapm_get_pin_status(dapm, "PLL1") &&
+                       !snd_soc_dapm_get_pin_status(dapm, "PLL2B"))
                        snd_soc_component_update_bits(component,
                                RT5682_PWR_ANLG_1, RT5682_PWR_MB, 0);
-               if (!snd_soc_dapm_get_pin_status(dapm, "Vref2"))
+               if (!snd_soc_dapm_get_pin_status(dapm, "Vref2") &&
+                       !snd_soc_dapm_get_pin_status(dapm, "PLL1") &&
+                       !snd_soc_dapm_get_pin_status(dapm, "PLL2B"))
                        snd_soc_component_update_bits(component,
                                RT5682_PWR_ANLG_1, RT5682_PWR_VREF2, 0);
                snd_soc_component_update_bits(component, RT5682_PWR_ANLG_3,
index 51870d5..b504d63 100644 (file)
@@ -1604,6 +1604,8 @@ static int aic31xx_i2c_probe(struct i2c_client *i2c,
                        ret);
                return ret;
        }
+       regcache_cache_only(aic31xx->regmap, true);
+
        aic31xx->dev = &i2c->dev;
        aic31xx->irq = i2c->irq;
 
index 8195298..2513922 100644 (file)
@@ -151,8 +151,8 @@ struct aic31xx_pdata {
 #define AIC31XX_WORD_LEN_24BITS                0x02
 #define AIC31XX_WORD_LEN_32BITS                0x03
 #define AIC31XX_IFACE1_MASTER_MASK     GENMASK(3, 2)
-#define AIC31XX_BCLK_MASTER            BIT(2)
-#define AIC31XX_WCLK_MASTER            BIT(3)
+#define AIC31XX_BCLK_MASTER            BIT(3)
+#define AIC31XX_WCLK_MASTER            BIT(2)
 
 /* AIC31XX_DATA_OFFSET */
 #define AIC31XX_DATA_OFFSET_MASK       GENMASK(7, 0)
index c63b717..dcd8aeb 100644 (file)
@@ -250,8 +250,8 @@ static DECLARE_TLV_DB_SCALE(tlv_pcm, -6350, 50, 0);
 static DECLARE_TLV_DB_SCALE(tlv_driver_gain, -600, 100, 0);
 /* -12dB min, 0.5dB steps */
 static DECLARE_TLV_DB_SCALE(tlv_adc_vol, -1200, 50, 0);
-
-static DECLARE_TLV_DB_LINEAR(tlv_spk_vol, TLV_DB_GAIN_MUTE, 0);
+/* -58.5dB min, 0.5dB steps */
+static DECLARE_TLV_DB_SCALE(tlv_tas_driver_gain, -5850, 50, 0);
 static DECLARE_TLV_DB_SCALE(tlv_amp_vol, 0, 600, 1);
 
 static const char * const lo_cm_text[] = {
@@ -1063,21 +1063,20 @@ static const struct snd_soc_component_driver soc_component_dev_aic32x4 = {
 };
 
 static const struct snd_kcontrol_new aic32x4_tas2505_snd_controls[] = {
-       SOC_DOUBLE_R_S_TLV("PCM Playback Volume", AIC32X4_LDACVOL,
-                       AIC32X4_LDACVOL, 0, -0x7f, 0x30, 7, 0, tlv_pcm),
+       SOC_SINGLE_S8_TLV("PCM Playback Volume",
+                         AIC32X4_LDACVOL, -0x7f, 0x30, tlv_pcm),
        SOC_ENUM("DAC Playback PowerTune Switch", l_ptm_enum),
-       SOC_DOUBLE_R_S_TLV("HP Driver Playback Volume", AIC32X4_HPLGAIN,
-                       AIC32X4_HPLGAIN, 0, -0x6, 0x1d, 5, 0,
-                       tlv_driver_gain),
-       SOC_DOUBLE_R("HP DAC Playback Switch", AIC32X4_HPLGAIN,
-                       AIC32X4_HPLGAIN, 6, 0x01, 1),
 
-       SOC_SINGLE("Auto-mute Switch", AIC32X4_DACMUTE, 4, 7, 0),
+       SOC_SINGLE_TLV("HP Driver Gain Volume",
+                       AIC32X4_HPLGAIN, 0, 0x74, 1, tlv_tas_driver_gain),
+       SOC_SINGLE("HP DAC Playback Switch", AIC32X4_HPLGAIN, 6, 1, 1),
 
-       SOC_SINGLE_RANGE_TLV("Speaker Driver Playback Volume", TAS2505_SPKVOL1,
-                       0, 0, 117, 1, tlv_spk_vol),
-       SOC_SINGLE_TLV("Speaker Amplifier Playback Volume", TAS2505_SPKVOL2,
-                       4, 5, 0, tlv_amp_vol),
+       SOC_SINGLE_TLV("Speaker Driver Playback Volume",
+                       TAS2505_SPKVOL1, 0, 0x74, 1, tlv_tas_driver_gain),
+       SOC_SINGLE_TLV("Speaker Amplifier Playback Volume",
+                       TAS2505_SPKVOL2, 4, 5, 0, tlv_amp_vol),
+
+       SOC_SINGLE("Auto-mute Switch", AIC32X4_DACMUTE, 4, 7, 0),
 };
 
 static const struct snd_kcontrol_new hp_output_mixer_controls[] = {
index 78b76ec..2fcc973 100644 (file)
@@ -3317,13 +3317,6 @@ static int wcd938x_soc_codec_probe(struct snd_soc_component *component)
                             (WCD938X_DIGITAL_INTR_LEVEL_0 + i), 0);
        }
 
-       ret = wcd938x_irq_init(wcd938x, component->dev);
-       if (ret) {
-               dev_err(component->dev, "%s: IRQ init failed: %d\n",
-                       __func__, ret);
-               return ret;
-       }
-
        wcd938x->hphr_pdm_wd_int = regmap_irq_get_virq(wcd938x->irq_chip,
                                                       WCD938X_IRQ_HPHR_PDM_WD_INT);
        wcd938x->hphl_pdm_wd_int = regmap_irq_get_virq(wcd938x->irq_chip,
@@ -3553,7 +3546,6 @@ static int wcd938x_bind(struct device *dev)
        }
        wcd938x->sdw_priv[AIF1_PB] = dev_get_drvdata(wcd938x->rxdev);
        wcd938x->sdw_priv[AIF1_PB]->wcd938x = wcd938x;
-       wcd938x->sdw_priv[AIF1_PB]->slave_irq = wcd938x->virq;
 
        wcd938x->txdev = wcd938x_sdw_device_get(wcd938x->txnode);
        if (!wcd938x->txdev) {
@@ -3562,7 +3554,6 @@ static int wcd938x_bind(struct device *dev)
        }
        wcd938x->sdw_priv[AIF1_CAP] = dev_get_drvdata(wcd938x->txdev);
        wcd938x->sdw_priv[AIF1_CAP]->wcd938x = wcd938x;
-       wcd938x->sdw_priv[AIF1_CAP]->slave_irq = wcd938x->virq;
        wcd938x->tx_sdw_dev = dev_to_sdw_dev(wcd938x->txdev);
        if (!wcd938x->tx_sdw_dev) {
                dev_err(dev, "could not get txslave with matching of dev\n");
@@ -3595,6 +3586,15 @@ static int wcd938x_bind(struct device *dev)
                return PTR_ERR(wcd938x->regmap);
        }
 
+       ret = wcd938x_irq_init(wcd938x, dev);
+       if (ret) {
+               dev_err(dev, "%s: IRQ init failed: %d\n", __func__, ret);
+               return ret;
+       }
+
+       wcd938x->sdw_priv[AIF1_PB]->slave_irq = wcd938x->virq;
+       wcd938x->sdw_priv[AIF1_CAP]->slave_irq = wcd938x->virq;
+
        ret = wcd938x_set_micbias_data(wcd938x);
        if (ret < 0) {
                dev_err(dev, "%s: bad micbias pdata\n", __func__);
index 37aa020..549d982 100644 (file)
 /*
  * HALO_CCM_CORE_CONTROL
  */
+#define HALO_CORE_RESET                     0x00000200
 #define HALO_CORE_EN                        0x00000001
 
 /*
@@ -1213,7 +1214,7 @@ static int wm_coeff_tlv_get(struct snd_kcontrol *kctl,
 
        mutex_lock(&ctl->dsp->pwr_lock);
 
-       ret = wm_coeff_read_ctrl_raw(ctl, ctl->cache, size);
+       ret = wm_coeff_read_ctrl(ctl, ctl->cache, size);
 
        if (!ret && copy_to_user(bytes, ctl->cache, size))
                ret = -EFAULT;
@@ -3333,7 +3334,8 @@ static int wm_halo_start_core(struct wm_adsp *dsp)
 {
        return regmap_update_bits(dsp->regmap,
                                  dsp->base + HALO_CCM_CORE_CONTROL,
-                                 HALO_CORE_EN, HALO_CORE_EN);
+                                 HALO_CORE_RESET | HALO_CORE_EN,
+                                 HALO_CORE_RESET | HALO_CORE_EN);
 }
 
 static void wm_halo_stop_core(struct wm_adsp *dsp)
index 0e7ed90..25daef9 100644 (file)
@@ -55,43 +55,68 @@ static int spk_init(struct snd_soc_pcm_runtime *rtd)
        return ret;
 }
 
-static int max98373_sdw_trigger(struct snd_pcm_substream *substream, int cmd)
+static int mx8373_enable_spk_pin(struct snd_pcm_substream *substream, bool enable)
 {
+       struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream);
+       struct snd_soc_dai *codec_dai;
+       struct snd_soc_dai *cpu_dai;
        int ret;
+       int j;
 
-       switch (cmd) {
-       case SNDRV_PCM_TRIGGER_START:
-       case SNDRV_PCM_TRIGGER_RESUME:
-       case SNDRV_PCM_TRIGGER_PAUSE_RELEASE:
-               /* enable max98373 first */
-               ret = max_98373_trigger(substream, cmd);
-               if (ret < 0)
-                       break;
-
-               ret = sdw_trigger(substream, cmd);
-               break;
-       case SNDRV_PCM_TRIGGER_STOP:
-       case SNDRV_PCM_TRIGGER_SUSPEND:
-       case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
-               ret = sdw_trigger(substream, cmd);
-               if (ret < 0)
-                       break;
-
-               ret = max_98373_trigger(substream, cmd);
-               break;
-       default:
-               ret = -EINVAL;
-               break;
+       /* toggle the speaker pins for playback streams only */
+       if (substream->stream == SNDRV_PCM_STREAM_CAPTURE)
+               return 0;
+
+       cpu_dai = asoc_rtd_to_cpu(rtd, 0);
+       for_each_rtd_codec_dais(rtd, j, codec_dai) {
+               struct snd_soc_dapm_context *dapm =
+                               snd_soc_component_get_dapm(cpu_dai->component);
+               char pin_name[16];
+
+               snprintf(pin_name, ARRAY_SIZE(pin_name), "%s Spk",
+                        codec_dai->component->name_prefix);
+
+               if (enable)
+                       ret = snd_soc_dapm_enable_pin(dapm, pin_name);
+               else
+                       ret = snd_soc_dapm_disable_pin(dapm, pin_name);
+
+               if (!ret)
+                       snd_soc_dapm_sync(dapm);
        }
 
-       return ret;
+       return 0;
+}
+
+static int mx8373_sdw_prepare(struct snd_pcm_substream *substream)
+{
+       int ret = 0;
+
+       /* per soc_pcm_prepare(), the dai link prepare callback runs first */
+       ret = sdw_prepare(substream);
+       if (ret < 0)
+               return ret;
+
+       return mx8373_enable_spk_pin(substream, true);
+}
+
+static int mx8373_sdw_hw_free(struct snd_pcm_substream *substream)
+{
+       int ret = 0;
+
+       /* per soc_pcm_hw_free(), the dai link hw_free callback runs first */
+       ret = sdw_hw_free(substream);
+       if (ret < 0)
+               return ret;
+
+       return mx8373_enable_spk_pin(substream, false);
 }
 
 static const struct snd_soc_ops max_98373_sdw_ops = {
        .startup = sdw_startup,
-       .prepare = sdw_prepare,
-       .trigger = max98373_sdw_trigger,
-       .hw_free = sdw_hw_free,
+       .prepare = mx8373_sdw_prepare,
+       .trigger = sdw_trigger,
+       .hw_free = mx8373_sdw_hw_free,
        .shutdown = sdw_shutdown,
 };
 
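The reordered callbacks above lean on the call sequence noted in their comments: the DAI-link ops run before the component and DAI handlers in both prepare and hw_free, so the machine driver can wrap the SoundWire helpers and toggle the speaker pins afterwards. A minimal sketch of the pin-toggle idiom, with an illustrative helper name not taken from the patch:

#include <sound/soc.h>
#include <sound/soc-dapm.h>

/* Illustrative helper (name not from the patch): flip a named DAPM pin
 * and, on success, re-run the DAPM graph so the change takes effect,
 * mirroring what mx8373_enable_spk_pin() does for each codec DAI.
 */
static int example_toggle_pin(struct snd_soc_dapm_context *dapm,
			      const char *pin, bool enable)
{
	int ret = enable ? snd_soc_dapm_enable_pin(dapm, pin)
			 : snd_soc_dapm_disable_pin(dapm, pin);

	if (!ret)
		return snd_soc_dapm_sync(dapm);
	return ret;
}
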
index 46513bb..d1c570c 100644 (file)
@@ -1015,6 +1015,7 @@ out:
 
 static int soc_pcm_trigger(struct snd_pcm_substream *substream, int cmd)
 {
+       struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream);
        int ret = -EINVAL, _ret = 0;
        int rollback = 0;
 
@@ -1055,14 +1056,23 @@ start_err:
        case SNDRV_PCM_TRIGGER_STOP:
        case SNDRV_PCM_TRIGGER_SUSPEND:
        case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
-               ret = snd_soc_pcm_dai_trigger(substream, cmd, rollback);
-               if (ret < 0)
-                       break;
+               if (rtd->dai_link->stop_dma_first) {
+                       ret = snd_soc_pcm_component_trigger(substream, cmd, rollback);
+                       if (ret < 0)
+                               break;
 
-               ret = snd_soc_pcm_component_trigger(substream, cmd, rollback);
-               if (ret < 0)
-                       break;
+                       ret = snd_soc_pcm_dai_trigger(substream, cmd, rollback);
+                       if (ret < 0)
+                               break;
+               } else {
+                       ret = snd_soc_pcm_dai_trigger(substream, cmd, rollback);
+                       if (ret < 0)
+                               break;
 
+                       ret = snd_soc_pcm_component_trigger(substream, cmd, rollback);
+                       if (ret < 0)
+                               break;
+               }
                ret = snd_soc_link_trigger(substream, cmd, rollback);
                break;
        }
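
The new dai_link flag lets a machine driver ask that the platform/component (typically the DMA) side be triggered before the DAI on the stop path, as the acp dai_links earlier in this section do. A hedged fragment showing the opt-in, with illustrative names:

#include <sound/soc.h>

/* Illustrative dai_link fragment: with stop_dma_first set,
 * soc_pcm_trigger() stops the component (DMA) side before the DAI on
 * STOP/SUSPEND/PAUSE_PUSH triggers.
 */
static struct snd_soc_dai_link example_dai_link = {
	.name		= "example-link",
	.dpcm_playback	= 1,
	.stop_dma_first	= 1,
};
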
index a002621..d04ce84 100644 (file)
@@ -89,6 +89,7 @@ static const struct sof_dev_desc adls_desc = {
 static const struct sof_dev_desc adl_desc = {
        .machines               = snd_soc_acpi_intel_adl_machines,
        .alt_machines           = snd_soc_acpi_intel_adl_sdw_machines,
+       .use_acpi_target_states = true,
        .resindex_lpe_base      = 0,
        .resindex_pcicfg_base   = -1,
        .resindex_imr_base      = -1,
index 573374b..d3276b4 100644 (file)
@@ -213,19 +213,19 @@ snd_pcm_uframes_t tegra_pcm_pointer(struct snd_soc_component *component,
 }
 EXPORT_SYMBOL_GPL(tegra_pcm_pointer);
 
-static int tegra_pcm_preallocate_dma_buffer(struct snd_pcm *pcm, int stream,
+static int tegra_pcm_preallocate_dma_buffer(struct device *dev, struct snd_pcm *pcm, int stream,
                                            size_t size)
 {
        struct snd_pcm_substream *substream = pcm->streams[stream].substream;
        struct snd_dma_buffer *buf = &substream->dma_buffer;
 
-       buf->area = dma_alloc_wc(pcm->card->dev, size, &buf->addr, GFP_KERNEL);
+       buf->area = dma_alloc_wc(dev, size, &buf->addr, GFP_KERNEL);
        if (!buf->area)
                return -ENOMEM;
 
        buf->private_data = NULL;
        buf->dev.type = SNDRV_DMA_TYPE_DEV;
-       buf->dev.dev = pcm->card->dev;
+       buf->dev.dev = dev;
        buf->bytes = size;
 
        return 0;
@@ -244,31 +244,28 @@ static void tegra_pcm_deallocate_dma_buffer(struct snd_pcm *pcm, int stream)
        if (!buf->area)
                return;
 
-       dma_free_wc(pcm->card->dev, buf->bytes, buf->area, buf->addr);
+       dma_free_wc(buf->dev.dev, buf->bytes, buf->area, buf->addr);
        buf->area = NULL;
 }
 
-static int tegra_pcm_dma_allocate(struct snd_soc_pcm_runtime *rtd,
+static int tegra_pcm_dma_allocate(struct device *dev, struct snd_soc_pcm_runtime *rtd,
                                  size_t size)
 {
-       struct snd_card *card = rtd->card->snd_card;
        struct snd_pcm *pcm = rtd->pcm;
        int ret;
 
-       ret = dma_set_mask_and_coherent(card->dev, DMA_BIT_MASK(32));
+       ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
        if (ret < 0)
                return ret;
 
        if (pcm->streams[SNDRV_PCM_STREAM_PLAYBACK].substream) {
-               ret = tegra_pcm_preallocate_dma_buffer(pcm,
-                       SNDRV_PCM_STREAM_PLAYBACK, size);
+               ret = tegra_pcm_preallocate_dma_buffer(dev, pcm, SNDRV_PCM_STREAM_PLAYBACK, size);
                if (ret)
                        goto err;
        }
 
        if (pcm->streams[SNDRV_PCM_STREAM_CAPTURE].substream) {
-               ret = tegra_pcm_preallocate_dma_buffer(pcm,
-                       SNDRV_PCM_STREAM_CAPTURE, size);
+               ret = tegra_pcm_preallocate_dma_buffer(dev, pcm, SNDRV_PCM_STREAM_CAPTURE, size);
                if (ret)
                        goto err_free_play;
        }
@@ -284,7 +281,16 @@ err:
 int tegra_pcm_construct(struct snd_soc_component *component,
                        struct snd_soc_pcm_runtime *rtd)
 {
-       return tegra_pcm_dma_allocate(rtd, tegra_pcm_hardware.buffer_bytes_max);
+       struct device *dev = component->dev;
+
+       /*
+        * Fallback for backwards-compatibility with older device trees that
+        * have the iommus property in the virtual, top-level "sound" node.
+        */
+       if (!of_get_property(dev->of_node, "iommus", NULL))
+               dev = rtd->card->snd_card->dev;
+
+       return tegra_pcm_dma_allocate(dev, rtd, tegra_pcm_hardware.buffer_bytes_max);
 }
 EXPORT_SYMBOL_GPL(tegra_pcm_construct);
 
index a7c0484..265bbc5 100644 (file)
@@ -197,7 +197,7 @@ static int j721e_configure_refclk(struct j721e_priv *priv,
                return ret;
        }
 
-       if (priv->hsdiv_rates[domain->parent_clk_id] != scki) {
+       if (domain->parent_clk_id == -1 || priv->hsdiv_rates[domain->parent_clk_id] != scki) {
                dev_dbg(priv->dev,
                        "%s configuration for %u Hz: %s, %dxFS (SCKI: %u Hz)\n",
                        audio_domain == J721E_AUDIO_DOMAIN_CPB ? "CPB" : "IVI",
@@ -278,23 +278,29 @@ static int j721e_audio_startup(struct snd_pcm_substream *substream)
                                          j721e_rule_rate, &priv->rate_range,
                                          SNDRV_PCM_HW_PARAM_RATE, -1);
 
-       mutex_unlock(&priv->mutex);
 
        if (ret)
-               return ret;
+               goto out;
 
        /* Reset TDM slots to 32 */
        ret = snd_soc_dai_set_tdm_slot(cpu_dai, 0x3, 0x3, 2, 32);
        if (ret && ret != -ENOTSUPP)
-               return ret;
+               goto out;
 
        for_each_rtd_codec_dais(rtd, i, codec_dai) {
                ret = snd_soc_dai_set_tdm_slot(codec_dai, 0x3, 0x3, 2, 32);
                if (ret && ret != -ENOTSUPP)
-                       return ret;
+                       goto out;
        }
 
-       return 0;
+       if (ret == -ENOTSUPP)
+               ret = 0;
+out:
+       if (ret)
+               domain->active--;
+       mutex_unlock(&priv->mutex);
+
+       return ret;
 }
 
 static int j721e_audio_hw_params(struct snd_pcm_substream *substream,
index 30b3e12..f4cdaf1 100644 (file)
@@ -3295,7 +3295,15 @@ static void snd_usb_mixer_dump_cval(struct snd_info_buffer *buffer,
 {
        struct usb_mixer_elem_info *cval = mixer_elem_list_to_info(list);
        static const char * const val_types[] = {
-               "BOOLEAN", "INV_BOOLEAN", "S8", "U8", "S16", "U16", "S32", "U32",
+               [USB_MIXER_BOOLEAN] = "BOOLEAN",
+               [USB_MIXER_INV_BOOLEAN] = "INV_BOOLEAN",
+               [USB_MIXER_S8] = "S8",
+               [USB_MIXER_U8] = "U8",
+               [USB_MIXER_S16] = "S16",
+               [USB_MIXER_U16] = "U16",
+               [USB_MIXER_S32] = "S32",
+               [USB_MIXER_U32] = "U32",
+               [USB_MIXER_BESPOKEN] = "BESPOKEN",
        };
        snd_iprintf(buffer, "    Info: id=%i, control=%i, cmask=0x%x, "
                            "channels=%i, type=\"%s\"\n", cval->head.id,
index 8b8bee3..e7accd8 100644 (file)
@@ -1897,6 +1897,9 @@ static const struct registration_quirk registration_quirks[] = {
        REG_QUIRK_ENTRY(0x0951, 0x16d8, 2),     /* Kingston HyperX AMP */
        REG_QUIRK_ENTRY(0x0951, 0x16ed, 2),     /* Kingston HyperX Cloud Alpha S */
        REG_QUIRK_ENTRY(0x0951, 0x16ea, 2),     /* Kingston HyperX Cloud Flight S */
+       REG_QUIRK_ENTRY(0x0ecb, 0x1f46, 2),     /* JBL Quantum 600 */
+       REG_QUIRK_ENTRY(0x0ecb, 0x2039, 2),     /* JBL Quantum 400 */
+       REG_QUIRK_ENTRY(0x0ecb, 0x203e, 2),     /* JBL Quantum 800 */
        { 0 }                                   /* terminator */
 };
 
index ff4d327..88b28aa 100644 (file)
@@ -12,7 +12,8 @@ SYNOPSIS
 
        **bpftool** [*OPTIONS*] **btf** *COMMAND*
 
-       *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] }
+       *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } |
+               { **-B** | **--base-btf** } }
 
        *COMMANDS* := { **dump** | **help** }
 
@@ -73,6 +74,20 @@ OPTIONS
 =======
        .. include:: common_options.rst
 
+       -B, --base-btf *FILE*
+                 Pass a base BTF object. Base BTF objects are typically used
+                 with BTF objects for kernel modules. To avoid duplicating
+                 all kernel symbols required by modules, BTF objects for
+                 modules are "split": they are built incrementally on top of
+                 the kernel (vmlinux) BTF object. So the base BTF reference
+                 should usually point to the kernel BTF.
+
+                 When the main BTF object to process (for example, the
+                 module BTF to dump) is passed as a *FILE*, bpftool attempts
+                 to autodetect the path for the base object, so passing
+                 this option is optional. When the main BTF object is passed
+                 through other handles, this option becomes necessary.
+
 EXAMPLES
 ========
 **# bpftool btf dump id 1226**
@@ -217,3 +232,34 @@ All the standard ways to specify map or program are supported:
 **# bpftool btf dump prog tag b88e0a09b1d9759d**
 
 **# bpftool btf dump prog pinned /sys/fs/bpf/prog_name**
+
+|
+| **# bpftool btf dump file /sys/kernel/btf/i2c_smbus**
+| (or)
+| **# I2C_SMBUS_ID=$(bpftool btf show -p | jq '.[] | select(.name=="i2c_smbus").id')**
+| **# bpftool btf dump id ${I2C_SMBUS_ID} -B /sys/kernel/btf/vmlinux**
+
+::
+
+  [104848] STRUCT 'i2c_smbus_alert' size=40 vlen=2
+          'alert' type_id=393 bits_offset=0
+          'ara' type_id=56050 bits_offset=256
+  [104849] STRUCT 'alert_data' size=12 vlen=3
+          'addr' type_id=16 bits_offset=0
+          'type' type_id=56053 bits_offset=32
+          'data' type_id=7 bits_offset=64
+  [104850] PTR '(anon)' type_id=104848
+  [104851] PTR '(anon)' type_id=104849
+  [104852] FUNC 'i2c_register_spd' type_id=84745 linkage=static
+  [104853] FUNC 'smbalert_driver_init' type_id=1213 linkage=static
+  [104854] FUNC_PROTO '(anon)' ret_type_id=18 vlen=1
+          'ara' type_id=56050
+  [104855] FUNC 'i2c_handle_smbus_alert' type_id=104854 linkage=static
+  [104856] FUNC 'smbalert_remove' type_id=104854 linkage=static
+  [104857] FUNC_PROTO '(anon)' ret_type_id=18 vlen=2
+          'ara' type_id=56050
+          'id' type_id=56056
+  [104858] FUNC 'smbalert_probe' type_id=104857 linkage=static
+  [104859] FUNC 'smbalert_work' type_id=9695 linkage=static
+  [104860] FUNC 'smbus_alert' type_id=71367 linkage=static
+  [104861] FUNC 'smbus_do_alert' type_id=84827 linkage=static
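
Module BTF objects are split: their type IDs continue the vmlinux numbering (note the IDs above starting in the 104000s), so they only resolve against a base object, which is what **-B** supplies. A rough C equivalent using libbpf's split parser, with the same sysfs paths as the example above:

#include <stdio.h>
#include <bpf/btf.h>
#include <bpf/libbpf.h>

int main(void)
{
	struct btf *vmlinux_btf, *module_btf;

	/* Base object: the running kernel's BTF. */
	vmlinux_btf = btf__parse("/sys/kernel/btf/vmlinux", NULL);
	if (libbpf_get_error(vmlinux_btf))
		return 1;

	/* Split object: module types appended on top of the base IDs. */
	module_btf = btf__parse_split("/sys/kernel/btf/i2c_smbus", vmlinux_btf);
	if (libbpf_get_error(module_btf)) {
		btf__free(vmlinux_btf);
		return 1;
	}

	printf("module BTF carries %u types (base included)\n",
	       btf__get_nr_types(module_btf));

	btf__free(module_btf);
	btf__free(vmlinux_btf);
	return 0;
}
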
index baee859..3e4395e 100644 (file)
@@ -12,7 +12,8 @@ SYNOPSIS
 
        **bpftool** [*OPTIONS*] **cgroup** *COMMAND*
 
-       *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } }
+       *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } |
+               { **-f** | **--bpffs** } }
 
        *COMMANDS* :=
        { **show** | **list** | **tree** | **attach** | **detach** | **help** }
index dd3771b..ab9f57e 100644 (file)
@@ -12,7 +12,7 @@ SYNOPSIS
 
        **bpftool** [*OPTIONS*] **feature** *COMMAND*
 
-       *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] }
+       *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } }
 
        *COMMANDS* := { **probe** | **help** }
 
index 7cd6681..2ef2f2d 100644 (file)
@@ -12,7 +12,8 @@ SYNOPSIS
 
        **bpftool** [*OPTIONS*] **gen** *COMMAND*
 
-       *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] }
+       *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } |
+               { **-L** | **--use-loader** } }
 
        *COMMAND* := { **object** | **skeleton** | **help** }
 
@@ -152,6 +153,12 @@ OPTIONS
 =======
        .. include:: common_options.rst
 
+       -L, --use-loader
+                 For skeletons, generate a "light" skeleton (also known as a
+                 "loader" skeleton). A light skeleton contains a loader eBPF
+                 program; it does not use the majority of the libbpf
+                 infrastructure and does not need libelf.
+
 EXAMPLES
 ========
 **$ cat example1.bpf.c**
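
Complementing the listing above, a light skeleton built with **-L** is consumed much like a regular libbpf skeleton. A minimal sketch, assuming the conventional generated names (example1__open_and_load() and friends) rather than anything taken from the patch itself:

#include <stdio.h>
#include "example1.skel.h"	/* assumed: bpftool gen skeleton -L example1.bpf.o */

int main(void)
{
	struct example1 *skel;

	/* With -L, open_and_load() executes the embedded loader eBPF
	 * program to create maps and load programs, instead of going
	 * through the regular libbpf/libelf loading path.
	 */
	skel = example1__open_and_load();
	if (!skel) {
		fprintf(stderr, "failed to load skeleton\n");
		return 1;
	}

	/* ... use the skeleton's map and program handles here ... */

	example1__destroy(skel);
	return 0;
}
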
index 51f49be..471f363 100644 (file)
@@ -12,6 +12,8 @@ SYNOPSIS
 
        **bpftool** [*OPTIONS*] **iter** *COMMAND*
 
+       *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } }
+
        *COMMANDS* := { **pin** | **help** }
 
 ITER COMMANDS
index 5f7db2a..0de90f0 100644 (file)
@@ -12,7 +12,8 @@ SYNOPSIS
 
        **bpftool** [*OPTIONS*] **link** *COMMAND*
 
-       *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } }
+       *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } |
+               { **-f** | **--bpffs** } | { **-n** | **--nomount** } }
 
        *COMMANDS* := { **show** | **list** | **pin** | **help** }
 
index 3d52256..d0c4abe 100644 (file)
@@ -12,7 +12,8 @@ SYNOPSIS
 
        **bpftool** [*OPTIONS*] **map** *COMMAND*
 
-       *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } }
+       *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } |
+               { **-f** | **--bpffs** } | { **-n** | **--nomount** } }
 
        *COMMANDS* :=
        { **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext**
index d8165d5..1ae0375 100644 (file)
@@ -12,7 +12,7 @@ SYNOPSIS
 
        **bpftool** [*OPTIONS*] **net** *COMMAND*
 
-       *OPTIONS* := { [{ **-j** | **--json** }] [{ **-p** | **--pretty** }] }
+       *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } }
 
        *COMMANDS* :=
        { **show** | **list** | **attach** | **detach** | **help** }
index e958ce9..ce52798 100644 (file)
@@ -12,7 +12,7 @@ SYNOPSIS
 
        **bpftool** [*OPTIONS*] **perf** *COMMAND*
 
-       *OPTIONS* := { [{ **-j** | **--json** }] [{ **-p** | **--pretty** }] }
+       *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } }
 
        *COMMANDS* :=
        { **show** | **list** | **help** }
index fe1b38e..91608cb 100644 (file)
@@ -12,7 +12,9 @@ SYNOPSIS
 
        **bpftool** [*OPTIONS*] **prog** *COMMAND*
 
-       *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } }
+       *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } |
+               { **-f** | **--bpffs** } | { **-m** | **--mapcompat** } | { **-n** | **--nomount** } |
+               { **-L** | **--use-loader** } }
 
        *COMMANDS* :=
        { **show** | **list** | **dump xlated** | **dump jited** | **pin** | **load**
@@ -48,10 +50,11 @@ PROG COMMANDS
 |              **struct_ops** | **fentry** | **fexit** | **freplace** | **sk_lookup**
 |      }
 |       *ATTACH_TYPE* := {
-|              **msg_verdict** | **stream_verdict** | **stream_parser** | **flow_dissector**
+|              **msg_verdict** | **skb_verdict** | **stream_verdict** | **stream_parser** | **flow_dissector**
 |      }
 |      *METRICs* := {
-|              **cycles** | **instructions** | **l1d_loads** | **llc_misses**
+|              **cycles** | **instructions** | **l1d_loads** | **llc_misses** |
+|              **itlb_misses** | **dtlb_misses**
 |      }
 
 
@@ -223,6 +226,20 @@ OPTIONS
                  Do not automatically attempt to mount any virtual file system
                  (such as tracefs or BPF virtual file system) when necessary.
 
+       -L, --use-loader
+                 Load the program as a "loader" program. This is useful
+                 for debugging the generation of such programs. When this
+                 option is in use, bpftool attempts to load the programs
+                 from the object file into the kernel, but does not pin
+                 them (therefore, the *PATH* must not be provided).
+
+                 When combined with the **-d**\ \|\ **--debug** option,
+                 additional debug messages are generated, and the execution
+                 of the loader program will use the **bpf_trace_printk**\ ()
+                 helper to log each step of loading BTF, creating the maps,
+                 and loading the programs (see **bpftool prog tracelog** as
+                 a way to dump those messages).
+
 EXAMPLES
 ========
 **# bpftool prog show**
@@ -326,3 +343,16 @@ EXAMPLES
       40176203 cycles                                                 (83.05%)
       42518139 instructions    #   1.06 insns per cycle               (83.39%)
            123 llc_misses      #   2.89 LLC misses per million insns  (83.15%)
+
+|
+| Output below is for the trace logs.
+| Run in separate terminals:
+| **# bpftool prog tracelog**
+| **# bpftool prog load -L -d file.o**
+
+::
+
+    bpftool-620059  [004] d... 2634685.517903: bpf_trace_printk: btf_load size 665 r=5
+    bpftool-620059  [004] d... 2634685.517912: bpf_trace_printk: map_create sample_map idx 0 type 2 value_size 4 value_btf_id 0 r=6
+    bpftool-620059  [004] d... 2634685.517997: bpf_trace_printk: prog_load sample insn_cnt 13 r=7
+    bpftool-620059  [004] d... 2634685.517999: bpf_trace_printk: close(5) = 0
index 506e70e..02afc0f 100644 (file)
@@ -12,7 +12,7 @@ SYNOPSIS
 
        **bpftool** [*OPTIONS*] **struct_ops** *COMMAND*
 
-       *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] }
+       *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } }
 
        *COMMANDS* :=
        { **show** | **list** | **dump** | **register** | **unregister** | **help** }
index e7d9493..bb23f55 100644 (file)
@@ -18,15 +18,15 @@ SYNOPSIS
 
        *OBJECT* := { **map** | **program** | **cgroup** | **perf** | **net** | **feature** }
 
-       *OPTIONS* := { { **-V** | **--version** } | { **-h** | **--help** }
-       | { **-j** | **--json** } [{ **-p** | **--pretty** }] }
+       *OPTIONS* := { { **-V** | **--version** } |
+               { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } }
 
        *MAP-COMMANDS* :=
-       { **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext**
-       | **delete** | **pin** | **event_pipe** | **help** }
+       { **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext** |
+               **delete** | **pin** | **event_pipe** | **help** }
 
-       *PROG-COMMANDS* := { **show** | **list** | **dump jited** | **dump xlated** | **pin**
-       | **load** | **attach** | **detach** | **help** }
+       *PROG-COMMANDS* := { **show** | **list** | **dump jited** | **dump xlated** | **pin** |
+               **load** | **attach** | **detach** | **help** }
 
        *CGROUP-COMMANDS* := { **show** | **list** | **attach** | **detach** | **help** }
 
index cc33c58..88e2bcf 100644 (file)
@@ -260,7 +260,8 @@ _bpftool()
 
     # Deal with options
     if [[ ${words[cword]} == -* ]]; then
-        local c='--version --json --pretty --bpffs --mapcompat --debug'
+        local c='--version --json --pretty --bpffs --mapcompat --debug \
+              --use-loader --base-btf'
         COMPREPLY=( $( compgen -W "$c" -- "$cur" ) )
         return 0
     fi
@@ -278,7 +279,7 @@ _bpftool()
             _sysfs_get_netdevs
             return 0
             ;;
-        file|pinned)
+        file|pinned|-B|--base-btf)
             _filedir
             return 0
             ;;
@@ -291,7 +292,8 @@ _bpftool()
     # Remove all options so completions don't have to deal with them.
     local i
     for (( i=1; i < ${#words[@]}; )); do
-        if [[ ${words[i]::1} == - ]]; then
+        if [[ ${words[i]::1} == - ]] &&
+            [[ ${words[i]} != "-B" ]] && [[ ${words[i]} != "--base-btf" ]]; then
             words=( "${words[@]:0:i}" "${words[@]:i+1}" )
             [[ $i -le $cword ]] && cword=$(( cword - 1 ))
         else
@@ -343,7 +345,8 @@ _bpftool()
 
             local PROG_TYPE='id pinned tag name'
             local MAP_TYPE='id pinned name'
-            local METRIC_TYPE='cycles instructions l1d_loads llc_misses'
+            local METRIC_TYPE='cycles instructions l1d_loads llc_misses \
+                itlb_misses dtlb_misses'
             case $command in
                 show|list)
                     [[ $prev != "$command" ]] && return 0
@@ -404,8 +407,10 @@ _bpftool()
                             return 0
                             ;;
                         5)
-                            COMPREPLY=( $( compgen -W 'msg_verdict stream_verdict \
-                                stream_parser flow_dissector' -- "$cur" ) )
+                            local BPFTOOL_PROG_ATTACH_TYPES='msg_verdict \
+                                skb_verdict stream_verdict stream_parser \
+                                flow_dissector'
+                            COMPREPLY=( $( compgen -W "$BPFTOOL_PROG_ATTACH_TYPES" -- "$cur" ) )
                             return 0
                             ;;
                         6)
@@ -464,7 +469,7 @@ _bpftool()
 
                     case $prev in
                         type)
-                            COMPREPLY=( $( compgen -W "socket kprobe \
+                            local BPFTOOL_PROG_LOAD_TYPES='socket kprobe \
                                 kretprobe classifier flow_dissector \
                                 action tracepoint raw_tracepoint \
                                 xdp perf_event cgroup/skb cgroup/sock \
@@ -479,8 +484,8 @@ _bpftool()
                                 cgroup/post_bind4 cgroup/post_bind6 \
                                 cgroup/sysctl cgroup/getsockopt \
                                 cgroup/setsockopt cgroup/sock_release struct_ops \
-                                fentry fexit freplace sk_lookup" -- \
-                                                   "$cur" ) )
+                                fentry fexit freplace sk_lookup'
+                            COMPREPLY=( $( compgen -W "$BPFTOOL_PROG_LOAD_TYPES" -- "$cur" ) )
                             return 0
                             ;;
                         id)
@@ -698,15 +703,15 @@ _bpftool()
                             return 0
                             ;;
                         type)
-                            COMPREPLY=( $( compgen -W 'hash array prog_array \
-                                perf_event_array percpu_hash percpu_array \
-                                stack_trace cgroup_array lru_hash \
+                            local BPFTOOL_MAP_CREATE_TYPES='hash array \
+                                prog_array perf_event_array percpu_hash \
+                                percpu_array stack_trace cgroup_array lru_hash \
                                 lru_percpu_hash lpm_trie array_of_maps \
                                 hash_of_maps devmap devmap_hash sockmap cpumap \
                                 xskmap sockhash cgroup_storage reuseport_sockarray \
                                 percpu_cgroup_storage queue stack sk_storage \
-                                struct_ops inode_storage task_storage' -- \
-                                                   "$cur" ) )
+                                struct_ops inode_storage task_storage ringbuf'
+                            COMPREPLY=( $( compgen -W "$BPFTOOL_MAP_CREATE_TYPES" -- "$cur" ) )
                             return 0
                             ;;
                         key|value|flags|entries)
@@ -1017,34 +1022,37 @@ _bpftool()
                     return 0
                     ;;
                 attach|detach)
-                    local ATTACH_TYPES='ingress egress sock_create sock_ops \
-                        device bind4 bind6 post_bind4 post_bind6 connect4 connect6 \
+                    local BPFTOOL_CGROUP_ATTACH_TYPES='ingress egress \
+                        sock_create sock_ops device \
+                        bind4 bind6 post_bind4 post_bind6 connect4 connect6 \
                         getpeername4 getpeername6 getsockname4 getsockname6 \
                         sendmsg4 sendmsg6 recvmsg4 recvmsg6 sysctl getsockopt \
                         setsockopt sock_release'
                     local ATTACH_FLAGS='multi override'
                     local PROG_TYPE='id pinned tag name'
-                    case $prev in
-                        $command)
-                            _filedir
-                            return 0
-                            ;;
-                        ingress|egress|sock_create|sock_ops|device|bind4|bind6|\
-                        post_bind4|post_bind6|connect4|connect6|getpeername4|\
-                        getpeername6|getsockname4|getsockname6|sendmsg4|sendmsg6|\
-                        recvmsg4|recvmsg6|sysctl|getsockopt|setsockopt|sock_release)
+                    # Check for $prev = $command first
+                    if [ $prev = $command ]; then
+                        _filedir
+                        return 0
+                    # Then check for attach type. This is done outside of the
+                    # "case $prev in" to avoid writing the whole list of attach
+                    # types again as a pattern to match (case patterns
+                    # cannot reuse our variable).
+                    elif [[ $BPFTOOL_CGROUP_ATTACH_TYPES =~ $prev ]]; then
                             COMPREPLY=( $( compgen -W "$PROG_TYPE" -- \
                                 "$cur" ) )
                             return 0
-                            ;;
+                    fi
+                    # case/esac for the other cases
+                    case $prev in
                         id)
                             _bpftool_get_prog_ids
                             return 0
                             ;;
                         *)
-                            if ! _bpftool_search_list "$ATTACH_TYPES"; then
-                                COMPREPLY=( $( compgen -W "$ATTACH_TYPES" -- \
-                                    "$cur" ) )
+                            if ! _bpftool_search_list "$BPFTOOL_CGROUP_ATTACH_TYPES"; then
+                                COMPREPLY=( $( compgen -W \
+                                    "$BPFTOOL_CGROUP_ATTACH_TYPES" -- "$cur" ) )
                             elif [[ "$command" == "attach" ]]; then
                                 # We have an attach type on the command line,
                                 # but it is not the previous word, or
index 385d5c9..f7e5ff3 100644 (file)
@@ -580,16 +580,12 @@ static int do_dump(int argc, char **argv)
        }
 
        if (!btf) {
-               err = btf__get_from_id(btf_id, &btf);
+               btf = btf__load_from_kernel_by_id_split(btf_id, base_btf);
+               err = libbpf_get_error(btf);
                if (err) {
                        p_err("get btf by id (%u): %s", btf_id, strerror(err));
                        goto done;
                }
-               if (!btf) {
-                       err = -ENOENT;
-                       p_err("can't find btf with ID (%u)", btf_id);
-                       goto done;
-               }
        }
 
        if (dump_c) {
@@ -985,7 +981,8 @@ static int do_help(int argc, char **argv)
                "       FORMAT  := { raw | c }\n"
                "       " HELP_SPEC_MAP "\n"
                "       " HELP_SPEC_PROGRAM "\n"
-               "       " HELP_SPEC_OPTIONS "\n"
+               "       " HELP_SPEC_OPTIONS " |\n"
+               "                    {-B|--base-btf} }\n"
                "",
                bin_name, "btf");
 
index 7ca54d0..9c25286 100644 (file)
@@ -64,8 +64,10 @@ static int dump_prog_id_as_func_ptr(const struct btf_dumper *d,
        }
        info = &prog_info->info;
 
-       if (!info->btf_id || !info->nr_func_info ||
-           btf__get_from_id(info->btf_id, &prog_btf))
+       if (!info->btf_id || !info->nr_func_info)
+               goto print;
+       prog_btf = btf__load_from_kernel_by_id(info->btf_id);
+       if (libbpf_get_error(prog_btf))
                goto print;
        finfo = u64_to_ptr(info->func_info);
        func_type = btf__type_by_id(prog_btf, finfo->type_id);
index 6e53b1d..3571a28 100644 (file)
@@ -501,7 +501,8 @@ static int do_help(int argc, char **argv)
                HELP_SPEC_ATTACH_TYPES "\n"
                "       " HELP_SPEC_ATTACH_FLAGS "\n"
                "       " HELP_SPEC_PROGRAM "\n"
-               "       " HELP_SPEC_OPTIONS "\n"
+               "       " HELP_SPEC_OPTIONS " |\n"
+               "                    {-f|--bpffs} }\n"
                "",
                bin_name, argv[-2]);
 
index dc6daa1..d42d930 100644 (file)
@@ -67,6 +67,12 @@ const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE] = {
        [BPF_MODIFY_RETURN]             = "mod_ret",
        [BPF_LSM_MAC]                   = "lsm_mac",
        [BPF_SK_LOOKUP]                 = "sk_lookup",
+       [BPF_TRACE_ITER]                = "trace_iter",
+       [BPF_XDP_DEVMAP]                = "xdp_devmap",
+       [BPF_XDP_CPUMAP]                = "xdp_cpumap",
+       [BPF_XDP]                       = "xdp",
+       [BPF_SK_REUSEPORT_SELECT]       = "sk_skb_reuseport_select",
+       [BPF_SK_REUSEPORT_SELECT_OR_MIGRATE]    = "sk_skb_reuseport_select_or_migrate",
 };
 
 void p_err(const char *fmt, ...)
index 40a88df..7f36385 100644 (file)
@@ -1005,6 +1005,7 @@ static int do_help(int argc, char **argv)
                "       %1$s %2$s help\n"
                "\n"
                "       COMPONENT := { kernel | dev NAME }\n"
+               "       " HELP_SPEC_OPTIONS " }\n"
                "",
                bin_name, argv[-2]);
 
index 1d71ff8..d40d92b 100644 (file)
@@ -1026,7 +1026,8 @@ static int do_help(int argc, char **argv)
                "       %1$s %2$s skeleton FILE [name OBJECT_NAME]\n"
                "       %1$s %2$s help\n"
                "\n"
-               "       " HELP_SPEC_OPTIONS "\n"
+               "       " HELP_SPEC_OPTIONS " |\n"
+               "                    {-L|--use-loader} }\n"
                "",
                bin_name, "gen");
 
index 3b1aad7..84a9b01 100644 (file)
@@ -97,7 +97,9 @@ static int do_help(int argc, char **argv)
        fprintf(stderr,
                "Usage: %1$s %2$s pin OBJ PATH [map MAP]\n"
                "       %1$s %2$s help\n"
+               "\n"
                "       " HELP_SPEC_MAP "\n"
+               "       " HELP_SPEC_OPTIONS " }\n"
                "",
                bin_name, "iter");
 
index e77e152..8cc3e36 100644 (file)
@@ -401,7 +401,8 @@ static int do_help(int argc, char **argv)
                "       %1$s %2$s help\n"
                "\n"
                "       " HELP_SPEC_LINK "\n"
-               "       " HELP_SPEC_OPTIONS "\n"
+               "       " HELP_SPEC_OPTIONS " |\n"
+               "                    {-f|--bpffs} | {-n|--nomount} }\n"
                "",
                bin_name, argv[-2]);
 
index 3ddfd48..02eaaf0 100644 (file)
@@ -64,7 +64,8 @@ static int do_help(int argc, char **argv)
                "       %s version\n"
                "\n"
                "       OBJECT := { prog | map | link | cgroup | perf | net | feature | btf | gen | struct_ops | iter }\n"
-               "       " HELP_SPEC_OPTIONS "\n"
+               "       " HELP_SPEC_OPTIONS " |\n"
+               "                    {-V|--version} }\n"
                "",
                bin_name, bin_name, bin_name);
 
index c1cf297..90caa42 100644 (file)
@@ -57,8 +57,7 @@ static inline void *u64_to_ptr(__u64 ptr)
 #define HELP_SPEC_PROGRAM                                              \
        "PROG := { id PROG_ID | pinned FILE | tag PROG_TAG | name PROG_NAME }"
 #define HELP_SPEC_OPTIONS                                              \
-       "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-f|--bpffs} |\n"   \
-       "\t            {-m|--mapcompat} | {-n|--nomount} }"
+       "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-d|--debug}"
 #define HELP_SPEC_MAP                                                  \
        "MAP := { id MAP_ID | pinned FILE | name MAP_NAME }"
 #define HELP_SPEC_LINK                                                 \
index 09ae038..407071d 100644 (file)
@@ -807,10 +807,11 @@ static struct btf *get_map_kv_btf(const struct bpf_map_info *info)
        } else if (info->btf_value_type_id) {
                int err;
 
-               err = btf__get_from_id(info->btf_id, &btf);
-               if (err || !btf) {
+               btf = btf__load_from_kernel_by_id(info->btf_id);
+               err = libbpf_get_error(btf);
+               if (err) {
                        p_err("failed to get btf");
-                       btf = err ? ERR_PTR(err) : ERR_PTR(-ESRCH);
+                       btf = ERR_PTR(err);
                }
        }
 
@@ -1039,11 +1040,10 @@ static void print_key_value(struct bpf_map_info *info, void *key,
                            void *value)
 {
        json_writer_t *btf_wtr;
-       struct btf *btf = NULL;
-       int err;
+       struct btf *btf;
 
-       err = btf__get_from_id(info->btf_id, &btf);
-       if (err) {
+       btf = btf__load_from_kernel_by_id(info->btf_id);
+       if (libbpf_get_error(btf)) {
                p_err("failed to get btf");
                return;
        }
@@ -1466,8 +1466,9 @@ static int do_help(int argc, char **argv)
                "                 devmap | devmap_hash | sockmap | cpumap | xskmap | sockhash |\n"
                "                 cgroup_storage | reuseport_sockarray | percpu_cgroup_storage |\n"
                "                 queue | stack | sk_storage | struct_ops | ringbuf | inode_storage |\n"
-               "                 task_storage }\n"
-               "       " HELP_SPEC_OPTIONS "\n"
+               "                 task_storage }\n"
+               "       " HELP_SPEC_OPTIONS " |\n"
+               "                    {-f|--bpffs} | {-n|--nomount} }\n"
                "",
                bin_name, argv[-2]);
 
index f836d11..6490537 100644 (file)
@@ -729,6 +729,7 @@ static int do_help(int argc, char **argv)
                "\n"
                "       " HELP_SPEC_PROGRAM "\n"
                "       ATTACH_TYPE := { xdp | xdpgeneric | xdpdrv | xdpoffload }\n"
+               "       " HELP_SPEC_OPTIONS " }\n"
                "\n"
                "Note: Only xdp and tc attachments are supported now.\n"
                "      For progs attached to cgroups, use \"bpftool cgroup\"\n"
index ad23934..50de087 100644 (file)
@@ -231,7 +231,10 @@ static int do_show(int argc, char **argv)
 static int do_help(int argc, char **argv)
 {
        fprintf(stderr,
-               "Usage: %1$s %2$s { show | list | help }\n"
+               "Usage: %1$s %2$s { show | list }\n"
+               "       %1$s %2$s help\n"
+               "\n"
+               "       " HELP_SPEC_OPTIONS " }\n"
                "",
                bin_name, argv[-2]);
 
index cc48726..9c3e343 100644 (file)
@@ -249,10 +249,10 @@ static void show_prog_metadata(int fd, __u32 num_maps)
        struct bpf_map_info map_info;
        struct btf_var_secinfo *vsi;
        bool printed_header = false;
-       struct btf *btf = NULL;
        unsigned int i, vlen;
        void *value = NULL;
        const char *name;
+       struct btf *btf;
        int err;
 
        if (!num_maps)
@@ -263,8 +263,8 @@ static void show_prog_metadata(int fd, __u32 num_maps)
        if (!value)
                return;
 
-       err = btf__get_from_id(map_info.btf_id, &btf);
-       if (err || !btf)
+       btf = btf__load_from_kernel_by_id(map_info.btf_id);
+       if (libbpf_get_error(btf))
                goto out_free;
 
        t_datasec = btf__type_by_id(btf, map_info.btf_value_type_id);
@@ -646,9 +646,12 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode,
                member_len = info->xlated_prog_len;
        }
 
-       if (info->btf_id && btf__get_from_id(info->btf_id, &btf)) {
-               p_err("failed to get btf");
-               return -1;
+       if (info->btf_id) {
+               btf = btf__load_from_kernel_by_id(info->btf_id);
+               if (libbpf_get_error(btf)) {
+                       p_err("failed to get btf");
+                       return -1;
+               }
        }
 
        func_info = u64_to_ptr(info->func_info);
@@ -781,6 +784,8 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode,
                kernel_syms_destroy(&dd);
        }
 
+       btf__free(btf);
+
        return 0;
 }
 
@@ -2002,8 +2007,8 @@ static char *profile_target_name(int tgt_fd)
        struct bpf_prog_info_linear *info_linear;
        struct bpf_func_info *func_info;
        const struct btf_type *t;
+       struct btf *btf = NULL;
        char *name = NULL;
-       struct btf *btf;
 
        info_linear = bpf_program__get_prog_info_linear(
                tgt_fd, 1UL << BPF_PROG_INFO_FUNC_INFO);
@@ -2012,12 +2017,17 @@ static char *profile_target_name(int tgt_fd)
                return NULL;
        }
 
-       if (info_linear->info.btf_id == 0 ||
-           btf__get_from_id(info_linear->info.btf_id, &btf)) {
+       if (info_linear->info.btf_id == 0) {
                p_err("prog FD %d doesn't have valid btf", tgt_fd);
                goto out;
        }
 
+       btf = btf__load_from_kernel_by_id(info_linear->info.btf_id);
+       if (libbpf_get_error(btf)) {
+               p_err("failed to load btf for prog FD %d", tgt_fd);
+               goto out;
+       }
+
        func_info = u64_to_ptr(info_linear->info.func_info);
        t = btf__type_by_id(btf, func_info[0].type_id);
        if (!t) {
@@ -2027,6 +2037,7 @@ static char *profile_target_name(int tgt_fd)
        }
        name = strdup(btf__name_by_offset(btf, t->name_off));
 out:
+       btf__free(btf);
        free(info_linear);
        return name;
 }
@@ -2245,10 +2256,12 @@ static int do_help(int argc, char **argv)
                "                 cgroup/sendmsg6 | cgroup/recvmsg4 | cgroup/recvmsg6 |\n"
                "                 cgroup/getsockopt | cgroup/setsockopt | cgroup/sock_release |\n"
                "                 struct_ops | fentry | fexit | freplace | sk_lookup }\n"
-               "       ATTACH_TYPE := { msg_verdict | stream_verdict | stream_parser |\n"
-               "                        flow_dissector }\n"
+               "       ATTACH_TYPE := { msg_verdict | skb_verdict | stream_verdict |\n"
+               "                        stream_parser | flow_dissector }\n"
                "       METRIC := { cycles | instructions | l1d_loads | llc_misses | itlb_misses | dtlb_misses }\n"
-               "       " HELP_SPEC_OPTIONS "\n"
+               "       " HELP_SPEC_OPTIONS " |\n"
+               "                    {-f|--bpffs} | {-m|--mapcompat} | {-n|--nomount} |\n"
+               "                    {-L|--use-loader} }\n"
                "",
                bin_name, argv[-2]);
 
index b58b91f..ab2d229 100644 (file)
@@ -572,8 +572,8 @@ static int do_help(int argc, char **argv)
                "       %1$s %2$s unregister STRUCT_OPS_MAP\n"
                "       %1$s %2$s help\n"
                "\n"
-               "       OPTIONS := { {-j|--json} [{-p|--pretty}] }\n"
                "       STRUCT_OPS_MAP := [ id STRUCT_OPS_MAP_ID | name STRUCT_OPS_MAP_NAME ]\n"
+               "       " HELP_SPEC_OPTIONS " }\n"
                "",
                bin_name, argv[-2]);
 
index 3ad9301..de6365b 100644 (file)
@@ -291,7 +291,7 @@ static int compressed_section_fix(Elf *elf, Elf_Scn *scn, GElf_Shdr *sh)
        sh->sh_addralign = expected;
 
        if (gelf_update_shdr(scn, sh) == 0) {
-               printf("FAILED cannot update section header: %s\n",
+               pr_err("FAILED cannot update section header: %s\n",
                        elf_errmsg(-1));
                return -1;
        }
@@ -317,6 +317,7 @@ static int elf_collect(struct object *obj)
 
        elf = elf_begin(fd, ELF_C_RDWR_MMAP, NULL);
        if (!elf) {
+               close(fd);
                pr_err("FAILED cannot create ELF descriptor: %s\n",
                        elf_errmsg(-1));
                return -1;
@@ -484,7 +485,7 @@ static int symbols_resolve(struct object *obj)
        err = libbpf_get_error(btf);
        if (err) {
                pr_err("FAILED: load BTF from %s: %s\n",
-                       obj->path, strerror(-err));
+                       obj->btf ?: obj->path, strerror(-err));
                return -1;
        }
 
@@ -555,8 +556,7 @@ static int id_patch(struct object *obj, struct btf_id *id)
        int i;
 
        if (!id->id) {
-               pr_err("FAILED unresolved symbol %s\n", id->name);
-               return -EINVAL;
+               pr_err("WARN: resolve_btfids: unresolved symbol %s\n", id->name);
        }
 
        for (i = 0; i < id->addr_cnt; i++) {
@@ -734,8 +734,9 @@ int main(int argc, const char **argv)
 
        err = 0;
 out:
-       if (obj.efile.elf)
+       if (obj.efile.elf) {
                elf_end(obj.efile.elf);
-       close(obj.efile.fd);
+               close(obj.efile.fd);
+       }
        return err;
 }
index d208b2a..eb15f31 100644 (file)
@@ -653,6 +653,7 @@ enum {
        IFLA_BOND_AD_ACTOR_SYSTEM,
        IFLA_BOND_TLB_DYNAMIC_LB,
        IFLA_BOND_PEER_NOTIF_DELAY,
+       IFLA_BOND_AD_LACP_ACTIVE,
        __IFLA_BOND_MAX,
 };
 
index 430f687..94f0a14 100644 (file)
@@ -1,3 +1,3 @@
 libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \
            netlink.o bpf_prog_linfo.o libbpf_probes.o xsk.o hashmap.o \
-           btf_dump.o ringbuf.o strset.o linker.o gen_loader.o
+           btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o
index b46760b..85de4fd 100644 (file)
@@ -1180,7 +1180,7 @@ int btf__finalize_data(struct bpf_object *obj, struct btf *btf)
 
 static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian);
 
-int btf__load(struct btf *btf)
+int btf__load_into_kernel(struct btf *btf)
 {
        __u32 log_buf_size = 0, raw_size;
        char *log_buf = NULL;
@@ -1228,6 +1228,7 @@ done:
        free(log_buf);
        return libbpf_err(err);
 }
+int btf__load(struct btf *) __attribute__((alias("btf__load_into_kernel")));
 
 int btf__fd(const struct btf *btf)
 {
@@ -1382,21 +1383,35 @@ exit_free:
        return btf;
 }
 
-int btf__get_from_id(__u32 id, struct btf **btf)
+struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf)
 {
-       struct btf *res;
-       int err, btf_fd;
+       struct btf *btf;
+       int btf_fd;
 
-       *btf = NULL;
        btf_fd = bpf_btf_get_fd_by_id(id);
        if (btf_fd < 0)
-               return libbpf_err(-errno);
-
-       res = btf_get_from_fd(btf_fd, NULL);
-       err = libbpf_get_error(res);
+               return libbpf_err_ptr(-errno);
 
+       btf = btf_get_from_fd(btf_fd, base_btf);
        close(btf_fd);
 
+       return libbpf_ptr(btf);
+}
+
+struct btf *btf__load_from_kernel_by_id(__u32 id)
+{
+       return btf__load_from_kernel_by_id_split(id, NULL);
+}
+
+int btf__get_from_id(__u32 id, struct btf **btf)
+{
+       struct btf *res;
+       int err;
+
+       *btf = NULL;
+       res = btf__load_from_kernel_by_id(id);
+       err = libbpf_get_error(res);
+
        if (err)
                return libbpf_err(err);
 
@@ -4021,7 +4036,7 @@ static void btf_dedup_merge_hypot_map(struct btf_dedup *d)
                 */
                if (d->hypot_adjust_canon)
                        continue;
-               
+
                if (t_kind == BTF_KIND_FWD && c_kind != BTF_KIND_FWD)
                        d->map[t_id] = c_id;
 
@@ -4394,7 +4409,7 @@ static int btf_dedup_remap_types(struct btf_dedup *d)
  * Probe few well-known locations for vmlinux kernel image and try to load BTF
  * data out of it to use for target BTF.
  */
-struct btf *libbpf_find_kernel_btf(void)
+struct btf *btf__load_vmlinux_btf(void)
 {
        struct {
                const char *path_fmt;
@@ -4440,6 +4455,16 @@ struct btf *libbpf_find_kernel_btf(void)
        return libbpf_err_ptr(-ESRCH);
 }
 
+struct btf *libbpf_find_kernel_btf(void) __attribute__((alias("btf__load_vmlinux_btf")));
+
+struct btf *btf__load_module_btf(const char *module_name, struct btf *vmlinux_btf)
+{
+       char path[80];
+
+       snprintf(path, sizeof(path), "/sys/kernel/btf/%s", module_name);
+       return btf__parse_split(path, vmlinux_btf);
+}
+
 int btf_type_visit_type_ids(struct btf_type *t, type_id_visit_fn visit, void *ctx)
 {
        int i, n, err;
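
The new loaders return the BTF object directly and signal failure through libbpf_get_error(), which is the pattern the converted bpftool call sites adopt. A condensed sketch of the migration away from the now-wrapped btf__get_from_id():

#include <stdio.h>
#include <bpf/btf.h>
#include <bpf/libbpf.h>

/* Old pattern, kept only as a compatibility wrapper:
 *
 *	struct btf *btf = NULL;
 *	int err = btf__get_from_id(id, &btf);
 *	if (err || !btf)
 *		return NULL;
 *
 * New pattern, as adopted by the converted call sites:
 */
static struct btf *load_btf_by_id(__u32 id)
{
	struct btf *btf = btf__load_from_kernel_by_id(id);

	if (libbpf_get_error(btf)) {
		fprintf(stderr, "failed to load BTF object %u\n", id);
		return NULL;
	}
	return btf;
}
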
index b54f1c3..4a711f9 100644 (file)
@@ -44,8 +44,17 @@ LIBBPF_API struct btf *btf__parse_elf_split(const char *path, struct btf *base_b
 LIBBPF_API struct btf *btf__parse_raw(const char *path);
 LIBBPF_API struct btf *btf__parse_raw_split(const char *path, struct btf *base_btf);
 
+LIBBPF_API struct btf *btf__load_vmlinux_btf(void);
+LIBBPF_API struct btf *btf__load_module_btf(const char *module_name, struct btf *vmlinux_btf);
+LIBBPF_API struct btf *libbpf_find_kernel_btf(void);
+
+LIBBPF_API struct btf *btf__load_from_kernel_by_id(__u32 id);
+LIBBPF_API struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf);
+LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf);
+
 LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf);
 LIBBPF_API int btf__load(struct btf *btf);
+LIBBPF_API int btf__load_into_kernel(struct btf *btf);
 LIBBPF_API __s32 btf__find_by_name(const struct btf *btf,
                                   const char *type_name);
 LIBBPF_API __s32 btf__find_by_name_kind(const struct btf *btf,
@@ -66,7 +75,6 @@ LIBBPF_API void btf__set_fd(struct btf *btf, int fd);
 LIBBPF_API const void *btf__get_raw_data(const struct btf *btf, __u32 *size);
 LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset);
 LIBBPF_API const char *btf__str_by_offset(const struct btf *btf, __u32 offset);
-LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf);
 LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
                                    __u32 expected_key_size,
                                    __u32 expected_value_size,
@@ -89,8 +97,6 @@ int btf_ext__reloc_line_info(const struct btf *btf,
 LIBBPF_API __u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext);
 LIBBPF_API __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext);
 
-LIBBPF_API struct btf *libbpf_find_kernel_btf(void);
-
 LIBBPF_API int btf__find_str(struct btf *btf, const char *s);
 LIBBPF_API int btf__add_str(struct btf *btf, const char *s);
 LIBBPF_API int btf__add_type(struct btf *btf, const struct btf *src_btf,
@@ -184,6 +190,25 @@ LIBBPF_API int
 btf_dump__emit_type_decl(struct btf_dump *d, __u32 id,
                         const struct btf_dump_emit_type_decl_opts *opts);
 
+
+struct btf_dump_type_data_opts {
+       /* size of this struct, for forward/backward compatibility */
+       size_t sz;
+       const char *indent_str;
+       int indent_level;
+       /* below match "show" flags for the bpf_snprintf_btf() helper */
+       bool compact;           /* no newlines/indentation */
+       bool skip_names;        /* skip member/type names */
+       bool emit_zeroes;       /* show 0-valued fields */
+       size_t :0;
+};
+#define btf_dump_type_data_opts__last_field emit_zeroes
+
+LIBBPF_API int
+btf_dump__dump_type_data(struct btf_dump *d, __u32 id,
+                        const void *data, size_t data_sz,
+                        const struct btf_dump_type_data_opts *opts);
+
 /*
  * A set of helpers for easier BTF types handling
  */
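
A minimal usage sketch for the new typed-data dump API (the dump handle "d", type ID "id" and data buffer are assumed to exist; DECLARE_LIBBPF_OPTS fills in .sz):

	DECLARE_LIBBPF_OPTS(btf_dump_type_data_opts, opts,
		.compact = true,	/* single-line output */
		.emit_zeroes = false,	/* elide zero-valued fields */
	);
	int ret;

	ret = btf_dump__dump_type_data(d, id, data, data_sz, &opts);
	if (ret < 0)	/* e.g. -E2BIG if data_sz is too small for the type */
		return ret;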
index 5dc6b51..e4b483f 100644
@@ -10,6 +10,8 @@
 #include <stddef.h>
 #include <stdlib.h>
 #include <string.h>
+#include <ctype.h>
+#include <endian.h>
 #include <errno.h>
 #include <linux/err.h>
 #include <linux/btf.h>
@@ -53,6 +55,26 @@ struct btf_dump_type_aux_state {
        __u8 referenced: 1;
 };
 
+/* indent string length; one indent string is added for each indent level */
+#define BTF_DATA_INDENT_STR_LEN                        32
+
+/*
+ * Common internal data for BTF type data dump operations.
+ */
+struct btf_dump_data {
+       const void *data_end;           /* end of valid data to show */
+       bool compact;
+       bool skip_names;
+       bool emit_zeroes;
+       __u8 indent_lvl;        /* base indent level */
+       char indent_str[BTF_DATA_INDENT_STR_LEN];
+       /* below are used during iteration */
+       int depth;
+       bool is_array_member;
+       bool is_array_terminated;
+       bool is_array_char;
+};
+
 struct btf_dump {
        const struct btf *btf;
        const struct btf_ext *btf_ext;
@@ -60,6 +82,7 @@ struct btf_dump {
        struct btf_dump_opts opts;
        int ptr_sz;
        bool strip_mods;
+       bool skip_anon_defs;
        int last_id;
 
        /* per-type auxiliary state */
@@ -89,6 +112,10 @@ struct btf_dump {
         * name occurrences
         */
        struct hashmap *ident_names;
+       /*
+        * data for typed display; set only for the duration of a
+        * btf_dump__dump_type_data() call.
+        */
+       struct btf_dump_data *typed_dump;
 };
 
 static size_t str_hash_fn(const void *key, void *ctx)
@@ -765,11 +792,11 @@ static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id)
                break;
        case BTF_KIND_FUNC_PROTO: {
                const struct btf_param *p = btf_params(t);
-               __u16 vlen = btf_vlen(t);
+               __u16 n = btf_vlen(t);
                int i;
 
                btf_dump_emit_type(d, t->type, cont_id);
-               for (i = 0; i < vlen; i++, p++)
+               for (i = 0; i < n; i++, p++)
                        btf_dump_emit_type(d, p->type, cont_id);
 
                break;
@@ -852,8 +879,9 @@ static void btf_dump_emit_bit_padding(const struct btf_dump *d,
 static void btf_dump_emit_struct_fwd(struct btf_dump *d, __u32 id,
                                     const struct btf_type *t)
 {
-       btf_dump_printf(d, "%s %s",
+       btf_dump_printf(d, "%s%s%s",
                        btf_is_struct(t) ? "struct" : "union",
+                       t->name_off ? " " : "",
                        btf_dump_type_name(d, id));
 }
 
@@ -1259,7 +1287,7 @@ static void btf_dump_emit_type_chain(struct btf_dump *d,
                case BTF_KIND_UNION:
                        btf_dump_emit_mods(d, decls);
                        /* inline anonymous struct/union */
-                       if (t->name_off == 0)
+                       if (t->name_off == 0 && !d->skip_anon_defs)
                                btf_dump_emit_struct_def(d, id, t, lvl);
                        else
                                btf_dump_emit_struct_fwd(d, id, t);
@@ -1267,7 +1295,7 @@ static void btf_dump_emit_type_chain(struct btf_dump *d,
                case BTF_KIND_ENUM:
                        btf_dump_emit_mods(d, decls);
                        /* inline anonymous enum */
-                       if (t->name_off == 0)
+                       if (t->name_off == 0 && !d->skip_anon_defs)
                                btf_dump_emit_enum_def(d, id, t, lvl);
                        else
                                btf_dump_emit_enum_fwd(d, id, t);
@@ -1392,6 +1420,39 @@ static void btf_dump_emit_type_chain(struct btf_dump *d,
        btf_dump_emit_name(d, fname, last_was_ptr);
 }
 
+/* show type name as (type_name) */
+static void btf_dump_emit_type_cast(struct btf_dump *d, __u32 id,
+                                   bool top_level)
+{
+       const struct btf_type *t;
+
+       /* for array members, we don't bother emitting type name for each
+        * member to avoid the redundancy of
+        * .name = (char[4])[(char)'f',(char)'o',(char)'o',]
+        */
+       if (d->typed_dump->is_array_member)
+               return;
+
+       /* avoid type name specification for variable/section; it will be done
+        * for the associated variable value(s).
+        */
+       t = btf__type_by_id(d->btf, id);
+       if (btf_is_var(t) || btf_is_datasec(t))
+               return;
+
+       if (top_level)
+               btf_dump_printf(d, "(");
+
+       d->skip_anon_defs = true;
+       d->strip_mods = true;
+       btf_dump_emit_type_decl(d, id, "", 0);
+       d->strip_mods = false;
+       d->skip_anon_defs = false;
+
+       if (top_level)
+               btf_dump_printf(d, ")");
+}
+
 /* return number of duplicates (occurrences) of a given name */
 static size_t btf_dump_name_dups(struct btf_dump *d, struct hashmap *name_map,
                                 const char *orig_name)
@@ -1442,3 +1503,803 @@ static const char *btf_dump_ident_name(struct btf_dump *d, __u32 id)
 {
        return btf_dump_resolve_name(d, id, d->ident_names);
 }
+
+static int btf_dump_dump_type_data(struct btf_dump *d,
+                                  const char *fname,
+                                  const struct btf_type *t,
+                                  __u32 id,
+                                  const void *data,
+                                  __u8 bits_offset,
+                                  __u8 bit_sz);
+
+static const char *btf_dump_data_newline(struct btf_dump *d)
+{
+       return d->typed_dump->compact || d->typed_dump->depth == 0 ? "" : "\n";
+}
+
+static const char *btf_dump_data_delim(struct btf_dump *d)
+{
+       return d->typed_dump->depth == 0 ? "" : ",";
+}
+
+static void btf_dump_data_pfx(struct btf_dump *d)
+{
+       int i, lvl = d->typed_dump->indent_lvl + d->typed_dump->depth;
+
+       if (d->typed_dump->compact)
+               return;
+
+       for (i = 0; i < lvl; i++)
+               btf_dump_printf(d, "%s", d->typed_dump->indent_str);
+}
+
+/* A macro is used here as btf_dump_type_values() appends format specifiers
+ * to the format string passed in; these do the work of appending
+ * delimiters etc while the caller simply has to specify the type values
+ * in the format string + value(s).
+ */
+#define btf_dump_type_values(d, fmt, ...)                              \
+       btf_dump_printf(d, fmt "%s%s",                                  \
+                       ##__VA_ARGS__,                                  \
+                       btf_dump_data_delim(d),                         \
+                       btf_dump_data_newline(d))
+
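For illustration, at non-zero depth in non-compact mode a call expands as follows (sketch):

	/* btf_dump_type_values(d, "%d", 42);
	 * expands to
	 * btf_dump_printf(d, "%d" "%s%s", 42,
	 *                 btf_dump_data_delim(d),      -> ","
	 *                 btf_dump_data_newline(d));   -> "\n"
	 * i.e. it emits "42,\n"
	 */
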
+static int btf_dump_unsupported_data(struct btf_dump *d,
+                                    const struct btf_type *t,
+                                    __u32 id)
+{
+       btf_dump_printf(d, "<unsupported kind:%u>", btf_kind(t));
+       return -ENOTSUP;
+}
+
+static int btf_dump_get_bitfield_value(struct btf_dump *d,
+                                      const struct btf_type *t,
+                                      const void *data,
+                                      __u8 bits_offset,
+                                      __u8 bit_sz,
+                                      __u64 *value)
+{
+       __u16 left_shift_bits, right_shift_bits;
+       __u8 nr_copy_bits, nr_copy_bytes;
+       const __u8 *bytes = data;
+       int sz = t->size;
+       __u64 num = 0;
+       int i;
+
+       /* Maximum supported bitfield size is 64 bits */
+       if (sz > 8) {
+               pr_warn("unexpected bitfield size %d\n", sz);
+               return -EINVAL;
+       }
+
+       /* Bitfield value retrieval is done in two steps; first relevant bytes are
+        * stored in num, then we left/right shift num to eliminate irrelevant bits.
+        */
+       nr_copy_bits = bit_sz + bits_offset;
+       nr_copy_bytes = t->size;
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+       for (i = nr_copy_bytes - 1; i >= 0; i--)
+               num = num * 256 + bytes[i];
+#elif __BYTE_ORDER == __BIG_ENDIAN
+       for (i = 0; i < nr_copy_bytes; i++)
+               num = num * 256 + bytes[i];
+#else
+# error "Unrecognized __BYTE_ORDER"
+#endif
+       left_shift_bits = 64 - nr_copy_bits;
+       right_shift_bits = 64 - bit_sz;
+
+       *value = (num << left_shift_bits) >> right_shift_bits;
+
+       return 0;
+}
+
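To make the shift arithmetic concrete, a worked example (values chosen purely for illustration):

	/* little-endian, data = {0xab, 0x01}, t->size = 2,
	 * bits_offset = 4, bit_sz = 5:
	 *   num          = 0x01ab  (bytes assembled LSB-first)
	 *   nr_copy_bits = 5 + 4 = 9
	 *   num << (64 - 9) moves the low 9 bits to the top of the u64;
	 *   ... >> (64 - 5) keeps the top 5 of those, i.e. bits [8:4],
	 *   so *value = 0b11010 = 0x1a
	 */
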
+static int btf_dump_bitfield_check_zero(struct btf_dump *d,
+                                       const struct btf_type *t,
+                                       const void *data,
+                                       __u8 bits_offset,
+                                       __u8 bit_sz)
+{
+       __u64 check_num;
+       int err;
+
+       err = btf_dump_get_bitfield_value(d, t, data, bits_offset, bit_sz, &check_num);
+       if (err)
+               return err;
+       if (check_num == 0)
+               return -ENODATA;
+       return 0;
+}
+
+static int btf_dump_bitfield_data(struct btf_dump *d,
+                                 const struct btf_type *t,
+                                 const void *data,
+                                 __u8 bits_offset,
+                                 __u8 bit_sz)
+{
+       __u64 print_num;
+       int err;
+
+       err = btf_dump_get_bitfield_value(d, t, data, bits_offset, bit_sz, &print_num);
+       if (err)
+               return err;
+
+       btf_dump_type_values(d, "0x%llx", (unsigned long long)print_num);
+
+       return 0;
+}
+
+/* ints, floats and ptrs */
+static int btf_dump_base_type_check_zero(struct btf_dump *d,
+                                        const struct btf_type *t,
+                                        __u32 id,
+                                        const void *data)
+{
+       static __u8 bytecmp[16] = {};
+       int nr_bytes;
+
+       /* For pointer types, pointer size is not defined on a per-type basis.
+        * On dump creation however, we store the pointer size.
+        */
+       if (btf_kind(t) == BTF_KIND_PTR)
+               nr_bytes = d->ptr_sz;
+       else
+               nr_bytes = t->size;
+
+       if (nr_bytes < 1 || nr_bytes > 16) {
+               pr_warn("unexpected size %d for id [%u]\n", nr_bytes, id);
+               return -EINVAL;
+       }
+
+       if (memcmp(data, bytecmp, nr_bytes) == 0)
+               return -ENODATA;
+       return 0;
+}
+
+static bool ptr_is_aligned(const void *data, int data_sz)
+{
+       return ((uintptr_t)data) % data_sz == 0;
+}
+
+static int btf_dump_int_data(struct btf_dump *d,
+                            const struct btf_type *t,
+                            __u32 type_id,
+                            const void *data,
+                            __u8 bits_offset)
+{
+       __u8 encoding = btf_int_encoding(t);
+       bool sign = encoding & BTF_INT_SIGNED;
+       int sz = t->size;
+
+       if (sz == 0) {
+               pr_warn("unexpected size %d for id [%u]\n", sz, type_id);
+               return -EINVAL;
+       }
+
+       /* handle packed int data - accesses of integers not aligned on
+        * int boundaries can cause problems on some platforms.
+        */
+       if (!ptr_is_aligned(data, sz))
+               return btf_dump_bitfield_data(d, t, data, 0, 0);
+
+       switch (sz) {
+       case 16: {
+               const __u64 *ints = data;
+               __u64 lsi, msi;
+
+               /* avoid use of __int128 as some 32-bit platforms do not
+                * support it.
+                */
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+               lsi = ints[0];
+               msi = ints[1];
+#elif __BYTE_ORDER == __BIG_ENDIAN
+               lsi = ints[1];
+               msi = ints[0];
+#else
+# error "Unrecognized __BYTE_ORDER"
+#endif
+               if (msi == 0)
+                       btf_dump_type_values(d, "0x%llx", (unsigned long long)lsi);
+               else
+                       btf_dump_type_values(d, "0x%llx%016llx", (unsigned long long)msi,
+                                            (unsigned long long)lsi);
+               break;
+       }
+       case 8:
+               if (sign)
+                       btf_dump_type_values(d, "%lld", *(long long *)data);
+               else
+                       btf_dump_type_values(d, "%llu", *(unsigned long long *)data);
+               break;
+       case 4:
+               if (sign)
+                       btf_dump_type_values(d, "%d", *(__s32 *)data);
+               else
+                       btf_dump_type_values(d, "%u", *(__u32 *)data);
+               break;
+       case 2:
+               if (sign)
+                       btf_dump_type_values(d, "%d", *(__s16 *)data);
+               else
+                       btf_dump_type_values(d, "%u", *(__u16 *)data);
+               break;
+       case 1:
+               if (d->typed_dump->is_array_char) {
+                       /* check for null terminator */
+                       if (d->typed_dump->is_array_terminated)
+                               break;
+                       if (*(char *)data == '\0') {
+                               d->typed_dump->is_array_terminated = true;
+                               break;
+                       }
+                       if (isprint(*(char *)data)) {
+                               btf_dump_type_values(d, "'%c'", *(char *)data);
+                               break;
+                       }
+               }
+               if (sign)
+                       btf_dump_type_values(d, "%d", *(__s8 *)data);
+               else
+                       btf_dump_type_values(d, "%u", *(__u8 *)data);
+               break;
+       default:
+               pr_warn("unexpected sz %d for id [%u]\n", sz, type_id);
+               return -EINVAL;
+       }
+       return 0;
+}
+
+union float_data {
+       long double ld;
+       double d;
+       float f;
+};
+
+static int btf_dump_float_data(struct btf_dump *d,
+                              const struct btf_type *t,
+                              __u32 type_id,
+                              const void *data)
+{
+       const union float_data *flp = data;
+       union float_data fl;
+       int sz = t->size;
+
+       /* handle unaligned data; copy to local union */
+       if (!ptr_is_aligned(data, sz)) {
+               memcpy(&fl, data, sz);
+               flp = &fl;
+       }
+
+       switch (sz) {
+       case 16:
+               btf_dump_type_values(d, "%Lf", flp->ld);
+               break;
+       case 8:
+               btf_dump_type_values(d, "%lf", flp->d);
+               break;
+       case 4:
+               btf_dump_type_values(d, "%f", flp->f);
+               break;
+       default:
+               pr_warn("unexpected size %d for id [%u]\n", sz, type_id);
+               return -EINVAL;
+       }
+       return 0;
+}
+
+static int btf_dump_var_data(struct btf_dump *d,
+                            const struct btf_type *v,
+                            __u32 id,
+                            const void *data)
+{
+       enum btf_func_linkage linkage = btf_var(v)->linkage;
+       const struct btf_type *t;
+       const char *l;
+       __u32 type_id;
+
+       switch (linkage) {
+       case BTF_FUNC_STATIC:
+               l = "static ";
+               break;
+       case BTF_FUNC_EXTERN:
+               l = "extern ";
+               break;
+       case BTF_FUNC_GLOBAL:
+       default:
+               l = "";
+               break;
+       }
+
+       /* format of output here is [linkage] [type] [varname] = (type)value,
+        * for example "static int cpu_profile_flip = (int)1"
+        */
+       btf_dump_printf(d, "%s", l);
+       type_id = v->type;
+       t = btf__type_by_id(d->btf, type_id);
+       btf_dump_emit_type_cast(d, type_id, false);
+       btf_dump_printf(d, " %s = ", btf_name_of(d, v->name_off));
+       return btf_dump_dump_type_data(d, NULL, t, type_id, data, 0, 0);
+}
+
+static int btf_dump_array_data(struct btf_dump *d,
+                              const struct btf_type *t,
+                              __u32 id,
+                              const void *data)
+{
+       const struct btf_array *array = btf_array(t);
+       const struct btf_type *elem_type;
+       __u32 i, elem_type_id;
+       __s64 elem_size;
+       bool is_array_member;
+
+       elem_type_id = array->type;
+       elem_type = skip_mods_and_typedefs(d->btf, elem_type_id, NULL);
+       elem_size = btf__resolve_size(d->btf, elem_type_id);
+       if (elem_size <= 0) {
+               pr_warn("unexpected elem size %d for array type [%u]\n", elem_size, id);
+               return -EINVAL;
+       }
+
+       if (btf_is_int(elem_type)) {
+               /*
+                * BTF_INT_CHAR encoding never seems to be set for
+                * char arrays, so if size is 1 and element is
+                * printable as a char, we'll do that.
+                */
+               if (elem_size == 1)
+                       d->typed_dump->is_array_char = true;
+       }
+
+       /* note that we increment depth before calling btf_dump_printf() below;
+        * this is intentional.  btf_dump_data_newline() will not print a
+        * newline for depth 0 (since this leaves us with trailing newlines
+        * at the end of typed display), so depth is incremented first.
+        * For similar reasons, we decrement depth before showing the closing
+        * bracket.
+        */
+       d->typed_dump->depth++;
+       btf_dump_printf(d, "[%s", btf_dump_data_newline(d));
+
+       /* may be a multidimensional array, so store current "is array member"
+        * status so we can restore it correctly later.
+        */
+       is_array_member = d->typed_dump->is_array_member;
+       d->typed_dump->is_array_member = true;
+       for (i = 0; i < array->nelems; i++, data += elem_size) {
+               if (d->typed_dump->is_array_terminated)
+                       break;
+               btf_dump_dump_type_data(d, NULL, elem_type, elem_type_id, data, 0, 0);
+       }
+       d->typed_dump->is_array_member = is_array_member;
+       d->typed_dump->depth--;
+       btf_dump_data_pfx(d);
+       btf_dump_type_values(d, "]");
+
+       return 0;
+}
+
+static int btf_dump_struct_data(struct btf_dump *d,
+                               const struct btf_type *t,
+                               __u32 id,
+                               const void *data)
+{
+       const struct btf_member *m = btf_members(t);
+       __u16 n = btf_vlen(t);
+       int i, err = 0;
+
+       /* note that we increment depth before calling btf_dump_printf() below;
+        * this is intentional.  btf_dump_data_newline() will not print a
+        * newline for depth 0 (since this leaves us with trailing newlines
+        * at the end of typed display), so depth is incremented first.
+        * For similar reasons, we decrement depth before showing the closing
+        * brace.
+        */
+       d->typed_dump->depth++;
+       btf_dump_printf(d, "{%s", btf_dump_data_newline(d));
+
+       for (i = 0; i < n; i++, m++) {
+               const struct btf_type *mtype;
+               const char *mname;
+               __u32 moffset;
+               __u8 bit_sz;
+
+               mtype = btf__type_by_id(d->btf, m->type);
+               mname = btf_name_of(d, m->name_off);
+               moffset = btf_member_bit_offset(t, i);
+
+               bit_sz = btf_member_bitfield_size(t, i);
+               err = btf_dump_dump_type_data(d, mname, mtype, m->type, data + moffset / 8,
+                                             moffset % 8, bit_sz);
+               if (err < 0)
+                       return err;
+       }
+       d->typed_dump->depth--;
+       btf_dump_data_pfx(d);
+       btf_dump_type_values(d, "}");
+       return err;
+}
+
+union ptr_data {
+       unsigned int p;
+       unsigned long long lp;
+};
+
+static int btf_dump_ptr_data(struct btf_dump *d,
+                             const struct btf_type *t,
+                             __u32 id,
+                             const void *data)
+{
+       if (ptr_is_aligned(data, d->ptr_sz) && d->ptr_sz == sizeof(void *)) {
+               btf_dump_type_values(d, "%p", *(void **)data);
+       } else {
+               union ptr_data pt;
+
+               memcpy(&pt, data, d->ptr_sz);
+               if (d->ptr_sz == 4)
+                       btf_dump_type_values(d, "0x%x", pt.p);
+               else
+                       btf_dump_type_values(d, "0x%llx", pt.lp);
+       }
+       return 0;
+}
+
+static int btf_dump_get_enum_value(struct btf_dump *d,
+                                  const struct btf_type *t,
+                                  const void *data,
+                                  __u32 id,
+                                  __s64 *value)
+{
+       int sz = t->size;
+
+       /* handle unaligned enum value */
+       if (!ptr_is_aligned(data, sz)) {
+               __u64 val;
+               int err;
+
+               err = btf_dump_get_bitfield_value(d, t, data, 0, 0, &val);
+               if (err)
+                       return err;
+               *value = (__s64)val;
+               return 0;
+       }
+
+       switch (sz) {
+       case 8:
+               *value = *(__s64 *)data;
+               return 0;
+       case 4:
+               *value = *(__s32 *)data;
+               return 0;
+       case 2:
+               *value = *(__s16 *)data;
+               return 0;
+       case 1:
+               *value = *(__s8 *)data;
+               return 0;
+       default:
+               pr_warn("unexpected size %d for enum, id:[%u]\n", t->size, id);
+               return -EINVAL;
+       }
+}
+
+static int btf_dump_enum_data(struct btf_dump *d,
+                             const struct btf_type *t,
+                             __u32 id,
+                             const void *data)
+{
+       const struct btf_enum *e;
+       __s64 value;
+       int i, err;
+
+       err = btf_dump_get_enum_value(d, t, data, id, &value);
+       if (err)
+               return err;
+
+       for (i = 0, e = btf_enum(t); i < btf_vlen(t); i++, e++) {
+               if (value != e->val)
+                       continue;
+               btf_dump_type_values(d, "%s", btf_name_of(d, e->name_off));
+               return 0;
+       }
+
+       btf_dump_type_values(d, "%d", value);
+       return 0;
+}
+
+static int btf_dump_datasec_data(struct btf_dump *d,
+                                const struct btf_type *t,
+                                __u32 id,
+                                const void *data)
+{
+       const struct btf_var_secinfo *vsi;
+       const struct btf_type *var;
+       __u32 i;
+       int err;
+
+       btf_dump_type_values(d, "SEC(\"%s\") ", btf_name_of(d, t->name_off));
+
+       for (i = 0, vsi = btf_var_secinfos(t); i < btf_vlen(t); i++, vsi++) {
+               var = btf__type_by_id(d->btf, vsi->type);
+               err = btf_dump_dump_type_data(d, NULL, var, vsi->type, data + vsi->offset, 0, 0);
+               if (err < 0)
+                       return err;
+               btf_dump_printf(d, ";");
+       }
+       return 0;
+}
+
+/* return the size of the type, or -E2BIG if a base type would overflow the available data. */
+static int btf_dump_type_data_check_overflow(struct btf_dump *d,
+                                            const struct btf_type *t,
+                                            __u32 id,
+                                            const void *data,
+                                            __u8 bits_offset)
+{
+       __s64 size = btf__resolve_size(d->btf, id);
+
+       if (size < 0 || size >= INT_MAX) {
+               pr_warn("unexpected size [%zu] for id [%u]\n",
+                       (size_t)size, id);
+               return -EINVAL;
+       }
+
+       /* Only do overflow checking for base types; we do not want to
+        * avoid showing part of a struct, union or array, even if we
+        * do not have enough data to show the full object.  By
+        * restricting overflow checking to base types we can ensure
+        * that partial display succeeds, while avoiding overflowing
+        * and using bogus data for display.
+        */
+       t = skip_mods_and_typedefs(d->btf, id, NULL);
+       if (!t) {
+               pr_warn("unexpected error skipping mods/typedefs for id [%u]\n",
+                       id);
+               return -EINVAL;
+       }
+
+       switch (btf_kind(t)) {
+       case BTF_KIND_INT:
+       case BTF_KIND_FLOAT:
+       case BTF_KIND_PTR:
+       case BTF_KIND_ENUM:
+               if (data + bits_offset / 8 + size > d->typed_dump->data_end)
+                       return -E2BIG;
+               break;
+       default:
+               break;
+       }
+       return (int)size;
+}
+
+static int btf_dump_type_data_check_zero(struct btf_dump *d,
+                                        const struct btf_type *t,
+                                        __u32 id,
+                                        const void *data,
+                                        __u8 bits_offset,
+                                        __u8 bit_sz)
+{
+       __s64 value;
+       int i, err;
+
+       /* toplevel exceptions; we show zero values if
+        * - we ask for them (emit_zeroes)
+        * - we are at top-level so we see "struct empty { }"
+        * - or we are an array member and the array is non-empty and
+        *   not a char array; we don't want to be in a situation where we
+        *   have an integer array 0, 1, 0, 1 and only show non-zero values.
+        *   If the array contains zeroes only, or is a char array starting
+        *   with a '\0', the array-level check_zero() will prevent showing it;
+        *   we are concerned with determining zero value at the array member
+        *   level here.
+        */
+       if (d->typed_dump->emit_zeroes || d->typed_dump->depth == 0 ||
+           (d->typed_dump->is_array_member &&
+            !d->typed_dump->is_array_char))
+               return 0;
+
+       t = skip_mods_and_typedefs(d->btf, id, NULL);
+
+       switch (btf_kind(t)) {
+       case BTF_KIND_INT:
+               if (bit_sz)
+                       return btf_dump_bitfield_check_zero(d, t, data, bits_offset, bit_sz);
+               return btf_dump_base_type_check_zero(d, t, id, data);
+       case BTF_KIND_FLOAT:
+       case BTF_KIND_PTR:
+               return btf_dump_base_type_check_zero(d, t, id, data);
+       case BTF_KIND_ARRAY: {
+               const struct btf_array *array = btf_array(t);
+               const struct btf_type *elem_type;
+               __u32 elem_type_id, elem_size;
+               bool ischar;
+
+               elem_type_id = array->type;
+               elem_size = btf__resolve_size(d->btf, elem_type_id);
+               elem_type = skip_mods_and_typedefs(d->btf, elem_type_id, NULL);
+
+               ischar = btf_is_int(elem_type) && elem_size == 1;
+
+               /* check all elements; if _any_ element is nonzero, the
+                * whole array is displayed.  We make an exception however
+                * for char arrays where the first element is 0; these
+                * are considered zeroed also, even if later elements are
+                * non-zero because the string is terminated.
+                */
+               for (i = 0; i < array->nelems; i++) {
+                       if (i == 0 && ischar && *(char *)data == 0)
+                               return -ENODATA;
+                       err = btf_dump_type_data_check_zero(d, elem_type,
+                                                           elem_type_id,
+                                                           data +
+                                                           (i * elem_size),
+                                                           bits_offset, 0);
+                       if (err != -ENODATA)
+                               return err;
+               }
+               return -ENODATA;
+       }
+       case BTF_KIND_STRUCT:
+       case BTF_KIND_UNION: {
+               const struct btf_member *m = btf_members(t);
+               __u16 n = btf_vlen(t);
+
+               /* if any struct/union member is non-zero, the struct/union
+                * is considered non-zero and dumped.
+                */
+               for (i = 0; i < n; i++, m++) {
+                       const struct btf_type *mtype;
+                       __u32 moffset;
+
+                       mtype = btf__type_by_id(d->btf, m->type);
+                       moffset = btf_member_bit_offset(t, i);
+
+                       /* btf_int_bits() does not store member bitfield size;
+                        * bitfield size needs to be stored here so int display
+                        * of member can retrieve it.
+                        */
+                       bit_sz = btf_member_bitfield_size(t, i);
+                       err = btf_dump_type_data_check_zero(d, mtype, m->type, data + moffset / 8,
+                                                           moffset % 8, bit_sz);
+                       if (err != -ENODATA)
+                               return err;
+               }
+               return -ENODATA;
+       }
+       case BTF_KIND_ENUM:
+               err = btf_dump_get_enum_value(d, t, data, id, &value);
+               if (err)
+                       return err;
+               if (value == 0)
+                       return -ENODATA;
+               return 0;
+       default:
+               return 0;
+       }
+}
+
+/* returns size of data dumped, or error. */
+static int btf_dump_dump_type_data(struct btf_dump *d,
+                                  const char *fname,
+                                  const struct btf_type *t,
+                                  __u32 id,
+                                  const void *data,
+                                  __u8 bits_offset,
+                                  __u8 bit_sz)
+{
+       int size, err;
+
+       size = btf_dump_type_data_check_overflow(d, t, id, data, bits_offset);
+       if (size < 0)
+               return size;
+       err = btf_dump_type_data_check_zero(d, t, id, data, bits_offset, bit_sz);
+       if (err) {
+               /* zeroed data is expected and not an error, so simply skip
+                * dumping such data.  Record other errors however.
+                */
+               if (err == -ENODATA)
+                       return size;
+               return err;
+       }
+       btf_dump_data_pfx(d);
+
+       if (!d->typed_dump->skip_names) {
+               if (fname && strlen(fname) > 0)
+                       btf_dump_printf(d, ".%s = ", fname);
+               btf_dump_emit_type_cast(d, id, true);
+       }
+
+       t = skip_mods_and_typedefs(d->btf, id, NULL);
+
+       switch (btf_kind(t)) {
+       case BTF_KIND_UNKN:
+       case BTF_KIND_FWD:
+       case BTF_KIND_FUNC:
+       case BTF_KIND_FUNC_PROTO:
+               err = btf_dump_unsupported_data(d, t, id);
+               break;
+       case BTF_KIND_INT:
+               if (bit_sz)
+                       err = btf_dump_bitfield_data(d, t, data, bits_offset, bit_sz);
+               else
+                       err = btf_dump_int_data(d, t, id, data, bits_offset);
+               break;
+       case BTF_KIND_FLOAT:
+               err = btf_dump_float_data(d, t, id, data);
+               break;
+       case BTF_KIND_PTR:
+               err = btf_dump_ptr_data(d, t, id, data);
+               break;
+       case BTF_KIND_ARRAY:
+               err = btf_dump_array_data(d, t, id, data);
+               break;
+       case BTF_KIND_STRUCT:
+       case BTF_KIND_UNION:
+               err = btf_dump_struct_data(d, t, id, data);
+               break;
+       case BTF_KIND_ENUM:
+               /* handle bitfield and int enum values */
+               if (bit_sz) {
+                       __u64 print_num;
+                       __s64 enum_val;
+
+                       err = btf_dump_get_bitfield_value(d, t, data, bits_offset, bit_sz,
+                                                         &print_num);
+                       if (err)
+                               break;
+                       enum_val = (__s64)print_num;
+                       err = btf_dump_enum_data(d, t, id, &enum_val);
+               } else
+                       err = btf_dump_enum_data(d, t, id, data);
+               break;
+       case BTF_KIND_VAR:
+               err = btf_dump_var_data(d, t, id, data);
+               break;
+       case BTF_KIND_DATASEC:
+               err = btf_dump_datasec_data(d, t, id, data);
+               break;
+       default:
+               pr_warn("unexpected kind [%u] for id [%u]\n",
+                       BTF_INFO_KIND(t->info), id);
+               return -EINVAL;
+       }
+       if (err < 0)
+               return err;
+       return size;
+}
+
+int btf_dump__dump_type_data(struct btf_dump *d, __u32 id,
+                            const void *data, size_t data_sz,
+                            const struct btf_dump_type_data_opts *opts)
+{
+       struct btf_dump_data typed_dump = {};
+       const struct btf_type *t;
+       int ret;
+
+       if (!OPTS_VALID(opts, btf_dump_type_data_opts))
+               return libbpf_err(-EINVAL);
+
+       t = btf__type_by_id(d->btf, id);
+       if (!t)
+               return libbpf_err(-ENOENT);
+
+       d->typed_dump = &typed_dump;
+       d->typed_dump->data_end = data + data_sz;
+       d->typed_dump->indent_lvl = OPTS_GET(opts, indent_level, 0);
+
+       /* default indent string is a tab */
+       if (!OPTS_GET(opts, indent_str, NULL))
+               d->typed_dump->indent_str[0] = '\t';
+       else
+               strncat(d->typed_dump->indent_str, OPTS_GET(opts, indent_str, NULL),
+                       sizeof(d->typed_dump->indent_str) - 1);
+
+       d->typed_dump->compact = OPTS_GET(opts, compact, false);
+       d->typed_dump->skip_names = OPTS_GET(opts, skip_names, false);
+       d->typed_dump->emit_zeroes = OPTS_GET(opts, emit_zeroes, false);
+
+       ret = btf_dump_dump_type_data(d, NULL, t, id, data, 0, 0);
+
+       d->typed_dump = NULL;
+
+       return libbpf_err(ret);
+}
index 4ccfae3..cb106e8 100644
@@ -498,6 +498,10 @@ struct bpf_object {
         * it at load time.
         */
        struct btf *btf_vmlinux;
+       /* Path to the custom BTF to be used for BPF CO-RE relocations as an
+        * override for vmlinux BTF.
+        */
+       char *btf_custom_path;
        /* vmlinux BTF override for CO-RE relocations */
        struct btf *btf_vmlinux_override;
        /* Lazily initialized kernel module BTFs */
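The new field is driven by the open option of the same name; a usage sketch (paths illustrative, not from this patch):

	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
		.btf_custom_path = "/tmp/custom.btf",
	);
	struct bpf_object *obj;

	obj = bpf_object__open_file("prog.bpf.o", &opts);
	if (libbpf_get_error(obj))
		return -EINVAL;	/* handle open failure */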
@@ -591,11 +595,6 @@ static bool insn_is_subprog_call(const struct bpf_insn *insn)
               insn->off == 0;
 }
 
-static bool is_ldimm64_insn(struct bpf_insn *insn)
-{
-       return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
-}
-
 static bool is_call_insn(const struct bpf_insn *insn)
 {
        return insn->code == (BPF_JMP | BPF_CALL);
@@ -2645,8 +2644,10 @@ static bool obj_needs_vmlinux_btf(const struct bpf_object *obj)
        struct bpf_program *prog;
        int i;
 
-       /* CO-RE relocations need kernel BTF */
-       if (obj->btf_ext && obj->btf_ext->core_relo_info.len)
+       /* CO-RE relocations need kernel BTF, but only when btf_custom_path
+        * is not specified
+        */
+       if (obj->btf_ext && obj->btf_ext->core_relo_info.len && !obj->btf_custom_path)
                return true;
 
        /* Support for typed ksyms needs kernel BTF */
@@ -2679,7 +2680,7 @@ static int bpf_object__load_vmlinux_btf(struct bpf_object *obj, bool force)
        if (!force && !obj_needs_vmlinux_btf(obj))
                return 0;
 
-       obj->btf_vmlinux = libbpf_find_kernel_btf();
+       obj->btf_vmlinux = btf__load_vmlinux_btf();
        err = libbpf_get_error(obj->btf_vmlinux);
        if (err) {
                pr_warn("Error loading vmlinux BTF: %d\n", err);
@@ -2768,7 +2769,7 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
                 */
                btf__set_fd(kern_btf, 0);
        } else {
-               err = btf__load(kern_btf);
+               err = btf__load_into_kernel(kern_btf);
        }
        if (sanitize) {
                if (!err) {
@@ -4521,6 +4522,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
 {
        struct bpf_create_map_attr create_attr;
        struct bpf_map_def *def = &map->def;
+       int err = 0;
 
        memset(&create_attr, 0, sizeof(create_attr));
 
@@ -4563,8 +4565,6 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
 
        if (bpf_map_type__is_map_in_map(def->type)) {
                if (map->inner_map) {
-                       int err;
-
                        err = bpf_object__create_map(obj, map->inner_map, true);
                        if (err) {
                                pr_warn("map '%s': failed to create inner map: %d\n",
@@ -4589,8 +4589,8 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
        if (map->fd < 0 && (create_attr.btf_key_type_id ||
                            create_attr.btf_value_type_id)) {
                char *cp, errmsg[STRERR_BUFSIZE];
-               int err = -errno;
 
+               err = -errno;
                cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
                pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
                        map->name, cp, err);
@@ -4602,8 +4602,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
                map->fd = bpf_create_map_xattr(&create_attr);
        }
 
-       if (map->fd < 0)
-               return -errno;
+       err = map->fd < 0 ? -errno : 0;
 
        if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) {
                if (obj->gen_loader)
@@ -4612,7 +4611,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
                zfree(&map->inner_map);
        }
 
-       return 0;
+       return err;
 }
 
 static int init_map_slots(struct bpf_object *obj, struct bpf_map *map)
@@ -4658,10 +4657,13 @@ bpf_object__create_maps(struct bpf_object *obj)
        char *cp, errmsg[STRERR_BUFSIZE];
        unsigned int i, j;
        int err;
+       bool retried;
 
        for (i = 0; i < obj->nr_maps; i++) {
                map = &obj->maps[i];
 
+               retried = false;
+retry:
                if (map->pin_path) {
                        err = bpf_object__reuse_map(map);
                        if (err) {
@@ -4669,6 +4671,12 @@ bpf_object__create_maps(struct bpf_object *obj)
                                        map->name);
                                goto err_out;
                        }
+                       if (retried && map->fd < 0) {
+                               pr_warn("map '%s': cannot find pinned map\n",
+                                       map->name);
+                               err = -ENOENT;
+                               goto err_out;
+                       }
                }
 
                if (map->fd >= 0) {
@@ -4702,9 +4710,13 @@ bpf_object__create_maps(struct bpf_object *obj)
                if (map->pin_path && !map->pinned) {
                        err = bpf_map__pin(map, NULL);
                        if (err) {
+                               zclose(map->fd);
+                               if (!retried && err == -EEXIST) {
+                                       retried = true;
+                                       goto retry;
+                               }
                                pr_warn("map '%s': failed to auto-pin at '%s': %d\n",
                                        map->name, map->pin_path, err);
-                               zclose(map->fd);
                                goto err_out;
                        }
                }
@@ -4721,279 +4733,6 @@ err_out:
        return err;
 }
 
-#define BPF_CORE_SPEC_MAX_LEN 64
-
-/* represents BPF CO-RE field or array element accessor */
-struct bpf_core_accessor {
-       __u32 type_id;          /* struct/union type or array element type */
-       __u32 idx;              /* field index or array index */
-       const char *name;       /* field name or NULL for array accessor */
-};
-
-struct bpf_core_spec {
-       const struct btf *btf;
-       /* high-level spec: named fields and array indices only */
-       struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN];
-       /* original unresolved (no skip_mods_or_typedefs) root type ID */
-       __u32 root_type_id;
-       /* CO-RE relocation kind */
-       enum bpf_core_relo_kind relo_kind;
-       /* high-level spec length */
-       int len;
-       /* raw, low-level spec: 1-to-1 with accessor spec string */
-       int raw_spec[BPF_CORE_SPEC_MAX_LEN];
-       /* raw spec length */
-       int raw_len;
-       /* field bit offset represented by spec */
-       __u32 bit_offset;
-};
-
-static bool str_is_empty(const char *s)
-{
-       return !s || !s[0];
-}
-
-static bool is_flex_arr(const struct btf *btf,
-                       const struct bpf_core_accessor *acc,
-                       const struct btf_array *arr)
-{
-       const struct btf_type *t;
-
-       /* not a flexible array, if not inside a struct or has non-zero size */
-       if (!acc->name || arr->nelems > 0)
-               return false;
-
-       /* has to be the last member of enclosing struct */
-       t = btf__type_by_id(btf, acc->type_id);
-       return acc->idx == btf_vlen(t) - 1;
-}
-
-static const char *core_relo_kind_str(enum bpf_core_relo_kind kind)
-{
-       switch (kind) {
-       case BPF_FIELD_BYTE_OFFSET: return "byte_off";
-       case BPF_FIELD_BYTE_SIZE: return "byte_sz";
-       case BPF_FIELD_EXISTS: return "field_exists";
-       case BPF_FIELD_SIGNED: return "signed";
-       case BPF_FIELD_LSHIFT_U64: return "lshift_u64";
-       case BPF_FIELD_RSHIFT_U64: return "rshift_u64";
-       case BPF_TYPE_ID_LOCAL: return "local_type_id";
-       case BPF_TYPE_ID_TARGET: return "target_type_id";
-       case BPF_TYPE_EXISTS: return "type_exists";
-       case BPF_TYPE_SIZE: return "type_size";
-       case BPF_ENUMVAL_EXISTS: return "enumval_exists";
-       case BPF_ENUMVAL_VALUE: return "enumval_value";
-       default: return "unknown";
-       }
-}
-
-static bool core_relo_is_field_based(enum bpf_core_relo_kind kind)
-{
-       switch (kind) {
-       case BPF_FIELD_BYTE_OFFSET:
-       case BPF_FIELD_BYTE_SIZE:
-       case BPF_FIELD_EXISTS:
-       case BPF_FIELD_SIGNED:
-       case BPF_FIELD_LSHIFT_U64:
-       case BPF_FIELD_RSHIFT_U64:
-               return true;
-       default:
-               return false;
-       }
-}
-
-static bool core_relo_is_type_based(enum bpf_core_relo_kind kind)
-{
-       switch (kind) {
-       case BPF_TYPE_ID_LOCAL:
-       case BPF_TYPE_ID_TARGET:
-       case BPF_TYPE_EXISTS:
-       case BPF_TYPE_SIZE:
-               return true;
-       default:
-               return false;
-       }
-}
-
-static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind)
-{
-       switch (kind) {
-       case BPF_ENUMVAL_EXISTS:
-       case BPF_ENUMVAL_VALUE:
-               return true;
-       default:
-               return false;
-       }
-}
-
-/*
- * Turn bpf_core_relo into a low- and high-level spec representation,
- * validating correctness along the way, as well as calculating resulting
- * field bit offset, specified by accessor string. Low-level spec captures
- * every single level of nestedness, including traversing anonymous
- * struct/union members. High-level one only captures semantically meaningful
- * "turning points": named fields and array indicies.
- * E.g., for this case:
- *
- *   struct sample {
- *       int __unimportant;
- *       struct {
- *           int __1;
- *           int __2;
- *           int a[7];
- *       };
- *   };
- *
- *   struct sample *s = ...;
- *
- *   int x = &s->a[3]; // access string = '0:1:2:3'
- *
- * Low-level spec has 1:1 mapping with each element of access string (it's
- * just a parsed access string representation): [0, 1, 2, 3].
- *
- * High-level spec will capture only 3 points:
- *   - initial zero-index access by pointer (&s->... is the same as &s[0]...);
- *   - field 'a' access (corresponds to '2' in low-level spec);
- *   - array element #3 access (corresponds to '3' in low-level spec).
- *
- * Type-based relocations (TYPE_EXISTS/TYPE_SIZE,
- * TYPE_ID_LOCAL/TYPE_ID_TARGET) don't capture any field information. Their
- * spec and raw_spec are kept empty.
- *
- * Enum value-based relocations (ENUMVAL_EXISTS/ENUMVAL_VALUE) use access
- * string to specify the enumerator's value index that needs to be relocated.
- */
-static int bpf_core_parse_spec(const struct btf *btf,
-                              __u32 type_id,
-                              const char *spec_str,
-                              enum bpf_core_relo_kind relo_kind,
-                              struct bpf_core_spec *spec)
-{
-       int access_idx, parsed_len, i;
-       struct bpf_core_accessor *acc;
-       const struct btf_type *t;
-       const char *name;
-       __u32 id;
-       __s64 sz;
-
-       if (str_is_empty(spec_str) || *spec_str == ':')
-               return -EINVAL;
-
-       memset(spec, 0, sizeof(*spec));
-       spec->btf = btf;
-       spec->root_type_id = type_id;
-       spec->relo_kind = relo_kind;
-
-       /* type-based relocations don't have a field access string */
-       if (core_relo_is_type_based(relo_kind)) {
-               if (strcmp(spec_str, "0"))
-                       return -EINVAL;
-               return 0;
-       }
-
-       /* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */
-       while (*spec_str) {
-               if (*spec_str == ':')
-                       ++spec_str;
-               if (sscanf(spec_str, "%d%n", &access_idx, &parsed_len) != 1)
-                       return -EINVAL;
-               if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
-                       return -E2BIG;
-               spec_str += parsed_len;
-               spec->raw_spec[spec->raw_len++] = access_idx;
-       }
-
-       if (spec->raw_len == 0)
-               return -EINVAL;
-
-       t = skip_mods_and_typedefs(btf, type_id, &id);
-       if (!t)
-               return -EINVAL;
-
-       access_idx = spec->raw_spec[0];
-       acc = &spec->spec[0];
-       acc->type_id = id;
-       acc->idx = access_idx;
-       spec->len++;
-
-       if (core_relo_is_enumval_based(relo_kind)) {
-               if (!btf_is_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t))
-                       return -EINVAL;
-
-               /* record enumerator name in the first accessor */
-               acc->name = btf__name_by_offset(btf, btf_enum(t)[access_idx].name_off);
-               return 0;
-       }
-
-       if (!core_relo_is_field_based(relo_kind))
-               return -EINVAL;
-
-       sz = btf__resolve_size(btf, id);
-       if (sz < 0)
-               return sz;
-       spec->bit_offset = access_idx * sz * 8;
-
-       for (i = 1; i < spec->raw_len; i++) {
-               t = skip_mods_and_typedefs(btf, id, &id);
-               if (!t)
-                       return -EINVAL;
-
-               access_idx = spec->raw_spec[i];
-               acc = &spec->spec[spec->len];
-
-               if (btf_is_composite(t)) {
-                       const struct btf_member *m;
-                       __u32 bit_offset;
-
-                       if (access_idx >= btf_vlen(t))
-                               return -EINVAL;
-
-                       bit_offset = btf_member_bit_offset(t, access_idx);
-                       spec->bit_offset += bit_offset;
-
-                       m = btf_members(t) + access_idx;
-                       if (m->name_off) {
-                               name = btf__name_by_offset(btf, m->name_off);
-                               if (str_is_empty(name))
-                                       return -EINVAL;
-
-                               acc->type_id = id;
-                               acc->idx = access_idx;
-                               acc->name = name;
-                               spec->len++;
-                       }
-
-                       id = m->type;
-               } else if (btf_is_array(t)) {
-                       const struct btf_array *a = btf_array(t);
-                       bool flex;
-
-                       t = skip_mods_and_typedefs(btf, a->type, &id);
-                       if (!t)
-                               return -EINVAL;
-
-                       flex = is_flex_arr(btf, acc - 1, a);
-                       if (!flex && access_idx >= a->nelems)
-                               return -EINVAL;
-
-                       spec->spec[spec->len].type_id = id;
-                       spec->spec[spec->len].idx = access_idx;
-                       spec->len++;
-
-                       sz = btf__resolve_size(btf, id);
-                       if (sz < 0)
-                               return sz;
-                       spec->bit_offset += access_idx * sz * 8;
-               } else {
-                       pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n",
-                               type_id, spec_str, i, id, btf_kind_str(t));
-                       return -EINVAL;
-               }
-       }
-
-       return 0;
-}
-
 static bool bpf_core_is_flavor_sep(const char *s)
 {
        /* check X___Y name pattern, where X and Y are not underscores */
@@ -5006,7 +4745,7 @@ static bool bpf_core_is_flavor_sep(const char *s)
  * before last triple underscore. Struct name part after last triple
  * underscore is ignored by BPF CO-RE relocation during relocation matching.
  */
-static size_t bpf_core_essential_name_len(const char *name)
+size_t bpf_core_essential_name_len(const char *name)
 {
        size_t n = strlen(name);
        int i;
@@ -5018,34 +4757,20 @@ static size_t bpf_core_essential_name_len(const char *name)
        return n;
 }
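
For example (illustrative), the now-exported helper strips flavor suffixes as follows:

	/* bpf_core_essential_name_len("task_struct___v5_10")
	 *	returns strlen("task_struct") == 11;
	 * names without a "___" flavor suffix are returned at full length.
	 */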
 
-struct core_cand
-{
-       const struct btf *btf;
-       const struct btf_type *t;
-       const char *name;
-       __u32 id;
-};
-
-/* dynamically sized list of type IDs and its associated struct btf */
-struct core_cand_list {
-       struct core_cand *cands;
-       int len;
-};
-
-static void bpf_core_free_cands(struct core_cand_list *cands)
+static void bpf_core_free_cands(struct bpf_core_cand_list *cands)
 {
        free(cands->cands);
        free(cands);
 }
 
-static int bpf_core_add_cands(struct core_cand *local_cand,
+static int bpf_core_add_cands(struct bpf_core_cand *local_cand,
                              size_t local_essent_len,
                              const struct btf *targ_btf,
                              const char *targ_btf_name,
                              int targ_start_id,
-                             struct core_cand_list *cands)
+                             struct bpf_core_cand_list *cands)
 {
-       struct core_cand *new_cands, *cand;
+       struct bpf_core_cand *new_cands, *cand;
        const struct btf_type *t;
        const char *targ_name;
        size_t targ_essent_len;
@@ -5181,11 +4906,11 @@ err_out:
        return 0;
 }
 
-static struct core_cand_list *
+static struct bpf_core_cand_list *
 bpf_core_find_cands(struct bpf_object *obj, const struct btf *local_btf, __u32 local_type_id)
 {
-       struct core_cand local_cand = {};
-       struct core_cand_list *cands;
+       struct bpf_core_cand local_cand = {};
+       struct bpf_core_cand_list *cands;
        const struct btf *main_btf;
        size_t local_essent_len;
        int err, i;
@@ -5239,165 +4964,6 @@ err_out:
        return ERR_PTR(err);
 }
 
-/* Check two types for compatibility for the purpose of field access
- * relocation. const/volatile/restrict and typedefs are skipped to ensure we
- * are relocating semantically compatible entities:
- *   - any two STRUCTs/UNIONs are compatible and can be mixed;
- *   - any two FWDs are compatible, if their names match (modulo flavor suffix);
- *   - any two PTRs are always compatible;
- *   - for ENUMs, names should be the same (ignoring flavor suffix) or at
- *     least one of enums should be anonymous;
- *   - for ENUMs, check sizes, names are ignored;
- *   - for INT, size and signedness are ignored;
- *   - any two FLOATs are always compatible;
- *   - for ARRAY, dimensionality is ignored, element types are checked for
- *     compatibility recursively;
- *   - everything else shouldn't be ever a target of relocation.
- * These rules are not set in stone and probably will be adjusted as we get
- * more experience with using BPF CO-RE relocations.
- */
-static int bpf_core_fields_are_compat(const struct btf *local_btf,
-                                     __u32 local_id,
-                                     const struct btf *targ_btf,
-                                     __u32 targ_id)
-{
-       const struct btf_type *local_type, *targ_type;
-
-recur:
-       local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id);
-       targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
-       if (!local_type || !targ_type)
-               return -EINVAL;
-
-       if (btf_is_composite(local_type) && btf_is_composite(targ_type))
-               return 1;
-       if (btf_kind(local_type) != btf_kind(targ_type))
-               return 0;
-
-       switch (btf_kind(local_type)) {
-       case BTF_KIND_PTR:
-       case BTF_KIND_FLOAT:
-               return 1;
-       case BTF_KIND_FWD:
-       case BTF_KIND_ENUM: {
-               const char *local_name, *targ_name;
-               size_t local_len, targ_len;
-
-               local_name = btf__name_by_offset(local_btf,
-                                                local_type->name_off);
-               targ_name = btf__name_by_offset(targ_btf, targ_type->name_off);
-               local_len = bpf_core_essential_name_len(local_name);
-               targ_len = bpf_core_essential_name_len(targ_name);
-               /* one of them is anonymous or both w/ same flavor-less names */
-               return local_len == 0 || targ_len == 0 ||
-                      (local_len == targ_len &&
-                       strncmp(local_name, targ_name, local_len) == 0);
-       }
-       case BTF_KIND_INT:
-               /* just reject deprecated bitfield-like integers; all other
-                * integers are by default compatible between each other
-                */
-               return btf_int_offset(local_type) == 0 &&
-                      btf_int_offset(targ_type) == 0;
-       case BTF_KIND_ARRAY:
-               local_id = btf_array(local_type)->type;
-               targ_id = btf_array(targ_type)->type;
-               goto recur;
-       default:
-               pr_warn("unexpected kind %d relocated, local [%d], target [%d]\n",
-                       btf_kind(local_type), local_id, targ_id);
-               return 0;
-       }
-}
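
To make the FWD/ENUM name rule above concrete, here is a standalone sketch of
the essential-name comparison (simplified to the first "___" occurrence; the
type names are hypothetical and not part of this patch):

	#include <stdio.h>
	#include <string.h>

	/* everything from the "___" flavor separator onward is ignored */
	static size_t essential_name_len(const char *name)
	{
		const char *flavor = strstr(name, "___");

		return flavor ? (size_t)(flavor - name) : strlen(name);
	}

	int main(void)
	{
		const char *a = "task_struct", *b = "task_struct___v510";
		size_t la = essential_name_len(a), lb = essential_name_len(b);

		/* equal essential lengths and prefixes => names match */
		printf("%d\n", la == lb && strncmp(a, b, la) == 0); /* 1 */
		return 0;
	}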
-
-/*
- * Given single high-level named field accessor in local type, find
- * corresponding high-level accessor for a target type. Along the way,
- * maintain low-level spec for target as well. Also keep updating target
- * bit offset.
- *
- * Searching is performed through recursive exhaustive enumeration of all
- * fields of a struct/union. If there are any anonymous (embedded)
- * structs/unions, they are recursively searched as well. If a field with
- * the desired name is found, local and target types are checked for
- * compatibility before returning the result.
- *
- * 1 is returned if the field is found.
- * 0 is returned if no compatible field is found.
- * <0 is returned on error.
- */
-static int bpf_core_match_member(const struct btf *local_btf,
-                                const struct bpf_core_accessor *local_acc,
-                                const struct btf *targ_btf,
-                                __u32 targ_id,
-                                struct bpf_core_spec *spec,
-                                __u32 *next_targ_id)
-{
-       const struct btf_type *local_type, *targ_type;
-       const struct btf_member *local_member, *m;
-       const char *local_name, *targ_name;
-       __u32 local_id;
-       int i, n, found;
-
-       targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
-       if (!targ_type)
-               return -EINVAL;
-       if (!btf_is_composite(targ_type))
-               return 0;
-
-       local_id = local_acc->type_id;
-       local_type = btf__type_by_id(local_btf, local_id);
-       local_member = btf_members(local_type) + local_acc->idx;
-       local_name = btf__name_by_offset(local_btf, local_member->name_off);
-
-       n = btf_vlen(targ_type);
-       m = btf_members(targ_type);
-       for (i = 0; i < n; i++, m++) {
-               __u32 bit_offset;
-
-               bit_offset = btf_member_bit_offset(targ_type, i);
-
-               /* too deep struct/union/array nesting */
-               if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
-                       return -E2BIG;
-
-               /* speculate this member will be the good one */
-               spec->bit_offset += bit_offset;
-               spec->raw_spec[spec->raw_len++] = i;
-
-               targ_name = btf__name_by_offset(targ_btf, m->name_off);
-               if (str_is_empty(targ_name)) {
-                       /* embedded struct/union, we need to go deeper */
-                       found = bpf_core_match_member(local_btf, local_acc,
-                                                     targ_btf, m->type,
-                                                     spec, next_targ_id);
-                       if (found) /* either found or error */
-                               return found;
-               } else if (strcmp(local_name, targ_name) == 0) {
-                       /* matching named field */
-                       struct bpf_core_accessor *targ_acc;
-
-                       targ_acc = &spec->spec[spec->len++];
-                       targ_acc->type_id = targ_id;
-                       targ_acc->idx = i;
-                       targ_acc->name = targ_name;
-
-                       *next_targ_id = m->type;
-                       found = bpf_core_fields_are_compat(local_btf,
-                                                          local_member->type,
-                                                          targ_btf, m->type);
-                       if (!found)
-                               spec->len--; /* pop accessor */
-                       return found;
-               }
-               /* member turned out not to be what we looked for */
-               spec->bit_offset -= bit_offset;
-               spec->raw_len--;
-       }
-
-       return 0;
-}
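
For illustration, a hypothetical target layout in which the recursive branch
above is taken: matching a local accessor named "b" first tries member #0
("a"), then descends into the anonymous member (empty name) before finding
the named field.

	/* hypothetical target type, not part of this patch */
	struct sample {
		int a;
		struct {	/* anonymous member: searched recursively */
			int b;	/* found at raw spec 1:0, bit offset 32 */
		};
	};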
-
 /* Check local and target types for compatibility. This check is used for
  * type-based CO-RE relocations and follow slightly different rules than
  * field-based relocations. This function assumes that root types were already
@@ -5417,8 +4983,8 @@ static int bpf_core_match_member(const struct btf *local_btf,
  * These rules are not set in stone and probably will be adjusted as we get
  * more experience with using BPF CO-RE relocations.
  */
-static int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
-                                    const struct btf *targ_btf, __u32 targ_id)
+int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
+                             const struct btf *targ_btf, __u32 targ_id)
 {
        const struct btf_type *local_type, *targ_type;
        int depth = 32; /* max recursion depth */
@@ -5492,671 +5058,6 @@ recur:
        }
 }
 
-/*
- * Try to match local spec to a target type and, if successful, produce full
- * target spec (high-level, low-level + bit offset).
- */
-static int bpf_core_spec_match(struct bpf_core_spec *local_spec,
-                              const struct btf *targ_btf, __u32 targ_id,
-                              struct bpf_core_spec *targ_spec)
-{
-       const struct btf_type *targ_type;
-       const struct bpf_core_accessor *local_acc;
-       struct bpf_core_accessor *targ_acc;
-       int i, sz, matched;
-
-       memset(targ_spec, 0, sizeof(*targ_spec));
-       targ_spec->btf = targ_btf;
-       targ_spec->root_type_id = targ_id;
-       targ_spec->relo_kind = local_spec->relo_kind;
-
-       if (core_relo_is_type_based(local_spec->relo_kind)) {
-               return bpf_core_types_are_compat(local_spec->btf,
-                                                local_spec->root_type_id,
-                                                targ_btf, targ_id);
-       }
-
-       local_acc = &local_spec->spec[0];
-       targ_acc = &targ_spec->spec[0];
-
-       if (core_relo_is_enumval_based(local_spec->relo_kind)) {
-               size_t local_essent_len, targ_essent_len;
-               const struct btf_enum *e;
-               const char *targ_name;
-
-               /* has to resolve to an enum */
-               targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, &targ_id);
-               if (!btf_is_enum(targ_type))
-                       return 0;
-
-               local_essent_len = bpf_core_essential_name_len(local_acc->name);
-
-               for (i = 0, e = btf_enum(targ_type); i < btf_vlen(targ_type); i++, e++) {
-                       targ_name = btf__name_by_offset(targ_spec->btf, e->name_off);
-                       targ_essent_len = bpf_core_essential_name_len(targ_name);
-                       if (targ_essent_len != local_essent_len)
-                               continue;
-                       if (strncmp(local_acc->name, targ_name, local_essent_len) == 0) {
-                               targ_acc->type_id = targ_id;
-                               targ_acc->idx = i;
-                               targ_acc->name = targ_name;
-                               targ_spec->len++;
-                               targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
-                               targ_spec->raw_len++;
-                               return 1;
-                       }
-               }
-               return 0;
-       }
-
-       if (!core_relo_is_field_based(local_spec->relo_kind))
-               return -EINVAL;
-
-       for (i = 0; i < local_spec->len; i++, local_acc++, targ_acc++) {
-               targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id,
-                                                  &targ_id);
-               if (!targ_type)
-                       return -EINVAL;
-
-               if (local_acc->name) {
-                       matched = bpf_core_match_member(local_spec->btf,
-                                                       local_acc,
-                                                       targ_btf, targ_id,
-                                                       targ_spec, &targ_id);
-                       if (matched <= 0)
-                               return matched;
-               } else {
-                       /* for i=0, targ_id is already treated as array element
-                        * type (because it's the original struct), for others
-                        * we should find array element type first
-                        */
-                       if (i > 0) {
-                               const struct btf_array *a;
-                               bool flex;
-
-                               if (!btf_is_array(targ_type))
-                                       return 0;
-
-                               a = btf_array(targ_type);
-                               flex = is_flex_arr(targ_btf, targ_acc - 1, a);
-                               if (!flex && local_acc->idx >= a->nelems)
-                                       return 0;
-                               if (!skip_mods_and_typedefs(targ_btf, a->type,
-                                                           &targ_id))
-                                       return -EINVAL;
-                       }
-
-                       /* too deep struct/union/array nesting */
-                       if (targ_spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
-                               return -E2BIG;
-
-                       targ_acc->type_id = targ_id;
-                       targ_acc->idx = local_acc->idx;
-                       targ_acc->name = NULL;
-                       targ_spec->len++;
-                       targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
-                       targ_spec->raw_len++;
-
-                       sz = btf__resolve_size(targ_btf, targ_id);
-                       if (sz < 0)
-                               return sz;
-                       targ_spec->bit_offset += local_acc->idx * sz * 8;
-               }
-       }
-
-       return 1;
-}
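
A hypothetical target type for the flexible-array case above: when the
accessed member is a flex array, is_flex_arr() returns true and the
a->nelems bound check is skipped, so any local array index is accepted.

	/* hypothetical target type, not part of this patch */
	struct event {
		int len;
		char data[];	/* flexible array: no upper bound on index */
	};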
-
-static int bpf_core_calc_field_relo(const struct bpf_program *prog,
-                                   const struct bpf_core_relo *relo,
-                                   const struct bpf_core_spec *spec,
-                                   __u32 *val, __u32 *field_sz, __u32 *type_id,
-                                   bool *validate)
-{
-       const struct bpf_core_accessor *acc;
-       const struct btf_type *t;
-       __u32 byte_off, byte_sz, bit_off, bit_sz, field_type_id;
-       const struct btf_member *m;
-       const struct btf_type *mt;
-       bool bitfield;
-       __s64 sz;
-
-       *field_sz = 0;
-
-       if (relo->kind == BPF_FIELD_EXISTS) {
-               *val = spec ? 1 : 0;
-               return 0;
-       }
-
-       if (!spec)
-               return -EUCLEAN; /* request instruction poisoning */
-
-       acc = &spec->spec[spec->len - 1];
-       t = btf__type_by_id(spec->btf, acc->type_id);
-
-       /* a[n] accessor needs special handling */
-       if (!acc->name) {
-               if (relo->kind == BPF_FIELD_BYTE_OFFSET) {
-                       *val = spec->bit_offset / 8;
-                       /* remember field size for load/store mem size */
-                       sz = btf__resolve_size(spec->btf, acc->type_id);
-                       if (sz < 0)
-                               return -EINVAL;
-                       *field_sz = sz;
-                       *type_id = acc->type_id;
-               } else if (relo->kind == BPF_FIELD_BYTE_SIZE) {
-                       sz = btf__resolve_size(spec->btf, acc->type_id);
-                       if (sz < 0)
-                               return -EINVAL;
-                       *val = sz;
-               } else {
-                       pr_warn("prog '%s': relo %d at insn #%d can't be applied to array access\n",
-                               prog->name, relo->kind, relo->insn_off / 8);
-                       return -EINVAL;
-               }
-               if (validate)
-                       *validate = true;
-               return 0;
-       }
-
-       m = btf_members(t) + acc->idx;
-       mt = skip_mods_and_typedefs(spec->btf, m->type, &field_type_id);
-       bit_off = spec->bit_offset;
-       bit_sz = btf_member_bitfield_size(t, acc->idx);
-
-       bitfield = bit_sz > 0;
-       if (bitfield) {
-               byte_sz = mt->size;
-               byte_off = bit_off / 8 / byte_sz * byte_sz;
-               /* figure out smallest int size necessary for bitfield load */
-               while (bit_off + bit_sz - byte_off * 8 > byte_sz * 8) {
-                       if (byte_sz >= 8) {
-                               /* bitfield can't be read with 64-bit read */
-                               pr_warn("prog '%s': relo %d at insn #%d can't be satisfied for bitfield\n",
-                                       prog->name, relo->kind, relo->insn_off / 8);
-                               return -E2BIG;
-                       }
-                       byte_sz *= 2;
-                       byte_off = bit_off / 8 / byte_sz * byte_sz;
-               }
-       } else {
-               sz = btf__resolve_size(spec->btf, field_type_id);
-               if (sz < 0)
-                       return -EINVAL;
-               byte_sz = sz;
-               byte_off = spec->bit_offset / 8;
-               bit_sz = byte_sz * 8;
-       }
-
-       /* for bitfields, all the relocatable aspects are ambiguous and we
-        * might disagree with compiler, so turn off validation of expected
-        * value, except for signedness
-        */
-       if (validate)
-               *validate = !bitfield;
-
-       switch (relo->kind) {
-       case BPF_FIELD_BYTE_OFFSET:
-               *val = byte_off;
-               if (!bitfield) {
-                       *field_sz = byte_sz;
-                       *type_id = field_type_id;
-               }
-               break;
-       case BPF_FIELD_BYTE_SIZE:
-               *val = byte_sz;
-               break;
-       case BPF_FIELD_SIGNED:
-               /* enums will be assumed unsigned */
-               *val = btf_is_enum(mt) ||
-                      (btf_int_encoding(mt) & BTF_INT_SIGNED);
-               if (validate)
-                       *validate = true; /* signedness is never ambiguous */
-               break;
-       case BPF_FIELD_LSHIFT_U64:
-#if __BYTE_ORDER == __LITTLE_ENDIAN
-               *val = 64 - (bit_off + bit_sz - byte_off  * 8);
-#else
-               *val = (8 - byte_sz) * 8 + (bit_off - byte_off * 8);
-#endif
-               break;
-       case BPF_FIELD_RSHIFT_U64:
-               *val = 64 - bit_sz;
-               if (validate)
-                       *validate = true; /* right shift is never ambiguous */
-               break;
-       case BPF_FIELD_EXISTS:
-       default:
-               return -EOPNOTSUPP;
-       }
-
-       return 0;
-}
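
The bitfield sizing loop above is easiest to follow with numbers. A standalone
sketch, assuming a hypothetical bitfield with bit_off = 6 and bit_sz = 5 whose
base type is 1 byte (so the field straddles a byte boundary):

	#include <stdio.h>

	int main(void)
	{
		unsigned bit_off = 6, bit_sz = 5, byte_sz = 1;
		unsigned byte_off = bit_off / 8 / byte_sz * byte_sz;

		while (bit_off + bit_sz - byte_off * 8 > byte_sz * 8) {
			byte_sz *= 2;	/* widen the load: 1 -> 2 -> 4 -> 8 */
			byte_off = bit_off / 8 / byte_sz * byte_sz;
		}
		/* prints "load 2 bytes at offset 0" */
		printf("load %u bytes at offset %u\n", byte_sz, byte_off);
		return 0;
	}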
-
-static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo,
-                                  const struct bpf_core_spec *spec,
-                                  __u32 *val)
-{
-       __s64 sz;
-
-       /* type-based relos return zero when target type is not found */
-       if (!spec) {
-               *val = 0;
-               return 0;
-       }
-
-       switch (relo->kind) {
-       case BPF_TYPE_ID_TARGET:
-               *val = spec->root_type_id;
-               break;
-       case BPF_TYPE_EXISTS:
-               *val = 1;
-               break;
-       case BPF_TYPE_SIZE:
-               sz = btf__resolve_size(spec->btf, spec->root_type_id);
-               if (sz < 0)
-                       return -EINVAL;
-               *val = sz;
-               break;
-       case BPF_TYPE_ID_LOCAL:
-       /* BPF_TYPE_ID_LOCAL is handled specially and shouldn't get here */
-       default:
-               return -EOPNOTSUPP;
-       }
-
-       return 0;
-}
-
-static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo,
-                                     const struct bpf_core_spec *spec,
-                                     __u32 *val)
-{
-       const struct btf_type *t;
-       const struct btf_enum *e;
-
-       switch (relo->kind) {
-       case BPF_ENUMVAL_EXISTS:
-               *val = spec ? 1 : 0;
-               break;
-       case BPF_ENUMVAL_VALUE:
-               if (!spec)
-                       return -EUCLEAN; /* request instruction poisoning */
-               t = btf__type_by_id(spec->btf, spec->spec[0].type_id);
-               e = btf_enum(t) + spec->spec[0].idx;
-               *val = e->val;
-               break;
-       default:
-               return -EOPNOTSUPP;
-       }
-
-       return 0;
-}
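
On the BPF program side, these two kinds are produced by libbpf's
bpf_core_read.h helpers. A minimal sketch (the vmlinux.h include and the
enum/value choice are assumptions for illustration):

	#include "vmlinux.h"		/* assumed: generated kernel types */
	#include <bpf/bpf_core_read.h>

	int get_tgid_type_value(void)
	{
		int val = -1;

		/* emits a BPF_ENUMVAL_EXISTS relocation */
		if (bpf_core_enum_value_exists(enum pid_type, PIDTYPE_TGID))
			/* emits a BPF_ENUMVAL_VALUE relocation */
			val = bpf_core_enum_value(enum pid_type, PIDTYPE_TGID);
		return val;
	}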
-
-struct bpf_core_relo_res
-{
-       /* expected value in the instruction, unless validate == false */
-       __u32 orig_val;
-       /* new value that needs to be patched up to */
-       __u32 new_val;
-       /* relocation unsuccessful, poison instruction, but don't fail load */
-       bool poison;
-       /* some relocations can't be validated against orig_val */
-       bool validate;
-       /* for field byte offset relocations of the forms:
-        *     *(T *)(rX + <off>) = rY
-        *     rX = *(T *)(rY + <off>),
-        * we remember original and resolved field size to adjust direct
-        * memory loads of pointers and integers; this is necessary for 32-bit
-        * host kernel architectures, but also allows fields that were resized
-        * from, e.g., u32 to u64, to be relocated automatically.
-        */
-       bool fail_memsz_adjust;
-       __u32 orig_sz;
-       __u32 orig_type_id;
-       __u32 new_sz;
-       __u32 new_type_id;
-};
-
-/* Calculate original and target relocation values, given local and target
- * specs and relocation kind. These values are calculated for each candidate.
- * If there are multiple candidates, resulting values should all be consistent
- * with each other. Otherwise, libbpf will refuse to proceed due to ambiguity.
- * If instruction has to be poisoned, *poison will be set to true.
- */
-static int bpf_core_calc_relo(const struct bpf_program *prog,
-                             const struct bpf_core_relo *relo,
-                             int relo_idx,
-                             const struct bpf_core_spec *local_spec,
-                             const struct bpf_core_spec *targ_spec,
-                             struct bpf_core_relo_res *res)
-{
-       int err = -EOPNOTSUPP;
-
-       res->orig_val = 0;
-       res->new_val = 0;
-       res->poison = false;
-       res->validate = true;
-       res->fail_memsz_adjust = false;
-       res->orig_sz = res->new_sz = 0;
-       res->orig_type_id = res->new_type_id = 0;
-
-       if (core_relo_is_field_based(relo->kind)) {
-               err = bpf_core_calc_field_relo(prog, relo, local_spec,
-                                              &res->orig_val, &res->orig_sz,
-                                              &res->orig_type_id, &res->validate);
-               err = err ?: bpf_core_calc_field_relo(prog, relo, targ_spec,
-                                                     &res->new_val, &res->new_sz,
-                                                     &res->new_type_id, NULL);
-               if (err)
-                       goto done;
-               /* Validate if it's safe to adjust load/store memory size.
-                * Adjustments are performed only if original and new memory
-                * sizes differ.
-                */
-               res->fail_memsz_adjust = false;
-               if (res->orig_sz != res->new_sz) {
-                       const struct btf_type *orig_t, *new_t;
-
-                       orig_t = btf__type_by_id(local_spec->btf, res->orig_type_id);
-                       new_t = btf__type_by_id(targ_spec->btf, res->new_type_id);
-
-                       /* There are two use cases in which it's safe to
-                        * adjust load/store's mem size:
-                        *   - reading a 32-bit kernel pointer, while on the
-                        *   BPF side pointers are always 64-bit; in this case
-                        *   it's safe to "downsize" instruction size due to
-                        *   pointer being treated as unsigned integer with
-                        *   zero-extended upper 32-bits;
-                        *   - reading unsigned integers, again because
-                        *   zero-extension preserves the value correctly.
-                        *
-                        * In all other cases it's incorrect to attempt to
-                        * load/store field because read value will be
-                        * incorrect, so we poison relocated instruction.
-                        */
-                       if (btf_is_ptr(orig_t) && btf_is_ptr(new_t))
-                               goto done;
-                       if (btf_is_int(orig_t) && btf_is_int(new_t) &&
-                           btf_int_encoding(orig_t) != BTF_INT_SIGNED &&
-                           btf_int_encoding(new_t) != BTF_INT_SIGNED)
-                               goto done;
-
-                       /* mark as invalid mem size adjustment, but this will
-                        * only be checked for LDX/STX/ST insns
-                        */
-                       res->fail_memsz_adjust = true;
-               }
-       } else if (core_relo_is_type_based(relo->kind)) {
-               err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val);
-               err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val);
-       } else if (core_relo_is_enumval_based(relo->kind)) {
-               err = bpf_core_calc_enumval_relo(relo, local_spec, &res->orig_val);
-               err = err ?: bpf_core_calc_enumval_relo(relo, targ_spec, &res->new_val);
-       }
-
-done:
-       if (err == -EUCLEAN) {
-               /* EUCLEAN is used to signal instruction poisoning request */
-               res->poison = true;
-               err = 0;
-       } else if (err == -EOPNOTSUPP) {
-               /* EOPNOTSUPP means unknown/unsupported relocation */
-               pr_warn("prog '%s': relo #%d: unrecognized CO-RE relocation %s (%d) at insn #%d\n",
-                       prog->name, relo_idx, core_relo_kind_str(relo->kind),
-                       relo->kind, relo->insn_off / 8);
-       }
-
-       return err;
-}
-
-/*
- * Turn an instruction for which CO-RE relocation failed into an invalid one
- * with a distinct signature.
- */
-static void bpf_core_poison_insn(struct bpf_program *prog, int relo_idx,
-                                int insn_idx, struct bpf_insn *insn)
-{
-       pr_debug("prog '%s': relo #%d: substituting insn #%d w/ invalid insn\n",
-                prog->name, relo_idx, insn_idx);
-       insn->code = BPF_JMP | BPF_CALL;
-       insn->dst_reg = 0;
-       insn->src_reg = 0;
-       insn->off = 0;
-       /* if this instruction is reachable (not dead code),
-        * verifier will complain with the following message:
-        * invalid func unknown#195896080
-        */
-       insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */
-}
-
-static int insn_bpf_size_to_bytes(struct bpf_insn *insn)
-{
-       switch (BPF_SIZE(insn->code)) {
-       case BPF_DW: return 8;
-       case BPF_W: return 4;
-       case BPF_H: return 2;
-       case BPF_B: return 1;
-       default: return -1;
-       }
-}
-
-static int insn_bytes_to_bpf_size(__u32 sz)
-{
-       switch (sz) {
-       case 8: return BPF_DW;
-       case 4: return BPF_W;
-       case 2: return BPF_H;
-       case 1: return BPF_B;
-       default: return -1;
-       }
-}
-
-/*
- * Patch relocatable BPF instruction.
- *
- * Patched value is determined by relocation kind and target specification.
- * For existence relocations target spec will be NULL if field/type is not found.
- * Expected insn->imm value is determined using relocation kind and local
- * spec, and is checked before patching instruction. If actual insn->imm value
- * is wrong, bail out with error.
- *
- * Currently supported classes of BPF instruction are:
- * 1. rX = <imm> (assignment with immediate operand);
- * 2. rX += <imm> (arithmetic operations with immediate operand);
- * 3. rX = <imm64> (load with 64-bit immediate value);
- * 4. rX = *(T *)(rY + <off>), where T is one of {u8, u16, u32, u64};
- * 5. *(T *)(rX + <off>) = rY, where T is one of {u8, u16, u32, u64};
- * 6. *(T *)(rX + <off>) = <imm>, where T is one of {u8, u16, u32, u64}.
- */
-static int bpf_core_patch_insn(struct bpf_program *prog,
-                              const struct bpf_core_relo *relo,
-                              int relo_idx,
-                              const struct bpf_core_relo_res *res)
-{
-       __u32 orig_val, new_val;
-       struct bpf_insn *insn;
-       int insn_idx;
-       __u8 class;
-
-       if (relo->insn_off % BPF_INSN_SZ)
-               return -EINVAL;
-       insn_idx = relo->insn_off / BPF_INSN_SZ;
-       /* adjust insn_idx from section frame of reference to the local
-        * program's frame of reference; (sub-)program code is not yet
-        * relocated, so it's enough to just subtract in-section offset
-        */
-       insn_idx = insn_idx - prog->sec_insn_off;
-       insn = &prog->insns[insn_idx];
-       class = BPF_CLASS(insn->code);
-
-       if (res->poison) {
-poison:
-               /* poison second part of ldimm64 to avoid confusing error from
-                * verifier about "unknown opcode 00"
-                */
-               if (is_ldimm64_insn(insn))
-                       bpf_core_poison_insn(prog, relo_idx, insn_idx + 1, insn + 1);
-               bpf_core_poison_insn(prog, relo_idx, insn_idx, insn);
-               return 0;
-       }
-
-       orig_val = res->orig_val;
-       new_val = res->new_val;
-
-       switch (class) {
-       case BPF_ALU:
-       case BPF_ALU64:
-               if (BPF_SRC(insn->code) != BPF_K)
-                       return -EINVAL;
-               if (res->validate && insn->imm != orig_val) {
-                       pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n",
-                               prog->name, relo_idx,
-                               insn_idx, insn->imm, orig_val, new_val);
-                       return -EINVAL;
-               }
-               orig_val = insn->imm;
-               insn->imm = new_val;
-               pr_debug("prog '%s': relo #%d: patched insn #%d (ALU/ALU64) imm %u -> %u\n",
-                        prog->name, relo_idx, insn_idx,
-                        orig_val, new_val);
-               break;
-       case BPF_LDX:
-       case BPF_ST:
-       case BPF_STX:
-               if (res->validate && insn->off != orig_val) {
-                       pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDX/ST/STX) value: got %u, exp %u -> %u\n",
-                               prog->name, relo_idx, insn_idx, insn->off, orig_val, new_val);
-                       return -EINVAL;
-               }
-               if (new_val > SHRT_MAX) {
-                       pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) value too big: %u\n",
-                               prog->name, relo_idx, insn_idx, new_val);
-                       return -ERANGE;
-               }
-               if (res->fail_memsz_adjust) {
-                       pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) accesses field incorrectly. "
-                               "Make sure you are accessing pointers, unsigned integers, or fields of matching type and size.\n",
-                               prog->name, relo_idx, insn_idx);
-                       goto poison;
-               }
-
-               orig_val = insn->off;
-               insn->off = new_val;
-               pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %u -> %u\n",
-                        prog->name, relo_idx, insn_idx, orig_val, new_val);
-
-               if (res->new_sz != res->orig_sz) {
-                       int insn_bytes_sz, insn_bpf_sz;
-
-                       insn_bytes_sz = insn_bpf_size_to_bytes(insn);
-                       if (insn_bytes_sz != res->orig_sz) {
-                               pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) unexpected mem size: got %d, exp %u\n",
-                                       prog->name, relo_idx, insn_idx, insn_bytes_sz, res->orig_sz);
-                               return -EINVAL;
-                       }
-
-                       insn_bpf_sz = insn_bytes_to_bpf_size(res->new_sz);
-                       if (insn_bpf_sz < 0) {
-                               pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) invalid new mem size: %u\n",
-                                       prog->name, relo_idx, insn_idx, res->new_sz);
-                               return -EINVAL;
-                       }
-
-                       insn->code = BPF_MODE(insn->code) | insn_bpf_sz | BPF_CLASS(insn->code);
-                       pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) mem_sz %u -> %u\n",
-                                prog->name, relo_idx, insn_idx, res->orig_sz, res->new_sz);
-               }
-               break;
-       case BPF_LD: {
-               __u64 imm;
-
-               if (!is_ldimm64_insn(insn) ||
-                   insn[0].src_reg != 0 || insn[0].off != 0 ||
-                   insn_idx + 1 >= prog->insns_cnt ||
-                   insn[1].code != 0 || insn[1].dst_reg != 0 ||
-                   insn[1].src_reg != 0 || insn[1].off != 0) {
-                       pr_warn("prog '%s': relo #%d: insn #%d (LDIMM64) has unexpected form\n",
-                               prog->name, relo_idx, insn_idx);
-                       return -EINVAL;
-               }
-
-               imm = insn[0].imm + ((__u64)insn[1].imm << 32);
-               if (res->validate && imm != orig_val) {
-                       pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDIMM64) value: got %llu, exp %u -> %u\n",
-                               prog->name, relo_idx,
-                               insn_idx, (unsigned long long)imm,
-                               orig_val, new_val);
-                       return -EINVAL;
-               }
-
-               insn[0].imm = new_val;
-               insn[1].imm = 0; /* currently only 32-bit values are supported */
-               pr_debug("prog '%s': relo #%d: patched insn #%d (LDIMM64) imm64 %llu -> %u\n",
-                        prog->name, relo_idx, insn_idx,
-                        (unsigned long long)imm, new_val);
-               break;
-       }
-       default:
-               pr_warn("prog '%s': relo #%d: trying to relocate unrecognized insn #%d, code:0x%x, src:0x%x, dst:0x%x, off:0x%x, imm:0x%x\n",
-                       prog->name, relo_idx, insn_idx, insn->code,
-                       insn->src_reg, insn->dst_reg, insn->off, insn->imm);
-               return -EINVAL;
-       }
-
-       return 0;
-}
-
-/* Output spec definition in the format:
- * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>,
- * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b
- */
-static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec)
-{
-       const struct btf_type *t;
-       const struct btf_enum *e;
-       const char *s;
-       __u32 type_id;
-       int i;
-
-       type_id = spec->root_type_id;
-       t = btf__type_by_id(spec->btf, type_id);
-       s = btf__name_by_offset(spec->btf, t->name_off);
-
-       libbpf_print(level, "[%u] %s %s", type_id, btf_kind_str(t), str_is_empty(s) ? "<anon>" : s);
-
-       if (core_relo_is_type_based(spec->relo_kind))
-               return;
-
-       if (core_relo_is_enumval_based(spec->relo_kind)) {
-               t = skip_mods_and_typedefs(spec->btf, type_id, NULL);
-               e = btf_enum(t) + spec->raw_spec[0];
-               s = btf__name_by_offset(spec->btf, e->name_off);
-
-               libbpf_print(level, "::%s = %u", s, e->val);
-               return;
-       }
-
-       if (core_relo_is_field_based(spec->relo_kind)) {
-               for (i = 0; i < spec->len; i++) {
-                       if (spec->spec[i].name)
-                               libbpf_print(level, ".%s", spec->spec[i].name);
-                       else if (i > 0 || spec->spec[i].idx > 0)
-                               libbpf_print(level, "[%u]", spec->spec[i].idx);
-               }
-
-               libbpf_print(level, " (");
-               for (i = 0; i < spec->raw_len; i++)
-                       libbpf_print(level, "%s%d", i == 0 ? "" : ":", spec->raw_spec[i]);
-
-               if (spec->bit_offset % 8)
-                       libbpf_print(level, " @ offset %u.%u)",
-                                    spec->bit_offset / 8, spec->bit_offset % 8);
-               else
-                       libbpf_print(level, " @ offset %u)", spec->bit_offset / 8);
-               return;
-       }
-}
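
For orientation, a field-based spec printed by this helper looks roughly like
the following (type ID, raw spec, and offset are hypothetical):

	[78] struct sample.a[3].b (0:0:3:1 @ offset 44)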
-
 static size_t bpf_core_hash_fn(const void *key, void *ctx)
 {
        return (size_t)key;
@@ -6172,73 +5073,33 @@ static void *u32_as_hash_key(__u32 x)
        return (void *)(uintptr_t)x;
 }
 
-/*
- * CO-RE relocate single instruction.
- *
- * The outline and important points of the algorithm:
- * 1. For given local type, find corresponding candidate target types.
- *    Candidate type is a type with the same "essential" name, ignoring
- *    everything after last triple underscore (___). E.g., `sample`,
- *    `sample___flavor_one`, `sample___flavor_another_one`, are all candidates
- *    for each other. Names with triple underscore are referred to as
- *    "flavors" and are useful, among other things, to allow to
- *    specify/support incompatible variations of the same kernel struct, which
- *    might differ between different kernel versions and/or build
- *    configurations.
- *
- *    N.B. Struct "flavors" could be generated by bpftool's BTF-to-C
- *    converter, when deduplicated BTF of a kernel still contains more than
- *    one type with the same name. In that case, ___2, ___3, etc. are
- *    appended starting from the second name conflict. But such flavors are
- *    also useful when defined "locally", in a BPF program, to extract the
- *    same data from incompatible changes between different kernel
- *    versions/configurations. For instance, to handle field renames between
- *    kernel versions, one can use two flavors of the struct name with the
- *    same common name and use conditional relocations to extract that field,
- *    depending on target kernel version.
- * 2. For each candidate type, try to match local specification to this
- *    candidate target type. Matching involves finding corresponding
- *    high-level spec accessors, meaning that all named fields should match,
- *    as well as all array accesses should be within the actual bounds. Also,
- *    types should be compatible (see bpf_core_fields_are_compat for details).
- * 3. It is supported and expected that there might be multiple flavors
- *    matching the spec. As long as all the specs resolve to the same set of
- *    offsets across all candidates, there is no error. If there is any
- *    ambiguity, CO-RE relocation will fail. This is necessary to accommodate
- *    imperfections of BTF deduplication, which can cause slight duplication of
- *    the same BTF type, if some directly or indirectly referenced (by
- *    pointer) type gets resolved to different actual types in different
- *    object files. If such situation occurs, deduplicated BTF will end up
- *    with two (or more) structurally identical types, which differ only in
- *    types they refer to through pointer. This should be OK in most cases and
- *    is not an error.
- * 4. Candidate types search is performed by linearly scanning through all
- *    types in target BTF. It is anticipated that this is overall more
- *    efficient memory-wise and not significantly worse (if not better)
- *    CPU-wise compared to prebuilding a map from all local type names to
- *    a list of candidate type names. It's also sped up by caching the resolved
- *    list of matching candidates for each local "root" type ID that has at
- *    least one bpf_core_relo associated with it. This list is shared
- *    between multiple relocations for the same type ID and is updated as some
- *    of the candidates are pruned due to structural incompatibility.
- */
 static int bpf_core_apply_relo(struct bpf_program *prog,
                               const struct bpf_core_relo *relo,
                               int relo_idx,
                               const struct btf *local_btf,
                               struct hashmap *cand_cache)
 {
-       struct bpf_core_spec local_spec, cand_spec, targ_spec = {};
        const void *type_key = u32_as_hash_key(relo->type_id);
-       struct bpf_core_relo_res cand_res, targ_res;
+       struct bpf_core_cand_list *cands = NULL;
+       const char *prog_name = prog->name;
        const struct btf_type *local_type;
        const char *local_name;
-       struct core_cand_list *cands = NULL;
-       __u32 local_id;
-       const char *spec_str;
-       int i, j, err;
+       __u32 local_id = relo->type_id;
+       struct bpf_insn *insn;
+       int insn_idx, err;
+
+       if (relo->insn_off % BPF_INSN_SZ)
+               return -EINVAL;
+       insn_idx = relo->insn_off / BPF_INSN_SZ;
+       /* adjust insn_idx from section frame of reference to the local
+        * program's frame of reference; (sub-)program code is not yet
+        * relocated, so it's enough to just subtract in-section offset
+        */
+       insn_idx = insn_idx - prog->sec_insn_off;
+       if (insn_idx >= prog->insns_cnt)
+               return -EINVAL;
+       insn = &prog->insns[insn_idx];
 
-       local_id = relo->type_id;
        local_type = btf__type_by_id(local_btf, local_id);
        if (!local_type)
                return -EINVAL;
@@ -6247,51 +5108,19 @@ static int bpf_core_apply_relo(struct bpf_program *prog,
        if (!local_name)
                return -EINVAL;
 
-       spec_str = btf__name_by_offset(local_btf, relo->access_str_off);
-       if (str_is_empty(spec_str))
-               return -EINVAL;
-
        if (prog->obj->gen_loader) {
-               pr_warn("// TODO core_relo: prog %td insn[%d] %s %s kind %d\n",
+               pr_warn("// TODO core_relo: prog %td insn[%d] %s kind %d\n",
                        prog - prog->obj->programs, relo->insn_off / 8,
-                       local_name, spec_str, relo->kind);
+                       local_name, relo->kind);
                return -ENOTSUP;
        }
-       err = bpf_core_parse_spec(local_btf, local_id, spec_str, relo->kind, &local_spec);
-       if (err) {
-               pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n",
-                       prog->name, relo_idx, local_id, btf_kind_str(local_type),
-                       str_is_empty(local_name) ? "<anon>" : local_name,
-                       spec_str, err);
-               return -EINVAL;
-       }
-
-       pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog->name,
-                relo_idx, core_relo_kind_str(relo->kind), relo->kind);
-       bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec);
-       libbpf_print(LIBBPF_DEBUG, "\n");
-
-       /* TYPE_ID_LOCAL relo is special and doesn't need candidate search */
-       if (relo->kind == BPF_TYPE_ID_LOCAL) {
-               targ_res.validate = true;
-               targ_res.poison = false;
-               targ_res.orig_val = local_spec.root_type_id;
-               targ_res.new_val = local_spec.root_type_id;
-               goto patch_insn;
-       }
 
-       /* libbpf doesn't support candidate search for anonymous types */
-       if (str_is_empty(spec_str)) {
-               pr_warn("prog '%s': relo #%d: <%s> (%d) relocation doesn't support anonymous types\n",
-                       prog->name, relo_idx, core_relo_kind_str(relo->kind), relo->kind);
-               return -EOPNOTSUPP;
-       }
-
-       if (!hashmap__find(cand_cache, type_key, (void **)&cands)) {
+       if (relo->kind != BPF_TYPE_ID_LOCAL &&
+           !hashmap__find(cand_cache, type_key, (void **)&cands)) {
                cands = bpf_core_find_cands(prog->obj, local_btf, local_id);
                if (IS_ERR(cands)) {
                        pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld\n",
-                               prog->name, relo_idx, local_id, btf_kind_str(local_type),
+                               prog_name, relo_idx, local_id, btf_kind_str(local_type),
                                local_name, PTR_ERR(cands));
                        return PTR_ERR(cands);
                }
@@ -6302,97 +5131,7 @@ static int bpf_core_apply_relo(struct bpf_program *prog,
                }
        }
 
-       for (i = 0, j = 0; i < cands->len; i++) {
-               err = bpf_core_spec_match(&local_spec, cands->cands[i].btf,
-                                         cands->cands[i].id, &cand_spec);
-               if (err < 0) {
-                       pr_warn("prog '%s': relo #%d: error matching candidate #%d ",
-                               prog->name, relo_idx, i);
-                       bpf_core_dump_spec(LIBBPF_WARN, &cand_spec);
-                       libbpf_print(LIBBPF_WARN, ": %d\n", err);
-                       return err;
-               }
-
-               pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog->name,
-                        relo_idx, err == 0 ? "non-matching" : "matching", i);
-               bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec);
-               libbpf_print(LIBBPF_DEBUG, "\n");
-
-               if (err == 0)
-                       continue;
-
-               err = bpf_core_calc_relo(prog, relo, relo_idx, &local_spec, &cand_spec, &cand_res);
-               if (err)
-                       return err;
-
-               if (j == 0) {
-                       targ_res = cand_res;
-                       targ_spec = cand_spec;
-               } else if (cand_spec.bit_offset != targ_spec.bit_offset) {
-                       /* if there are many field relo candidates, they
-                        * should all resolve to the same bit offset
-                        */
-                       pr_warn("prog '%s': relo #%d: field offset ambiguity: %u != %u\n",
-                               prog->name, relo_idx, cand_spec.bit_offset,
-                               targ_spec.bit_offset);
-                       return -EINVAL;
-               } else if (cand_res.poison != targ_res.poison || cand_res.new_val != targ_res.new_val) {
-                       /* all candidates should result in the same relocation
-                        * decision and value, otherwise it's dangerous to
-                        * proceed due to ambiguity
-                        */
-                       pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %u != %s %u\n",
-                               prog->name, relo_idx,
-                               cand_res.poison ? "failure" : "success", cand_res.new_val,
-                               targ_res.poison ? "failure" : "success", targ_res.new_val);
-                       return -EINVAL;
-               }
-
-               cands->cands[j++] = cands->cands[i];
-       }
-
-       /*
-        * For BPF_FIELD_EXISTS relo or when used BPF program has field
-        * existence checks or kernel version/config checks, it's expected
-        * that we might not find any candidates. In this case, if field
-        * wasn't found in any candidate, the list of candidates shouldn't
-        * change at all, we'll just handle relocating appropriately,
-        * depending on relo's kind.
-        */
-       if (j > 0)
-               cands->len = j;
-
-       /*
-        * If no candidates were found, it might be either a programmer error
-        * or an expected case, depending on whether the instruction with the
-        * relocation is guarded in some way that makes it unreachable (dead
-        * code) if relocation can't be resolved. This is handled in
-        * bpf_core_patch_insn() uniformly by replacing that instruction with
-        * BPF helper call insn (using invalid helper ID). If that instruction
-        * is indeed unreachable, then it will be ignored and eliminated by
-        * verifier. If it was an error, then verifier will complain and point
-        * to a specific instruction number in its log.
-        */
-       if (j == 0) {
-               pr_debug("prog '%s': relo #%d: no matching targets found\n",
-                        prog->name, relo_idx);
-
-               /* calculate single target relo result explicitly */
-               err = bpf_core_calc_relo(prog, relo, relo_idx, &local_spec, NULL, &targ_res);
-               if (err)
-                       return err;
-       }
-
-patch_insn:
-       /* bpf_core_patch_insn() should know how to handle missing targ_spec */
-       err = bpf_core_patch_insn(prog, relo, relo_idx, &targ_res);
-       if (err) {
-               pr_warn("prog '%s': relo #%d: failed to patch insn #%zu: %d\n",
-                       prog->name, relo_idx, relo->insn_off / BPF_INSN_SZ, err);
-               return -EINVAL;
-       }
-
-       return 0;
+       return bpf_core_apply_relo_insn(prog_name, insn, insn_idx, relo, relo_idx, local_btf, cands);
 }
 
 static int
@@ -7232,7 +5971,7 @@ static int bpf_object__collect_relos(struct bpf_object *obj)
 
        for (i = 0; i < obj->nr_programs; i++) {
                struct bpf_program *p = &obj->programs[i];
-               
+
                if (!p->nr_reloc)
                        continue;
 
@@ -7596,7 +6335,7 @@ static struct bpf_object *
 __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
                   const struct bpf_object_open_opts *opts)
 {
-       const char *obj_name, *kconfig;
+       const char *obj_name, *kconfig, *btf_tmp_path;
        struct bpf_program *prog;
        struct bpf_object *obj;
        char tmp_name[64];
@@ -7627,11 +6366,26 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
        if (IS_ERR(obj))
                return obj;
 
+       btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL);
+       if (btf_tmp_path) {
+               if (strlen(btf_tmp_path) >= PATH_MAX) {
+                       err = -ENAMETOOLONG;
+                       goto out;
+               }
+               obj->btf_custom_path = strdup(btf_tmp_path);
+               if (!obj->btf_custom_path) {
+                       err = -ENOMEM;
+                       goto out;
+               }
+       }
+
        kconfig = OPTS_GET(opts, kconfig, NULL);
        if (kconfig) {
                obj->kconfig = strdup(kconfig);
-               if (!obj->kconfig)
-                       return ERR_PTR(-ENOMEM);
+               if (!obj->kconfig) {
+                       err = -ENOMEM;
+                       goto out;
+               }
        }
 
        err = bpf_object__elf_init(obj);
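
A usage sketch for the new btf_custom_path option (the file paths are
assumptions for illustration):

	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
		.btf_custom_path = "/root/btf/5.4.0-custom.btf",
	);
	struct bpf_object *obj = bpf_object__open_file("prog.bpf.o", &opts);

	if (libbpf_get_error(obj))
		/* handle open error */;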
@@ -8097,7 +6851,7 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
        err = err ? : bpf_object__sanitize_maps(obj);
        err = err ? : bpf_object__init_kern_struct_ops_maps(obj);
        err = err ? : bpf_object__create_maps(obj);
-       err = err ? : bpf_object__relocate(obj, attr->target_btf_path);
+       err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : attr->target_btf_path);
        err = err ? : bpf_object__load_progs(obj, attr->log_level);
 
        if (obj->gen_loader) {
@@ -8492,6 +7246,11 @@ const char *bpf_map__get_pin_path(const struct bpf_map *map)
        return map->pin_path;
 }
 
+const char *bpf_map__pin_path(const struct bpf_map *map)
+{
+       return map->pin_path;
+}
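
The new getter mirrors bpf_map__get_pin_path() under libbpf's preferred
bpf_map__* naming; a one-line usage sketch (the map variable is assumed):

	const char *path = bpf_map__pin_path(map);	/* NULL if not set */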
+
 bool bpf_map__is_pinned(const struct bpf_map *map)
 {
        return map->pinned;
@@ -8744,6 +7503,7 @@ void bpf_object__close(struct bpf_object *obj)
        for (i = 0; i < obj->nr_maps; i++)
                bpf_map__destroy(&obj->maps[i]);
 
+       zfree(&obj->btf_custom_path);
        zfree(&obj->kconfig);
        zfree(&obj->externs);
        obj->nr_extern = 0;
@@ -9513,7 +8273,7 @@ static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
        ret = snprintf(btf_type_name, sizeof(btf_type_name),
                       "%s%s", prefix, name);
        /* snprintf returns the number of characters written excluding the
-        * the terminating null. So, if >= BTF_MAX_NAME_SIZE are written, it
+        * terminating null. So, if >= BTF_MAX_NAME_SIZE are written, it
         * indicates truncation.
         */
        if (ret < 0 || ret >= sizeof(btf_type_name))
@@ -9537,7 +8297,7 @@ int libbpf_find_vmlinux_btf_id(const char *name,
        struct btf *btf;
        int err;
 
-       btf = libbpf_find_kernel_btf();
+       btf = btf__load_vmlinux_btf();
        err = libbpf_get_error(btf);
        if (err) {
                pr_warn("vmlinux BTF is not found\n");
@@ -9556,8 +8316,8 @@ static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
 {
        struct bpf_prog_info_linear *info_linear;
        struct bpf_prog_info *info;
-       struct btf *btf = NULL;
-       int err = -EINVAL;
+       struct btf *btf;
+       int err;
 
        info_linear = bpf_program__get_prog_info_linear(attach_prog_fd, 0);
        err = libbpf_get_error(info_linear);
@@ -9566,12 +8326,15 @@ static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
                        attach_prog_fd);
                return err;
        }
+
+       err = -EINVAL;
        info = &info_linear->info;
        if (!info->btf_id) {
                pr_warn("The target program doesn't have BTF\n");
                goto out;
        }
-       if (btf__get_from_id(info->btf_id, &btf)) {
+       btf = btf__load_from_kernel_by_id(info->btf_id);
+       if (libbpf_get_error(btf)) {
                pr_warn("Failed to get BTF of the program\n");
                goto out;
        }
@@ -10055,7 +8818,7 @@ struct bpf_link {
 int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog)
 {
        int ret;
-       
+
        ret = bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), NULL);
        return libbpf_err_errno(ret);
 }
@@ -10346,25 +9109,28 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
        return pfd;
 }
 
-struct bpf_program_attach_kprobe_opts {
-       bool retprobe;
-       unsigned long offset;
-};
-
-static struct bpf_link*
+struct bpf_link *
 bpf_program__attach_kprobe_opts(struct bpf_program *prog,
                                const char *func_name,
-                               struct bpf_program_attach_kprobe_opts *opts)
+                               struct bpf_kprobe_opts *opts)
 {
        char errmsg[STRERR_BUFSIZE];
        struct bpf_link *link;
+       unsigned long offset;
+       bool retprobe;
        int pfd, err;
 
-       pfd = perf_event_open_probe(false /* uprobe */, opts->retprobe, func_name,
-                                   opts->offset, -1 /* pid */);
+       if (!OPTS_VALID(opts, bpf_kprobe_opts))
+               return libbpf_err_ptr(-EINVAL);
+
+       retprobe = OPTS_GET(opts, retprobe, false);
+       offset = OPTS_GET(opts, offset, 0);
+
+       pfd = perf_event_open_probe(false /* uprobe */, retprobe, func_name,
+                                   offset, -1 /* pid */);
        if (pfd < 0) {
                pr_warn("prog '%s': failed to create %s '%s' perf event: %s\n",
-                       prog->name, opts->retprobe ? "kretprobe" : "kprobe", func_name,
+                       prog->name, retprobe ? "kretprobe" : "kprobe", func_name,
                        libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
                return libbpf_err_ptr(pfd);
        }
@@ -10373,7 +9139,7 @@ bpf_program__attach_kprobe_opts(struct bpf_program *prog,
        if (err) {
                close(pfd);
                pr_warn("prog '%s': failed to attach to %s '%s': %s\n",
-                       prog->name, opts->retprobe ? "kretprobe" : "kprobe", func_name,
+                       prog->name, retprobe ? "kretprobe" : "kprobe", func_name,
                        libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
                return libbpf_err_ptr(err);
        }
@@ -10384,9 +9150,9 @@ struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
                                            bool retprobe,
                                            const char *func_name)
 {
-       struct bpf_program_attach_kprobe_opts opts = {
+       DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts,
                .retprobe = retprobe,
-       };
+       );
 
        return bpf_program__attach_kprobe_opts(prog, func_name, &opts);
 }
@@ -10394,7 +9160,7 @@ struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
 static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec,
                                      struct bpf_program *prog)
 {
-       struct bpf_program_attach_kprobe_opts opts;
+       DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts);
        unsigned long offset = 0;
        struct bpf_link *link;
        const char *func_name;
@@ -10404,13 +9170,14 @@ static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec,
        func_name = prog->sec_name + sec->len;
        opts.retprobe = strcmp(sec->sec, "kretprobe/") == 0;
 
-       n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%lx", &func, &offset);
+       n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%li", &func, &offset);
        if (n < 1) {
                err = -EINVAL;
                pr_warn("kprobe name is invalid: %s\n", func_name);
                return libbpf_err_ptr(err);
        }
        if (opts.retprobe && offset != 0) {
+               free(func);
                err = -EINVAL;
                pr_warn("kretprobes do not support offset specification\n");
                return libbpf_err_ptr(err);
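
The switch from "%lx" to "%li" lets sscanf() auto-detect the offset base, so
both decimal and 0x-prefixed offsets now parse. A standalone sketch (function
name and offset are hypothetical):

	#include <stdio.h>
	#include <stdlib.h>

	int main(void)
	{
		long off = 0;
		char *func = NULL;

		/* "+16" and "+0x10" now both yield an offset of 16 */
		int n = sscanf("do_sys_open+0x10", "%m[a-zA-Z0-9_.]+%li",
			       &func, &off);

		printf("n=%d func=%s off=%ld\n", n, func, off);
		free(func);
		return 0;
	}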
index 6e61342..1271d99 100644 (file)
@@ -94,8 +94,26 @@ struct bpf_object_open_opts {
         * system Kconfig for CONFIG_xxx externs.
         */
        const char *kconfig;
+       /* Path to the custom BTF to be used for BPF CO-RE relocations.
+        * This custom BTF completely replaces the use of vmlinux BTF
+        * for the purpose of CO-RE relocations.
+        * NOTE: any other BPF feature (e.g., fentry/fexit programs,
+        * struct_ops, etc) will need actual kernel BTF at /sys/kernel/btf/vmlinux.
+        */
+       const char *btf_custom_path;
 };
-#define bpf_object_open_opts__last_field kconfig
+#define bpf_object_open_opts__last_field btf_custom_path
+
+struct bpf_kprobe_opts {
+       /* size of this struct, for forward/backward compatibility */
+       size_t sz;
+       /* function's offset to install kprobe to */
+       unsigned long offset;
+       /* kprobe is return probe */
+       bool retprobe;
+       size_t :0;
+};
+#define bpf_kprobe_opts__last_field retprobe
 
 LIBBPF_API struct bpf_object *bpf_object__open(const char *path);
 LIBBPF_API struct bpf_object *
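
A usage sketch for the newly exported opts-based attach API (the probe target
and offset are assumptions for illustration):

	DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts,
		.offset = 0x10,		/* offset within the traced function */
	);
	struct bpf_link *link;

	link = bpf_program__attach_kprobe_opts(prog, "do_sys_open", &opts);
	if (libbpf_get_error(link))
		/* handle attach error */;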
@@ -243,6 +261,10 @@ LIBBPF_API struct bpf_link *
 bpf_program__attach_kprobe(struct bpf_program *prog, bool retprobe,
                           const char *func_name);
 LIBBPF_API struct bpf_link *
+bpf_program__attach_kprobe_opts(struct bpf_program *prog,
+                                const char *func_name,
+                                struct bpf_kprobe_opts *opts);
+LIBBPF_API struct bpf_link *
 bpf_program__attach_uprobe(struct bpf_program *prog, bool retprobe,
                           pid_t pid, const char *binary_path,
                           size_t func_offset);
@@ -477,6 +499,7 @@ LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map);
 LIBBPF_API bool bpf_map__is_internal(const struct bpf_map *map);
 LIBBPF_API int bpf_map__set_pin_path(struct bpf_map *map, const char *path);
 LIBBPF_API const char *bpf_map__get_pin_path(const struct bpf_map *map);
+LIBBPF_API const char *bpf_map__pin_path(const struct bpf_map *map);
 LIBBPF_API bool bpf_map__is_pinned(const struct bpf_map *map);
 LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path);
 LIBBPF_API int bpf_map__unpin(struct bpf_map *map, const char *path);
index 944c99d..58e0fb2 100644 (file)
@@ -371,7 +371,15 @@ LIBBPF_0.4.0 {
 LIBBPF_0.5.0 {
        global:
                bpf_map__initial_value;
+               bpf_map__pin_path;
                bpf_map_lookup_and_delete_elem_flags;
+               bpf_program__attach_kprobe_opts;
                bpf_object__gen_loader;
+               btf__load_from_kernel_by_id;
+               btf__load_from_kernel_by_id_split;
+               btf__load_into_kernel;
+               btf__load_module_btf;
+               btf__load_vmlinux_btf;
+               btf_dump__dump_type_data;
                libbpf_set_strict_mode;
 } LIBBPF_0.4.0;
index 016ca7c..f7b691d 100644 (file)
@@ -14,6 +14,7 @@
 #include <errno.h>
 #include <linux/err.h>
 #include "libbpf_legacy.h"
+#include "relo_core.h"
 
 /* make sure libbpf doesn't use kernel-only integer typedefs */
 #pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
@@ -366,76 +367,6 @@ struct bpf_line_info_min {
        __u32   line_col;
 };
 
-/* bpf_core_relo_kind encodes which aspect of captured field/type/enum value
- * has to be adjusted by relocations.
- */
-enum bpf_core_relo_kind {
-       BPF_FIELD_BYTE_OFFSET = 0,      /* field byte offset */
-       BPF_FIELD_BYTE_SIZE = 1,        /* field size in bytes */
-       BPF_FIELD_EXISTS = 2,           /* field existence in target kernel */
-       BPF_FIELD_SIGNED = 3,           /* field signedness (0 - unsigned, 1 - signed) */
-       BPF_FIELD_LSHIFT_U64 = 4,       /* bitfield-specific left bitshift */
-       BPF_FIELD_RSHIFT_U64 = 5,       /* bitfield-specific right bitshift */
-       BPF_TYPE_ID_LOCAL = 6,          /* type ID in local BPF object */
-       BPF_TYPE_ID_TARGET = 7,         /* type ID in target kernel */
-       BPF_TYPE_EXISTS = 8,            /* type existence in target kernel */
-       BPF_TYPE_SIZE = 9,              /* type size in bytes */
-       BPF_ENUMVAL_EXISTS = 10,        /* enum value existence in target kernel */
-       BPF_ENUMVAL_VALUE = 11,         /* enum value integer value */
-};
-
-/* The minimum bpf_core_relo checked by the loader
- *
- * CO-RE relocation captures the following data:
- * - insn_off - instruction offset (in bytes) within a BPF program that needs
- *   its insn->imm field to be relocated with actual field info;
- * - type_id - BTF type ID of the "root" (containing) entity of a relocatable
- *   type or field;
- * - access_str_off - offset into corresponding .BTF string section. String
- *   interpretation depends on specific relocation kind:
- *     - for field-based relocations, string encodes an accessed field using
- *     a sequence of field and array indices, separated by colon (:). It's
- *     conceptually very close to LLVM's getelementptr ([0]) instruction's
- *     arguments for identifying offset to a field.
- *     - for type-based relocations, strings is expected to be just "0";
- *     - for enum value-based relocations, string contains an index of enum
- *     value within its enum type;
- *
- * Example to provide a better feel.
- *
- *   struct sample {
- *       int a;
- *       struct {
- *           int b[10];
- *       };
- *   };
- *
- *   struct sample *s = ...;
- *   int x = &s->a;     // encoded as "0:0" (a is field #0)
- *   int y = &s->b[5];  // encoded as "0:1:0:5" (anon struct is field #1, 
- *                      // b is field #0 inside anon struct, accessing elem #5)
- *   int z = &s[10]->b; // encoded as "10:1" (ptr is used as an array)
- *
- * type_id for all relocs in this example  will capture BTF type id of
- * `struct sample`.
- *
- * Such relocation is emitted when using __builtin_preserve_access_index()
- * Clang built-in, passing expression that captures field address, e.g.:
- *
- * bpf_probe_read(&dst, sizeof(dst),
- *               __builtin_preserve_access_index(&src->a.b.c));
- *
- * In this case Clang will emit field relocation recording necessary data to
- * be able to find offset of embedded `a.b.c` field within `src` struct.
- *
- *   [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction
- */
-struct bpf_core_relo {
-       __u32   insn_off;
-       __u32   type_id;
-       __u32   access_str_off;
-       enum bpf_core_relo_kind kind;
-};
 
 typedef int (*type_id_visit_fn)(__u32 *type_id, void *ctx);
 typedef int (*str_off_visit_fn)(__u32 *str_off, void *ctx);
@@ -494,4 +425,14 @@ static inline void *libbpf_ptr(void *ret)
        return ret;
 }
 
+static inline bool str_is_empty(const char *s)
+{
+       return !s || !s[0];
+}
+
+static inline bool is_ldimm64_insn(struct bpf_insn *insn)
+{
+       return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
+}
+
 #endif /* __LIBBPF_LIBBPF_INTERNAL_H */
diff --git a/tools/lib/bpf/relo_core.c b/tools/lib/bpf/relo_core.c
new file mode 100644 (file)
index 0000000..4016ed4
--- /dev/null
@@ -0,0 +1,1295 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (c) 2019 Facebook */
+
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <ctype.h>
+#include <linux/err.h>
+
+#include "libbpf.h"
+#include "bpf.h"
+#include "btf.h"
+#include "str_error.h"
+#include "libbpf_internal.h"
+
+#define BPF_CORE_SPEC_MAX_LEN 64
+
+/* represents BPF CO-RE field or array element accessor */
+struct bpf_core_accessor {
+       __u32 type_id;          /* struct/union type or array element type */
+       __u32 idx;              /* field index or array index */
+       const char *name;       /* field name or NULL for array accessor */
+};
+
+struct bpf_core_spec {
+       const struct btf *btf;
+       /* high-level spec: named fields and array indices only */
+       struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN];
+       /* original unresolved (no skip_mods_or_typedefs) root type ID */
+       __u32 root_type_id;
+       /* CO-RE relocation kind */
+       enum bpf_core_relo_kind relo_kind;
+       /* high-level spec length */
+       int len;
+       /* raw, low-level spec: 1-to-1 with accessor spec string */
+       int raw_spec[BPF_CORE_SPEC_MAX_LEN];
+       /* raw spec length */
+       int raw_len;
+       /* field bit offset represented by spec */
+       __u32 bit_offset;
+};
+
+static bool is_flex_arr(const struct btf *btf,
+                       const struct bpf_core_accessor *acc,
+                       const struct btf_array *arr)
+{
+       const struct btf_type *t;
+
+       /* not a flexible array if it's not inside a struct or has a non-zero size */
+       if (!acc->name || arr->nelems > 0)
+               return false;
+
+       /* has to be the last member of enclosing struct */
+       t = btf__type_by_id(btf, acc->type_id);
+       return acc->idx == btf_vlen(t) - 1;
+}
+
+static const char *core_relo_kind_str(enum bpf_core_relo_kind kind)
+{
+       switch (kind) {
+       case BPF_FIELD_BYTE_OFFSET: return "byte_off";
+       case BPF_FIELD_BYTE_SIZE: return "byte_sz";
+       case BPF_FIELD_EXISTS: return "field_exists";
+       case BPF_FIELD_SIGNED: return "signed";
+       case BPF_FIELD_LSHIFT_U64: return "lshift_u64";
+       case BPF_FIELD_RSHIFT_U64: return "rshift_u64";
+       case BPF_TYPE_ID_LOCAL: return "local_type_id";
+       case BPF_TYPE_ID_TARGET: return "target_type_id";
+       case BPF_TYPE_EXISTS: return "type_exists";
+       case BPF_TYPE_SIZE: return "type_size";
+       case BPF_ENUMVAL_EXISTS: return "enumval_exists";
+       case BPF_ENUMVAL_VALUE: return "enumval_value";
+       default: return "unknown";
+       }
+}
+
+static bool core_relo_is_field_based(enum bpf_core_relo_kind kind)
+{
+       switch (kind) {
+       case BPF_FIELD_BYTE_OFFSET:
+       case BPF_FIELD_BYTE_SIZE:
+       case BPF_FIELD_EXISTS:
+       case BPF_FIELD_SIGNED:
+       case BPF_FIELD_LSHIFT_U64:
+       case BPF_FIELD_RSHIFT_U64:
+               return true;
+       default:
+               return false;
+       }
+}
+
+static bool core_relo_is_type_based(enum bpf_core_relo_kind kind)
+{
+       switch (kind) {
+       case BPF_TYPE_ID_LOCAL:
+       case BPF_TYPE_ID_TARGET:
+       case BPF_TYPE_EXISTS:
+       case BPF_TYPE_SIZE:
+               return true;
+       default:
+               return false;
+       }
+}
+
+static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind)
+{
+       switch (kind) {
+       case BPF_ENUMVAL_EXISTS:
+       case BPF_ENUMVAL_VALUE:
+               return true;
+       default:
+               return false;
+       }
+}
+
+/*
+ * Turn bpf_core_relo into a low- and high-level spec representation,
+ * validating correctness along the way, as well as calculating resulting
+ * field bit offset, specified by accessor string. Low-level spec captures
+ * every single level of nestedness, including traversing anonymous
+ * struct/union members. High-level one only captures semantically meaningful
+ * "turning points": named fields and array indicies.
+ * E.g., for this case:
+ *
+ *   struct sample {
+ *       int __unimportant;
+ *       struct {
+ *           int __1;
+ *           int __2;
+ *           int a[7];
+ *       };
+ *   };
+ *
+ *   struct sample *s = ...;
+ *
+ *   int x = &s->a[3]; // access string = '0:1:2:3'
+ *
+ * Low-level spec has 1:1 mapping with each element of access string (it's
+ * just a parsed access string representation): [0, 1, 2, 3].
+ *
+ * High-level spec will capture only 3 points:
+ *   - initial zero-index access by pointer (&s->... is the same as &s[0]...);
+ *   - field 'a' access (corresponds to '2' in low-level spec);
+ *   - array element #3 access (corresponds to '3' in low-level spec).
+ *
+ * Type-based relocations (TYPE_EXISTS/TYPE_SIZE,
+ * TYPE_ID_LOCAL/TYPE_ID_TARGET) don't capture any field information. Their
+ * spec and raw_spec are kept empty.
+ *
+ * Enum value-based relocations (ENUMVAL_EXISTS/ENUMVAL_VALUE) use the access
+ * string to specify the enumerator's value index that needs to be relocated.
+ */
+static int bpf_core_parse_spec(const struct btf *btf,
+                              __u32 type_id,
+                              const char *spec_str,
+                              enum bpf_core_relo_kind relo_kind,
+                              struct bpf_core_spec *spec)
+{
+       int access_idx, parsed_len, i;
+       struct bpf_core_accessor *acc;
+       const struct btf_type *t;
+       const char *name;
+       __u32 id;
+       __s64 sz;
+
+       if (str_is_empty(spec_str) || *spec_str == ':')
+               return -EINVAL;
+
+       memset(spec, 0, sizeof(*spec));
+       spec->btf = btf;
+       spec->root_type_id = type_id;
+       spec->relo_kind = relo_kind;
+
+       /* type-based relocations don't have a field access string */
+       if (core_relo_is_type_based(relo_kind)) {
+               if (strcmp(spec_str, "0"))
+                       return -EINVAL;
+               return 0;
+       }
+
+       /* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */
+       while (*spec_str) {
+               if (*spec_str == ':')
+                       ++spec_str;
+               if (sscanf(spec_str, "%d%n", &access_idx, &parsed_len) != 1)
+                       return -EINVAL;
+               if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
+                       return -E2BIG;
+               spec_str += parsed_len;
+               spec->raw_spec[spec->raw_len++] = access_idx;
+       }
+
+       if (spec->raw_len == 0)
+               return -EINVAL;
+
+       t = skip_mods_and_typedefs(btf, type_id, &id);
+       if (!t)
+               return -EINVAL;
+
+       access_idx = spec->raw_spec[0];
+       acc = &spec->spec[0];
+       acc->type_id = id;
+       acc->idx = access_idx;
+       spec->len++;
+
+       if (core_relo_is_enumval_based(relo_kind)) {
+               if (!btf_is_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t))
+                       return -EINVAL;
+
+               /* record enumerator name in the first accessor */
+               acc->name = btf__name_by_offset(btf, btf_enum(t)[access_idx].name_off);
+               return 0;
+       }
+
+       if (!core_relo_is_field_based(relo_kind))
+               return -EINVAL;
+
+       sz = btf__resolve_size(btf, id);
+       if (sz < 0)
+               return sz;
+       spec->bit_offset = access_idx * sz * 8;
+
+       for (i = 1; i < spec->raw_len; i++) {
+               t = skip_mods_and_typedefs(btf, id, &id);
+               if (!t)
+                       return -EINVAL;
+
+               access_idx = spec->raw_spec[i];
+               acc = &spec->spec[spec->len];
+
+               if (btf_is_composite(t)) {
+                       const struct btf_member *m;
+                       __u32 bit_offset;
+
+                       if (access_idx >= btf_vlen(t))
+                               return -EINVAL;
+
+                       bit_offset = btf_member_bit_offset(t, access_idx);
+                       spec->bit_offset += bit_offset;
+
+                       m = btf_members(t) + access_idx;
+                       if (m->name_off) {
+                               name = btf__name_by_offset(btf, m->name_off);
+                               if (str_is_empty(name))
+                                       return -EINVAL;
+
+                               acc->type_id = id;
+                               acc->idx = access_idx;
+                               acc->name = name;
+                               spec->len++;
+                       }
+
+                       id = m->type;
+               } else if (btf_is_array(t)) {
+                       const struct btf_array *a = btf_array(t);
+                       bool flex;
+
+                       t = skip_mods_and_typedefs(btf, a->type, &id);
+                       if (!t)
+                               return -EINVAL;
+
+                       flex = is_flex_arr(btf, acc - 1, a);
+                       if (!flex && access_idx >= a->nelems)
+                               return -EINVAL;
+
+                       spec->spec[spec->len].type_id = id;
+                       spec->spec[spec->len].idx = access_idx;
+                       spec->len++;
+
+                       sz = btf__resolve_size(btf, id);
+                       if (sz < 0)
+                               return sz;
+                       spec->bit_offset += access_idx * sz * 8;
+               } else {
+                       pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n",
+                               type_id, spec_str, i, id, btf_kind_str(t));
+                       return -EINVAL;
+               }
+       }
+
+       return 0;
+}
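
As a concrete check of the arithmetic above, a hedged worked example using the `struct sample` layout from the function comment:

/* access string "0:1:2:3" (&s->a[3]) parses into:
 *   raw_spec = [0, 1, 2, 3], raw_len = 4
 *   high-level spec: s[0] -> field 'a' -> elem #3, so spec->len = 3
 *   bit_offset = 0 * sizeof(struct sample) * 8   (initial s[0] deref)
 *              + 32                              (anon struct at byte 4)
 *              + 64                              (a follows __1 and __2)
 *              + 3 * 32                          (array elem #3)
 *              = 192 bits, i.e. byte offset 24
 */
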
+
+/* Check two types for compatibility for the purpose of field access
+ * relocation. const/volatile/restrict and typedefs are skipped to ensure we
+ * are relocating semantically compatible entities:
+ *   - any two STRUCTs/UNIONs are compatible and can be mixed;
+ *   - any two FWDs are compatible, if their names match (modulo flavor suffix);
+ *   - any two PTRs are always compatible;
+ *   - for ENUMs, names should be the same (ignoring flavor suffix) or at
+ *     least one of the enums should be anonymous; sizes are not checked;
+ *   - for INT, size and signedness are ignored;
+ *   - any two FLOATs are always compatible;
+ *   - for ARRAY, dimensionality is ignored, element types are checked for
+ *     compatibility recursively;
+ *   - everything else shouldn't be ever a target of relocation.
+ * These rules are not set in stone and probably will be adjusted as we get
+ * more experience with using BPF CO-RE relocations.
+ */
+static int bpf_core_fields_are_compat(const struct btf *local_btf,
+                                     __u32 local_id,
+                                     const struct btf *targ_btf,
+                                     __u32 targ_id)
+{
+       const struct btf_type *local_type, *targ_type;
+
+recur:
+       local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id);
+       targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
+       if (!local_type || !targ_type)
+               return -EINVAL;
+
+       if (btf_is_composite(local_type) && btf_is_composite(targ_type))
+               return 1;
+       if (btf_kind(local_type) != btf_kind(targ_type))
+               return 0;
+
+       switch (btf_kind(local_type)) {
+       case BTF_KIND_PTR:
+       case BTF_KIND_FLOAT:
+               return 1;
+       case BTF_KIND_FWD:
+       case BTF_KIND_ENUM: {
+               const char *local_name, *targ_name;
+               size_t local_len, targ_len;
+
+               local_name = btf__name_by_offset(local_btf,
+                                                local_type->name_off);
+               targ_name = btf__name_by_offset(targ_btf, targ_type->name_off);
+               local_len = bpf_core_essential_name_len(local_name);
+               targ_len = bpf_core_essential_name_len(targ_name);
+               /* one of them is anonymous or both w/ same flavor-less names */
+               return local_len == 0 || targ_len == 0 ||
+                      (local_len == targ_len &&
+                       strncmp(local_name, targ_name, local_len) == 0);
+       }
+       case BTF_KIND_INT:
+               /* just reject deprecated bitfield-like integers; all other
+                * integers are by default compatible between each other
+                */
+               return btf_int_offset(local_type) == 0 &&
+                      btf_int_offset(targ_type) == 0;
+       case BTF_KIND_ARRAY:
+               local_id = btf_array(local_type)->type;
+               targ_id = btf_array(targ_type)->type;
+               goto recur;
+       default:
+               pr_warn("unexpected kind %d relocated, local [%d], target [%d]\n",
+                       btf_kind(local_type), local_id, targ_id);
+               return 0;
+       }
+}
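
A few hedged illustrations of how these rules play out (type names are made up):

/*   struct a    vs union b         -> compatible (composites mix freely)
 *   enum e___v1 vs enum e___v2     -> compatible (same essential name)
 *   enum e      vs anonymous enum  -> compatible (one side is anonymous)
 *   __u32       vs __u64           -> compatible (INT size/signedness ignored)
 *   int         vs int *           -> incompatible (kind mismatch)
 */
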
+
+/*
+ * Given single high-level named field accessor in local type, find
+ * corresponding high-level accessor for a target type. Along the way,
+ * maintain low-level spec for target as well. Also keep updating target
+ * bit offset.
+ *
+ * Searching is performed through recursive exhaustive enumeration of all
+ * fields of a struct/union. If there are any anonymous (embedded)
+ * structs/unions, they are recursively searched as well. If field with
+ * desired name is found, check compatibility between local and target types,
+ * before returning result.
+ *
+ * 1 is returned, if field is found.
+ * 0 is returned if no compatible field is found.
+ * <0 is returned on error.
+ */
+static int bpf_core_match_member(const struct btf *local_btf,
+                                const struct bpf_core_accessor *local_acc,
+                                const struct btf *targ_btf,
+                                __u32 targ_id,
+                                struct bpf_core_spec *spec,
+                                __u32 *next_targ_id)
+{
+       const struct btf_type *local_type, *targ_type;
+       const struct btf_member *local_member, *m;
+       const char *local_name, *targ_name;
+       __u32 local_id;
+       int i, n, found;
+
+       targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
+       if (!targ_type)
+               return -EINVAL;
+       if (!btf_is_composite(targ_type))
+               return 0;
+
+       local_id = local_acc->type_id;
+       local_type = btf__type_by_id(local_btf, local_id);
+       local_member = btf_members(local_type) + local_acc->idx;
+       local_name = btf__name_by_offset(local_btf, local_member->name_off);
+
+       n = btf_vlen(targ_type);
+       m = btf_members(targ_type);
+       for (i = 0; i < n; i++, m++) {
+               __u32 bit_offset;
+
+               bit_offset = btf_member_bit_offset(targ_type, i);
+
+               /* too deep struct/union/array nesting */
+               if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
+                       return -E2BIG;
+
+               /* speculate this member will be the good one */
+               spec->bit_offset += bit_offset;
+               spec->raw_spec[spec->raw_len++] = i;
+
+               targ_name = btf__name_by_offset(targ_btf, m->name_off);
+               if (str_is_empty(targ_name)) {
+                       /* embedded struct/union, we need to go deeper */
+                       found = bpf_core_match_member(local_btf, local_acc,
+                                                     targ_btf, m->type,
+                                                     spec, next_targ_id);
+                       if (found) /* either found or error */
+                               return found;
+               } else if (strcmp(local_name, targ_name) == 0) {
+                       /* matching named field */
+                       struct bpf_core_accessor *targ_acc;
+
+                       targ_acc = &spec->spec[spec->len++];
+                       targ_acc->type_id = targ_id;
+                       targ_acc->idx = i;
+                       targ_acc->name = targ_name;
+
+                       *next_targ_id = m->type;
+                       found = bpf_core_fields_are_compat(local_btf,
+                                                          local_member->type,
+                                                          targ_btf, m->type);
+                       if (!found)
+                               spec->len--; /* pop accessor */
+                       return found;
+               }
+               /* member turned out not to be what we looked for */
+               spec->bit_offset -= bit_offset;
+               spec->raw_len--;
+       }
+
+       return 0;
+}
+
+/*
+ * Try to match local spec to a target type and, if successful, produce full
+ * target spec (high-level, low-level + bit offset).
+ */
+static int bpf_core_spec_match(struct bpf_core_spec *local_spec,
+                              const struct btf *targ_btf, __u32 targ_id,
+                              struct bpf_core_spec *targ_spec)
+{
+       const struct btf_type *targ_type;
+       const struct bpf_core_accessor *local_acc;
+       struct bpf_core_accessor *targ_acc;
+       int i, sz, matched;
+
+       memset(targ_spec, 0, sizeof(*targ_spec));
+       targ_spec->btf = targ_btf;
+       targ_spec->root_type_id = targ_id;
+       targ_spec->relo_kind = local_spec->relo_kind;
+
+       if (core_relo_is_type_based(local_spec->relo_kind)) {
+               return bpf_core_types_are_compat(local_spec->btf,
+                                                local_spec->root_type_id,
+                                                targ_btf, targ_id);
+       }
+
+       local_acc = &local_spec->spec[0];
+       targ_acc = &targ_spec->spec[0];
+
+       if (core_relo_is_enumval_based(local_spec->relo_kind)) {
+               size_t local_essent_len, targ_essent_len;
+               const struct btf_enum *e;
+               const char *targ_name;
+
+               /* has to resolve to an enum */
+               targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, &targ_id);
+               if (!btf_is_enum(targ_type))
+                       return 0;
+
+               local_essent_len = bpf_core_essential_name_len(local_acc->name);
+
+               for (i = 0, e = btf_enum(targ_type); i < btf_vlen(targ_type); i++, e++) {
+                       targ_name = btf__name_by_offset(targ_spec->btf, e->name_off);
+                       targ_essent_len = bpf_core_essential_name_len(targ_name);
+                       if (targ_essent_len != local_essent_len)
+                               continue;
+                       if (strncmp(local_acc->name, targ_name, local_essent_len) == 0) {
+                               targ_acc->type_id = targ_id;
+                               targ_acc->idx = i;
+                               targ_acc->name = targ_name;
+                               targ_spec->len++;
+                               targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
+                               targ_spec->raw_len++;
+                               return 1;
+                       }
+               }
+               return 0;
+       }
+
+       if (!core_relo_is_field_based(local_spec->relo_kind))
+               return -EINVAL;
+
+       for (i = 0; i < local_spec->len; i++, local_acc++, targ_acc++) {
+               targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id,
+                                                  &targ_id);
+               if (!targ_type)
+                       return -EINVAL;
+
+               if (local_acc->name) {
+                       matched = bpf_core_match_member(local_spec->btf,
+                                                       local_acc,
+                                                       targ_btf, targ_id,
+                                                       targ_spec, &targ_id);
+                       if (matched <= 0)
+                               return matched;
+               } else {
+                       /* for i=0, targ_id is already treated as array element
+                        * type (because it's the original struct), for others
+                        * we should find array element type first
+                        */
+                       if (i > 0) {
+                               const struct btf_array *a;
+                               bool flex;
+
+                               if (!btf_is_array(targ_type))
+                                       return 0;
+
+                               a = btf_array(targ_type);
+                               flex = is_flex_arr(targ_btf, targ_acc - 1, a);
+                               if (!flex && local_acc->idx >= a->nelems)
+                                       return 0;
+                               if (!skip_mods_and_typedefs(targ_btf, a->type,
+                                                           &targ_id))
+                                       return -EINVAL;
+                       }
+
+                       /* too deep struct/union/array nesting */
+                       if (targ_spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
+                               return -E2BIG;
+
+                       targ_acc->type_id = targ_id;
+                       targ_acc->idx = local_acc->idx;
+                       targ_acc->name = NULL;
+                       targ_spec->len++;
+                       targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
+                       targ_spec->raw_len++;
+
+                       sz = btf__resolve_size(targ_btf, targ_id);
+                       if (sz < 0)
+                               return sz;
+                       targ_spec->bit_offset += local_acc->idx * sz * 8;
+               }
+       }
+
+       return 1;
+}
+
+static int bpf_core_calc_field_relo(const char *prog_name,
+                                   const struct bpf_core_relo *relo,
+                                   const struct bpf_core_spec *spec,
+                                   __u32 *val, __u32 *field_sz, __u32 *type_id,
+                                   bool *validate)
+{
+       const struct bpf_core_accessor *acc;
+       const struct btf_type *t;
+       __u32 byte_off, byte_sz, bit_off, bit_sz, field_type_id;
+       const struct btf_member *m;
+       const struct btf_type *mt;
+       bool bitfield;
+       __s64 sz;
+
+       *field_sz = 0;
+
+       if (relo->kind == BPF_FIELD_EXISTS) {
+               *val = spec ? 1 : 0;
+               return 0;
+       }
+
+       if (!spec)
+               return -EUCLEAN; /* request instruction poisoning */
+
+       acc = &spec->spec[spec->len - 1];
+       t = btf__type_by_id(spec->btf, acc->type_id);
+
+       /* a[n] accessor needs special handling */
+       if (!acc->name) {
+               if (relo->kind == BPF_FIELD_BYTE_OFFSET) {
+                       *val = spec->bit_offset / 8;
+                       /* remember field size for load/store mem size */
+                       sz = btf__resolve_size(spec->btf, acc->type_id);
+                       if (sz < 0)
+                               return -EINVAL;
+                       *field_sz = sz;
+                       *type_id = acc->type_id;
+               } else if (relo->kind == BPF_FIELD_BYTE_SIZE) {
+                       sz = btf__resolve_size(spec->btf, acc->type_id);
+                       if (sz < 0)
+                               return -EINVAL;
+                       *val = sz;
+               } else {
+                       pr_warn("prog '%s': relo %d at insn #%d can't be applied to array access\n",
+                               prog_name, relo->kind, relo->insn_off / 8);
+                       return -EINVAL;
+               }
+               if (validate)
+                       *validate = true;
+               return 0;
+       }
+
+       m = btf_members(t) + acc->idx;
+       mt = skip_mods_and_typedefs(spec->btf, m->type, &field_type_id);
+       bit_off = spec->bit_offset;
+       bit_sz = btf_member_bitfield_size(t, acc->idx);
+
+       bitfield = bit_sz > 0;
+       if (bitfield) {
+               byte_sz = mt->size;
+               byte_off = bit_off / 8 / byte_sz * byte_sz;
+               /* figure out smallest int size necessary for bitfield load */
+               while (bit_off + bit_sz - byte_off * 8 > byte_sz * 8) {
+                       if (byte_sz >= 8) {
+                               /* bitfield can't be read with 64-bit read */
+                               pr_warn("prog '%s': relo %d at insn #%d can't be satisfied for bitfield\n",
+                                       prog_name, relo->kind, relo->insn_off / 8);
+                               return -E2BIG;
+                       }
+                       byte_sz *= 2;
+                       byte_off = bit_off / 8 / byte_sz * byte_sz;
+               }
+       } else {
+               sz = btf__resolve_size(spec->btf, field_type_id);
+               if (sz < 0)
+                       return -EINVAL;
+               byte_sz = sz;
+               byte_off = spec->bit_offset / 8;
+               bit_sz = byte_sz * 8;
+       }
+
+       /* for bitfields, all the relocatable aspects are ambiguous and we
+        * might disagree with the compiler, so turn off validation of the expected
+        * value, except for signedness
+        */
+       if (validate)
+               *validate = !bitfield;
+
+       switch (relo->kind) {
+       case BPF_FIELD_BYTE_OFFSET:
+               *val = byte_off;
+               if (!bitfield) {
+                       *field_sz = byte_sz;
+                       *type_id = field_type_id;
+               }
+               break;
+       case BPF_FIELD_BYTE_SIZE:
+               *val = byte_sz;
+               break;
+       case BPF_FIELD_SIGNED:
+               /* enums will be assumed unsigned */
+               *val = btf_is_enum(mt) ||
+                      (btf_int_encoding(mt) & BTF_INT_SIGNED);
+               if (validate)
+                       *validate = true; /* signedness is never ambiguous */
+               break;
+       case BPF_FIELD_LSHIFT_U64:
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+               *val = 64 - (bit_off + bit_sz - byte_off * 8);
+#else
+               *val = (8 - byte_sz) * 8 + (bit_off - byte_off * 8);
+#endif
+               break;
+       case BPF_FIELD_RSHIFT_U64:
+               *val = 64 - bit_sz;
+               if (validate)
+                       *validate = true; /* right shift is never ambiguous */
+               break;
+       case BPF_FIELD_EXISTS:
+       default:
+               return -EOPNOTSUPP;
+       }
+
+       return 0;
+}
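
To make the bitfield sizing loop above concrete, a standalone sketch (a hypothetical helper, not part of this patch) that finds the smallest power-of-two load covering a bitfield; the real code bails out with -E2BIG once byte_sz reaches 8:

#include <stdio.h>

/* mirrors the sizing loop in bpf_core_calc_field_relo() */
static unsigned int bitfield_load_size(unsigned int bit_off,
				       unsigned int bit_sz,
				       unsigned int byte_sz)
{
	unsigned int byte_off = bit_off / 8 / byte_sz * byte_sz;

	while (bit_off + bit_sz - byte_off * 8 > byte_sz * 8) {
		byte_sz *= 2;
		byte_off = bit_off / 8 / byte_sz * byte_sz;
	}
	return byte_sz;
}

int main(void)
{
	/* a 5-bit field at bit 30 straddles a 4-byte boundary,
	 * so an 8-byte load is required
	 */
	printf("%u\n", bitfield_load_size(30, 5, 4)); /* prints 8 */
	return 0;
}
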
+
+static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo,
+                                  const struct bpf_core_spec *spec,
+                                  __u32 *val)
+{
+       __s64 sz;
+
+       /* type-based relos return zero when target type is not found */
+       if (!spec) {
+               *val = 0;
+               return 0;
+       }
+
+       switch (relo->kind) {
+       case BPF_TYPE_ID_TARGET:
+               *val = spec->root_type_id;
+               break;
+       case BPF_TYPE_EXISTS:
+               *val = 1;
+               break;
+       case BPF_TYPE_SIZE:
+               sz = btf__resolve_size(spec->btf, spec->root_type_id);
+               if (sz < 0)
+                       return -EINVAL;
+               *val = sz;
+               break;
+       case BPF_TYPE_ID_LOCAL:
+       /* BPF_TYPE_ID_LOCAL is handled specially and shouldn't get here */
+       default:
+               return -EOPNOTSUPP;
+       }
+
+       return 0;
+}
+
+static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo,
+                                     const struct bpf_core_spec *spec,
+                                     __u32 *val)
+{
+       const struct btf_type *t;
+       const struct btf_enum *e;
+
+       switch (relo->kind) {
+       case BPF_ENUMVAL_EXISTS:
+               *val = spec ? 1 : 0;
+               break;
+       case BPF_ENUMVAL_VALUE:
+               if (!spec)
+                       return -EUCLEAN; /* request instruction poisoning */
+               t = btf__type_by_id(spec->btf, spec->spec[0].type_id);
+               e = btf_enum(t) + spec->spec[0].idx;
+               *val = e->val;
+               break;
+       default:
+               return -EOPNOTSUPP;
+       }
+
+       return 0;
+}
+
+struct bpf_core_relo_res
+{
+       /* expected value in the instruction, unless validate == false */
+       __u32 orig_val;
+       /* new value that needs to be patched up to */
+       __u32 new_val;
+       /* relocation unsuccessful, poison instruction, but don't fail load */
+       bool poison;
+       /* some relocations can't be validated against orig_val */
+       bool validate;
+       /* for field byte offset relocations or the forms:
+        *     *(T *)(rX + <off>) = rY
+        *     rX = *(T *)(rY + <off>),
+        * we remember original and resolved field size to adjust direct
+        * memory loads of pointers and integers; this is necessary for 32-bit
+        * host kernel architectures, but also allows to automatically
+        * host kernel architectures, but it also allows automatically
+        * relocating fields that were resized from, e.g., u32 to u64, etc.
+       bool fail_memsz_adjust;
+       __u32 orig_sz;
+       __u32 orig_type_id;
+       __u32 new_sz;
+       __u32 new_type_id;
+};
+
+/* Calculate original and target relocation values, given local and target
+ * specs and relocation kind. These values are calculated for each candidate.
+ * If there are multiple candidates, resulting values should all be consistent
+ * with each other. Otherwise, libbpf will refuse to proceed due to ambiguity.
+ * If instruction has to be poisoned, *poison will be set to true.
+ */
+static int bpf_core_calc_relo(const char *prog_name,
+                             const struct bpf_core_relo *relo,
+                             int relo_idx,
+                             const struct bpf_core_spec *local_spec,
+                             const struct bpf_core_spec *targ_spec,
+                             struct bpf_core_relo_res *res)
+{
+       int err = -EOPNOTSUPP;
+
+       res->orig_val = 0;
+       res->new_val = 0;
+       res->poison = false;
+       res->validate = true;
+       res->fail_memsz_adjust = false;
+       res->orig_sz = res->new_sz = 0;
+       res->orig_type_id = res->new_type_id = 0;
+
+       if (core_relo_is_field_based(relo->kind)) {
+               err = bpf_core_calc_field_relo(prog_name, relo, local_spec,
+                                              &res->orig_val, &res->orig_sz,
+                                              &res->orig_type_id, &res->validate);
+               err = err ?: bpf_core_calc_field_relo(prog_name, relo, targ_spec,
+                                                     &res->new_val, &res->new_sz,
+                                                     &res->new_type_id, NULL);
+               if (err)
+                       goto done;
+               /* Validate if it's safe to adjust load/store memory size.
+                * Adjustments are performed only if original and new memory
+                * sizes differ.
+                */
+               res->fail_memsz_adjust = false;
+               if (res->orig_sz != res->new_sz) {
+                       const struct btf_type *orig_t, *new_t;
+
+                       orig_t = btf__type_by_id(local_spec->btf, res->orig_type_id);
+                       new_t = btf__type_by_id(targ_spec->btf, res->new_type_id);
+
+                       /* There are two use cases in which it's safe to
+                        * adjust load/store's mem size:
+                        *   - reading a 32-bit kernel pointer, while on the
+                        *   BPF side pointers are always 64-bit; in this case
+                        *   it's safe to "downsize" instruction size due to
+                        *   the pointer being treated as an unsigned integer
+                        *   with zero-extended upper 32 bits;
+                        *   - reading unsigned integers, where zero-extension
+                        *   again preserves the value correctly.
+                        *
+                        * In all other cases it's incorrect to attempt to
+                        * load/store field because read value will be
+                        * incorrect, so we poison relocated instruction.
+                        */
+                       if (btf_is_ptr(orig_t) && btf_is_ptr(new_t))
+                               goto done;
+                       if (btf_is_int(orig_t) && btf_is_int(new_t) &&
+                           btf_int_encoding(orig_t) != BTF_INT_SIGNED &&
+                           btf_int_encoding(new_t) != BTF_INT_SIGNED)
+                               goto done;
+
+                       /* mark as invalid mem size adjustment, but this will
+                        * only be checked for LDX/STX/ST insns
+                        */
+                       res->fail_memsz_adjust = true;
+               }
+       } else if (core_relo_is_type_based(relo->kind)) {
+               err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val);
+               err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val);
+       } else if (core_relo_is_enumval_based(relo->kind)) {
+               err = bpf_core_calc_enumval_relo(relo, local_spec, &res->orig_val);
+               err = err ?: bpf_core_calc_enumval_relo(relo, targ_spec, &res->new_val);
+       }
+
+done:
+       if (err == -EUCLEAN) {
+               /* EUCLEAN is used to signal instruction poisoning request */
+               res->poison = true;
+               err = 0;
+       } else if (err == -EOPNOTSUPP) {
+               /* EOPNOTSUPP means unknown/unsupported relocation */
+               pr_warn("prog '%s': relo #%d: unrecognized CO-RE relocation %s (%d) at insn #%d\n",
+                       prog_name, relo_idx, core_relo_kind_str(relo->kind),
+                       relo->kind, relo->insn_off / 8);
+       }
+
+       return err;
+}
+
+/*
+ * Turn instruction for which CO-RE relocation failed into an invalid one
+ * with a distinct signature.
+ */
+static void bpf_core_poison_insn(const char *prog_name, int relo_idx,
+                                int insn_idx, struct bpf_insn *insn)
+{
+       pr_debug("prog '%s': relo #%d: substituting insn #%d w/ invalid insn\n",
+                prog_name, relo_idx, insn_idx);
+       insn->code = BPF_JMP | BPF_CALL;
+       insn->dst_reg = 0;
+       insn->src_reg = 0;
+       insn->off = 0;
+       /* if this instruction is reachable (not a dead code),
+        * verifier will complain with the following message:
+        * invalid func unknown#195896080
+        */
+       insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */
+}
+
+static int insn_bpf_size_to_bytes(struct bpf_insn *insn)
+{
+       switch (BPF_SIZE(insn->code)) {
+       case BPF_DW: return 8;
+       case BPF_W: return 4;
+       case BPF_H: return 2;
+       case BPF_B: return 1;
+       default: return -1;
+       }
+}
+
+static int insn_bytes_to_bpf_size(__u32 sz)
+{
+       switch (sz) {
+       case 8: return BPF_DW;
+       case 4: return BPF_W;
+       case 2: return BPF_H;
+       case 1: return BPF_B;
+       default: return -1;
+       }
+}
+
+/*
+ * Patch relocatable BPF instruction.
+ *
+ * Patched value is determined by relocation kind and target specification.
+ * For existence relocations, the target spec will be NULL if the field/type
+ * is not found.
+ * Expected insn->imm value is determined using relocation kind and local
+ * spec, and is checked before patching instruction. If actual insn->imm value
+ * is wrong, bail out with error.
+ *
+ * Currently supported classes of BPF instruction are:
+ * 1. rX = <imm> (assignment with immediate operand);
+ * 2. rX += <imm> (arithmetic operations with immediate operand);
+ * 3. rX = <imm64> (load with 64-bit immediate value);
+ * 4. rX = *(T *)(rY + <off>), where T is one of {u8, u16, u32, u64};
+ * 5. *(T *)(rX + <off>) = rY, where T is one of {u8, u16, u32, u64};
+ * 6. *(T *)(rX + <off>) = <imm>, where T is one of {u8, u16, u32, u64}.
+ */
+static int bpf_core_patch_insn(const char *prog_name, struct bpf_insn *insn,
+                              int insn_idx, const struct bpf_core_relo *relo,
+                              int relo_idx, const struct bpf_core_relo_res *res)
+{
+       __u32 orig_val, new_val;
+       __u8 class;
+
+       class = BPF_CLASS(insn->code);
+
+       if (res->poison) {
+poison:
+               /* poison second part of ldimm64 to avoid a confusing error from
+                * verifier about "unknown opcode 00"
+                */
+               if (is_ldimm64_insn(insn))
+                       bpf_core_poison_insn(prog_name, relo_idx, insn_idx + 1, insn + 1);
+               bpf_core_poison_insn(prog_name, relo_idx, insn_idx, insn);
+               return 0;
+       }
+
+       orig_val = res->orig_val;
+       new_val = res->new_val;
+
+       switch (class) {
+       case BPF_ALU:
+       case BPF_ALU64:
+               if (BPF_SRC(insn->code) != BPF_K)
+                       return -EINVAL;
+               if (res->validate && insn->imm != orig_val) {
+                       pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n",
+                               prog_name, relo_idx,
+                               insn_idx, insn->imm, orig_val, new_val);
+                       return -EINVAL;
+               }
+               orig_val = insn->imm;
+               insn->imm = new_val;
+               pr_debug("prog '%s': relo #%d: patched insn #%d (ALU/ALU64) imm %u -> %u\n",
+                        prog_name, relo_idx, insn_idx,
+                        orig_val, new_val);
+               break;
+       case BPF_LDX:
+       case BPF_ST:
+       case BPF_STX:
+               if (res->validate && insn->off != orig_val) {
+                       pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDX/ST/STX) value: got %u, exp %u -> %u\n",
+                               prog_name, relo_idx, insn_idx, insn->off, orig_val, new_val);
+                       return -EINVAL;
+               }
+               if (new_val > SHRT_MAX) {
+                       pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) value too big: %u\n",
+                               prog_name, relo_idx, insn_idx, new_val);
+                       return -ERANGE;
+               }
+               if (res->fail_memsz_adjust) {
+                       pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) accesses field incorrectly. "
+                               "Make sure you are accessing pointers, unsigned integers, or fields of matching type and size.\n",
+                               prog_name, relo_idx, insn_idx);
+                       goto poison;
+               }
+
+               orig_val = insn->off;
+               insn->off = new_val;
+               pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %u -> %u\n",
+                        prog_name, relo_idx, insn_idx, orig_val, new_val);
+
+               if (res->new_sz != res->orig_sz) {
+                       int insn_bytes_sz, insn_bpf_sz;
+
+                       insn_bytes_sz = insn_bpf_size_to_bytes(insn);
+                       if (insn_bytes_sz != res->orig_sz) {
+                               pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) unexpected mem size: got %d, exp %u\n",
+                                       prog_name, relo_idx, insn_idx, insn_bytes_sz, res->orig_sz);
+                               return -EINVAL;
+                       }
+
+                       insn_bpf_sz = insn_bytes_to_bpf_size(res->new_sz);
+                       if (insn_bpf_sz < 0) {
+                               pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) invalid new mem size: %u\n",
+                                       prog_name, relo_idx, insn_idx, res->new_sz);
+                               return -EINVAL;
+                       }
+
+                       insn->code = BPF_MODE(insn->code) | insn_bpf_sz | BPF_CLASS(insn->code);
+                       pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) mem_sz %u -> %u\n",
+                                prog_name, relo_idx, insn_idx, res->orig_sz, res->new_sz);
+               }
+               break;
+       case BPF_LD: {
+               __u64 imm;
+
+               if (!is_ldimm64_insn(insn) ||
+                   insn[0].src_reg != 0 || insn[0].off != 0 ||
+                   insn[1].code != 0 || insn[1].dst_reg != 0 ||
+                   insn[1].src_reg != 0 || insn[1].off != 0) {
+                       pr_warn("prog '%s': relo #%d: insn #%d (LDIMM64) has unexpected form\n",
+                               prog_name, relo_idx, insn_idx);
+                       return -EINVAL;
+               }
+
+               imm = insn[0].imm + ((__u64)insn[1].imm << 32);
+               if (res->validate && imm != orig_val) {
+                       pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDIMM64) value: got %llu, exp %u -> %u\n",
+                               prog_name, relo_idx,
+                               insn_idx, (unsigned long long)imm,
+                               orig_val, new_val);
+                       return -EINVAL;
+               }
+
+               insn[0].imm = new_val;
+               insn[1].imm = 0; /* currently only 32-bit values are supported */
+               pr_debug("prog '%s': relo #%d: patched insn #%d (LDIMM64) imm64 %llu -> %u\n",
+                        prog_name, relo_idx, insn_idx,
+                        (unsigned long long)imm, new_val);
+               break;
+       }
+       default:
+               pr_warn("prog '%s': relo #%d: trying to relocate unrecognized insn #%d, code:0x%x, src:0x%x, dst:0x%x, off:0x%x, imm:0x%x\n",
+                       prog_name, relo_idx, insn_idx, insn->code,
+                       insn->src_reg, insn->dst_reg, insn->off, insn->imm);
+               return -EINVAL;
+       }
+
+       return 0;
+}
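
A hedged illustration of case 4 above, combining the offset patch with the mem-size adjustment (the offsets and sizes are invented): if a field moved from offset 8 to 16 and grew from u32 to u64, the LDX instruction is rewritten as:

/*   before: BPF_LDX | BPF_MEM | BPF_W,  off = 8     r1 = *(u32 *)(r2 + 8)
 *   after:  BPF_LDX | BPF_MEM | BPF_DW, off = 16    r1 = *(u64 *)(r2 + 16)
 *
 * the size change is permitted only for pointers and unsigned integers,
 * as validated in bpf_core_calc_relo()
 */
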
+
+/* Output spec definition in the format:
+ * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>,
+ * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b
+ */
+static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec)
+{
+       const struct btf_type *t;
+       const struct btf_enum *e;
+       const char *s;
+       __u32 type_id;
+       int i;
+
+       type_id = spec->root_type_id;
+       t = btf__type_by_id(spec->btf, type_id);
+       s = btf__name_by_offset(spec->btf, t->name_off);
+
+       libbpf_print(level, "[%u] %s %s", type_id, btf_kind_str(t), str_is_empty(s) ? "<anon>" : s);
+
+       if (core_relo_is_type_based(spec->relo_kind))
+               return;
+
+       if (core_relo_is_enumval_based(spec->relo_kind)) {
+               t = skip_mods_and_typedefs(spec->btf, type_id, NULL);
+               e = btf_enum(t) + spec->raw_spec[0];
+               s = btf__name_by_offset(spec->btf, e->name_off);
+
+               libbpf_print(level, "::%s = %u", s, e->val);
+               return;
+       }
+
+       if (core_relo_is_field_based(spec->relo_kind)) {
+               for (i = 0; i < spec->len; i++) {
+                       if (spec->spec[i].name)
+                               libbpf_print(level, ".%s", spec->spec[i].name);
+                       else if (i > 0 || spec->spec[i].idx > 0)
+                               libbpf_print(level, "[%u]", spec->spec[i].idx);
+               }
+
+               libbpf_print(level, " (");
+               for (i = 0; i < spec->raw_len; i++)
+                       libbpf_print(level, "%s%d", i == 0 ? "" : ":", spec->raw_spec[i]);
+
+               if (spec->bit_offset % 8)
+                       libbpf_print(level, " @ offset %u.%u)",
+                                    spec->bit_offset / 8, spec->bit_offset % 8);
+               else
+                       libbpf_print(level, " @ offset %u)", spec->bit_offset / 8);
+               return;
+       }
+}
+
+/*
+ * CO-RE relocate single instruction.
+ *
+ * The outline and important points of the algorithm:
+ * 1. For a given local type, find corresponding candidate target types.
+ *    Candidate type is a type with the same "essential" name, ignoring
+ *    everything after last triple underscore (___). E.g., `sample`,
+ *    `sample___flavor_one`, `sample___flavor_another_one` are all candidates
+ *    for each other. Names with a triple underscore are referred to as
+ *    "flavors" and are useful, among other things, for specifying and
+ *    supporting incompatible variations of the same kernel struct, which
+ *    might differ between different kernel versions and/or build
+ *    configurations.
+ *
+ *    N.B. Struct "flavors" could be generated by bpftool's BTF-to-C
+ *    converter when deduplicated BTF of a kernel still contains more than
+ *    one different type with the same name. In that case, ___2, ___3, etc.
+ *    are appended starting from the second name conflict. But struct flavors
+ *    are also useful when defined "locally", in the BPF program, to extract
+ *    the same data across incompatible changes between different kernel
+ *    versions/configurations. For instance, to handle field renames between
+ *    kernel versions, one can use two flavors of the struct name with the
+ *    same common name and use conditional relocations to extract that field,
+ *    depending on target kernel version.
+ * 2. For each candidate type, try to match local specification to this
+ *    candidate target type. Matching involves finding corresponding
+ *    high-level spec accessors, meaning that all named fields should match,
+ *    as well as all array accesses should be within the actual bounds. Also,
+ *    types should be compatible (see bpf_core_fields_are_compat for details).
+ * 3. It is supported and expected that there might be multiple flavors
+ *    matching the spec. As long as all the specs resolve to the same set of
+ *    offsets across all candidates, there is no error. If there is any
+ *    ambiguity, CO-RE relocation will fail. This is necessary to accommodate
+ *    imperfection of BTF deduplication, which can cause slight duplication of
+ *    the same BTF type, if some directly or indirectly referenced (by
+ *    pointer) type gets resolved to different actual types in different
+ *    object files. If such a situation occurs, deduplicated BTF will end up
+ *    with two (or more) structurally identical types, which differ only in
+ *    types they refer to through pointer. This should be OK in most cases and
+ *    is not an error.
+ * 4. The candidate type search is performed by linearly scanning through all
+ *    types in the target BTF. It is anticipated that this is overall more
+ *    efficient memory-wise, and not significantly worse (if not better)
+ *    CPU-wise, than prebuilding a map from all local type names to
+ *    a list of candidate type names. It's also sped up by caching the resolved
+ *    list of matching candidates for each local "root" type ID that has at
+ *    least one bpf_core_relo associated with it. This list is shared
+ *    between multiple relocations for the same type ID and is updated as some
+ *    of the candidates are pruned due to structural incompatibility.
+ */
+int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn,
+                            int insn_idx,
+                            const struct bpf_core_relo *relo,
+                            int relo_idx,
+                            const struct btf *local_btf,
+                            struct bpf_core_cand_list *cands)
+{
+       struct bpf_core_spec local_spec, cand_spec, targ_spec = {};
+       struct bpf_core_relo_res cand_res, targ_res;
+       const struct btf_type *local_type;
+       const char *local_name;
+       __u32 local_id;
+       const char *spec_str;
+       int i, j, err;
+
+       local_id = relo->type_id;
+       local_type = btf__type_by_id(local_btf, local_id);
+       if (!local_type)
+               return -EINVAL;
+
+       local_name = btf__name_by_offset(local_btf, local_type->name_off);
+       if (!local_name)
+               return -EINVAL;
+
+       spec_str = btf__name_by_offset(local_btf, relo->access_str_off);
+       if (str_is_empty(spec_str))
+               return -EINVAL;
+
+       err = bpf_core_parse_spec(local_btf, local_id, spec_str, relo->kind, &local_spec);
+       if (err) {
+               pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n",
+                       prog_name, relo_idx, local_id, btf_kind_str(local_type),
+                       str_is_empty(local_name) ? "<anon>" : local_name,
+                       spec_str, err);
+               return -EINVAL;
+       }
+
+       pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog_name,
+                relo_idx, core_relo_kind_str(relo->kind), relo->kind);
+       bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec);
+       libbpf_print(LIBBPF_DEBUG, "\n");
+
+       /* TYPE_ID_LOCAL relo is special and doesn't need candidate search */
+       if (relo->kind == BPF_TYPE_ID_LOCAL) {
+               targ_res.validate = true;
+               targ_res.poison = false;
+               targ_res.orig_val = local_spec.root_type_id;
+               targ_res.new_val = local_spec.root_type_id;
+               goto patch_insn;
+       }
+
+       /* libbpf doesn't support candidate search for anonymous types */
+       if (str_is_empty(spec_str)) {
+               pr_warn("prog '%s': relo #%d: <%s> (%d) relocation doesn't support anonymous types\n",
+                       prog_name, relo_idx, core_relo_kind_str(relo->kind), relo->kind);
+               return -EOPNOTSUPP;
+       }
+
+       for (i = 0, j = 0; i < cands->len; i++) {
+               err = bpf_core_spec_match(&local_spec, cands->cands[i].btf,
+                                         cands->cands[i].id, &cand_spec);
+               if (err < 0) {
+                       pr_warn("prog '%s': relo #%d: error matching candidate #%d ",
+                               prog_name, relo_idx, i);
+                       bpf_core_dump_spec(LIBBPF_WARN, &cand_spec);
+                       libbpf_print(LIBBPF_WARN, ": %d\n", err);
+                       return err;
+               }
+
+               pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog_name,
+                        relo_idx, err == 0 ? "non-matching" : "matching", i);
+               bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec);
+               libbpf_print(LIBBPF_DEBUG, "\n");
+
+               if (err == 0)
+                       continue;
+
+               err = bpf_core_calc_relo(prog_name, relo, relo_idx, &local_spec, &cand_spec, &cand_res);
+               if (err)
+                       return err;
+
+               if (j == 0) {
+                       targ_res = cand_res;
+                       targ_spec = cand_spec;
+               } else if (cand_spec.bit_offset != targ_spec.bit_offset) {
+                       /* if there are many field relo candidates, they
+                        * should all resolve to the same bit offset
+                        */
+                       pr_warn("prog '%s': relo #%d: field offset ambiguity: %u != %u\n",
+                               prog_name, relo_idx, cand_spec.bit_offset,
+                               targ_spec.bit_offset);
+                       return -EINVAL;
+               } else if (cand_res.poison != targ_res.poison || cand_res.new_val != targ_res.new_val) {
+                       /* all candidates should result in the same relocation
+                        * decision and value, otherwise it's dangerous to
+                        * proceed due to ambiguity
+                        */
+                       pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %u != %s %u\n",
+                               prog_name, relo_idx,
+                               cand_res.poison ? "failure" : "success", cand_res.new_val,
+                               targ_res.poison ? "failure" : "success", targ_res.new_val);
+                       return -EINVAL;
+               }
+
+               cands->cands[j++] = cands->cands[i];
+       }
+
+       /*
+        * For a BPF_FIELD_EXISTS relo, or when the BPF program in use has field
+        * existence checks or kernel version/config checks, it's expected
+        * that we might not find any candidates. In this case, if field
+        * wasn't found in any candidate, the list of candidates shouldn't
+        * change at all, we'll just handle relocating appropriately,
+        * depending on relo's kind.
+        */
+       if (j > 0)
+               cands->len = j;
+
+       /*
+        * If no candidates were found, it might be either a programmer error
+        * or an expected case, depending on whether the instruction with the
+        * relocation is guarded in some way that makes it unreachable (dead
+        * code) when the relocation can't be resolved. This is handled
+        * uniformly in bpf_core_patch_insn() by replacing that instruction
+        * with a BPF helper call insn (using an invalid helper ID). If the
+        * instruction is indeed unreachable, it will be ignored and
+        * eliminated by the verifier. If it was an error, the verifier will
+        * complain and point to the specific instruction number in its log.
+        */
+       if (j == 0) {
+               pr_debug("prog '%s': relo #%d: no matching targets found\n",
+                        prog_name, relo_idx);
+
+               /* calculate single target relo result explicitly */
+               err = bpf_core_calc_relo(prog_name, relo, relo_idx, &local_spec, NULL, &targ_res);
+               if (err)
+                       return err;
+       }
+
+patch_insn:
+       /* bpf_core_patch_insn() should know how to handle missing targ_spec */
+       err = bpf_core_patch_insn(prog_name, insn, insn_idx, relo, relo_idx, &targ_res);
+       if (err) {
+               pr_warn("prog '%s': relo #%d: failed to patch insn #%u: %d\n",
+                       prog_name, relo_idx, relo->insn_off / 8, err);
+               return -EINVAL;
+       }
+
+       return 0;
+}
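For reference, the poisoning step the comments above describe can be sketched as
below. This is an illustrative reimplementation, not the helper this patch adds;
the magic constant follows libbpf's convention but should be treated as an
assumption here:

#include <linux/bpf.h>

/* Overwrite an unresolvable relocated insn with a call to an invalid
 * helper ID; the verifier rejects it only if it is actually reachable. */
static void poison_relo_insn(struct bpf_insn *insn)
{
	insn->code = BPF_JMP | BPF_CALL;
	insn->dst_reg = 0;
	insn->src_reg = 0;
	insn->off = 0;
	insn->imm = 195896080;	/* 0xbad2310 => "bad relo" (assumed value) */
}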
diff --git a/tools/lib/bpf/relo_core.h b/tools/lib/bpf/relo_core.h
new file mode 100644 (file)
index 0000000..3b9f8f1
--- /dev/null
@@ -0,0 +1,100 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+/* Copyright (c) 2019 Facebook */
+
+#ifndef __RELO_CORE_H
+#define __RELO_CORE_H
+
+/* bpf_core_relo_kind encodes which aspect of captured field/type/enum value
+ * has to be adjusted by relocations.
+ */
+enum bpf_core_relo_kind {
+       BPF_FIELD_BYTE_OFFSET = 0,      /* field byte offset */
+       BPF_FIELD_BYTE_SIZE = 1,        /* field size in bytes */
+       BPF_FIELD_EXISTS = 2,           /* field existence in target kernel */
+       BPF_FIELD_SIGNED = 3,           /* field signedness (0 - unsigned, 1 - signed) */
+       BPF_FIELD_LSHIFT_U64 = 4,       /* bitfield-specific left bitshift */
+       BPF_FIELD_RSHIFT_U64 = 5,       /* bitfield-specific right bitshift */
+       BPF_TYPE_ID_LOCAL = 6,          /* type ID in local BPF object */
+       BPF_TYPE_ID_TARGET = 7,         /* type ID in target kernel */
+       BPF_TYPE_EXISTS = 8,            /* type existence in target kernel */
+       BPF_TYPE_SIZE = 9,              /* type size in bytes */
+       BPF_ENUMVAL_EXISTS = 10,        /* enum value existence in target kernel */
+       BPF_ENUMVAL_VALUE = 11,         /* enum value integer value */
+};
+
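For orientation, these kinds are what the usual bpf_core_read.h convenience
macros compile down to. A speculative BPF-side sketch, assuming vmlinux.h and
bpf_core_read.h (neither is part of this patch):

#include "vmlinux.h"
#include <bpf/bpf_core_read.h>

int probe_task(struct task_struct *t)
{
	if (bpf_core_field_exists(t->comm))		/* BPF_FIELD_EXISTS */
		return bpf_core_field_size(t->comm);	/* BPF_FIELD_BYTE_SIZE */
	/* BPF_TYPE_ID_TARGET: resolved to the kernel's BTF ID at load time */
	return bpf_core_type_id_kernel(struct task_struct);
}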
+/* The minimum bpf_core_relo record checked by the loader.
+ *
+ * CO-RE relocation captures the following data:
+ * - insn_off - instruction offset (in bytes) within a BPF program that needs
+ *   its insn->imm field to be relocated with actual field info;
+ * - type_id - BTF type ID of the "root" (containing) entity of a relocatable
+ *   type or field;
+ * - access_str_off - offset into corresponding .BTF string section. String
+ *   interpretation depends on specific relocation kind:
+ *     - for field-based relocations, string encodes an accessed field using
+ *     a sequence of field and array indices, separated by colon (:). It's
+ *     conceptually very close to LLVM's getelementptr ([0]) instruction's
+ *     arguments for identifying offset to a field.
+ *     - for type-based relocations, string is expected to be just "0";
+ *     - for enum value-based relocations, string contains an index of enum
+ *     value within its enum type;
+ *
+ * Example to provide a better feel.
+ *
+ *   struct sample {
+ *       int a;
+ *       struct {
+ *           int b[10];
+ *       };
+ *   };
+ *
+ *   struct sample *s = ...;
+ *   int x = &s->a;     // encoded as "0:0" (a is field #0)
+ *   int y = &s->b[5];  // encoded as "0:1:0:5" (anon struct is field #1,
+ *                      // b is field #0 inside anon struct, accessing elem #5)
+ *   int z = &s[10]->b; // encoded as "10:1" (ptr is used as an array)
+ *
+ * type_id for all relocs in this example will capture the BTF type id of
+ * `struct sample`.
+ *
+ * Such relocation is emitted when using __builtin_preserve_access_index()
+ * Clang built-in, passing expression that captures field address, e.g.:
+ *
+ * bpf_probe_read(&dst, sizeof(dst),
+ *               __builtin_preserve_access_index(&src->a.b.c));
+ *
+ * In this case Clang will emit field relocation recording necessary data to
+ * be able to find offset of embedded `a.b.c` field within `src` struct.
+ *
+ *   [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction
+ */
+struct bpf_core_relo {
+       __u32   insn_off;
+       __u32   type_id;
+       __u32   access_str_off;
+       enum bpf_core_relo_kind kind;
+};
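As a concrete, hand-written instance: a record encoding the "0:0" access to
field `a` in the `struct sample` example above might look as follows. All
numeric values here are hypothetical; real records are emitted by Clang into
the .BTF.ext section:

static const struct bpf_core_relo sample_field_relo = {
	.insn_off	= 16,	/* byte offset of the insn whose imm gets patched */
	.type_id	= 42,	/* hypothetical BTF ID of struct sample */
	.access_str_off	= 7,	/* hypothetical offset of "0:0" in the string section */
	.kind		= BPF_FIELD_BYTE_OFFSET,
};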
+
+struct bpf_core_cand {
+       const struct btf *btf;
+       const struct btf_type *t;
+       const char *name;
+       __u32 id;
+};
+
+/* dynamically sized list of type IDs and its associated struct btf */
+struct bpf_core_cand_list {
+       struct bpf_core_cand *cands;
+       int len;
+};
+
+int bpf_core_apply_relo_insn(const char *prog_name,
+                            struct bpf_insn *insn, int insn_idx,
+                            const struct bpf_core_relo *relo, int relo_idx,
+                            const struct btf *local_btf,
+                            struct bpf_core_cand_list *cands);
+int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
+                             const struct btf *targ_btf, __u32 targ_id);
+
+size_t bpf_core_essential_name_len(const char *name);
+#endif
index cdecda1..996d025 100644 (file)
@@ -223,10 +223,10 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session,
                        free(info_linear);
                        return -1;
                }
-               if (btf__get_from_id(info->btf_id, &btf)) {
+               btf = btf__load_from_kernel_by_id(info->btf_id);
+               if (libbpf_get_error(btf)) {
                        pr_debug("%s: failed to get BTF of id %u, aborting\n", __func__, info->btf_id);
                        err = -1;
-                       btf = NULL;
                        goto out;
                }
                perf_env__fetch_btf(env, info->btf_id, btf);
@@ -296,7 +296,7 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session,
 
 out:
        free(info_linear);
-       free(btf);
+       btf__free(btf);
        return err ? -1 : 0;
 }
 
@@ -478,7 +478,8 @@ static void perf_env__add_bpf_info(struct perf_env *env, u32 id)
        if (btf_id == 0)
                goto out;
 
-       if (btf__get_from_id(btf_id, &btf)) {
+       btf = btf__load_from_kernel_by_id(btf_id);
+       if (libbpf_get_error(btf)) {
                pr_debug("%s: failed to get BTF of id %u, aborting\n",
                         __func__, btf_id);
                goto out;
@@ -486,7 +487,7 @@ static void perf_env__add_bpf_info(struct perf_env *env, u32 id)
        perf_env__fetch_btf(env, btf_id, btf);
 
 out:
-       free(btf);
+       btf__free(btf);
        close(fd);
 }
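The same conversion recurs across these perf hunks. Shown as a standalone
sketch (the wrapper function is illustrative only), the old out-parameter
idiom gives way to libbpf's pointer-returning API:

#include <bpf/btf.h>
#include <bpf/libbpf.h>

static struct btf *fetch_btf(__u32 id)
{
	/* old idiom (removed above):
	 *	struct btf *btf = NULL;
	 *	if (btf__get_from_id(id, &btf)) ...;  ... free(btf);
	 */
	struct btf *btf = btf__load_from_kernel_by_id(id);

	if (libbpf_get_error(btf))
		return NULL;
	return btf;	/* release with btf__free(), not free() */
}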
 
index 8150e03..ba0f208 100644 (file)
@@ -64,8 +64,8 @@ static char *bpf_target_prog_name(int tgt_fd)
        struct bpf_prog_info_linear *info_linear;
        struct bpf_func_info *func_info;
        const struct btf_type *t;
+       struct btf *btf = NULL;
        char *name = NULL;
-       struct btf *btf;
 
        info_linear = bpf_program__get_prog_info_linear(
                tgt_fd, 1UL << BPF_PROG_INFO_FUNC_INFO);
@@ -74,12 +74,17 @@ static char *bpf_target_prog_name(int tgt_fd)
                return NULL;
        }
 
-       if (info_linear->info.btf_id == 0 ||
-           btf__get_from_id(info_linear->info.btf_id, &btf)) {
+       if (info_linear->info.btf_id == 0) {
                pr_debug("prog FD %d doesn't have valid btf\n", tgt_fd);
                goto out;
        }
 
+       btf = btf__load_from_kernel_by_id(info_linear->info.btf_id);
+       if (libbpf_get_error(btf)) {
+               pr_debug("failed to load btf for prog FD %d\n", tgt_fd);
+               goto out;
+       }
+
        func_info = u64_to_ptr(info_linear->info.func_info);
        t = btf__type_by_id(btf, func_info[0].type_id);
        if (!t) {
@@ -89,6 +94,7 @@ static char *bpf_target_prog_name(int tgt_fd)
        }
        name = strdup(btf__name_by_offset(btf, t->name_off));
 out:
+       btf__free(btf);
        free(info_linear);
        return name;
 }
index 22f8326..bc1f648 100644 (file)
@@ -2434,6 +2434,22 @@ static int cs_etm__process_event(struct perf_session *session,
        return 0;
 }
 
+static void dump_queued_data(struct cs_etm_auxtrace *etm,
+                            struct perf_record_auxtrace *event)
+{
+       struct auxtrace_buffer *buf;
+       unsigned int i;
+       /*
+        * Find all buffers with same reference in the queues and dump them.
+        * This is because the queues can contain multiple entries of the same
+        * buffer that were split on aux records.
+        */
+       for (i = 0; i < etm->queues.nr_queues; ++i)
+               list_for_each_entry(buf, &etm->queues.queue_array[i].head, list)
+                       if (buf->reference == event->reference)
+                               cs_etm__dump_event(etm, buf);
+}
+
 static int cs_etm__process_auxtrace_event(struct perf_session *session,
                                          union perf_event *event,
                                          struct perf_tool *tool __maybe_unused)
@@ -2466,7 +2482,8 @@ static int cs_etm__process_auxtrace_event(struct perf_session *session,
                                cs_etm__dump_event(etm, buffer);
                                auxtrace_buffer__put_data(buffer);
                        }
-       }
+       } else if (dump_trace)
+               dump_queued_data(etm, &event->auxtrace);
 
        return 0;
 }
@@ -3042,7 +3059,6 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
 
        if (dump_trace) {
                cs_etm__print_auxtrace_info(auxtrace_info->priv, num_cpu);
-               return 0;
        }
 
        err = cs_etm__synth_events(etm, session);
index 72e7f36..8af693d 100644 (file)
@@ -192,8 +192,6 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
                        if (!(prot & PROT_EXEC))
                                dso__set_loaded(dso);
                }
-
-               nsinfo__put(dso->nsinfo);
                dso->nsinfo = nsi;
 
                if (build_id__is_defined(bid))
index a1bd700..fc683bc 100644 (file)
@@ -742,9 +742,13 @@ struct pmu_events_map *__weak pmu_events_map__find(void)
        return perf_pmu__find_map(NULL);
 }
 
-static bool perf_pmu__valid_suffix(char *pmu_name, char *tok)
+/*
+ * The suffix must be of the form tok_{digits} or tok{digits}, or be
+ * identical to pmu_name, to be valid.
+ */
+static bool perf_pmu__valid_suffix(const char *pmu_name, char *tok)
 {
-       char *p;
+       const char *p;
 
        if (strncmp(pmu_name, tok, strlen(tok)))
                return false;
@@ -753,12 +757,16 @@ static bool perf_pmu__valid_suffix(char *pmu_name, char *tok)
        if (*p == 0)
                return true;
 
-       if (*p != '_')
-               return false;
+       if (*p == '_')
+               ++p;
 
-       ++p;
-       if (*p == 0 || !isdigit(*p))
-               return false;
+       /* Ensure we end in a number */
+       while (1) {
+               if (!isdigit(*p))
+                       return false;
+               if (*(++p) == 0)
+                       break;
+       }
 
        return true;
 }
@@ -789,12 +797,19 @@ bool pmu_uncore_alias_match(const char *pmu_name, const char *name)
         *          match "socket" in "socketX_pmunameY" and then "pmuname" in
         *          "pmunameY".
         */
-       for (; tok; name += strlen(tok), tok = strtok_r(NULL, ",", &tmp)) {
+       while (1) {
+               char *next_tok = strtok_r(NULL, ",", &tmp);
+
                name = strstr(name, tok);
-               if (!name || !perf_pmu__valid_suffix((char *)name, tok)) {
+               if (!name ||
+                   (!next_tok && !perf_pmu__valid_suffix(name, tok))) {
                        res = false;
                        goto out;
                }
+               if (!next_tok)
+                       break;
+               tok = next_tok;
+               name += strlen(tok);
        }
 
        res = true;
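The accepted suffix shapes can be checked with a standalone restatement of the
rule above; this demo re-derives the helper's logic for illustration and is not
part of the patch:

#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static bool valid_suffix(const char *pmu_name, const char *tok)
{
	const char *p;

	if (strncmp(pmu_name, tok, strlen(tok)))
		return false;
	p = pmu_name + strlen(tok);
	if (*p == 0)
		return true;	/* exact match */
	if (*p == '_')
		++p;
	do {			/* must end in digits */
		if (!isdigit((unsigned char)*p))
			return false;
	} while (*++p);
	return true;
}

int main(void)
{
	printf("%d %d %d %d\n",
	       valid_suffix("uncore_cha_0", "uncore_cha"),	/* 1: tok_{digits} */
	       valid_suffix("uncore_cha12", "uncore_cha"),	/* 1: tok{digits} */
	       valid_suffix("uncore_cha", "uncore_cha"),	/* 1: same as tok */
	       valid_suffix("uncore_cha_x", "uncore_cha"));	/* 0: non-digit tail */
	return 0;
}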
index fb010a3..da9e8b6 100644 (file)
@@ -38,6 +38,7 @@ TARGETS += mount_setattr
 TARGETS += mqueue
 TARGETS += nci
 TARGETS += net
+TARGETS += net/af_unix
 TARGETS += net/forwarding
 TARGETS += net/mptcp
 TARGETS += netfilter
index addcfd8..433f8be 100644 (file)
@@ -23,7 +23,6 @@ test_skb_cgroup_id_user
 test_cgroup_storage
 test_flow_dissector
 flow_dissector_load
-test_netcnt
 test_tcpnotify_user
 test_libbpf
 test_tcp_check_syncookie_user
index f405b20..2a58b7b 100644 (file)
@@ -38,7 +38,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
        test_verifier_log test_dev_cgroup \
        test_sock test_sockmap get_cgroup_id_user \
        test_cgroup_storage \
-       test_netcnt test_tcpnotify_user test_sysctl \
+       test_tcpnotify_user test_sysctl \
        test_progs-no_alu32
 
 # Also test bpf-gcc, if present
@@ -197,7 +197,6 @@ $(OUTPUT)/test_sockmap: cgroup_helpers.c
 $(OUTPUT)/test_tcpnotify_user: cgroup_helpers.c trace_helpers.c
 $(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c
 $(OUTPUT)/test_cgroup_storage: cgroup_helpers.c
-$(OUTPUT)/test_netcnt: cgroup_helpers.c
 $(OUTPUT)/test_sock_fields: cgroup_helpers.c
 $(OUTPUT)/test_sysctl: cgroup_helpers.c
 
index 8deec1c..9b17f28 100644 (file)
@@ -19,6 +19,13 @@ the CI. It builds the kernel (without overwriting your existing Kconfig), recomp
 bpf selftests, runs them (by default ``tools/testing/selftests/bpf/test_progs``) and
 saves the resulting output (by default in ``~/.bpf_selftests``).
 
+Script dependencies:
+
+- clang (preferably built from sources, https://github.com/llvm/llvm-project);
+- pahole (preferably built from sources, https://git.kernel.org/pub/scm/devel/pahole/pahole.git/);
+- qemu;
+- docutils (for ``rst2man``);
+- libcap-devel.
+
 For more information about using the script, run:
 
 .. code-block:: console
index 81084c1..0ab1c88 100644 (file)
@@ -6,19 +6,39 @@
 
 #define MAX_PERCPU_PACKETS 32
 
-struct percpu_net_cnt {
-       __u64 packets;
-       __u64 bytes;
+/* sizeof(struct bpf_local_storage_elem):
+ *
+ * It really is about 128 bytes on x86_64, but allocate more to account for
+ * possible layout changes, different architectures, etc.
+ * The kernel will wrap up to PAGE_SIZE internally anyway.
+ */
+#define SIZEOF_BPF_LOCAL_STORAGE_ELEM          256
 
-       __u64 prev_ts;
+/* Try to estimate kernel's BPF_LOCAL_STORAGE_MAX_VALUE_SIZE: */
+#define BPF_LOCAL_STORAGE_MAX_VALUE_SIZE       (0xFFFF - \
+                                                SIZEOF_BPF_LOCAL_STORAGE_ELEM)
 
-       __u64 prev_packets;
-       __u64 prev_bytes;
+#define PCPU_MIN_UNIT_SIZE                     32768
+
+union percpu_net_cnt {
+       struct {
+               __u64 packets;
+               __u64 bytes;
+
+               __u64 prev_ts;
+
+               __u64 prev_packets;
+               __u64 prev_bytes;
+       };
+       __u8 data[PCPU_MIN_UNIT_SIZE];
 };
 
-struct net_cnt {
-       __u64 packets;
-       __u64 bytes;
+union net_cnt {
+       struct {
+               __u64 packets;
+               __u64 bytes;
+       };
+       __u8 data[BPF_LOCAL_STORAGE_MAX_VALUE_SIZE];
 };
 
 #endif
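A quick compile-time sanity check of the sizing relationships above (a sketch,
not part of the header; `>=` because sizeof may round up for alignment):

#include <assert.h>
#include "netcnt_common.h"	/* the header above */

int main(void)
{
	static_assert(sizeof(union percpu_net_cnt) >= PCPU_MIN_UNIT_SIZE,
		      "per-cpu value spans a whole pcpu allocation unit");
	static_assert(sizeof(union net_cnt) >= BPF_LOCAL_STORAGE_MAX_VALUE_SIZE,
		      "storage value reaches the estimated maximum");
	return 0;
}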
index 2060bc1..d685768 100644 (file)
@@ -66,17 +66,13 @@ int settimeo(int fd, int timeout_ms)
 
 #define save_errno_close(fd) ({ int __save = errno; close(fd); errno = __save; })
 
-int start_server(int family, int type, const char *addr_str, __u16 port,
-                int timeout_ms)
+static int __start_server(int type, const struct sockaddr *addr,
+                         socklen_t addrlen, int timeout_ms, bool reuseport)
 {
-       struct sockaddr_storage addr = {};
-       socklen_t len;
+       int on = 1;
        int fd;
 
-       if (make_sockaddr(family, addr_str, port, &addr, &len))
-               return -1;
-
-       fd = socket(family, type, 0);
+       fd = socket(addr->sa_family, type, 0);
        if (fd < 0) {
                log_err("Failed to create server socket");
                return -1;
@@ -85,7 +81,13 @@ int start_server(int family, int type, const char *addr_str, __u16 port,
        if (settimeo(fd, timeout_ms))
                goto error_close;
 
-       if (bind(fd, (const struct sockaddr *)&addr, len) < 0) {
+       if (reuseport &&
+           setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &on, sizeof(on))) {
+               log_err("Failed to set SO_REUSEPORT");
+               goto error_close;
+       }
+
+       if (bind(fd, addr, addrlen) < 0) {
                log_err("Failed to bind socket");
                goto error_close;
        }
@@ -104,6 +106,69 @@ error_close:
        return -1;
 }
 
+int start_server(int family, int type, const char *addr_str, __u16 port,
+                int timeout_ms)
+{
+       struct sockaddr_storage addr;
+       socklen_t addrlen;
+
+       if (make_sockaddr(family, addr_str, port, &addr, &addrlen))
+               return -1;
+
+       return __start_server(type, (struct sockaddr *)&addr,
+                             addrlen, timeout_ms, false);
+}
+
+int *start_reuseport_server(int family, int type, const char *addr_str,
+                           __u16 port, int timeout_ms, unsigned int nr_listens)
+{
+       struct sockaddr_storage addr;
+       unsigned int nr_fds = 0;
+       socklen_t addrlen;
+       int *fds;
+
+       if (!nr_listens)
+               return NULL;
+
+       if (make_sockaddr(family, addr_str, port, &addr, &addrlen))
+               return NULL;
+
+       fds = malloc(sizeof(*fds) * nr_listens);
+       if (!fds)
+               return NULL;
+
+       fds[0] = __start_server(type, (struct sockaddr *)&addr, addrlen,
+                               timeout_ms, true);
+       if (fds[0] == -1)
+               goto close_fds;
+       nr_fds = 1;
+
+       if (getsockname(fds[0], (struct sockaddr *)&addr, &addrlen))
+               goto close_fds;
+
+       for (; nr_fds < nr_listens; nr_fds++) {
+               fds[nr_fds] = __start_server(type, (struct sockaddr *)&addr,
+                                            addrlen, timeout_ms, true);
+               if (fds[nr_fds] == -1)
+                       goto close_fds;
+       }
+
+       return fds;
+
+close_fds:
+       free_fds(fds, nr_fds);
+       return NULL;
+}
+
+void free_fds(int *fds, unsigned int nr_close_fds)
+{
+       if (fds) {
+               while (nr_close_fds)
+                       close(fds[--nr_close_fds]);
+               free(fds);
+       }
+}
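A hypothetical caller, showing the intended pairing of the two new helpers
(five IPv6 TCP listeners in one SO_REUSEPORT group, ephemeral port, no
timeout):

#include <sys/socket.h>
#include "network_helpers.h"	/* helpers added above */

static void example(void)
{
	int *fds = start_reuseport_server(AF_INET6, SOCK_STREAM, "::1",
					  0 /* ephemeral port */, 0, 5);

	if (!fds)
		return;
	/* ... exercise the listeners ... */
	free_fds(fds, 5);
}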
+
 int fastopen_connect(int server_fd, const char *data, unsigned int data_len,
                     int timeout_ms)
 {
@@ -217,6 +282,7 @@ int make_sockaddr(int family, const char *addr_str, __u16 port,
        if (family == AF_INET) {
                struct sockaddr_in *sin = (void *)addr;
 
+               memset(addr, 0, sizeof(*sin));
                sin->sin_family = AF_INET;
                sin->sin_port = htons(port);
                if (addr_str &&
@@ -230,6 +296,7 @@ int make_sockaddr(int family, const char *addr_str, __u16 port,
        } else if (family == AF_INET6) {
                struct sockaddr_in6 *sin6 = (void *)addr;
 
+               memset(addr, 0, sizeof(*sin6));
                sin6->sin6_family = AF_INET6;
                sin6->sin6_port = htons(port);
                if (addr_str &&
@@ -243,3 +310,15 @@ int make_sockaddr(int family, const char *addr_str, __u16 port,
        }
        return -1;
 }
+
+char *ping_command(int family)
+{
+       if (family == AF_INET6) {
+               /* On some systems 'ping' doesn't support IPv6, so use ping6 if it is present. */
+               if (!system("which ping6 >/dev/null 2>&1"))
+                       return "ping6";
+               else
+                       return "ping -6";
+       }
+       return "ping";
+}
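A hypothetical wrapper around the new helper, sending a single probe to an
address of either family:

#include <stdio.h>
#include <stdlib.h>
#include <sys/socket.h>
#include "network_helpers.h"	/* for ping_command() */

static int ping_once(int family, const char *addr)
{
	char cmd[128];

	snprintf(cmd, sizeof(cmd), "%s -c 1 %s >/dev/null 2>&1",
		 ping_command(family), addr);
	return system(cmd);	/* 0 on success */
}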
index 5e0d51c..c59a8f6 100644 (file)
@@ -36,11 +36,16 @@ extern struct ipv6_packet pkt_v6;
 int settimeo(int fd, int timeout_ms);
 int start_server(int family, int type, const char *addr, __u16 port,
                 int timeout_ms);
+int *start_reuseport_server(int family, int type, const char *addr_str,
+                           __u16 port, int timeout_ms,
+                           unsigned int nr_listens);
+void free_fds(int *fds, unsigned int nr_close_fds);
 int connect_to_fd(int server_fd, int timeout_ms);
 int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms);
 int fastopen_connect(int server_fd, const char *data, unsigned int data_len,
                     int timeout_ms);
 int make_sockaddr(int family, const char *addr_str, __u16 port,
                  struct sockaddr_storage *addr, socklen_t *len);
+char *ping_command(int family);
 
 #endif
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c
new file mode 100644 (file)
index 0000000..85babb0
--- /dev/null
@@ -0,0 +1,226 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#define _GNU_SOURCE
+#include <sched.h>
+#include <test_progs.h>
+#include "network_helpers.h"
+#include "bpf_dctcp.skel.h"
+#include "bpf_cubic.skel.h"
+#include "bpf_iter_setsockopt.skel.h"
+
+static int create_netns(void)
+{
+       if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns"))
+               return -1;
+
+       if (!ASSERT_OK(system("ip link set dev lo up"), "bring up lo"))
+               return -1;
+
+       return 0;
+}
+
+static unsigned int set_bpf_cubic(int *fds, unsigned int nr_fds)
+{
+       unsigned int i;
+
+       for (i = 0; i < nr_fds; i++) {
+               if (setsockopt(fds[i], SOL_TCP, TCP_CONGESTION, "bpf_cubic",
+                              sizeof("bpf_cubic")))
+                       return i;
+       }
+
+       return nr_fds;
+}
+
+static unsigned int check_bpf_dctcp(int *fds, unsigned int nr_fds)
+{
+       char tcp_cc[16];
+       socklen_t optlen = sizeof(tcp_cc);
+       unsigned int i;
+
+       for (i = 0; i < nr_fds; i++) {
+               if (getsockopt(fds[i], SOL_TCP, TCP_CONGESTION,
+                              tcp_cc, &optlen) ||
+                   strcmp(tcp_cc, "bpf_dctcp"))
+                       return i;
+       }
+
+       return nr_fds;
+}
+
+static int *make_established(int listen_fd, unsigned int nr_est,
+                            int **paccepted_fds)
+{
+       int *est_fds, *accepted_fds;
+       unsigned int i;
+
+       est_fds = malloc(sizeof(*est_fds) * nr_est);
+       if (!est_fds)
+               return NULL;
+
+       accepted_fds = malloc(sizeof(*accepted_fds) * nr_est);
+       if (!accepted_fds) {
+               free(est_fds);
+               return NULL;
+       }
+
+       for (i = 0; i < nr_est; i++) {
+               est_fds[i] = connect_to_fd(listen_fd, 0);
+               if (est_fds[i] == -1)
+                       break;
+               if (set_bpf_cubic(&est_fds[i], 1) != 1) {
+                       close(est_fds[i]);
+                       break;
+               }
+
+               accepted_fds[i] = accept(listen_fd, NULL, 0);
+               if (accepted_fds[i] == -1) {
+                       close(est_fds[i]);
+                       break;
+               }
+       }
+
+       if (!ASSERT_EQ(i, nr_est, "create established fds")) {
+               free_fds(accepted_fds, i);
+               free_fds(est_fds, i);
+               return NULL;
+       }
+
+       *paccepted_fds = accepted_fds;
+       return est_fds;
+}
+
+static unsigned short get_local_port(int fd)
+{
+       struct sockaddr_in6 addr;
+       socklen_t addrlen = sizeof(addr);
+
+       if (!getsockname(fd, (struct sockaddr *)&addr, &addrlen))
+               return ntohs(addr.sin6_port);
+
+       return 0;
+}
+
+static void do_bpf_iter_setsockopt(struct bpf_iter_setsockopt *iter_skel,
+                                  bool random_retry)
+{
+       int *reuse_listen_fds = NULL, *accepted_fds = NULL, *est_fds = NULL;
+       unsigned int nr_reuse_listens = 256, nr_est = 256;
+       int err, iter_fd = -1, listen_fd = -1;
+       char buf;
+
+       /* Prepare non-reuseport listen_fd */
+       listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
+       if (!ASSERT_GE(listen_fd, 0, "start_server"))
+               return;
+       if (!ASSERT_EQ(set_bpf_cubic(&listen_fd, 1), 1,
+                      "set listen_fd to cubic"))
+               goto done;
+       iter_skel->bss->listen_hport = get_local_port(listen_fd);
+       if (!ASSERT_NEQ(iter_skel->bss->listen_hport, 0,
+                       "get_local_port(listen_fd)"))
+               goto done;
+
+       /* Connect to non-reuseport listen_fd */
+       est_fds = make_established(listen_fd, nr_est, &accepted_fds);
+       if (!ASSERT_OK_PTR(est_fds, "create established"))
+               goto done;
+
+       /* Prepare reuseport listen fds */
+       reuse_listen_fds = start_reuseport_server(AF_INET6, SOCK_STREAM,
+                                                 "::1", 0, 0,
+                                                 nr_reuse_listens);
+       if (!ASSERT_OK_PTR(reuse_listen_fds, "start_reuseport_server"))
+               goto done;
+       if (!ASSERT_EQ(set_bpf_cubic(reuse_listen_fds, nr_reuse_listens),
+                      nr_reuse_listens, "set reuse_listen_fds to cubic"))
+               goto done;
+       iter_skel->bss->reuse_listen_hport = get_local_port(reuse_listen_fds[0]);
+       if (!ASSERT_NEQ(iter_skel->bss->reuse_listen_hport, 0,
+                       "get_local_port(reuse_listen_fds[0])"))
+               goto done;
+
+       /* Run bpf tcp iter to switch from bpf_cubic to bpf_dctcp */
+       iter_skel->bss->random_retry = random_retry;
+       iter_fd = bpf_iter_create(bpf_link__fd(iter_skel->links.change_tcp_cc));
+       if (!ASSERT_GE(iter_fd, 0, "create iter_fd"))
+               goto done;
+
+       while ((err = read(iter_fd, &buf, sizeof(buf))) == -1 &&
+              errno == EAGAIN)
+               ;
+       if (!ASSERT_OK(err, "read iter error"))
+               goto done;
+
+       /* Check reuseport listen fds for dctcp */
+       ASSERT_EQ(check_bpf_dctcp(reuse_listen_fds, nr_reuse_listens),
+                 nr_reuse_listens,
+                 "check reuse_listen_fds dctcp");
+
+       /* Check non reuseport listen fd for dctcp */
+       ASSERT_EQ(check_bpf_dctcp(&listen_fd, 1), 1,
+                 "check listen_fd dctcp");
+
+       /* Check established fds for dctcp */
+       ASSERT_EQ(check_bpf_dctcp(est_fds, nr_est), nr_est,
+                 "check est_fds dctcp");
+
+       /* Check accepted fds for dctcp */
+       ASSERT_EQ(check_bpf_dctcp(accepted_fds, nr_est), nr_est,
+                 "check accepted_fds dctcp");
+
+done:
+       if (iter_fd != -1)
+               close(iter_fd);
+       if (listen_fd != -1)
+               close(listen_fd);
+       free_fds(reuse_listen_fds, nr_reuse_listens);
+       free_fds(accepted_fds, nr_est);
+       free_fds(est_fds, nr_est);
+}
+
+void test_bpf_iter_setsockopt(void)
+{
+       struct bpf_iter_setsockopt *iter_skel = NULL;
+       struct bpf_cubic *cubic_skel = NULL;
+       struct bpf_dctcp *dctcp_skel = NULL;
+       struct bpf_link *cubic_link = NULL;
+       struct bpf_link *dctcp_link = NULL;
+
+       if (create_netns())
+               return;
+
+       /* Load iter_skel */
+       iter_skel = bpf_iter_setsockopt__open_and_load();
+       if (!ASSERT_OK_PTR(iter_skel, "iter_skel"))
+               return;
+       iter_skel->links.change_tcp_cc = bpf_program__attach_iter(iter_skel->progs.change_tcp_cc, NULL);
+       if (!ASSERT_OK_PTR(iter_skel->links.change_tcp_cc, "attach iter"))
+               goto done;
+
+       /* Load bpf_cubic */
+       cubic_skel = bpf_cubic__open_and_load();
+       if (!ASSERT_OK_PTR(cubic_skel, "cubic_skel"))
+               goto done;
+       cubic_link = bpf_map__attach_struct_ops(cubic_skel->maps.cubic);
+       if (!ASSERT_OK_PTR(cubic_link, "cubic_link"))
+               goto done;
+
+       /* Load bpf_dctcp */
+       dctcp_skel = bpf_dctcp__open_and_load();
+       if (!ASSERT_OK_PTR(dctcp_skel, "dctcp_skel"))
+               goto done;
+       dctcp_link = bpf_map__attach_struct_ops(dctcp_skel->maps.dctcp);
+       if (!ASSERT_OK_PTR(dctcp_link, "dctcp_link"))
+               goto done;
+
+       do_bpf_iter_setsockopt(iter_skel, true);
+       do_bpf_iter_setsockopt(iter_skel, false);
+
+done:
+       bpf_link__destroy(cubic_link);
+       bpf_link__destroy(dctcp_link);
+       bpf_cubic__destroy(cubic_skel);
+       bpf_dctcp__destroy(dctcp_skel);
+       bpf_iter_setsockopt__destroy(iter_skel);
+}
index 857e3f2..649f873 100644 (file)
@@ -4350,7 +4350,8 @@ static void do_test_file(unsigned int test_num)
                goto done;
        }
 
-       err = btf__get_from_id(info.btf_id, &btf);
+       btf = btf__load_from_kernel_by_id(info.btf_id);
+       err = libbpf_get_error(btf);
        if (CHECK(err, "cannot get btf from kernel, err: %d", err))
                goto done;
 
@@ -4386,6 +4387,7 @@ skip:
        fprintf(stderr, "OK");
 
 done:
+       btf__free(btf);
        free(func_info);
        bpf_object__close(obj);
 }
index 1b90e68..52ccf0c 100644 (file)
@@ -232,7 +232,593 @@ err_out:
        btf__free(btf);
 }
 
+#define STRSIZE                                4096
+
+static void btf_dump_snprintf(void *ctx, const char *fmt, va_list args)
+{
+       char *s = ctx, new[STRSIZE];
+
+       vsnprintf(new, STRSIZE, fmt, args);
+       if (strlen(s) < STRSIZE)
+               strncat(s, new, STRSIZE - strlen(s) - 1);
+}
+
+static int btf_dump_data(struct btf *btf, struct btf_dump *d,
+                        char *name, char *prefix, __u64 flags, void *ptr,
+                        size_t ptr_sz, char *str, const char *expected_val)
+{
+       DECLARE_LIBBPF_OPTS(btf_dump_type_data_opts, opts);
+       size_t type_sz;
+       __s32 type_id;
+       int ret = 0;
+
+       if (flags & BTF_F_COMPACT)
+               opts.compact = true;
+       if (flags & BTF_F_NONAME)
+               opts.skip_names = true;
+       if (flags & BTF_F_ZERO)
+               opts.emit_zeroes = true;
+       if (prefix) {
+               ASSERT_STRNEQ(name, prefix, strlen(prefix),
+                             "verify prefix match");
+               name += strlen(prefix) + 1;
+       }
+       type_id = btf__find_by_name(btf, name);
+       if (!ASSERT_GE(type_id, 0, "find type id"))
+               return -ENOENT;
+       type_sz = btf__resolve_size(btf, type_id);
+       str[0] = '\0';
+       ret = btf_dump__dump_type_data(d, type_id, ptr, ptr_sz, &opts);
+       if (type_sz <= ptr_sz) {
+               if (!ASSERT_EQ(ret, type_sz, "failed/unexpected type_sz"))
+                       return -EINVAL;
+       } else {
+               if (!ASSERT_EQ(ret, -E2BIG, "failed to return -E2BIG"))
+                       return -EINVAL;
+       }
+       if (!ASSERT_STREQ(str, expected_val, "ensure expected/actual match"))
+               return -EFAULT;
+       return 0;
+}
+
+#define TEST_BTF_DUMP_DATA(_b, _d, _prefix, _str, _type, _flags,       \
+                          _expected, ...)                              \
+       do {                                                            \
+               char __ptrtype[64] = #_type;                            \
+               char *_ptrtype = (char *)__ptrtype;                     \
+               _type _ptrdata = __VA_ARGS__;                           \
+               void *_ptr = &_ptrdata;                                 \
+                                                                       \
+               (void) btf_dump_data(_b, _d, _ptrtype, _prefix, _flags, \
+                                    _ptr, sizeof(_type), _str,         \
+                                    _expected);                        \
+       } while (0)
+
+/* Use where expected data string matches its stringified declaration */
+#define TEST_BTF_DUMP_DATA_C(_b, _d, _prefix,  _str, _type, _flags,    \
+                            ...)                                       \
+       TEST_BTF_DUMP_DATA(_b, _d, _prefix, _str, _type, _flags,        \
+                          "(" #_type ")" #__VA_ARGS__, __VA_ARGS__)
+
+/* overflow test; pass typesize < expected type size, ensure -E2BIG is returned */
+#define TEST_BTF_DUMP_DATA_OVER(_b, _d, _prefix, _str, _type, _type_sz,        \
+                               _expected, ...)                         \
+       do {                                                            \
+               char __ptrtype[64] = #_type;                            \
+               char *_ptrtype = (char *)__ptrtype;                     \
+               _type _ptrdata = __VA_ARGS__;                           \
+               void *_ptr = &_ptrdata;                                 \
+                                                                       \
+               (void) btf_dump_data(_b, _d, _ptrtype, _prefix, 0,      \
+                                    _ptr, _type_sz, _str, _expected);  \
+       } while (0)
+
+#define TEST_BTF_DUMP_VAR(_b, _d, _prefix, _str, _var, _type, _flags,  \
+                         _expected, ...)                               \
+       do {                                                            \
+               _type _ptrdata = __VA_ARGS__;                           \
+               void *_ptr = &_ptrdata;                                 \
+                                                                       \
+               (void) btf_dump_data(_b, _d, _var, _prefix, _flags,     \
+                                    _ptr, sizeof(_type), _str,         \
+                                    _expected);                        \
+       } while (0)
+
+static void test_btf_dump_int_data(struct btf *btf, struct btf_dump *d,
+                                  char *str)
+{
+#ifdef __SIZEOF_INT128__
+       __int128 i = 0xffffffffffffffff;
+
+       /* this dance is required because we cannot directly initialize
+        * a 128-bit value to anything larger than a 64-bit value.
+        */
+       i = (i << 64) | (i - 1);
+#endif
+       /* simple int */
+       TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, int, BTF_F_COMPACT, 1234);
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT | BTF_F_NONAME,
+                          "1234", 1234);
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, 0, "(int)1234", 1234);
+
+       /* zero value should be printed at toplevel */
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT, "(int)0", 0);
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT | BTF_F_NONAME,
+                          "0", 0);
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT | BTF_F_ZERO,
+                          "(int)0", 0);
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, int,
+                          BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+                          "0", 0);
+       TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, int, BTF_F_COMPACT, -4567);
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT | BTF_F_NONAME,
+                          "-4567", -4567);
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, 0, "(int)-4567", -4567);
+
+       TEST_BTF_DUMP_DATA_OVER(btf, d, NULL, str, int, sizeof(int)-1, "", 1);
+
+#ifdef __SIZEOF_INT128__
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, __int128, BTF_F_COMPACT,
+                          "(__int128)0xffffffffffffffff",
+                          0xffffffffffffffff);
+       ASSERT_OK(btf_dump_data(btf, d, "__int128", NULL, 0, &i, 16, str,
+                               "(__int128)0xfffffffffffffffffffffffffffffffe"),
+                 "dump __int128");
+#endif
+}
+
+static void test_btf_dump_float_data(struct btf *btf, struct btf_dump *d,
+                                    char *str)
+{
+       float t1 = 1.234567;
+       float t2 = -1.234567;
+       float t3 = 0.0;
+       double t4 = 5.678912;
+       double t5 = -5.678912;
+       double t6 = 0.0;
+       long double t7 = 9.876543;
+       long double t8 = -9.876543;
+       long double t9 = 0.0;
+
+       /* since the kernel likely does not have any float types in its BTF,
+        * we need to add some of various sizes ourselves.
+        */
+
+       ASSERT_GT(btf__add_float(btf, "test_float", 4), 0, "add float");
+       ASSERT_OK(btf_dump_data(btf, d, "test_float", NULL, 0, &t1, 4, str,
+                               "(test_float)1.234567"), "dump float");
+       ASSERT_OK(btf_dump_data(btf, d, "test_float", NULL, 0, &t2, 4, str,
+                               "(test_float)-1.234567"), "dump float");
+       ASSERT_OK(btf_dump_data(btf, d, "test_float", NULL, 0, &t3, 4, str,
+                               "(test_float)0.000000"), "dump float");
+
+       ASSERT_GT(btf__add_float(btf, "test_double", 8), 0, "add_double");
+       ASSERT_OK(btf_dump_data(btf, d, "test_double", NULL, 0, &t4, 8, str,
+                 "(test_double)5.678912"), "dump double");
+       ASSERT_OK(btf_dump_data(btf, d, "test_double", NULL, 0, &t5, 8, str,
+                 "(test_double)-5.678912"), "dump double");
+       ASSERT_OK(btf_dump_data(btf, d, "test_double", NULL, 0, &t6, 8, str,
+                               "(test_double)0.000000"), "dump double");
+
+       ASSERT_GT(btf__add_float(btf, "test_long_double", 16), 0, "add long double");
+       ASSERT_OK(btf_dump_data(btf, d, "test_long_double", NULL, 0, &t7, 16,
+                               str, "(test_long_double)9.876543"),
+                               "dump long_double");
+       ASSERT_OK(btf_dump_data(btf, d, "test_long_double", NULL, 0, &t8, 16,
+                               str, "(test_long_double)-9.876543"),
+                               "dump long_double");
+       ASSERT_OK(btf_dump_data(btf, d, "test_long_double", NULL, 0, &t9, 16,
+                               str, "(test_long_double)0.000000"),
+                               "dump long_double");
+}
+
+static void test_btf_dump_char_data(struct btf *btf, struct btf_dump *d,
+                                   char *str)
+{
+       /* simple char */
+       TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, char, BTF_F_COMPACT, 100);
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT | BTF_F_NONAME,
+                          "100", 100);
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, 0, "(char)100", 100);
+       /* zero value should be printed at toplevel */
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT,
+                          "(char)0", 0);
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT | BTF_F_NONAME,
+                          "0", 0);
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT | BTF_F_ZERO,
+                          "(char)0", 0);
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+                          "0", 0);
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, 0, "(char)0", 0);
+
+       TEST_BTF_DUMP_DATA_OVER(btf, d, NULL, str, char, sizeof(char)-1, "", 100);
+}
+
+static void test_btf_dump_typedef_data(struct btf *btf, struct btf_dump *d,
+                                      char *str)
+{
+       /* simple typedef */
+       TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, uint64_t, BTF_F_COMPACT, 100);
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, BTF_F_COMPACT | BTF_F_NONAME,
+                          "1", 1);
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, 0, "(u64)1", 1);
+       /* zero value should be printed at toplevel */
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, BTF_F_COMPACT, "(u64)0", 0);
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, BTF_F_COMPACT | BTF_F_NONAME,
+                          "0", 0);
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, BTF_F_COMPACT | BTF_F_ZERO,
+                          "(u64)0", 0);
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64,
+                          BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+                          "0", 0);
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, 0, "(u64)0", 0);
+
+       /* typedef struct */
+       TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, atomic_t, BTF_F_COMPACT,
+                            {.counter = (int)1,});
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_COMPACT | BTF_F_NONAME,
+                          "{1,}", { .counter = 1 });
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, 0,
+"(atomic_t){\n"
+"      .counter = (int)1,\n"
+"}",
+                          {.counter = 1,});
+       /* typedef with 0 value should be printed at toplevel */
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_COMPACT, "(atomic_t){}",
+                          {.counter = 0,});
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_COMPACT | BTF_F_NONAME,
+                          "{}", {.counter = 0,});
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, 0,
+"(atomic_t){\n"
+"}",
+                          {.counter = 0,});
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_COMPACT | BTF_F_ZERO,
+                          "(atomic_t){.counter = (int)0,}",
+                          {.counter = 0,});
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t,
+                          BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+                          "{0,}", {.counter = 0,});
+       TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_ZERO,
+"(atomic_t){\n"
+"      .counter = (int)0,\n"
+"}",
+                          { .counter = 0,});
+
+       /* overflow should show type but not value since it overflows */
+       TEST_BTF_DUMP_DATA_OVER(btf, d, NULL, str, atomic_t, sizeof(atomic_t)-1,
+                               "(atomic_t){\n", { .counter = 1});
+}
+
+static void test_btf_dump_enum_data(struct btf *btf, struct btf_dump *d,
+                                   char *str)
+{
+       /* enum where enum value does (and does not) exist */
+       TEST_BTF_DUMP_DATA_C(btf, d, "enum", str, enum bpf_cmd, BTF_F_COMPACT,
+                            BPF_MAP_CREATE);
+       TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd, BTF_F_COMPACT,
+                          "(enum bpf_cmd)BPF_MAP_CREATE", 0);
+       TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd,
+                          BTF_F_COMPACT | BTF_F_NONAME,
+                          "BPF_MAP_CREATE",
+                          BPF_MAP_CREATE);
+       TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd, 0,
+                          "(enum bpf_cmd)BPF_MAP_CREATE",
+                          BPF_MAP_CREATE);
+       TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd,
+                          BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+                          "BPF_MAP_CREATE", 0);
+       TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd,
+                          BTF_F_COMPACT | BTF_F_ZERO,
+                          "(enum bpf_cmd)BPF_MAP_CREATE",
+                          BPF_MAP_CREATE);
+       TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd,
+                          BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+                          "BPF_MAP_CREATE", BPF_MAP_CREATE);
+       TEST_BTF_DUMP_DATA_C(btf, d, "enum", str, enum bpf_cmd, BTF_F_COMPACT, 2000);
+       TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd,
+                          BTF_F_COMPACT | BTF_F_NONAME,
+                          "2000", 2000);
+       TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd, 0,
+                          "(enum bpf_cmd)2000", 2000);
+
+       TEST_BTF_DUMP_DATA_OVER(btf, d, "enum", str, enum bpf_cmd,
+                               sizeof(enum bpf_cmd) - 1, "", BPF_MAP_CREATE);
+}
+
+static void test_btf_dump_struct_data(struct btf *btf, struct btf_dump *d,
+                                     char *str)
+{
+       DECLARE_LIBBPF_OPTS(btf_dump_type_data_opts, opts);
+       char zero_data[512] = { };
+       char type_data[512];
+       void *fops = type_data;
+       void *skb = type_data;
+       size_t type_sz;
+       __s32 type_id;
+       char *cmpstr;
+       int ret;
+
+       memset(type_data, 255, sizeof(type_data));
+
+       /* simple struct */
+       TEST_BTF_DUMP_DATA_C(btf, d, "struct", str, struct btf_enum, BTF_F_COMPACT,
+                            {.name_off = (__u32)3,.val = (__s32)-1,});
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum,
+                          BTF_F_COMPACT | BTF_F_NONAME,
+                          "{3,-1,}",
+                          { .name_off = 3, .val = -1,});
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum, 0,
+"(struct btf_enum){\n"
+"      .name_off = (__u32)3,\n"
+"      .val = (__s32)-1,\n"
+"}",
+                          { .name_off = 3, .val = -1,});
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum,
+                          BTF_F_COMPACT | BTF_F_NONAME,
+                          "{-1,}",
+                          { .name_off = 0, .val = -1,});
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum,
+                          BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+                          "{0,-1,}",
+                          { .name_off = 0, .val = -1,});
+       /* empty struct should be printed */
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum, BTF_F_COMPACT,
+                          "(struct btf_enum){}",
+                          { .name_off = 0, .val = 0,});
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum,
+                          BTF_F_COMPACT | BTF_F_NONAME,
+                          "{}",
+                          { .name_off = 0, .val = 0,});
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum, 0,
+"(struct btf_enum){\n"
+"}",
+                          { .name_off = 0, .val = 0,});
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum,
+                          BTF_F_COMPACT | BTF_F_ZERO,
+                          "(struct btf_enum){.name_off = (__u32)0,.val = (__s32)0,}",
+                          { .name_off = 0, .val = 0,});
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum,
+                          BTF_F_ZERO,
+"(struct btf_enum){\n"
+"      .name_off = (__u32)0,\n"
+"      .val = (__s32)0,\n"
+"}",
+                          { .name_off = 0, .val = 0,});
+
+       /* struct with pointers */
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct list_head, BTF_F_COMPACT,
+                          "(struct list_head){.next = (struct list_head *)0x1,}",
+                          { .next = (struct list_head *)1 });
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct list_head, 0,
+"(struct list_head){\n"
+"      .next = (struct list_head *)0x1,\n"
+"}",
+                          { .next = (struct list_head *)1 });
+       /* NULL pointer should not be displayed */
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct list_head, BTF_F_COMPACT,
+                          "(struct list_head){}",
+                          { .next = (struct list_head *)0 });
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct list_head, 0,
+"(struct list_head){\n"
+"}",
+                          { .next = (struct list_head *)0 });
+
+       /* struct with function pointers */
+       type_id = btf__find_by_name(btf, "file_operations");
+       if (ASSERT_GT(type_id, 0, "find type id")) {
+               type_sz = btf__resolve_size(btf, type_id);
+               str[0] = '\0';
+
+               ret = btf_dump__dump_type_data(d, type_id, fops, type_sz, &opts);
+               ASSERT_EQ(ret, type_sz,
+                         "unexpected return value dumping file_operations");
+               cmpstr =
+"(struct file_operations){\n"
+"      .owner = (struct module *)0xffffffffffffffff,\n"
+"      .llseek = (loff_t (*)(struct file *, loff_t, int))0xffffffffffffffff,";
+
+               ASSERT_STRNEQ(str, cmpstr, strlen(cmpstr), "file_operations");
+       }
+
+       /* struct with char array */
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info, BTF_F_COMPACT,
+                          "(struct bpf_prog_info){.name = (char[16])['f','o','o',],}",
+                          { .name = "foo",});
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info,
+                          BTF_F_COMPACT | BTF_F_NONAME,
+                          "{['f','o','o',],}",
+                          {.name = "foo",});
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info, 0,
+"(struct bpf_prog_info){\n"
+"      .name = (char[16])[\n"
+"              'f',\n"
+"              'o',\n"
+"              'o',\n"
+"      ],\n"
+"}",
+                          {.name = "foo",});
+       /* leading null char means do not display string */
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info, BTF_F_COMPACT,
+                          "(struct bpf_prog_info){}",
+                          {.name = {'\0', 'f', 'o', 'o'}});
+       /* handle non-printable characters */
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info, BTF_F_COMPACT,
+                          "(struct bpf_prog_info){.name = (char[16])[1,2,3,],}",
+                          { .name = {1, 2, 3, 0}});
+
+       /* struct with non-char array */
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff, BTF_F_COMPACT,
+                          "(struct __sk_buff){.cb = (__u32[5])[1,2,3,4,5,],}",
+                          { .cb = {1, 2, 3, 4, 5,},});
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff,
+                          BTF_F_COMPACT | BTF_F_NONAME,
+                          "{[1,2,3,4,5,],}",
+                          { .cb = { 1, 2, 3, 4, 5},});
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff, 0,
+"(struct __sk_buff){\n"
+"      .cb = (__u32[5])[\n"
+"              1,\n"
+"              2,\n"
+"              3,\n"
+"              4,\n"
+"              5,\n"
+"      ],\n"
+"}",
+                          { .cb = { 1, 2, 3, 4, 5},});
+       /* For non-char arrays, show non-zero values only */
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff, BTF_F_COMPACT,
+                          "(struct __sk_buff){.cb = (__u32[5])[0,0,1,0,0,],}",
+                          { .cb = { 0, 0, 1, 0, 0},});
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff, 0,
+"(struct __sk_buff){\n"
+"      .cb = (__u32[5])[\n"
+"              0,\n"
+"              0,\n"
+"              1,\n"
+"              0,\n"
+"              0,\n"
+"      ],\n"
+"}",
+                          { .cb = { 0, 0, 1, 0, 0},});
+
+       /* struct with bitfields */
+       TEST_BTF_DUMP_DATA_C(btf, d, "struct", str, struct bpf_insn, BTF_F_COMPACT,
+               {.code = (__u8)1,.dst_reg = (__u8)0x2,.src_reg = (__u8)0x3,.off = (__s16)4,.imm = (__s32)5,});
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_insn,
+                          BTF_F_COMPACT | BTF_F_NONAME,
+                          "{1,0x2,0x3,4,5,}",
+                          { .code = 1, .dst_reg = 0x2, .src_reg = 0x3, .off = 4,
+                            .imm = 5,});
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_insn, 0,
+"(struct bpf_insn){\n"
+"      .code = (__u8)1,\n"
+"      .dst_reg = (__u8)0x2,\n"
+"      .src_reg = (__u8)0x3,\n"
+"      .off = (__s16)4,\n"
+"      .imm = (__s32)5,\n"
+"}",
+                          {.code = 1, .dst_reg = 2, .src_reg = 3, .off = 4, .imm = 5});
+
+       /* zeroed bitfields should not be displayed */
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_insn, BTF_F_COMPACT,
+                          "(struct bpf_insn){.dst_reg = (__u8)0x1,}",
+                          { .code = 0, .dst_reg = 1});
+
+       /* struct with enum bitfield */
+       type_id = btf__find_by_name(btf, "fs_context");
+       if (ASSERT_GT(type_id, 0, "find fs_context")) {
+               type_sz = btf__resolve_size(btf, type_id);
+               str[0] = '\0';
+
+               opts.emit_zeroes = true;
+               ret = btf_dump__dump_type_data(d, type_id, zero_data, type_sz, &opts);
+               ASSERT_EQ(ret, type_sz,
+                         "unexpected return value dumping fs_context");
+
+               ASSERT_NEQ(strstr(str, "FS_CONTEXT_FOR_MOUNT"), NULL,
+                                 "bitfield value not present");
+       }
+
+       /* struct with nested anon union */
+       TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_sock_ops, BTF_F_COMPACT,
+                          "(struct bpf_sock_ops){.op = (__u32)1,(union){.args = (__u32[4])[1,2,3,4,],.reply = (__u32)1,.replylong = (__u32[4])[1,2,3,4,],},}",
+                          { .op = 1, .args = { 1, 2, 3, 4}});
+
+       /* union with nested struct */
+       TEST_BTF_DUMP_DATA(btf, d, "union", str, union bpf_iter_link_info, BTF_F_COMPACT,
+                          "(union bpf_iter_link_info){.map = (struct){.map_fd = (__u32)1,},}",
+                          { .map = { .map_fd = 1 }});
+
+       /* struct skb with nested structs/unions; because type output is so
+        * complex, we don't do a string comparison, just verify we return
+        * the type size as the amount of data displayed.
+        */
+       type_id = btf__find_by_name(btf, "sk_buff");
+       if (ASSERT_GT(type_id, 0, "find struct sk_buff")) {
+               type_sz = btf__resolve_size(btf, type_id);
+               str[0] = '\0';
+
+               ret = btf_dump__dump_type_data(d, type_id, skb, type_sz, &opts);
+               ASSERT_EQ(ret, type_sz,
+                         "unexpected return value dumping sk_buff");
+       }
+
+       /* overflow bpf_sock_ops struct with final element nonzero/zero.
+        * Regardless of the value of the final field, we don't have all the
+        * data we need to display it, so we should trigger an overflow.
+        * In other words, overflow checking should trump "is field zero?"
+        * checks because if we've overflowed, it shouldn't matter what the
+        * field is - we can't trust its value so shouldn't display it.
+        */
+       TEST_BTF_DUMP_DATA_OVER(btf, d, "struct", str, struct bpf_sock_ops,
+                               sizeof(struct bpf_sock_ops) - 1,
+                               "(struct bpf_sock_ops){\n\t.op = (__u32)1,\n",
+                               { .op = 1, .skb_tcp_flags = 2});
+       TEST_BTF_DUMP_DATA_OVER(btf, d, "struct", str, struct bpf_sock_ops,
+                               sizeof(struct bpf_sock_ops) - 1,
+                               "(struct bpf_sock_ops){\n\t.op = (__u32)1,\n",
+                               { .op = 1, .skb_tcp_flags = 0});
+}
+
+static void test_btf_dump_var_data(struct btf *btf, struct btf_dump *d,
+                                  char *str)
+{
+       TEST_BTF_DUMP_VAR(btf, d, NULL, str, "cpu_number", int, BTF_F_COMPACT,
+                         "int cpu_number = (int)100", 100);
+       TEST_BTF_DUMP_VAR(btf, d, NULL, str, "cpu_profile_flip", int, BTF_F_COMPACT,
+                         "static int cpu_profile_flip = (int)2", 2);
+}
+
+static void test_btf_datasec(struct btf *btf, struct btf_dump *d, char *str,
+                            const char *name, const char *expected_val,
+                            void *data, size_t data_sz)
+{
+       DECLARE_LIBBPF_OPTS(btf_dump_type_data_opts, opts);
+       int ret = 0, cmp;
+       size_t secsize;
+       __s32 type_id;
+
+       opts.compact = true;
+
+       type_id = btf__find_by_name(btf, name);
+       if (!ASSERT_GT(type_id, 0, "find type id"))
+               return;
+
+       secsize = btf__resolve_size(btf, type_id);
+       ASSERT_EQ(secsize, 0, "verify section size");
+
+       str[0] = '\0';
+       ret = btf_dump__dump_type_data(d, type_id, data, data_sz, &opts);
+       ASSERT_EQ(ret, 0, "unexpected return value");
+
+       cmp = strcmp(str, expected_val);
+       ASSERT_EQ(cmp, 0, "ensure expected/actual match");
+}
+
+static void test_btf_dump_datasec_data(char *str)
+{
+       struct btf *btf = btf__parse("xdping_kern.o", NULL);
+       struct btf_dump_opts opts = { .ctx = str };
+       char license[4] = "GPL";
+       struct btf_dump *d;
+
+       if (!ASSERT_OK_PTR(btf, "xdping_kern.o BTF not found"))
+               return;
+
+       d = btf_dump__new(btf, NULL, &opts, btf_dump_snprintf);
+       if (!ASSERT_OK_PTR(d, "could not create BTF dump"))
+               return;
+
+       test_btf_datasec(btf, d, str, "license",
+                        "SEC(\"license\") char[4] _license = (char[4])['G','P','L',];",
+                        license, sizeof(license));
+}
+
 void test_btf_dump() {
+       char str[STRSIZE];
+       struct btf_dump_opts opts = { .ctx = str };
+       struct btf_dump *d;
+       struct btf *btf;
        int i;
 
        for (i = 0; i < ARRAY_SIZE(btf_dump_test_cases); i++) {
@@ -245,4 +831,33 @@ void test_btf_dump() {
        }
        if (test__start_subtest("btf_dump: incremental"))
                test_btf_dump_incremental();
+
+       btf = libbpf_find_kernel_btf();
+       if (!ASSERT_OK_PTR(btf, "no kernel BTF found"))
+               return;
+
+       d = btf_dump__new(btf, NULL, &opts, btf_dump_snprintf);
+       if (!ASSERT_OK_PTR(d, "could not create BTF dump"))
+               return;
+
+       /* Verify type display for various types. */
+       if (test__start_subtest("btf_dump: int_data"))
+               test_btf_dump_int_data(btf, d, str);
+       if (test__start_subtest("btf_dump: float_data"))
+               test_btf_dump_float_data(btf, d, str);
+       if (test__start_subtest("btf_dump: char_data"))
+               test_btf_dump_char_data(btf, d, str);
+       if (test__start_subtest("btf_dump: typedef_data"))
+               test_btf_dump_typedef_data(btf, d, str);
+       if (test__start_subtest("btf_dump: enum_data"))
+               test_btf_dump_enum_data(btf, d, str);
+       if (test__start_subtest("btf_dump: struct_data"))
+               test_btf_dump_struct_data(btf, d, str);
+       if (test__start_subtest("btf_dump: var_data"))
+               test_btf_dump_var_data(btf, d, str);
+       btf_dump__free(d);
+       btf__free(btf);
+
+       if (test__start_subtest("btf_dump: datasec_data"))
+               test_btf_dump_datasec_data(str);
 }
index 981c251..3d4b2a3 100644 (file)
@@ -53,8 +53,8 @@ void test_core_autosize(void)
        char btf_file[] = "/tmp/core_autosize.btf.XXXXXX";
        int err, fd = -1, zero = 0;
        int char_id, short_id, int_id, long_long_id, void_ptr_id, id;
+       DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts);
        struct test_core_autosize* skel = NULL;
-       struct bpf_object_load_attr load_attr = {};
        struct bpf_program *prog;
        struct bpf_map *bss_map;
        struct btf *btf = NULL;
@@ -125,9 +125,10 @@ void test_core_autosize(void)
        fd = -1;
 
        /* open and load BPF program with custom BTF as the kernel BTF */
-       skel = test_core_autosize__open();
+       open_opts.btf_custom_path = btf_file;
+       skel = test_core_autosize__open_opts(&open_opts);
        if (!ASSERT_OK_PTR(skel, "skel_open"))
-               return;
+               goto cleanup;
 
        /* disable handle_signed() for now */
        prog = bpf_object__find_program_by_name(skel->obj, "handle_signed");
@@ -135,9 +136,7 @@ void test_core_autosize(void)
                goto cleanup;
        bpf_program__set_autoload(prog, false);
 
-       load_attr.obj = skel->obj;
-       load_attr.target_btf_path = btf_file;
-       err = bpf_object__load_xattr(&load_attr);
+       err = bpf_object__load(skel->obj);
        if (!ASSERT_OK(err, "prog_load"))
                goto cleanup;
 
@@ -204,14 +203,13 @@ void test_core_autosize(void)
        skel = NULL;
 
        /* now re-load with handle_signed() enabled, it should fail loading */
-       skel = test_core_autosize__open();
+       open_opts.btf_custom_path = btf_file;
+       skel = test_core_autosize__open_opts(&open_opts);
        if (!ASSERT_OK_PTR(skel, "skel_open"))
-               return;
+               goto cleanup;
 
-       load_attr.obj = skel->obj;
-       load_attr.target_btf_path = btf_file;
-       err = bpf_object__load_xattr(&load_attr);
-       if (!ASSERT_ERR(err, "bad_prog_load"))
+       err = test_core_autosize__load(skel);
+       if (!ASSERT_ERR(err, "skel_load"))
                goto cleanup;
 
 cleanup:
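
These hunks replace the load-time bpf_object__load_xattr() with a custom BTF path passed at open time. The resulting pattern looks roughly like this (hedged sketch; the skeleton name is hypothetical):

    DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts,
    		    .btf_custom_path = "/tmp/custom.btf");
    struct my_skel *skel = my_skel__open_opts(&open_opts);

    /* CO-RE relocations are now resolved against /tmp/custom.btf */
    if (skel && !my_skel__load(skel))
    	run_test(skel);
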
index d02e064..4739b15 100644 (file)
@@ -816,7 +816,7 @@ static size_t roundup_page(size_t sz)
 void test_core_reloc(void)
 {
        const size_t mmap_sz = roundup_page(sizeof(struct data));
-       struct bpf_object_load_attr load_attr = {};
+       DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts);
        struct core_reloc_test_case *test_case;
        const char *tp_name, *probe_name;
        int err, i, equal;
@@ -846,9 +846,16 @@ void test_core_reloc(void)
                                continue;
                }
 
-               obj = bpf_object__open_file(test_case->bpf_obj_file, NULL);
+               if (test_case->btf_src_file) {
+                       err = access(test_case->btf_src_file, R_OK);
+                       if (!ASSERT_OK(err, "btf_src_file"))
+                               goto cleanup;
+               }
+
+               open_opts.btf_custom_path = test_case->btf_src_file;
+               obj = bpf_object__open_file(test_case->bpf_obj_file, &open_opts);
                if (!ASSERT_OK_PTR(obj, "obj_open"))
-                       continue;
+                       goto cleanup;
 
                probe_name = "raw_tracepoint/sys_enter";
                tp_name = "sys_enter";
@@ -862,17 +869,7 @@ void test_core_reloc(void)
                          "prog '%s' not found\n", probe_name))
                        goto cleanup;
 
-
-               if (test_case->btf_src_file) {
-                       err = access(test_case->btf_src_file, R_OK);
-                       if (!ASSERT_OK(err, "btf_src_file"))
-                               goto cleanup;
-               }
-
-               load_attr.obj = obj;
-               load_attr.log_level = 0;
-               load_attr.target_btf_path = test_case->btf_src_file;
-               err = bpf_object__load_xattr(&load_attr);
+               err = bpf_object__load(obj);
                if (err) {
                        if (!test_case->fails)
                                ASSERT_OK(err, "obj_load");
index 088b365..02a465f 100644 (file)
@@ -17,6 +17,7 @@ void test_get_func_ip_test(void)
         */
 #ifndef __x86_64__
        bpf_program__set_autoload(skel->progs.test6, false);
+       bpf_program__set_autoload(skel->progs.test7, false);
 #endif
 
        err = get_func_ip_test__load(skel);
@@ -46,6 +47,7 @@ void test_get_func_ip_test(void)
        ASSERT_EQ(skel->bss->test5_result, 1, "test5_result");
 #ifdef __x86_64__
        ASSERT_EQ(skel->bss->test6_result, 1, "test6_result");
+       ASSERT_EQ(skel->bss->test7_result, 1, "test7_result");
 #endif
 
 cleanup:
diff --git a/tools/testing/selftests/bpf/prog_tests/netcnt.c b/tools/testing/selftests/bpf/prog_tests/netcnt.c
new file mode 100644 (file)
index 0000000..6ede48b
--- /dev/null
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <sys/sysinfo.h>
+#include <test_progs.h>
+#include "network_helpers.h"
+#include "netcnt_prog.skel.h"
+#include "netcnt_common.h"
+
+#define CG_NAME "/netcnt"
+
+void test_netcnt(void)
+{
+       union percpu_net_cnt *percpu_netcnt = NULL;
+       struct bpf_cgroup_storage_key key;
+       int map_fd, percpu_map_fd;
+       struct netcnt_prog *skel;
+       unsigned long packets;
+       union net_cnt netcnt;
+       unsigned long bytes;
+       int cpu, nproc;
+       int cg_fd = -1;
+       char cmd[128];
+
+       skel = netcnt_prog__open_and_load();
+       if (!ASSERT_OK_PTR(skel, "netcnt_prog__open_and_load"))
+               return;
+
+       nproc = get_nprocs_conf();
+       percpu_netcnt = malloc(sizeof(*percpu_netcnt) * nproc);
+       if (!ASSERT_OK_PTR(percpu_netcnt, "malloc(percpu_netcnt)"))
+               goto err;
+
+       cg_fd = test__join_cgroup(CG_NAME);
+       if (!ASSERT_GE(cg_fd, 0, "test__join_cgroup"))
+               goto err;
+
+       skel->links.bpf_nextcnt = bpf_program__attach_cgroup(skel->progs.bpf_nextcnt, cg_fd);
+       if (!ASSERT_OK_PTR(skel->links.bpf_nextcnt,
+                          "attach_cgroup(bpf_nextcnt)"))
+               goto err;
+
+       snprintf(cmd, sizeof(cmd), "%s ::1 -A -c 10000 -q > /dev/null", ping_command(AF_INET6));
+       ASSERT_OK(system(cmd), cmd);
+
+       map_fd = bpf_map__fd(skel->maps.netcnt);
+       if (!ASSERT_OK(bpf_map_get_next_key(map_fd, NULL, &key), "bpf_map_get_next_key"))
+               goto err;
+
+       if (!ASSERT_OK(bpf_map_lookup_elem(map_fd, &key, &netcnt), "bpf_map_lookup_elem(netcnt)"))
+               goto err;
+
+       percpu_map_fd = bpf_map__fd(skel->maps.percpu_netcnt);
+       if (!ASSERT_OK(bpf_map_lookup_elem(percpu_map_fd, &key, &percpu_netcnt[0]),
+                      "bpf_map_lookup_elem(percpu_netcnt)"))
+               goto err;
+
+       /* Some packets can still be in the per-cpu cache, but no more than
+        * MAX_PERCPU_PACKETS.
+        */
+       packets = netcnt.packets;
+       bytes = netcnt.bytes;
+       for (cpu = 0; cpu < nproc; cpu++) {
+               ASSERT_LE(percpu_netcnt[cpu].packets, MAX_PERCPU_PACKETS, "MAX_PERCPU_PACKETS");
+
+               packets += percpu_netcnt[cpu].packets;
+               bytes += percpu_netcnt[cpu].bytes;
+       }
+
+       /* No packets should be lost */
+       ASSERT_EQ(packets, 10000, "packets");
+
+       /* Check that the bytes counter matches the number of packets
+        * multiplied by the size of an ICMPv6 echo request: 40 bytes of
+        * IPv6 header + 8 bytes of ICMPv6 header + 56 bytes of default
+        * ping payload = 104 bytes per packet.
+        */
+       ASSERT_EQ(bytes, packets * 104, "bytes");
+
+err:
+       if (cg_fd != -1)
+               close(cg_fd);
+       free(percpu_netcnt);
+       netcnt_prog__destroy(skel);
+}
index fcf54b3..d4b953a 100644 (file)
@@ -125,6 +125,10 @@ void test_pinning(void)
        if (CHECK(err, "pin maps", "err %d errno %d\n", err, errno))
                goto out;
 
+       /* get pinning path */
+       if (!ASSERT_STREQ(bpf_map__pin_path(map), pinpath, "get pin path"))
+               goto out;
+
        /* set pinning path of other map and re-pin all */
        map = bpf_object__find_map_by_name(obj, "nopinmap");
        if (CHECK(!map, "find map", "NULL map"))
@@ -134,6 +138,11 @@ void test_pinning(void)
        if (CHECK(err, "set pin path", "err %d errno %d\n", err, errno))
                goto out;
 
+       /* get pinning path after set */
+       if (!ASSERT_STREQ(bpf_map__pin_path(map), custpinpath,
+                         "get pin path after set"))
+               goto out;
+
        /* should only pin the one unpinned map */
        err = bpf_object__pin_maps(obj, NULL);
        if (CHECK(err, "pin maps", "err %d errno %d\n", err, errno))
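
For reference, the bpf_map__pin_path() getter these hunks exercise pairs with the existing setter roughly as follows (a hedged sketch; error handling elided):

    if (!bpf_map__set_pin_path(map, "/sys/fs/bpf/custom"))
    	/* bpf_map__pin_path() reads back what set_pin_path() stored */
    	ASSERT_STREQ(bpf_map__pin_path(map), "/sys/fs/bpf/custom", "pin path");
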
index de26881..4e91f4d 100644 (file)
@@ -34,8 +34,8 @@ void test_reference_tracking(void)
                if (!test__start_subtest(title))
                        continue;
 
-               /* Expect verifier failure if test name has 'fail' */
-               if (strstr(title, "fail") != NULL) {
+               /* Expect verifier failure if test name has 'err' */
+               if (strstr(title, "err_") != NULL) {
                        libbpf_print_fn_t old_print_fn;
 
                        old_print_fn = libbpf_set_print(NULL);
index 5703c91..e7201ba 100644 (file)
 #define _GNU_SOURCE
 
 #include <arpa/inet.h>
+#include <linux/if.h>
+#include <linux/if_tun.h>
 #include <linux/limits.h>
 #include <linux/sysctl.h>
-#include <linux/if_tun.h>
-#include <linux/if.h>
 #include <sched.h>
 #include <stdbool.h>
 #include <stdio.h>
-#include <sys/stat.h>
 #include <sys/mount.h>
+#include <sys/stat.h>
+#include <unistd.h>
 
 #include "test_progs.h"
 #include "network_helpers.h"
@@ -391,9 +392,7 @@ done:
 
 static int test_ping(int family, const char *addr)
 {
-       const char *ping = family == AF_INET6 ? "ping6" : "ping";
-
-       SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping, addr);
+       SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping_command(family), addr);
        return 0;
 fail:
        return -1;
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
new file mode 100644 (file)
index 0000000..6b186b4
--- /dev/null
@@ -0,0 +1,520 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/**
+ * Test XDP bonding support
+ *
+ * Sets up two bonded veth pairs between two fresh namespaces
+ * and verifies that an XDP_TX program loaded on a bond device is
+ * correctly propagated to the slave devices and that XDP_TX'd
+ * packets are balanced according to the bonding mode.
+ */
+
+#define _GNU_SOURCE
+#include <sched.h>
+#include <net/if.h>
+#include <linux/if_link.h>
+#include "test_progs.h"
+#include "network_helpers.h"
+#include <linux/if_bonding.h>
+#include <linux/limits.h>
+#include <linux/udp.h>
+
+#include "xdp_dummy.skel.h"
+#include "xdp_redirect_multi_kern.skel.h"
+#include "xdp_tx.skel.h"
+
+#define BOND1_MAC {0x00, 0x11, 0x22, 0x33, 0x44, 0x55}
+#define BOND1_MAC_STR "00:11:22:33:44:55"
+#define BOND2_MAC {0x00, 0x22, 0x33, 0x44, 0x55, 0x66}
+#define BOND2_MAC_STR "00:22:33:44:55:66"
+#define NPACKETS 100
+
+static int root_netns_fd = -1;
+
+static void restore_root_netns(void)
+{
+       ASSERT_OK(setns(root_netns_fd, CLONE_NEWNET), "restore_root_netns");
+}
+
+static int setns_by_name(char *name)
+{
+       int nsfd, err;
+       char nspath[PATH_MAX];
+
+       snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name);
+       nsfd = open(nspath, O_RDONLY | O_CLOEXEC);
+       if (nsfd < 0)
+               return -1;
+
+       err = setns(nsfd, CLONE_NEWNET);
+       close(nsfd);
+       return err;
+}
+
+static int get_rx_packets(const char *iface)
+{
+       FILE *f;
+       char line[512];
+       int iface_len = strlen(iface);
+
+       f = fopen("/proc/net/dev", "r");
+       if (!f)
+               return -1;
+
+       while (fgets(line, sizeof(line), f)) {
+               char *p = line;
+
+               while (*p == ' ')
+                       p++; /* skip whitespace */
+               if (!strncmp(p, iface, iface_len)) {
+                       p += iface_len;
+                       if (*p++ != ':')
+                               continue;
+                       while (*p == ' ')
+                               p++; /* skip whitespace */
+                       while (*p && *p != ' ')
+                               p++; /* skip rx bytes */
+                       while (*p == ' ')
+                               p++; /* skip whitespace */
+                       fclose(f);
+                       return atoi(p);
+               }
+       }
+       fclose(f);
+       return -1;
+}
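
For context, get_rx_packets() above walks /proc/net/dev lines of roughly this shape; after skipping the interface name and the RX bytes column, the value it atoi()s is the RX packets column (illustrative values):

    /*
     *  veth1_1: 104000    1000    0    0    0     0          0         0 ...
     *           ^rx bytes ^rx packets (the value returned)
     */
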
+
+#define MAX_BPF_LINKS 8
+
+struct skeletons {
+       struct xdp_dummy *xdp_dummy;
+       struct xdp_tx *xdp_tx;
+       struct xdp_redirect_multi_kern *xdp_redirect_multi_kern;
+
+       int nlinks;
+       struct bpf_link *links[MAX_BPF_LINKS];
+};
+
+static int xdp_attach(struct skeletons *skeletons, struct bpf_program *prog, char *iface)
+{
+       struct bpf_link *link;
+       int ifindex;
+
+       ifindex = if_nametoindex(iface);
+       if (!ASSERT_GT(ifindex, 0, "get ifindex"))
+               return -1;
+
+       if (!ASSERT_LE(skeletons->nlinks + 1, MAX_BPF_LINKS, "too many XDP programs attached"))
+               return -1;
+
+       link = bpf_program__attach_xdp(prog, ifindex);
+       if (!ASSERT_OK_PTR(link, "attach xdp program"))
+               return -1;
+
+       skeletons->links[skeletons->nlinks++] = link;
+       return 0;
+}
+
+enum {
+       BOND_ONE_NO_ATTACH = 0,
+       BOND_BOTH_AND_ATTACH,
+};
+
+static const char * const mode_names[] = {
+       [BOND_MODE_ROUNDROBIN]   = "balance-rr",
+       [BOND_MODE_ACTIVEBACKUP] = "active-backup",
+       [BOND_MODE_XOR]          = "balance-xor",
+       [BOND_MODE_BROADCAST]    = "broadcast",
+       [BOND_MODE_8023AD]       = "802.3ad",
+       [BOND_MODE_TLB]          = "balance-tlb",
+       [BOND_MODE_ALB]          = "balance-alb",
+};
+
+static const char * const xmit_policy_names[] = {
+       [BOND_XMIT_POLICY_LAYER2]       = "layer2",
+       [BOND_XMIT_POLICY_LAYER34]      = "layer3+4",
+       [BOND_XMIT_POLICY_LAYER23]      = "layer2+3",
+       [BOND_XMIT_POLICY_ENCAP23]      = "encap2+3",
+       [BOND_XMIT_POLICY_ENCAP34]      = "encap3+4",
+};
+
+static int bonding_setup(struct skeletons *skeletons, int mode, int xmit_policy,
+                        int bond_both_attach)
+{
+#define SYS(fmt, ...)                                          \
+       ({                                                      \
+               char cmd[1024];                                 \
+               snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \
+               if (!ASSERT_OK(system(cmd), cmd))               \
+                       return -1;                              \
+       })
+
+       SYS("ip netns add ns_dst");
+       SYS("ip link add veth1_1 type veth peer name veth2_1 netns ns_dst");
+       SYS("ip link add veth1_2 type veth peer name veth2_2 netns ns_dst");
+
+       SYS("ip link add bond1 type bond mode %s xmit_hash_policy %s",
+           mode_names[mode], xmit_policy_names[xmit_policy]);
+       SYS("ip link set bond1 up address " BOND1_MAC_STR " addrgenmode none");
+       SYS("ip -netns ns_dst link add bond2 type bond mode %s xmit_hash_policy %s",
+           mode_names[mode], xmit_policy_names[xmit_policy]);
+       SYS("ip -netns ns_dst link set bond2 up address " BOND2_MAC_STR " addrgenmode none");
+
+       SYS("ip link set veth1_1 master bond1");
+       if (bond_both_attach == BOND_BOTH_AND_ATTACH) {
+               SYS("ip link set veth1_2 master bond1");
+       } else {
+               SYS("ip link set veth1_2 up addrgenmode none");
+
+               if (xdp_attach(skeletons, skeletons->xdp_dummy->progs.xdp_dummy_prog, "veth1_2"))
+                       return -1;
+       }
+
+       SYS("ip -netns ns_dst link set veth2_1 master bond2");
+
+       if (bond_both_attach == BOND_BOTH_AND_ATTACH)
+               SYS("ip -netns ns_dst link set veth2_2 master bond2");
+       else
+               SYS("ip -netns ns_dst link set veth2_2 up addrgenmode none");
+
+       /* Load a dummy program on the sending side: the veth peer needs
+        * to have an XDP program attached as well for XDP_TX on the
+        * other end to work.
+        */
+       if (xdp_attach(skeletons, skeletons->xdp_dummy->progs.xdp_dummy_prog, "bond1"))
+               return -1;
+
+       if (bond_both_attach == BOND_BOTH_AND_ATTACH) {
+               if (!ASSERT_OK(setns_by_name("ns_dst"), "set netns to ns_dst"))
+                       return -1;
+
+               if (xdp_attach(skeletons, skeletons->xdp_tx->progs.xdp_tx, "bond2"))
+                       return -1;
+
+               restore_root_netns();
+       }
+
+       return 0;
+
+#undef SYS
+}
+
+static void bonding_cleanup(struct skeletons *skeletons)
+{
+       restore_root_netns();
+       while (skeletons->nlinks) {
+               skeletons->nlinks--;
+               bpf_link__destroy(skeletons->links[skeletons->nlinks]);
+       }
+       ASSERT_OK(system("ip link delete bond1"), "delete bond1");
+       ASSERT_OK(system("ip link delete veth1_1"), "delete veth1_1");
+       ASSERT_OK(system("ip link delete veth1_2"), "delete veth1_2");
+       ASSERT_OK(system("ip netns delete ns_dst"), "delete ns_dst");
+}
+
+static int send_udp_packets(int vary_dst_ip)
+{
+       struct ethhdr eh = {
+               .h_source = BOND1_MAC,
+               .h_dest = BOND2_MAC,
+               .h_proto = htons(ETH_P_IP),
+       };
+       uint8_t buf[128] = {};
+       struct iphdr *iph = (struct iphdr *)(buf + sizeof(eh));
+       struct udphdr *uh = (struct udphdr *)(buf + sizeof(eh) + sizeof(*iph));
+       int i, s = -1;
+       int ifindex;
+
+       s = socket(AF_PACKET, SOCK_RAW, IPPROTO_RAW);
+       if (!ASSERT_GE(s, 0, "socket"))
+               goto err;
+
+       ifindex = if_nametoindex("bond1");
+       if (!ASSERT_GT(ifindex, 0, "get bond1 ifindex"))
+               goto err;
+
+       memcpy(buf, &eh, sizeof(eh));
+       iph->ihl = 5;
+       iph->version = 4;
+       iph->tos = 16;
+       iph->id = 1;
+       iph->ttl = 64;
+       iph->protocol = IPPROTO_UDP;
+       iph->saddr = 1;
+       iph->daddr = 2;
+       iph->tot_len = htons(sizeof(buf) - ETH_HLEN);
+       iph->check = 0;
+
+       for (i = 1; i <= NPACKETS; i++) {
+               int n;
+               struct sockaddr_ll saddr_ll = {
+                       .sll_ifindex = ifindex,
+                       .sll_halen = ETH_ALEN,
+                       .sll_addr = BOND2_MAC,
+               };
+
+               /* vary the UDP destination port for even distribution with roundrobin/xor modes */
+               uh->dest++;
+
+               if (vary_dst_ip)
+                       iph->daddr++;
+
+               n = sendto(s, buf, sizeof(buf), 0, (struct sockaddr *)&saddr_ll, sizeof(saddr_ll));
+               if (!ASSERT_EQ(n, sizeof(buf), "sendto"))
+                       goto err;
+       }
+
+       return 0;
+
+err:
+       if (s >= 0)
+               close(s);
+       return -1;
+}
+
+static void test_xdp_bonding_with_mode(struct skeletons *skeletons, int mode, int xmit_policy)
+{
+       int bond1_rx;
+
+       if (bonding_setup(skeletons, mode, xmit_policy, BOND_BOTH_AND_ATTACH))
+               goto out;
+
+       if (send_udp_packets(xmit_policy != BOND_XMIT_POLICY_LAYER34))
+               goto out;
+
+       bond1_rx = get_rx_packets("bond1");
+       ASSERT_EQ(bond1_rx, NPACKETS, "expected more received packets");
+
+       switch (mode) {
+       case BOND_MODE_ROUNDROBIN:
+       case BOND_MODE_XOR: {
+               int veth1_rx = get_rx_packets("veth1_1");
+               int veth2_rx = get_rx_packets("veth1_2");
+               int diff = abs(veth1_rx - veth2_rx);
+
+               ASSERT_GE(veth1_rx + veth2_rx, NPACKETS, "expected more packets");
+
+               switch (xmit_policy) {
+               case BOND_XMIT_POLICY_LAYER2:
+                       ASSERT_GE(diff, NPACKETS,
+                                 "expected packets on only one of the interfaces");
+                       break;
+               case BOND_XMIT_POLICY_LAYER23:
+               case BOND_XMIT_POLICY_LAYER34:
+                       ASSERT_LT(diff, NPACKETS/2,
+                                 "expected even distribution of packets");
+                       break;
+               default:
+                       PRINT_FAIL("Unimplemented xmit_policy=%d\n", xmit_policy);
+                       break;
+               }
+               break;
+       }
+       case BOND_MODE_ACTIVEBACKUP: {
+               int veth1_rx = get_rx_packets("veth1_1");
+               int veth2_rx = get_rx_packets("veth1_2");
+               int diff = abs(veth1_rx - veth2_rx);
+
+               ASSERT_GE(diff, NPACKETS,
+                         "expected packets on only one of the interfaces");
+               break;
+       }
+       default:
+               PRINT_FAIL("Unimplemented xmit_policy=%d\n", xmit_policy);
+               break;
+       }
+
+out:
+       bonding_cleanup(skeletons);
+}
+
+/* Test broadcast redirection using xdp_redirect_map_multi_prog: add all
+ * the interfaces to the devmap and check that broadcasting won't send
+ * the packet back to either the ingress bond device (bond2) or its
+ * slave (veth2_1).
+ */
+static void test_xdp_bonding_redirect_multi(struct skeletons *skeletons)
+{
+       static const char * const ifaces[] = {"bond2", "veth2_1", "veth2_2"};
+       int veth1_1_rx, veth1_2_rx;
+       int err;
+
+       if (bonding_setup(skeletons, BOND_MODE_ROUNDROBIN, BOND_XMIT_POLICY_LAYER23,
+                         BOND_ONE_NO_ATTACH))
+               goto out;
+
+       if (!ASSERT_OK(setns_by_name("ns_dst"), "could not set netns to ns_dst"))
+               goto out;
+
+       /* populate the devmap with the relevant interfaces */
+       for (int i = 0; i < ARRAY_SIZE(ifaces); i++) {
+               int ifindex = if_nametoindex(ifaces[i]);
+               int map_fd = bpf_map__fd(skeletons->xdp_redirect_multi_kern->maps.map_all);
+
+               if (!ASSERT_GT(ifindex, 0, "could not get interface index"))
+                       goto out;
+
+               err = bpf_map_update_elem(map_fd, &ifindex, &ifindex, 0);
+               if (!ASSERT_OK(err, "add interface to map_all"))
+                       goto out;
+       }
+
+       if (xdp_attach(skeletons,
+                      skeletons->xdp_redirect_multi_kern->progs.xdp_redirect_map_multi_prog,
+                      "bond2"))
+               goto out;
+
+       restore_root_netns();
+
+       if (send_udp_packets(BOND_MODE_ROUNDROBIN))
+               goto out;
+
+       veth1_1_rx = get_rx_packets("veth1_1");
+       veth1_2_rx = get_rx_packets("veth1_2");
+
+       ASSERT_EQ(veth1_1_rx, 0, "expected no packets on veth1_1");
+       ASSERT_GE(veth1_2_rx, NPACKETS, "expected packets on veth1_2");
+
+out:
+       restore_root_netns();
+       bonding_cleanup(skeletons);
+}
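
The devmap program attached above is not shown in this diff; assuming the xdp_redirect_multi_kern selftest object referenced here, it is essentially a one-liner broadcasting to every map_all entry except the ingress interface, which is why neither bond2 nor veth2_1 sees the packet again (hedged sketch):

    SEC("xdp")
    int xdp_redirect_map_multi_prog(struct xdp_md *ctx)
    {
    	/* replicate to all map_all entries, minus the receiving ifindex */
    	return bpf_redirect_map(&map_all, 0,
    				BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
    }
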
+
+/* Test that XDP programs cannot be attached to both the bond master and slaves simultaneously */
+static void test_xdp_bonding_attach(struct skeletons *skeletons)
+{
+       struct bpf_link *link = NULL;
+       struct bpf_link *link2 = NULL;
+       int veth, bond;
+       int err;
+
+       if (!ASSERT_OK(system("ip link add veth type veth"), "add veth"))
+               goto out;
+       if (!ASSERT_OK(system("ip link add bond type bond"), "add bond"))
+               goto out;
+
+       veth = if_nametoindex("veth");
+       if (!ASSERT_GT(veth, 0, "if_nametoindex veth"))
+               goto out;
+       bond = if_nametoindex("bond");
+       if (!ASSERT_GT(bond, 0, "if_nametoindex bond"))
+               goto out;
+
+       /* enslaving with an XDP program loaded fails */
+       link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, veth);
+       if (!ASSERT_OK_PTR(link, "attach program to veth"))
+               goto out;
+
+       err = system("ip link set veth master bond");
+       if (!ASSERT_NEQ(err, 0, "attaching slave with xdp program expected to fail"))
+               goto out;
+
+       bpf_link__destroy(link);
+       link = NULL;
+
+       err = system("ip link set veth master bond");
+       if (!ASSERT_OK(err, "set veth master"))
+               goto out;
+
+       /* attaching to slave when master has no program is allowed */
+       link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, veth);
+       if (!ASSERT_OK_PTR(link, "attach program to slave when enslaved"))
+               goto out;
+
+       /* attaching to master not allowed when slave has program loaded */
+       link2 = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, bond);
+       if (!ASSERT_ERR_PTR(link2, "attach program to master when slave has program"))
+               goto out;
+
+       bpf_link__destroy(link);
+       link = NULL;
+
+       /* attaching XDP program to master allowed when slave has no program */
+       link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, bond);
+       if (!ASSERT_OK_PTR(link, "attach program to master"))
+               goto out;
+
+       /* attaching to slave not allowed when master has program loaded */
+       link2 = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, veth);
+       ASSERT_ERR_PTR(link2, "attach program to slave when master has program");
+
+out:
+       bpf_link__destroy(link);
+       bpf_link__destroy(link2);
+
+       system("ip link del veth");
+       system("ip link del bond");
+}
+
+static int libbpf_debug_print(enum libbpf_print_level level,
+                             const char *format, va_list args)
+{
+       if (level != LIBBPF_WARN)
+               vprintf(format, args);
+       return 0;
+}
+
+struct bond_test_case {
+       char *name;
+       int mode;
+       int xmit_policy;
+};
+
+static struct bond_test_case bond_test_cases[] = {
+       { "xdp_bonding_roundrobin", BOND_MODE_ROUNDROBIN, BOND_XMIT_POLICY_LAYER23, },
+       { "xdp_bonding_activebackup", BOND_MODE_ACTIVEBACKUP, BOND_XMIT_POLICY_LAYER23 },
+
+       { "xdp_bonding_xor_layer2", BOND_MODE_XOR, BOND_XMIT_POLICY_LAYER2, },
+       { "xdp_bonding_xor_layer23", BOND_MODE_XOR, BOND_XMIT_POLICY_LAYER23, },
+       { "xdp_bonding_xor_layer34", BOND_MODE_XOR, BOND_XMIT_POLICY_LAYER34, },
+};
+
+void test_xdp_bonding(void)
+{
+       libbpf_print_fn_t old_print_fn;
+       struct skeletons skeletons = {};
+       int i;
+
+       old_print_fn = libbpf_set_print(libbpf_debug_print);
+
+       root_netns_fd = open("/proc/self/ns/net", O_RDONLY);
+       if (!ASSERT_GE(root_netns_fd, 0, "open /proc/self/ns/net"))
+               goto out;
+
+       skeletons.xdp_dummy = xdp_dummy__open_and_load();
+       if (!ASSERT_OK_PTR(skeletons.xdp_dummy, "xdp_dummy__open_and_load"))
+               goto out;
+
+       skeletons.xdp_tx = xdp_tx__open_and_load();
+       if (!ASSERT_OK_PTR(skeletons.xdp_tx, "xdp_tx__open_and_load"))
+               goto out;
+
+       skeletons.xdp_redirect_multi_kern = xdp_redirect_multi_kern__open_and_load();
+       if (!ASSERT_OK_PTR(skeletons.xdp_redirect_multi_kern,
+                          "xdp_redirect_multi_kern__open_and_load"))
+               goto out;
+
+       if (!test__start_subtest("xdp_bonding_attach"))
+               test_xdp_bonding_attach(&skeletons);
+
+       for (i = 0; i < ARRAY_SIZE(bond_test_cases); i++) {
+               struct bond_test_case *test_case = &bond_test_cases[i];
+
+               if (!test__start_subtest(test_case->name))
+                       test_xdp_bonding_with_mode(
+                               &skeletons,
+                               test_case->mode,
+                               test_case->xmit_policy);
+       }
+
+       if (!test__start_subtest("xdp_bonding_redirect_multi"))
+               test_xdp_bonding_redirect_multi(&skeletons);
+
+out:
+       xdp_dummy__destroy(skeletons.xdp_dummy);
+       xdp_tx__destroy(skeletons.xdp_tx);
+       xdp_redirect_multi_kern__destroy(skeletons.xdp_redirect_multi_kern);
+
+       libbpf_set_print(old_print_fn);
+       if (root_netns_fd >= 0)
+               close(root_netns_fd);
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c b/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c
new file mode 100644 (file)
index 0000000..b77adfd
--- /dev/null
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#define bpf_tcp_sk(skc)        ({                              \
+       struct sock_common *_skc = skc;                 \
+       sk = NULL;                                      \
+       tp = NULL;                                      \
+       if (_skc) {                                     \
+               tp = bpf_skc_to_tcp_sock(_skc);         \
+               sk = (struct sock *)tp;                 \
+       }                                               \
+       tp;                                             \
+})
+
+unsigned short reuse_listen_hport = 0;
+unsigned short listen_hport = 0;
+char cubic_cc[TCP_CA_NAME_MAX] = "bpf_cubic";
+char dctcp_cc[TCP_CA_NAME_MAX] = "bpf_dctcp";
+bool random_retry = false;
+
+static bool tcp_cc_eq(const char *a, const char *b)
+{
+       int i;
+
+       for (i = 0; i < TCP_CA_NAME_MAX; i++) {
+               if (a[i] != b[i])
+                       return false;
+               if (!a[i])
+                       break;
+       }
+
+       return true;
+}
+
+SEC("iter/tcp")
+int change_tcp_cc(struct bpf_iter__tcp *ctx)
+{
+       char cur_cc[TCP_CA_NAME_MAX];
+       struct tcp_sock *tp;
+       struct sock *sk;
+       int ret;
+
+       if (!bpf_tcp_sk(ctx->sk_common))
+               return 0;
+
+       if (sk->sk_family != AF_INET6 ||
+           (sk->sk_state != TCP_LISTEN &&
+            sk->sk_state != TCP_ESTABLISHED) ||
+           (sk->sk_num != reuse_listen_hport &&
+            sk->sk_num != listen_hport &&
+            bpf_ntohs(sk->sk_dport) != listen_hport))
+               return 0;
+
+       if (bpf_getsockopt(tp, SOL_TCP, TCP_CONGESTION,
+                          cur_cc, sizeof(cur_cc)))
+               return 0;
+
+       if (!tcp_cc_eq(cur_cc, cubic_cc))
+               return 0;
+
+       if (random_retry && bpf_get_prandom_u32() % 4 == 1)
+               return 1;
+
+       bpf_setsockopt(tp, SOL_TCP, TCP_CONGESTION, dctcp_cc, sizeof(dctcp_cc));
+       return 0;
+}
+
+char _license[] SEC("license") = "GPL";
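
An iter/tcp program like the one above only runs when user space reads the iterator; a hedged sketch of the driving side (assuming the libbpf API of this tree; error handling trimmed, function name illustrative):

    #include <unistd.h>
    #include <bpf/libbpf.h>
    #include <bpf/bpf.h>	/* bpf_iter_create() */

    static void run_tcp_iter(struct bpf_program *prog)
    {
    	struct bpf_link *link = bpf_program__attach_iter(prog, NULL);
    	char buf[64];
    	int iter_fd;

    	if (!link)
    		return;
    	iter_fd = bpf_iter_create(bpf_link__fd(link));
    	if (iter_fd >= 0) {
    		/* each read() walks the sockets and runs the program */
    		while (read(iter_fd, buf, sizeof(buf)) > 0)
    			;
    		close(iter_fd);
    	}
    	bpf_link__destroy(link);
    }
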
index 2e4775c..92267ab 100644 (file)
@@ -121,7 +121,7 @@ static int dump_tcp_sock(struct seq_file *seq, struct tcp_sock *tp,
        }
 
        BPF_SEQ_PRINTF(seq, "%4d: %08X:%04X %08X:%04X ",
-                      seq_num, src, srcp, destp, destp);
+                      seq_num, src, srcp, dest, destp);
        BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d ",
                       state,
                       tp->write_seq - tp->snd_una, rx_queue,
index 0137891..3af0998 100644 (file)
@@ -5,6 +5,10 @@
 #define AF_INET                        2
 #define AF_INET6               10
 
+#define SOL_TCP                        6
+#define TCP_CONGESTION         13
+#define TCP_CA_NAME_MAX                16
+
 #define ICSK_TIME_RETRANS      1
 #define ICSK_TIME_PROBE0       3
 #define ICSK_TIME_LOSS_PROBE   5
@@ -32,6 +36,8 @@
 #define ir_v6_rmt_addr         req.__req_common.skc_v6_daddr
 #define ir_v6_loc_addr         req.__req_common.skc_v6_rcv_saddr
 
+#define sk_num                 __sk_common.skc_num
+#define sk_dport               __sk_common.skc_dport
 #define sk_family              __sk_common.skc_family
 #define sk_rmem_alloc          sk_backlog.rmem_alloc
 #define sk_refcnt              __sk_common.skc_refcnt
index acd587b..a587aec 100644 (file)
@@ -11,6 +11,7 @@ extern const void bpf_fentry_test3 __ksym;
 extern const void bpf_fentry_test4 __ksym;
 extern const void bpf_modify_return_test __ksym;
 extern const void bpf_fentry_test6 __ksym;
+extern const void bpf_fentry_test7 __ksym;
 
 __u64 test1_result = 0;
 SEC("fentry/bpf_fentry_test1")
@@ -71,3 +72,13 @@ int test6(struct pt_regs *ctx)
        test6_result = (const void *) addr == &bpf_fentry_test6 + 5;
        return 0;
 }
+
+__u64 test7_result = 0;
+SEC("kprobe/bpf_fentry_test7+5")
+int test7(struct pt_regs *ctx)
+{
+       __u64 addr = bpf_get_func_ip(ctx);
+
+       test7_result = (const void *) addr == &bpf_fentry_test7 + 5;
+       return 0;
+}
index d071adf..43649bc 100644 (file)
 struct {
        __uint(type, BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
        __type(key, struct bpf_cgroup_storage_key);
-       __type(value, struct percpu_net_cnt);
+       __type(value, union percpu_net_cnt);
 } percpu_netcnt SEC(".maps");
 
 struct {
        __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
        __type(key, struct bpf_cgroup_storage_key);
-       __type(value, struct net_cnt);
+       __type(value, union net_cnt);
 } netcnt SEC(".maps");
 
 SEC("cgroup/skb")
 int bpf_nextcnt(struct __sk_buff *skb)
 {
-       struct percpu_net_cnt *percpu_cnt;
+       union percpu_net_cnt *percpu_cnt;
        char fmt[] = "%d %llu %llu\n";
-       struct net_cnt *cnt;
+       union net_cnt *cnt;
        __u64 ts, dt;
        int ret;
 
diff --git a/tools/testing/selftests/bpf/progs/test_map_in_map_invalid.c b/tools/testing/selftests/bpf/progs/test_map_in_map_invalid.c
new file mode 100644 (file)
index 0000000..703c08e
--- /dev/null
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Isovalent, Inc. */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+struct inner {
+       __uint(type, BPF_MAP_TYPE_ARRAY);
+       __type(key, __u32);
+       __type(value, int);
+       __uint(max_entries, 4);
+};
+
+struct {
+       __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+       __uint(max_entries, 0); /* This will make map creation fail */
+       __uint(key_size, sizeof(__u32));
+       __array(values, struct inner);
+} mim SEC(".maps");
+
+SEC("xdp")
+int xdp_noop0(struct xdp_md *ctx)
+{
+       return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
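
The expected consumer of this object is a loader that asserts the failure; roughly (hedged sketch, object path assumed):

    struct bpf_object *obj = bpf_object__open("test_map_in_map_invalid.o");

    if (ASSERT_OK_PTR(obj, "open")) {
    	/* "mim" has max_entries == 0, so loading must fail */
    	ASSERT_ERR(bpf_object__load(obj), "load must fail");
    	bpf_object__close(obj);
    }
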
index e83d0b4..8249075 100644 (file)
@@ -91,7 +91,7 @@ int bpf_sk_lookup_test1(struct __sk_buff *skb)
        return 0;
 }
 
-SEC("classifier/fail_use_after_free")
+SEC("classifier/err_use_after_free")
 int bpf_sk_lookup_uaf(struct __sk_buff *skb)
 {
        struct bpf_sock_tuple tuple = {};
@@ -106,7 +106,7 @@ int bpf_sk_lookup_uaf(struct __sk_buff *skb)
        return family;
 }
 
-SEC("classifier/fail_modify_sk_pointer")
+SEC("classifier/err_modify_sk_pointer")
 int bpf_sk_lookup_modptr(struct __sk_buff *skb)
 {
        struct bpf_sock_tuple tuple = {};
@@ -121,7 +121,7 @@ int bpf_sk_lookup_modptr(struct __sk_buff *skb)
        return 0;
 }
 
-SEC("classifier/fail_modify_sk_or_null_pointer")
+SEC("classifier/err_modify_sk_or_null_pointer")
 int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb)
 {
        struct bpf_sock_tuple tuple = {};
@@ -135,7 +135,7 @@ int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb)
        return 0;
 }
 
-SEC("classifier/fail_no_release")
+SEC("classifier/err_no_release")
 int bpf_sk_lookup_test2(struct __sk_buff *skb)
 {
        struct bpf_sock_tuple tuple = {};
@@ -144,7 +144,7 @@ int bpf_sk_lookup_test2(struct __sk_buff *skb)
        return 0;
 }
 
-SEC("classifier/fail_release_twice")
+SEC("classifier/err_release_twice")
 int bpf_sk_lookup_test3(struct __sk_buff *skb)
 {
        struct bpf_sock_tuple tuple = {};
@@ -156,7 +156,7 @@ int bpf_sk_lookup_test3(struct __sk_buff *skb)
        return 0;
 }
 
-SEC("classifier/fail_release_unchecked")
+SEC("classifier/err_release_unchecked")
 int bpf_sk_lookup_test4(struct __sk_buff *skb)
 {
        struct bpf_sock_tuple tuple = {};
@@ -173,7 +173,7 @@ void lookup_no_release(struct __sk_buff *skb)
        bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
 }
 
-SEC("classifier/fail_no_release_subcall")
+SEC("classifier/err_no_release_subcall")
 int bpf_sk_lookup_test5(struct __sk_buff *skb)
 {
        lookup_no_release(skb);
index 94e6c2b..5f725c7 100644 (file)
@@ -3,7 +3,7 @@
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
 
-SEC("tx")
+SEC("xdp")
 int xdp_tx(struct xdp_md *xdp)
 {
        return XDP_TX;
diff --git a/tools/testing/selftests/bpf/test_bpftool_synctypes.py b/tools/testing/selftests/bpf/test_bpftool_synctypes.py
new file mode 100755 (executable)
index 0000000..be54b73
--- /dev/null
@@ -0,0 +1,586 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+#
+# Copyright (C) 2021 Isovalent, Inc.
+
+import argparse
+import re
+import os, sys
+
+LINUX_ROOT = os.path.abspath(os.path.join(__file__,
+    os.pardir, os.pardir, os.pardir, os.pardir, os.pardir))
+BPFTOOL_DIR = os.path.join(LINUX_ROOT, 'tools/bpf/bpftool')
+retval = 0
+
+class BlockParser(object):
+    """
+    A parser for extracting set of values from blocks such as enums.
+    @reader: a pointer to the open file to parse
+    """
+    def __init__(self, reader):
+        self.reader = reader
+
+    def search_block(self, start_marker):
+        """
+        Search for a given structure in a file.
+        @start_marker: regex marking the beginning of a structure to parse
+        """
+        offset = self.reader.tell()
+        array_start = re.search(start_marker, self.reader.read())
+        if array_start is None:
+            raise Exception('Failed to find start of block')
+        self.reader.seek(offset + array_start.start())
+
+    def parse(self, pattern, end_marker):
+        """
+        Parse a block and return a set of values. Values to extract must be
+        on separate lines in the file.
+        @pattern: pattern used to identify the values to extract
+        @end_marker: regex marking the end of the block to parse
+        """
+        entries = set()
+        while True:
+            line = self.reader.readline()
+            if not line or re.match(end_marker, line):
+                break
+            capture = pattern.search(line)
+            if capture and pattern.groups >= 1:
+                entries.add(capture.group(1))
+        return entries
+
+class ArrayParser(BlockParser):
+    """
+    A parser for extracting dictionaries of values from some BPF-related arrays.
+    @reader: a pointer to the open file to parse
+    @array_name: name of the array to parse
+    """
+    end_marker = re.compile('^};')
+
+    def __init__(self, reader, array_name):
+        self.array_name = array_name
+        self.start_marker = re.compile(f'(static )?const char \* const {self.array_name}\[.*\] = {{\n')
+        super().__init__(reader)
+
+    def search_block(self):
+        """
+        Search for the given array in a file.
+        """
+        super().search_block(self.start_marker)
+
+    def parse(self):
+        """
+        Parse a block and return data as a dictionary. Items to extract must be
+        on separate lines in the file.
+        """
+        pattern = re.compile('\[(BPF_\w*)\]\s*= "(.*)",?$')
+        entries = {}
+        while True:
+            line = self.reader.readline()
+            if line == '' or re.match(self.end_marker, line):
+                break
+            capture = pattern.search(line)
+            if capture:
+                entries[capture.group(1)] = capture.group(2)
+        return entries
+
+class InlineListParser(BlockParser):
+    """
+    A parser for extracting set of values from inline lists.
+    """
+    def parse(self, pattern, end_marker):
+        """
+        Parse a block and return a set of values. Multiple values to extract
+        can be on a same line in the file.
+        @pattern: pattern used to identify the values to extract
+        @end_marker: regex marking the end of the block to parse
+        """
+        entries = set()
+        while True:
+            line = self.reader.readline()
+            if not line:
+                break
+            entries.update(pattern.findall(line))
+            if re.search(end_marker, line):
+                break
+        return entries
+
+class FileExtractor(object):
+    """
+    A generic reader for extracting data from a given file. This class contains
+    several helper methods that wrap around parser objects to extract values
+    from different structures.
+    This class does not offer a way to set a filename, which is expected to be
+    defined in children classes.
+    """
+    def __init__(self):
+        self.reader = open(self.filename, 'r')
+
+    def close(self):
+        """
+        Close the file used by the parser.
+        """
+        self.reader.close()
+
+    def reset_read(self):
+        """
+        Reset the file position indicator for this parser. This is useful when
+        parsing several structures in the file without respecting the order in
+        which those structures appear in the file.
+        """
+        self.reader.seek(0)
+
+    def get_types_from_array(self, array_name):
+        """
+        Search for and parse an array associating names to BPF_* enum members,
+        for example:
+
+            const char * const prog_type_name[] = {
+                    [BPF_PROG_TYPE_UNSPEC]                  = "unspec",
+                    [BPF_PROG_TYPE_SOCKET_FILTER]           = "socket_filter",
+                    [BPF_PROG_TYPE_KPROBE]                  = "kprobe",
+            };
+
+        Return a dictionary with the enum member names as keys and the
+        associated names as values, for example:
+
+            {'BPF_PROG_TYPE_UNSPEC': 'unspec',
+             'BPF_PROG_TYPE_SOCKET_FILTER': 'socket_filter',
+             'BPF_PROG_TYPE_KPROBE': 'kprobe'}
+
+        @array_name: name of the array to parse
+        """
+        array_parser = ArrayParser(self.reader, array_name)
+        array_parser.search_block()
+        return array_parser.parse()
+
+    def get_enum(self, enum_name):
+        """
+        Search for and parse an enum containing BPF_* members, for example:
+
+            enum bpf_prog_type {
+                    BPF_PROG_TYPE_UNSPEC,
+                    BPF_PROG_TYPE_SOCKET_FILTER,
+                    BPF_PROG_TYPE_KPROBE,
+            };
+
+        Return a set containing all member names, for example:
+
+            {'BPF_PROG_TYPE_UNSPEC',
+             'BPF_PROG_TYPE_SOCKET_FILTER',
+             'BPF_PROG_TYPE_KPROBE'}
+
+        @enum_name: name of the enum to parse
+        """
+        start_marker = re.compile(f'enum {enum_name} {{\n')
+        pattern = re.compile('^\s*(BPF_\w+),?$')
+        end_marker = re.compile('^};')
+        parser = BlockParser(self.reader)
+        parser.search_block(start_marker)
+        return parser.parse(pattern, end_marker)
+
+    def __get_description_list(self, start_marker, pattern, end_marker):
+        parser = InlineListParser(self.reader)
+        parser.search_block(start_marker)
+        return parser.parse(pattern, end_marker)
+
+    def get_rst_list(self, block_name):
+        """
+        Search for and parse a list of type names from RST documentation, for
+        example:
+
+             |       *TYPE* := {
+             |               **socket** | **kprobe** |
+             |               **kretprobe**
+             |       }
+
+        Return a set containing all type names, for example:
+
+            {'socket', 'kprobe', 'kretprobe'}
+
+        @block_name: name of the block to parse, 'TYPE' in the example
+        """
+        start_marker = re.compile(f'\*{block_name}\* := {{')
+        pattern = re.compile('\*\*([\w/-]+)\*\*')
+        end_marker = re.compile('}\n')
+        return self.__get_description_list(start_marker, pattern, end_marker)
+
+    def get_help_list(self, block_name):
+        """
+        Search for and parse a list of type names from a help message in
+        bpftool, for example:
+
+            "       TYPE := { socket | kprobe |\\n"
+            "               kretprobe }\\n"
+
+        Return a set containing all type names, for example:
+
+            {'socket', 'kprobe', 'kretprobe'}
+
+        @block_name: name of the block to parse, 'TYPE' in the example
+        """
+        start_marker = re.compile(f'"\s*{block_name} := {{')
+        pattern = re.compile('([\w/]+) [|}]')
+        end_marker = re.compile('}')
+        return self.__get_description_list(start_marker, pattern, end_marker)
+
+    def get_help_list_macro(self, macro):
+        """
+        Search for and parse a list of values from a help message starting with
+        a macro in bpftool, for example:
+
+            "       " HELP_SPEC_OPTIONS " |\\n"
+            "                    {-f|--bpffs} | {-m|--mapcompat} | {-n|--nomount} }\\n"
+
+        Return a set containing all item names, for example:
+
+            {'-f', '--bpffs', '-m', '--mapcompat', '-n', '--nomount'}
+
+        @macro: macro starting the block, 'HELP_SPEC_OPTIONS' in the example
+        """
+        start_marker = re.compile(f'"\s*{macro}\s*" [|}}]')
+        pattern = re.compile('([\w-]+) ?(?:\||}[ }\]])')
+        end_marker = re.compile('}\\\\n')
+        return self.__get_description_list(start_marker, pattern, end_marker)
+
+    def default_options(self):
+        """
+        Return the default options contained in HELP_SPEC_OPTIONS
+        """
+        return { '-j', '--json', '-p', '--pretty', '-d', '--debug' }
+
+    def get_bashcomp_list(self, block_name):
+        """
+        Search for and parse a list of type names from a variable in bash
+        completion file, for example:
+
+            local BPFTOOL_PROG_LOAD_TYPES='socket kprobe \\
+                kretprobe'
+
+        Return a set containing all type names, for example:
+
+            {'socket', 'kprobe', 'kretprobe'}
+
+        @block_name: name of the block to parse, 'TYPE' in the example
+        """
+        start_marker = re.compile(f'local {block_name}=\'')
+        pattern = re.compile('(?:.*=\')?([\w/]+)')
+        end_marker = re.compile('\'$')
+        return self.__get_description_list(start_marker, pattern, end_marker)
+
+class SourceFileExtractor(FileExtractor):
+    """
+    An abstract extractor for a source file with usage message.
+    This class does not offer a way to set a filename, which is expected to be
+    defined in children classes.
+    """
+    def get_options(self):
+        return self.default_options().union(self.get_help_list_macro('HELP_SPEC_OPTIONS'))
+
+class ProgFileExtractor(SourceFileExtractor):
+    """
+    An extractor for bpftool's prog.c.
+    """
+    filename = os.path.join(BPFTOOL_DIR, 'prog.c')
+
+    def get_prog_types(self):
+        return self.get_types_from_array('prog_type_name')
+
+    def get_attach_types(self):
+        return self.get_types_from_array('attach_type_strings')
+
+    def get_prog_attach_help(self):
+        return self.get_help_list('ATTACH_TYPE')
+
+class MapFileExtractor(SourceFileExtractor):
+    """
+    An extractor for bpftool's map.c.
+    """
+    filename = os.path.join(BPFTOOL_DIR, 'map.c')
+
+    def get_map_types(self):
+        return self.get_types_from_array('map_type_name')
+
+    def get_map_help(self):
+        return self.get_help_list('TYPE')
+
+class CgroupFileExtractor(SourceFileExtractor):
+    """
+    An extractor for bpftool's cgroup.c.
+    """
+    filename = os.path.join(BPFTOOL_DIR, 'cgroup.c')
+
+    def get_prog_attach_help(self):
+        return self.get_help_list('ATTACH_TYPE')
+
+class CommonFileExtractor(SourceFileExtractor):
+    """
+    An extractor for bpftool's common.c.
+    """
+    filename = os.path.join(BPFTOOL_DIR, 'common.c')
+
+    def __init__(self):
+        super().__init__()
+        self.attach_types = {}
+
+    def get_attach_types(self):
+        if not self.attach_types:
+            self.attach_types = self.get_types_from_array('attach_type_name')
+        return self.attach_types
+
+    def get_cgroup_attach_types(self):
+        if not self.attach_types:
+            self.get_attach_types()
+        cgroup_types = {}
+        for (key, value) in self.attach_types.items():
+            if key.find('BPF_CGROUP') != -1:
+                cgroup_types[key] = value
+        return cgroup_types
+
+class GenericSourceExtractor(SourceFileExtractor):
+    """
+    An extractor for generic source code files.
+    """
+    filename = ""
+
+    def __init__(self, filename):
+        self.filename = os.path.join(BPFTOOL_DIR, filename)
+        super().__init__()
+
+class BpfHeaderExtractor(FileExtractor):
+    """
+    An extractor for the UAPI BPF header.
+    """
+    filename = os.path.join(LINUX_ROOT, 'tools/include/uapi/linux/bpf.h')
+
+    def get_prog_types(self):
+        return self.get_enum('bpf_prog_type')
+
+    def get_map_types(self):
+        return self.get_enum('bpf_map_type')
+
+    def get_attach_types(self):
+        return self.get_enum('bpf_attach_type')
+
+class ManPageExtractor(FileExtractor):
+    """
+    An abstract extractor for an RST documentation page.
+    This class does not offer a way to set a filename, which is expected to be
+    defined in children classes.
+    """
+    def get_options(self):
+        return self.get_rst_list('OPTIONS')
+
+class ManProgExtractor(ManPageExtractor):
+    """
+    An extractor for bpftool-prog.rst.
+    """
+    filename = os.path.join(BPFTOOL_DIR, 'Documentation/bpftool-prog.rst')
+
+    def get_attach_types(self):
+        return self.get_rst_list('ATTACH_TYPE')
+
+class ManMapExtractor(ManPageExtractor):
+    """
+    An extractor for bpftool-map.rst.
+    """
+    filename = os.path.join(BPFTOOL_DIR, 'Documentation/bpftool-map.rst')
+
+    def get_map_types(self):
+        return self.get_rst_list('TYPE')
+
+class ManCgroupExtractor(ManPageExtractor):
+    """
+    An extractor for bpftool-cgroup.rst.
+    """
+    filename = os.path.join(BPFTOOL_DIR, 'Documentation/bpftool-cgroup.rst')
+
+    def get_attach_types(self):
+        return self.get_rst_list('ATTACH_TYPE')
+
+class ManGenericExtractor(ManPageExtractor):
+    """
+    An extractor for generic RST documentation pages.
+    """
+    filename = ""
+
+    def __init__(self, filename):
+        self.filename = os.path.join(BPFTOOL_DIR, filename)
+        super().__init__()
+
+class BashcompExtractor(FileExtractor):
+    """
+    An extractor for bpftool's bash completion file.
+    """
+    filename = os.path.join(BPFTOOL_DIR, 'bash-completion/bpftool')
+
+    def get_prog_attach_types(self):
+        return self.get_bashcomp_list('BPFTOOL_PROG_ATTACH_TYPES')
+
+    def get_map_types(self):
+        return self.get_bashcomp_list('BPFTOOL_MAP_CREATE_TYPES')
+
+    def get_cgroup_attach_types(self):
+        return self.get_bashcomp_list('BPFTOOL_CGROUP_ATTACH_TYPES')
+
+def verify(first_set, second_set, message):
+    """
+    Print all values that differ between two sets.
+    @first_set: one set to compare
+    @second_set: another set to compare
+    @message: message to print for values belonging to only one of the sets
+    """
+    global retval
+    diff = first_set.symmetric_difference(second_set)
+    if diff:
+        print(message, diff)
+        retval = 1
+
+def main():
+    # No arguments supported at this time, but print usage for -h|--help
+    argParser = argparse.ArgumentParser(description="""
+    Verify that bpftool's code, help messages, documentation and bash
+    completion are all in sync on program types, map types, attach types, and
+    options. Also check that bpftool is in sync with the UAPI BPF header.
+    """)
+    args = argParser.parse_args()
+
+    # Map types (enum)
+
+    bpf_info = BpfHeaderExtractor()
+    ref = bpf_info.get_map_types()
+
+    map_info = MapFileExtractor()
+    source_map_items = map_info.get_map_types()
+    map_types_enum = set(source_map_items.keys())
+
+    verify(ref, map_types_enum,
+            f'Comparing BPF header (enum bpf_map_type) and {MapFileExtractor.filename} (map_type_name):')
+
+    # Map types (names)
+
+    source_map_types = set(source_map_items.values())
+    source_map_types.discard('unspec')
+
+    help_map_types = map_info.get_map_help()
+    help_map_options = map_info.get_options()
+    map_info.close()
+
+    man_map_info = ManMapExtractor()
+    man_map_options = man_map_info.get_options()
+    man_map_types = man_map_info.get_map_types()
+    man_map_info.close()
+
+    bashcomp_info = BashcompExtractor()
+    bashcomp_map_types = bashcomp_info.get_map_types()
+
+    verify(source_map_types, help_map_types,
+            f'Comparing {MapFileExtractor.filename} (map_type_name) and {MapFileExtractor.filename} (do_help() TYPE):')
+    verify(source_map_types, man_map_types,
+            f'Comparing {MapFileExtractor.filename} (map_type_name) and {ManMapExtractor.filename} (TYPE):')
+    verify(help_map_options, man_map_options,
+            f'Comparing {MapFileExtractor.filename} (do_help() OPTIONS) and {ManMapExtractor.filename} (OPTIONS):')
+    verify(source_map_types, bashcomp_map_types,
+            f'Comparing {MapFileExtractor.filename} (map_type_name) and {BashcompExtractor.filename} (BPFTOOL_MAP_CREATE_TYPES):')
+
+    # Program types (enum)
+
+    ref = bpf_info.get_prog_types()
+
+    prog_info = ProgFileExtractor()
+    prog_types = set(prog_info.get_prog_types().keys())
+
+    verify(ref, prog_types,
+            f'Comparing BPF header (enum bpf_prog_type) and {ProgFileExtractor.filename} (prog_type_name):')
+
+    # Attach types (enum)
+
+    ref = bpf_info.get_attach_types()
+    bpf_info.close()
+
+    common_info = CommonFileExtractor()
+    attach_types = common_info.get_attach_types()
+
+    verify(ref, attach_types,
+            f'Comparing BPF header (enum bpf_attach_type) and {CommonFileExtractor.filename} (attach_type_name):')
+
+    # Attach types (names)
+
+    source_prog_attach_types = set(prog_info.get_attach_types().values())
+
+    help_prog_attach_types = prog_info.get_prog_attach_help()
+    help_prog_options = prog_info.get_options()
+    prog_info.close()
+
+    man_prog_info = ManProgExtractor()
+    man_prog_options = man_prog_info.get_options()
+    man_prog_attach_types = man_prog_info.get_attach_types()
+    man_prog_info.close()
+
+    bashcomp_info.reset_read() # We stopped at map types, rewind
+    bashcomp_prog_attach_types = bashcomp_info.get_prog_attach_types()
+
+    verify(source_prog_attach_types, help_prog_attach_types,
+            f'Comparing {ProgFileExtractor.filename} (attach_type_strings) and {ProgFileExtractor.filename} (do_help() ATTACH_TYPE):')
+    verify(source_prog_attach_types, man_prog_attach_types,
+            f'Comparing {ProgFileExtractor.filename} (attach_type_strings) and {ManProgExtractor.filename} (ATTACH_TYPE):')
+    verify(help_prog_options, man_prog_options,
+            f'Comparing {ProgFileExtractor.filename} (do_help() OPTIONS) and {ManProgExtractor.filename} (OPTIONS):')
+    verify(source_prog_attach_types, bashcomp_prog_attach_types,
+            f'Comparing {ProgFileExtractor.filename} (attach_type_strings) and {BashcompExtractor.filename} (BPFTOOL_PROG_ATTACH_TYPES):')
+
+    # Cgroup attach types
+
+    source_cgroup_attach_types = set(common_info.get_cgroup_attach_types().values())
+    common_info.close()
+
+    cgroup_info = CgroupFileExtractor()
+    help_cgroup_attach_types = cgroup_info.get_prog_attach_help()
+    help_cgroup_options = cgroup_info.get_options()
+    cgroup_info.close()
+
+    man_cgroup_info = ManCgroupExtractor()
+    man_cgroup_options = man_cgroup_info.get_options()
+    man_cgroup_attach_types = man_cgroup_info.get_attach_types()
+    man_cgroup_info.close()
+
+    bashcomp_cgroup_attach_types = bashcomp_info.get_cgroup_attach_types()
+    bashcomp_info.close()
+
+    verify(source_cgroup_attach_types, help_cgroup_attach_types,
+            f'Comparing {CommonFileExtractor.filename} (attach_type_strings) and {CgroupFileExtractor.filename} (do_help() ATTACH_TYPE):')
+    verify(source_cgroup_attach_types, man_cgroup_attach_types,
+            f'Comparing {CommonFileExtractor.filename} (attach_type_strings) and {ManCgroupExtractor.filename} (ATTACH_TYPE):')
+    verify(help_cgroup_options, man_cgroup_options,
+            f'Comparing {CgroupFileExtractor.filename} (do_help() OPTIONS) and {ManCgroupExtractor.filename} (OPTIONS):')
+    verify(source_cgroup_attach_types, bashcomp_cgroup_attach_types,
+            f'Comparing {CommonFileExtractor.filename} (attach_type_strings) and {BashcompExtractor.filename} (BPFTOOL_CGROUP_ATTACH_TYPES):')
+
+    # Options for remaining commands
+
+    for cmd in [ 'btf', 'feature', 'gen', 'iter', 'link', 'net', 'perf', 'struct_ops', ]:
+        source_info = GenericSourceExtractor(cmd + '.c')
+        help_cmd_options = source_info.get_options()
+        source_info.close()
+
+        man_cmd_info = ManGenericExtractor(os.path.join('Documentation', 'bpftool-' + cmd + '.rst'))
+        man_cmd_options = man_cmd_info.get_options()
+        man_cmd_info.close()
+
+        verify(help_cmd_options, man_cmd_options,
+                f'Comparing {source_info.filename} (do_help() OPTIONS) and {man_cmd_info.filename} (OPTIONS):')
+
+    source_main_info = GenericSourceExtractor('main.c')
+    help_main_options = source_main_info.get_options()
+    source_main_info.close()
+
+    man_main_info = ManGenericExtractor(os.path.join('Documentation', 'bpftool.rst'))
+    man_main_options = man_main_info.get_options()
+    man_main_info.close()
+
+    verify(help_main_options, man_main_options,
+            f'Comparing {source_main_info.filename} (do_help() OPTIONS) and {man_main_info.filename} (OPTIONS):')
+
+    sys.exit(retval)
+
+if __name__ == "__main__":
+    main()
index 30cbf5d..14cea86 100644 (file)
@@ -764,8 +764,8 @@ static void test_sockmap(unsigned int tasks, void *data)
        udp = socket(AF_INET, SOCK_DGRAM, 0);
        i = 0;
        err = bpf_map_update_elem(fd, &i, &udp, BPF_ANY);
-       if (!err) {
-               printf("Failed socket SOCK_DGRAM allowed '%i:%i'\n",
+       if (err) {
+               printf("Failed socket update SOCK_DGRAM '%i:%i'\n",
                       i, udp);
                goto out_sockmap;
        }
@@ -1153,12 +1153,17 @@ out_sockmap:
 }
 
 #define MAPINMAP_PROG "./test_map_in_map.o"
+#define MAPINMAP_INVALID_PROG "./test_map_in_map_invalid.o"
 static void test_map_in_map(void)
 {
        struct bpf_object *obj;
        struct bpf_map *map;
        int mim_fd, fd, err;
        int pos = 0;
+       struct bpf_map_info info = {};
+       __u32 len = sizeof(info);
+       __u32 id = 0;
+       libbpf_print_fn_t old_print_fn;
 
        obj = bpf_object__open(MAPINMAP_PROG);
 
@@ -1228,11 +1233,72 @@ static void test_map_in_map(void)
        }
 
        close(fd);
+       fd = -1;
        bpf_object__close(obj);
+
+       /* Test that failing bpf_object__create_map() destroys the inner map */
+       obj = bpf_object__open(MAPINMAP_INVALID_PROG);
+       err = libbpf_get_error(obj);
+       if (err) {
+               printf("Failed to load %s program: %d %d",
+                      MAPINMAP_INVALID_PROG, err, errno);
+               goto out_map_in_map;
+       }
+
+       map = bpf_object__find_map_by_name(obj, "mim");
+       if (!map) {
+               printf("Failed to load array of maps from test prog\n");
+               goto out_map_in_map;
+       }
+
+       old_print_fn = libbpf_set_print(NULL);
+
+       err = bpf_object__load(obj);
+       if (!err) {
+               printf("Loading obj was supposed to fail\n");
+               goto out_map_in_map;
+       }
+
+       libbpf_set_print(old_print_fn);
+
+       /* Iterate over all maps to check whether the inner map
+        * ("mim.inner") has been destroyed.
+        */
+       while (true) {
+               err = bpf_map_get_next_id(id, &id);
+               if (err) {
+                       if (errno == ENOENT)
+                               break;
+                       printf("Failed to get next map: %d", errno);
+                       goto out_map_in_map;
+               }
+
+               fd = bpf_map_get_fd_by_id(id);
+               if (fd < 0) {
+                       if (errno == ENOENT)
+                               continue;
+                       printf("Failed to get map by id %u: %d", id, errno);
+                       goto out_map_in_map;
+               }
+
+               err = bpf_obj_get_info_by_fd(fd, &info, &len);
+               if (err) {
+                       printf("Failed to get map info by fd %d: %d", fd,
+                              errno);
+                       goto out_map_in_map;
+               }
+
+               if (!strcmp(info.name, "mim.inner")) {
+                       printf("Inner map mim.inner was not destroyed\n");
+                       goto out_map_in_map;
+               }
+       }
+
        return;
 
 out_map_in_map:
-       close(fd);
+       if (fd >= 0)
+               close(fd);
        exit(1);
 }
 
diff --git a/tools/testing/selftests/bpf/test_netcnt.c b/tools/testing/selftests/bpf/test_netcnt.c
deleted file mode 100644 (file)
index a7b9a69..0000000
+++ /dev/null
@@ -1,148 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <errno.h>
-#include <assert.h>
-#include <sys/sysinfo.h>
-#include <sys/time.h>
-
-#include <linux/bpf.h>
-#include <bpf/bpf.h>
-#include <bpf/libbpf.h>
-
-#include "cgroup_helpers.h"
-#include "bpf_rlimit.h"
-#include "netcnt_common.h"
-
-#define BPF_PROG "./netcnt_prog.o"
-#define TEST_CGROUP "/test-network-counters/"
-
-static int bpf_find_map(const char *test, struct bpf_object *obj,
-                       const char *name)
-{
-       struct bpf_map *map;
-
-       map = bpf_object__find_map_by_name(obj, name);
-       if (!map) {
-               printf("%s:FAIL:map '%s' not found\n", test, name);
-               return -1;
-       }
-       return bpf_map__fd(map);
-}
-
-int main(int argc, char **argv)
-{
-       struct percpu_net_cnt *percpu_netcnt;
-       struct bpf_cgroup_storage_key key;
-       int map_fd, percpu_map_fd;
-       int error = EXIT_FAILURE;
-       struct net_cnt netcnt;
-       struct bpf_object *obj;
-       int prog_fd, cgroup_fd;
-       unsigned long packets;
-       unsigned long bytes;
-       int cpu, nproc;
-       __u32 prog_cnt;
-
-       nproc = get_nprocs_conf();
-       percpu_netcnt = malloc(sizeof(*percpu_netcnt) * nproc);
-       if (!percpu_netcnt) {
-               printf("Not enough memory for per-cpu area (%d cpus)\n", nproc);
-               goto err;
-       }
-
-       if (bpf_prog_load(BPF_PROG, BPF_PROG_TYPE_CGROUP_SKB,
-                         &obj, &prog_fd)) {
-               printf("Failed to load bpf program\n");
-               goto out;
-       }
-
-       cgroup_fd = cgroup_setup_and_join(TEST_CGROUP);
-       if (cgroup_fd < 0)
-               goto err;
-
-       /* Attach bpf program */
-       if (bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0)) {
-               printf("Failed to attach bpf program");
-               goto err;
-       }
-
-       if (system("which ping6 &>/dev/null") == 0)
-               assert(!system("ping6 ::1 -c 10000 -f -q > /dev/null"));
-       else
-               assert(!system("ping -6 ::1 -c 10000 -f -q > /dev/null"));
-
-       if (bpf_prog_query(cgroup_fd, BPF_CGROUP_INET_EGRESS, 0, NULL, NULL,
-                          &prog_cnt)) {
-               printf("Failed to query attached programs");
-               goto err;
-       }
-
-       map_fd = bpf_find_map(__func__, obj, "netcnt");
-       if (map_fd < 0) {
-               printf("Failed to find bpf map with net counters");
-               goto err;
-       }
-
-       percpu_map_fd = bpf_find_map(__func__, obj, "percpu_netcnt");
-       if (percpu_map_fd < 0) {
-               printf("Failed to find bpf map with percpu net counters");
-               goto err;
-       }
-
-       if (bpf_map_get_next_key(map_fd, NULL, &key)) {
-               printf("Failed to get key in cgroup storage\n");
-               goto err;
-       }
-
-       if (bpf_map_lookup_elem(map_fd, &key, &netcnt)) {
-               printf("Failed to lookup cgroup storage\n");
-               goto err;
-       }
-
-       if (bpf_map_lookup_elem(percpu_map_fd, &key, &percpu_netcnt[0])) {
-               printf("Failed to lookup percpu cgroup storage\n");
-               goto err;
-       }
-
-       /* Some packets can be still in per-cpu cache, but not more than
-        * MAX_PERCPU_PACKETS.
-        */
-       packets = netcnt.packets;
-       bytes = netcnt.bytes;
-       for (cpu = 0; cpu < nproc; cpu++) {
-               if (percpu_netcnt[cpu].packets > MAX_PERCPU_PACKETS) {
-                       printf("Unexpected percpu value: %llu\n",
-                              percpu_netcnt[cpu].packets);
-                       goto err;
-               }
-
-               packets += percpu_netcnt[cpu].packets;
-               bytes += percpu_netcnt[cpu].bytes;
-       }
-
-       /* No packets should be lost */
-       if (packets != 10000) {
-               printf("Unexpected packet count: %lu\n", packets);
-               goto err;
-       }
-
-       /* Let's check that bytes counter matches the number of packets
-        * multiplied by the size of ipv6 ICMP packet.
-        */
-       if (bytes != packets * 104) {
-               printf("Unexpected bytes count: %lu\n", bytes);
-               goto err;
-       }
-
-       error = 0;
-       printf("test_netcnt:PASS\n");
-
-err:
-       cleanup_cgroup_environment();
-       free(percpu_netcnt);
-
-out:
-       return error;
-}
index 8ef7f33..c8c2bf8 100644 (file)
@@ -221,6 +221,18 @@ extern int test__join_cgroup(const char *path);
        ___ok;                                                          \
 })
 
+#define ASSERT_STRNEQ(actual, expected, len, name) ({                  \
+       static int duration = 0;                                        \
+       const char *___act = actual;                                    \
+       const char *___exp = expected;                                  \
+       int ___len = len;                                               \
+       bool ___ok = strncmp(___act, ___exp, ___len) == 0;              \
+       CHECK(!___ok, (name),                                           \
+             "unexpected %s: actual '%.*s' != expected '%.*s'\n",      \
+             (name), ___len, ___act, ___len, ___exp);                  \
+       ___ok;                                                          \
+})
+
 #define ASSERT_OK(res, name) ({                                                \
        static int duration = 0;                                        \
        long long ___res = (res);                                       \
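Note on the new ASSERT_STRNEQ macro above: like the other ASSERT_* helpers it evaluates to true on success, so callers can bail out early. A hypothetical call site (the names here are invented for illustration), checking only a fixed-length prefix:

    /* Hypothetical: passes when the first 4 bytes match the "mim." prefix. */
    if (!ASSERT_STRNEQ(info.name, "mim.", 4, "map_name_prefix"))
            return;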
index c9dde9b..088fcad 100755 (executable)
@@ -69,7 +69,7 @@ cleanup() {
 }
 
 server_listen() {
-       ip netns exec "${ns2}" nc "${netcat_opt}" -l -p "${port}" > "${outfile}" &
+       ip netns exec "${ns2}" nc "${netcat_opt}" -l "${port}" > "${outfile}" &
        server_pid=$!
        sleep 0.2
 }
index ba8ffcd..995278e 100755 (executable)
@@ -108,7 +108,7 @@ ip link set dev veth2 xdp pinned $BPF_DIR/progs/redirect_map_1
 ip link set dev veth3 xdp pinned $BPF_DIR/progs/redirect_map_2
 
 ip -n ns1 link set dev veth11 xdp obj xdp_dummy.o sec xdp_dummy
-ip -n ns2 link set dev veth22 xdp obj xdp_tx.o sec tx
+ip -n ns2 link set dev veth22 xdp obj xdp_tx.o sec xdp
 ip -n ns3 link set dev veth33 xdp obj xdp_dummy.o sec xdp_dummy
 
 trap cleanup EXIT
index a3e593d..2debba4 100644 (file)
@@ -1,3 +1,232 @@
+{
+       "map access: known scalar += value_ptr unknown vs const",
+       .insns = {
+       BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+                   offsetof(struct __sk_buff, len)),
+       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3),
+       BPF_LD_MAP_FD(BPF_REG_1, 0),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
+       BPF_LD_MAP_FD(BPF_REG_1, 0),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+       BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 4),
+       BPF_MOV64_IMM(BPF_REG_1, 6),
+       BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0),
+       BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7),
+       BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+       BPF_MOV64_IMM(BPF_REG_1, 3),
+       BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+       BPF_MOV64_IMM(BPF_REG_0, 1),
+       BPF_EXIT_INSN(),
+       },
+       .fixup_map_hash_16b = { 5 },
+       .fixup_map_array_48b = { 8 },
+       .result_unpriv = REJECT,
+       .errstr_unpriv = "R1 tried to add from different maps, paths or scalars",
+       .result = ACCEPT,
+       .retval = 1,
+},
+{
+       "map access: known scalar += value_ptr const vs unknown",
+       .insns = {
+       BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+                   offsetof(struct __sk_buff, len)),
+       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3),
+       BPF_LD_MAP_FD(BPF_REG_1, 0),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
+       BPF_LD_MAP_FD(BPF_REG_1, 0),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+       BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 2),
+       BPF_MOV64_IMM(BPF_REG_1, 3),
+       BPF_JMP_IMM(BPF_JA, 0, 0, 3),
+       BPF_MOV64_IMM(BPF_REG_1, 6),
+       BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0),
+       BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7),
+       BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+       BPF_MOV64_IMM(BPF_REG_0, 1),
+       BPF_EXIT_INSN(),
+       },
+       .fixup_map_hash_16b = { 5 },
+       .fixup_map_array_48b = { 8 },
+       .result_unpriv = REJECT,
+       .errstr_unpriv = "R1 tried to add from different maps, paths or scalars",
+       .result = ACCEPT,
+       .retval = 1,
+},
+{
+       "map access: known scalar += value_ptr const vs const (ne)",
+       .insns = {
+       BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+                   offsetof(struct __sk_buff, len)),
+       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3),
+       BPF_LD_MAP_FD(BPF_REG_1, 0),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
+       BPF_LD_MAP_FD(BPF_REG_1, 0),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+       BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 2),
+       BPF_MOV64_IMM(BPF_REG_1, 3),
+       BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+       BPF_MOV64_IMM(BPF_REG_1, 5),
+       BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+       BPF_MOV64_IMM(BPF_REG_0, 1),
+       BPF_EXIT_INSN(),
+       },
+       .fixup_map_hash_16b = { 5 },
+       .fixup_map_array_48b = { 8 },
+       .result_unpriv = REJECT,
+       .errstr_unpriv = "R1 tried to add from different maps, paths or scalars",
+       .result = ACCEPT,
+       .retval = 1,
+},
+{
+       "map access: known scalar += value_ptr const vs const (eq)",
+       .insns = {
+       BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+                   offsetof(struct __sk_buff, len)),
+       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3),
+       BPF_LD_MAP_FD(BPF_REG_1, 0),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
+       BPF_LD_MAP_FD(BPF_REG_1, 0),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+       BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 2),
+       BPF_MOV64_IMM(BPF_REG_1, 5),
+       BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+       BPF_MOV64_IMM(BPF_REG_1, 5),
+       BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+       BPF_MOV64_IMM(BPF_REG_0, 1),
+       BPF_EXIT_INSN(),
+       },
+       .fixup_map_hash_16b = { 5 },
+       .fixup_map_array_48b = { 8 },
+       .result = ACCEPT,
+       .retval = 1,
+},
+{
+       "map access: known scalar += value_ptr unknown vs unknown (eq)",
+       .insns = {
+       BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+                   offsetof(struct __sk_buff, len)),
+       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3),
+       BPF_LD_MAP_FD(BPF_REG_1, 0),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
+       BPF_LD_MAP_FD(BPF_REG_1, 0),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
+       BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 4),
+       BPF_MOV64_IMM(BPF_REG_1, 6),
+       BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0),
+       BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7),
+       BPF_JMP_IMM(BPF_JA, 0, 0, 3),
+       BPF_MOV64_IMM(BPF_REG_1, 6),
+       BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0),
+       BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7),
+       BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+       BPF_MOV64_IMM(BPF_REG_0, 1),
+       BPF_EXIT_INSN(),
+       },
+       .fixup_map_hash_16b = { 5 },
+       .fixup_map_array_48b = { 8 },
+       .result = ACCEPT,
+       .retval = 1,
+},
+{
+       "map access: known scalar += value_ptr unknown vs unknown (lt)",
+       .insns = {
+       BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+                   offsetof(struct __sk_buff, len)),
+       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3),
+       BPF_LD_MAP_FD(BPF_REG_1, 0),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
+       BPF_LD_MAP_FD(BPF_REG_1, 0),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
+       BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 4),
+       BPF_MOV64_IMM(BPF_REG_1, 6),
+       BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0),
+       BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x3),
+       BPF_JMP_IMM(BPF_JA, 0, 0, 3),
+       BPF_MOV64_IMM(BPF_REG_1, 6),
+       BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0),
+       BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7),
+       BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+       BPF_MOV64_IMM(BPF_REG_0, 1),
+       BPF_EXIT_INSN(),
+       },
+       .fixup_map_hash_16b = { 5 },
+       .fixup_map_array_48b = { 8 },
+       .result_unpriv = REJECT,
+       .errstr_unpriv = "R1 tried to add from different maps, paths or scalars",
+       .result = ACCEPT,
+       .retval = 1,
+},
+{
+       "map access: known scalar += value_ptr unknown vs unknown (gt)",
+       .insns = {
+       BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+                   offsetof(struct __sk_buff, len)),
+       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3),
+       BPF_LD_MAP_FD(BPF_REG_1, 0),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
+       BPF_LD_MAP_FD(BPF_REG_1, 0),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
+       BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 4),
+       BPF_MOV64_IMM(BPF_REG_1, 6),
+       BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0),
+       BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7),
+       BPF_JMP_IMM(BPF_JA, 0, 0, 3),
+       BPF_MOV64_IMM(BPF_REG_1, 6),
+       BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0),
+       BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x3),
+       BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+       BPF_MOV64_IMM(BPF_REG_0, 1),
+       BPF_EXIT_INSN(),
+       },
+       .fixup_map_hash_16b = { 5 },
+       .fixup_map_array_48b = { 8 },
+       .result_unpriv = REJECT,
+       .errstr_unpriv = "R1 tried to add from different maps, paths or scalars",
+       .result = ACCEPT,
+       .retval = 1,
+},
 {
        "map access: known scalar += value_ptr from different maps",
        .insns = {
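Read as C, the new "known scalar += value_ptr" cases above share one skeleton. A rough illustrative rendering of the "unknown vs const" variant follows (simplified to a single map); this is hypothetical source, not the tests' actual input: the tests emit raw instructions precisely because a C compiler would constant-fold the NEG/AND sequence that the verifier must treat as an unknown-but-bounded scalar.

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    struct {
            __uint(type, BPF_MAP_TYPE_ARRAY);
            __uint(max_entries, 1);
            __type(key, __u32);
            __type(value, char[48]);
    } map_48b SEC(".maps");

    SEC("socket")
    int unknown_vs_const(struct __sk_buff *skb)
    {
            __u32 key = 0;
            unsigned long off;
            char *val = bpf_map_lookup_elem(&map_48b, &key);

            if (!val)
                    return 1;
            if (val[0] == 1)
                    off = (0UL - 6) & 0x7;  /* "unknown" scalar, bounded to [0, 7] */
            else
                    off = 3;                /* constant scalar */
            return *(val + off);            /* scalar += value_ptr, then load */
    }

Privileged mode accepts this since both paths stay in bounds; unprivileged mode rejects it with "R1 tried to add from different maps, paths or scalars" because its pointer-arithmetic sanitation refuses scalars whose value was chosen on different verifier paths.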
index 06a351b..0709af0 100644 (file)
@@ -38,6 +38,7 @@
 /x86_64/xen_vmcall_test
 /x86_64/xss_msr_test
 /x86_64/vmx_pmu_msrs_test
+/access_tracking_perf_test
 /demand_paging_test
 /dirty_log_test
 /dirty_log_perf_test
index b853be2..5832f51 100644 (file)
@@ -71,6 +71,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_pmu_msrs_test
 TEST_GEN_PROGS_x86_64 += x86_64/xen_shinfo_test
 TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test
+TEST_GEN_PROGS_x86_64 += access_tracking_perf_test
 TEST_GEN_PROGS_x86_64 += demand_paging_test
 TEST_GEN_PROGS_x86_64 += dirty_log_test
 TEST_GEN_PROGS_x86_64 += dirty_log_perf_test
index a16c8f0..cc89818 100644 (file)
@@ -1019,7 +1019,8 @@ static __u64 sve_rejects_set[] = {
 #define VREGS_SUBLIST \
        { "vregs", .regs = vregs, .regs_n = ARRAY_SIZE(vregs), }
 #define PMU_SUBLIST \
-       { "pmu", .regs = pmu_regs, .regs_n = ARRAY_SIZE(pmu_regs), }
+       { "pmu", .capability = KVM_CAP_ARM_PMU_V3, .feature = KVM_ARM_VCPU_PMU_V3, \
+         .regs = pmu_regs, .regs_n = ARRAY_SIZE(pmu_regs), }
 #define SVE_SUBLIST \
        { "sve", .capability = KVM_CAP_ARM_SVE, .feature = KVM_ARM_VCPU_SVE, .finalize = true, \
          .regs = sve_regs, .regs_n = ARRAY_SIZE(sve_regs), \
diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c
new file mode 100644 (file)
index 0000000..e2baa18
--- /dev/null
@@ -0,0 +1,429 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * access_tracking_perf_test
+ *
+ * Copyright (C) 2021, Google, Inc.
+ *
+ * This test measures the performance effects of KVM's access tracking.
+ * Access tracking is driven by the MMU notifiers test_young, clear_young, and
+ * clear_flush_young. These notifiers do not have a direct userspace API,
+ * however the clear_young notifier can be triggered by marking a pages as idle
+ * in /sys/kernel/mm/page_idle/bitmap. This test leverages that mechanism to
+ * enable access tracking on guest memory.
+ *
+ * To measure performance this test runs a VM with a configurable number of
+ * vCPUs that each touch every page in disjoint regions of memory. Performance
+ * is measured in the time it takes all vCPUs to finish touching their
+ * predefined region.
+ *
+ * Note that a deterministic correctness test of access tracking is not possible
+ * by using page_idle as it exists today. This is for a few reasons:
+ *
+ * 1. page_idle only issues clear_young notifiers, which lack a TLB flush. This
+ *    means subsequent guest accesses are not guaranteed to see page table
+ *    updates made by KVM until some time in the future.
+ *
+ * 2. page_idle only operates on LRU pages. Newly allocated pages are not
+ *    immediately added to LRU lists. Instead they are held in a "pagevec",
+ *    which is drained to LRU lists some time in the future. There is no
+ *    userspace API to force this drain to occur.
+ *
+ * These limitations are worked around in this test by using a large enough
+ * region of memory for each vCPU such that the number of translations cached in
+ * the TLB and the number of pages held in pagevecs are a small fraction of the
+ * overall workload. And if either of those conditions is not true, this test
+ * will fail rather than silently passing.
+ */
+#include <inttypes.h>
+#include <limits.h>
+#include <pthread.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "kvm_util.h"
+#include "test_util.h"
+#include "perf_test_util.h"
+#include "guest_modes.h"
+
+/* Global variable used to synchronize all of the vCPU threads. */
+static int iteration = -1;
+
+/* Defines what vCPU threads should do during a given iteration. */
+static enum {
+       /* Run the vCPU to access all its memory. */
+       ITERATION_ACCESS_MEMORY,
+       /* Mark the vCPU's memory idle in page_idle. */
+       ITERATION_MARK_IDLE,
+} iteration_work;
+
+/* Set to true when vCPU threads should exit. */
+static bool done;
+
+/* The iteration that was last completed by each vCPU. */
+static int vcpu_last_completed_iteration[KVM_MAX_VCPUS];
+
+/* Whether to overlap the regions of memory vCPUs access. */
+static bool overlap_memory_access;
+
+struct test_params {
+       /* The backing source for the region of memory. */
+       enum vm_mem_backing_src_type backing_src;
+
+       /* The amount of memory to allocate for each vCPU. */
+       uint64_t vcpu_memory_bytes;
+
+       /* The number of vCPUs to create in the VM. */
+       int vcpus;
+};
+
+static uint64_t pread_uint64(int fd, const char *filename, uint64_t index)
+{
+       uint64_t value;
+       off_t offset = index * sizeof(value);
+
+       TEST_ASSERT(pread(fd, &value, sizeof(value), offset) == sizeof(value),
+                   "pread from %s offset 0x%" PRIx64 " failed!",
+                   filename, offset);
+
+       return value;
+}
+
+#define PAGEMAP_PRESENT (1ULL << 63)
+#define PAGEMAP_PFN_MASK ((1ULL << 55) - 1)
+
+static uint64_t lookup_pfn(int pagemap_fd, struct kvm_vm *vm, uint64_t gva)
+{
+       uint64_t hva = (uint64_t) addr_gva2hva(vm, gva);
+       uint64_t entry;
+       uint64_t pfn;
+
+       entry = pread_uint64(pagemap_fd, "pagemap", hva / getpagesize());
+       if (!(entry & PAGEMAP_PRESENT))
+               return 0;
+
+       pfn = entry & PAGEMAP_PFN_MASK;
+       if (!pfn) {
+               print_skip("Looking up PFNs requires CAP_SYS_ADMIN");
+               exit(KSFT_SKIP);
+       }
+
+       return pfn;
+}
+
+static bool is_page_idle(int page_idle_fd, uint64_t pfn)
+{
+       uint64_t bits = pread_uint64(page_idle_fd, "page_idle", pfn / 64);
+
+       return !!((bits >> (pfn % 64)) & 1);
+}
+
+static void mark_page_idle(int page_idle_fd, uint64_t pfn)
+{
+       uint64_t bits = 1ULL << (pfn % 64);
+
+       TEST_ASSERT(pwrite(page_idle_fd, &bits, 8, 8 * (pfn / 64)) == 8,
+                   "Set page_idle bits for PFN 0x%" PRIx64, pfn);
+}
+
+static void mark_vcpu_memory_idle(struct kvm_vm *vm, int vcpu_id)
+{
+       uint64_t base_gva = perf_test_args.vcpu_args[vcpu_id].gva;
+       uint64_t pages = perf_test_args.vcpu_args[vcpu_id].pages;
+       uint64_t page;
+       uint64_t still_idle = 0;
+       uint64_t no_pfn = 0;
+       int page_idle_fd;
+       int pagemap_fd;
+
+       /* If vCPUs are using an overlapping region, let vCPU 0 mark it idle. */
+       if (overlap_memory_access && vcpu_id)
+               return;
+
+       page_idle_fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR);
+       TEST_ASSERT(page_idle_fd > 0, "Failed to open page_idle.");
+
+       pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
+       TEST_ASSERT(pagemap_fd > 0, "Failed to open pagemap.");
+
+       for (page = 0; page < pages; page++) {
+               uint64_t gva = base_gva + page * perf_test_args.guest_page_size;
+               uint64_t pfn = lookup_pfn(pagemap_fd, vm, gva);
+
+               if (!pfn) {
+                       no_pfn++;
+                       continue;
+               }
+
+               if (is_page_idle(page_idle_fd, pfn)) {
+                       still_idle++;
+                       continue;
+               }
+
+               mark_page_idle(page_idle_fd, pfn);
+       }
+
+       /*
+        * Assumption: Less than 1% of pages are going to be swapped out from
+        * under us during this test.
+        */
+       TEST_ASSERT(no_pfn < pages / 100,
+                   "vCPU %d: No PFN for %" PRIu64 " out of %" PRIu64 " pages.",
+                   vcpu_id, no_pfn, pages);
+
+       /*
+        * Test that at least 90% of memory has been marked idle (the rest might
+        * not be marked idle because the pages have not yet made it to an LRU
+        * list or the translations are still cached in the TLB). 90% is
+        * arbitrary; high enough that we ensure most memory access went through
+        * access tracking but low enough so as not to make the test too brittle
+        * over time and across architectures.
+        */
+       TEST_ASSERT(still_idle < pages / 10,
+                   "vCPU%d: Too many pages still idle (%"PRIu64 " out of %"
+                   PRIu64 ").\n",
+                   vcpu_id, still_idle, pages);
+
+       close(page_idle_fd);
+       close(pagemap_fd);
+}
+
+static void assert_ucall(struct kvm_vm *vm, uint32_t vcpu_id,
+                        uint64_t expected_ucall)
+{
+       struct ucall uc;
+       uint64_t actual_ucall = get_ucall(vm, vcpu_id, &uc);
+
+       TEST_ASSERT(expected_ucall == actual_ucall,
+                   "Guest exited unexpectedly (expected ucall %" PRIu64
+                   ", got %" PRIu64 ")",
+                   expected_ucall, actual_ucall);
+}
+
+static bool spin_wait_for_next_iteration(int *current_iteration)
+{
+       int last_iteration = *current_iteration;
+
+       do {
+               if (READ_ONCE(done))
+                       return false;
+
+               *current_iteration = READ_ONCE(iteration);
+       } while (last_iteration == *current_iteration);
+
+       return true;
+}
+
+static void *vcpu_thread_main(void *arg)
+{
+       struct perf_test_vcpu_args *vcpu_args = arg;
+       struct kvm_vm *vm = perf_test_args.vm;
+       int vcpu_id = vcpu_args->vcpu_id;
+       int current_iteration = -1;
+
+       vcpu_args_set(vm, vcpu_id, 1, vcpu_id);
+
+       while (spin_wait_for_next_iteration(&current_iteration)) {
+               switch (READ_ONCE(iteration_work)) {
+               case ITERATION_ACCESS_MEMORY:
+                       vcpu_run(vm, vcpu_id);
+                       assert_ucall(vm, vcpu_id, UCALL_SYNC);
+                       break;
+               case ITERATION_MARK_IDLE:
+                       mark_vcpu_memory_idle(vm, vcpu_id);
+                       break;
+               }
+
+               vcpu_last_completed_iteration[vcpu_id] = current_iteration;
+       }
+
+       return NULL;
+}
+
+static void spin_wait_for_vcpu(int vcpu_id, int target_iteration)
+{
+       while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id]) !=
+              target_iteration) {
+               continue;
+       }
+}
+
+/* The type of memory accesses to perform in the VM. */
+enum access_type {
+       ACCESS_READ,
+       ACCESS_WRITE,
+};
+
+static void run_iteration(struct kvm_vm *vm, int vcpus, const char *description)
+{
+       struct timespec ts_start;
+       struct timespec ts_elapsed;
+       int next_iteration;
+       int vcpu_id;
+
+       /* Kick off the vCPUs by incrementing iteration. */
+       next_iteration = ++iteration;
+
+       clock_gettime(CLOCK_MONOTONIC, &ts_start);
+
+       /* Wait for all vCPUs to finish the iteration. */
+       for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++)
+               spin_wait_for_vcpu(vcpu_id, next_iteration);
+
+       ts_elapsed = timespec_elapsed(ts_start);
+       pr_info("%-30s: %ld.%09lds\n",
+               description, ts_elapsed.tv_sec, ts_elapsed.tv_nsec);
+}
+
+static void access_memory(struct kvm_vm *vm, int vcpus, enum access_type access,
+                         const char *description)
+{
+       perf_test_args.wr_fract = (access == ACCESS_READ) ? INT_MAX : 1;
+       sync_global_to_guest(vm, perf_test_args);
+       iteration_work = ITERATION_ACCESS_MEMORY;
+       run_iteration(vm, vcpus, description);
+}
+
+static void mark_memory_idle(struct kvm_vm *vm, int vcpus)
+{
+       /*
+        * Even though this parallelizes the work across vCPUs, this is still a
+        * very slow operation because page_idle forces the test to mark one pfn
+        * at a time and the clear_young notifier serializes on the KVM MMU
+        * lock.
+        */
+       pr_debug("Marking VM memory idle (slow)...\n");
+       iteration_work = ITERATION_MARK_IDLE;
+       run_iteration(vm, vcpus, "Mark memory idle");
+}
+
+static pthread_t *create_vcpu_threads(int vcpus)
+{
+       pthread_t *vcpu_threads;
+       int i;
+
+       vcpu_threads = malloc(vcpus * sizeof(vcpu_threads[0]));
+       TEST_ASSERT(vcpu_threads, "Failed to allocate vcpu_threads.");
+
+       for (i = 0; i < vcpus; i++) {
+               vcpu_last_completed_iteration[i] = iteration;
+               pthread_create(&vcpu_threads[i], NULL, vcpu_thread_main,
+                              &perf_test_args.vcpu_args[i]);
+       }
+
+       return vcpu_threads;
+}
+
+static void terminate_vcpu_threads(pthread_t *vcpu_threads, int vcpus)
+{
+       int i;
+
+       /* Set done to signal the vCPU threads to exit */
+       done = true;
+
+       for (i = 0; i < vcpus; i++)
+               pthread_join(vcpu_threads[i], NULL);
+}
+
+static void run_test(enum vm_guest_mode mode, void *arg)
+{
+       struct test_params *params = arg;
+       struct kvm_vm *vm;
+       pthread_t *vcpu_threads;
+       int vcpus = params->vcpus;
+
+       vm = perf_test_create_vm(mode, vcpus, params->vcpu_memory_bytes,
+                                params->backing_src);
+
+       perf_test_setup_vcpus(vm, vcpus, params->vcpu_memory_bytes,
+                             !overlap_memory_access);
+
+       vcpu_threads = create_vcpu_threads(vcpus);
+
+       pr_info("\n");
+       access_memory(vm, vcpus, ACCESS_WRITE, "Populating memory");
+
+       /* As a control, read and write to the populated memory first. */
+       access_memory(vm, vcpus, ACCESS_WRITE, "Writing to populated memory");
+       access_memory(vm, vcpus, ACCESS_READ, "Reading from populated memory");
+
+       /* Repeat on memory that has been marked as idle. */
+       mark_memory_idle(vm, vcpus);
+       access_memory(vm, vcpus, ACCESS_WRITE, "Writing to idle memory");
+       mark_memory_idle(vm, vcpus);
+       access_memory(vm, vcpus, ACCESS_READ, "Reading from idle memory");
+
+       terminate_vcpu_threads(vcpu_threads, vcpus);
+       free(vcpu_threads);
+       perf_test_destroy_vm(vm);
+}
+
+static void help(char *name)
+{
+       puts("");
+       printf("usage: %s [-h] [-m mode] [-b vcpu_bytes] [-v vcpus] [-o] [-s mem_type]\n",
+              name);
+       puts("");
+       printf(" -h: Display this help message.\n");
+       guest_modes_help();
+       printf(" -b: specify the size of the memory region which should be\n"
+              "     dirtied by each vCPU. e.g. 10M or 3G.\n"
+              "     (default: 1G)\n");
+       printf(" -v: specify the number of vCPUs to run.\n");
+       printf(" -o: Overlap guest memory accesses instead of partitioning\n"
+              "     them into a separate region of memory for each vCPU.\n");
+       printf(" -s: specify the type of memory that should be used to\n"
+              "     back the guest data region.\n\n");
+       backing_src_help();
+       puts("");
+       exit(0);
+}
+
+int main(int argc, char *argv[])
+{
+       struct test_params params = {
+               .backing_src = VM_MEM_SRC_ANONYMOUS,
+               .vcpu_memory_bytes = DEFAULT_PER_VCPU_MEM_SIZE,
+               .vcpus = 1,
+       };
+       int page_idle_fd;
+       int opt;
+
+       guest_modes_append_default();
+
+       while ((opt = getopt(argc, argv, "hm:b:v:os:")) != -1) {
+               switch (opt) {
+               case 'm':
+                       guest_modes_cmdline(optarg);
+                       break;
+               case 'b':
+                       params.vcpu_memory_bytes = parse_size(optarg);
+                       break;
+               case 'v':
+                       params.vcpus = atoi(optarg);
+                       break;
+               case 'o':
+                       overlap_memory_access = true;
+                       break;
+               case 's':
+                       params.backing_src = parse_backing_src_type(optarg);
+                       break;
+               case 'h':
+               default:
+                       help(argv[0]);
+                       break;
+               }
+       }
+
+       page_idle_fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR);
+       if (page_idle_fd < 0) {
+               print_skip("CONFIG_IDLE_PAGE_TRACKING is not enabled");
+               exit(KSFT_SKIP);
+       }
+       close(page_idle_fd);
+
+       for_each_guest_mode(run_test, &params);
+
+       return 0;
+}
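The page_idle interface that the file's header comment describes reduces to a read-modify-write of one bit per PFN in a bitmap file. A minimal sketch of the round trip (assumptions: CAP_SYS_ADMIN, CONFIG_IDLE_PAGE_TRACKING=y, and a PFN already resolved via /proc/self/pagemap; error handling elided):

    #include <fcntl.h>
    #include <stdint.h>
    #include <unistd.h>

    /* Sketch: mark one PFN idle, then test later whether it was referenced. */
    static int pfn_was_accessed(uint64_t pfn)
    {
            int fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR);
            uint64_t bits = 1ULL << (pfn % 64);
            off_t off = 8 * (pfn / 64);

            pwrite(fd, &bits, sizeof(bits), off); /* set idle; triggers clear_young */
            /* ... let the workload touch (or not touch) the page ... */
            pread(fd, &bits, sizeof(bits), off);
            close(fd);
            /* The kernel clears the idle bit when the page is referenced. */
            return !(bits & (1ULL << (pfn % 64)));
    }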
index 04a2641..80cbd3a 100644 (file)
@@ -312,6 +312,7 @@ int main(int argc, char *argv[])
                        break;
                case 'o':
                        p.partition_vcpu_memory_access = false;
+                       break;
                case 's':
                        p.backing_src = parse_backing_src_type(optarg);
                        break;
index 412eaee..b669107 100644 (file)
 #define HV_X64_GUEST_DEBUGGING_AVAILABLE               BIT(1)
 #define HV_X64_PERF_MONITOR_AVAILABLE                  BIT(2)
 #define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE      BIT(3)
-#define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE          BIT(4)
+#define HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE           BIT(4)
 #define HV_X64_GUEST_IDLE_STATE_AVAILABLE              BIT(5)
 #define HV_FEATURE_FREQUENCY_MSRS_AVAILABLE            BIT(8)
 #define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE           BIT(10)
 #define HV_STATUS_INVALID_CONNECTION_ID                18
 #define HV_STATUS_INSUFFICIENT_BUFFERS         19
 
+/* hypercall options */
+#define HV_HYPERCALL_FAST_BIT          BIT(16)
+
 #endif /* !SELFTEST_KVM_HYPERV_H */
index b0031f2..ecec308 100644 (file)
@@ -320,7 +320,7 @@ int main(int ac, char **av)
                run_delay = get_run_delay();
                pthread_create(&thread, &attr, do_steal_time, NULL);
                do
-                       pthread_yield();
+                       sched_yield();
                while (get_run_delay() - run_delay < MIN_RUN_DELAY_NS);
                pthread_join(thread, NULL);
                run_delay = get_run_delay() - run_delay;
index bab10ae..e0b2bb1 100644 (file)
@@ -215,7 +215,7 @@ int main(void)
        vcpu_set_hv_cpuid(vm, VCPU_ID);
 
        tsc_page_gva = vm_vaddr_alloc_page(vm);
-       memset(addr_gpa2hva(vm, tsc_page_gva), 0x0, getpagesize());
+       memset(addr_gva2hva(vm, tsc_page_gva), 0x0, getpagesize());
        TEST_ASSERT((addr_gva2gpa(vm, tsc_page_gva) & (getpagesize() - 1)) == 0,
                "TSC page has to be page aligned\n");
        vcpu_args_set(vm, VCPU_ID, 2, tsc_page_gva, addr_gva2gpa(vm, tsc_page_gva));
index af27c7e..91d88aa 100644 (file)
@@ -47,6 +47,7 @@ static void do_wrmsr(u32 idx, u64 val)
 }
 
 static int nr_gp;
+static int nr_ud;
 
 static inline u64 hypercall(u64 control, vm_vaddr_t input_address,
                            vm_vaddr_t output_address)
@@ -80,6 +81,12 @@ static void guest_gp_handler(struct ex_regs *regs)
                regs->rip = (uint64_t)&wrmsr_end;
 }
 
+static void guest_ud_handler(struct ex_regs *regs)
+{
+       nr_ud++;
+       regs->rip += 3;
+}
+
 struct msr_data {
        uint32_t idx;
        bool available;
@@ -90,6 +97,7 @@ struct msr_data {
 struct hcall_data {
        uint64_t control;
        uint64_t expect;
+       bool ud_expected;
 };
 
 static void guest_msr(struct msr_data *msr)
@@ -117,13 +125,26 @@ static void guest_msr(struct msr_data *msr)
 static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall)
 {
        int i = 0;
+       u64 res, input, output;
 
        wrmsr(HV_X64_MSR_GUEST_OS_ID, LINUX_OS_ID);
        wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa);
 
        while (hcall->control) {
-               GUEST_ASSERT(hypercall(hcall->control, pgs_gpa,
-                                      pgs_gpa + 4096) == hcall->expect);
+               nr_ud = 0;
+               if (!(hcall->control & HV_HYPERCALL_FAST_BIT)) {
+                       input = pgs_gpa;
+                       output = pgs_gpa + 4096;
+               } else {
+                       input = output = 0;
+               }
+
+               res = hypercall(hcall->control, input, output);
+               if (hcall->ud_expected)
+                       GUEST_ASSERT(nr_ud == 1);
+               else
+                       GUEST_ASSERT(res == hcall->expect);
+
                GUEST_SYNC(i++);
        }
 
@@ -552,8 +573,18 @@ static void guest_test_hcalls_access(struct kvm_vm *vm, struct hcall_data *hcall
                        recomm.ebx = 0xfff;
                        hcall->expect = HV_STATUS_SUCCESS;
                        break;
-
                case 17:
+                       /* XMM fast hypercall */
+                       hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT;
+                       hcall->ud_expected = true;
+                       break;
+               case 18:
+                       feat.edx |= HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE;
+                       hcall->ud_expected = false;
+                       hcall->expect = HV_STATUS_SUCCESS;
+                       break;
+
+               case 19:
                        /* END */
                        hcall->control = 0;
                        break;
@@ -625,6 +656,10 @@ int main(void)
        /* Test hypercalls */
        vm = vm_create_default(VCPU_ID, 0, guest_hcall);
 
+       vm_init_descriptor_tables(vm);
+       vcpu_init_descriptor_tables(vm, VCPU_ID);
+       vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
+
        /* Hypercall input/output */
        hcall_page = vm_vaddr_alloc_pages(vm, 2);
        memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize());
index 5b169e9..4f9f73e 100644 (file)
@@ -38,8 +38,10 @@ TEST_GEN_FILES += reuseaddr_ports_exhausted
 TEST_GEN_FILES += hwtstamp_config rxtimestamp timestamping txtimestamp
 TEST_GEN_FILES += ipsec
 TEST_GEN_FILES += ioam6_parser
+TEST_GEN_FILES += gro
 TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
 TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls
+TEST_GEN_FILES += toeplitz
 
 TEST_FILES := settings
 
diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile
new file mode 100644 (file)
index 0000000..cfc7f4f
--- /dev/null
@@ -0,0 +1,5 @@
+##TEST_GEN_FILES := test_unix_oob
+TEST_PROGS := test_unix_oob
+include ../../lib.mk
+
+all: $(TEST_PROGS)
diff --git a/tools/testing/selftests/net/af_unix/test_unix_oob.c b/tools/testing/selftests/net/af_unix/test_unix_oob.c
new file mode 100644 (file)
index 0000000..0f3e376
--- /dev/null
@@ -0,0 +1,437 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/socket.h>
+#include <arpa/inet.h>
+#include <unistd.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+#include <netinet/tcp.h>
+#include <sys/un.h>
+#include <sys/signal.h>
+#include <sys/poll.h>
+
+static int pipefd[2];
+static int signal_recvd;
+static pid_t producer_id;
+static char sock_name[32];
+
+static void sig_hand(int sn, siginfo_t *si, void *p)
+{
+       signal_recvd = sn;
+}
+
+static int set_sig_handler(int signal)
+{
+       struct sigaction sa;
+
+       sa.sa_sigaction = sig_hand;
+       sigemptyset(&sa.sa_mask);
+       sa.sa_flags = SA_SIGINFO | SA_RESTART;
+
+       return sigaction(signal, &sa, NULL);
+}
+
+static void set_filemode(int fd, int set)
+{
+       int flags = fcntl(fd, F_GETFL, 0);
+
+       if (set)
+               flags &= ~O_NONBLOCK;
+       else
+               flags |= O_NONBLOCK;
+       fcntl(fd, F_SETFL, flags);
+}
+
+static void signal_producer(int fd)
+{
+       char cmd;
+
+       cmd = 'S';
+       write(fd, &cmd, sizeof(cmd));
+}
+
+static void wait_for_signal(int fd)
+{
+       char buf[5];
+
+       read(fd, buf, 5);
+}
+
+static void die(int status)
+{
+       fflush(NULL);
+       unlink(sock_name);
+       kill(producer_id, SIGTERM);
+       exit(status);
+}
+
+int is_sioctatmark(int fd)
+{
+       int ans = -1;
+
+       if (ioctl(fd, SIOCATMARK, &ans, sizeof(ans)) < 0) {
+#ifdef DEBUG
+               perror("SIOCATMARK Failed");
+#endif
+       }
+       return ans;
+}
+
+void read_oob(int fd, char *c)
+{
+       *c = ' ';
+       if (recv(fd, c, sizeof(*c), MSG_OOB) < 0) {
+#ifdef DEBUG
+               perror("Reading MSG_OOB Failed");
+#endif
+       }
+}
+
+int read_data(int pfd, char *buf, int size)
+{
+       int len = 0;
+
+       memset(buf, 0, size);
+       len = read(pfd, buf, size);
+#ifdef DEBUG
+       if (len < 0)
+               perror("read failed");
+#endif
+       return len;
+}
+
+static void wait_for_data(int pfd, int event)
+{
+       struct pollfd pfds[1];
+
+       pfds[0].fd = pfd;
+       pfds[0].events = event;
+       poll(pfds, 1, -1);
+}
+
+void producer(struct sockaddr_un *consumer_addr)
+{
+       int cfd;
+       char buf[64];
+       int i;
+
+       memset(buf, 'x', sizeof(buf));
+       cfd = socket(AF_UNIX, SOCK_STREAM, 0);
+
+       wait_for_signal(pipefd[0]);
+       if (connect(cfd, (struct sockaddr *)consumer_addr,
+                    sizeof(struct sockaddr_un)) != 0) {
+               perror("Connect failed");
+               kill(0, SIGTERM);
+               exit(1);
+       }
+
+       for (i = 0; i < 2; i++) {
+               /* Test 1: Test for SIGURG and OOB */
+               wait_for_signal(pipefd[0]);
+               memset(buf, 'x', sizeof(buf));
+               buf[63] = '@';
+               send(cfd, buf, sizeof(buf), MSG_OOB);
+
+               wait_for_signal(pipefd[0]);
+
+               /* Test 2: Test for OOB being overwritten */
+               memset(buf, 'x', sizeof(buf));
+               buf[63] = '%';
+               send(cfd, buf, sizeof(buf), MSG_OOB);
+
+               memset(buf, 'x', sizeof(buf));
+               buf[63] = '#';
+               send(cfd, buf, sizeof(buf), MSG_OOB);
+
+               wait_for_signal(pipefd[0]);
+
+               /* Test 3: Test for SIOCATMARK */
+               memset(buf, 'x', sizeof(buf));
+               buf[63] = '@';
+               send(cfd, buf, sizeof(buf), MSG_OOB);
+
+               memset(buf, 'x', sizeof(buf));
+               buf[63] = '%';
+               send(cfd, buf, sizeof(buf), MSG_OOB);
+
+               memset(buf, 'x', sizeof(buf));
+               send(cfd, buf, sizeof(buf), 0);
+
+               wait_for_signal(pipefd[0]);
+
+               /* Test 4: Test for 1byte OOB msg */
+               memset(buf, 'x', sizeof(buf));
+               buf[0] = '@';
+               send(cfd, buf, 1, MSG_OOB);
+       }
+}
+
+int
+main(int argc, char **argv)
+{
+       int lfd, pfd;
+       struct sockaddr_un consumer_addr, paddr;
+       socklen_t len = sizeof(consumer_addr);
+       char buf[1024];
+       int on = 0;
+       char oob;
+       int flags;
+       int atmark;
+       char *tmp_file;
+
+       lfd = socket(AF_UNIX, SOCK_STREAM, 0);
+       memset(&consumer_addr, 0, sizeof(consumer_addr));
+       consumer_addr.sun_family = AF_UNIX;
+       sprintf(sock_name, "unix_oob_%d", getpid());
+       unlink(sock_name);
+       strcpy(consumer_addr.sun_path, sock_name);
+
+       if ((bind(lfd, (struct sockaddr *)&consumer_addr,
+                 sizeof(consumer_addr))) != 0) {
+               perror("socket bind failed");
+               exit(1);
+       }
+
+       pipe(pipefd);
+
+       listen(lfd, 1);
+
+       producer_id = fork();
+       if (producer_id == 0) {
+               producer(&consumer_addr);
+               exit(0);
+       }
+
+       set_sig_handler(SIGURG);
+       signal_producer(pipefd[1]);
+
+       pfd = accept(lfd, (struct sockaddr *) &paddr, &len);
+       fcntl(pfd, F_SETOWN, getpid());
+
+       signal_recvd = 0;
+       signal_producer(pipefd[1]);
+
+       /* Test 1:
+        * verify that SIGURG is
+        * delivered and 63 bytes are
+        * read and oob is '@'
+        */
+       wait_for_data(pfd, POLLIN | POLLPRI);
+       read_oob(pfd, &oob);
+       len = read_data(pfd, buf, 1024);
+       if (!signal_recvd || len != 63 || oob != '@') {
+               fprintf(stderr, "Test 1 failed, sigurg %d len %d OOB %c\n",
+                       signal_recvd, len, oob);
+               die(1);
+       }
+
+       signal_recvd = 0;
+       signal_producer(pipefd[1]);
+
+       /* Test 2:
+        * Verify that the first OOB is overwritten by
+        * the 2nd one and the first OOB is returned as
+        * part of the read, and sigurg is received.
+        */
+       wait_for_data(pfd, POLLIN | POLLPRI);
+       len = 0;
+       while (len < 70)
+               len = recv(pfd, buf, 1024, MSG_PEEK);
+       len = read_data(pfd, buf, 1024);
+       read_oob(pfd, &oob);
+       if (!signal_recvd || len != 127 || oob != '#') {
+               fprintf(stderr, "Test 2 failed, sigurg %d len %d OOB %c\n",
+                       signal_recvd, len, oob);
+               die(1);
+       }
+
+       signal_recvd = 0;
+       signal_producer(pipefd[1]);
+
+       /* Test 3:
+        * verify that the 2nd oob overwrites
+        * the first one and read breaks at
+        * oob boundary returning 127 bytes
+        * and sigurg is received and atmark
+        * is set.
+        * oob is '%' and second read returns
+        * 64 bytes.
+        */
+       len = 0;
+       wait_for_data(pfd, POLLIN | POLLPRI);
+       while (len < 150)
+               len = recv(pfd, buf, 1024, MSG_PEEK);
+       len = read_data(pfd, buf, 1024);
+       atmark = is_sioctatmark(pfd);
+       read_oob(pfd, &oob);
+
+       if (!signal_recvd || len != 127 || oob != '%' || atmark != 1) {
+               fprintf(stderr, "Test 3 failed, sigurg %d len %d OOB %c atmark %d\n",
+                       signal_recvd, len, oob, atmark);
+               die(1);
+       }
+
+       signal_recvd = 0;
+
+       len = read_data(pfd, buf, 1024);
+       if (len != 64) {
+               fprintf(stderr, "Test 3.1 failed, sigurg %d len %d OOB %c\n",
+                       signal_recvd, len, oob);
+               die(1);
+       }
+
+       signal_recvd = 0;
+       signal_producer(pipefd[1]);
+
+       /* Test 4:
+        * verify that a single byte
+        * oob message is delivered.
+        * set non blocking mode and
+        * check proper error is
+        * returned and sigurg is
+        * received and correct
+        * oob is read.
+        */
+
+       set_filemode(pfd, 0);
+
+       wait_for_data(pfd, POLLIN | POLLPRI);
+       len = read_data(pfd, buf, 1024);
+       if ((len == -1) && (errno == EAGAIN))
+               len = 0;
+
+       read_oob(pfd, &oob);
+
+       if (!signal_recvd || len != 0 || oob != '@') {
+               fprintf(stderr, "Test 4 failed, sigurg %d len %d OOB %c\n",
+                        signal_recvd, len, oob);
+               die(1);
+       }
+
+       set_filemode(pfd, 1);
+
+       /* Inline Testing */
+
+       on = 1;
+       if (setsockopt(pfd, SOL_SOCKET, SO_OOBINLINE, &on, sizeof(on))) {
+               perror("SO_OOBINLINE");
+               die(1);
+       }
+
+       signal_recvd = 0;
+       signal_producer(pipefd[1]);
+
+       /* Test 1 -- Inline:
+        * Check that SIGURG is
+        * delivered and 63 bytes are
+        * read and oob is '@'
+        */
+
+       wait_for_data(pfd, POLLIN | POLLPRI);
+       len = read_data(pfd, buf, 1024);
+
+       if (!signal_recvd || len != 63) {
+               fprintf(stderr, "Test 1 Inline failed, sigurg %d len %d\n",
+                       signal_recvd, len);
+               die(1);
+       }
+
+       len = read_data(pfd, buf, 1024);
+
+       if (len != 1) {
+               fprintf(stderr,
+                        "Test 1.1 Inline failed, sigurg %d len %d oob %c\n",
+                        signal_recvd, len, oob);
+               die(1);
+       }
+
+       signal_recvd = 0;
+       signal_producer(pipefd[1]);
+
+       /* Test 2 -- Inline:
+        * Verify that the first OOB is overwritten by
+        * the 2nd one and read breaks correctly on
+        * 2nd OOB boundary with the first OOB returned as
+        * part of the read, and sigurg is delivered and
+        * siocatmark returns true.
+        * next read returns one byte, the oob byte
+        * and siocatmark returns false.
+        */
+       len = 0;
+       wait_for_data(pfd, POLLIN | POLLPRI);
+       while (len < 70)
+               len = recv(pfd, buf, 1024, MSG_PEEK);
+       len = read_data(pfd, buf, 1024);
+       atmark = is_sioctatmark(pfd);
+       if (len != 127 || atmark != 1 || !signal_recvd) {
+               fprintf(stderr, "Test 2 Inline failed, len %d atmark %d\n",
+                        len, atmark);
+               die(1);
+       }
+
+       len = read_data(pfd, buf, 1024);
+       atmark = is_sioctatmark(pfd);
+       if (len != 1 || buf[0] != '#' || atmark == 1) {
+               fprintf(stderr, "Test 2.1 Inline failed, len %d data %c atmark %d\n",
+                       len, buf[0], atmark);
+               die(1);
+       }
+
+       signal_recvd = 0;
+       signal_producer(pipefd[1]);
+
+       /* Test 3 -- Inline:
+        * verify that the 2nd oob overwrites
+        * the first one and read breaks at
+        * oob boundary returning 127 bytes
+        * and sigurg is received and siocatmark
+        * is true after the read.
+        * subsequent read returns 65 bytes
+        * because of oob which should be '%'.
+        */
+       len = 0;
+       wait_for_data(pfd, POLLIN | POLLPRI);
+       while (len < 126)
+               len = recv(pfd, buf, 1024, MSG_PEEK);
+       len = read_data(pfd, buf, 1024);
+       atmark = is_sioctatmark(pfd);
+       if (!signal_recvd || len != 127 || !atmark) {
+               fprintf(stderr,
+                        "Test 3 Inline failed, sigurg %d len %d data %c\n",
+                        signal_recvd, len, buf[0]);
+               die(1);
+       }
+
+       len = read_data(pfd, buf, 1024);
+       atmark = is_sioctatmark(pfd);
+       if (len != 65 || buf[0] != '%' || atmark != 0) {
+               fprintf(stderr,
+                        "Test 3.1 Inline failed, len %d oob %c atmark %d\n",
+                        len, buf[0], atmark);
+               die(1);
+       }
+
+       signal_recvd = 0;
+       signal_producer(pipefd[1]);
+
+       /* Test 4 -- Inline:
+        * verify that a single
+        * byte oob message is delivered
+        * and read returns one byte, the oob
+        * byte and sigurg is received
+        */
+       wait_for_data(pfd, POLLIN | POLLPRI);
+       len = read_data(pfd, buf, 1024);
+       if (!signal_recvd || len != 1 || buf[0] != '@') {
+               fprintf(stderr,
+                       "Test 4 Inline failed, signal %d len %d data %c\n",
+               signal_recvd, len, buf[0]);
+               die(1);
+       }
+       die(0);
+}
diff --git a/tools/testing/selftests/net/gro.c b/tools/testing/selftests/net/gro.c
new file mode 100644 (file)
index 0000000..cf37ce8
--- /dev/null
@@ -0,0 +1,1095 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This testsuite provides conformance testing for GRO coalescing.
+ *
+ * Test cases:
+ * 1.data
+ *  Data packets of the same size and same header setup with correct
+ *  sequence numbers coalesce. The one exception is the last data
+ *  packet: it can be smaller than the rest and still coalesce
+ *  as long as it is in the same flow.
+ * 2.ack
+ *  Pure ACKs do not coalesce.
+ * 3.flags
+ *  Specific test cases: no packets with PSH, SYN, URG or RST set will
+ *  be coalesced.
+ * 4.tcp
+ *  Packets with an incorrect checksum, non-consecutive seqno or
+ *  different TCP header options shouldn't coalesce. Nit: given that
+ *  some extension headers have padding, such as timestamp, headers
+ *  that are padded differently will not be coalesced.
+ * 5.ip
+ *  Packets with different (ECN, TTL, TOS) header fields, ip options or
+ *  ip fragments (ipv6) shouldn't coalesce.
+ * 6.large
+ *  Packets larger than GRO_MAX_SIZE shouldn't coalesce.
+ *
+ * MSS is defined as 4096 - header because if it is too small
+ * (e.g. 1500 MTU - header), it will result in many packets,
+ * increasing the "large" test case's flakiness. This is because,
+ * due to time sensitivity in the coalescing window, the receiver
+ * may not coalesce all of the packets.
+ *
+ * Note that the timing issue applies to all of the test cases, so some
+ * flakiness is to be expected.
+ *
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <getopt.h>
+#include <linux/filter.h>
+#include <linux/if_packet.h>
+#include <linux/ipv6.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <unistd.h>
+
+#define DPORT 8000
+#define SPORT 1500
+#define PAYLOAD_LEN 100
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#define NUM_PACKETS 4
+#define START_SEQ 100
+#define START_ACK 100
+#define SIP6 "fdaa::2"
+#define DIP6 "fdaa::1"
+#define SIP4 "192.168.1.200"
+#define DIP4 "192.168.1.100"
+#define ETH_P_NONE 0
+#define TOTAL_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr))
+#define MSS (4096 - sizeof(struct tcphdr) - sizeof(struct ipv6hdr))
+#define MAX_PAYLOAD (IP_MAXPACKET - sizeof(struct tcphdr) - sizeof(struct ipv6hdr))
+#define NUM_LARGE_PKT (MAX_PAYLOAD / MSS)
+#define MAX_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr))
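+
+/* For reference: with the usual 20-byte tcphdr and 40-byte ipv6hdr,
+ * MSS = 4096 - 20 - 40 = 4036 and MAX_PAYLOAD = 65535 - 20 - 40 = 65475,
+ * so the "large" test sends NUM_LARGE_PKT = 65475 / 4036 = 16 full segments.
+ */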
+
+static int proto = -1;
+static uint8_t src_mac[ETH_ALEN], dst_mac[ETH_ALEN];
+static char *testname = "data";
+static char *ifname = "eth0";
+static char *smac = "aa:00:00:00:00:02";
+static char *dmac = "aa:00:00:00:00:01";
+static bool verbose;
+static bool tx_socket = true;
+static int tcp_offset = -1;
+static int total_hdr_len = -1;
+static int ethhdr_proto = -1;
+
+static void vlog(const char *fmt, ...)
+{
+       va_list args;
+
+       if (verbose) {
+               va_start(args, fmt);
+               vfprintf(stderr, fmt, args);
+               va_end(args);
+       }
+}
+
+static void setup_sock_filter(int fd)
+{
+       const int dport_off = tcp_offset + offsetof(struct tcphdr, dest);
+       const int ethproto_off = offsetof(struct ethhdr, h_proto);
+       int optlen = 0;
+       int ipproto_off;
+       int next_off;
+
+       if (proto == PF_INET)
+               next_off = offsetof(struct iphdr, protocol);
+       else
+               next_off = offsetof(struct ipv6hdr, nexthdr);
+       ipproto_off = ETH_HLEN + next_off;
+
+       if (strcmp(testname, "ip") == 0) {
+               if (proto == PF_INET)
+                       optlen = sizeof(struct ip_timestamp);
+               else
+                       optlen = sizeof(struct ip6_frag);
+       }
+
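+       /* cBPF program: accept a packet only if its EtherType matches the
+        * configured protocol, its IP protocol is TCP, and its TCP dest
+        * port is DPORT -- checked both at dport_off and at dport_off +
+        * optlen, so packets from the "ip" test case, whose option or
+        * extension header shifts the TCP header, also match. Everything
+        * else falls through to the final RET 0 (drop).
+        */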
+       struct sock_filter filter[] = {
+                       BPF_STMT(BPF_LD  + BPF_H   + BPF_ABS, ethproto_off),
+                       BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ntohs(ethhdr_proto), 0, 7),
+                       BPF_STMT(BPF_LD  + BPF_B   + BPF_ABS, ipproto_off),
+                       BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_TCP, 0, 5),
+                       BPF_STMT(BPF_LD  + BPF_H   + BPF_ABS, dport_off),
+                       BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DPORT, 2, 0),
+                       BPF_STMT(BPF_LD  + BPF_H   + BPF_ABS, dport_off + optlen),
+                       BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DPORT, 0, 1),
+                       BPF_STMT(BPF_RET + BPF_K, 0xFFFFFFFF),
+                       BPF_STMT(BPF_RET + BPF_K, 0),
+       };
+
+       struct sock_fprog bpf = {
+               .len = ARRAY_SIZE(filter),
+               .filter = filter,
+       };
+
+       if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf, sizeof(bpf)) < 0)
+               error(1, errno, "error setting filter");
+}
+
+static uint32_t checksum_nofold(void *data, size_t len, uint32_t sum)
+{
+       uint16_t *words = data;
+       int i;
+
+       for (i = 0; i < len / 2; i++)
+               sum += words[i];
+       if (len & 1)
+               sum += ((char *)data)[len - 1];
+       return sum;
+}
+
+static uint16_t checksum_fold(void *data, size_t len, uint32_t sum)
+{
+       sum = checksum_nofold(data, len, sum);
+       while (sum > 0xFFFF)
+               sum = (sum & 0xFFFF) + (sum >> 16);
+       return ~sum;
+}
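+
+/* Worked example for the fold above: a running sum of 0x12345 folds to
+ * 0x2345 + 0x1 = 0x2346, and its one's complement 0xdcb9 is the checksum.
+ */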
+
+static uint16_t tcp_checksum(void *buf, int payload_len)
+{
+       struct pseudo_header6 {
+               struct in6_addr saddr;
+               struct in6_addr daddr;
+               uint16_t protocol;
+               uint16_t payload_len;
+       } ph6;
+       struct pseudo_header4 {
+               struct in_addr saddr;
+               struct in_addr daddr;
+               uint16_t protocol;
+               uint16_t payload_len;
+       } ph4;
+       uint32_t sum = 0;
+
+       if (proto == PF_INET6) {
+               if (inet_pton(AF_INET6, SIP6, &ph6.saddr) != 1)
+                       error(1, errno, "inet_pton6 source ip pseudo");
+               if (inet_pton(AF_INET6, DIP6, &ph6.daddr) != 1)
+                       error(1, errno, "inet_pton6 dest ip pseudo");
+               ph6.protocol = htons(IPPROTO_TCP);
+               ph6.payload_len = htons(sizeof(struct tcphdr) + payload_len);
+
+               sum = checksum_nofold(&ph6, sizeof(ph6), 0);
+       } else if (proto == PF_INET) {
+               if (inet_pton(AF_INET, SIP4, &ph4.saddr) != 1)
+                       error(1, errno, "inet_pton source ip pseudo");
+               if (inet_pton(AF_INET, DIP4, &ph4.daddr) != 1)
+                       error(1, errno, "inet_pton dest ip pseudo");
+               ph4.protocol = htons(IPPROTO_TCP);
+               ph4.payload_len = htons(sizeof(struct tcphdr) + payload_len);
+
+               sum = checksum_nofold(&ph4, sizeof(ph4), 0);
+       }
+
+       return checksum_fold(buf, sizeof(struct tcphdr) + payload_len, sum);
+}
+
+static void read_MAC(uint8_t *mac_addr, char *mac)
+{
+       if (sscanf(mac, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
+                  &mac_addr[0], &mac_addr[1], &mac_addr[2],
+                  &mac_addr[3], &mac_addr[4], &mac_addr[5]) != 6)
+               error(1, 0, "sscanf");
+}
+
+static void fill_datalinklayer(void *buf)
+{
+       struct ethhdr *eth = buf;
+
+       memcpy(eth->h_dest, dst_mac, ETH_ALEN);
+       memcpy(eth->h_source, src_mac, ETH_ALEN);
+       eth->h_proto = ethhdr_proto;
+}
+
+static void fill_networklayer(void *buf, int payload_len)
+{
+       struct ipv6hdr *ip6h = buf;
+       struct iphdr *iph = buf;
+
+       if (proto == PF_INET6) {
+               memset(ip6h, 0, sizeof(*ip6h));
+
+               ip6h->version = 6;
+               ip6h->payload_len = htons(sizeof(struct tcphdr) + payload_len);
+               ip6h->nexthdr = IPPROTO_TCP;
+               ip6h->hop_limit = 8;
+               if (inet_pton(AF_INET6, SIP6, &ip6h->saddr) != 1)
+                       error(1, errno, "inet_pton source ip6");
+               if (inet_pton(AF_INET6, DIP6, &ip6h->daddr) != 1)
+                       error(1, errno, "inet_pton dest ip6");
+       } else if (proto == PF_INET) {
+               memset(iph, 0, sizeof(*iph));
+
+               iph->version = 4;
+               iph->ihl = 5;
+               iph->ttl = 8;
+               iph->protocol   = IPPROTO_TCP;
+               iph->tot_len = htons(sizeof(struct tcphdr) +
+                               payload_len + sizeof(struct iphdr));
+               iph->frag_off = htons(0x4000); /* DF = 1, MF = 0 */
+               if (inet_pton(AF_INET, SIP4, &iph->saddr) != 1)
+                       error(1, errno, "inet_pton source ip");
+               if (inet_pton(AF_INET, DIP4, &iph->daddr) != 1)
+                       error(1, errno, "inet_pton dest ip");
+               iph->check = checksum_fold(buf, sizeof(struct iphdr), 0);
+       }
+}
+
+static void fill_transportlayer(void *buf, int seq_offset, int ack_offset,
+                               int payload_len, int fin)
+{
+       struct tcphdr *tcph = buf;
+
+       memset(tcph, 0, sizeof(*tcph));
+
+       tcph->source = htons(SPORT);
+       tcph->dest = htons(DPORT);
+       tcph->seq = htonl(START_SEQ + seq_offset);
+       tcph->ack_seq = htonl(START_ACK + ack_offset);
+       tcph->ack = 1;
+       tcph->fin = fin;
+       tcph->doff = 5;
+       tcph->window = htons(TCP_MAXWIN);
+       tcph->urg_ptr = 0;
+       tcph->check = tcp_checksum(tcph, payload_len);
+}
+
+static void write_packet(int fd, char *buf, int len, struct sockaddr_ll *daddr)
+{
+       int ret = -1;
+
+       ret = sendto(fd, buf, len, 0, (struct sockaddr *)daddr, sizeof(*daddr));
+       if (ret == -1)
+               error(1, errno, "sendto failure");
+       if (ret != len)
+               error(1, errno, "sendto wrong length");
+}
+
+static void create_packet(void *buf, int seq_offset, int ack_offset,
+                         int payload_len, int fin)
+{
+       memset(buf, 0, total_hdr_len);
+       memset(buf + total_hdr_len, 'a', payload_len);
+       fill_transportlayer(buf + tcp_offset, seq_offset, ack_offset,
+                           payload_len, fin);
+       fill_networklayer(buf + ETH_HLEN, payload_len);
+       fill_datalinklayer(buf);
+}
+
+/* Send one packet with an extra flag set, neither first nor last in the burst */
+static void send_flags(int fd, struct sockaddr_ll *daddr, int psh, int syn,
+                      int rst, int urg)
+{
+       static char flag_buf[MAX_HDR_LEN + PAYLOAD_LEN];
+       static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+       int payload_len, pkt_size, flag, i;
+       struct tcphdr *tcph;
+
+       payload_len = PAYLOAD_LEN * psh;
+       pkt_size = total_hdr_len + payload_len;
+       flag = NUM_PACKETS / 2;
+
+       create_packet(flag_buf, flag * payload_len, 0, payload_len, 0);
+
+       tcph = (struct tcphdr *)(flag_buf + tcp_offset);
+       tcph->psh = psh;
+       tcph->syn = syn;
+       tcph->rst = rst;
+       tcph->urg = urg;
+       tcph->check = 0;
+       tcph->check = tcp_checksum(tcph, payload_len);
+
+       for (i = 0; i < NUM_PACKETS + 1; i++) {
+               if (i == flag) {
+                       write_packet(fd, flag_buf, pkt_size, daddr);
+                       continue;
+               }
+               create_packet(buf, i * PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+               write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr);
+       }
+}
+
+/* Send two data packets; covers payloads of the same length,
+ * a payload smaller than the previous one, and different lengths
+ */
+static void send_data_pkts(int fd, struct sockaddr_ll *daddr,
+                          int payload_len1, int payload_len2)
+{
+       static char buf[ETH_HLEN + IP_MAXPACKET];
+
+       create_packet(buf, 0, 0, payload_len1, 0);
+       write_packet(fd, buf, total_hdr_len + payload_len1, daddr);
+       create_packet(buf, payload_len1, 0, payload_len2, 0);
+       write_packet(fd, buf, total_hdr_len + payload_len2, daddr);
+}
+
+/* If incoming segments make the tracked segment length exceed
+ * the legal IP datagram length, they must not coalesce
+ */
+static void send_large(int fd, struct sockaddr_ll *daddr, int remainder)
+{
+       static char pkts[NUM_LARGE_PKT][TOTAL_HDR_LEN + MSS];
+       static char last[TOTAL_HDR_LEN + MSS];
+       static char new_seg[TOTAL_HDR_LEN + MSS];
+       int i;
+
+       for (i = 0; i < NUM_LARGE_PKT; i++)
+               create_packet(pkts[i], i * MSS, 0, MSS, 0);
+       create_packet(last, NUM_LARGE_PKT * MSS, 0, remainder, 0);
+       create_packet(new_seg, (NUM_LARGE_PKT + 1) * MSS, 0, remainder, 0);
+
+       for (i = 0; i < NUM_LARGE_PKT; i++)
+               write_packet(fd, pkts[i], total_hdr_len + MSS, daddr);
+       write_packet(fd, last, total_hdr_len + remainder, daddr);
+       write_packet(fd, new_seg, total_hdr_len + remainder, daddr);
+}
+
+/* Pure ACKs and dup ACKs don't coalesce */
+static void send_ack(int fd, struct sockaddr_ll *daddr)
+{
+       static char buf[MAX_HDR_LEN];
+
+       create_packet(buf, 0, 0, 0, 0);
+       write_packet(fd, buf, total_hdr_len, daddr);
+       write_packet(fd, buf, total_hdr_len, daddr);
+       create_packet(buf, 0, 1, 0, 0);
+       write_packet(fd, buf, total_hdr_len, daddr);
+}
+
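+/* Rebuild the packet from no_ext, leaving extlen bytes of room for TCP
+ * options between the headers and the payload, then fix up doff, the TCP
+ * checksum and the IP length fields (plus the IPv4 header checksum).
+ */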
+static void recompute_packet(char *buf, char *no_ext, int extlen)
+{
+       struct tcphdr *tcphdr = (struct tcphdr *)(buf + tcp_offset);
+       struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN);
+       struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+
+       memmove(buf, no_ext, total_hdr_len);
+       memmove(buf + total_hdr_len + extlen,
+               no_ext + total_hdr_len, PAYLOAD_LEN);
+
+       tcphdr->doff = tcphdr->doff + (extlen / 4);
+       tcphdr->check = 0;
+       tcphdr->check = tcp_checksum(tcphdr, PAYLOAD_LEN + extlen);
+       if (proto == PF_INET) {
+               iph->tot_len = htons(ntohs(iph->tot_len) + extlen);
+               iph->check = 0;
+               iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+       } else {
+               ip6h->payload_len = htons(ntohs(ip6h->payload_len) + extlen);
+       }
+}
+
+static void tcp_write_options(char *buf, int kind, int ts)
+{
+       struct tcp_option_ts {
+               uint8_t kind;
+               uint8_t len;
+               uint32_t tsval;
+               uint32_t tsecr;
+       } *opt_ts = (void *)buf;
+       struct tcp_option_window {
+               uint8_t kind;
+               uint8_t len;
+               uint8_t shift;
+       } *opt_window = (void *)buf;
+
+       switch (kind) {
+       case TCPOPT_NOP:
+               buf[0] = TCPOPT_NOP;
+               break;
+       case TCPOPT_WINDOW:
+               memset(opt_window, 0, sizeof(struct tcp_option_window));
+               opt_window->kind = TCPOPT_WINDOW;
+               opt_window->len = TCPOLEN_WINDOW;
+               opt_window->shift = 0;
+               break;
+       case TCPOPT_TIMESTAMP:
+               memset(opt_ts, 0, sizeof(struct tcp_option_ts));
+               opt_ts->kind = TCPOPT_TIMESTAMP;
+               opt_ts->len = TCPOLEN_TIMESTAMP;
+               opt_ts->tsval = ts;
+               opt_ts->tsecr = 0;
+               break;
+       default:
+               error(1, 0, "unimplemented TCP option");
+               break;
+       }
+}
+
+/* TCP with options is always a permutation of {TS, NOP, NOP}.
+ * Implement different orders to verify coalescing stops.
+ */
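+/* Note: all three orders occupy TCPOLEN_TSTAMP_APPA (12) bytes -- the
+ * 10-byte timestamp option plus two NOPs -- so doff is identical across
+ * the variants.
+ */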
+static void add_standard_tcp_options(char *buf, char *no_ext, int ts, int order)
+{
+       switch (order) {
+       case 0:
+               tcp_write_options(buf + total_hdr_len, TCPOPT_NOP, 0);
+               tcp_write_options(buf + total_hdr_len + 1, TCPOPT_NOP, 0);
+               tcp_write_options(buf + total_hdr_len + 2 /* two NOP opts */,
+                                 TCPOPT_TIMESTAMP, ts);
+               break;
+       case 1:
+               tcp_write_options(buf + total_hdr_len, TCPOPT_NOP, 0);
+               tcp_write_options(buf + total_hdr_len + 1,
+                                 TCPOPT_TIMESTAMP, ts);
+               tcp_write_options(buf + total_hdr_len + 1 + TCPOLEN_TIMESTAMP,
+                                 TCPOPT_NOP, 0);
+               break;
+       case 2:
+               tcp_write_options(buf + total_hdr_len, TCPOPT_TIMESTAMP, ts);
+               tcp_write_options(buf + total_hdr_len + TCPOLEN_TIMESTAMP + 1,
+                                 TCPOPT_NOP, 0);
+               tcp_write_options(buf + total_hdr_len + TCPOLEN_TIMESTAMP + 2,
+                                 TCPOPT_NOP, 0);
+               break;
+       default:
+               error(1, 0, "unknown order");
+               break;
+       }
+       recompute_packet(buf, no_ext, TCPOLEN_TSTAMP_APPA);
+}
+
+/* Packets with an invalid checksum don't coalesce. */
+static void send_changed_checksum(int fd, struct sockaddr_ll *daddr)
+{
+       static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+       struct tcphdr *tcph = (struct tcphdr *)(buf + tcp_offset);
+       int pkt_size = total_hdr_len + PAYLOAD_LEN;
+
+       create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+       write_packet(fd, buf, pkt_size, daddr);
+
+       create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+       tcph->check = tcph->check - 1;
+       write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* Packets with non-consecutive sequence numbers don't coalesce. */
+static void send_changed_seq(int fd, struct sockaddr_ll *daddr)
+{
+       static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+       struct tcphdr *tcph = (struct tcphdr *)(buf + tcp_offset);
+       int pkt_size = total_hdr_len + PAYLOAD_LEN;
+
+       create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+       write_packet(fd, buf, pkt_size, daddr);
+
+       create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+       tcph->seq = htonl(ntohl(tcph->seq) + 1);
+       tcph->check = 0;
+       tcph->check = tcp_checksum(tcph, PAYLOAD_LEN);
+       write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* Packets with a different timestamp option or different
+ * timestamp values don't coalesce.
+ */
+static void send_changed_ts(int fd, struct sockaddr_ll *daddr)
+{
+       static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+       static char extpkt[sizeof(buf) + TCPOLEN_TSTAMP_APPA];
+       int pkt_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_TSTAMP_APPA;
+
+       create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+       add_standard_tcp_options(extpkt, buf, 0, 0);
+       write_packet(fd, extpkt, pkt_size, daddr);
+
+       create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+       add_standard_tcp_options(extpkt, buf, 0, 0);
+       write_packet(fd, extpkt, pkt_size, daddr);
+
+       create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+       add_standard_tcp_options(extpkt, buf, 100, 0);
+       write_packet(fd, extpkt, pkt_size, daddr);
+
+       create_packet(buf, PAYLOAD_LEN * 3, 0, PAYLOAD_LEN, 0);
+       add_standard_tcp_options(extpkt, buf, 100, 1);
+       write_packet(fd, extpkt, pkt_size, daddr);
+
+       create_packet(buf, PAYLOAD_LEN * 4, 0, PAYLOAD_LEN, 0);
+       add_standard_tcp_options(extpkt, buf, 100, 2);
+       write_packet(fd, extpkt, pkt_size, daddr);
+}
+
+/* Packets with different TCP options don't coalesce. */
+static void send_diff_opt(int fd, struct sockaddr_ll *daddr)
+{
+       static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+       static char extpkt1[sizeof(buf) + TCPOLEN_TSTAMP_APPA];
+       static char extpkt2[sizeof(buf) + TCPOLEN_MAXSEG];
+       int extpkt1_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_TSTAMP_APPA;
+       int extpkt2_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_MAXSEG;
+
+       create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+       add_standard_tcp_options(extpkt1, buf, 0, 0);
+       write_packet(fd, extpkt1, extpkt1_size, daddr);
+
+       create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+       add_standard_tcp_options(extpkt1, buf, 0, 0);
+       write_packet(fd, extpkt1, extpkt1_size, daddr);
+
+       create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+       tcp_write_options(extpkt2 + MAX_HDR_LEN, TCPOPT_NOP, 0);
+       tcp_write_options(extpkt2 + MAX_HDR_LEN + 1, TCPOPT_WINDOW, 0);
+       recompute_packet(extpkt2, buf, TCPOLEN_WINDOW + 1);
+       write_packet(fd, extpkt2, extpkt2_size, daddr);
+}
+
+static void add_ipv4_ts_option(void *buf, void *optpkt)
+{
+       struct ip_timestamp *ts = (struct ip_timestamp *)(optpkt + tcp_offset);
+       int optlen = sizeof(struct ip_timestamp);
+       struct iphdr *iph;
+
+       if (optlen % 4)
+               error(1, 0, "ipv4 timestamp length is not a multiple of 4B");
+
+       ts->ipt_code = IPOPT_TS;
+       ts->ipt_len = optlen;
+       ts->ipt_ptr = 5;
+       ts->ipt_flg = IPOPT_TS_TSONLY;
+
+       memcpy(optpkt, buf, tcp_offset);
+       memcpy(optpkt + tcp_offset + optlen, buf + tcp_offset,
+              sizeof(struct tcphdr) + PAYLOAD_LEN);
+
+       iph = (struct iphdr *)(optpkt + ETH_HLEN);
+       iph->ihl = 5 + (optlen / 4);
+       iph->tot_len = htons(ntohs(iph->tot_len) + optlen);
+       iph->check = 0;
+       iph->check = checksum_fold(iph, sizeof(struct iphdr) + optlen, 0);
+}
+
+/* Packets with IPv4 options shouldn't coalesce */
+static void send_ip_options(int fd, struct sockaddr_ll *daddr)
+{
+       static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+       static char optpkt[sizeof(buf) + sizeof(struct ip_timestamp)];
+       int optlen = sizeof(struct ip_timestamp);
+       int pkt_size = total_hdr_len + PAYLOAD_LEN + optlen;
+
+       create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+       write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr);
+
+       create_packet(buf, PAYLOAD_LEN * 1, 0, PAYLOAD_LEN, 0);
+       add_ipv4_ts_option(buf, optpkt);
+       write_packet(fd, optpkt, pkt_size, daddr);
+
+       create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+       write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr);
+}
+
+/* IPv4 fragments shouldn't coalesce */
+static void send_fragment4(int fd, struct sockaddr_ll *daddr)
+{
+       static char buf[IP_MAXPACKET];
+       struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+       int pkt_size = total_hdr_len + PAYLOAD_LEN;
+
+       create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+       write_packet(fd, buf, pkt_size, daddr);
+
+       /* Once fragmented, the packet retains its total length.
+        * The TCP header is prepared as if the rest of the data were in
+        * follow-up fragments, but those fragments are never actually sent.
+        */
+       memset(buf + total_hdr_len, 'a', PAYLOAD_LEN * 2);
+       fill_transportlayer(buf + tcp_offset, PAYLOAD_LEN, 0, PAYLOAD_LEN * 2, 0);
+       fill_networklayer(buf + ETH_HLEN, PAYLOAD_LEN);
+       fill_datalinklayer(buf);
+
+       iph->frag_off = htons(0x6000); /* DF = 1, MF = 1 */
+       iph->check = 0;
+       iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+       write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* IPv4 packets with a different TTL don't coalesce. */
+static void send_changed_ttl(int fd, struct sockaddr_ll *daddr)
+{
+       int pkt_size = total_hdr_len + PAYLOAD_LEN;
+       static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+       struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+
+       create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+       write_packet(fd, buf, pkt_size, daddr);
+
+       create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+       iph->ttl = 7;
+       iph->check = 0;
+       iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+       write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* Packets with a different ToS don't coalesce. */
+static void send_changed_tos(int fd, struct sockaddr_ll *daddr)
+{
+       int pkt_size = total_hdr_len + PAYLOAD_LEN;
+       static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+       struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+       struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN);
+
+       create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+       write_packet(fd, buf, pkt_size, daddr);
+
+       create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+       if (proto == PF_INET) {
+               iph->tos = 1;
+               iph->check = 0;
+               iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+       } else if (proto == PF_INET6) {
+               ip6h->priority = 0xf;
+       }
+       write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* Packets with different ECN don't coalesce. */
+static void send_changed_ECN(int fd, struct sockaddr_ll *daddr)
+{
+       int pkt_size = total_hdr_len + PAYLOAD_LEN;
+       static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+       struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+
+       create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+       write_packet(fd, buf, pkt_size, daddr);
+
+       create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+       if (proto == PF_INET) {
+               buf[ETH_HLEN + 1] ^= 0x2; /* ECN set to 10 */
+               iph->check = 0;
+               iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+       } else {
+               buf[ETH_HLEN + 1] ^= 0x20; /* ECN set to 10 */
+       }
+       write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* IPv6 fragments and packets with extension headers don't coalesce. */
+static void send_fragment6(int fd, struct sockaddr_ll *daddr)
+{
+       static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+       static char extpkt[MAX_HDR_LEN + PAYLOAD_LEN +
+                          sizeof(struct ip6_frag)];
+       struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN);
+       struct ip6_frag *frag = (void *)(extpkt + tcp_offset);
+       int extlen = sizeof(struct ip6_frag);
+       int bufpkt_len = total_hdr_len + PAYLOAD_LEN;
+       int extpkt_len = bufpkt_len + extlen;
+       int i;
+
+       for (i = 0; i < 2; i++) {
+               create_packet(buf, PAYLOAD_LEN * i, 0, PAYLOAD_LEN, 0);
+               write_packet(fd, buf, bufpkt_len, daddr);
+       }
+
+       create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+       memset(extpkt, 0, extpkt_len);
+
+       ip6h->nexthdr = IPPROTO_FRAGMENT;
+       ip6h->payload_len = htons(ntohs(ip6h->payload_len) + extlen);
+       frag->ip6f_nxt = IPPROTO_TCP;
+
+       memcpy(extpkt, buf, tcp_offset);
+       memcpy(extpkt + tcp_offset + extlen, buf + tcp_offset,
+              sizeof(struct tcphdr) + PAYLOAD_LEN);
+       write_packet(fd, extpkt, extpkt_len, daddr);
+
+       create_packet(buf, PAYLOAD_LEN * 3, 0, PAYLOAD_LEN, 0);
+       write_packet(fd, buf, bufpkt_len, daddr);
+}
+
+static void bind_packetsocket(int fd)
+{
+       struct sockaddr_ll daddr = {};
+
+       daddr.sll_family = AF_PACKET;
+       daddr.sll_protocol = ethhdr_proto;
+       daddr.sll_ifindex = if_nametoindex(ifname);
+       if (daddr.sll_ifindex == 0)
+               error(1, errno, "if_nametoindex");
+
+       if (bind(fd, (void *)&daddr, sizeof(daddr)) < 0)
+               error(1, errno, "could not bind socket");
+}
+
+static void set_timeout(int fd)
+{
+       struct timeval timeout;
+
+       timeout.tv_sec = 120;
+       timeout.tv_usec = 0;
+       if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, (char *)&timeout,
+                      sizeof(timeout)) < 0)
+               error(1, errno, "cannot set timeout, setsockopt failed");
+}
+
+static void check_recv_pkts(int fd, int *correct_payload,
+                           int correct_num_pkts)
+{
+       static char buffer[IP_MAXPACKET + ETH_HLEN + 1];
+       struct iphdr *iph = (struct iphdr *)(buffer + ETH_HLEN);
+       struct ipv6hdr *ip6h = (struct ipv6hdr *)(buffer + ETH_HLEN);
+       struct tcphdr *tcph;
+       bool bad_packet = false;
+       int tcp_ext_len = 0;
+       int ip_ext_len = 0;
+       int pkt_size = -1;
+       int data_len = 0;
+       int num_pkt = 0;
+       int i;
+
+       vlog("Expected {");
+       for (i = 0; i < correct_num_pkts; i++)
+               vlog("%d ", correct_payload[i]);
+       vlog("}, Total %d packets\nReceived {", correct_num_pkts);
+
+       while (1) {
+               pkt_size = recv(fd, buffer, IP_MAXPACKET + ETH_HLEN + 1, 0);
+               if (pkt_size < 0)
+                       error(1, errno, "could not receive");
+
+               if (iph->version == 4)
+                       ip_ext_len = (iph->ihl - 5) * 4;
+               else if (ip6h->version == 6 && ip6h->nexthdr != IPPROTO_TCP)
+                       ip_ext_len = sizeof(struct ip6_frag);
+
+               tcph = (struct tcphdr *)(buffer + tcp_offset + ip_ext_len);
+
+               if (tcph->fin)
+                       break;
+
+               tcp_ext_len = (tcph->doff - 5) * 4;
+               data_len = pkt_size - total_hdr_len - tcp_ext_len - ip_ext_len;
+               /* Min Ethernet frame payload is 46 (ETH_ZLEN - ETH_HLEN) per
+                * IEEE 802.3. IPv4/TCP packets without at least 6 bytes of
+                * data will be padded. Packet sockets are protocol agnostic
+                * and will not trim the padding.
+                */
+               if (pkt_size == ETH_ZLEN && iph->version == 4) {
+                       data_len = ntohs(iph->tot_len)
+                               - sizeof(struct tcphdr) - sizeof(struct iphdr);
+               }
+               vlog("%d ", data_len);
+               if (data_len != correct_payload[num_pkt]) {
+                       vlog("[!=%d]", correct_payload[num_pkt]);
+                       bad_packet = true;
+               }
+               num_pkt++;
+       }
+       vlog("}, Total %d packets.\n", num_pkt);
+       if (num_pkt != correct_num_pkts)
+               error(1, 0, "incorrect number of packets");
+       if (bad_packet)
+               error(1, 0, "incorrect packet geometry");
+
+       printf("Test succeeded\n\n");
+}
+
+static void gro_sender(void)
+{
+       static char fin_pkt[MAX_HDR_LEN];
+       struct sockaddr_ll daddr = {};
+       int txfd = -1;
+
+       txfd = socket(PF_PACKET, SOCK_RAW, IPPROTO_RAW);
+       if (txfd < 0)
+               error(1, errno, "socket creation");
+
+       memset(&daddr, 0, sizeof(daddr));
+       daddr.sll_ifindex = if_nametoindex(ifname);
+       if (daddr.sll_ifindex == 0)
+               error(1, errno, "if_nametoindex");
+       daddr.sll_family = AF_PACKET;
+       memcpy(daddr.sll_addr, dst_mac, ETH_ALEN);
+       daddr.sll_halen = ETH_ALEN;
+       create_packet(fin_pkt, PAYLOAD_LEN * 2, 0, 0, 1);
+
+       if (strcmp(testname, "data") == 0) {
+               send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN);
+               write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+               send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN / 2);
+               write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+               send_data_pkts(txfd, &daddr, PAYLOAD_LEN / 2, PAYLOAD_LEN);
+               write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+       } else if (strcmp(testname, "ack") == 0) {
+               send_ack(txfd, &daddr);
+               write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+       } else if (strcmp(testname, "flags") == 0) {
+               send_flags(txfd, &daddr, 1, 0, 0, 0);
+               write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+               send_flags(txfd, &daddr, 0, 1, 0, 0);
+               write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+               send_flags(txfd, &daddr, 0, 0, 1, 0);
+               write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+               send_flags(txfd, &daddr, 0, 0, 0, 1);
+               write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+       } else if (strcmp(testname, "tcp") == 0) {
+               send_changed_checksum(txfd, &daddr);
+               write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+               send_changed_seq(txfd, &daddr);
+               write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+               send_changed_ts(txfd, &daddr);
+               write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+               send_diff_opt(txfd, &daddr);
+               write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+       } else if (strcmp(testname, "ip") == 0) {
+               send_changed_ECN(txfd, &daddr);
+               write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+               send_changed_tos(txfd, &daddr);
+               write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+               if (proto == PF_INET) {
+                       /* Modified packets may be received out of order.
+                        * Sleep calls enforce test boundaries so that
+                        * FIN packets are not received before the other
+                        * packets.
+                        */
+                       sleep(1);
+                       send_changed_ttl(txfd, &daddr);
+                       write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+                       sleep(1);
+                       send_ip_options(txfd, &daddr);
+                       sleep(1);
+                       write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+                       sleep(1);
+                       send_fragment4(txfd, &daddr);
+                       sleep(1);
+                       write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+               } else if (proto == PF_INET6) {
+                       send_fragment6(txfd, &daddr);
+                       write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+               }
+       } else if (strcmp(testname, "large") == 0) {
+               /* 20 is the difference between the min iphdr size
+                * and the min ipv6hdr size. Like MAX_HDR_LEN,
+                * MAX_PAYLOAD is defined with the larger header of the two.
+                */
+               int offset = proto == PF_INET ? 20 : 0;
+               int remainder = (MAX_PAYLOAD + offset) % MSS;
+
+               send_large(txfd, &daddr, remainder);
+               write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+               send_large(txfd, &daddr, remainder + 1);
+               write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+       } else {
+               error(1, 0, "Unknown testcase");
+       }
+
+       if (close(txfd))
+               error(1, errno, "socket close");
+}
+
+static void gro_receiver(void)
+{
+       static int correct_payload[NUM_PACKETS];
+       int rxfd = -1;
+
+       rxfd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_NONE));
+       if (rxfd < 0)
+               error(1, errno, "socket creation");
+       setup_sock_filter(rxfd);
+       set_timeout(rxfd);
+       bind_packetsocket(rxfd);
+
+       memset(correct_payload, 0, sizeof(correct_payload));
+
+       if (strcmp(testname, "data") == 0) {
+               printf("pure data packet of same size: ");
+               correct_payload[0] = PAYLOAD_LEN * 2;
+               check_recv_pkts(rxfd, correct_payload, 1);
+
+               printf("large data packets followed by a smaller one: ");
+               correct_payload[0] = PAYLOAD_LEN * 1.5;
+               check_recv_pkts(rxfd, correct_payload, 1);
+
+               printf("small data packets followed by a larger one: ");
+               correct_payload[0] = PAYLOAD_LEN / 2;
+               correct_payload[1] = PAYLOAD_LEN;
+               check_recv_pkts(rxfd, correct_payload, 2);
+       } else if (strcmp(testname, "ack") == 0) {
+               printf("duplicate ack and pure ack: ");
+               check_recv_pkts(rxfd, correct_payload, 3);
+       } else if (strcmp(testname, "flags") == 0) {
+               correct_payload[0] = PAYLOAD_LEN * 3;
+               correct_payload[1] = PAYLOAD_LEN * 2;
+
+               printf("psh flag ends coalescing: ");
+               check_recv_pkts(rxfd, correct_payload, 2);
+
+               correct_payload[0] = PAYLOAD_LEN * 2;
+               correct_payload[1] = 0;
+               correct_payload[2] = PAYLOAD_LEN * 2;
+               printf("syn flag ends coalescing: ");
+               check_recv_pkts(rxfd, correct_payload, 3);
+
+               printf("rst flag ends coalescing: ");
+               check_recv_pkts(rxfd, correct_payload, 3);
+
+               printf("urg flag ends coalescing: ");
+               check_recv_pkts(rxfd, correct_payload, 3);
+       } else if (strcmp(testname, "tcp") == 0) {
+               correct_payload[0] = PAYLOAD_LEN;
+               correct_payload[1] = PAYLOAD_LEN;
+               correct_payload[2] = PAYLOAD_LEN;
+               correct_payload[3] = PAYLOAD_LEN;
+
+               printf("changed checksum does not coalesce: ");
+               check_recv_pkts(rxfd, correct_payload, 2);
+
+               printf("Wrong Seq number doesn't coalesce: ");
+               check_recv_pkts(rxfd, correct_payload, 2);
+
+               printf("Different timestamp doesn't coalesce: ");
+               correct_payload[0] = PAYLOAD_LEN * 2;
+               check_recv_pkts(rxfd, correct_payload, 4);
+
+               printf("Different options don't coalesce: ");
+               correct_payload[0] = PAYLOAD_LEN * 2;
+               check_recv_pkts(rxfd, correct_payload, 2);
+       } else if (strcmp(testname, "ip") == 0) {
+               correct_payload[0] = PAYLOAD_LEN;
+               correct_payload[1] = PAYLOAD_LEN;
+
+               printf("different ECN doesn't coalesce: ");
+               check_recv_pkts(rxfd, correct_payload, 2);
+
+               printf("different tos doesn't coalesce: ");
+               check_recv_pkts(rxfd, correct_payload, 2);
+
+               if (proto == PF_INET) {
+                       printf("different ttl doesn't coalesce: ");
+                       check_recv_pkts(rxfd, correct_payload, 2);
+
+                       printf("ip options don't coalesce: ");
+                       correct_payload[2] = PAYLOAD_LEN;
+                       check_recv_pkts(rxfd, correct_payload, 3);
+
+                       printf("fragmented ip4 doesn't coalesce: ");
+                       check_recv_pkts(rxfd, correct_payload, 2);
+               } else if (proto == PF_INET6) {
+                       /* GRO doesn't check for ipv6 hop limit when flushing.
+                        * Hence no corresponding test to the ipv4 case.
+                        */
+                       printf("fragmented ip6 doesn't coalesce: ");
+                       correct_payload[0] = PAYLOAD_LEN * 2;
+                       check_recv_pkts(rxfd, correct_payload, 2);
+               }
+       } else if (strcmp(testname, "large") == 0) {
+               int offset = proto == PF_INET ? 20 : 0;
+               int remainder = (MAX_PAYLOAD + offset) % MSS;
+
+               correct_payload[0] = (MAX_PAYLOAD + offset);
+               correct_payload[1] = remainder;
+               printf("Shouldn't coalesce if exceeding IP max packet size: ");
+               check_recv_pkts(rxfd, correct_payload, 2);
+
+               /* last segment sent individually, doesn't start new segment */
+               correct_payload[0] = correct_payload[0] - remainder;
+               correct_payload[1] = remainder + 1;
+               correct_payload[2] = remainder + 1;
+               check_recv_pkts(rxfd, correct_payload, 3);
+       } else {
+               error(1, 0, "Test case error, should never trigger");
+       }
+
+       if (close(rxfd))
+               error(1, errno, "socket close");
+}
+
+static void parse_args(int argc, char **argv)
+{
+       static const struct option opts[] = {
+               { "dmac", required_argument, NULL, 'D' },
+               { "iface", required_argument, NULL, 'i' },
+               { "ipv4", no_argument, NULL, '4' },
+               { "ipv6", no_argument, NULL, '6' },
+               { "rx", no_argument, NULL, 'r' },
+               { "smac", required_argument, NULL, 'S' },
+               { "test", required_argument, NULL, 't' },
+               { "verbose", no_argument, NULL, 'v' },
+               { 0, 0, 0, 0 }
+       };
+       int c;
+
+       while ((c = getopt_long(argc, argv, "46D:i:rS:t:v", opts, NULL)) != -1) {
+               switch (c) {
+               case '4':
+                       proto = PF_INET;
+                       ethhdr_proto = htons(ETH_P_IP);
+                       break;
+               case '6':
+                       proto = PF_INET6;
+                       ethhdr_proto = htons(ETH_P_IPV6);
+                       break;
+               case 'D':
+                       dmac = optarg;
+                       break;
+               case 'i':
+                       ifname = optarg;
+                       break;
+               case 'r':
+                       tx_socket = false;
+                       break;
+               case 'S':
+                       smac = optarg;
+                       break;
+               case 't':
+                       testname = optarg;
+                       break;
+               case 'v':
+                       verbose = true;
+                       break;
+               default:
+                       error(1, 0, "%s invalid option %c\n", __func__, c);
+                       break;
+               }
+       }
+}
+
+int main(int argc, char **argv)
+{
+       parse_args(argc, argv);
+
+       if (proto == PF_INET) {
+               tcp_offset = ETH_HLEN + sizeof(struct iphdr);
+               total_hdr_len = tcp_offset + sizeof(struct tcphdr);
+       } else if (proto == PF_INET6) {
+               tcp_offset = ETH_HLEN + sizeof(struct ipv6hdr);
+               total_hdr_len = MAX_HDR_LEN;
+       } else {
+               error(1, 0, "Protocol family is not ipv4 or ipv6");
+       }
+
+       read_MAC(src_mac, smac);
+       read_MAC(dst_mac, dmac);
+
+       if (tx_socket)
+               gro_sender();
+       else
+               gro_receiver();
+       return 0;
+}
diff --git a/tools/testing/selftests/net/gro.sh b/tools/testing/selftests/net/gro.sh
new file mode 100755 (executable)
index 0000000..794d2bf
--- /dev/null
@@ -0,0 +1,128 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source setup_loopback.sh
+readonly SERVER_MAC="aa:00:00:00:00:02"
+readonly CLIENT_MAC="aa:00:00:00:00:01"
+readonly TESTS=("data" "ack" "flags" "tcp" "ip" "large")
+readonly PROTOS=("ipv4" "ipv6")
+dev="eth0"
+test="all"
+proto="ipv4"
+
+setup_interrupt() {
+  # Use a timer on the host to trigger the network stack.
+  # Also disable device interrupts so we do not depend on NIC interrupts,
+  # reducing test flakiness caused by unexpected interrupts.
+  echo 100000 >"${FLUSH_PATH}"
+  echo 50 >"${IRQ_PATH}"
+}
+
+setup_ns() {
+  # Set up server_ns namespace and client_ns namespace
+  setup_macvlan_ns "${dev}" server_ns server "${SERVER_MAC}"
+  setup_macvlan_ns "${dev}" client_ns client "${CLIENT_MAC}"
+}
+
+cleanup_ns() {
+  cleanup_macvlan_ns server_ns server client_ns client
+}
+
+setup() {
+  setup_loopback_environment "${dev}"
+  setup_interrupt
+}
+
+cleanup() {
+  cleanup_loopback "${dev}"
+
+  echo "${FLUSH_TIMEOUT}" >"${FLUSH_PATH}"
+  echo "${HARD_IRQS}" >"${IRQ_PATH}"
+}
+
+run_test() {
+  local server_pid=0
+  local exit_code=0
+  local protocol=$1
+  local test=$2
+  local ARGS=( "--${protocol}" "--dmac" "${SERVER_MAC}" \
+  "--smac" "${CLIENT_MAC}" "--test" "${test}" "--verbose" )
+
+  setup_ns
+  # Each test is run 3 times to deflake, because given the receive timing,
+  # not all packets that should coalesce will be considered in the same flow
+  # on every try.
+  for tries in {1..3}; do
+    # Actual test starts here
+    ip netns exec server_ns ./gro "${ARGS[@]}" "--rx" "--iface" "server" \
+      1>>log.txt &
+    server_pid=$!
+    sleep 0.5  # to allow for socket init
+    ip netns exec client_ns ./gro "${ARGS[@]}" "--iface" "client" \
+      1>>log.txt
+    wait "${server_pid}"
+    exit_code=$?
+    if [[ "${exit_code}" -eq 0 ]]; then
+        break;
+    fi
+  done
+  cleanup_ns
+  echo ${exit_code}
+}
+
+run_all_tests() {
+  local failed_tests=()
+  for proto in "${PROTOS[@]}"; do
+    for test in "${TESTS[@]}"; do
+      echo "running test ${proto} ${test}" >&2
+      exit_code=$(run_test $proto $test)
+      if [[ "${exit_code}" -ne 0 ]]; then
+        failed_tests+=("${proto}_${test}")
+      fi;
+    done;
+  done
+  if [[ ${#failed_tests[@]} -ne 0 ]]; then
+    echo "failed tests: ${failed_tests[*]}. Please see log.txt for more logs"
+    exit 1
+  else
+    echo "All Tests Succeeded!"
+  fi;
+}
+
+usage() {
+  echo "Usage: $0 \
+  [-i <DEV>] \
+  [-t data|ack|flags|tcp|ip|large] \
+  [-p <ipv4|ipv6>]" 1>&2;
+  exit 1;
+}
+
+while getopts "i:t:p:" opt; do
+  case "${opt}" in
+    i)
+      dev="${OPTARG}"
+      ;;
+    t)
+      test="${OPTARG}"
+      ;;
+    p)
+      proto="${OPTARG}"
+      ;;
+    *)
+      usage
+      ;;
+  esac
+done
+
+readonly FLUSH_PATH="/sys/class/net/${dev}/gro_flush_timeout"
+readonly IRQ_PATH="/sys/class/net/${dev}/napi_defer_hard_irqs"
+readonly FLUSH_TIMEOUT="$(< ${FLUSH_PATH})"
+readonly HARD_IRQS="$(< ${IRQ_PATH})"
+setup
+trap cleanup EXIT
+if [[ "${test}" == "all" ]]; then
+  run_all_tests
+else
+  run_test "${proto}" "${test}"
+fi;
index f23438d..3d7dde2 100644 (file)
@@ -484,13 +484,16 @@ enum desc_type {
        MONITOR_ACQUIRE,
        EXPIRE_STATE,
        EXPIRE_POLICY,
+       SPDINFO_ATTRS,
 };
 const char *desc_name[] = {
        "create tunnel",
        "alloc spi",
        "monitor acquire",
        "expire state",
-       "expire policy"
+       "expire policy",
+       "spdinfo attributes",
+       ""
 };
 struct xfrm_desc {
        enum desc_type  type;
@@ -1593,6 +1596,155 @@ out_close:
        return ret;
 }
 
+static int xfrm_spdinfo_set_thresh(int xfrm_sock, uint32_t *seq,
+               unsigned thresh4_l, unsigned thresh4_r,
+               unsigned thresh6_l, unsigned thresh6_r,
+               bool add_bad_attr)
+{
+       struct {
+               struct nlmsghdr         nh;
+               union {
+                       uint32_t        unused;
+                       int             error;
+               };
+               char                    attrbuf[MAX_PAYLOAD];
+       } req;
+       struct xfrmu_spdhthresh thresh;
+
+       memset(&req, 0, sizeof(req));
+       req.nh.nlmsg_len        = NLMSG_LENGTH(sizeof(req.unused));
+       req.nh.nlmsg_type       = XFRM_MSG_NEWSPDINFO;
+       req.nh.nlmsg_flags      = NLM_F_REQUEST | NLM_F_ACK;
+       req.nh.nlmsg_seq        = (*seq)++;
+
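+       /* lbits/rbits are the local/remote address prefix-length thresholds
+        * that control which SPD entries the kernel hashes by prefix (see
+        * struct xfrmu_spdhthresh in the xfrm UAPI); noted here for context.
+        */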
+       thresh.lbits = thresh4_l;
+       thresh.rbits = thresh4_r;
+       if (rtattr_pack(&req.nh, sizeof(req), XFRMA_SPD_IPV4_HTHRESH, &thresh, sizeof(thresh)))
+               return -1;
+
+       thresh.lbits = thresh6_l;
+       thresh.rbits = thresh6_r;
+       if (rtattr_pack(&req.nh, sizeof(req), XFRMA_SPD_IPV6_HTHRESH, &thresh, sizeof(thresh)))
+               return -1;
+
+       if (add_bad_attr) {
+               BUILD_BUG_ON(XFRMA_IF_ID <= XFRMA_SPD_MAX + 1);
+               if (rtattr_pack(&req.nh, sizeof(req), XFRMA_IF_ID, NULL, 0)) {
+                       pr_err("adding attribute failed: no space");
+                       return -1;
+               }
+       }
+
+       if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+               pr_err("send()");
+               return -1;
+       }
+
+       if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) {
+               pr_err("recv()");
+               return -1;
+       } else if (req.nh.nlmsg_type != NLMSG_ERROR) {
+               printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type);
+               return -1;
+       }
+
+       if (req.error) {
+               printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error));
+               return -1;
+       }
+
+       return 0;
+}
+
+static int xfrm_spdinfo_attrs(int xfrm_sock, uint32_t *seq)
+{
+       struct {
+               struct nlmsghdr                 nh;
+               union {
+                       uint32_t        unused;
+                       int             error;
+               };
+               char                    attrbuf[MAX_PAYLOAD];
+       } req;
+
+       if (xfrm_spdinfo_set_thresh(xfrm_sock, seq, 32, 31, 120, 16, false)) {
+               pr_err("Can't set SPD HTHRESH");
+               return KSFT_FAIL;
+       }
+
+       memset(&req, 0, sizeof(req));
+
+       req.nh.nlmsg_len        = NLMSG_LENGTH(sizeof(req.unused));
+       req.nh.nlmsg_type       = XFRM_MSG_GETSPDINFO;
+       req.nh.nlmsg_flags      = NLM_F_REQUEST;
+       req.nh.nlmsg_seq        = (*seq)++;
+       if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+               pr_err("send()");
+               return KSFT_FAIL;
+       }
+
+       if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) {
+               pr_err("recv()");
+               return KSFT_FAIL;
+       } else if (req.nh.nlmsg_type == XFRM_MSG_NEWSPDINFO) {
+               size_t len = NLMSG_PAYLOAD(&req.nh, sizeof(req.unused));
+               struct rtattr *attr = (void *)req.attrbuf;
+               int got_thresh = 0;
+
+               for (; RTA_OK(attr, len); attr = RTA_NEXT(attr, len)) {
+                       if (attr->rta_type == XFRMA_SPD_IPV4_HTHRESH) {
+                               struct xfrmu_spdhthresh *t = RTA_DATA(attr);
+
+                               got_thresh++;
+                               if (t->lbits != 32 || t->rbits != 31) {
+                                       pr_err("thresh differ: %u, %u",
+                                                       t->lbits, t->rbits);
+                                       return KSFT_FAIL;
+                               }
+                       }
+                       if (attr->rta_type == XFRMA_SPD_IPV6_HTHRESH) {
+                               struct xfrmu_spdhthresh *t = RTA_DATA(attr);
+
+                               got_thresh++;
+                               if (t->lbits != 120 || t->rbits != 16) {
+                                       pr_err("thresh differ: %u, %u",
+                                                       t->lbits, t->rbits);
+                                       return KSFT_FAIL;
+                               }
+                       }
+               }
+               if (got_thresh != 2) {
+                       pr_err("only %d thresh returned by XFRM_MSG_GETSPDINFO", got_thresh);
+                       return KSFT_FAIL;
+               }
+       } else if (req.nh.nlmsg_type != NLMSG_ERROR) {
+               printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type);
+               return KSFT_FAIL;
+       } else {
+               printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error));
+               return KSFT_FAIL;
+       }
+
+       /* Restore the default */
+       if (xfrm_spdinfo_set_thresh(xfrm_sock, seq, 32, 32, 128, 128, false)) {
+               pr_err("Can't restore SPD HTHRESH");
+               return KSFT_FAIL;
+       }
+
+       /*
+        * At this moment xfrm uses nlmsg_parse_deprecated(), which
+        * implies NL_VALIDATE_LIBERAL - ignoring attributes with
+        * (type > maxtype). nla_parse_deprecated_strict() would enforce
+        * it, as would the even stricter nla_parse().
+        * Right now the bad attribute is not expected to cause a failure,
+        * but to be ignored.
+        */
+       if (xfrm_spdinfo_set_thresh(xfrm_sock, seq, 32, 32, 128, 128, true))
+               return KSFT_PASS;
+
+       return KSFT_PASS;
+}
+
 static int child_serv(int xfrm_sock, uint32_t *seq,
                unsigned int nr, int cmd_fd, void *buf, struct xfrm_desc *desc)
 {
@@ -1717,6 +1869,9 @@ static int child_f(unsigned int nr, int test_desc_fd, int cmd_fd, void *buf)
                case EXPIRE_POLICY:
                        ret = xfrm_expire_policy(xfrm_sock, &seq, nr, &desc);
                        break;
+               case SPDINFO_ATTRS:
+                       ret = xfrm_spdinfo_attrs(xfrm_sock, &seq);
+                       break;
                default:
                        printk("Unknown desc type %d", desc.type);
                        exit(KSFT_FAIL);
@@ -1994,8 +2149,10 @@ static int write_proto_plan(int fd, int proto)
  *   sizeof(xfrm_user_polexpire)  = 168  |  sizeof(xfrm_user_polexpire)  = 176
  *
  * Check the affected by the UABI difference structures.
+ * Also, check translation for xfrm_set_spdinfo: it has its own attributes
+ * which need to be correctly copied, but not translated.
  */
-const unsigned int compat_plan = 4;
+const unsigned int compat_plan = 5;
 static int write_compat_struct_tests(int test_desc_fd)
 {
        struct xfrm_desc desc = {};
@@ -2019,6 +2176,10 @@ static int write_compat_struct_tests(int test_desc_fd)
        if (__write_desc(test_desc_fd, &desc))
                return -1;
 
+       desc.type = SPDINFO_ATTRS;
+       if (__write_desc(test_desc_fd, &desc))
+               return -1;
+
        return 0;
 }
 
index 170be65..1cbfeb5 100755 (executable)
@@ -86,9 +86,6 @@ echo "raw truncate hlen - 1 (expected to fail: EINVAL)"
 echo "raw gso min size"
 ./in_netns.sh ./psock_snd -v -c -g -l "${mss_exceeds}"
 
-echo "raw gso min size - 1 (expected to fail)"
-(! ./in_netns.sh ./psock_snd -v -c -g -l "${mss}")
-
 echo "raw gso max size"
 ./in_netns.sh ./psock_snd -v -c -g -l "${max_mss}"
 
diff --git a/tools/testing/selftests/net/setup_loopback.sh b/tools/testing/selftests/net/setup_loopback.sh
new file mode 100755 (executable)
index 0000000..0a8ad97
--- /dev/null
@@ -0,0 +1,82 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+netdev_check_for_carrier() {
+       local -r dev="$1"
+
+       for i in {1..5}; do
+               carrier="$(cat /sys/class/net/${dev}/carrier)"
+               if [[ "${carrier}" -ne 1 ]] ; then
+                       echo "carrier not ready yet..." >&2
+                       sleep 1
+               else
+                       echo "carrier ready" >&2
+                       break
+               fi
+       done
+       echo "${carrier}"
+}
+
+# Assumes that there is no existing macvlan device on the physical device
+setup_loopback_environment() {
+	local dev="$1"
+
+       # Fail hard if we cannot turn on loopback mode for the current NIC
+       ethtool -K "${dev}" loopback on || exit 1
+       sleep 1
+
+       # Check for the carrier
+       carrier=$(netdev_check_for_carrier ${dev})
+       if [[ "${carrier}" -ne 1 ]] ; then
+               echo "setup_loopback_environment failed"
+               exit 1
+       fi
+}
+
+setup_macvlan_ns(){
+       local -r link_dev="$1"
+       local -r ns_name="$2"
+       local -r ns_dev="$3"
+       local -r ns_mac="$4"
+       local -r addr="$5"
+
+       ip link add link "${link_dev}" dev "${ns_dev}" \
+               address "${ns_mac}" type macvlan
+       exit_code=$?
+       if [[ "${exit_code}" -ne 0 ]]; then
+               echo "setup_macvlan_ns failed"
+               exit $exit_code
+       fi
+
+       [[ -e /var/run/netns/"${ns_name}" ]] || ip netns add "${ns_name}"
+       ip link set dev "${ns_dev}" netns "${ns_name}"
+       ip -netns "${ns_name}" link set dev "${ns_dev}" up
+       if [[ -n "${addr}" ]]; then
+               ip -netns "${ns_name}" addr add dev "${ns_dev}" "${addr}"
+       fi
+
+       sleep 1
+}
+
+cleanup_macvlan_ns(){
+       while (( $# >= 2 )); do
+               ns_name="$1"
+               ns_dev="$2"
+               ip -netns "${ns_name}" link del dev "${ns_dev}"
+               ip netns del "${ns_name}"
+               shift 2
+       done
+}
+
+cleanup_loopback(){
+       local -r dev="$1"
+
+       ethtool -K "${dev}" loopback off
+       sleep 1
+
+       # Check for the carrier
+       carrier=$(netdev_check_for_carrier ${dev})
+       if [[ "${carrier}" -ne 1 ]] ; then
+               echo "cleanup_loopback failed"
+               exit 1
+       fi
+}
diff --git a/tools/testing/selftests/net/toeplitz.c b/tools/testing/selftests/net/toeplitz.c
new file mode 100644 (file)
index 0000000..710ac95
--- /dev/null
@@ -0,0 +1,585 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Toeplitz test
+ *
+ * 1. Read packets and their rx_hash using PF_PACKET/TPACKET_V3
+ * 2. Compute the rx_hash in software based on the packet contents
+ * 3. Compare the two
+ *
+ * Optionally, either '-C $rx_irq_cpu_list' or '-r $rps_bitmap' may be given.
+ *
+ * If '-C $rx_irq_cpu_list' is given, also
+ *
+ * 4. Identify the cpu on which the packet arrived with PACKET_FANOUT_CPU
+ * 5. Compute the rxqueue that RSS would select based on this rx_hash
+ * 6. Using the $rx_irq_cpu_list map, identify the arriving cpu based on rxq irq
+ * 7. Compare the cpus from 4 and 6
+ *
+ * Else if '-r $rps_bitmap' is given, also
+ *
+ * 4. Identify the cpu on which the packet arrived with PACKET_FANOUT_CPU
+ * 5. Compute the cpu that RPS should select based on rx_hash and $rps_bitmap
+ * 6. Compare the cpus from 4 and 5
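+ *
+ * Example invocation (illustrative; toeplitz.sh reads the key from
+ * /proc/sys/net/core/netdev_rss_key):
+ *   ./toeplitz -u -4 -i eth0 -k "$KEY" -T 1000 -C 0,1,2,3 -s -v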
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <linux/filter.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <net/if.h>
+#include <netdb.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/sysinfo.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#define TOEPLITZ_KEY_MIN_LEN   40
+#define TOEPLITZ_KEY_MAX_LEN   60
+
+#define TOEPLITZ_STR_LEN(K)    (((K) * 3) - 1) /* hex encoded: AA:BB:CC:...:ZZ */
+#define TOEPLITZ_STR_MIN_LEN   TOEPLITZ_STR_LEN(TOEPLITZ_KEY_MIN_LEN)
+#define TOEPLITZ_STR_MAX_LEN   TOEPLITZ_STR_LEN(TOEPLITZ_KEY_MAX_LEN)
+
+#define FOUR_TUPLE_MAX_LEN     ((sizeof(struct in6_addr) * 2) + (sizeof(uint16_t) * 2))
+
+#define RSS_MAX_CPUS (1 << 16) /* real constraint is PACKET_FANOUT_MAX */
+
+#define RPS_MAX_CPUS 16UL      /* must be a power of 2 */
+
+/* configuration options (cmdline arguments) */
+static uint16_t cfg_dport =    8000;
+static int cfg_family =                AF_INET6;
+static char *cfg_ifname =      "eth0";
+static int cfg_num_queues;
+static int cfg_num_rps_cpus;
+static bool cfg_sink;
+static int cfg_type =          SOCK_STREAM;
+static int cfg_timeout_msec =  1000;
+static bool cfg_verbose;
+
+/* global vars */
+static int num_cpus;
+static int ring_block_nr;
+static int ring_block_sz;
+
+/* stats */
+static int frames_received;
+static int frames_nohash;
+static int frames_error;
+
+#define log_verbose(args...)   do { if (cfg_verbose) fprintf(stderr, args); } while (0)
+
+/* tpacket ring */
+struct ring_state {
+       int fd;
+       char *mmap;
+       int idx;
+       int cpu;
+};
+
+static unsigned int rx_irq_cpus[RSS_MAX_CPUS]; /* map from rxq to cpu */
+static int rps_silo_to_cpu[RPS_MAX_CPUS];
+static unsigned char toeplitz_key[TOEPLITZ_KEY_MAX_LEN];
+static struct ring_state rings[RSS_MAX_CPUS];
+
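+/* Classic Toeplitz: slide a 32-bit window across the key, XOR-ing the
+ * current window into the hash for every set bit of the four-tuple.
+ */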
+static inline uint32_t toeplitz(const unsigned char *four_tuple,
+                               const unsigned char *key)
+{
+       int i, bit, ret = 0;
+       uint32_t key32;
+
+       key32 = ntohl(*((uint32_t *)key));
+       key += 4;
+
+       for (i = 0; i < FOUR_TUPLE_MAX_LEN; i++) {
+               for (bit = 7; bit >= 0; bit--) {
+                       if (four_tuple[i] & (1 << bit))
+                               ret ^= key32;
+
+                       key32 <<= 1;
+                       key32 |= !!(key[0] & (1 << bit));
+               }
+               key++;
+       }
+
+       return ret;
+}
+
+/* Compare computed cpu with arrival cpu from packet_fanout_cpu */
+static void verify_rss(uint32_t rx_hash, int cpu)
+{
+       int queue = rx_hash % cfg_num_queues;
+
+       log_verbose(" rxq %d (cpu %d)", queue, rx_irq_cpus[queue]);
+       if (rx_irq_cpus[queue] != cpu) {
+               log_verbose(". error: rss cpu mismatch (%d)", cpu);
+               frames_error++;
+       }
+}
+
+static void verify_rps(uint64_t rx_hash, int cpu)
+{
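+       /* Mirror the kernel's RPS bucket selection: scale the 32-bit hash
+        * into [0, cfg_num_rps_cpus) with a fixed-point multiply and shift.
+        */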
+       int silo = (rx_hash * cfg_num_rps_cpus) >> 32;
+
+       log_verbose(" silo %d (cpu %d)", silo, rps_silo_to_cpu[silo]);
+       if (rps_silo_to_cpu[silo] != cpu) {
+               log_verbose(". error: rps cpu mismatch (%d)", cpu);
+               frames_error++;
+       }
+}
+
+static void log_rxhash(int cpu, uint32_t rx_hash,
+                      const char *addrs, int addr_len)
+{
+       char saddr[INET6_ADDRSTRLEN], daddr[INET6_ADDRSTRLEN];
+       uint16_t *ports;
+
+       if (!inet_ntop(cfg_family, addrs, saddr, sizeof(saddr)) ||
+           !inet_ntop(cfg_family, addrs + addr_len, daddr, sizeof(daddr)))
+               error(1, 0, "address parse error");
+
+       ports = (void *)addrs + (addr_len * 2);
+       log_verbose("cpu %d: rx_hash 0x%08x [saddr %s daddr %s sport %02hu dport %02hu]",
+                   cpu, rx_hash, saddr, daddr,
+                   ntohs(ports[0]), ntohs(ports[1]));
+}
+
+/* Compare computed rxhash with rxhash received from tpacket_v3 */
+static void verify_rxhash(const char *pkt, uint32_t rx_hash, int cpu)
+{
+       unsigned char four_tuple[FOUR_TUPLE_MAX_LEN] = {0};
+       uint32_t rx_hash_sw;
+       const char *addrs;
+       int addr_len;
+
+       if (cfg_family == AF_INET) {
+               addr_len = sizeof(struct in_addr);
+               addrs = pkt + offsetof(struct iphdr, saddr);
+       } else {
+               addr_len = sizeof(struct in6_addr);
+               addrs = pkt + offsetof(struct ip6_hdr, ip6_src);
+       }
+
+       memcpy(four_tuple, addrs, (addr_len * 2) + (sizeof(uint16_t) * 2));
+       rx_hash_sw = toeplitz(four_tuple, toeplitz_key);
+
+       if (cfg_verbose)
+               log_rxhash(cpu, rx_hash, addrs, addr_len);
+
+       if (rx_hash != rx_hash_sw) {
+               log_verbose(" != expected 0x%x\n", rx_hash_sw);
+               frames_error++;
+               return;
+       }
+
+       log_verbose(" OK");
+       if (cfg_num_queues)
+               verify_rss(rx_hash, cpu);
+       else if (cfg_num_rps_cpus)
+               verify_rps(rx_hash, cpu);
+       log_verbose("\n");
+}
+
+static char *recv_frame(const struct ring_state *ring, char *frame)
+{
+       struct tpacket3_hdr *hdr = (void *)frame;
+
+       if (hdr->hv1.tp_rxhash)
+               verify_rxhash(frame + hdr->tp_net, hdr->hv1.tp_rxhash,
+                             ring->cpu);
+       else
+               frames_nohash++;
+
+       return frame + hdr->tp_next_offset;
+}
+
+/* A single TPACKET_V3 block can hold multiple frames */
+static void recv_block(struct ring_state *ring)
+{
+       struct tpacket_block_desc *block;
+       char *frame;
+       int i;
+
+       block = (void *)(ring->mmap + ring->idx * ring_block_sz);
+       if (!(block->hdr.bh1.block_status & TP_STATUS_USER))
+               return;
+
+       frame = (char *)block;
+       frame += block->hdr.bh1.offset_to_first_pkt;
+
+       for (i = 0; i < block->hdr.bh1.num_pkts; i++) {
+               frame = recv_frame(ring, frame);
+               frames_received++;
+       }
+
+       block->hdr.bh1.block_status = TP_STATUS_KERNEL;
+       ring->idx = (ring->idx + 1) % ring_block_nr;
+}
+
+/* simple test: sleep once unconditionally and then process all rings */
+static void process_rings(void)
+{
+       int i;
+
+       usleep(1000 * cfg_timeout_msec);
+
+       for (i = 0; i < num_cpus; i++)
+               recv_block(&rings[i]);
+
+       fprintf(stderr, "count: pass=%u nohash=%u fail=%u\n",
+               frames_received - frames_nohash - frames_error,
+               frames_nohash, frames_error);
+}
+
+static char *setup_ring(int fd)
+{
+       struct tpacket_req3 req3 = {0};
+       void *ring;
+
+       req3.tp_retire_blk_tov = cfg_timeout_msec;
+       req3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH;
+
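+       /* 1024 frames of 2 KiB each (2 MiB in total), split into two blocks
+        * so one block can be handed to userspace while the other fills.
+        */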
+       req3.tp_frame_size = 2048;
+       req3.tp_frame_nr = 1 << 10;
+       req3.tp_block_nr = 2;
+
+       req3.tp_block_size = req3.tp_frame_size * req3.tp_frame_nr;
+       req3.tp_block_size /= req3.tp_block_nr;
+
+       if (setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req3, sizeof(req3)))
+               error(1, errno, "setsockopt PACKET_RX_RING");
+
+       ring_block_sz = req3.tp_block_size;
+       ring_block_nr = req3.tp_block_nr;
+
+       ring = mmap(0, req3.tp_block_size * req3.tp_block_nr,
+                   PROT_READ | PROT_WRITE,
+                   MAP_SHARED | MAP_LOCKED | MAP_POPULATE, fd, 0);
+       if (ring == MAP_FAILED)
+               error(1, errno, "mmap failed");
+
+       return ring;
+}
+
+static void __set_filter(int fd, int off_proto, uint8_t proto, int off_dport)
+{
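+       /* Accept only host-bound packets with the requested transport
+        * protocol and destination port; every jump offset below targets
+        * one of the two final RET statements: drop (0) or accept (0xFFFF).
+        */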
+       struct sock_filter filter[] = {
+               BPF_STMT(BPF_LD  + BPF_B   + BPF_ABS, SKF_AD_OFF + SKF_AD_PKTTYPE),
+               BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, PACKET_HOST, 0, 4),
+               BPF_STMT(BPF_LD  + BPF_B   + BPF_ABS, off_proto),
+               BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, proto, 0, 2),
+               BPF_STMT(BPF_LD  + BPF_H   + BPF_ABS, off_dport),
+               BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_dport, 1, 0),
+               BPF_STMT(BPF_RET + BPF_K, 0),
+               BPF_STMT(BPF_RET + BPF_K, 0xFFFF),
+       };
+       struct sock_fprog prog = {};
+
+       prog.filter = filter;
+       prog.len = sizeof(filter) / sizeof(struct sock_filter);
+       if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)))
+               error(1, errno, "setsockopt filter");
+}
+
+/* filter on transport protocol and destination port */
+static void set_filter(int fd)
+{
+       const int off_dport = offsetof(struct tcphdr, dest);    /* same for udp */
+       uint8_t proto;
+
+       proto = cfg_type == SOCK_STREAM ? IPPROTO_TCP : IPPROTO_UDP;
+       if (cfg_family == AF_INET)
+               __set_filter(fd, offsetof(struct iphdr, protocol), proto,
+                            sizeof(struct iphdr) + off_dport);
+       else
+               __set_filter(fd, offsetof(struct ip6_hdr, ip6_nxt), proto,
+                            sizeof(struct ip6_hdr) + off_dport);
+}
+
+/* drop everything: used temporarily during setup */
+static void set_filter_null(int fd)
+{
+       struct sock_filter filter[] = {
+               BPF_STMT(BPF_RET + BPF_K, 0),
+       };
+       struct sock_fprog prog = {};
+
+       prog.filter = filter;
+       prog.len = sizeof(filter) / sizeof(struct sock_filter);
+       if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)))
+               error(1, errno, "setsockopt filter");
+}
+
+static int create_ring(char **ring)
+{
+       struct fanout_args args = {
+               .id = 1,
+               .type_flags = PACKET_FANOUT_CPU,
+               .max_num_members = RSS_MAX_CPUS
+       };
+       struct sockaddr_ll ll = { 0 };
+       int fd, val;
+
+       fd = socket(PF_PACKET, SOCK_DGRAM, 0);
+       if (fd == -1)
+               error(1, errno, "socket creation failed");
+
+       val = TPACKET_V3;
+       if (setsockopt(fd, SOL_PACKET, PACKET_VERSION, &val, sizeof(val)))
+               error(1, errno, "setsockopt PACKET_VERSION");
+       *ring = setup_ring(fd);
+
+       /* block packets until all rings are added to the fanout group:
+        * else packets can arrive during setup and get misclassified
+        */
+       set_filter_null(fd);
+
+       ll.sll_family = AF_PACKET;
+       ll.sll_ifindex = if_nametoindex(cfg_ifname);
+       ll.sll_protocol = cfg_family == AF_INET ? htons(ETH_P_IP) :
+                                                 htons(ETH_P_IPV6);
+       if (bind(fd, (void *)&ll, sizeof(ll)))
+               error(1, errno, "bind");
+
+       /* must come after bind: verifies all programs in group match */
+       if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &args, sizeof(args))) {
+               /* on failure, retry using old API if that is sufficient:
+                * it has a hard limit of 256 sockets, so only try if
+                * (a) only testing rxhash, not RSS, or (b) <= 256 cpus.
+                * in this API, the third argument is left implicit.
+                */
+               if (cfg_num_queues || num_cpus > 256 ||
+                   setsockopt(fd, SOL_PACKET, PACKET_FANOUT,
+                              &args, sizeof(uint32_t)))
+                       error(1, errno, "setsockopt PACKET_FANOUT cpu");
+       }
+
+       return fd;
+}
+
+/* setup inet(6) socket to blackhole the test traffic, if arg '-s' */
+static int setup_sink(void)
+{
+       int fd, val;
+
+       fd = socket(cfg_family, cfg_type, 0);
+       if (fd == -1)
+               error(1, errno, "socket %d.%d", cfg_family, cfg_type);
+
+       val = 1 << 20;
+       if (setsockopt(fd, SOL_SOCKET, SO_RCVBUFFORCE, &val, sizeof(val)))
+               error(1, errno, "setsockopt rcvbuf");
+
+       return fd;
+}
+
+static void setup_rings(void)
+{
+       int i;
+
+       for (i = 0; i < num_cpus; i++) {
+               rings[i].cpu = i;
+               rings[i].fd = create_ring(&rings[i].mmap);
+       }
+
+       /* accept packets once all rings in the fanout group are up */
+       for (i = 0; i < num_cpus; i++)
+               set_filter(rings[i].fd);
+}
+
+static void cleanup_rings(void)
+{
+       int i;
+
+       for (i = 0; i < num_cpus; i++) {
+               if (munmap(rings[i].mmap, ring_block_nr * ring_block_sz))
+                       error(1, errno, "munmap");
+               if (close(rings[i].fd))
+                       error(1, errno, "close");
+       }
+}
+
+static void parse_cpulist(const char *arg)
+{
+       do {
+               rx_irq_cpus[cfg_num_queues++] = strtol(arg, NULL, 10);
+
+               arg = strchr(arg, ',');
+               if (!arg)
+                       break;
+               arg++;                  /* skip ',' */
+       } while (1);
+}
+
+static void show_cpulist(void)
+{
+       int i;
+
+       for (i = 0; i < cfg_num_queues; i++)
+               fprintf(stderr, "rxq %d: cpu %d\n", i, rx_irq_cpus[i]);
+}
+
+static void show_silos(void)
+{
+       int i;
+
+       for (i = 0; i < cfg_num_rps_cpus; i++)
+               fprintf(stderr, "silo %d: cpu %d\n", i, rps_silo_to_cpu[i]);
+}
+
+static void parse_toeplitz_key(const char *str, int slen, unsigned char *key)
+{
+       int i, ret, off;
+
+       if (slen < TOEPLITZ_STR_MIN_LEN ||
+           slen > TOEPLITZ_STR_MAX_LEN + 1)
+               error(1, 0, "invalid toeplitz key");
+
+       for (i = 0, off = 0; off < slen; i++, off += 3) {
+               ret = sscanf(str + off, "%hhx", &key[i]);
+               if (ret != 1)
+                       error(1, 0, "key parse error at %d off %d len %d",
+                             i, off, slen);
+       }
+}
+
+static void parse_rps_bitmap(const char *arg)
+{
+       unsigned long bitmap;
+       int i;
+
+       bitmap = strtoul(arg, NULL, 0);
+
+       if (bitmap & ~(RPS_MAX_CPUS - 1))
+               error(1, 0, "rps bitmap 0x%lx out of bounds 0..%lu",
+                     bitmap, RPS_MAX_CPUS - 1);
+
+       for (i = 0; i < RPS_MAX_CPUS; i++)
+               if (bitmap & 1UL << i)
+                       rps_silo_to_cpu[cfg_num_rps_cpus++] = i;
+}
+
+static void parse_opts(int argc, char **argv)
+{
+       static struct option long_options[] = {
+           {"dport",   required_argument, 0, 'd'},
+           {"cpus",    required_argument, 0, 'C'},
+           {"key",     required_argument, 0, 'k'},
+           {"iface",   required_argument, 0, 'i'},
+           {"ipv4",    no_argument, 0, '4'},
+           {"ipv6",    no_argument, 0, '6'},
+           {"sink",    no_argument, 0, 's'},
+           {"tcp",     no_argument, 0, 't'},
+           {"timeout", required_argument, 0, 'T'},
+           {"udp",     no_argument, 0, 'u'},
+           {"verbose", no_argument, 0, 'v'},
+           {"rps",     required_argument, 0, 'r'},
+           {0, 0, 0, 0}
+       };
+       bool have_toeplitz = false;
+       int index, c;
+
+       while ((c = getopt_long(argc, argv, "46C:d:i:k:r:stT:uv", long_options, &index)) != -1) {
+               switch (c) {
+               case '4':
+                       cfg_family = AF_INET;
+                       break;
+               case '6':
+                       cfg_family = AF_INET6;
+                       break;
+               case 'C':
+                       parse_cpulist(optarg);
+                       break;
+               case 'd':
+                       cfg_dport = strtol(optarg, NULL, 0);
+                       break;
+               case 'i':
+                       cfg_ifname = optarg;
+                       break;
+               case 'k':
+                       parse_toeplitz_key(optarg, strlen(optarg),
+                                          toeplitz_key);
+                       have_toeplitz = true;
+                       break;
+               case 'r':
+                       parse_rps_bitmap(optarg);
+                       break;
+               case 's':
+                       cfg_sink = true;
+                       break;
+               case 't':
+                       cfg_type = SOCK_STREAM;
+                       break;
+               case 'T':
+                       cfg_timeout_msec = strtol(optarg, NULL, 0);
+                       break;
+               case 'u':
+                       cfg_type = SOCK_DGRAM;
+                       break;
+               case 'v':
+                       cfg_verbose = true;
+                       break;
+
+               default:
+                       error(1, 0, "unknown option %c", optopt);
+                       break;
+               }
+       }
+
+       if (!have_toeplitz)
+               error(1, 0, "Must supply rss key ('-k')");
+
+       num_cpus = get_nprocs();
+       if (num_cpus > RSS_MAX_CPUS)
+               error(1, 0, "increase RSS_MAX_CPUS");
+
+       if (cfg_num_queues && cfg_num_rps_cpus)
+               error(1, 0,
+                     "Can't supply both RSS cpus ('-C') and RPS map ('-r')");
+       if (cfg_verbose) {
+               show_cpulist();
+               show_silos();
+       }
+}
+
+int main(int argc, char **argv)
+{
+       const int min_tests = 10;
+       int fd_sink = -1;
+
+       parse_opts(argc, argv);
+
+       if (cfg_sink)
+               fd_sink = setup_sink();
+
+       setup_rings();
+       process_rings();
+       cleanup_rings();
+
+       if (cfg_sink && close(fd_sink))
+               error(1, errno, "close sink");
+
+       if (frames_received - frames_nohash < min_tests)
+               error(1, 0, "too few frames for verification");
+
+       return frames_error;
+}
diff --git a/tools/testing/selftests/net/toeplitz.sh b/tools/testing/selftests/net/toeplitz.sh
new file mode 100755 (executable)
index 0000000..0a49907
--- /dev/null
@@ -0,0 +1,201 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# extended toeplitz test: test rxhash plus, optionally, either (1) rss mapping
+# from rxhash to rx queue ('-rss') or (2) rps mapping from rxhash to cpu
+# ('-rps <rps_map>')
+#
+# irq-pattern-prefix can be derived from /sys/kernel/irq/*/action,
+# which is a driver-specific encoding.
+#
+# invoke as ./toeplitz.sh (-i <iface>) -u|-t -4|-6 \
+# [(-rss -irq_prefix <irq-pattern-prefix>)|(-rps <rps_map>)]
+
+source setup_loopback.sh
+readonly SERVER_IP4="192.168.1.200/24"
+readonly SERVER_IP6="fda8::1/64"
+readonly SERVER_MAC="aa:00:00:00:00:02"
+
+readonly CLIENT_IP4="192.168.1.100/24"
+readonly CLIENT_IP6="fda8::2/64"
+readonly CLIENT_MAC="aa:00:00:00:00:01"
+
+PORT=8000
+KEY="$(</proc/sys/net/core/netdev_rss_key)"
+TEST_RSS=false
+RPS_MAP=""
+PROTO_FLAG=""
+IP_FLAG=""
+DEV="eth0"
+
+# Return the number of rxqs among which RSS is configured to spread packets.
+# This is determined by reading the RSS indirection table using ethtool.
+get_rss_cfg_num_rxqs() {
+       echo $(ethtool -x "${DEV}" |
+               egrep "[[:space:]]+[0-9]+:[[:space:]]+" |
+               cut -d: -f2- |
+               awk '{$1=$1};1' |
+               tr ' ' '\n' |
+               sort -u |
+               wc -l)
+}
+
+# Return a list of the receive irq handler cpus.
+# The list is ordered by the irqs, so first rxq-0 cpu, then rxq-1 cpu, etc.
+# Reads /sys/kernel/irq/ in order, so algorithm depends on
+# irq_{rxq-0} < irq_{rxq-1}, etc.
+get_rx_irq_cpus() {
+       CPUS=""
+       # sort so that irq 2 is read before irq 10
+       SORTED_IRQS=$(for i in /sys/kernel/irq/*; do echo $i; done | sort -V)
+       # Consider only as many queues as RSS actually uses. We assume that
+       # if RSS_CFG_NUM_RXQS=N, then RSS uses rxqs 0-(N-1).
+       RSS_CFG_NUM_RXQS=$(get_rss_cfg_num_rxqs)
+       RXQ_COUNT=0
+
+       for i in ${SORTED_IRQS}
+       do
+               [[ "${RXQ_COUNT}" -lt "${RSS_CFG_NUM_RXQS}" ]] || break
+               # lookup relevant IRQs by action name
+               [[ -e "$i/actions" ]] || continue
+               cat "$i/actions" | grep -q "${IRQ_PATTERN}" || continue
+               irqname=$(<"$i/actions")
+
+               # skip IRQs that have never fired
+               irqcount=$(cat "$i/per_cpu_count" | tr -d '0,')
+               [[ -n "${irqcount}" ]] || continue
+
+               # lookup CPU
+               irq=$(basename "$i")
+               cpu=$(cat "/proc/irq/$irq/smp_affinity_list")
+
+               if [[ -z "${CPUS}" ]]; then
+                       CPUS="${cpu}"
+               else
+                       CPUS="${CPUS},${cpu}"
+               fi
+               RXQ_COUNT=$((RXQ_COUNT+1))
+       done
+
+       echo "${CPUS}"
+}
+
+get_disable_rfs_cmd() {
+       echo "echo 0 > /proc/sys/net/core/rps_sock_flow_entries;"
+}
+
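+# Emit one "echo <bitmap> > .../rps_cpus" command per rxq of ${DEV}.
+# For example (illustrative), an argument of "f" would target cpus 0-3.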
+get_set_rps_bitmaps_cmd() {
+       CMD=""
+       for i in /sys/class/net/${DEV}/queues/rx-*/rps_cpus
+       do
+               CMD="${CMD} echo $1 > ${i};"
+       done
+
+       echo "${CMD}"
+}
+
+get_disable_rps_cmd() {
+       echo "$(get_set_rps_bitmaps_cmd 0)"
+}
+
+die() {
+       echo "$1"
+       exit 1
+}
+
+check_nic_rxhash_enabled() {
+       local -r pattern="receive-hashing:\ on"
+
+       ethtool -k "${DEV}" | grep -q "${pattern}" || die "rxhash must be enabled"
+}
+
+parse_opts() {
+       local prog="$1"
+       shift 1
+
+       while [[ "$1" =~ "-" ]]; do
+               if [[ "$1" = "-irq_prefix" ]]; then
+                       shift
+                       IRQ_PATTERN="^$1-[0-9]*$"
+               elif [[ "$1" = "-u" || "$1" = "-t" ]]; then
+                       PROTO_FLAG="$1"
+               elif [[ "$1" = "-4" ]]; then
+                       IP_FLAG="$1"
+                       SERVER_IP="${SERVER_IP4}"
+                       CLIENT_IP="${CLIENT_IP4}"
+               elif [[ "$1" = "-6" ]]; then
+                       IP_FLAG="$1"
+                       SERVER_IP="${SERVER_IP6}"
+                       CLIENT_IP="${CLIENT_IP6}"
+               elif [[ "$1" = "-rss" ]]; then
+                       TEST_RSS=true
+               elif [[ "$1" = "-rps" ]]; then
+                       shift
+                       RPS_MAP="$1"
+               elif [[ "$1" = "-i" ]]; then
+                       shift
+                       DEV="$1"
+               else
+                       die "Usage: ${prog} (-i <iface>) -u|-t -4|-6 \
+                            [(-rss -irq_prefix <irq-pattern-prefix>)|(-rps <rps_map>)]"
+               fi
+               shift
+       done
+}
+
+setup() {
+       setup_loopback_environment "${DEV}"
+
+       # Set up server_ns namespace and client_ns namespace
+       setup_macvlan_ns "${DEV}" server_ns server \
+       "${SERVER_MAC}" "${SERVER_IP}"
+       setup_macvlan_ns "${DEV}" client_ns client \
+       "${CLIENT_MAC}" "${CLIENT_IP}"
+}
+
+cleanup() {
+       cleanup_macvlan_ns server_ns server client_ns client
+       cleanup_loopback "${DEV}"
+}
+
+parse_opts "$0" "$@"
+
+setup
+trap cleanup EXIT
+
+check_nic_rxhash_enabled
+
+# Actual test starts here
+if [[ "${TEST_RSS}" = true ]]; then
+       # RPS/RFS must be disabled because they move packets between cpus,
+       # which breaks the PACKET_FANOUT_CPU identification of RSS decisions.
+       eval "$(get_disable_rfs_cmd) $(get_disable_rps_cmd)" \
+         ip netns exec server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \
+         -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 \
+         -C "$(get_rx_irq_cpus)" -s -v &
+elif [[ ! -z "${RPS_MAP}" ]]; then
+       eval "$(get_disable_rfs_cmd) $(get_set_rps_bitmaps_cmd ${RPS_MAP})" \
+         ip netns exec server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \
+         -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 \
+         -r "0x${RPS_MAP}" -s -v &
+else
+       ip netns exec server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \
+         -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 -s -v &
+fi
+
+server_pid=$!
+
+ip netns exec client_ns ./toeplitz_client.sh "${PROTO_FLAG}" \
+  "${IP_FLAG}" "${SERVER_IP%%/*}" "${PORT}" &
+
+client_pid=$!
+
+wait "${server_pid}"
+exit_code=$?
+kill -9 "${client_pid}"
+if [[ "${exit_code}" -eq 0 ]]; then
+       echo "Test Succeeded!"
+fi
+exit "${exit_code}"
diff --git a/tools/testing/selftests/net/toeplitz_client.sh b/tools/testing/selftests/net/toeplitz_client.sh
new file mode 100755 (executable)
index 0000000..2fef34f
--- /dev/null
@@ -0,0 +1,30 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# A simple program for generating traffic for the toeplitz test.
+#
+# This program sends packets periodically for a conservative 20 seconds. The
+# intent is for the calling program to kill it once it is no longer needed,
+# rather than waiting for the 20-second expiration.
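+#
+# Example (as invoked by toeplitz.sh): ./toeplitz_client.sh -u -4 192.168.1.200 8000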
+
+send_traffic() {
+       expiration=$((SECONDS+20))
+       while [[ "${SECONDS}" -lt "${expiration}" ]]
+       do
+               if [[ "${PROTO}" == "-u" ]]; then
+                       echo "msg $i" | nc "${IPVER}" -u -w 0 "${ADDR}" "${PORT}"
+               else
+                       echo "msg $i" | nc "${IPVER}" -w 0 "${ADDR}" "${PORT}"
+               fi
+               sleep 0.001
+       done
+}
+
+PROTO=$1
+IPVER=$2
+ADDR=$3
+PORT=$4
+
+send_traffic
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json
new file mode 100644 (file)
index 0000000..88a20c7
--- /dev/null
@@ -0,0 +1,137 @@
+[
+       {
+           "id": "ce7d",
+           "name": "Add mq Qdisc to multi-queue device (4 queues)",
+           "category": [
+            "qdisc",
+            "mq"
+           ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+           "setup": [
+            "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+           ],
+           "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq",
+           "expExitCode": "0",
+           "verifyCmd": "$TC qdisc show dev $ETH",
+           "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+           "matchCount": "4",
+           "teardown": [
+                   "echo \"1\" > /sys/bus/netdevsim/del_device"
+           ]
+       },
+       {
+           "id": "2f82",
+           "name": "Add mq Qdisc to multi-queue device (256 queues)",
+           "category": [
+            "qdisc",
+            "mq"
+           ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+           "setup": [
+            "echo \"1 1 256\" > /sys/bus/netdevsim/new_device"
+           ],
+           "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq",
+           "expExitCode": "0",
+           "verifyCmd": "$TC qdisc show dev $ETH",
+           "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-9,a-f][0-9,a-f]{0,2} bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+           "matchCount": "256",
+           "teardown": [
+                   "echo \"1\" > /sys/bus/netdevsim/del_device"
+           ]
+       },
+       {
+           "id": "c525",
+           "name": "Add duplicate mq Qdisc",
+           "category": [
+            "qdisc",
+            "mq"
+           ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+           "setup": [
+            "echo \"1 1 4\" > /sys/bus/netdevsim/new_device",
+            "$TC qdisc add dev $ETH root handle 1: mq"
+           ],
+           "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq",
+           "expExitCode": "2",
+           "verifyCmd": "$TC qdisc show dev $ETH",
+           "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+           "matchCount": "4",
+           "teardown": [
+                   "echo \"1\" > /sys/bus/netdevsim/del_device"
+           ]
+       },
+       {
+           "id": "128a",
+           "name": "Delete nonexistent mq Qdisc",
+           "category": [
+            "qdisc",
+            "mq"
+           ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+           "setup": [
+            "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+           ],
+           "cmdUnderTest": "$TC qdisc del dev $ETH root handle 1: mq",
+           "expExitCode": "2",
+           "verifyCmd": "$TC qdisc show dev $ETH",
+           "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+           "matchCount": "0",
+           "teardown": [
+                   "echo \"1\" > /sys/bus/netdevsim/del_device"
+           ]
+       },
+       {
+           "id": "03a9",
+           "name": "Delete mq Qdisc twice",
+           "category": [
+            "qdisc",
+            "mq"
+           ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+           "setup": [
+            "echo \"1 1 4\" > /sys/bus/netdevsim/new_device",
+            "$TC qdisc add dev $ETH root handle 1: mq",
+            "$TC qdisc del dev $ETH root handle 1: mq"
+           ],
+           "cmdUnderTest": "$TC qdisc del dev $ETH root handle 1: mq",
+           "expExitCode": "2",
+           "verifyCmd": "$TC qdisc show dev $ETH",
+           "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+           "matchCount": "0",
+           "teardown": [
+                   "echo \"1\" > /sys/bus/netdevsim/del_device"
+           ]
+       },
+       {
+           "id": "be0f",
+           "name": "Add mq Qdisc to single-queue device",
+           "category": [
+            "qdisc",
+            "mq"
+           ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+           "setup": [
+            "echo \"1 1\" > /sys/bus/netdevsim/new_device"
+           ],
+           "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq",
+           "expExitCode": "2",
+           "verifyCmd": "$TC qdisc show dev $ETH",
+           "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+           "matchCount": "0",
+           "teardown": [
+                   "echo \"1\" > /sys/bus/netdevsim/del_device"
+           ]
+       }
+]
index cd4a27e..ea04f04 100644 (file)
@@ -17,6 +17,7 @@ NAMES = {
           'DEV1': 'v0p1',
           'DEV2': '',
           'DUMMY': 'dummy1',
+         'ETH': 'eth0',
           'BATCH_FILE': './batch.txt',
           'BATCH_DIR': 'tmp',
           # Length of time in seconds to wait before terminating a command
index e363bda..2ea438e 100644 (file)
@@ -210,8 +210,10 @@ static void anon_release_pages(char *rel_area)
 
 static void anon_allocate_area(void **alloc_area)
 {
-       if (posix_memalign(alloc_area, page_size, nr_pages * page_size))
-               err("posix_memalign() failed");
+       *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
+                          MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+       if (*alloc_area == MAP_FAILED)
+               err("mmap of anonymous memory failed");
 }
 
 static void noop_alias_mapping(__u64 *start, size_t len, unsigned long offset)
index 9869598..b50dbe2 100644 (file)
@@ -892,6 +892,8 @@ static void kvm_destroy_vm_debugfs(struct kvm *kvm)
 
 static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
 {
+       static DEFINE_MUTEX(kvm_debugfs_lock);
+       struct dentry *dent;
        char dir_name[ITOA_MAX_LEN * 2];
        struct kvm_stat_data *stat_data;
        const struct _kvm_stats_desc *pdesc;
@@ -903,8 +905,23 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
                return 0;
 
        snprintf(dir_name, sizeof(dir_name), "%d-%d", task_pid_nr(current), fd);
-       kvm->debugfs_dentry = debugfs_create_dir(dir_name, kvm_debugfs_dir);
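+       /* A duplicate pid-fd name would make debugfs_create_dir() fail and
+        * complain; detect it up front and skip stats for this VM instead.
+        */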
+       mutex_lock(&kvm_debugfs_lock);
+       dent = debugfs_lookup(dir_name, kvm_debugfs_dir);
+       if (dent) {
+               pr_warn_ratelimited("KVM: debugfs: duplicate directory %s\n", dir_name);
+               dput(dent);
+               mutex_unlock(&kvm_debugfs_lock);
+               return 0;
+       }
+       dent = debugfs_create_dir(dir_name, kvm_debugfs_dir);
+       mutex_unlock(&kvm_debugfs_lock);
+       if (IS_ERR(dent))
+               return 0;
 
+       kvm->debugfs_dentry = dent;
        kvm->debugfs_stat_data = kcalloc(kvm_debugfs_num_entries,
                                         sizeof(*kvm->debugfs_stat_data),
                                         GFP_KERNEL_ACCOUNT);
@@ -3110,6 +3124,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
                                        ++vcpu->stat.generic.halt_poll_invalid;
                                goto out;
                        }
+                       cpu_relax();
                        poll_end = cur = ktime_get();
                } while (kvm_vcpu_can_poll(cur, stop));
        }
@@ -4390,6 +4405,19 @@ struct compat_kvm_dirty_log {
        };
 };
 
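+/* 32-bit layout of struct kvm_clear_dirty_log: identical except that the
+ * userspace bitmap pointer is a compat_uptr_t.
+ */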
+struct compat_kvm_clear_dirty_log {
+       __u32 slot;
+       __u32 num_pages;
+       __u64 first_page;
+       union {
+               compat_uptr_t dirty_bitmap; /* one bit per page */
+               __u64 padding2;
+       };
+};
+
 static long kvm_vm_compat_ioctl(struct file *filp,
                           unsigned int ioctl, unsigned long arg)
 {
@@ -4399,6 +4424,24 @@ static long kvm_vm_compat_ioctl(struct file *filp,
        if (kvm->mm != current->mm)
                return -EIO;
        switch (ioctl) {
+#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
+       case KVM_CLEAR_DIRTY_LOG: {
+               struct compat_kvm_clear_dirty_log compat_log;
+               struct kvm_clear_dirty_log log;
+
+               if (copy_from_user(&compat_log, (void __user *)arg,
+                                  sizeof(compat_log)))
+                       return -EFAULT;
+               log.slot         = compat_log.slot;
+               log.num_pages    = compat_log.num_pages;
+               log.first_page   = compat_log.first_page;
+               log.padding2     = compat_log.padding2;
+               log.dirty_bitmap = compat_ptr(compat_log.dirty_bitmap);
+
+               r = kvm_vm_ioctl_clear_dirty_log(kvm, &log);
+               break;
+       }
+#endif
        case KVM_GET_DIRTY_LOG: {
                struct compat_kvm_dirty_log compat_log;
                struct kvm_dirty_log log;
@@ -5172,7 +5215,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm)
        }
        add_uevent_var(env, "PID=%d", kvm->userspace_pid);
 
-       if (!IS_ERR_OR_NULL(kvm->debugfs_dentry)) {
+       if (kvm->debugfs_dentry) {
                char *tmp, *p = kmalloc(PATH_MAX, GFP_KERNEL_ACCOUNT);
 
                if (p) {