Merge branch 'akpm' (patches from Andrew)

author Linus Torvalds <torvalds@linux-foundation.org>

Sat, 27 Oct 2018 02:33:41 +0000 (19:33 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Sat, 27 Oct 2018 02:33:41 +0000 (19:33 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Sat, 27 Oct 2018 02:33:41 +0000 (19:33 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 27 Oct 2018 02:33:41 +0000 (19:33 -0700)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt

index dcd0825..b90fe3b 100644 (file)
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1759,6 +1759,18 @@
                 nobypass        [PPC/POWERNV]
                         Disable IOMMU bypass, using IOMMU for PCI devices.
  
+       iommu.strict=   [ARM64] Configure TLB invalidation behaviour
+                       Format: { "0" | "1" }
+                       0 - Lazy mode.
+                         Request that DMA unmap operations use deferred
+                         invalidation of hardware TLBs, for increased
+                         throughput at the cost of reduced device isolation.
+                         Will fall back to strict mode if not supported by
+                         the relevant IOMMU driver.
+                       1 - Strict mode (default).
+                         DMA unmap operations invalidate IOMMU hardware TLBs
+                         synchronously.
+
         iommu.passthrough=
                         [ARM64] Configure DMA to bypass the IOMMU by default.
                         Format: { "0" | "1" }
@@ -2416,7 +2428,7 @@
                         seconds.  Use this parameter to check at some
                         other rate.  0 disables periodic checking.
  
-       memtest=        [KNL,X86,ARM] Enable memtest
+       memtest=        [KNL,X86,ARM,PPC] Enable memtest
                         Format: <integer>
                         default : 0 <disable>
                         Specifies the number of memtest passes to be
diff --git a/Documentation/devicetree/bindings/arm/al,alpine.txt b/Documentation/devicetree/bindings/arm/al,alpine.txt

index f404a4f..d00debe 100644 (file)
--- a/Documentation/devicetree/bindings/arm/al,alpine.txt
+++ b/Documentation/devicetree/bindings/arm/al,alpine.txt
@@ -14,75 +14,3 @@ compatible: must contain "al,alpine"
  
         ...
  }
-
-* CPU node:
-
-The Alpine platform includes cortex-a15 cores.
-enable-method: must be "al,alpine-smp" to allow smp  [1]
-
-Example:
-
-cpus {
-       #address-cells = <1>;
-       #size-cells = <0>;
-       enable-method = "al,alpine-smp";
-
-       cpu@0 {
-               compatible = "arm,cortex-a15";
-               device_type = "cpu";
-               reg = <0>;
-       };
-
-       cpu@1 {
-               compatible = "arm,cortex-a15";
-               device_type = "cpu";
-               reg = <1>;
-       };
-
-       cpu@2 {
-               compatible = "arm,cortex-a15";
-               device_type = "cpu";
-               reg = <2>;
-       };
-
-       cpu@3 {
-               compatible = "arm,cortex-a15";
-               device_type = "cpu";
-               reg = <3>;
-       };
-};
-
-
-* Alpine CPU resume registers
-
-The CPU resume register are used to define required resume address after
-reset.
-
-Properties:
-- compatible : Should contain "al,alpine-cpu-resume".
-- reg : Offset and length of the register set for the device
-
-Example:
-
-cpu_resume {
-       compatible = "al,alpine-cpu-resume";
-       reg = <0xfbff5ed0 0x30>;
-};
-
-* Alpine System-Fabric Service Registers
-
-The System-Fabric Service Registers allow various operation on CPU and
-system fabric, like powering CPUs off.
-
-Properties:
-- compatible : Should contain "al,alpine-sysfabric-service" and "syscon".
-- reg : Offset and length of the register set for the device
-
-Example:
-
-nb_service {
-        compatible = "al,alpine-sysfabric-service", "syscon";
-        reg = <0xfb070000 0x10000>;
-};
-
-[1] arm/cpu-enable-method/al,alpine-smp
diff --git a/Documentation/devicetree/bindings/arm/atmel-at91.txt b/Documentation/devicetree/bindings/arm/atmel-at91.txt

index 31220b5..4bf1b4d 100644 (file)
--- a/Documentation/devicetree/bindings/arm/atmel-at91.txt
+++ b/Documentation/devicetree/bindings/arm/atmel-at91.txt
@@ -70,173 +70,3 @@ compatible: must be one of:
         - "atmel,samv71q19"
         - "atmel,samv71q20"
         - "atmel,samv71q21"
-
-Chipid required properties:
-- compatible: Should be "atmel,sama5d2-chipid"
-- reg : Should contain registers location and length
-
-PIT Timer required properties:
-- compatible: Should be "atmel,at91sam9260-pit"
-- reg: Should contain registers location and length
-- interrupts: Should contain interrupt for the PIT which is the IRQ line
-  shared across all System Controller members.
-
-System Timer (ST) required properties:
-- compatible: Should be "atmel,at91rm9200-st", "syscon", "simple-mfd"
-- reg: Should contain registers location and length
-- interrupts: Should contain interrupt for the ST which is the IRQ line
-  shared across all System Controller members.
-- clocks: phandle to input clock.
-Its subnodes can be:
-- watchdog: compatible should be "atmel,at91rm9200-wdt"
-
-RSTC Reset Controller required properties:
-- compatible: Should be "atmel,<chip>-rstc".
-  <chip> can be "at91sam9260" or "at91sam9g45" or "sama5d3"
-- reg: Should contain registers location and length
-- clocks: phandle to input clock.
-
-Example:
-
-       rstc@fffffd00 {
-               compatible = "atmel,at91sam9260-rstc";
-               reg = <0xfffffd00 0x10>;
-               clocks = <&clk32k>;
-       };
-
-RAMC SDRAM/DDR Controller required properties:
-- compatible: Should be "atmel,at91rm9200-sdramc", "syscon"
-                       "atmel,at91sam9260-sdramc",
-                       "atmel,at91sam9g45-ddramc",
-                       "atmel,sama5d3-ddramc",
-- reg: Should contain registers location and length
-
-Examples:
-
-       ramc0: ramc@ffffe800 {
-               compatible = "atmel,at91sam9g45-ddramc";
-               reg = <0xffffe800 0x200>;
-       };
-
-SHDWC Shutdown Controller
-
-required properties:
-- compatible: Should be "atmel,<chip>-shdwc".
-  <chip> can be "at91sam9260", "at91sam9rl" or "at91sam9x5".
-- reg: Should contain registers location and length
-- clocks: phandle to input clock.
-
-optional properties:
-- atmel,wakeup-mode: String, operation mode of the wakeup mode.
-  Supported values are: "none", "high", "low", "any".
-- atmel,wakeup-counter: Counter on Wake-up 0 (between 0x0 and 0xf).
-
-optional at91sam9260 properties:
-- atmel,wakeup-rtt-timer: boolean to enable Real-time Timer Wake-up.
-
-optional at91sam9rl properties:
-- atmel,wakeup-rtc-timer: boolean to enable Real-time Clock Wake-up.
-- atmel,wakeup-rtt-timer: boolean to enable Real-time Timer Wake-up.
-
-optional at91sam9x5 properties:
-- atmel,wakeup-rtc-timer: boolean to enable Real-time Clock Wake-up.
-
-Example:
-
-       shdwc@fffffd10 {
-               compatible = "atmel,at91sam9260-shdwc";
-               reg = <0xfffffd10 0x10>;
-               clocks = <&clk32k>;
-       };
-
-SHDWC SAMA5D2-Compatible Shutdown Controller
-
-1) shdwc node
-
-required properties:
-- compatible: should be "atmel,sama5d2-shdwc".
-- reg: should contain registers location and length
-- clocks: phandle to input clock.
-- #address-cells: should be one. The cell is the wake-up input index.
-- #size-cells: should be zero.
-
-optional properties:
-
-- debounce-delay-us: minimum wake-up inputs debouncer period in
-  microseconds. It's usually a board-related property.
-- atmel,wakeup-rtc-timer: boolean to enable Real-Time Clock wake-up.
-
-The node contains child nodes for each wake-up input that the platform uses.
-
-2) input nodes
-
-Wake-up input nodes are usually described in the "board" part of the Device
-Tree. Note also that input 0 is linked to the wake-up pin and is frequently
-used.
-
-Required properties:
-- reg: should contain the wake-up input index [0 - 15].
-
-Optional properties:
-- atmel,wakeup-active-high: boolean, the corresponding wake-up input described
-  by the child, forces the wake-up of the core power supply on a high level.
-  The default is to be active low.
-
-Example:
-
-On the SoC side:
-       shdwc@f8048010 {
-               compatible = "atmel,sama5d2-shdwc";
-               reg = <0xf8048010 0x10>;
-               clocks = <&clk32k>;
-               #address-cells = <1>;
-               #size-cells = <0>;
-               atmel,wakeup-rtc-timer;
-       };
-
-On the board side:
-       shdwc@f8048010 {
-               debounce-delay-us = <976>;
-
-               input@0 {
-                       reg = <0>;
-               };
-
-               input@1 {
-                       reg = <1>;
-                       atmel,wakeup-active-high;
-               };
-       };
-
-Special Function Registers (SFR)
-
-Special Function Registers (SFR) manage specific aspects of the integrated
-memory, bridge implementations, processor and other functionality not controlled
-elsewhere.
-
-required properties:
-- compatible: Should be "atmel,<chip>-sfr", "syscon" or
-       "atmel,<chip>-sfrbu", "syscon"
-  <chip> can be "sama5d3", "sama5d4" or "sama5d2".
-- reg: Should contain registers location and length
-
-       sfr@f0038000 {
-               compatible = "atmel,sama5d3-sfr", "syscon";
-               reg = <0xf0038000 0x60>;
-       };
-
-Security Module (SECUMOD)
-
-The Security Module macrocell provides all necessary secure functions to avoid
-voltage, temperature, frequency and mechanical attacks on the chip. It also
-embeds secure memories that can be scrambled
-
-required properties:
-- compatible: Should be "atmel,<chip>-secumod", "syscon".
-  <chip> can be "sama5d2".
-- reg: Should contain registers location and length
-
-       secumod@fc040000 {
-               compatible = "atmel,sama5d2-secumod", "syscon";
-               reg = <0xfc040000 0x100>;
-       };
diff --git a/Documentation/devicetree/bindings/arm/atmel-sysregs.txt b/Documentation/devicetree/bindings/arm/atmel-sysregs.txt

new file mode 100644 (file)

index 0000000..4b96608
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/atmel-sysregs.txt
@@ -0,0 +1,171 @@
+Atmel system registers
+
+Chipid required properties:
+- compatible: Should be "atmel,sama5d2-chipid"
+- reg : Should contain registers location and length
+
+PIT Timer required properties:
+- compatible: Should be "atmel,at91sam9260-pit"
+- reg: Should contain registers location and length
+- interrupts: Should contain interrupt for the PIT which is the IRQ line
+  shared across all System Controller members.
+
+System Timer (ST) required properties:
+- compatible: Should be "atmel,at91rm9200-st", "syscon", "simple-mfd"
+- reg: Should contain registers location and length
+- interrupts: Should contain interrupt for the ST which is the IRQ line
+  shared across all System Controller members.
+- clocks: phandle to input clock.
+Its subnodes can be:
+- watchdog: compatible should be "atmel,at91rm9200-wdt"
+
+RSTC Reset Controller required properties:
+- compatible: Should be "atmel,<chip>-rstc".
+  <chip> can be "at91sam9260" or "at91sam9g45" or "sama5d3"
+- reg: Should contain registers location and length
+- clocks: phandle to input clock.
+
+Example:
+
+       rstc@fffffd00 {
+               compatible = "atmel,at91sam9260-rstc";
+               reg = <0xfffffd00 0x10>;
+               clocks = <&clk32k>;
+       };
+
+RAMC SDRAM/DDR Controller required properties:
+- compatible: Should be "atmel,at91rm9200-sdramc", "syscon"
+                       "atmel,at91sam9260-sdramc",
+                       "atmel,at91sam9g45-ddramc",
+                       "atmel,sama5d3-ddramc",
+- reg: Should contain registers location and length
+
+Examples:
+
+       ramc0: ramc@ffffe800 {
+               compatible = "atmel,at91sam9g45-ddramc";
+               reg = <0xffffe800 0x200>;
+       };
+
+SHDWC Shutdown Controller
+
+required properties:
+- compatible: Should be "atmel,<chip>-shdwc".
+  <chip> can be "at91sam9260", "at91sam9rl" or "at91sam9x5".
+- reg: Should contain registers location and length
+- clocks: phandle to input clock.
+
+optional properties:
+- atmel,wakeup-mode: String, operation mode of the wakeup mode.
+  Supported values are: "none", "high", "low", "any".
+- atmel,wakeup-counter: Counter on Wake-up 0 (between 0x0 and 0xf).
+
+optional at91sam9260 properties:
+- atmel,wakeup-rtt-timer: boolean to enable Real-time Timer Wake-up.
+
+optional at91sam9rl properties:
+- atmel,wakeup-rtc-timer: boolean to enable Real-time Clock Wake-up.
+- atmel,wakeup-rtt-timer: boolean to enable Real-time Timer Wake-up.
+
+optional at91sam9x5 properties:
+- atmel,wakeup-rtc-timer: boolean to enable Real-time Clock Wake-up.
+
+Example:
+
+       shdwc@fffffd10 {
+               compatible = "atmel,at91sam9260-shdwc";
+               reg = <0xfffffd10 0x10>;
+               clocks = <&clk32k>;
+       };
+
+SHDWC SAMA5D2-Compatible Shutdown Controller
+
+1) shdwc node
+
+required properties:
+- compatible: should be "atmel,sama5d2-shdwc".
+- reg: should contain registers location and length
+- clocks: phandle to input clock.
+- #address-cells: should be one. The cell is the wake-up input index.
+- #size-cells: should be zero.
+
+optional properties:
+
+- debounce-delay-us: minimum wake-up inputs debouncer period in
+  microseconds. It's usually a board-related property.
+- atmel,wakeup-rtc-timer: boolean to enable Real-Time Clock wake-up.
+
+The node contains child nodes for each wake-up input that the platform uses.
+
+2) input nodes
+
+Wake-up input nodes are usually described in the "board" part of the Device
+Tree. Note also that input 0 is linked to the wake-up pin and is frequently
+used.
+
+Required properties:
+- reg: should contain the wake-up input index [0 - 15].
+
+Optional properties:
+- atmel,wakeup-active-high: boolean, the corresponding wake-up input described
+  by the child, forces the wake-up of the core power supply on a high level.
+  The default is to be active low.
+
+Example:
+
+On the SoC side:
+       shdwc@f8048010 {
+               compatible = "atmel,sama5d2-shdwc";
+               reg = <0xf8048010 0x10>;
+               clocks = <&clk32k>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+               atmel,wakeup-rtc-timer;
+       };
+
+On the board side:
+       shdwc@f8048010 {
+               debounce-delay-us = <976>;
+
+               input@0 {
+                       reg = <0>;
+               };
+
+               input@1 {
+                       reg = <1>;
+                       atmel,wakeup-active-high;
+               };
+       };
+
+Special Function Registers (SFR)
+
+Special Function Registers (SFR) manage specific aspects of the integrated
+memory, bridge implementations, processor and other functionality not controlled
+elsewhere.
+
+required properties:
+- compatible: Should be "atmel,<chip>-sfr", "syscon" or
+       "atmel,<chip>-sfrbu", "syscon"
+  <chip> can be "sama5d3", "sama5d4" or "sama5d2".
+- reg: Should contain registers location and length
+
+       sfr@f0038000 {
+               compatible = "atmel,sama5d3-sfr", "syscon";
+               reg = <0xf0038000 0x60>;
+       };
+
+Security Module (SECUMOD)
+
+The Security Module macrocell provides all necessary secure functions to avoid
+voltage, temperature, frequency and mechanical attacks on the chip. It also
+embeds secure memories that can be scrambled.
+
+required properties:
+- compatible: Should be "atmel,<chip>-secumod", "syscon".
+  <chip> can be "sama5d2".
+- reg: Should contain registers location and length
+
+       secumod@fc040000 {
+               compatible = "atmel,sama5d2-secumod", "syscon";
+               reg = <0xfc040000 0x100>;
+       };
diff --git a/Documentation/devicetree/bindings/arm/cpu-enable-method/al,alpine-smp b/Documentation/devicetree/bindings/arm/cpu-enable-method/al,alpine-smp

index c2e0cc5..35e5afb 100644 (file)
--- a/Documentation/devicetree/bindings/arm/cpu-enable-method/al,alpine-smp
+++ b/Documentation/devicetree/bindings/arm/cpu-enable-method/al,alpine-smp
@@ -14,7 +14,28 @@ Related properties:  (none)
  
  Note:
  This enable method requires valid nodes compatible with
-"al,alpine-cpu-resume" and "al,alpine-nb-service"[1].
+"al,alpine-cpu-resume" and "al,alpine-nb-service".
+
+
+* Alpine CPU resume registers
+
+The CPU resume registers are used to define the required resume address after
+reset.
+
+Properties:
+- compatible : Should contain "al,alpine-cpu-resume".
+- reg : Offset and length of the register set for the device
+
+
+* Alpine System-Fabric Service Registers
+
+The System-Fabric Service Registers allow various operations on the CPUs and
+system fabric, such as powering CPUs off.
+
+Properties:
+- compatible : Should contain "al,alpine-sysfabric-service" and "syscon".
+- reg : Offset and length of the register set for the device
+
  
  Example:
  
@@ -48,5 +69,12 @@ cpus {
         };
  };
  
---
-[1] arm/al,alpine.txt
+cpu_resume {
+       compatible = "al,alpine-cpu-resume";
+       reg = <0xfbff5ed0 0x30>;
+};
+
+nb_service {
+        compatible = "al,alpine-sysfabric-service", "syscon";
+        reg = <0xfb070000 0x10000>;
+};
diff --git a/Documentation/devicetree/bindings/arm/cpus.txt b/Documentation/devicetree/bindings/arm/cpus.txt

index 96dfccc..b0198a1 100644 (file)
--- a/Documentation/devicetree/bindings/arm/cpus.txt
+++ b/Documentation/devicetree/bindings/arm/cpus.txt
@@ -276,7 +276,7 @@ described below.
                 Usage: optional
                 Value type: <prop-encoded-array>
                 Definition: A u32 value that represents the running time dynamic
-                           power coefficient in units of mW/MHz/uV^2. The
+                           power coefficient in units of uW/MHz/V^2. The
                             coefficient can either be calculated from power
                             measurements or derived by analysis.
  
@@ -287,7 +287,7 @@ described below.
  
                             Pdyn = dynamic-power-coefficient * V^2 * f
  
-                           where voltage is in uV, frequency is in MHz.
+                           where voltage is in V, frequency is in MHz.
  
  Example 1 (dual-cluster big.LITTLE system 32-bit):
  
diff --git a/Documentation/devicetree/bindings/arm/freescale/fsl,layerscape-dcfg.txt b/Documentation/devicetree/bindings/arm/freescale/fsl,layerscape-dcfg.txt

new file mode 100644 (file)

index 0000000..b5cb374
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/freescale/fsl,layerscape-dcfg.txt
@@ -0,0 +1,19 @@
+Freescale DCFG
+
+DCFG is the device configuration unit, which provides general purpose
+configuration and status for the device, such as setting the secondary
+core start address and releasing the secondary core from holdoff and startup.
+
+Required properties:
+  - compatible: Should contain a chip-specific compatible string,
+       Chip-specific strings are of the form "fsl,<chip>-dcfg",
+       The following <chip>s are known to be supported:
+       ls1012a, ls1021a, ls1043a, ls1046a, ls2080a.
+
+  - reg : should contain base address and length of DCFG memory-mapped registers
+
+Example:
+       dcfg: dcfg@1ee0000 {
+               compatible = "fsl,ls1021a-dcfg";
+               reg = <0x0 0x1ee0000 0x0 0x10000>;
+       };
diff --git a/Documentation/devicetree/bindings/arm/freescale/fsl,layerscape-scfg.txt b/Documentation/devicetree/bindings/arm/freescale/fsl,layerscape-scfg.txt

new file mode 100644 (file)

index 0000000..0ab67b0
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/freescale/fsl,layerscape-scfg.txt
@@ -0,0 +1,19 @@
+Freescale SCFG
+
+SCFG is the supplemental configuration unit, which provides SoC specific
+configuration and status registers for the chip, such as those used for
+getting PEX port status.
+
+Required properties:
+  - compatible: Should contain a chip-specific compatible string,
+       Chip-specific strings are of the form "fsl,<chip>-scfg",
+       The following <chip>s are known to be supported:
+       ls1012a, ls1021a, ls1043a, ls1046a, ls2080a.
+
+  - reg: should contain base address and length of SCFG memory-mapped registers
+
+Example:
+       scfg: scfg@1570000 {
+               compatible = "fsl,ls1021a-scfg";
+               reg = <0x0 0x1570000 0x0 0x10000>;
+       };
diff --git a/Documentation/devicetree/bindings/arm/fsl.txt b/Documentation/devicetree/bindings/arm/fsl.txt

index 8a1baa2..1e775aa 100644 (file)
--- a/Documentation/devicetree/bindings/arm/fsl.txt
+++ b/Documentation/devicetree/bindings/arm/fsl.txt
@@ -101,45 +101,6 @@ Freescale LS1021A Platform Device Tree Bindings
  Required root node compatible properties:
    - compatible = "fsl,ls1021a";
  
-Freescale SoC-specific Device Tree Bindings
--------------------------------------------
-
-Freescale SCFG
-  SCFG is the supplemental configuration unit, that provides SoC specific
-configuration and status registers for the chip. Such as getting PEX port
-status.
-  Required properties:
-  - compatible: Should contain a chip-specific compatible string,
-       Chip-specific strings are of the form "fsl,<chip>-scfg",
-       The following <chip>s are known to be supported:
-       ls1012a, ls1021a, ls1043a, ls1046a, ls2080a.
-
-  - reg: should contain base address and length of SCFG memory-mapped registers
-
-Example:
-       scfg: scfg@1570000 {
-               compatible = "fsl,ls1021a-scfg";
-               reg = <0x0 0x1570000 0x0 0x10000>;
-       };
-
-Freescale DCFG
-  DCFG is the device configuration unit, that provides general purpose
-configuration and status for the device. Such as setting the secondary
-core start address and release the secondary core from holdoff and startup.
-  Required properties:
-  - compatible: Should contain a chip-specific compatible string,
-       Chip-specific strings are of the form "fsl,<chip>-dcfg",
-       The following <chip>s are known to be supported:
-       ls1012a, ls1021a, ls1043a, ls1046a, ls2080a.
-
-  - reg : should contain base address and length of DCFG memory-mapped registers
-
-Example:
-       dcfg: dcfg@1ee0000 {
-               compatible = "fsl,ls1021a-dcfg";
-               reg = <0x0 0x1ee0000 0x0 0x10000>;
-       };
-
  Freescale ARMv8 based Layerscape SoC family Device Tree Bindings
  ----------------------------------------------------------------
  
diff --git a/Documentation/devicetree/bindings/arm/secure.txt b/Documentation/devicetree/bindings/arm/secure.txt

index e31303f..f27bbff 100644 (file)
--- a/Documentation/devicetree/bindings/arm/secure.txt
+++ b/Documentation/devicetree/bindings/arm/secure.txt
@@ -32,7 +32,8 @@ describe the view of Secure world using the standard bindings. These
  secure- bindings only need to be used where both the Secure and Normal
  world views need to be described in a single device tree.
  
-Valid Secure world properties:
+Valid Secure world properties
+-----------------------------
  
  - secure-status : specifies whether the device is present and usable
    in the secure world. The combination of this with "status" allows
@@ -51,3 +52,19 @@ Valid Secure world properties:
     status = "disabled"; secure-status = "okay";     /* S-only */
     status = "disabled";                             /* disabled in both */
     status = "disabled"; secure-status = "disabled"; /* disabled in both */
+
+The secure-chosen node
+----------------------
+
+Similar to the /chosen node which serves as a place for passing data
+between firmware and the operating system, the /secure-chosen node may
+be used to pass data to the Secure OS. Only the properties defined
+below may appear in the /secure-chosen node.
+
+- stdout-path : specifies the device to be used by the Secure OS for
+  its console output. The syntax is the same as for /chosen/stdout-path.
+  If the /secure-chosen node exists but the stdout-path property is not
+  present, the Secure OS should not perform any console output. If
+  /secure-chosen does not exist, the Secure OS should use the value of
+  /chosen/stdout-path instead (that is, use the same device as the
+  Normal world OS).
diff --git a/Documentation/devicetree/bindings/arm/zte,sysctrl.txt b/Documentation/devicetree/bindings/arm/zte,sysctrl.txt

new file mode 100644 (file)

index 0000000..7e66b7f
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/zte,sysctrl.txt
@@ -0,0 +1,30 @@
+ZTE sysctrl Registers
+
+Registers for 'zte,zx296702' SoC:
+
+System management required properties:
+      - compatible = "zte,sysctrl"
+
+Low power management required properties:
+      - compatible = "zte,zx296702-pcu"
+
+Bus matrix required properties:
+      - compatible = "zte,zx-bus-matrix"
+
+
+Registers for 'zte,zx296718' SoC:
+
+System management required properties:
+      - compatible = "zte,zx296718-aon-sysctrl"
+      - compatible = "zte,zx296718-sysctrl"
+
+Example:
+aon_sysctrl: aon-sysctrl@116000 {
+       compatible = "zte,zx296718-aon-sysctrl", "syscon";
+       reg = <0x116000 0x1000>;
+};
+
+sysctrl: sysctrl@1463000 {
+       compatible = "zte,zx296718-sysctrl", "syscon";
+       reg = <0x1463000 0x1000>;
+};
diff --git a/Documentation/devicetree/bindings/arm/zte.txt b/Documentation/devicetree/bindings/arm/zte.txt

index 8336978..3406127 100644 (file)
--- a/Documentation/devicetree/bindings/arm/zte.txt
+++ b/Documentation/devicetree/bindings/arm/zte.txt
@@ -1,20 +1,10 @@
  ZTE platforms device tree bindings
----------------------------------------
  
+---------------------------------------
  -  ZX296702 board:
      Required root node properties:
        - compatible = "zte,zx296702-ad1", "zte,zx296702"
  
-System management required properties:
-      - compatible = "zte,sysctrl"
-
-Low power management required properties:
-      - compatible = "zte,zx296702-pcu"
-
-Bus matrix required properties:
-      - compatible = "zte,zx-bus-matrix"
-
-
  ---------------------------------------
  -  ZX296718 SoC:
      Required root node properties:
@@ -22,18 +12,3 @@ Bus matrix required properties:
  
  ZX296718 EVB board:
        - "zte,zx296718-evb"
-
-System management required properties:
-      - compatible = "zte,zx296718-aon-sysctrl"
-      - compatible = "zte,zx296718-sysctrl"
-
-Example:
-aon_sysctrl: aon-sysctrl@116000 {
-       compatible = "zte,zx296718-aon-sysctrl", "syscon";
-       reg = <0x116000 0x1000>;
-};
-
-sysctrl: sysctrl@1463000 {
-       compatible = "zte,zx296718-sysctrl", "syscon";
-       reg = <0x1463000 0x1000>;
-};
diff --git a/Documentation/devicetree/bindings/crypto/hisilicon,hip07-sec.txt b/Documentation/devicetree/bindings/crypto/hisilicon,hip07-sec.txt

index 78d2db9..d28fd1a 100644 (file)
--- a/Documentation/devicetree/bindings/crypto/hisilicon,hip07-sec.txt
+++ b/Documentation/devicetree/bindings/crypto/hisilicon,hip07-sec.txt
@@ -24,7 +24,7 @@ Optional properties:
  
  Example:
  
-p1_sec_a: crypto@400,d2000000 {
+p1_sec_a: crypto@400d2000000 {
         compatible = "hisilicon,hip07-sec";
         reg = <0x400 0xd0000000 0x0 0x10000
                0x400 0xd2000000 0x0 0x10000
diff --git a/Documentation/devicetree/bindings/fpga/fpga-region.txt b/Documentation/devicetree/bindings/fpga/fpga-region.txt

index 6db8aed..90c4469 100644 (file)
--- a/Documentation/devicetree/bindings/fpga/fpga-region.txt
+++ b/Documentation/devicetree/bindings/fpga/fpga-region.txt
@@ -415,7 +415,7 @@ DT Overlay contains:
                         firmware-name = "base.rbf";
  
                         fpga-bridge@4400 {
-                               compatible = "altr,freeze-bridge";
+                               compatible = "altr,freeze-bridge-controller";
                                 reg = <0x4400 0x10>;
  
                                 fpga_region1: fpga-region1 {
@@ -427,7 +427,7 @@ DT Overlay contains:
                         };
  
                         fpga-bridge@4420 {
-                               compatible = "altr,freeze-bridge";
+                               compatible = "altr,freeze-bridge-controller";
                                 reg = <0x4420 0x10>;
  
                                 fpga_region2: fpga-region2 {
diff --git a/Documentation/devicetree/bindings/i2c/i2c.txt b/Documentation/devicetree/bindings/i2c/i2c.txt

index 1126398..44efafd 100644 (file)
--- a/Documentation/devicetree/bindings/i2c/i2c.txt
+++ b/Documentation/devicetree/bindings/i2c/i2c.txt
@@ -84,7 +84,7 @@ Binding may contain optional "interrupts" property, describing interrupts
  used by the device. I2C core will assign "irq" interrupt (or the very first
  interrupt if not using interrupt names) as primary interrupt for the slave.
  
-Alternatively, devices supporting SMbus Host Notify, and connected to
+Alternatively, devices supporting SMBus Host Notify, and connected to
  adapters that support this feature, may use "host-notify" property. I2C
  core will create a virtual interrupt for Host Notify and assign it as
  primary interrupt for the slave.
diff --git a/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt b/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt

index c6e2d85..377ee63 100644 (file)
--- a/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt
+++ b/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt
@@ -12,6 +12,7 @@ Required Properties:
  
      - "renesas,ipmmu-r8a73a4" for the R8A73A4 (R-Mobile APE6) IPMMU.
      - "renesas,ipmmu-r8a7743" for the R8A7743 (RZ/G1M) IPMMU.
+    - "renesas,ipmmu-r8a7744" for the R8A7744 (RZ/G1N) IPMMU.
      - "renesas,ipmmu-r8a7745" for the R8A7745 (RZ/G1E) IPMMU.
      - "renesas,ipmmu-r8a7790" for the R8A7790 (R-Car H2) IPMMU.
      - "renesas,ipmmu-r8a7791" for the R8A7791 (R-Car M2-W) IPMMU.
diff --git a/Documentation/devicetree/bindings/mfd/arizona.txt b/Documentation/devicetree/bindings/mfd/arizona.txt

index 9b62831..148ef62 100644 (file)
--- a/Documentation/devicetree/bindings/mfd/arizona.txt
+++ b/Documentation/devicetree/bindings/mfd/arizona.txt
@@ -76,7 +76,7 @@ Deprecated properties:
  Also see child specific device properties:
    Regulator - ../regulator/arizona-regulator.txt
    Extcon    - ../extcon/extcon-arizona.txt
-  Sound     - ../sound/arizona.txt
+  Sound     - ../sound/wlf,arizona.txt
  
  Example:
  
diff --git a/Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt b/Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt

index 6611a7c..01fdc33 100644 (file)
--- a/Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt
+++ b/Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt
@@ -9,6 +9,25 @@ blocks that can be used to create functional hardware objects/devices
  such as network interfaces, crypto accelerator instances, L2 switches,
  etc.
  
+For an overview of the DPAA2 architecture and fsl-mc bus see:
+Documentation/networking/dpaa2/overview.rst
+
+As described in the above overview, all DPAA2 objects in a DPRC share the
+same hardware "isolation context" and a 10-bit value called an ICID
+(isolation context id) is expressed by the hardware to identify
+the requester.
+
+The generic 'iommus' property is insufficient to describe the relationship
+between ICIDs and IOMMUs, so an iommu-map property is used to define
+the set of possible ICIDs under a root DPRC and how they map to
+an IOMMU.
+
+For generic IOMMU bindings, see
+Documentation/devicetree/bindings/iommu/iommu.txt.
+
+For arm-smmu binding, see:
+Documentation/devicetree/bindings/iommu/arm,smmu.txt.
+
  Required properties:
  
      - compatible
@@ -88,14 +107,34 @@ Sub-nodes:
                Value type: <phandle>
                Definition: Specifies the phandle to the PHY device node associated
                            with the this dpmac.
+Optional properties:
+
+- iommu-map: Maps an ICID to an IOMMU and associated iommu-specifier
+  data.
+
+  The property is an arbitrary number of tuples of
+  (icid-base,iommu,iommu-base,length).
+
+  Any ICID i in the interval [icid-base, icid-base + length) is
+  associated with the listed IOMMU, with the iommu-specifier
+  (i - icid-base + iommu-base).
  
  Example:
  
+        smmu: iommu@5000000 {
+               compatible = "arm,mmu-500";
+               #iommu-cells = <1>;
+               stream-match-mask = <0x7C00>;
+               ...
+        };
+
          fsl_mc: fsl-mc@80c000000 {
                  compatible = "fsl,qoriq-mc";
                  reg = <0x00000008 0x0c000000 0 0x40>,    /* MC portal base */
                        <0x00000000 0x08340000 0 0x40000>; /* MC control reg */
                  msi-parent = <&its>;
+                /* define map for ICIDs 23-63 */
+                iommu-map = <23 &smmu 23 41>;
                  #address-cells = <3>;
                  #size-cells = <1>;
  
diff --git a/Documentation/devicetree/bindings/misc/lwn-bk4.txt b/Documentation/devicetree/bindings/misc/lwn-bk4.txt

new file mode 100644 (file)

index 0000000..d6a8c18
--- /dev/null
+++ b/Documentation/devicetree/bindings/misc/lwn-bk4.txt
@@ -0,0 +1,26 @@
+* Liebherr's BK4 controller external SPI
+
+A device which handles data acquisition from compatible industrial
+peripherals.
+The SPI is used for data and management purposes in both master and
+slave modes.
+
+Required properties:
+
+- compatible : Should be "lwn,bk4"
+
+Required SPI properties:
+
+- reg : Should be address of the device chip select within
+  the controller.
+
+- spi-max-frequency : Maximum SPI clock frequency of the device in Hz; should
+  be at most 30 MHz for Liebherr's BK4 external bus.
+
+Example:
+
+spidev0: spi@0 {
+       compatible = "lwn,bk4";
+       spi-max-frequency = <30000000>;
+       reg = <0>;
+};
diff --git a/Documentation/devicetree/bindings/net/can/rcar_can.txt b/Documentation/devicetree/bindings/net/can/rcar_can.txt

index 94a7f33..cc43728 100644 (file)
--- a/Documentation/devicetree/bindings/net/can/rcar_can.txt
+++ b/Documentation/devicetree/bindings/net/can/rcar_can.txt
@@ -3,6 +3,7 @@ Renesas R-Car CAN controller Device Tree Bindings
  
  Required properties:
  - compatible: "renesas,can-r8a7743" if CAN controller is a part of R8A7743 SoC.
+             "renesas,can-r8a7744" if CAN controller is a part of R8A7744 SoC.
               "renesas,can-r8a7745" if CAN controller is a part of R8A7745 SoC.
               "renesas,can-r8a7778" if CAN controller is a part of R8A7778 SoC.
               "renesas,can-r8a7779" if CAN controller is a part of R8A7779 SoC.
diff --git a/Documentation/devicetree/bindings/phy/rcar-gen2-phy.txt b/Documentation/devicetree/bindings/phy/rcar-gen2-phy.txt

index eeb9e18..4f0879a 100644 (file)
--- a/Documentation/devicetree/bindings/phy/rcar-gen2-phy.txt
+++ b/Documentation/devicetree/bindings/phy/rcar-gen2-phy.txt
@@ -5,6 +5,7 @@ This file provides information on what the device node for the R-Car generation
  
  Required properties:
  - compatible: "renesas,usb-phy-r8a7743" if the device is a part of R8A7743 SoC.
+             "renesas,usb-phy-r8a7744" if the device is a part of R8A7744 SoC.
               "renesas,usb-phy-r8a7745" if the device is a part of R8A7745 SoC.
               "renesas,usb-phy-r8a7790" if the device is a part of R8A7790 SoC.
               "renesas,usb-phy-r8a7791" if the device is a part of R8A7791 SoC.
diff --git a/Documentation/devicetree/bindings/reset/fsl,imx7-src.txt b/Documentation/devicetree/bindings/reset/fsl,imx7-src.txt

index 5e1afc3..1ab1d10 100644 (file)
--- a/Documentation/devicetree/bindings/reset/fsl,imx7-src.txt
+++ b/Documentation/devicetree/bindings/reset/fsl,imx7-src.txt
@@ -5,7 +5,7 @@ Please also refer to reset.txt in this directory for common reset
  controller binding usage.
  
  Required properties:
-- compatible: Should be "fsl,imx7-src", "syscon"
+- compatible: Should be "fsl,imx7d-src", "syscon"
  - reg: should be register base and length as documented in the
    datasheet
  - interrupts: Should contain SRC interrupt
diff --git a/Documentation/devicetree/bindings/thermal/qcom-spmi-temp-alarm.txt b/Documentation/devicetree/bindings/thermal/qcom-spmi-temp-alarm.txt

index 290ec06..0273a92 100644 (file)
--- a/Documentation/devicetree/bindings/thermal/qcom-spmi-temp-alarm.txt
+++ b/Documentation/devicetree/bindings/thermal/qcom-spmi-temp-alarm.txt
@@ -6,8 +6,7 @@ interrupt signal and status register to identify high PMIC die temperature.
  
  Required properties:
  - compatible:      Should contain "qcom,spmi-temp-alarm".
-- reg:             Specifies the SPMI address and length of the controller's
-                   registers.
+- reg:             Specifies the SPMI address.
  - interrupts:      PMIC temperature alarm interrupt.
  - #thermal-sensor-cells: Should be 0. See thermal.txt for a description.
  
@@ -20,7 +19,7 @@ Example:
  
         pm8941_temp: thermal-alarm@2400 {
                 compatible = "qcom,spmi-temp-alarm";
-               reg = <0x2400 0x100>;
+               reg = <0x2400>;
                 interrupts = <0 0x24 0 IRQ_TYPE_EDGE_RISING>;
                 #thermal-sensor-cells = <0>;
  
@@ -36,19 +35,14 @@ Example:
                         thermal-sensors = <&pm8941_temp>;
  
                         trips {
-                               passive {
-                                       temperature = <1050000>;
+                               stage1 {
+                                       temperature = <105000>;
                                         hysteresis = <2000>;
                                         type = "passive";
                                 };
-                               alert {
+                               stage2 {
                                         temperature = <125000>;
                                         hysteresis = <2000>;
-                                       type = "hot";
-                               };
-                               crit {
-                                       temperature = <145000>;
-                                       hysteresis = <2000>;
                                         type = "critical";
                                 };
                         };
diff --git a/Documentation/devicetree/bindings/thermal/qoriq-thermal.txt b/Documentation/devicetree/bindings/thermal/qoriq-thermal.txt

index 20ca4ef..04cbb90 100644 (file)
--- a/Documentation/devicetree/bindings/thermal/qoriq-thermal.txt
+++ b/Documentation/devicetree/bindings/thermal/qoriq-thermal.txt
@@ -1,9 +1,9 @@
  * Thermal Monitoring Unit (TMU) on Freescale QorIQ SoCs
  
  Required properties:
-- compatible : Must include "fsl,qoriq-tmu". The version of the device is
-       determined by the TMU IP Block Revision Register (IPBRR0) at
-       offset 0x0BF8.
+- compatible : Must include "fsl,qoriq-tmu" or "fsl,imx8mq-tmu". The
+       version of the device is determined by the TMU IP Block Revision
+       Register (IPBRR0) at offset 0x0BF8.
         Table of correspondences between IPBRR0 values and example  chips:
                 Value           Device
                 ----------      -----
diff --git a/Documentation/devicetree/bindings/thermal/rcar-gen3-thermal.txt b/Documentation/devicetree/bindings/thermal/rcar-gen3-thermal.txt

index cfa154b..ad9a435 100644 (file)
--- a/Documentation/devicetree/bindings/thermal/rcar-gen3-thermal.txt
+++ b/Documentation/devicetree/bindings/thermal/rcar-gen3-thermal.txt
@@ -7,9 +7,11 @@ inside the LSI.
  Required properties:
  - compatible           : "renesas,<soctype>-thermal",
                           Examples with soctypes are:
+                           - "renesas,r8a774a1-thermal" (RZ/G2M)
                             - "renesas,r8a7795-thermal" (R-Car H3)
                             - "renesas,r8a7796-thermal" (R-Car M3-W)
                             - "renesas,r8a77965-thermal" (R-Car M3-N)
+                           - "renesas,r8a77980-thermal" (R-Car V3H)
  - reg                  : Address ranges of the thermal registers. Each sensor
                           needs one address range. Sorting must be done in
                           increasing order according to datasheet, i.e.
@@ -19,7 +21,8 @@ Required properties:
  
  Optional properties:
  
-- interrupts           : interrupts routed to the TSC (3 for H3, M3-W and M3-N)
+- interrupts           : interrupts routed to the TSC (3 for H3, M3-W, M3-N,
+                         and V3H)
  - power-domain         : Must contain a reference to the power domain. This
                           property is mandatory if the thermal sensor instance
                           is part of a controllable power domain.
diff --git a/Documentation/devicetree/bindings/thermal/rcar-thermal.txt b/Documentation/devicetree/bindings/thermal/rcar-thermal.txt

index 67c563f..73e1613 100644 (file)
--- a/Documentation/devicetree/bindings/thermal/rcar-thermal.txt
+++ b/Documentation/devicetree/bindings/thermal/rcar-thermal.txt
@@ -4,15 +4,17 @@ Required properties:
  - compatible           : "renesas,thermal-<soctype>",
                            "renesas,rcar-gen2-thermal" (with thermal-zone) or
                            "renesas,rcar-thermal" (without thermal-zone) as
-                           fallback except R-Car D3.
+                           fallback except R-Car V3M/D3.
                           Examples with soctypes are:
                             - "renesas,thermal-r8a73a4" (R-Mobile APE6)
                             - "renesas,thermal-r8a7743" (RZ/G1M)
+                           - "renesas,thermal-r8a7744" (RZ/G1N)
                             - "renesas,thermal-r8a7779" (R-Car H1)
                             - "renesas,thermal-r8a7790" (R-Car H2)
                             - "renesas,thermal-r8a7791" (R-Car M2-W)
                             - "renesas,thermal-r8a7792" (R-Car V2H)
                             - "renesas,thermal-r8a7793" (R-Car M2-N)
+                           - "renesas,thermal-r8a77970" (R-Car V3M)
                             - "renesas,thermal-r8a77995" (R-Car D3)
  - reg                  : Address range of the thermal registers.
                           The 1st reg will be recognized as common register
@@ -21,7 +23,7 @@ Required properties:
  Option properties:
  
  - interrupts           : If present should contain 3 interrupts for
-                          R-Car D3 or 1 interrupt otherwise.
+                          R-Car V3M/D3 or 1 interrupt otherwise.
  
  Example (non interrupt support):
  
diff --git a/Documentation/devicetree/bindings/thermal/stm32-thermal.txt b/Documentation/devicetree/bindings/thermal/stm32-thermal.txt

new file mode 100644 (file)

index 0000000..8c0d5a4
--- /dev/null
+++ b/Documentation/devicetree/bindings/thermal/stm32-thermal.txt
@@ -0,0 +1,61 @@
+Binding for Thermal Sensor for STMicroelectronics STM32 series of SoCs.
+
+On STM32 SoCs, the Digital Temperature Sensor (DTS) is in charge of managing an
+analog block which delivers a frequency depending on the internal SoC's
+temperature. By using a reference frequency, DTS is able to provide a sample
+number which can be translated into a temperature by the user.
+
+DTS provides an interrupt notification mechanism by threshold. This mechanism
+offers two temperature trip points: passive and critical. The first is intended
+for passive cooling notification while the second is used for over-temperature
+reset.
+
+Required parameters:
+-------------------
+
+compatible:    Should be "st,stm32-thermal"
+reg:           This should be the physical base address and length of the
+               sensor's registers.
+clocks:        Phandle of the clock used by the thermal sensor.
+                 See: Documentation/devicetree/bindings/clock/clock-bindings.txt
+clock-names:   Should be "pclk" for register access clock and reference clock.
+                 See: Documentation/devicetree/bindings/resource-names.txt
+#thermal-sensor-cells: Should be 0. See ./thermal.txt for a description.
+interrupts:    Standard way to define interrupt number.
+
+Example:
+
+       thermal-zones {
+               cpu_thermal: cpu-thermal {
+                       polling-delay-passive = <0>;
+                       polling-delay = <0>;
+
+                       thermal-sensors = <&thermal>;
+
+                       trips {
+                               cpu_alert1: cpu-alert1 {
+                                       temperature = <85000>;
+                                       hysteresis = <0>;
+                                       type = "passive";
+                               };
+
+                               cpu_crit: cpu-crit {
+                                       temperature = <120000>;
+                                       hysteresis = <0>;
+                                       type = "critical";
+                               };
+                       };
+
+                       cooling-maps {
+                       };
+               };
+       };
+
+       thermal: thermal@50028000 {
+               compatible = "st,stm32-thermal";
+               reg = <0x50028000 0x100>;
+               clocks = <&rcc TMPSENS>;
+               clock-names = "pclk";
+               #thermal-sensor-cells = <0>;
+               interrupts = <GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>;
+       };
diff --git a/Documentation/devicetree/bindings/thermal/thermal.txt b/Documentation/devicetree/bindings/thermal/thermal.txt

index eb7ee91..ca14ba9 100644 (file)
--- a/Documentation/devicetree/bindings/thermal/thermal.txt
+++ b/Documentation/devicetree/bindings/thermal/thermal.txt
@@ -152,7 +152,7 @@ Optional property:
    Elem size: one cell  the sensors listed in the thermal-sensors property.
    Elem type: signed    Coefficients defaults to 1, in case this property
                         is not specified. A simple linear polynomial is used:
-                       Z = c0 * x0 + c1 + x1 + ... + c(n-1) * x(n-1) + cn.
+                       Z = c0 * x0 + c1 * x1 + ... + c(n-1) * x(n-1) + cn.
  
                         The coefficients are ordered and they match with sensors
                         by means of sensor ID. Additional coefficients are
diff --git a/Documentation/devicetree/bindings/timer/renesas,cmt.txt b/Documentation/devicetree/bindings/timer/renesas,cmt.txt

index 49948fc..3399267 100644 (file)
--- a/Documentation/devicetree/bindings/timer/renesas,cmt.txt
+++ b/Documentation/devicetree/bindings/timer/renesas,cmt.txt
@@ -24,6 +24,8 @@ Required Properties:
      - "renesas,r8a73a4-cmt1" for the 48-bit CMT1 device included in r8a73a4.
      - "renesas,r8a7743-cmt0" for the 32-bit CMT0 device included in r8a7743.
      - "renesas,r8a7743-cmt1" for the 48-bit CMT1 device included in r8a7743.
+    - "renesas,r8a7744-cmt0" for the 32-bit CMT0 device included in r8a7744.
+    - "renesas,r8a7744-cmt1" for the 48-bit CMT1 device included in r8a7744.
      - "renesas,r8a7745-cmt0" for the 32-bit CMT0 device included in r8a7745.
      - "renesas,r8a7745-cmt1" for the 48-bit CMT1 device included in r8a7745.
      - "renesas,r8a7790-cmt0" for the 32-bit CMT0 device included in r8a7790.
diff --git a/Documentation/devicetree/bindings/timer/renesas,ostm.txt b/Documentation/devicetree/bindings/timer/renesas,ostm.txt

index be3ae0f..81a78f8 100644 (file)
--- a/Documentation/devicetree/bindings/timer/renesas,ostm.txt
+++ b/Documentation/devicetree/bindings/timer/renesas,ostm.txt
@@ -9,7 +9,8 @@ Channels are independent from each other.
  Required Properties:
  
    - compatible: must be one or more of the following:
-    - "renesas,r7s72100-ostm" for the r7s72100 OSTM
+    - "renesas,r7s72100-ostm" for the R7S72100 (RZ/A1) OSTM
+    - "renesas,r7s9210-ostm" for the R7S9210 (RZ/A2) OSTM
      - "renesas,ostm" for any OSTM
                 This is a fallback for the above renesas,*-ostm entries
  
diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt

index f26bf66..376f244 100644 (file)
--- a/Documentation/devicetree/bindings/vendor-prefixes.txt
+++ b/Documentation/devicetree/bindings/vendor-prefixes.txt
@@ -127,6 +127,7 @@ everspin    Everspin Technologies, Inc.
  exar   Exar Corporation
  excito Excito
  ezchip EZchip Semiconductor
+facebook       Facebook
  fairphone      Fairphone B.V.
  faraday        Faraday Technology Corporation
  fastrax        Fastrax Oy
@@ -275,6 +276,7 @@ nxp NXP Semiconductors
  okaya  Okaya Electric America, Inc.
  oki    Oki Electric Industry Co., Ltd.
  olimex OLIMEX Ltd.
+olpc   One Laptop Per Child
  onion  Onion Corporation
  onnn   ON Semiconductor Corp.
  ontat  On Tat Industrial Company
diff --git a/Documentation/devicetree/bindings/watchdog/renesas-wdt.txt b/Documentation/devicetree/bindings/watchdog/renesas-wdt.txt

index 9407212..d72d118 100644 (file)
--- a/Documentation/devicetree/bindings/watchdog/renesas-wdt.txt
+++ b/Documentation/devicetree/bindings/watchdog/renesas-wdt.txt
@@ -6,6 +6,7 @@ Required properties:
                 version.
                Examples with soctypes are:
                  - "renesas,r8a7743-wdt" (RZ/G1M)
+                - "renesas,r8a7744-wdt" (RZ/G1N)
                  - "renesas,r8a7745-wdt" (RZ/G1E)
                  - "renesas,r8a774a1-wdt" (RZ/G2M)
                  - "renesas,r8a7790-wdt" (R-Car H2)
diff --git a/Makefile b/Makefile

index 2fc5732..7d4ba51 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1063,7 +1063,7 @@ include/config/kernel.release: $(srctree)/Makefile FORCE
  # Carefully list dependencies so we do not try to build scripts twice
  # in parallel
  PHONY += scripts
-scripts: scripts_basic asm-generic gcc-plugins $(autoksyms_h)
+scripts: scripts_basic scripts_dtc asm-generic gcc-plugins $(autoksyms_h)
         $(Q)$(MAKE) $(build)=$(@)
  
  # Things we need to do before we recursively start building the kernel
@@ -1212,6 +1212,35 @@ kselftest-merge:
                 $(srctree)/tools/testing/selftests/*/config
         +$(Q)$(MAKE) -f $(srctree)/Makefile olddefconfig
  
+# ---------------------------------------------------------------------------
+# Devicetree files
+
+ifneq ($(wildcard $(srctree)/arch/$(SRCARCH)/boot/dts/),)
+dtstree := arch/$(SRCARCH)/boot/dts
+endif
+
+ifneq ($(dtstree),)
+
+%.dtb: prepare3 scripts_dtc
+       $(Q)$(MAKE) $(build)=$(dtstree) $(dtstree)/$@
+
+PHONY += dtbs dtbs_install
+dtbs: prepare3 scripts_dtc
+       $(Q)$(MAKE) $(build)=$(dtstree)
+
+dtbs_install:
+       $(Q)$(MAKE) $(dtbinst)=$(dtstree)
+
+ifdef CONFIG_OF_EARLY_FLATTREE
+all: dtbs
+endif
+
+endif
+
+PHONY += scripts_dtc
+scripts_dtc: scripts_basic
+       $(Q)$(MAKE) $(build)=scripts/dtc
+
  # ---------------------------------------------------------------------------
  # Modules
  
@@ -1421,6 +1450,12 @@ help:
         @echo  '  kselftest-merge - Merge all the config dependencies of kselftest to existing'
         @echo  '                    .config.'
         @echo  ''
+       @$(if $(dtstree), \
+               echo 'Devicetree:'; \
+               echo '* dtbs            - Build device tree blobs for enabled boards'; \
+               echo '  dtbs_install    - Install dtbs to $(INSTALL_DTBS_PATH)'; \
+               echo '')
+
         @echo 'Userspace tools targets:'
         @echo '  use "make tools/help"'
         @echo '  or  "cd tools; make help"'
diff --git a/arch/arc/Makefile b/arch/arc/Makefile

index 644815c..c64c505 100644 (file)
--- a/arch/arc/Makefile
+++ b/arch/arc/Makefile
@@ -102,11 +102,5 @@ boot_targets += uImage uImage.bin uImage.gz
  $(boot_targets): vmlinux
         $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
  
-%.dtb %.dtb.S %.dtb.o: scripts
-       $(Q)$(MAKE) $(build)=$(boot)/dts $(boot)/dts/$@
-
-dtbs: scripts
-       $(Q)$(MAKE) $(build)=$(boot)/dts
-
  archclean:
         $(Q)$(MAKE) $(clean)=$(boot)
diff --git a/arch/arm/Makefile b/arch/arm/Makefile

index 5c91e00..05a91d8 100644 (file)
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -303,12 +303,7 @@ else
  KBUILD_IMAGE := $(boot)/zImage
  endif
  
-# Build the DT binary blobs if we have OF configured
-ifeq ($(CONFIG_USE_OF),y)
-KBUILD_DTBS := dtbs
-endif
-
-all:   $(notdir $(KBUILD_IMAGE)) $(KBUILD_DTBS)
+all:   $(notdir $(KBUILD_IMAGE))
  
  
  archheaders:
@@ -335,17 +330,6 @@ $(BOOT_TARGETS): vmlinux
  $(INSTALL_TARGETS):
         $(Q)$(MAKE) $(build)=$(boot) MACHINE=$(MACHINE) $@
  
-%.dtb: | scripts
-       $(Q)$(MAKE) $(build)=$(boot)/dts MACHINE=$(MACHINE) $(boot)/dts/$@
-
-PHONY += dtbs dtbs_install
-
-dtbs: prepare scripts
-       $(Q)$(MAKE) $(build)=$(boot)/dts
-
-dtbs_install:
-       $(Q)$(MAKE) $(dtbinst)=$(boot)/dts
-
  PHONY += vdso_install
  vdso_install:
  ifeq ($(CONFIG_VDSO),y)
@@ -367,8 +351,6 @@ define archhelp
    echo  '  uImage        - U-Boot wrapped zImage'
    echo  '  bootpImage    - Combined zImage and initial RAM disk'
    echo  '                  (supply initrd image via make variable INITRD=<path>)'
-  echo  '* dtbs          - Build device tree blobs for enabled boards'
-  echo  '  dtbs_install  - Install dtbs to $(INSTALL_DTBS_PATH)'
    echo  '  install       - Install uncompressed kernel'
    echo  '  zinstall      - Install compressed kernel'
    echo  '  uinstall      - Install U-Boot wrapped compressed kernel'
diff --git a/arch/arm/boot/compressed/libfdt_env.h b/arch/arm/boot/compressed/libfdt_env.h

index 0743781..b36c028 100644 (file)
--- a/arch/arm/boot/compressed/libfdt_env.h
+++ b/arch/arm/boot/compressed/libfdt_env.h
@@ -6,6 +6,8 @@
  #include <linux/string.h>
  #include <asm/byteorder.h>
  
+#define INT_MAX                        ((int)(~0U>>1))
+
  typedef __be16 fdt16_t;
  typedef __be32 fdt32_t;
  typedef __be64 fdt64_t;
diff --git a/arch/arm/kernel/devtree.c b/arch/arm/kernel/devtree.c

index ecaa68d..13bcd3b 100644 (file)
--- a/arch/arm/kernel/devtree.c
+++ b/arch/arm/kernel/devtree.c
@@ -87,14 +87,11 @@ void __init arm_dt_init_cpu_maps(void)
         if (!cpus)
                 return;
  
-       for_each_child_of_node(cpus, cpu) {
+       for_each_of_cpu_node(cpu) {
                 const __be32 *cell;
                 int prop_bytes;
                 u32 hwid;
  
-               if (of_node_cmp(cpu->type, "cpu"))
-                       continue;
-
                 pr_debug(" * %pOF...\n", cpu);
                 /*
                  * A device tree containing CPU nodes with missing "reg"
diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c

index 24ac3ca..60e375c 100644 (file)
--- a/arch/arm/kernel/topology.c
+++ b/arch/arm/kernel/topology.c
@@ -94,12 +94,6 @@ static void __init parse_dt_topology(void)
         __cpu_capacity = kcalloc(nr_cpu_ids, sizeof(*__cpu_capacity),
                                  GFP_NOWAIT);
  
-       cn = of_find_node_by_path("/cpus");
-       if (!cn) {
-               pr_err("No CPU information found in DT\n");
-               return;
-       }
-
         for_each_possible_cpu(cpu) {
                 const u32 *rate;
                 int len;
diff --git a/arch/arm/mach-shmobile/pm-rcar-gen2.c b/arch/arm/mach-shmobile/pm-rcar-gen2.c

index 345af3e..7efe95b 100644 (file)
--- a/arch/arm/mach-shmobile/pm-rcar-gen2.c
+++ b/arch/arm/mach-shmobile/pm-rcar-gen2.c
@@ -50,7 +50,7 @@ void __init rcar_gen2_pm_init(void)
         void __iomem *p;
         u32 bar;
         static int once;
-       struct device_node *np, *cpus;
+       struct device_node *np;
         bool has_a7 = false;
         bool has_a15 = false;
         struct resource res;
@@ -59,11 +59,7 @@ void __init rcar_gen2_pm_init(void)
         if (once++)
                 return;
  
-       cpus = of_find_node_by_path("/cpus");
-       if (!cpus)
-               return;
-
-       for_each_child_of_node(cpus, np) {
+       for_each_of_cpu_node(np) {
                 if (of_device_is_compatible(np, "arm,cortex-a15"))
                         has_a15 = true;
                 else if (of_device_is_compatible(np, "arm,cortex-a7"))
diff --git a/arch/arm/mach-shmobile/pm-rmobile.c b/arch/arm/mach-shmobile/pm-rmobile.c

index e348bcf..94fdeef 100644 (file)
--- a/arch/arm/mach-shmobile/pm-rmobile.c
+++ b/arch/arm/mach-shmobile/pm-rmobile.c
@@ -202,7 +202,7 @@ static void __init get_special_pds(void)
         const struct of_device_id *id;
  
         /* PM domains containing CPUs */
-       for_each_node_by_type(np, "cpu")
+       for_each_of_cpu_node(np)
                 add_special_pd(np, PD_CPU);
  
         /* PM domain containing console */
diff --git a/arch/arm/mach-shmobile/timer.c b/arch/arm/mach-shmobile/timer.c

index 828e8ae..e48b093 100644 (file)
--- a/arch/arm/mach-shmobile/timer.c
+++ b/arch/arm/mach-shmobile/timer.c
@@ -22,22 +22,16 @@
  
  void __init shmobile_init_delay(void)
  {
-       struct device_node *np, *cpus;
+       struct device_node *np;
         u32 max_freq = 0;
  
-       cpus = of_find_node_by_path("/cpus");
-       if (!cpus)
-               return;
-
-       for_each_child_of_node(cpus, np) {
+       for_each_of_cpu_node(np) {
                 u32 freq;
  
                 if (!of_property_read_u32(np, "clock-frequency", &freq))
                         max_freq = max(max_freq, freq);
         }
  
-       of_node_put(cpus);
-
         if (!max_freq)
                 return;
  
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig

index c03cd0d..964f682 100644 (file)
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -11,6 +11,8 @@ config ARM64
         select ARCH_CLOCKSOURCE_DATA
         select ARCH_HAS_DEBUG_VIRTUAL
         select ARCH_HAS_DEVMEM_IS_ALLOWED
+       select ARCH_HAS_DMA_COHERENT_TO_PFN
+       select ARCH_HAS_DMA_MMAP_PGPROT
         select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
         select ARCH_HAS_ELF_RANDOMIZE
         select ARCH_HAS_FAST_MULTIPLIER
@@ -24,6 +26,8 @@ config ARM64
         select ARCH_HAS_SG_CHAIN
         select ARCH_HAS_STRICT_KERNEL_RWX
         select ARCH_HAS_STRICT_MODULE_RWX
+       select ARCH_HAS_SYNC_DMA_FOR_DEVICE
+       select ARCH_HAS_SYNC_DMA_FOR_CPU
         select ARCH_HAS_SYSCALL_WRAPPER
         select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
         select ARCH_HAVE_NMI_SAFE_CMPXCHG
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile

index 106039d..b4e994c 100644 (file)
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -113,9 +113,8 @@ core-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a
  # Default target when executing plain make
  boot           := arch/arm64/boot
  KBUILD_IMAGE   := $(boot)/Image.gz
-KBUILD_DTBS    := dtbs
  
-all:   Image.gz $(KBUILD_DTBS)
+all:   Image.gz
  
  
  Image: vmlinux
@@ -127,17 +126,6 @@ Image.%: Image
  zinstall install:
         $(Q)$(MAKE) $(build)=$(boot) $@
  
-%.dtb: scripts
-       $(Q)$(MAKE) $(build)=$(boot)/dts $(boot)/dts/$@
-
-PHONY += dtbs dtbs_install
-
-dtbs: prepare scripts
-       $(Q)$(MAKE) $(build)=$(boot)/dts
-
-dtbs_install:
-       $(Q)$(MAKE) $(dtbinst)=$(boot)/dts
-
  PHONY += vdso_install
  vdso_install:
         $(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso $@
@@ -145,7 +133,6 @@ vdso_install:
  # We use MRPROPER_FILES and CLEAN_FILES now
  archclean:
         $(Q)$(MAKE) $(clean)=$(boot)
-       $(Q)$(MAKE) $(clean)=$(boot)/dts
  
  # We need to generate vdso-offsets.h before compiling certain files in kernel/.
  # In order to do that, we should use the archprepare target, but we can't since
@@ -160,8 +147,6 @@ vdso_prepare: prepare0
  define archhelp
    echo  '* Image.gz      - Compressed kernel image (arch/$(ARCH)/boot/Image.gz)'
    echo  '  Image         - Uncompressed kernel image (arch/$(ARCH)/boot/Image)'
-  echo  '* dtbs          - Build device tree blobs for enabled boards'
-  echo  '  dtbs_install  - Install dtbs to $(INSTALL_DTBS_PATH)'
    echo  '  install       - Install uncompressed kernel'
    echo  '  zinstall      - Install compressed kernel'
    echo  '                  Install using (your) ~/bin/installkernel or'
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi

index 8cb78dd..90c0faf 100644 (file)
--- a/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi
@@ -148,6 +148,7 @@
                 #address-cells = <2>;
                 #size-cells = <2>;
                 ranges;
+               dma-ranges = <0x0 0x0 0x0 0x0 0x10000 0x00000000>;
  
                 clockgen: clocking@1300000 {
                         compatible = "fsl,ls2080a-clockgen";
@@ -321,6 +322,8 @@
                         reg = <0x00000008 0x0c000000 0 0x40>,    /* MC portal base */
                               <0x00000000 0x08340000 0 0x40000>; /* MC control reg */
                         msi-parent = <&its>;
+                       iommu-map = <0 &smmu 0 0>;      /* This is fixed-up by u-boot */
+                       dma-coherent;
                         #address-cells = <3>;
                         #size-cells = <1>;
  
@@ -424,6 +427,9 @@
                         compatible = "arm,mmu-500";
                         reg = <0 0x5000000 0 0x800000>;
                         #global-interrupts = <12>;
+                       #iommu-cells = <1>;
+                       stream-match-mask = <0x7C00>;
+                       dma-coherent;
                         interrupts = <0 13 4>, /* global secure fault */
                                      <0 14 4>, /* combined secure interrupt */
                                      <0 15 4>, /* global non-secure fault */
@@ -466,7 +472,6 @@
                                      <0 204 4>, <0 205 4>,
                                      <0 206 4>, <0 207 4>,
                                      <0 208 4>, <0 209 4>;
-                       mmu-masters = <&fsl_mc 0x300 0>;
                 };
  
                 dspi: dspi@2100000 {
diff --git a/arch/arm64/include/asm/device.h b/arch/arm64/include/asm/device.h

index 5a5fa47..3dd3d66 100644 (file)
--- a/arch/arm64/include/asm/device.h
+++ b/arch/arm64/include/asm/device.h
@@ -23,7 +23,6 @@ struct dev_archdata {
  #ifdef CONFIG_XEN
         const struct dma_map_ops *dev_dma_ops;
  #endif
-       bool dma_coherent;
  };
  
  struct pdev_archdata {
diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h

index b7847eb..c41f3fb 100644 (file)
--- a/arch/arm64/include/asm/dma-mapping.h
+++ b/arch/arm64/include/asm/dma-mapping.h
@@ -44,10 +44,13 @@ void arch_teardown_dma_ops(struct device *dev);
  #define arch_teardown_dma_ops  arch_teardown_dma_ops
  #endif
  
-/* do not use this function in a driver */
+/*
+ * Do not use this function in a driver; it is only provided for
+ * arch/arm/xen/mm.c, which is used by arm64 as well.
+ */
  static inline bool is_device_dma_coherent(struct device *dev)
  {
-       return dev->archdata.dma_coherent;
+       return dev->dma_coherent;
  }
  
  #endif /* __KERNEL__ */
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c

index 25fcd22..96b8f2f 100644 (file)
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -602,7 +602,7 @@ static void __init of_parse_and_init_cpus(void)
  {
         struct device_node *dn;
  
-       for_each_node_by_type(dn, "cpu") {
+       for_each_of_cpu_node(dn) {
                 u64 hwid = of_get_cpu_mpidr(dn);
  
                 if (hwid == INVALID_HWID)
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c

index 072c51f..d190612 100644 (file)
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -25,6 +25,7 @@
  #include <linux/slab.h>
  #include <linux/genalloc.h>
  #include <linux/dma-direct.h>
+#include <linux/dma-noncoherent.h>
  #include <linux/dma-contiguous.h>
  #include <linux/vmalloc.h>
  #include <linux/swiotlb.h>
@@ -32,16 +33,6 @@
  
  #include <asm/cacheflush.h>
  
-static int swiotlb __ro_after_init;
-
-static pgprot_t __get_dma_pgprot(unsigned long attrs, pgprot_t prot,
-                                bool coherent)
-{
-       if (!coherent || (attrs & DMA_ATTR_WRITE_COMBINE))
-               return pgprot_writecombine(prot);
-       return prot;
-}
-
  static struct gen_pool *atomic_pool __ro_after_init;
  
  #define DEFAULT_DMA_COHERENT_POOL_SIZE  SZ_256K
@@ -91,18 +82,16 @@ static int __free_from_pool(void *start, size_t size)
         return 1;
  }
  
-static void *__dma_alloc(struct device *dev, size_t size,
-                        dma_addr_t *dma_handle, gfp_t flags,
-                        unsigned long attrs)
+void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
+               gfp_t flags, unsigned long attrs)
  {
         struct page *page;
         void *ptr, *coherent_ptr;
-       bool coherent = is_device_dma_coherent(dev);
-       pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, false);
+       pgprot_t prot = pgprot_writecombine(PAGE_KERNEL);
  
         size = PAGE_ALIGN(size);
  
-       if (!coherent && !gfpflags_allow_blocking(flags)) {
+       if (!gfpflags_allow_blocking(flags)) {
                 struct page *page = NULL;
                 void *addr = __alloc_from_pool(size, &page, flags);
  
@@ -112,14 +101,10 @@ static void *__dma_alloc(struct device *dev, size_t size,
                 return addr;
         }
  
-       ptr = swiotlb_alloc(dev, size, dma_handle, flags, attrs);
+       ptr = dma_direct_alloc_pages(dev, size, dma_handle, flags, attrs);
         if (!ptr)
                 goto no_mem;
  
-       /* no need for non-cacheable mapping if coherent */
-       if (coherent)
-               return ptr;
-
         /* remove any dirty cache lines on the kernel alias */
         __dma_flush_area(ptr, size);
  
@@ -133,130 +118,57 @@ static void *__dma_alloc(struct device *dev, size_t size,
         return coherent_ptr;
  
  no_map:
-       swiotlb_free(dev, size, ptr, *dma_handle, attrs);
+       dma_direct_free_pages(dev, size, ptr, *dma_handle, attrs);
  no_mem:
         return NULL;
  }
  
-static void __dma_free(struct device *dev, size_t size,
-                      void *vaddr, dma_addr_t dma_handle,
-                      unsigned long attrs)
+void arch_dma_free(struct device *dev, size_t size, void *vaddr,
+               dma_addr_t dma_handle, unsigned long attrs)
  {
-       void *swiotlb_addr = phys_to_virt(dma_to_phys(dev, dma_handle));
+       if (!__free_from_pool(vaddr, PAGE_ALIGN(size))) {
+               void *kaddr = phys_to_virt(dma_to_phys(dev, dma_handle));
  
-       size = PAGE_ALIGN(size);
-
-       if (!is_device_dma_coherent(dev)) {
-               if (__free_from_pool(vaddr, size))
-                       return;
                 vunmap(vaddr);
+               dma_direct_free_pages(dev, size, kaddr, dma_handle, attrs);
         }
-       swiotlb_free(dev, size, swiotlb_addr, dma_handle, attrs);
  }
  
-static dma_addr_t __swiotlb_map_page(struct device *dev, struct page *page,
-                                    unsigned long offset, size_t size,
-                                    enum dma_data_direction dir,
-                                    unsigned long attrs)
+long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr,
+               dma_addr_t dma_addr)
  {
-       dma_addr_t dev_addr;
-
-       dev_addr = swiotlb_map_page(dev, page, offset, size, dir, attrs);
-       if (!is_device_dma_coherent(dev) &&
-           (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
-               __dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
-
-       return dev_addr;
-}
-
-
-static void __swiotlb_unmap_page(struct device *dev, dma_addr_t dev_addr,
-                                size_t size, enum dma_data_direction dir,
-                                unsigned long attrs)
-{
-       if (!is_device_dma_coherent(dev) &&
-           (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
-               __dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
-       swiotlb_unmap_page(dev, dev_addr, size, dir, attrs);
+       return __phys_to_pfn(dma_to_phys(dev, dma_addr));
  }
  
-static int __swiotlb_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
-                                 int nelems, enum dma_data_direction dir,
-                                 unsigned long attrs)
+pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
+               unsigned long attrs)
  {
-       struct scatterlist *sg;
-       int i, ret;
-
-       ret = swiotlb_map_sg_attrs(dev, sgl, nelems, dir, attrs);
-       if (!is_device_dma_coherent(dev) &&
-           (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
-               for_each_sg(sgl, sg, ret, i)
-                       __dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
-                                      sg->length, dir);
-
-       return ret;
-}
-
-static void __swiotlb_unmap_sg_attrs(struct device *dev,
-                                    struct scatterlist *sgl, int nelems,
-                                    enum dma_data_direction dir,
-                                    unsigned long attrs)
-{
-       struct scatterlist *sg;
-       int i;
-
-       if (!is_device_dma_coherent(dev) &&
-           (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
-               for_each_sg(sgl, sg, nelems, i)
-                       __dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
-                                        sg->length, dir);
-       swiotlb_unmap_sg_attrs(dev, sgl, nelems, dir, attrs);
+       if (!dev_is_dma_coherent(dev) || (attrs & DMA_ATTR_WRITE_COMBINE))
+               return pgprot_writecombine(prot);
+       return prot;
  }
  
-static void __swiotlb_sync_single_for_cpu(struct device *dev,
-                                         dma_addr_t dev_addr, size_t size,
-                                         enum dma_data_direction dir)
+void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
+               size_t size, enum dma_data_direction dir)
  {
-       if (!is_device_dma_coherent(dev))
-               __dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
-       swiotlb_sync_single_for_cpu(dev, dev_addr, size, dir);
+       __dma_map_area(phys_to_virt(paddr), size, dir);
  }
  
-static void __swiotlb_sync_single_for_device(struct device *dev,
-                                            dma_addr_t dev_addr, size_t size,
-                                            enum dma_data_direction dir)
+void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
+               size_t size, enum dma_data_direction dir)
  {
-       swiotlb_sync_single_for_device(dev, dev_addr, size, dir);
-       if (!is_device_dma_coherent(dev))
-               __dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
+       __dma_unmap_area(phys_to_virt(paddr), size, dir);
  }
  
-static void __swiotlb_sync_sg_for_cpu(struct device *dev,
-                                     struct scatterlist *sgl, int nelems,
-                                     enum dma_data_direction dir)
+static int __swiotlb_get_sgtable_page(struct sg_table *sgt,
+                                     struct page *page, size_t size)
  {
-       struct scatterlist *sg;
-       int i;
-
-       if (!is_device_dma_coherent(dev))
-               for_each_sg(sgl, sg, nelems, i)
-                       __dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
-                                        sg->length, dir);
-       swiotlb_sync_sg_for_cpu(dev, sgl, nelems, dir);
-}
+       int ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
  
-static void __swiotlb_sync_sg_for_device(struct device *dev,
-                                        struct scatterlist *sgl, int nelems,
-                                        enum dma_data_direction dir)
-{
-       struct scatterlist *sg;
-       int i;
+       if (!ret)
+               sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0);
  
-       swiotlb_sync_sg_for_device(dev, sgl, nelems, dir);
-       if (!is_device_dma_coherent(dev))
-               for_each_sg(sgl, sg, nelems, i)
-                       __dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
-                                      sg->length, dir);
+       return ret;
  }
  
  static int __swiotlb_mmap_pfn(struct vm_area_struct *vma,
@@ -277,74 +189,6 @@ static int __swiotlb_mmap_pfn(struct vm_area_struct *vma,
         return ret;
  }
  
-static int __swiotlb_mmap(struct device *dev,
-                         struct vm_area_struct *vma,
-                         void *cpu_addr, dma_addr_t dma_addr, size_t size,
-                         unsigned long attrs)
-{
-       int ret;
-       unsigned long pfn = dma_to_phys(dev, dma_addr) >> PAGE_SHIFT;
-
-       vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot,
-                                            is_device_dma_coherent(dev));
-
-       if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
-               return ret;
-
-       return __swiotlb_mmap_pfn(vma, pfn, size);
-}
-
-static int __swiotlb_get_sgtable_page(struct sg_table *sgt,
-                                     struct page *page, size_t size)
-{
-       int ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
-
-       if (!ret)
-               sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0);
-
-       return ret;
-}
-
-static int __swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt,
-                                void *cpu_addr, dma_addr_t handle, size_t size,
-                                unsigned long attrs)
-{
-       struct page *page = phys_to_page(dma_to_phys(dev, handle));
-
-       return __swiotlb_get_sgtable_page(sgt, page, size);
-}
-
-static int __swiotlb_dma_supported(struct device *hwdev, u64 mask)
-{
-       if (swiotlb)
-               return swiotlb_dma_supported(hwdev, mask);
-       return 1;
-}
-
-static int __swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t addr)
-{
-       if (swiotlb)
-               return swiotlb_dma_mapping_error(hwdev, addr);
-       return 0;
-}
-
-static const struct dma_map_ops arm64_swiotlb_dma_ops = {
-       .alloc = __dma_alloc,
-       .free = __dma_free,
-       .mmap = __swiotlb_mmap,
-       .get_sgtable = __swiotlb_get_sgtable,
-       .map_page = __swiotlb_map_page,
-       .unmap_page = __swiotlb_unmap_page,
-       .map_sg = __swiotlb_map_sg_attrs,
-       .unmap_sg = __swiotlb_unmap_sg_attrs,
-       .sync_single_for_cpu = __swiotlb_sync_single_for_cpu,
-       .sync_single_for_device = __swiotlb_sync_single_for_device,
-       .sync_sg_for_cpu = __swiotlb_sync_sg_for_cpu,
-       .sync_sg_for_device = __swiotlb_sync_sg_for_device,
-       .dma_supported = __swiotlb_dma_supported,
-       .mapping_error = __swiotlb_dma_mapping_error,
-};
-
  static int __init atomic_pool_init(void)
  {
         pgprot_t prot = __pgprot(PROT_NORMAL_NC);
@@ -500,10 +344,6 @@ EXPORT_SYMBOL(dummy_dma_ops);
  
  static int __init arm64_dma_init(void)
  {
-       if (swiotlb_force == SWIOTLB_FORCE ||
-           max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT))
-               swiotlb = 1;
-
         WARN_TAINT(ARCH_DMA_MINALIGN < cache_line_size(),
                    TAINT_CPU_OUT_OF_SPEC,
                    "ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)",
@@ -528,7 +368,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size,
                                  dma_addr_t *handle, gfp_t gfp,
                                  unsigned long attrs)
  {
-       bool coherent = is_device_dma_coherent(dev);
+       bool coherent = dev_is_dma_coherent(dev);
         int ioprot = dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs);
         size_t iosize = size;
         void *addr;
@@ -569,7 +409,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size,
                         addr = NULL;
                 }
         } else if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
-               pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent);
+               pgprot_t prot = arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs);
                 struct page *page;
  
                 page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT,
@@ -596,7 +436,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size,
                                                     size >> PAGE_SHIFT);
                 }
         } else {
-               pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent);
+               pgprot_t prot = arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs);
                 struct page **pages;
  
                 pages = iommu_dma_alloc(dev, iosize, gfp, attrs, ioprot,
@@ -658,8 +498,7 @@ static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
         struct vm_struct *area;
         int ret;
  
-       vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot,
-                                            is_device_dma_coherent(dev));
+       vma->vm_page_prot = arch_dma_mmap_pgprot(dev, vma->vm_page_prot, attrs);
  
         if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
                 return ret;
@@ -709,11 +548,11 @@ static void __iommu_sync_single_for_cpu(struct device *dev,
  {
         phys_addr_t phys;
  
-       if (is_device_dma_coherent(dev))
+       if (dev_is_dma_coherent(dev))
                 return;
  
-       phys = iommu_iova_to_phys(iommu_get_domain_for_dev(dev), dev_addr);
-       __dma_unmap_area(phys_to_virt(phys), size, dir);
+       phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dev_addr);
+       arch_sync_dma_for_cpu(dev, phys, size, dir);
  }
  
  static void __iommu_sync_single_for_device(struct device *dev,
@@ -722,11 +561,11 @@ static void __iommu_sync_single_for_device(struct device *dev,
  {
         phys_addr_t phys;
  
-       if (is_device_dma_coherent(dev))
+       if (dev_is_dma_coherent(dev))
                 return;
  
-       phys = iommu_iova_to_phys(iommu_get_domain_for_dev(dev), dev_addr);
-       __dma_map_area(phys_to_virt(phys), size, dir);
+       phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dev_addr);
+       arch_sync_dma_for_device(dev, phys, size, dir);
  }
  
  static dma_addr_t __iommu_map_page(struct device *dev, struct page *page,
@@ -734,13 +573,13 @@ static dma_addr_t __iommu_map_page(struct device *dev, struct page *page,
                                    enum dma_data_direction dir,
                                    unsigned long attrs)
  {
-       bool coherent = is_device_dma_coherent(dev);
+       bool coherent = dev_is_dma_coherent(dev);
         int prot = dma_info_to_prot(dir, coherent, attrs);
         dma_addr_t dev_addr = iommu_dma_map_page(dev, page, offset, size, prot);
  
-       if (!iommu_dma_mapping_error(dev, dev_addr) &&
-           (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
-               __iommu_sync_single_for_device(dev, dev_addr, size, dir);
+       if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
+           !iommu_dma_mapping_error(dev, dev_addr))
+               __dma_map_area(page_address(page) + offset, size, dir);
  
         return dev_addr;
  }
@@ -762,11 +601,11 @@ static void __iommu_sync_sg_for_cpu(struct device *dev,
         struct scatterlist *sg;
         int i;
  
-       if (is_device_dma_coherent(dev))
+       if (dev_is_dma_coherent(dev))
                 return;
  
         for_each_sg(sgl, sg, nelems, i)
-               __dma_unmap_area(sg_virt(sg), sg->length, dir);
+               arch_sync_dma_for_cpu(dev, sg_phys(sg), sg->length, dir);
  }
  
  static void __iommu_sync_sg_for_device(struct device *dev,
@@ -776,18 +615,18 @@ static void __iommu_sync_sg_for_device(struct device *dev,
         struct scatterlist *sg;
         int i;
  
-       if (is_device_dma_coherent(dev))
+       if (dev_is_dma_coherent(dev))
                 return;
  
         for_each_sg(sgl, sg, nelems, i)
-               __dma_map_area(sg_virt(sg), sg->length, dir);
+               arch_sync_dma_for_device(dev, sg_phys(sg), sg->length, dir);
  }
  
  static int __iommu_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
                                 int nelems, enum dma_data_direction dir,
                                 unsigned long attrs)
  {
-       bool coherent = is_device_dma_coherent(dev);
+       bool coherent = dev_is_dma_coherent(dev);
  
         if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
                 __iommu_sync_sg_for_device(dev, sgl, nelems, dir);
@@ -879,9 +718,9 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
                         const struct iommu_ops *iommu, bool coherent)
  {
         if (!dev->dma_ops)
-               dev->dma_ops = &arm64_swiotlb_dma_ops;
+               dev->dma_ops = &swiotlb_dma_ops;
  
-       dev->archdata.dma_coherent = coherent;
+       dev->dma_coherent = coherent;
         __iommu_setup_dma_ops(dev, dma_base, size, iommu);
  
  #ifdef CONFIG_XEN
diff --git a/arch/c6x/Makefile b/arch/c6x/Makefile

index 3fe8a94..b7aa854 100644 (file)
--- a/arch/c6x/Makefile
+++ b/arch/c6x/Makefile
@@ -40,9 +40,7 @@ boot := arch/$(ARCH)/boot
  DTB:=$(subst dtbImage.,,$(filter dtbImage.%, $(MAKECMDGOALS)))
  export DTB
  
-ifneq ($(DTB),)
  core-y += $(boot)/dts/
-endif
  
  # With make 3.82 we cannot mix normal and wildcard targets
  
diff --git a/arch/c6x/boot/dts/Makefile b/arch/c6x/boot/dts/Makefile

index b212d27..f438285 100644 (file)
--- a/arch/c6x/boot/dts/Makefile
+++ b/arch/c6x/boot/dts/Makefile
@@ -5,15 +5,12 @@
  
  DTC_FLAGS ?= -p 1024
  
+dtb-$(CONFIG_SOC_TMS320C6455) += dsk6455.dtb
+dtb-$(CONFIG_SOC_TMS320C6457) += evmc6457.dtb
+dtb-$(CONFIG_SOC_TMS320C6472) += evmc6472.dtb
+dtb-$(CONFIG_SOC_TMS320C6474) += evmc6474.dtb
+dtb-$(CONFIG_SOC_TMS320C6678) += evmc6678.dtb
+
  ifneq ($(DTB),)
-obj-y += linked_dtb.o
+obj-y += $(DTB).dtb.o
  endif
-
-quiet_cmd_cp = CP      $< $@$2
-       cmd_cp = cat $< >$@$2 || (rm -f $@ && echo false)
-
-# Generate builtin.dtb from $(DTB).dtb
-$(obj)/builtin.dtb: $(obj)/$(DTB).dtb
-       $(call if_changed,cp)
-
-$(obj)/linked_dtb.o: $(obj)/builtin.dtb
diff --git a/arch/c6x/boot/dts/linked_dtb.S b/arch/c6x/boot/dts/linked_dtb.S

deleted file mode 100644 (file)

index cf347f1..0000000
--- a/arch/c6x/boot/dts/linked_dtb.S
+++ /dev/null
@@ -1,2 +0,0 @@
-.section __fdt_blob,"a"
-.incbin "arch/c6x/boot/dts/builtin.dtb"
diff --git a/arch/c6x/include/asm/sections.h b/arch/c6x/include/asm/sections.h

index d6c591a..dc2f15e 100644 (file)
--- a/arch/c6x/include/asm/sections.h
+++ b/arch/c6x/include/asm/sections.h
@@ -8,6 +8,5 @@ extern char _vectors_start[];
  extern char _vectors_end[];
  
  extern char _data_lma[];
-extern char _fdt_start[], _fdt_end[];
  
  #endif /* _ASM_C6X_SECTIONS_H */
diff --git a/arch/c6x/kernel/setup.c b/arch/c6x/kernel/setup.c

index 786e36e..05d96a9 100644 (file)
--- a/arch/c6x/kernel/setup.c
+++ b/arch/c6x/kernel/setup.c
@@ -96,7 +96,7 @@ static void __init get_cpuinfo(void)
         unsigned long core_khz;
         u64 tmp;
         struct cpuinfo_c6x *p;
-       struct device_node *node, *np;
+       struct device_node *node;
  
         p = &per_cpu(cpu_data, smp_processor_id());
  
@@ -190,13 +190,8 @@ static void __init get_cpuinfo(void)
  
         p->core_id = get_coreid();
  
-       node = of_find_node_by_name(NULL, "cpus");
-       if (node) {
-               for_each_child_of_node(node, np)
-                       if (!strcmp("cpu", np->name))
-                               ++c6x_num_cores;
-               of_node_put(node);
-       }
+       for_each_of_cpu_node(node)
+               ++c6x_num_cores;
  
         node = of_find_node_by_name(NULL, "soc");
         if (node) {
@@ -270,7 +265,7 @@ int __init c6x_add_memory(phys_addr_t start, unsigned long size)
  notrace void __init machine_init(unsigned long dt_ptr)
  {
         void *dtb = __va(dt_ptr);
-       void *fdt = _fdt_start;
+       void *fdt = __dtb_start;
  
         /* interrupts must be masked */
         set_creg(IER, 2);
@@ -363,7 +358,7 @@ void __init setup_arch(char **cmdline_p)
                                          memory_end >> PAGE_SHIFT);
         memblock_reserve(memory_start, bootmap_size);
  
-       unflatten_device_tree();
+       unflatten_and_copy_device_tree();
  
         c6x_cache_init();
  
diff --git a/arch/c6x/kernel/vmlinux.lds.S b/arch/c6x/kernel/vmlinux.lds.S

index 1fba5b4..584bab2 100644 (file)
--- a/arch/c6x/kernel/vmlinux.lds.S
+++ b/arch/c6x/kernel/vmlinux.lds.S
@@ -90,16 +90,6 @@ SECTIONS
                 *(.switch)
         }
  
-       . = ALIGN (8) ;
-       __fdt_blob : AT(ADDR(__fdt_blob) - LOAD_OFFSET)
-       {
-               _fdt_start = . ;        /* place for fdt blob */
-               *(__fdt_blob) ;         /* Any link-placed DTB */
-               BYTE(0);                /* section always has contents */
-               . = _fdt_start + 0x4000;        /* Pad up to 16kbyte */
-               _fdt_end = . ;
-       }
-
         _etext = .;
  
         /*
diff --git a/arch/h8300/Makefile b/arch/h8300/Makefile

index 58634e6..4003ddc 100644 (file)
--- a/arch/h8300/Makefile
+++ b/arch/h8300/Makefile
@@ -31,21 +31,12 @@ CROSS_COMPILE := h8300-unknown-linux-
  endif
  
  core-y += arch/$(ARCH)/kernel/ arch/$(ARCH)/mm/
-ifneq '$(CONFIG_H8300_BUILTIN_DTB)' '""'
-core-y += arch/h8300/boot/dts/
-endif
+core-y += arch/$(ARCH)/boot/dts/
  
  libs-y += arch/$(ARCH)/lib/
  
  boot := arch/h8300/boot
  
-%.dtb %.dtb.S %.dtb.o: | scripts
-       $(Q)$(MAKE) $(build)=arch/h8300/boot/dts arch/h8300/boot/dts/$@
-
-PHONY += dtbs
-dtbs: scripts
-       $(Q)$(MAKE) $(build)=arch/h8300/boot/dts
-
  archmrproper:
  
  archclean:
diff --git a/arch/m68k/mac/misc.c b/arch/m68k/mac/misc.c

index 1b083c5..ebb3b6d 100644 (file)
--- a/arch/m68k/mac/misc.c
+++ b/arch/m68k/mac/misc.c
@@ -37,35 +37,6 @@
  static void (*rom_reset)(void);
  
  #ifdef CONFIG_ADB_CUDA
-static time64_t cuda_read_time(void)
-{
-       struct adb_request req;
-       time64_t time;
-
-       if (cuda_request(&req, NULL, 2, CUDA_PACKET, CUDA_GET_TIME) < 0)
-               return 0;
-       while (!req.complete)
-               cuda_poll();
-
-       time = (u32)((req.reply[3] << 24) | (req.reply[4] << 16) |
-                    (req.reply[5] << 8) | req.reply[6]);
-
-       return time - RTC_OFFSET;
-}
-
-static void cuda_write_time(time64_t time)
-{
-       struct adb_request req;
-       u32 data = lower_32_bits(time + RTC_OFFSET);
-
-       if (cuda_request(&req, NULL, 6, CUDA_PACKET, CUDA_SET_TIME,
-                        (data >> 24) & 0xFF, (data >> 16) & 0xFF,
-                        (data >> 8) & 0xFF, data & 0xFF) < 0)
-               return;
-       while (!req.complete)
-               cuda_poll();
-}
-
  static __u8 cuda_read_pram(int offset)
  {
         struct adb_request req;
@@ -91,33 +62,6 @@ static void cuda_write_pram(int offset, __u8 data)
  #endif /* CONFIG_ADB_CUDA */
  
  #ifdef CONFIG_ADB_PMU
-static time64_t pmu_read_time(void)
-{
-       struct adb_request req;
-       time64_t time;
-
-       if (pmu_request(&req, NULL, 1, PMU_READ_RTC) < 0)
-               return 0;
-       pmu_wait_complete(&req);
-
-       time = (u32)((req.reply[0] << 24) | (req.reply[1] << 16) |
-                    (req.reply[2] << 8) | req.reply[3]);
-
-       return time - RTC_OFFSET;
-}
-
-static void pmu_write_time(time64_t time)
-{
-       struct adb_request req;
-       u32 data = lower_32_bits(time + RTC_OFFSET);
-
-       if (pmu_request(&req, NULL, 5, PMU_SET_RTC,
-                       (data >> 24) & 0xFF, (data >> 16) & 0xFF,
-                       (data >> 8) & 0xFF, data & 0xFF) < 0)
-               return;
-       pmu_wait_complete(&req);
-}
-
  static __u8 pmu_read_pram(int offset)
  {
         struct adb_request req;
@@ -295,13 +239,17 @@ static time64_t via_read_time(void)
   * is basically any machine with Mac II-style ADB.
   */
  
-static void via_write_time(time64_t time)
+static void via_set_rtc_time(struct rtc_time *tm)
  {
         union {
                 __u8 cdata[4];
                 __u32 idata;
         } data;
         __u8 temp;
+       time64_t time;
+
+       time = mktime64(tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday,
+                       tm->tm_hour, tm->tm_min, tm->tm_sec);
  
         /* Clear the write protect bit */
  
@@ -641,12 +589,12 @@ int mac_hwclk(int op, struct rtc_time *t)
  #ifdef CONFIG_ADB_CUDA
                 case MAC_ADB_EGRET:
                 case MAC_ADB_CUDA:
-                       now = cuda_read_time();
+                       now = cuda_get_time();
                         break;
  #endif
  #ifdef CONFIG_ADB_PMU
                 case MAC_ADB_PB2:
-                       now = pmu_read_time();
+                       now = pmu_get_time();
                         break;
  #endif
                 default:
@@ -665,24 +613,21 @@ int mac_hwclk(int op, struct rtc_time *t)
                          __func__, t->tm_year + 1900, t->tm_mon + 1, t->tm_mday,
                          t->tm_hour, t->tm_min, t->tm_sec);
  
-               now = mktime64(t->tm_year + 1900, t->tm_mon + 1, t->tm_mday,
-                              t->tm_hour, t->tm_min, t->tm_sec);
-
                 switch (macintosh_config->adb_type) {
                 case MAC_ADB_IOP:
                 case MAC_ADB_II:
                 case MAC_ADB_PB1:
-                       via_write_time(now);
+                       via_set_rtc_time(t);
                         break;
  #ifdef CONFIG_ADB_CUDA
                 case MAC_ADB_EGRET:
                 case MAC_ADB_CUDA:
-                       cuda_write_time(now);
+                       cuda_set_rtc_time(t);
                         break;
  #endif
  #ifdef CONFIG_ADB_PMU
                 case MAC_ADB_PB2:
-                       pmu_write_time(now);
+                       pmu_set_rtc_time(t);
                         break;
  #endif
                 default:
diff --git a/arch/microblaze/Makefile b/arch/microblaze/Makefile

index 4f3ab57..0823d29 100644 (file)
--- a/arch/microblaze/Makefile
+++ b/arch/microblaze/Makefile
@@ -65,9 +65,7 @@ boot := arch/microblaze/boot
  # Are we making a simpleImage.<boardname> target? If so, crack out the boardname
  DTB:=$(subst simpleImage.,,$(filter simpleImage.%, $(MAKECMDGOALS)))
  
-ifneq ($(DTB),)
-       core-y  += $(boot)/dts/
-endif
+core-y += $(boot)/dts/
  
  # defines filename extension depending memory management type
  ifeq ($(CONFIG_MMU),)
diff --git a/arch/microblaze/boot/dts/Makefile b/arch/microblaze/boot/dts/Makefile

index 1f77913..c7324e7 100644 (file)
--- a/arch/microblaze/boot/dts/Makefile
+++ b/arch/microblaze/boot/dts/Makefile
@@ -1,6 +1,9 @@
  # SPDX-License-Identifier: GPL-2.0
  #
  
+dtb-y := system.dtb
+
+ifneq ($(DTB),)
  obj-y += linked_dtb.o
  
  # Ensure system.dtb exists
@@ -11,6 +14,7 @@ ifneq ($(DTB),system)
  $(obj)/system.dtb: $(obj)/$(DTB).dtb
         $(call if_changed,cp)
  endif
+endif
  
  quiet_cmd_cp = CP      $< $@$2
         cmd_cp = cat $< >$@$2 || (rm -f $@ && echo false)
diff --git a/arch/microblaze/kernel/cpu/cpuinfo.c b/arch/microblaze/kernel/cpu/cpuinfo.c

index 96b3f26..ef2f494 100644 (file)
--- a/arch/microblaze/kernel/cpu/cpuinfo.c
+++ b/arch/microblaze/kernel/cpu/cpuinfo.c
@@ -89,9 +89,9 @@ static struct device_node *cpu;
  
  void __init setup_cpuinfo(void)
  {
-       cpu = (struct device_node *) of_find_node_by_type(NULL, "cpu");
+       cpu = of_get_cpu_node(0, NULL);
         if (!cpu)
-               pr_err("You don't have cpu!!!\n");
+               pr_err("You don't have cpu or are missing cpu reg property!!!\n");
  
         pr_info("%s: initialising\n", __func__);
  
@@ -117,6 +117,8 @@ void __init setup_cpuinfo(void)
         if (cpuinfo.mmu_privins)
                 pr_warn("%s: Stream instructions enabled"
                         " - USERSPACE CAN LOCK THIS KERNEL!\n", __func__);
+
+       of_node_put(cpu);
  }
  
  void __init setup_cpuinfo_clk(void)
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig

index 77c022e..53e75dd 100644 (file)
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -21,6 +21,7 @@ config MIPS
         select GENERIC_CLOCKEVENTS
         select GENERIC_CMOS_UPDATE
         select GENERIC_CPU_AUTOPROBE
+       select GENERIC_IOMAP
         select GENERIC_IRQ_PROBE
         select GENERIC_IRQ_SHOW
         select GENERIC_LIB_ASHLDI3
@@ -28,7 +29,6 @@ config MIPS
         select GENERIC_LIB_CMPDI2
         select GENERIC_LIB_LSHRDI3
         select GENERIC_LIB_UCMPDI2
-       select GENERIC_PCI_IOMAP
         select GENERIC_SCHED_CLOCK if !CAVIUM_OCTEON_SOC
         select GENERIC_SMP_IDLE_THREAD
         select GENERIC_TIME_VSYSCALL
@@ -78,6 +78,7 @@ config MIPS
         select RTC_LIB if !MACH_LOONGSON64
         select SYSCTL_EXCEPTION_TRACE
         select VIRT_TO_BUS
+       select NO_BOOTMEM
  
  menu "Machine selection"
  
@@ -132,6 +133,7 @@ config MIPS_GENERIC
         select USB_UHCI_BIG_ENDIAN_DESC if CPU_BIG_ENDIAN
         select USB_UHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN
         select USE_OF
+       select UHI_BOOT
         help
           Select this to build a kernel which aims to support multiple boards,
           generally using a flattened device tree passed from the bootloader
@@ -1149,6 +1151,7 @@ config NO_IOPORT_MAP
  
  config GENERIC_CSUM
         bool
+       default y if !CPU_HAS_LOAD_STORE_LR
  
  config GENERIC_ISA_DMA
         bool
@@ -1367,6 +1370,7 @@ config CPU_LOONGSON3
         select CPU_SUPPORTS_64BIT_KERNEL
         select CPU_SUPPORTS_HIGHMEM
         select CPU_SUPPORTS_HUGEPAGES
+       select CPU_HAS_LOAD_STORE_LR
         select WEAK_ORDERING
         select WEAK_REORDERING_BEYOND_LLSC
         select MIPS_PGD_C0_CONTEXT
@@ -1443,6 +1447,7 @@ config CPU_MIPS32_R1
         bool "MIPS32 Release 1"
         depends on SYS_HAS_CPU_MIPS32_R1
         select CPU_HAS_PREFETCH
+       select CPU_HAS_LOAD_STORE_LR
         select CPU_SUPPORTS_32BIT_KERNEL
         select CPU_SUPPORTS_HIGHMEM
         help
@@ -1460,6 +1465,7 @@ config CPU_MIPS32_R2
         bool "MIPS32 Release 2"
         depends on SYS_HAS_CPU_MIPS32_R2
         select CPU_HAS_PREFETCH
+       select CPU_HAS_LOAD_STORE_LR
         select CPU_SUPPORTS_32BIT_KERNEL
         select CPU_SUPPORTS_HIGHMEM
         select CPU_SUPPORTS_MSA
@@ -1478,7 +1484,6 @@ config CPU_MIPS32_R6
         select CPU_SUPPORTS_32BIT_KERNEL
         select CPU_SUPPORTS_HIGHMEM
         select CPU_SUPPORTS_MSA
-       select GENERIC_CSUM
         select HAVE_KVM
         select MIPS_O32_FP64_SUPPORT
         help
@@ -1491,6 +1496,7 @@ config CPU_MIPS64_R1
         bool "MIPS64 Release 1"
         depends on SYS_HAS_CPU_MIPS64_R1
         select CPU_HAS_PREFETCH
+       select CPU_HAS_LOAD_STORE_LR
         select CPU_SUPPORTS_32BIT_KERNEL
         select CPU_SUPPORTS_64BIT_KERNEL
         select CPU_SUPPORTS_HIGHMEM
@@ -1510,6 +1516,7 @@ config CPU_MIPS64_R2
         bool "MIPS64 Release 2"
         depends on SYS_HAS_CPU_MIPS64_R2
         select CPU_HAS_PREFETCH
+       select CPU_HAS_LOAD_STORE_LR
         select CPU_SUPPORTS_32BIT_KERNEL
         select CPU_SUPPORTS_64BIT_KERNEL
         select CPU_SUPPORTS_HIGHMEM
@@ -1531,7 +1538,6 @@ config CPU_MIPS64_R6
         select CPU_SUPPORTS_64BIT_KERNEL
         select CPU_SUPPORTS_HIGHMEM
         select CPU_SUPPORTS_MSA
-       select GENERIC_CSUM
         select MIPS_O32_FP64_SUPPORT if 32BIT || MIPS32_O32
         select HAVE_KVM
         help
@@ -1544,6 +1550,7 @@ config CPU_R3000
         bool "R3000"
         depends on SYS_HAS_CPU_R3000
         select CPU_HAS_WB
+       select CPU_HAS_LOAD_STORE_LR
         select CPU_SUPPORTS_32BIT_KERNEL
         select CPU_SUPPORTS_HIGHMEM
         help
@@ -1558,12 +1565,14 @@ config CPU_TX39XX
         bool "R39XX"
         depends on SYS_HAS_CPU_TX39XX
         select CPU_SUPPORTS_32BIT_KERNEL
+       select CPU_HAS_LOAD_STORE_LR
  
  config CPU_VR41XX
         bool "R41xx"
         depends on SYS_HAS_CPU_VR41XX
         select CPU_SUPPORTS_32BIT_KERNEL
         select CPU_SUPPORTS_64BIT_KERNEL
+       select CPU_HAS_LOAD_STORE_LR
         help
           The options selects support for the NEC VR4100 series of processors.
           Only choose this option if you have one of these processors as a
@@ -1575,6 +1584,7 @@ config CPU_R4300
         depends on SYS_HAS_CPU_R4300
         select CPU_SUPPORTS_32BIT_KERNEL
         select CPU_SUPPORTS_64BIT_KERNEL
+       select CPU_HAS_LOAD_STORE_LR
         help
           MIPS Technologies R4300-series processors.
  
@@ -1584,6 +1594,7 @@ config CPU_R4X00
         select CPU_SUPPORTS_32BIT_KERNEL
         select CPU_SUPPORTS_64BIT_KERNEL
         select CPU_SUPPORTS_HUGEPAGES
+       select CPU_HAS_LOAD_STORE_LR
         help
           MIPS Technologies R4000-series processors other than 4300, including
           the R4000, R4400, R4600, and 4700.
@@ -1592,6 +1603,7 @@ config CPU_TX49XX
         bool "R49XX"
         depends on SYS_HAS_CPU_TX49XX
         select CPU_HAS_PREFETCH
+       select CPU_HAS_LOAD_STORE_LR
         select CPU_SUPPORTS_32BIT_KERNEL
         select CPU_SUPPORTS_64BIT_KERNEL
         select CPU_SUPPORTS_HUGEPAGES
@@ -1602,6 +1614,7 @@ config CPU_R5000
         select CPU_SUPPORTS_32BIT_KERNEL
         select CPU_SUPPORTS_64BIT_KERNEL
         select CPU_SUPPORTS_HUGEPAGES
+       select CPU_HAS_LOAD_STORE_LR
         help
           MIPS Technologies R5000-series processors other than the Nevada.
  
@@ -1611,6 +1624,7 @@ config CPU_R5432
         select CPU_SUPPORTS_32BIT_KERNEL
         select CPU_SUPPORTS_64BIT_KERNEL
         select CPU_SUPPORTS_HUGEPAGES
+       select CPU_HAS_LOAD_STORE_LR
  
  config CPU_R5500
         bool "R5500"
@@ -1618,6 +1632,7 @@ config CPU_R5500
         select CPU_SUPPORTS_32BIT_KERNEL
         select CPU_SUPPORTS_64BIT_KERNEL
         select CPU_SUPPORTS_HUGEPAGES
+       select CPU_HAS_LOAD_STORE_LR
         help
           NEC VR5500 and VR5500A series processors implement 64-bit MIPS IV
           instruction set.
@@ -1628,6 +1643,7 @@ config CPU_NEVADA
         select CPU_SUPPORTS_32BIT_KERNEL
         select CPU_SUPPORTS_64BIT_KERNEL
         select CPU_SUPPORTS_HUGEPAGES
+       select CPU_HAS_LOAD_STORE_LR
         help
           QED / PMC-Sierra RM52xx-series ("Nevada") processors.
  
@@ -1635,6 +1651,7 @@ config CPU_R8000
         bool "R8000"
         depends on SYS_HAS_CPU_R8000
         select CPU_HAS_PREFETCH
+       select CPU_HAS_LOAD_STORE_LR
         select CPU_SUPPORTS_64BIT_KERNEL
         help
           MIPS Technologies R8000 processors.  Note these processors are
@@ -1644,6 +1661,7 @@ config CPU_R10000
         bool "R10000"
         depends on SYS_HAS_CPU_R10000
         select CPU_HAS_PREFETCH
+       select CPU_HAS_LOAD_STORE_LR
         select CPU_SUPPORTS_32BIT_KERNEL
         select CPU_SUPPORTS_64BIT_KERNEL
         select CPU_SUPPORTS_HIGHMEM
@@ -1655,6 +1673,7 @@ config CPU_RM7000
         bool "RM7000"
         depends on SYS_HAS_CPU_RM7000
         select CPU_HAS_PREFETCH
+       select CPU_HAS_LOAD_STORE_LR
         select CPU_SUPPORTS_32BIT_KERNEL
         select CPU_SUPPORTS_64BIT_KERNEL
         select CPU_SUPPORTS_HIGHMEM
@@ -1663,6 +1682,7 @@ config CPU_RM7000
  config CPU_SB1
         bool "SB1"
         depends on SYS_HAS_CPU_SB1
+       select CPU_HAS_LOAD_STORE_LR
         select CPU_SUPPORTS_32BIT_KERNEL
         select CPU_SUPPORTS_64BIT_KERNEL
         select CPU_SUPPORTS_HIGHMEM
@@ -1673,6 +1693,7 @@ config CPU_CAVIUM_OCTEON
         bool "Cavium Octeon processor"
         depends on SYS_HAS_CPU_CAVIUM_OCTEON
         select CPU_HAS_PREFETCH
+       select CPU_HAS_LOAD_STORE_LR
         select CPU_SUPPORTS_64BIT_KERNEL
         select WEAK_ORDERING
         select CPU_SUPPORTS_HIGHMEM
@@ -1702,6 +1723,7 @@ config CPU_BMIPS
         select WEAK_ORDERING
         select CPU_SUPPORTS_HIGHMEM
         select CPU_HAS_PREFETCH
+       select CPU_HAS_LOAD_STORE_LR
         select CPU_SUPPORTS_CPUFREQ
         select MIPS_EXTERNAL_TIMER
         help
@@ -1710,6 +1732,7 @@ config CPU_BMIPS
  config CPU_XLR
         bool "Netlogic XLR SoC"
         depends on SYS_HAS_CPU_XLR
+       select CPU_HAS_LOAD_STORE_LR
         select CPU_SUPPORTS_32BIT_KERNEL
         select CPU_SUPPORTS_64BIT_KERNEL
         select CPU_SUPPORTS_HIGHMEM
@@ -1728,6 +1751,7 @@ config CPU_XLP
         select WEAK_ORDERING
         select WEAK_REORDERING_BEYOND_LLSC
         select CPU_HAS_PREFETCH
+       select CPU_HAS_LOAD_STORE_LR
         select CPU_MIPSR2
         select CPU_SUPPORTS_HUGEPAGES
         select MIPS_ASID_BITS_VARIABLE
@@ -1833,12 +1857,14 @@ config CPU_LOONGSON2
         select CPU_SUPPORTS_HIGHMEM
         select CPU_SUPPORTS_HUGEPAGES
         select ARCH_HAS_PHYS_TO_DMA
+       select CPU_HAS_LOAD_STORE_LR
  
  config CPU_LOONGSON1
         bool
         select CPU_MIPS32
         select CPU_MIPSR1
         select CPU_HAS_PREFETCH
+       select CPU_HAS_LOAD_STORE_LR
         select CPU_SUPPORTS_32BIT_KERNEL
         select CPU_SUPPORTS_HIGHMEM
         select CPU_SUPPORTS_CPUFREQ
@@ -2452,6 +2478,13 @@ config XKS01
  config CPU_HAS_RIXI
         bool
  
+config CPU_HAS_LOAD_STORE_LR
+       bool
+       help
+         CPU has support for unaligned load and store instructions:
+         LWL, LWR, SWL, SWR (Load/store word left/right).
+         LDL, LDR, SDL, SDR (Load/store doubleword left/right, for 64bit systems).
+
  #
  # Vectored interrupt mode is an R2 feature
  #
@@ -2899,6 +2932,9 @@ config USE_OF
         select OF_EARLY_FLATTREE
         select IRQ_DOMAIN
  
+config UHI_BOOT
+       bool
+
  config BUILTIN_DTB
         bool
  
diff --git a/arch/mips/Makefile b/arch/mips/Makefile

index d74b374..15a84cf 100644 (file)
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -13,6 +13,7 @@
  #
  
  archscripts: scripts_basic
+       $(Q)$(MAKE) $(build)=arch/mips/tools elf-entry
         $(Q)$(MAKE) $(build)=arch/mips/boot/tools relocs
  
  KBUILD_DEFCONFIG := 32r2el_defconfig
@@ -230,6 +231,8 @@ toolchain-xpa                               := $(call cc-option-yn,$(xpa-cflags-y) -mxpa)
  cflags-$(toolchain-xpa)                        += -DTOOLCHAIN_SUPPORTS_XPA
  toolchain-crc                          := $(call cc-option-yn,$(mips-cflags) -Wa$(comma)-mcrc)
  cflags-$(toolchain-crc)                        += -DTOOLCHAIN_SUPPORTS_CRC
+toolchain-dsp                          := $(call cc-option-yn,$(mips-cflags) -Wa$(comma)-mdsp)
+cflags-$(toolchain-dsp)                        += -DTOOLCHAIN_SUPPORTS_DSP
  
  #
  # Firmware support
@@ -257,13 +260,7 @@ ifdef CONFIG_PHYSICAL_START
  load-y                                 = $(CONFIG_PHYSICAL_START)
  endif
  
-# Sign-extend the entry point to 64 bits if retrieved as a 32-bit number.
-entry-y                = $(shell $(OBJDUMP) -f vmlinux 2>/dev/null \
-                       | sed -n '/^start address / { \
-                               s/^.* //; \
-                               s/0x\([0-7].......\)$$/0x00000000\1/; \
-                               s/0x\(........\)$$/0xffffffff\1/; p }')
-
+entry-y                                = $(shell $(objtree)/arch/mips/tools/elf-entry vmlinux)
  cflags-y                       += -I$(srctree)/arch/mips/include/asm/mach-generic
  drivers-$(CONFIG_PCI)          += arch/mips/pci/
  
@@ -407,18 +404,7 @@ endif
  CLEAN_FILES += vmlinux.32 vmlinux.64
  
  # device-trees
-core-$(CONFIG_BUILTIN_DTB) += arch/mips/boot/dts/
-
-%.dtb %.dtb.S %.dtb.o: | scripts
-       $(Q)$(MAKE) $(build)=arch/mips/boot/dts arch/mips/boot/dts/$@
-
-PHONY += dtbs
-dtbs: scripts
-       $(Q)$(MAKE) $(build)=arch/mips/boot/dts
-
-PHONY += dtbs_install
-dtbs_install:
-       $(Q)$(MAKE) $(dtbinst)=arch/mips/boot/dts
+core-y += arch/mips/boot/dts/
  
  archprepare:
  ifdef CONFIG_MIPS32_N32
@@ -461,8 +447,6 @@ define archhelp
         echo '  uImage.lzma          - U-Boot image (lzma)'
         echo '  uImage.lzo           - U-Boot image (lzo)'
         echo '  uzImage.bin          - U-Boot image (self-extracting)'
-       echo '  dtbs                 - Device-tree blobs for enabled boards'
-       echo '  dtbs_install         - Install dtbs to $(INSTALL_DTBS_PATH)'
         echo
         echo '  These will be default as appropriate for a configured platform.'
         echo
diff --git a/arch/mips/bcm47xx/workarounds.c b/arch/mips/bcm47xx/workarounds.c

index 1a8a07e..46eddbe 100644 (file)
--- a/arch/mips/bcm47xx/workarounds.c
+++ b/arch/mips/bcm47xx/workarounds.c
@@ -5,9 +5,8 @@
  #include <bcm47xx_board.h>
  #include <bcm47xx.h>
  
-static void __init bcm47xx_workarounds_netgear_wnr3500l(void)
+static void __init bcm47xx_workarounds_enable_usb_power(int usb_power)
  {
-       const int usb_power = 12;
         int err;
  
         err = gpio_request_one(usb_power, GPIOF_OUT_INIT_HIGH, "usb_power");
@@ -23,7 +22,10 @@ void __init bcm47xx_workarounds(void)
  
         switch (board) {
         case BCM47XX_BOARD_NETGEAR_WNR3500L:
-               bcm47xx_workarounds_netgear_wnr3500l();
+               bcm47xx_workarounds_enable_usb_power(12);
+               break;
+       case BCM47XX_BOARD_NETGEAR_WNDR3400_V3:
+               bcm47xx_workarounds_enable_usb_power(21);
                 break;
         default:
                 /* No workaround(s) needed */
diff --git a/arch/mips/bmips/setup.c b/arch/mips/bmips/setup.c

index 231fc5c..6329c5f 100644 (file)
--- a/arch/mips/bmips/setup.c
+++ b/arch/mips/bmips/setup.c
@@ -153,8 +153,6 @@ void __init plat_time_init(void)
         mips_hpt_frequency = freq;
  }
  
-extern const char __appended_dtb;
-
  void __init plat_mem_setup(void)
  {
         void *dtb;
@@ -164,15 +162,10 @@ void __init plat_mem_setup(void)
         ioport_resource.start = 0;
         ioport_resource.end = ~0;
  
-#ifdef CONFIG_MIPS_ELF_APPENDED_DTB
-       if (!fdt_check_header(&__appended_dtb))
-               dtb = (void *)&__appended_dtb;
-       else
-#endif
         /* intended to somewhat resemble ARM; see Documentation/arm/Booting */
         if (fw_arg0 == 0 && fw_arg1 == 0xffffffff)
                 dtb = phys_to_virt(fw_arg2);
-       else if (fw_passed_dtb) /* UHI interface */
+       else if (fw_passed_dtb) /* UHI interface or appended dtb */
                 dtb = (void *)fw_passed_dtb;
         else if (__dtb_start != __dtb_end)
                 dtb = (void *)__dtb_start;
diff --git a/arch/mips/boot/dts/lantiq/danube.dtsi b/arch/mips/boot/dts/lantiq/danube.dtsi

index 2dd9501..510be63 100644 (file)
--- a/arch/mips/boot/dts/lantiq/danube.dtsi
+++ b/arch/mips/boot/dts/lantiq/danube.dtsi
@@ -10,12 +10,12 @@
                 };
         };
  
-       biu@1F800000 {
+       biu@1f800000 {
                 #address-cells = <1>;
                 #size-cells = <1>;
                 compatible = "lantiq,biu", "simple-bus";
-               reg = <0x1F800000 0x800000>;
-               ranges = <0x0 0x1F800000 0x7FFFFF>;
+               reg = <0x1f800000 0x800000>;
+               ranges = <0x0 0x1f800000 0x7fffff>;
  
                 icu0: icu@80200 {
                         #interrupt-cells = <1>;
@@ -24,18 +24,18 @@
                         reg = <0x80200 0x120>;
                 };
  
-               watchdog@803F0 {
+               watchdog@803f0 {
                         compatible = "lantiq,wdt";
-                       reg = <0x803F0 0x10>;
+                       reg = <0x803f0 0x10>;
                 };
         };
  
-       sram@1F000000 {
+       sram@1f000000 {
                 #address-cells = <1>;
                 #size-cells = <1>;
                 compatible = "lantiq,sram";
-               reg = <0x1F000000 0x800000>;
-               ranges = <0x0 0x1F000000 0x7FFFFF>;
+               reg = <0x1f000000 0x800000>;
+               ranges = <0x0 0x1f000000 0x7fffff>;
  
                 eiu0: eiu@101000 {
                         #interrupt-cells = <1>;
@@ -66,41 +66,41 @@
                 #address-cells = <1>;
                 #size-cells = <1>;
                 compatible = "lantiq,fpi", "simple-bus";
-               ranges = <0x0 0x10000000 0xEEFFFFF>;
-               reg = <0x10000000 0xEF00000>;
+               ranges = <0x0 0x10000000 0xeefffff>;
+               reg = <0x10000000 0xef00000>;
  
-               gptu@E100A00 {
+               gptu@e100a00 {
                         compatible = "lantiq,gptu-xway";
-                       reg = <0xE100A00 0x100>;
+                       reg = <0xe100a00 0x100>;
                 };
  
-               serial@E100C00 {
+               serial@e100c00 {
                         compatible = "lantiq,asc";
-                       reg = <0xE100C00 0x400>;
+                       reg = <0xe100c00 0x400>;
                         interrupt-parent = <&icu0>;
                         interrupts = <112 113 114>;
                 };
  
-               dma0: dma@E104100 {
+               dma0: dma@e104100 {
                         compatible = "lantiq,dma-xway";
-                       reg = <0xE104100 0x800>;
+                       reg = <0xe104100 0x800>;
                 };
  
-               ebu0: ebu@E105300 {
+               ebu0: ebu@e105300 {
                         compatible = "lantiq,ebu-xway";
-                       reg = <0xE105300 0x100>;
+                       reg = <0xe105300 0x100>;
                 };
  
-               pci0: pci@E105400 {
+               pci0: pci@e105400 {
                         #address-cells = <3>;
                         #size-cells = <2>;
                         #interrupt-cells = <1>;
                         compatible = "lantiq,pci-xway";
                         bus-range = <0x0 0x0>;
                         ranges = <0x2000000 0 0x8000000 0x8000000 0 0x2000000   /* pci memory */
-                                 0x1000000 0 0x00000000 0xAE00000 0 0x200000>; /* io space */
+                                 0x1000000 0 0x00000000 0xae00000 0 0x200000>; /* io space */
                         reg = <0x7000000 0x8000         /* config space */
-                               0xE105400 0x400>;       /* pci bridge */
+                               0xe105400 0x400>;       /* pci bridge */
                 };
         };
  };
diff --git a/arch/mips/boot/dts/lantiq/easy50712.dts b/arch/mips/boot/dts/lantiq/easy50712.dts

index c37a339..1ce20b7 100644 (file)
--- a/arch/mips/boot/dts/lantiq/easy50712.dts
+++ b/arch/mips/boot/dts/lantiq/easy50712.dts
@@ -52,14 +52,14 @@
                         };
                 };
  
-               gpio: pinmux@E100B10 {
+               gpio: pinmux@e100b10 {
                         compatible = "lantiq,danube-pinctrl";
                         pinctrl-names = "default";
                         pinctrl-0 = <&state_default>;
  
                         #gpio-cells = <2>;
                         gpio-controller;
-                       reg = <0xE100B10 0xA0>;
+                       reg = <0xe100b10 0xa0>;
  
                         state_default: pinmux {
                                 stp {
@@ -82,26 +82,26 @@
                         };
                 };
  
-               etop@E180000 {
+               etop@e180000 {
                         compatible = "lantiq,etop-xway";
-                       reg = <0xE180000 0x40000>;
+                       reg = <0xe180000 0x40000>;
                         interrupt-parent = <&icu0>;
                         interrupts = <73 78>;
                         phy-mode = "rmii";
                         mac-address = [ 00 11 22 33 44 55 ];
                 };
  
-               stp0: stp@E100BB0 {
+               stp0: stp@e100bb0 {
                         #gpio-cells = <2>;
                         compatible = "lantiq,gpio-stp-xway";
                         gpio-controller;
-                       reg = <0xE100BB0 0x40>;
+                       reg = <0xe100bb0 0x40>;
  
                         lantiq,shadow = <0xfff>;
                         lantiq,groups = <0x3>;
                 };
  
-               pci@E105400 {
+               pci@e105400 {
                         lantiq,bus-clock = <33333333>;
                         interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
                         interrupt-map = <
diff --git a/arch/mips/boot/dts/mscc/Makefile b/arch/mips/boot/dts/mscc/Makefile

index 9a9bb7e..ec6f5b2 100644 (file)
--- a/arch/mips/boot/dts/mscc/Makefile
+++ b/arch/mips/boot/dts/mscc/Makefile
@@ -1,3 +1,3 @@
-dtb-$(CONFIG_MSCC_OCELOT)      += ocelot_pcb123.dtb
+dtb-$(CONFIG_MSCC_OCELOT)      += ocelot_pcb123.dtb ocelot_pcb120.dtb
  
  obj-$(CONFIG_BUILTIN_DTB)      += $(addsuffix .o, $(dtb-y))
diff --git a/arch/mips/boot/dts/mscc/ocelot.dtsi b/arch/mips/boot/dts/mscc/ocelot.dtsi

index 8ce317c..90c60d4 100644 (file)
--- a/arch/mips/boot/dts/mscc/ocelot.dtsi
+++ b/arch/mips/boot/dts/mscc/ocelot.dtsi
@@ -78,6 +78,19 @@
                         status = "disabled";
                 };
  
+               i2c: i2c@100400 {
+                       compatible = "mscc,ocelot-i2c", "snps,designware-i2c";
+                       pinctrl-0 = <&i2c_pins>;
+                       pinctrl-names = "default";
+                       reg = <0x100400 0x100>, <0x198 0x8>;
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+                       interrupts = <8>;
+                       clocks = <&ahb_clk>;
+
+                       status = "disabled";
+               };
+
                 uart2: serial@100800 {
                         pinctrl-0 = <&uart2_pins>;
                         pinctrl-names = "default";
@@ -182,6 +195,11 @@
                         interrupts = <13>;
                         #interrupt-cells = <2>;
  
+                       i2c_pins: i2c-pins {
+                               pins = "GPIO_16", "GPIO_17";
+                               function = "twi";
+                       };
+
                         uart_pins: uart-pins {
                                 pins = "GPIO_6", "GPIO_7";
                                 function = "uart";
@@ -196,6 +214,7 @@
                                 pins = "GPIO_14", "GPIO_15";
                                 function = "miim1";
                         };
+
                 };
  
                 mdio0: mdio@107009c {
diff --git a/arch/mips/boot/dts/mscc/ocelot_pcb120.dts b/arch/mips/boot/dts/mscc/ocelot_pcb120.dts

new file mode 100644 (file)

index 0000000..33991fd
--- /dev/null
+++ b/arch/mips/boot/dts/mscc/ocelot_pcb120.dts
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Copyright (c) 2017 Microsemi Corporation */
+
+/dts-v1/;
+
+#include <dt-bindings/interrupt-controller/irq.h>
+#include <dt-bindings/phy/phy-ocelot-serdes.h>
+#include "ocelot.dtsi"
+
+/ {
+       compatible = "mscc,ocelot-pcb120", "mscc,ocelot";
+
+       chosen {
+               stdout-path = "serial0:115200n8";
+       };
+
+       memory@0 {
+               device_type = "memory";
+               reg = <0x0 0x0e000000>;
+       };
+};
+
+&gpio {
+       phy_int_pins: phy_int_pins {
+               pins = "GPIO_4";
+               function = "gpio";
+       };
+};
+
+&mdio0 {
+       status = "okay";
+};
+
+&mdio1 {
+       status = "okay";
+       pinctrl-names = "default";
+       pinctrl-0 = <&miim1>, <&phy_int_pins>;
+
+       phy7: ethernet-phy@0 {
+               reg = <0>;
+               interrupts = <4 IRQ_TYPE_LEVEL_HIGH>;
+               interrupt-parent = <&gpio>;
+       };
+       phy6: ethernet-phy@1 {
+               reg = <1>;
+               interrupts = <4 IRQ_TYPE_LEVEL_HIGH>;
+               interrupt-parent = <&gpio>;
+       };
+       phy5: ethernet-phy@2 {
+               reg = <2>;
+               interrupts = <4 IRQ_TYPE_LEVEL_HIGH>;
+               interrupt-parent = <&gpio>;
+       };
+       phy4: ethernet-phy@3 {
+               reg = <3>;
+               interrupts = <4 IRQ_TYPE_LEVEL_HIGH>;
+               interrupt-parent = <&gpio>;
+       };
+};
+
+&port0 {
+       phy-handle = <&phy0>;
+};
+
+&port1 {
+       phy-handle = <&phy1>;
+};
+
+&port2 {
+       phy-handle = <&phy2>;
+};
+
+&port3 {
+       phy-handle = <&phy3>;
+};
+
+&port4 {
+       phy-handle = <&phy7>;
+       phy-mode = "sgmii";
+       phys = <&serdes 4 SERDES1G(2)>;
+};
+
+&port5 {
+       phy-handle = <&phy4>;
+       phy-mode = "sgmii";
+       phys = <&serdes 5 SERDES1G(5)>;
+};
+
+&port6 {
+       phy-handle = <&phy6>;
+       phy-mode = "sgmii";
+       phys = <&serdes 6 SERDES1G(3)>;
+};
+
+&port9 {
+       phy-handle = <&phy5>;
+       phy-mode = "sgmii";
+       phys = <&serdes 9 SERDES1G(4)>;
+};
+
+&uart0 {
+       status = "okay";
+};
+
+&uart2 {
+       status = "okay";
+};
diff --git a/arch/mips/boot/dts/mscc/ocelot_pcb123.dts b/arch/mips/boot/dts/mscc/ocelot_pcb123.dts

index 2266027..ef852f3 100644 (file)
--- a/arch/mips/boot/dts/mscc/ocelot_pcb123.dts
+++ b/arch/mips/boot/dts/mscc/ocelot_pcb123.dts
@@ -36,6 +36,12 @@
         };
  };
  
+&i2c {
+       clock-frequency = <100000>;
+       i2c-sda-hold-time-ns = <300>;
+       status = "okay";
+};
+
  &mdio0 {
         status = "okay";
  };
diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c

index 8272d8c..cc1d852 100644 (file)
--- a/arch/mips/cavium-octeon/octeon-irq.c
+++ b/arch/mips/cavium-octeon/octeon-irq.c
@@ -1180,8 +1180,8 @@ static int octeon_irq_gpio_xlat(struct irq_domain *d,
                 type = IRQ_TYPE_LEVEL_LOW;
                 break;
         default:
-               pr_err("Error: (%s) Invalid irq trigger specification: %x\n",
-                      node->name,
+               pr_err("Error: (%pOFn) Invalid irq trigger specification: %x\n",
+                      node,
                        trigger);
                 type = IRQ_TYPE_LEVEL_LOW;
                 break;
@@ -2271,8 +2271,8 @@ static int __init octeon_irq_init_cib(struct device_node *ciu_node,
  
         parent_irq = irq_of_parse_and_map(ciu_node, 0);
         if (!parent_irq) {
-               pr_err("ERROR: Couldn't acquire parent_irq for %s\n",
-                       ciu_node->name);
+               pr_err("ERROR: Couldn't acquire parent_irq for %pOFn\n",
+                       ciu_node);
                 return -EINVAL;
         }
  
@@ -2283,7 +2283,7 @@ static int __init octeon_irq_init_cib(struct device_node *ciu_node,
  
         addr = of_get_address(ciu_node, 0, NULL, NULL);
         if (!addr) {
-               pr_err("ERROR: Couldn't acquire reg(0) %s\n", ciu_node->name);
+               pr_err("ERROR: Couldn't acquire reg(0) %pOFn\n", ciu_node);
                 return -EINVAL;
         }
         host_data->raw_reg = (u64)phys_to_virt(
@@ -2291,7 +2291,7 @@ static int __init octeon_irq_init_cib(struct device_node *ciu_node,
  
         addr = of_get_address(ciu_node, 1, NULL, NULL);
         if (!addr) {
-               pr_err("ERROR: Couldn't acquire reg(1) %s\n", ciu_node->name);
+               pr_err("ERROR: Couldn't acquire reg(1) %pOFn\n", ciu_node);
                 return -EINVAL;
         }
         host_data->en_reg = (u64)phys_to_virt(
@@ -2299,8 +2299,8 @@ static int __init octeon_irq_init_cib(struct device_node *ciu_node,
  
         r = of_property_read_u32(ciu_node, "cavium,max-bits", &val);
         if (r) {
-               pr_err("ERROR: Couldn't read cavium,max-bits from %s\n",
-                       ciu_node->name);
+               pr_err("ERROR: Couldn't read cavium,max-bits from %pOFn\n",
+                       ciu_node);
                 return r;
         }
         host_data->max_bits = val;
diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c

index c242623..dfb95cf 100644 (file)
--- a/arch/mips/cavium-octeon/setup.c
+++ b/arch/mips/cavium-octeon/setup.c
@@ -1161,15 +1161,12 @@ void __init device_tree_init(void)
         bool do_prune;
         bool fill_mac;
  
-#ifdef CONFIG_MIPS_ELF_APPENDED_DTB
-       if (!fdt_check_header(&__appended_dtb)) {
-               fdt = &__appended_dtb;
+       if (fw_passed_dtb) {
+               fdt = (void *)fw_passed_dtb;
                 do_prune = false;
                 fill_mac = true;
                 pr_info("Using appended Device Tree.\n");
-       } else
-#endif
-       if (octeon_bootinfo->minor_version >= 3 && octeon_bootinfo->fdt_addr) {
+       } else if (octeon_bootinfo->minor_version >= 3 && octeon_bootinfo->fdt_addr) {
                 fdt = phys_to_virt(octeon_bootinfo->fdt_addr);
                 if (fdt_check_header(fdt))
                         panic("Corrupt Device Tree passed to kernel.");
diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c

index 75e7c86..39f2a2e 100644 (file)
--- a/arch/mips/cavium-octeon/smp.c
+++ b/arch/mips/cavium-octeon/smp.c
@@ -15,6 +15,7 @@
  #include <linux/sched/task_stack.h>
  #include <linux/init.h>
  #include <linux/export.h>
+#include <linux/kexec.h>
  
  #include <asm/mmu_context.h>
  #include <asm/time.h>
@@ -424,6 +425,9 @@ const struct plat_smp_ops octeon_smp_ops = {
         .cpu_disable            = octeon_cpu_disable,
         .cpu_die                = octeon_cpu_die,
  #endif
+#ifdef CONFIG_KEXEC
+       .kexec_nonboot_cpu      = kexec_nonboot_cpu_jump,
+#endif
  };
  
  static irqreturn_t octeon_78xx_reched_interrupt(int irq, void *dev_id)
@@ -501,6 +505,9 @@ static const struct plat_smp_ops octeon_78xx_smp_ops = {
         .cpu_disable            = octeon_cpu_disable,
         .cpu_die                = octeon_cpu_die,
  #endif
+#ifdef CONFIG_KEXEC
+       .kexec_nonboot_cpu      = kexec_nonboot_cpu_jump,
+#endif
  };
  
  void __init octeon_setup_smp(void)
diff --git a/arch/mips/configs/generic/board-ocelot.config b/arch/mips/configs/generic/board-ocelot.config

index aa81576..f607888 100644 (file)
--- a/arch/mips/configs/generic/board-ocelot.config
+++ b/arch/mips/configs/generic/board-ocelot.config
@@ -18,17 +18,25 @@ CONFIG_SERIAL_8250=y
  CONFIG_SERIAL_8250_CONSOLE=y
  CONFIG_SERIAL_OF_PLATFORM=y
  
-CONFIG_GPIO_SYSFS=y
+CONFIG_NETDEVICES=y
+CONFIG_MSCC_OCELOT_SWITCH=y
+CONFIG_MSCC_OCELOT_SWITCH_OCELOT=y
+CONFIG_MDIO_MSCC_MIIM=y
+CONFIG_MICROSEMI_PHY=y
  
  CONFIG_I2C=y
  CONFIG_I2C_CHARDEV=y
  CONFIG_I2C_MUX=y
+CONFIG_I2C_DESIGNWARE_PLATFORM=y
  
  CONFIG_SPI=y
  CONFIG_SPI_BITBANG=y
  CONFIG_SPI_DESIGNWARE=y
+CONFIG_SPI_DW_MMIO=y
  CONFIG_SPI_SPIDEV=y
  
+CONFIG_GPIO_SYSFS=y
+
  CONFIG_POWER_RESET=y
  CONFIG_POWER_RESET_OCELOT_RESET=y
  
diff --git a/arch/mips/generic/Kconfig b/arch/mips/generic/Kconfig

index 08e33c6..fd60198 100644 (file)
--- a/arch/mips/generic/Kconfig
+++ b/arch/mips/generic/Kconfig
@@ -65,11 +65,11 @@ config FIT_IMAGE_FDT_XILFPGA
           Enable this to include the FDT for the MIPSfpga platform
           from Imagination Technologies in the FIT kernel image.
  
-config FIT_IMAGE_FDT_OCELOT_PCB123
-       bool "Include FDT for Microsemi Ocelot PCB123"
+config FIT_IMAGE_FDT_OCELOT
+       bool "Include FDT for Microsemi Ocelot development platforms"
         select MSCC_OCELOT
         help
-         Enable this to include the FDT for the Ocelot PCB123 platform
+         Enable this to include the FDT for the Ocelot development platforms
           from Microsemi in the FIT kernel image.
           This requires u-boot on the platform.
  
diff --git a/arch/mips/generic/Makefile b/arch/mips/generic/Makefile

index d03a36f..181aa13 100644 (file)
--- a/arch/mips/generic/Makefile
+++ b/arch/mips/generic/Makefile
@@ -15,5 +15,4 @@ obj-y += proc.o
  obj-$(CONFIG_YAMON_DT_SHIM)            += yamon-dt.o
  obj-$(CONFIG_LEGACY_BOARD_SEAD3)       += board-sead3.o
  obj-$(CONFIG_LEGACY_BOARD_OCELOT)      += board-ocelot.o
-obj-$(CONFIG_KEXEC)                    += kexec.o
  obj-$(CONFIG_VIRT_BOARD_RANCHU)                += board-ranchu.o
diff --git a/arch/mips/generic/Platform b/arch/mips/generic/Platform

index 879cb80..eaa19d1 100644 (file)
--- a/arch/mips/generic/Platform
+++ b/arch/mips/generic/Platform
@@ -16,5 +16,5 @@ all-$(CONFIG_MIPS_GENERIC)    := vmlinux.gz.itb
  its-y                                  := vmlinux.its.S
  its-$(CONFIG_FIT_IMAGE_FDT_BOSTON)     += board-boston.its.S
  its-$(CONFIG_FIT_IMAGE_FDT_NI169445)   += board-ni169445.its.S
-its-$(CONFIG_FIT_IMAGE_FDT_OCELOT_PCB123) += board-ocelot_pcb123.its.S
+its-$(CONFIG_FIT_IMAGE_FDT_OCELOT)     += board-ocelot.its.S
  its-$(CONFIG_FIT_IMAGE_FDT_XILFPGA)    += board-xilfpga.its.S
diff --git a/arch/mips/generic/board-ocelot.its.S b/arch/mips/generic/board-ocelot.its.S

new file mode 100644 (file)

index 0000000..3da2398
--- /dev/null
+++ b/arch/mips/generic/board-ocelot.its.S
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
+/ {
+       images {
+               fdt@ocelot_pcb123 {
+                       description = "MSCC Ocelot PCB123 Device Tree";
+                       data = /incbin/("boot/dts/mscc/ocelot_pcb123.dtb");
+                       type = "flat_dt";
+                       arch = "mips";
+                       compression = "none";
+                       hash@0 {
+                               algo = "sha1";
+                       };
+               };
+
+               fdt@ocelot_pcb120 {
+                       description = "MSCC Ocelot PCB120 Device Tree";
+                       data = /incbin/("boot/dts/mscc/ocelot_pcb120.dtb");
+                       type = "flat_dt";
+                       arch = "mips";
+                       compression = "none";
+                       hash@0 {
+                               algo = "sha1";
+                       };
+               };
+       };
+
+       configurations {
+               conf@ocelot_pcb123 {
+                       description = "Ocelot Linux kernel";
+                       kernel = "kernel@0";
+                       fdt = "fdt@ocelot_pcb123";
+               };
+
+               conf@ocelot_pcb120 {
+                       description = "Ocelot Linux kernel";
+                       kernel = "kernel@0";
+                       fdt = "fdt@ocelot_pcb120";
+               };
+       };
+};
diff --git a/arch/mips/generic/board-ocelot_pcb123.its.S b/arch/mips/generic/board-ocelot_pcb123.its.S

deleted file mode 100644 (file)

index 5a7d5e1..0000000
--- a/arch/mips/generic/board-ocelot_pcb123.its.S
+++ /dev/null
@@ -1,23 +0,0 @@
-/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
-/ {
-       images {
-               fdt@ocelot_pcb123 {
-                       description = "MSCC Ocelot PCB123 Device Tree";
-                       data = /incbin/("boot/dts/mscc/ocelot_pcb123.dtb");
-                       type = "flat_dt";
-                       arch = "mips";
-                       compression = "none";
-                       hash@0 {
-                               algo = "sha1";
-                       };
-               };
-       };
-
-       configurations {
-               conf@ocelot_pcb123 {
-                       description = "Ocelot Linux kernel";
-                       kernel = "kernel@0";
-                       fdt = "fdt@ocelot_pcb123";
-               };
-       };
-};
diff --git a/arch/mips/generic/kexec.c b/arch/mips/generic/kexec.c

deleted file mode 100644 (file)

index 1ca409f..0000000
--- a/arch/mips/generic/kexec.c
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (C) 2016 Imagination Technologies
- * Author: Marcin Nowakowski <marcin.nowakowski@mips.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-#include <linux/kexec.h>
-#include <linux/libfdt.h>
-#include <linux/uaccess.h>
-
-static int generic_kexec_prepare(struct kimage *image)
-{
-       int i;
-
-       for (i = 0; i < image->nr_segments; i++) {
-               struct fdt_header fdt;
-
-               if (image->segment[i].memsz <= sizeof(fdt))
-                       continue;
-
-               if (copy_from_user(&fdt, image->segment[i].buf, sizeof(fdt)))
-                       continue;
-
-               if (fdt_check_header(&fdt))
-                       continue;
-
-               kexec_args[0] = -2;
-               kexec_args[1] = (unsigned long)
-                       phys_to_virt((unsigned long)image->segment[i].mem);
-               break;
-       }
-       return 0;
-}
-
-static int __init register_generic_kexec(void)
-{
-       _machine_kexec_prepare = generic_kexec_prepare;
-       return 0;
-}
-arch_initcall(register_generic_kexec);
diff --git a/arch/mips/include/asm/asm-eva.h b/arch/mips/include/asm/asm-eva.h

index 1e38f0e..d80be38 100644 (file)
--- a/arch/mips/include/asm/asm-eva.h
+++ b/arch/mips/include/asm/asm-eva.h
@@ -15,6 +15,7 @@
  /* Kernel variants */
  
  #define kernel_cache(op, base)         "cache " op ", " base "\n"
+#define kernel_pref(hint, base)                "pref " hint ", " base "\n"
  #define kernel_ll(reg, addr)           "ll " reg ", " addr "\n"
  #define kernel_sc(reg, addr)           "sc " reg ", " addr "\n"
  #define kernel_lw(reg, addr)           "lw " reg ", " addr "\n"
@@ -51,6 +52,7 @@
                                 "       .set    pop\n"
  
  #define user_cache(op, base)           __BUILD_EVA_INSN("cachee", op, base)
+#define user_pref(hint, base)          __BUILD_EVA_INSN("prefe", hint, base)
  #define user_ll(reg, addr)             __BUILD_EVA_INSN("lle", reg, addr)
  #define user_sc(reg, addr)             __BUILD_EVA_INSN("sce", reg, addr)
  #define user_lw(reg, addr)             __BUILD_EVA_INSN("lwe", reg, addr)
@@ -72,6 +74,7 @@
  #else
  
  #define user_cache(op, base)           kernel_cache(op, base)
+#define user_pref(hint, base)          kernel_pref(hint, base)
  #define user_ll(reg, addr)             kernel_ll(reg, addr)
  #define user_sc(reg, addr)             kernel_sc(reg, addr)
  #define user_lw(reg, addr)             kernel_lw(reg, addr)
@@ -99,6 +102,7 @@
  #else /* __ASSEMBLY__ */
  
  #define kernel_cache(op, base)         cache op, base
+#define kernel_pref(hint, base)                pref hint, base
  #define kernel_ll(reg, addr)           ll reg, addr
  #define kernel_sc(reg, addr)           sc reg, addr
  #define kernel_lw(reg, addr)           lw reg, addr
@@ -135,6 +139,7 @@
                                 .set    pop;
  
  #define user_cache(op, base)           __BUILD_EVA_INSN(cachee, op, base)
+#define user_pref(hint, base)          __BUILD_EVA_INSN(prefe, hint, base)
  #define user_ll(reg, addr)             __BUILD_EVA_INSN(lle, reg, addr)
  #define user_sc(reg, addr)             __BUILD_EVA_INSN(sce, reg, addr)
  #define user_lw(reg, addr)             __BUILD_EVA_INSN(lwe, reg, addr)
@@ -155,6 +160,7 @@
  #else
  
  #define user_cache(op, base)           kernel_cache(op, base)
+#define user_pref(hint, base)          kernel_pref(hint, base)
  #define user_ll(reg, addr)             kernel_ll(reg, addr)
  #define user_sc(reg, addr)             kernel_sc(reg, addr)
  #define user_lw(reg, addr)             kernel_lw(reg, addr)
diff --git a/arch/mips/include/asm/asm.h b/arch/mips/include/asm/asm.h

index 81fae23..c23527b 100644 (file)
--- a/arch/mips/include/asm/asm.h
+++ b/arch/mips/include/asm/asm.h
@@ -20,32 +20,6 @@
  #include <asm/sgidefs.h>
  #include <asm/asm-eva.h>
  
-#ifndef CAT
-#ifdef __STDC__
-#define __CAT(str1, str2) str1##str2
-#else
-#define __CAT(str1, str2) str1/**/str2
-#endif
-#define CAT(str1, str2) __CAT(str1, str2)
-#endif
-
-/*
- * PIC specific declarations
- * Not used for the kernel but here seems to be the right place.
- */
-#ifdef __PIC__
-#define CPRESTORE(register)                            \
-               .cprestore register
-#define CPADD(register)                                        \
-               .cpadd  register
-#define CPLOAD(register)                               \
-               .cpload register
-#else
-#define CPRESTORE(register)
-#define CPADD(register)
-#define CPLOAD(register)
-#endif
-
  /*
   * LEAF - declare leaf routine
   */
@@ -129,96 +103,6 @@ symbol             =       value
  8:             .asciiz msg;                            \
                 .popsection;
  
-/*
- * Build text tables
- */
-#define TTABLE(string)                                 \
-               .pushsection .text;                     \
-               .word   1f;                             \
-               .popsection                             \
-               .pushsection .data;                     \
-1:             .asciiz string;                         \
-               .popsection
-
-/*
- * MIPS IV pref instruction.
- * Use with .set noreorder only!
- *
- * MIPS IV implementations are free to treat this as a nop.  The R5000
- * is one of them.  So we should have an option not to use this instruction.
- */
-#ifdef CONFIG_CPU_HAS_PREFETCH
-
-#define PREF(hint,addr)                                        \
-               .set    push;                           \
-               .set    arch=r5000;                     \
-               pref    hint, addr;                     \
-               .set    pop
-
-#define PREFE(hint, addr)                              \
-               .set    push;                           \
-               .set    mips0;                          \
-               .set    eva;                            \
-               prefe   hint, addr;                     \
-               .set    pop
-
-#define PREFX(hint,addr)                               \
-               .set    push;                           \
-               .set    arch=r5000;                     \
-               prefx   hint, addr;                     \
-               .set    pop
-
-#else /* !CONFIG_CPU_HAS_PREFETCH */
-
-#define PREF(hint, addr)
-#define PREFE(hint, addr)
-#define PREFX(hint, addr)
-
-#endif /* !CONFIG_CPU_HAS_PREFETCH */
-
-/*
- * MIPS ISA IV/V movn/movz instructions and equivalents for older CPUs.
- */
-#if (_MIPS_ISA == _MIPS_ISA_MIPS1)
-#define MOVN(rd, rs, rt)                               \
-               .set    push;                           \
-               .set    reorder;                        \
-               beqz    rt, 9f;                         \
-               move    rd, rs;                         \
-               .set    pop;                            \
-9:
-#define MOVZ(rd, rs, rt)                               \
-               .set    push;                           \
-               .set    reorder;                        \
-               bnez    rt, 9f;                         \
-               move    rd, rs;                         \
-               .set    pop;                            \
-9:
-#endif /* _MIPS_ISA == _MIPS_ISA_MIPS1 */
-#if (_MIPS_ISA == _MIPS_ISA_MIPS2) || (_MIPS_ISA == _MIPS_ISA_MIPS3)
-#define MOVN(rd, rs, rt)                               \
-               .set    push;                           \
-               .set    noreorder;                      \
-               bnezl   rt, 9f;                         \
-                move   rd, rs;                         \
-               .set    pop;                            \
-9:
-#define MOVZ(rd, rs, rt)                               \
-               .set    push;                           \
-               .set    noreorder;                      \
-               beqzl   rt, 9f;                         \
-                move   rd, rs;                         \
-               .set    pop;                            \
-9:
-#endif /* (_MIPS_ISA == _MIPS_ISA_MIPS2) || (_MIPS_ISA == _MIPS_ISA_MIPS3) */
-#if (_MIPS_ISA == _MIPS_ISA_MIPS4 ) || (_MIPS_ISA == _MIPS_ISA_MIPS5) || \
-    (_MIPS_ISA == _MIPS_ISA_MIPS32) || (_MIPS_ISA == _MIPS_ISA_MIPS64)
-#define MOVN(rd, rs, rt)                               \
-               movn    rd, rs, rt
-#define MOVZ(rd, rs, rt)                               \
-               movz    rd, rs, rt
-#endif /* MIPS IV, MIPS V, MIPS32 or MIPS64 */
-
  /*
   * Stack alignment
   */
diff --git a/arch/mips/include/asm/io.h b/arch/mips/include/asm/io.h

index 54c730a..266257d 100644 (file)
--- a/arch/mips/include/asm/io.h
+++ b/arch/mips/include/asm/io.h
@@ -20,6 +20,7 @@
  #include <linux/irqflags.h>
  
  #include <asm/addrspace.h>
+#include <asm/barrier.h>
  #include <asm/bug.h>
  #include <asm/byteorder.h>
  #include <asm/cpu.h>
@@ -33,11 +34,6 @@
  #include <ioremap.h>
  #include <mangle-port.h>
  
-/*
- * Slowdown I/O port space accesses for antique hardware.
- */
-#undef CONF_SLOWDOWN_IO
-
  /*
   * Raw operations are never swapped in software.  OTOH values that raw
   * operations are working on may or may not have been swapped by the bus
@@ -50,6 +46,11 @@
  # define __raw_ioswabq(a, x)   (x)
  # define ____raw_ioswabq(a, x) (x)
  
+# define __relaxed_ioswabb ioswabb
+# define __relaxed_ioswabw ioswabw
+# define __relaxed_ioswabl ioswabl
+# define __relaxed_ioswabq ioswabq
+
  /* ioswab[bwlq], __mem_ioswab[bwlq] are defined in mangle-port.h */
  
  #define IO_SPACE_LIMIT 0xffff
@@ -80,31 +81,29 @@ static inline void set_io_port_base(unsigned long base)
  }
  
  /*
- * Thanks to James van Artsdalen for a better timing-fix than
- * the two short jumps: using outb's to a nonexistent port seems
- * to guarantee better timings even on fast machines.
- *
- * On the other hand, I'd like to be sure of a non-existent port:
- * I feel a bit unsafe about using 0x80 (should be safe, though)
- *
- *             Linus
- *
+ * Provide the necessary definitions for generic iomap. We make use of
+ * mips_io_port_base for iomap(), but we don't reserve any low addresses for
+ * use with I/O ports.
   */
  
-#define __SLOW_DOWN_IO \
-       __asm__ __volatile__( \
-               "sb\t$0,0x80(%0)" \
-               : : "r" (mips_io_port_base));
+#define HAVE_ARCH_PIO_SIZE
+#define PIO_OFFSET     mips_io_port_base
+#define PIO_MASK       IO_SPACE_LIMIT
+#define PIO_RESERVED   0x0UL
  
-#ifdef CONF_SLOWDOWN_IO
-#ifdef REALLY_SLOW_IO
-#define SLOW_DOWN_IO { __SLOW_DOWN_IO; __SLOW_DOWN_IO; __SLOW_DOWN_IO; __SLOW_DOWN_IO; }
-#else
-#define SLOW_DOWN_IO __SLOW_DOWN_IO
-#endif
-#else
-#define SLOW_DOWN_IO
-#endif
+/*
+ * Enforce in-order execution of data I/O.  In the MIPS architecture
+ * these are equivalent to corresponding platform-specific memory
+ * barriers defined in <asm/barrier.h>.  API pinched from PowerPC,
+ * with sync additionally defined.
+ */
+#define iobarrier_rw() mb()
+#define iobarrier_r() rmb()
+#define iobarrier_w() wmb()
+#define iobarrier_sync() iob()
+
+/* Some callers use this older API instead.  */
+#define mmiowb() iobarrier_w()
  
  /*
   *     virt_to_phys    -       map virtual addresses to physical
@@ -172,11 +171,6 @@ static inline void *isa_bus_to_virt(unsigned long address)
  extern void __iomem * __ioremap(phys_addr_t offset, phys_addr_t size, unsigned long flags);
  extern void __iounmap(const volatile void __iomem *addr);
  
-#ifndef CONFIG_PCI
-struct pci_dev;
-static inline void pci_iounmap(struct pci_dev *dev, void __iomem *addr) {}
-#endif
-
  static inline void __iomem * __ioremap_mode(phys_addr_t offset, unsigned long size,
         unsigned long flags)
  {
@@ -316,13 +310,13 @@ static inline void iounmap(const volatile void __iomem *addr)
  #undef __IS_KSEG1
  }
  
-#if defined(CONFIG_CPU_CAVIUM_OCTEON) || defined(CONFIG_LOONGSON3_ENHANCEMENT)
+#if defined(CONFIG_CPU_CAVIUM_OCTEON) || defined(CONFIG_CPU_LOONGSON3)
  #define war_io_reorder_wmb()           wmb()
  #else
  #define war_io_reorder_wmb()           barrier()
  #endif
  
-#define __BUILD_MEMORY_SINGLE(pfx, bwlq, type, irq)                    \
+#define __BUILD_MEMORY_SINGLE(pfx, bwlq, type, barrier, relax, irq)    \
                                                                         \
  static inline void pfx##write##bwlq(type val,                          \
                                     volatile void __iomem *mem)         \
@@ -330,7 +324,10 @@ static inline void pfx##write##bwlq(type val,                              \
         volatile type *__mem;                                           \
         type __val;                                                     \
                                                                         \
-       war_io_reorder_wmb();                                   \
+       if (barrier)                                                    \
+               iobarrier_rw();                                         \
+       else                                                            \
+               war_io_reorder_wmb();                                   \
                                                                         \
         __mem = (void *)__swizzle_addr_##bwlq((unsigned long)(mem));    \
                                                                         \
@@ -367,6 +364,9 @@ static inline type pfx##read##bwlq(const volatile void __iomem *mem)        \
                                                                         \
         __mem = (void *)__swizzle_addr_##bwlq((unsigned long)(mem));    \
                                                                         \
+       if (barrier)                                                    \
+               iobarrier_rw();                                         \
+                                                                       \
         if (sizeof(type) != sizeof(u64) || sizeof(u64) == sizeof(long)) \
                 __val = *__mem;                                         \
         else if (cpu_has_64bits) {                                      \
@@ -390,18 +390,22 @@ static inline type pfx##read##bwlq(const volatile void __iomem *mem)      \
         }                                                               \
                                                                         \
         /* prevent prefetching of coherent DMA data prematurely */      \
-       rmb();                                                          \
+       if (!relax)                                                     \
+               rmb();                                                  \
         return pfx##ioswab##bwlq(__mem, __val);                         \
  }
  
-#define __BUILD_IOPORT_SINGLE(pfx, bwlq, type, p, slow)                        \
+#define __BUILD_IOPORT_SINGLE(pfx, bwlq, type, barrier, relax, p)      \
                                                                         \
  static inline void pfx##out##bwlq##p(type val, unsigned long port)     \
  {                                                                      \
         volatile type *__addr;                                          \
         type __val;                                                     \
                                                                         \
-       war_io_reorder_wmb();                                   \
+       if (barrier)                                                    \
+               iobarrier_rw();                                         \
+       else                                                            \
+               war_io_reorder_wmb();                                   \
                                                                         \
         __addr = (void *)__swizzle_addr_##bwlq(mips_io_port_base + port); \
                                                                         \
@@ -411,7 +415,6 @@ static inline void pfx##out##bwlq##p(type val, unsigned long port)  \
         BUILD_BUG_ON(sizeof(type) > sizeof(unsigned long));             \
                                                                         \
         *__addr = __val;                                                \
-       slow;                                                           \
  }                                                                      \
                                                                         \
  static inline type pfx##in##bwlq##p(unsigned long port)                        \
@@ -423,23 +426,27 @@ static inline type pfx##in##bwlq##p(unsigned long port)                   \
                                                                         \
         BUILD_BUG_ON(sizeof(type) > sizeof(unsigned long));             \
                                                                         \
+       if (barrier)                                                    \
+               iobarrier_rw();                                         \
+                                                                       \
         __val = *__addr;                                                \
-       slow;                                                           \
                                                                         \
         /* prevent prefetching of coherent DMA data prematurely */      \
-       rmb();                                                          \
+       if (!relax)                                                     \
+               rmb();                                                  \
         return pfx##ioswab##bwlq(__addr, __val);                        \
  }
  
-#define __BUILD_MEMORY_PFX(bus, bwlq, type)                            \
+#define __BUILD_MEMORY_PFX(bus, bwlq, type, relax)                     \
                                                                         \
-__BUILD_MEMORY_SINGLE(bus, bwlq, type, 1)
+__BUILD_MEMORY_SINGLE(bus, bwlq, type, 1, relax, 1)
  
  #define BUILDIO_MEM(bwlq, type)                                                \
                                                                         \
-__BUILD_MEMORY_PFX(__raw_, bwlq, type)                                 \
-__BUILD_MEMORY_PFX(, bwlq, type)                                       \
-__BUILD_MEMORY_PFX(__mem_, bwlq, type)                                 \
+__BUILD_MEMORY_PFX(__raw_, bwlq, type, 0)                              \
+__BUILD_MEMORY_PFX(__relaxed_, bwlq, type, 1)                          \
+__BUILD_MEMORY_PFX(__mem_, bwlq, type, 0)                              \
+__BUILD_MEMORY_PFX(, bwlq, type, 0)
  
  BUILDIO_MEM(b, u8)
  BUILDIO_MEM(w, u16)
@@ -447,8 +454,8 @@ BUILDIO_MEM(l, u32)
  BUILDIO_MEM(q, u64)
  
  #define __BUILD_IOPORT_PFX(bus, bwlq, type)                            \
-       __BUILD_IOPORT_SINGLE(bus, bwlq, type, ,)                       \
-       __BUILD_IOPORT_SINGLE(bus, bwlq, type, _p, SLOW_DOWN_IO)
+       __BUILD_IOPORT_SINGLE(bus, bwlq, type, 1, 0,)                   \
+       __BUILD_IOPORT_SINGLE(bus, bwlq, type, 1, 0, _p)
  
  #define BUILDIO_IOPORT(bwlq, type)                                     \
         __BUILD_IOPORT_PFX(, bwlq, type)                                \
@@ -463,19 +470,19 @@ BUILDIO_IOPORT(q, u64)
  
  #define __BUILDIO(bwlq, type)                                          \
                                                                         \
-__BUILD_MEMORY_SINGLE(____raw_, bwlq, type, 0)
+__BUILD_MEMORY_SINGLE(____raw_, bwlq, type, 1, 0, 0)
  
  __BUILDIO(q, u64)
  
-#define readb_relaxed                  readb
-#define readw_relaxed                  readw
-#define readl_relaxed                  readl
-#define readq_relaxed                  readq
+#define readb_relaxed                  __relaxed_readb
+#define readw_relaxed                  __relaxed_readw
+#define readl_relaxed                  __relaxed_readl
+#define readq_relaxed                  __relaxed_readq
  
-#define writeb_relaxed                 writeb
-#define writew_relaxed                 writew
-#define writel_relaxed                 writel
-#define writeq_relaxed                 writeq
+#define writeb_relaxed                 __relaxed_writeb
+#define writew_relaxed                 __relaxed_writew
+#define writel_relaxed                 __relaxed_writel
+#define writeq_relaxed                 __relaxed_writeq
  
  #define readb_be(addr)                                                 \
         __raw_readb((__force unsigned *)(addr))
@@ -561,14 +568,6 @@ BUILDSTRING(l, u32)
  BUILDSTRING(q, u64)
  #endif
  
-
-#ifdef CONFIG_CPU_CAVIUM_OCTEON
-#define mmiowb() wmb()
-#else
-/* Depends on MIPS II instruction set */
-#define mmiowb() asm volatile ("sync" ::: "memory")
-#endif
-
  static inline void memset_io(volatile void __iomem *addr, unsigned char val, int count)
  {
         memset((void __force *) addr, val, count);
diff --git a/arch/mips/include/asm/kexec.h b/arch/mips/include/asm/kexec.h

index 493a3cc..40795ca 100644 (file)
--- a/arch/mips/include/asm/kexec.h
+++ b/arch/mips/include/asm/kexec.h
@@ -12,11 +12,11 @@
  #include <asm/stacktrace.h>
  
  /* Maximum physical address we can use pages from */
-#define KEXEC_SOURCE_MEMORY_LIMIT (0x20000000)
+#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
  /* Maximum address we can reach in physical address mode */
-#define KEXEC_DESTINATION_MEMORY_LIMIT (0x20000000)
+#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
   /* Maximum address we can use for the control code buffer */
-#define KEXEC_CONTROL_MEMORY_LIMIT (0x20000000)
+#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL)
  /* Reserve 3*4096 bytes for board-specific info */
  #define KEXEC_CONTROL_PAGE_SIZE (4096 + 3*4096)
  
@@ -39,11 +39,12 @@ extern unsigned long kexec_args[4];
  extern int (*_machine_kexec_prepare)(struct kimage *);
  extern void (*_machine_kexec_shutdown)(void);
  extern void (*_machine_crash_shutdown)(struct pt_regs *regs);
-extern void default_machine_crash_shutdown(struct pt_regs *regs);
+void default_machine_crash_shutdown(struct pt_regs *regs);
+void kexec_nonboot_cpu_jump(void);
+void kexec_reboot(void);
  #ifdef CONFIG_SMP
  extern const unsigned char kexec_smp_wait[];
  extern unsigned long secondary_kexec_args[4];
-extern void (*relocated_kexec_smp_wait) (void *);
  extern atomic_t kexec_ready_to_reboot;
  extern void (*_crash_smp_send_stop)(void);
  #endif
diff --git a/arch/mips/include/asm/mach-loongson64/irq.h b/arch/mips/include/asm/mach-loongson64/irq.h

index 3644b68..be9f727 100644 (file)
--- a/arch/mips/include/asm/mach-loongson64/irq.h
+++ b/arch/mips/include/asm/mach-loongson64/irq.h
@@ -10,7 +10,7 @@
  #define MIPS_CPU_IRQ_BASE 56
  
  #define LOONGSON_UART_IRQ   (MIPS_CPU_IRQ_BASE + 2) /* UART */
-#define LOONGSON_HT1_IRQ    (MIPS_CPU_IRQ_BASE + 3) /* HT1 */
+#define LOONGSON_BRIDGE_IRQ (MIPS_CPU_IRQ_BASE + 3) /* CASCADE */
  #define LOONGSON_TIMER_IRQ  (MIPS_CPU_IRQ_BASE + 7) /* CPU Timer */
  
  #define LOONGSON_HT1_CFG_BASE          loongson_sysconf.ht_control_base
diff --git a/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h b/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h

index 3127391..cbac603 100644 (file)
--- a/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h
+++ b/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h
@@ -11,6 +11,8 @@
  #ifndef __ASM_MACH_LOONGSON64_KERNEL_ENTRY_H
  #define __ASM_MACH_LOONGSON64_KERNEL_ENTRY_H
  
+#include <asm/cpu.h>
+
  /*
   * Override macros used in arch/mips/kernel/head.S.
   */
@@ -26,12 +28,15 @@
         mfc0    t0, CP0_PAGEGRAIN
         or      t0, (0x1 << 29)
         mtc0    t0, CP0_PAGEGRAIN
-#ifdef CONFIG_LOONGSON3_ENHANCEMENT
         /* Enable STFill Buffer */
+       mfc0    t0, CP0_PRID
+       andi    t0, (PRID_IMP_MASK | PRID_REV_MASK)
+       slti    t0, (PRID_IMP_LOONGSON_64 | PRID_REV_LOONGSON3A_R2)
+       bnez    t0, 1f
         mfc0    t0, CP0_CONFIG6
         or      t0, 0x100
         mtc0    t0, CP0_CONFIG6
-#endif
+1:
         _ehb
         .set    pop
  #endif
@@ -52,12 +57,15 @@
         mfc0    t0, CP0_PAGEGRAIN
         or      t0, (0x1 << 29)
         mtc0    t0, CP0_PAGEGRAIN
-#ifdef CONFIG_LOONGSON3_ENHANCEMENT
         /* Enable STFill Buffer */
+       mfc0    t0, CP0_PRID
+       andi    t0, (PRID_IMP_MASK | PRID_REV_MASK)
+       slti    t0, (PRID_IMP_LOONGSON_64 | PRID_REV_LOONGSON3A_R2)
+       bnez    t0, 1f
         mfc0    t0, CP0_CONFIG6
         or      t0, 0x100
         mtc0    t0, CP0_CONFIG6
-#endif
+1:
         _ehb
         .set    pop
  #endif
diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h

index 01df9ad..341a02c 100644 (file)
--- a/arch/mips/include/asm/mipsregs.h
+++ b/arch/mips/include/asm/mipsregs.h
@@ -2287,13 +2287,14 @@ do {                                                                    \
         _write_32bit_cp1_register(dest, val, )
  #endif
  
-#ifdef HAVE_AS_DSP
+#ifdef TOOLCHAIN_SUPPORTS_DSP
  #define rddsp(mask)                                                    \
  ({                                                                     \
         unsigned int __dspctl;                                          \
                                                                         \
         __asm__ __volatile__(                                           \
         "       .set push                                       \n"     \
+       "       .set " MIPS_ISA_LEVEL "                         \n"     \
         "       .set dsp                                        \n"     \
         "       rddsp   %0, %x1                                 \n"     \
         "       .set pop                                        \n"     \
@@ -2306,6 +2307,7 @@ do {                                                                      \
  do {                                                                   \
         __asm__ __volatile__(                                           \
         "       .set push                                       \n"     \
+       "       .set " MIPS_ISA_LEVEL "                         \n"     \
         "       .set dsp                                        \n"     \
         "       wrdsp   %0, %x1                                 \n"     \
         "       .set pop                                        \n"     \
@@ -2318,6 +2320,7 @@ do {                                                                      \
         long mflo0;                                                     \
         __asm__(                                                        \
         "       .set push                                       \n"     \
+       "       .set " MIPS_ISA_LEVEL "                         \n"     \
         "       .set dsp                                        \n"     \
         "       mflo %0, $ac0                                   \n"     \
         "       .set pop                                        \n"     \
@@ -2330,6 +2333,7 @@ do {                                                                      \
         long mflo1;                                                     \
         __asm__(                                                        \
         "       .set push                                       \n"     \
+       "       .set " MIPS_ISA_LEVEL "                         \n"     \
         "       .set dsp                                        \n"     \
         "       mflo %0, $ac1                                   \n"     \
         "       .set pop                                        \n"     \
@@ -2342,6 +2346,7 @@ do {                                                                      \
         long mflo2;                                                     \
         __asm__(                                                        \
         "       .set push                                       \n"     \
+       "       .set " MIPS_ISA_LEVEL "                         \n"     \
         "       .set dsp                                        \n"     \
         "       mflo %0, $ac2                                   \n"     \
         "       .set pop                                        \n"     \
@@ -2354,6 +2359,7 @@ do {                                                                      \
         long mflo3;                                                     \
         __asm__(                                                        \
         "       .set push                                       \n"     \
+       "       .set " MIPS_ISA_LEVEL "                         \n"     \
         "       .set dsp                                        \n"     \
         "       mflo %0, $ac3                                   \n"     \
         "       .set pop                                        \n"     \
@@ -2366,6 +2372,7 @@ do {                                                                      \
         long mfhi0;                                                     \
         __asm__(                                                        \
         "       .set push                                       \n"     \
+       "       .set " MIPS_ISA_LEVEL "                         \n"     \
         "       .set dsp                                        \n"     \
         "       mfhi %0, $ac0                                   \n"     \
         "       .set pop                                        \n"     \
@@ -2378,6 +2385,7 @@ do {                                                                      \
         long mfhi1;                                                     \
         __asm__(                                                        \
         "       .set push                                       \n"     \
+       "       .set " MIPS_ISA_LEVEL "                         \n"     \
         "       .set dsp                                        \n"     \
         "       mfhi %0, $ac1                                   \n"     \
         "       .set pop                                        \n"     \
@@ -2390,6 +2398,7 @@ do {                                                                      \
         long mfhi2;                                                     \
         __asm__(                                                        \
         "       .set push                                       \n"     \
+       "       .set " MIPS_ISA_LEVEL "                         \n"     \
         "       .set dsp                                        \n"     \
         "       mfhi %0, $ac2                                   \n"     \
         "       .set pop                                        \n"     \
@@ -2402,6 +2411,7 @@ do {                                                                      \
         long mfhi3;                                                     \
         __asm__(                                                        \
         "       .set push                                       \n"     \
+       "       .set " MIPS_ISA_LEVEL "                         \n"     \
         "       .set dsp                                        \n"     \
         "       mfhi %0, $ac3                                   \n"     \
         "       .set pop                                        \n"     \
@@ -2414,6 +2424,7 @@ do {                                                                      \
  ({                                                                     \
         __asm__(                                                        \
         "       .set push                                       \n"     \
+       "       .set " MIPS_ISA_LEVEL "                         \n"     \
         "       .set dsp                                        \n"     \
         "       mtlo %0, $ac0                                   \n"     \
         "       .set pop                                        \n"     \
@@ -2425,6 +2436,7 @@ do {                                                                      \
  ({                                                                     \
         __asm__(                                                        \
         "       .set push                                       \n"     \
+       "       .set " MIPS_ISA_LEVEL "                         \n"     \
         "       .set dsp                                        \n"     \
         "       mtlo %0, $ac1                                   \n"     \
         "       .set pop                                        \n"     \
@@ -2436,6 +2448,7 @@ do {                                                                      \
  ({                                                                     \
         __asm__(                                                        \
         "       .set push                                       \n"     \
+       "       .set " MIPS_ISA_LEVEL "                         \n"     \
         "       .set dsp                                        \n"     \
         "       mtlo %0, $ac2                                   \n"     \
         "       .set pop                                        \n"     \
@@ -2447,6 +2460,7 @@ do {                                                                      \
  ({                                                                     \
         __asm__(                                                        \
         "       .set push                                       \n"     \
+       "       .set " MIPS_ISA_LEVEL "                         \n"     \
         "       .set dsp                                        \n"     \
         "       mtlo %0, $ac3                                   \n"     \
         "       .set pop                                        \n"     \
@@ -2458,6 +2472,7 @@ do {                                                                      \
  ({                                                                     \
         __asm__(                                                        \
         "       .set push                                       \n"     \
+       "       .set " MIPS_ISA_LEVEL "                         \n"     \
         "       .set dsp                                        \n"     \
         "       mthi %0, $ac0                                   \n"     \
         "       .set pop                                        \n"     \
@@ -2469,6 +2484,7 @@ do {                                                                      \
  ({                                                                     \
         __asm__(                                                        \
         "       .set push                                       \n"     \
+       "       .set " MIPS_ISA_LEVEL "                         \n"     \
         "       .set dsp                                        \n"     \
         "       mthi %0, $ac1                                   \n"     \
         "       .set pop                                        \n"     \
@@ -2480,6 +2496,7 @@ do {                                                                      \
  ({                                                                     \
         __asm__(                                                        \
         "       .set push                                       \n"     \
+       "       .set " MIPS_ISA_LEVEL "                         \n"     \
         "       .set dsp                                        \n"     \
         "       mthi %0, $ac2                                   \n"     \
         "       .set pop                                        \n"     \
@@ -2491,6 +2508,7 @@ do {                                                                      \
  ({                                                                     \
         __asm__(                                                        \
         "       .set push                                       \n"     \
+       "       .set " MIPS_ISA_LEVEL "                         \n"     \
         "       .set dsp                                        \n"     \
         "       mthi %0, $ac3                                   \n"     \
         "       .set pop                                        \n"     \
diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h

index 49d6046..c373eb6 100644 (file)
--- a/arch/mips/include/asm/processor.h
+++ b/arch/mips/include/asm/processor.h
@@ -81,7 +81,7 @@ extern unsigned int vced_count, vcei_count;
  
  #endif
  
-#define VDSO_RANDOMIZE_SIZE    (TASK_IS_32BIT_ADDR ? SZ_1M : SZ_256M)
+#define VDSO_RANDOMIZE_SIZE    (TASK_IS_32BIT_ADDR ? SZ_1M : SZ_64M)
  
  extern unsigned long mips_stack_top(void);
  #define STACK_TOP              mips_stack_top()
diff --git a/arch/mips/include/asm/r4kcache.h b/arch/mips/include/asm/r4kcache.h

index 7f12d7e..d19b2d6 100644 (file)
--- a/arch/mips/include/asm/r4kcache.h
+++ b/arch/mips/include/asm/r4kcache.h
@@ -48,58 +48,14 @@ extern void (*r4k_blast_icache)(void);
         :                                                               \
         : "i" (op), "R" (*(unsigned char *)(addr)))
  
-#ifdef CONFIG_MIPS_MT
-
-#define __iflush_prologue                                              \
-       unsigned long redundance;                                       \
-       extern int mt_n_iflushes;                                       \
-       for (redundance = 0; redundance < mt_n_iflushes; redundance++) {
-
-#define __iflush_epilogue                                              \
-       }
-
-#define __dflush_prologue                                              \
-       unsigned long redundance;                                       \
-       extern int mt_n_dflushes;                                       \
-       for (redundance = 0; redundance < mt_n_dflushes; redundance++) {
-
-#define __dflush_epilogue \
-       }
-
-#define __inv_dflush_prologue __dflush_prologue
-#define __inv_dflush_epilogue __dflush_epilogue
-#define __sflush_prologue {
-#define __sflush_epilogue }
-#define __inv_sflush_prologue __sflush_prologue
-#define __inv_sflush_epilogue __sflush_epilogue
-
-#else /* CONFIG_MIPS_MT */
-
-#define __iflush_prologue {
-#define __iflush_epilogue }
-#define __dflush_prologue {
-#define __dflush_epilogue }
-#define __inv_dflush_prologue {
-#define __inv_dflush_epilogue }
-#define __sflush_prologue {
-#define __sflush_epilogue }
-#define __inv_sflush_prologue {
-#define __inv_sflush_epilogue }
-
-#endif /* CONFIG_MIPS_MT */
-
  static inline void flush_icache_line_indexed(unsigned long addr)
  {
-       __iflush_prologue
         cache_op(Index_Invalidate_I, addr);
-       __iflush_epilogue
  }
  
  static inline void flush_dcache_line_indexed(unsigned long addr)
  {
-       __dflush_prologue
         cache_op(Index_Writeback_Inv_D, addr);
-       __dflush_epilogue
  }
  
  static inline void flush_scache_line_indexed(unsigned long addr)
@@ -109,7 +65,6 @@ static inline void flush_scache_line_indexed(unsigned long addr)
  
  static inline void flush_icache_line(unsigned long addr)
  {
-       __iflush_prologue
         switch (boot_cpu_type()) {
         case CPU_LOONGSON2:
                 cache_op(Hit_Invalidate_I_Loongson2, addr);
@@ -119,21 +74,16 @@ static inline void flush_icache_line(unsigned long addr)
                 cache_op(Hit_Invalidate_I, addr);
                 break;
         }
-       __iflush_epilogue
  }
  
  static inline void flush_dcache_line(unsigned long addr)
  {
-       __dflush_prologue
         cache_op(Hit_Writeback_Inv_D, addr);
-       __dflush_epilogue
  }
  
  static inline void invalidate_dcache_line(unsigned long addr)
  {
-       __dflush_prologue
         cache_op(Hit_Invalidate_D, addr);
-       __dflush_epilogue
  }
  
  static inline void invalidate_scache_line(unsigned long addr)
@@ -586,13 +536,9 @@ static inline void extra##blast_##pfx##cache##lsize(void)          \
                                current_cpu_data.desc.waybit;            \
         unsigned long ws, addr;                                         \
                                                                         \
-       __##pfx##flush_prologue                                         \
-                                                                       \
         for (ws = 0; ws < ws_end; ws += ws_inc)                         \
                 for (addr = start; addr < end; addr += lsize * 32)      \
                         cache##lsize##_unroll32(addr|ws, indexop);      \
-                                                                       \
-       __##pfx##flush_epilogue                                         \
  }                                                                      \
                                                                         \
  static inline void extra##blast_##pfx##cache##lsize##_page(unsigned long page) \
@@ -600,14 +546,10 @@ static inline void extra##blast_##pfx##cache##lsize##_page(unsigned long page) \
         unsigned long start = page;                                     \
         unsigned long end = page + PAGE_SIZE;                           \
                                                                         \
-       __##pfx##flush_prologue                                         \
-                                                                       \
         do {                                                            \
                 cache##lsize##_unroll32(start, hitop);                  \
                 start += lsize * 32;                                    \
         } while (start < end);                                          \
-                                                                       \
-       __##pfx##flush_epilogue                                         \
  }                                                                      \
                                                                         \
  static inline void extra##blast_##pfx##cache##lsize##_page_indexed(unsigned long page) \
@@ -620,13 +562,9 @@ static inline void extra##blast_##pfx##cache##lsize##_page_indexed(unsigned long
                                current_cpu_data.desc.waybit;            \
         unsigned long ws, addr;                                         \
                                                                         \
-       __##pfx##flush_prologue                                         \
-                                                                       \
         for (ws = 0; ws < ws_end; ws += ws_inc)                         \
                 for (addr = start; addr < end; addr += lsize * 32)      \
                         cache##lsize##_unroll32(addr|ws, indexop);      \
-                                                                       \
-       __##pfx##flush_epilogue                                         \
  }
  
  __BUILD_BLAST_CACHE(d, dcache, Index_Writeback_Inv_D, Hit_Writeback_Inv_D, 16, )
@@ -656,14 +594,10 @@ static inline void blast_##pfx##cache##lsize##_user_page(unsigned long page) \
         unsigned long start = page;                                     \
         unsigned long end = page + PAGE_SIZE;                           \
                                                                         \
-       __##pfx##flush_prologue                                         \
-                                                                       \
         do {                                                            \
                 cache##lsize##_unroll32_user(start, hitop);             \
                 start += lsize * 32;                                    \
         } while (start < end);                                          \
-                                                                       \
-       __##pfx##flush_epilogue                                         \
  }
  
  __BUILD_BLAST_USER_CACHE(d, dcache, Index_Writeback_Inv_D, Hit_Writeback_Inv_D,
@@ -685,16 +619,12 @@ static inline void prot##extra##blast_##pfx##cache##_range(unsigned long start,
         unsigned long addr = start & ~(lsize - 1);                      \
         unsigned long aend = (end - 1) & ~(lsize - 1);                  \
                                                                         \
-       __##pfx##flush_prologue                                         \
-                                                                       \
         while (1) {                                                     \
                 prot##cache_op(hitop, addr);                            \
                 if (addr == aend)                                       \
                         break;                                          \
                 addr += lsize;                                          \
         }                                                               \
-                                                                       \
-       __##pfx##flush_epilogue                                         \
  }
  
  #ifndef CONFIG_EVA
@@ -712,8 +642,6 @@ static inline void protected_blast_##pfx##cache##_range(unsigned long start,\
         unsigned long addr = start & ~(lsize - 1);                      \
         unsigned long aend = (end - 1) & ~(lsize - 1);                  \
                                                                         \
-       __##pfx##flush_prologue                                         \
-                                                                       \
         if (!uaccess_kernel()) {                                        \
                 while (1) {                                             \
                         protected_cachee_op(hitop, addr);               \
@@ -730,7 +658,6 @@ static inline void protected_blast_##pfx##cache##_range(unsigned long start,\
                 }                                                       \
                                                                         \
         }                                                               \
-       __##pfx##flush_epilogue                                         \
  }
  
  __BUILD_PROT_BLAST_CACHE_RANGE(d, dcache, Hit_Writeback_Inv_D)
diff --git a/arch/mips/include/asm/smp-ops.h b/arch/mips/include/asm/smp-ops.h

index 53b2cb8..b7123f9 100644 (file)
--- a/arch/mips/include/asm/smp-ops.h
+++ b/arch/mips/include/asm/smp-ops.h
@@ -33,6 +33,9 @@ struct plat_smp_ops {
         int (*cpu_disable)(void);
         void (*cpu_die)(unsigned int cpu);
  #endif
+#ifdef CONFIG_KEXEC
+       void (*kexec_nonboot_cpu)(void);
+#endif
  };
  
  extern void register_smp_ops(const struct plat_smp_ops *ops);
diff --git a/arch/mips/include/asm/smp.h b/arch/mips/include/asm/smp.h

index 056a6bf..7990c1c 100644 (file)
--- a/arch/mips/include/asm/smp.h
+++ b/arch/mips/include/asm/smp.h
@@ -91,6 +91,22 @@ static inline void __cpu_die(unsigned int cpu)
  extern void play_dead(void);
  #endif
  
+#ifdef CONFIG_KEXEC
+static inline void kexec_nonboot_cpu(void)
+{
+       extern const struct plat_smp_ops *mp_ops;       /* private */
+
+       return mp_ops->kexec_nonboot_cpu();
+}
+
+static inline void *kexec_nonboot_cpu_func(void)
+{
+       extern const struct plat_smp_ops *mp_ops;       /* private */
+
+       return mp_ops->kexec_nonboot_cpu;
+}
+#endif
+
  /*
   * This function will set up the necessary IPIs for Linux to communicate
   * with the CPUs in mask.
diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile

index f10e1e1..210c280 100644 (file)
--- a/arch/mips/kernel/Makefile
+++ b/arch/mips/kernel/Makefile
@@ -113,22 +113,4 @@ obj-$(CONFIG_MIPS_CPC)             += mips-cpc.o
  obj-$(CONFIG_CPU_PM)           += pm.o
  obj-$(CONFIG_MIPS_CPS_PM)      += pm-cps.o
  
-#
-# DSP ASE supported for MIPS32 or MIPS64 Release 2 cores only. It is not
-# safe to unconditionnaly use the assembler -mdsp / -mdspr2 switches
-# here because the compiler may use DSP ASE instructions (such as lwx) in
-# code paths where we cannot check that the CPU we are running on supports it.
-# Proper abstraction using HAVE_AS_DSP and macros is done in
-# arch/mips/include/asm/mipsregs.h.
-#
-ifeq ($(CONFIG_CPU_MIPSR2), y)
-CFLAGS_DSP                     = -DHAVE_AS_DSP
-
-CFLAGS_signal.o                        = $(CFLAGS_DSP)
-CFLAGS_signal32.o              = $(CFLAGS_DSP)
-CFLAGS_process.o               = $(CFLAGS_DSP)
-CFLAGS_branch.o                        = $(CFLAGS_DSP)
-CFLAGS_ptrace.o                        = $(CFLAGS_DSP)
-endif
-
  CPPFLAGS_vmlinux.lds           := $(KBUILD_CFLAGS)
diff --git a/arch/mips/kernel/crash.c b/arch/mips/kernel/crash.c

index d455363..2c72880 100644 (file)
--- a/arch/mips/kernel/crash.c
+++ b/arch/mips/kernel/crash.c
@@ -36,6 +36,9 @@ static void crash_shutdown_secondary(void *passed_regs)
         if (!cpu_online(cpu))
                 return;
  
+       /* We won't be sent IPIs any more. */
+       set_cpu_online(cpu, false);
+
         local_irq_disable();
         if (!cpumask_test_cpu(cpu, &cpus_in_crash))
                 crash_save_cpu(regs, cpu);
@@ -43,7 +46,9 @@ static void crash_shutdown_secondary(void *passed_regs)
  
         while (!atomic_read(&kexec_ready_to_reboot))
                 cpu_relax();
-       relocated_kexec_smp_wait(NULL);
+
+       kexec_reboot();
+
         /* NOTREACHED */
  }
  
diff --git a/arch/mips/kernel/head.S b/arch/mips/kernel/head.S

index d1bb506..351d40f 100644 (file)
--- a/arch/mips/kernel/head.S
+++ b/arch/mips/kernel/head.S
@@ -77,7 +77,7 @@ EXPORT(_stext)
          */
  FEXPORT(__kernel_entry)
         j       kernel_entry
-#endif
+#endif /* CONFIG_BOOT_RAW */
  
         __REF
  
@@ -94,24 +94,26 @@ NESTED(kernel_entry, 16, sp)                        # kernel entry point
  0:
  
  #ifdef CONFIG_USE_OF
-#ifdef CONFIG_MIPS_RAW_APPENDED_DTB
+#if defined(CONFIG_MIPS_RAW_APPENDED_DTB) || \
+       defined(CONFIG_MIPS_ELF_APPENDED_DTB)
+
         PTR_LA          t2, __appended_dtb
  
  #ifdef CONFIG_CPU_BIG_ENDIAN
         li              t1, 0xd00dfeed
-#else
+#else  /* !CONFIG_CPU_BIG_ENDIAN */
         li              t1, 0xedfe0dd0
-#endif
+#endif /* !CONFIG_CPU_BIG_ENDIAN */
         lw              t0, (t2)
         beq             t0, t1, dtb_found
-#endif
+#endif /* CONFIG_MIPS_RAW_APPENDED_DTB || CONFIG_MIPS_ELF_APPENDED_DTB */
         li              t1, -2
         move            t2, a1
         beq             a0, t1, dtb_found
  
         li              t2, 0
  dtb_found:
-#endif
+#endif /* CONFIG_USE_OF */
         PTR_LA          t0, __bss_start         # clear .bss
         LONG_S          zero, (t0)
         PTR_LA          t1, __bss_stop - LONGSIZE
@@ -156,9 +158,9 @@ dtb_found:
          * newly sync'd icache.
          */
         jr.hb           v0
-#else
+#else  /* !CONFIG_RELOCATABLE */
         j               start_kernel
-#endif
+#endif /* !CONFIG_RELOCATABLE */
         END(kernel_entry)
  
  #ifdef CONFIG_SMP
diff --git a/arch/mips/kernel/machine_kexec.c b/arch/mips/kernel/machine_kexec.c

index 8b574bc..93936dc 100644 (file)
--- a/arch/mips/kernel/machine_kexec.c
+++ b/arch/mips/kernel/machine_kexec.c
@@ -9,6 +9,7 @@
  #include <linux/kexec.h>
  #include <linux/mm.h>
  #include <linux/delay.h>
+#include <linux/libfdt.h>
  
  #include <asm/cacheflush.h>
  #include <asm/page.h>
@@ -19,15 +20,18 @@ extern const size_t relocate_new_kernel_size;
  extern unsigned long kexec_start_address;
  extern unsigned long kexec_indirection_page;
  
-int (*_machine_kexec_prepare)(struct kimage *) = NULL;
-void (*_machine_kexec_shutdown)(void) = NULL;
-void (*_machine_crash_shutdown)(struct pt_regs *regs) = NULL;
+static unsigned long reboot_code_buffer;
+
  #ifdef CONFIG_SMP
-void (*relocated_kexec_smp_wait) (void *);
+static void (*relocated_kexec_smp_wait)(void *);
+
  atomic_t kexec_ready_to_reboot = ATOMIC_INIT(0);
  void (*_crash_smp_send_stop)(void) = NULL;
  #endif
  
+void (*_machine_kexec_shutdown)(void) = NULL;
+void (*_machine_crash_shutdown)(struct pt_regs *regs) = NULL;
+
  static void kexec_image_info(const struct kimage *kimage)
  {
         unsigned long i;
@@ -48,13 +52,59 @@ static void kexec_image_info(const struct kimage *kimage)
         }
  }
  
+#ifdef CONFIG_UHI_BOOT
+
+static int uhi_machine_kexec_prepare(struct kimage *kimage)
+{
+       int i;
+
+       /*
+        * In case DTB file is not passed to the new kernel, a flat device
+        * tree will be created by kexec tool. It holds modified command
+        * line for the new kernel.
+        */
+       for (i = 0; i < kimage->nr_segments; i++) {
+               struct fdt_header fdt;
+
+               if (kimage->segment[i].memsz <= sizeof(fdt))
+                       continue;
+
+               if (copy_from_user(&fdt, kimage->segment[i].buf, sizeof(fdt)))
+                       continue;
+
+               if (fdt_check_header(&fdt))
+                       continue;
+
+               kexec_args[0] = -2;
+               kexec_args[1] = (unsigned long)
+                       phys_to_virt((unsigned long)kimage->segment[i].mem);
+               break;
+       }
+
+       return 0;
+}
+
+int (*_machine_kexec_prepare)(struct kimage *) = uhi_machine_kexec_prepare;
+
+#else
+
+int (*_machine_kexec_prepare)(struct kimage *) = NULL;
+
+#endif /* CONFIG_UHI_BOOT */
+
  int
  machine_kexec_prepare(struct kimage *kimage)
  {
+#ifdef CONFIG_SMP
+       if (!kexec_nonboot_cpu_func())
+               return -EINVAL;
+#endif
+
         kexec_image_info(kimage);
  
         if (_machine_kexec_prepare)
                 return _machine_kexec_prepare(kimage);
+
         return 0;
  }
  
@@ -63,11 +113,41 @@ machine_kexec_cleanup(struct kimage *kimage)
  {
  }
  
+#ifdef CONFIG_SMP
+static void kexec_shutdown_secondary(void *param)
+{
+       int cpu = smp_processor_id();
+
+       if (!cpu_online(cpu))
+               return;
+
+       /* We won't be sent IPIs any more. */
+       set_cpu_online(cpu, false);
+
+       local_irq_disable();
+       while (!atomic_read(&kexec_ready_to_reboot))
+               cpu_relax();
+
+       kexec_reboot();
+
+       /* NOTREACHED */
+}
+#endif
+
  void
  machine_shutdown(void)
  {
         if (_machine_kexec_shutdown)
                 _machine_kexec_shutdown();
+
+#ifdef CONFIG_SMP
+       smp_call_function(kexec_shutdown_secondary, NULL, 0);
+
+       while (num_online_cpus() > 1) {
+               cpu_relax();
+               mdelay(1);
+       }
+#endif
  }
  
  void
@@ -79,12 +159,57 @@ machine_crash_shutdown(struct pt_regs *regs)
                 default_machine_crash_shutdown(regs);
  }
  
-typedef void (*noretfun_t)(void) __noreturn;
+#ifdef CONFIG_SMP
+void kexec_nonboot_cpu_jump(void)
+{
+       local_flush_icache_range((unsigned long)relocated_kexec_smp_wait,
+                                reboot_code_buffer + relocate_new_kernel_size);
+
+       relocated_kexec_smp_wait(NULL);
+}
+#endif
+
+void kexec_reboot(void)
+{
+       void (*do_kexec)(void) __noreturn;
+
+       /*
+        * We know we were online, and there will be no incoming IPIs at
+        * this point. Mark online again before rebooting so that the crash
+        * analysis tool will see us correctly.
+        */
+       set_cpu_online(smp_processor_id(), true);
+
+       /* Ensure remote CPUs observe that we're online before rebooting. */
+       smp_mb__after_atomic();
+
+#ifdef CONFIG_SMP
+       if (smp_processor_id() > 0) {
+               /*
+                * Instead of cpu_relax() or wait, this is needed for kexec
+                * smp reboot. Kdump usually doesn't require an smp new
+                * kernel, but kexec may do.
+                */
+               kexec_nonboot_cpu();
+
+               /* NOTREACHED */
+       }
+#endif
+
+       /*
+        * Make sure we get correct instructions written by the
+        * machine_kexec() CPU.
+        */
+       local_flush_icache_range(reboot_code_buffer,
+                                reboot_code_buffer + relocate_new_kernel_size);
+
+       do_kexec = (void *)reboot_code_buffer;
+       do_kexec();
+}
  
  void
  machine_kexec(struct kimage *image)
  {
-       unsigned long reboot_code_buffer;
         unsigned long entry;
         unsigned long *ptr;
  
@@ -118,6 +243,9 @@ machine_kexec(struct kimage *image)
                         *ptr = (unsigned long) phys_to_virt(*ptr);
         }
  
+       /* Mark offline BEFORE disabling local irq. */
+       set_cpu_online(smp_processor_id(), false);
+
         /*
          * we do not want to be bothered.
          */
@@ -125,6 +253,7 @@ machine_kexec(struct kimage *image)
  
         printk("Will call new kernel at %08lx\n", image->start);
         printk("Bye ...\n");
+       /* Make reboot code buffer available to the boot CPU. */
         __flush_cache_all();
  #ifdef CONFIG_SMP
         /* All secondary cpus now may jump to kexec_wait cycle */
@@ -133,5 +262,5 @@ machine_kexec(struct kimage *image)
         smp_wmb();
         atomic_set(&kexec_ready_to_reboot, 1);
  #endif
-       ((noretfun_t) reboot_code_buffer)();
+       kexec_reboot();
  }
diff --git a/arch/mips/kernel/mips-mt.c b/arch/mips/kernel/mips-mt.c

index efaa252..9f85b98 100644 (file)
--- a/arch/mips/kernel/mips-mt.c
+++ b/arch/mips/kernel/mips-mt.c
@@ -154,40 +154,6 @@ static int __init config7_set(char *str)
  }
  __setup("config7=", config7_set);
  
-/* Experimental cache flush control parameters that should go away some day */
-int mt_protiflush;
-int mt_protdflush;
-int mt_n_iflushes = 1;
-int mt_n_dflushes = 1;
-
-static int __init set_protiflush(char *s)
-{
-       mt_protiflush = 1;
-       return 1;
-}
-__setup("protiflush", set_protiflush);
-
-static int __init set_protdflush(char *s)
-{
-       mt_protdflush = 1;
-       return 1;
-}
-__setup("protdflush", set_protdflush);
-
-static int __init niflush(char *s)
-{
-       get_option(&s, &mt_n_iflushes);
-       return 1;
-}
-__setup("niflush=", niflush);
-
-static int __init ndflush(char *s)
-{
-       get_option(&s, &mt_n_dflushes);
-       return 1;
-}
-__setup("ndflush=", ndflush);
-
  static unsigned int itc_base;
  
  static int __init set_itc_base(char *str)
@@ -232,16 +198,6 @@ void mips_mt_set_cpuoptions(void)
                 printk("Config7: 0x%08x\n", read_c0_config7());
         }
  
-       /* Report Cache management debug options */
-       if (mt_protiflush)
-               printk("I-cache flushes single-threaded\n");
-       if (mt_protdflush)
-               printk("D-cache flushes single-threaded\n");
-       if (mt_n_iflushes != 1)
-               printk("I-Cache Flushes Repeated %d times\n", mt_n_iflushes);
-       if (mt_n_dflushes != 1)
-               printk("D-Cache Flushes Repeated %d times\n", mt_n_dflushes);
-
         if (itc_base != 0) {
                 /*
                  * Configure ITC mapping.  This code is very
@@ -283,21 +239,6 @@ void mips_mt_set_cpuoptions(void)
         }
  }
  
-/*
- * Function to protect cache flushes from concurrent execution
- * depends on MP software model chosen.
- */
-
-void mt_cflush_lockdown(void)
-{
-       /* FILL IN VSMP and AP/SP VERSIONS HERE */
-}
-
-void mt_cflush_release(void)
-{
-       /* FILL IN VSMP and AP/SP VERSIONS HERE */
-}
-
  struct class *mt_class;
  
  static int __init mt_init(void)
diff --git a/arch/mips/kernel/relocate.c b/arch/mips/kernel/relocate.c

index cbf4cc0..3d80a51 100644 (file)
--- a/arch/mips/kernel/relocate.c
+++ b/arch/mips/kernel/relocate.c
@@ -146,7 +146,7 @@ int __init do_relocations(void *kbase_old, void *kbase_new, long offset)
                         break;
  
                 type = (*r >> 24) & 0xff;
-               loc_orig = (void *)(kbase_old + ((*r & 0x00ffffff) << 2));
+               loc_orig = kbase_old + ((*r & 0x00ffffff) << 2);
                 loc_new = RELOCATED(loc_orig);
  
                 if (reloc_handlers_rel[type] == NULL) {
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c

index e64b9e8..01a5ff4 100644 (file)
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -333,7 +333,7 @@ static void __init finalize_initrd(void)
  
         maybe_bswap_initrd();
  
-       reserve_bootmem(__pa(initrd_start), size, BOOTMEM_DEFAULT);
+       memblock_reserve(__pa(initrd_start), size);
         initrd_below_start_ok = 1;
  
         pr_info("Initial ramdisk at: 0x%lx (%lu bytes)\n",
@@ -370,20 +370,10 @@ static void __init bootmem_init(void)
  
  #else  /* !CONFIG_SGI_IP27 */
  
-static unsigned long __init bootmap_bytes(unsigned long pages)
-{
-       unsigned long bytes = DIV_ROUND_UP(pages, 8);
-
-       return ALIGN(bytes, sizeof(long));
-}
-
  static void __init bootmem_init(void)
  {
         unsigned long reserved_end;
-       unsigned long mapstart = ~0UL;
-       unsigned long bootmap_size;
         phys_addr_t ramstart = PHYS_ADDR_MAX;
-       bool bootmap_valid = false;
         int i;
  
         /*
@@ -395,6 +385,8 @@ static void __init bootmem_init(void)
         init_initrd();
         reserved_end = (unsigned long) PFN_UP(__pa_symbol(&_end));
  
+       memblock_reserve(PHYS_OFFSET, reserved_end << PAGE_SHIFT);
+
         /*
          * max_low_pfn is not a number of pages. The number of pages
          * of the system is given by 'max_low_pfn - min_low_pfn'.
@@ -442,9 +434,6 @@ static void __init bootmem_init(void)
                 if (initrd_end && end <= (unsigned long)PFN_UP(__pa(initrd_end)))
                         continue;
  #endif
-               if (start >= mapstart)
-                       continue;
-               mapstart = max(reserved_end, start);
         }
  
         if (min_low_pfn >= max_low_pfn)
@@ -456,9 +445,11 @@ static void __init bootmem_init(void)
         /*
          * Reserve any memory between the start of RAM and PHYS_OFFSET
          */
-       if (ramstart > PHYS_OFFSET)
+       if (ramstart > PHYS_OFFSET) {
                 add_memory_region(PHYS_OFFSET, ramstart - PHYS_OFFSET,
                                   BOOT_MEM_RESERVED);
+               memblock_reserve(PHYS_OFFSET, ramstart - PHYS_OFFSET);
+       }
  
         if (min_low_pfn > ARCH_PFN_OFFSET) {
                 pr_info("Wasting %lu bytes for tracking %lu unused pages\n",
@@ -483,52 +474,6 @@ static void __init bootmem_init(void)
                 max_low_pfn = PFN_DOWN(HIGHMEM_START);
         }
  
-#ifdef CONFIG_BLK_DEV_INITRD
-       /*
-        * mapstart should be after initrd_end
-        */
-       if (initrd_end)
-               mapstart = max(mapstart, (unsigned long)PFN_UP(__pa(initrd_end)));
-#endif
-
-       /*
-        * check that mapstart doesn't overlap with any of
-        * memory regions that have been reserved through eg. DTB
-        */
-       bootmap_size = bootmap_bytes(max_low_pfn - min_low_pfn);
-
-       bootmap_valid = memory_region_available(PFN_PHYS(mapstart),
-                                               bootmap_size);
-       for (i = 0; i < boot_mem_map.nr_map && !bootmap_valid; i++) {
-               unsigned long mapstart_addr;
-
-               switch (boot_mem_map.map[i].type) {
-               case BOOT_MEM_RESERVED:
-                       mapstart_addr = PFN_ALIGN(boot_mem_map.map[i].addr +
-                                               boot_mem_map.map[i].size);
-                       if (PHYS_PFN(mapstart_addr) < mapstart)
-                               break;
-
-                       bootmap_valid = memory_region_available(mapstart_addr,
-                                                               bootmap_size);
-                       if (bootmap_valid)
-                               mapstart = PHYS_PFN(mapstart_addr);
-                       break;
-               default:
-                       break;
-               }
-       }
-
-       if (!bootmap_valid)
-               panic("No memory area to place a bootmap bitmap");
-
-       /*
-        * Initialize the boot-time allocator with low memory only.
-        */
-       if (bootmap_size != init_bootmem_node(NODE_DATA(0), mapstart,
-                                        min_low_pfn, max_low_pfn))
-               panic("Unexpected memory size required for bootmap");
-
         for (i = 0; i < boot_mem_map.nr_map; i++) {
                 unsigned long start, end;
  
@@ -577,9 +522,9 @@ static void __init bootmem_init(void)
                 default:
                         /* Not usable memory */
                         if (start > min_low_pfn && end < max_low_pfn)
-                               reserve_bootmem(boot_mem_map.map[i].addr,
-                                               boot_mem_map.map[i].size,
-                                               BOOTMEM_DEFAULT);
+                               memblock_reserve(boot_mem_map.map[i].addr,
+                                               boot_mem_map.map[i].size);
+
                         continue;
                 }
  
@@ -602,15 +547,9 @@ static void __init bootmem_init(void)
                 size = end - start;
  
                 /* Register lowmem ranges */
-               free_bootmem(PFN_PHYS(start), size << PAGE_SHIFT);
                 memory_present(0, start, end);
         }
  
-       /*
-        * Reserve the bootmap memory.
-        */
-       reserve_bootmem(PFN_PHYS(mapstart), bootmap_size, BOOTMEM_DEFAULT);
-
  #ifdef CONFIG_RELOCATABLE
         /*
          * The kernel reserves all memory below its _end symbol as bootmem,
@@ -642,29 +581,6 @@ static void __init bootmem_init(void)
  
  #endif /* CONFIG_SGI_IP27 */
  
-/*
- * arch_mem_init - initialize memory management subsystem
- *
- *  o plat_mem_setup() detects the memory configuration and will record detected
- *    memory areas using add_memory_region.
- *
- * At this stage the memory configuration of the system is known to the
- * kernel but generic memory management system is still entirely uninitialized.
- *
- *  o bootmem_init()
- *  o sparse_init()
- *  o paging_init()
- *  o dma_contiguous_reserve()
- *
- * At this stage the bootmem allocator is ready to use.
- *
- * NOTE: historically plat_mem_setup did the entire platform initialization.
- *      This was rather impractical because it meant plat_mem_setup had to
- * get away without any kind of memory allocator.  To keep old code from
- * breaking plat_setup was just renamed to plat_mem_setup and a second platform
- * initialization hook for anything else was introduced.
- */
-
  static int usermem __initdata;
  
  static int __init early_parse_mem(char *p)
@@ -841,6 +757,28 @@ static void __init request_crashkernel(struct resource *res)
  #define BUILTIN_EXTEND_WITH_PROM       \
         IS_ENABLED(CONFIG_MIPS_CMDLINE_BUILTIN_EXTEND)
  
+/*
+ * arch_mem_init - initialize memory management subsystem
+ *
+ *  o plat_mem_setup() detects the memory configuration and will record detected
+ *    memory areas using add_memory_region.
+ *
+ * At this stage the memory configuration of the system is known to the
+ * kernel but generic memory management system is still entirely uninitialized.
+ *
+ *  o bootmem_init()
+ *  o sparse_init()
+ *  o paging_init()
+ *  o dma_contiguous_reserve()
+ *
+ * At this stage the bootmem allocator is ready to use.
+ *
+ * NOTE: historically plat_mem_setup did the entire platform initialization.
+ *      This was rather impractical because it meant plat_mem_setup had to
+ * get away without any kind of memory allocator.  To keep old code from
+ * breaking plat_setup was just renamed to plat_mem_setup and a second platform
+ * initialization hook for anything else was introduced.
+ */
  static void __init arch_mem_init(char **cmdline_p)
  {
         struct memblock_region *reg;
@@ -916,21 +854,29 @@ static void __init arch_mem_init(char **cmdline_p)
         early_init_fdt_scan_reserved_mem();
  
         bootmem_init();
+
+       /*
+        * Prevent memblock from allocating high memory.
+        * This cannot be done before max_low_pfn is detected, so up
+        * to this point it is only possible to reserve physical memory
+        * with memblock_reserve; memblock_virt_alloc* can be used
+        * only after this point.
+        */
+       memblock_set_current_limit(PFN_PHYS(max_low_pfn));
+
  #ifdef CONFIG_PROC_VMCORE
         if (setup_elfcorehdr && setup_elfcorehdr_size) {
                 printk(KERN_INFO "kdump reserved memory at %lx-%lx\n",
                        setup_elfcorehdr, setup_elfcorehdr_size);
-               reserve_bootmem(setup_elfcorehdr, setup_elfcorehdr_size,
-                               BOOTMEM_DEFAULT);
+               memblock_reserve(setup_elfcorehdr, setup_elfcorehdr_size);
         }
  #endif
  
         mips_parse_crashkernel();
  #ifdef CONFIG_KEXEC
         if (crashk_res.start != crashk_res.end)
-               reserve_bootmem(crashk_res.start,
-                               crashk_res.end - crashk_res.start + 1,
-                               BOOTMEM_DEFAULT);
+               memblock_reserve(crashk_res.start,
+                                crashk_res.end - crashk_res.start + 1);
  #endif
         device_tree_init();
         sparse_init();
@@ -940,7 +886,7 @@ static void __init arch_mem_init(char **cmdline_p)
         /* Tell bootmem about cma reserved memblock section */
         for_each_memblock(reserved, reg)
                 if (reg->size != 0)
-                       reserve_bootmem(reg->base, reg->size, BOOTMEM_DEFAULT);
+                       memblock_reserve(reg->base, reg->size);
  
         reserve_bootmem_region(__pa_symbol(&__nosave_begin),
                         __pa_symbol(&__nosave_end)); /* Reserve for hibernation */
diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c

index 159e83a..76fae9b 100644 (file)
--- a/arch/mips/kernel/smp-bmips.c
+++ b/arch/mips/kernel/smp-bmips.c
@@ -25,6 +25,7 @@
  #include <linux/linkage.h>
  #include <linux/bug.h>
  #include <linux/kernel.h>
+#include <linux/kexec.h>
  
  #include <asm/time.h>
  #include <asm/pgtable.h>
@@ -423,6 +424,9 @@ const struct plat_smp_ops bmips43xx_smp_ops = {
         .cpu_disable            = bmips_cpu_disable,
         .cpu_die                = bmips_cpu_die,
  #endif
+#ifdef CONFIG_KEXEC
+       .kexec_nonboot_cpu      = kexec_nonboot_cpu_jump,
+#endif
  };
  
  const struct plat_smp_ops bmips5000_smp_ops = {
@@ -437,6 +441,9 @@ const struct plat_smp_ops bmips5000_smp_ops = {
         .cpu_disable            = bmips_cpu_disable,
         .cpu_die                = bmips_cpu_die,
  #endif
+#ifdef CONFIG_KEXEC
+       .kexec_nonboot_cpu      = kexec_nonboot_cpu_jump,
+#endif
  };
  
  #endif /* CONFIG_SMP */
diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c

index 03f1026..faccfa4 100644 (file)
--- a/arch/mips/kernel/smp-cps.c
+++ b/arch/mips/kernel/smp-cps.c
@@ -398,6 +398,55 @@ static void cps_smp_finish(void)
         local_irq_enable();
  }
  
+#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_KEXEC)
+
+enum cpu_death {
+       CPU_DEATH_HALT,
+       CPU_DEATH_POWER,
+};
+
+static void cps_shutdown_this_cpu(enum cpu_death death)
+{
+       unsigned int cpu, core, vpe_id;
+
+       cpu = smp_processor_id();
+       core = cpu_core(&cpu_data[cpu]);
+
+       if (death == CPU_DEATH_HALT) {
+               vpe_id = cpu_vpe_id(&cpu_data[cpu]);
+
+               pr_debug("Halting core %d VP%d\n", core, vpe_id);
+               if (cpu_has_mipsmt) {
+                       /* Halt this TC */
+                       write_c0_tchalt(TCHALT_H);
+                       instruction_hazard();
+               } else if (cpu_has_vp) {
+                       write_cpc_cl_vp_stop(1 << vpe_id);
+
+                       /* Ensure that the VP_STOP register is written */
+                       wmb();
+               }
+       } else {
+               pr_debug("Gating power to core %d\n", core);
+               /* Power down the core */
+               cps_pm_enter_state(CPS_PM_POWER_GATED);
+       }
+}
+
+#ifdef CONFIG_KEXEC
+
+static void cps_kexec_nonboot_cpu(void)
+{
+       if (cpu_has_mipsmt || cpu_has_vp)
+               cps_shutdown_this_cpu(CPU_DEATH_HALT);
+       else
+               cps_shutdown_this_cpu(CPU_DEATH_POWER);
+}
+
+#endif /* CONFIG_KEXEC */
+
+#endif /* CONFIG_HOTPLUG_CPU || CONFIG_KEXEC */
+
  #ifdef CONFIG_HOTPLUG_CPU
  
  static int cps_cpu_disable(void)
@@ -421,19 +470,15 @@ static int cps_cpu_disable(void)
  }
  
  static unsigned cpu_death_sibling;
-static enum {
-       CPU_DEATH_HALT,
-       CPU_DEATH_POWER,
-} cpu_death;
+static enum cpu_death cpu_death;
  
  void play_dead(void)
  {
-       unsigned int cpu, core, vpe_id;
+       unsigned int cpu;
  
         local_irq_disable();
         idle_task_exit();
         cpu = smp_processor_id();
-       core = cpu_core(&cpu_data[cpu]);
         cpu_death = CPU_DEATH_POWER;
  
         pr_debug("CPU%d going offline\n", cpu);
@@ -456,25 +501,7 @@ void play_dead(void)
         /* This CPU has chosen its way out */
         (void)cpu_report_death();
  
-       if (cpu_death == CPU_DEATH_HALT) {
-               vpe_id = cpu_vpe_id(&cpu_data[cpu]);
-
-               pr_debug("Halting core %d VP%d\n", core, vpe_id);
-               if (cpu_has_mipsmt) {
-                       /* Halt this TC */
-                       write_c0_tchalt(TCHALT_H);
-                       instruction_hazard();
-               } else if (cpu_has_vp) {
-                       write_cpc_cl_vp_stop(1 << vpe_id);
-
-                       /* Ensure that the VP_STOP register is written */
-                       wmb();
-               }
-       } else {
-               pr_debug("Gating power to core %d\n", core);
-               /* Power down the core */
-               cps_pm_enter_state(CPS_PM_POWER_GATED);
-       }
+       cps_shutdown_this_cpu(cpu_death);
  
         /* This should never be reached */
         panic("Failed to offline CPU %u", cpu);
@@ -593,6 +620,9 @@ static const struct plat_smp_ops cps_smp_ops = {
         .cpu_disable            = cps_cpu_disable,
         .cpu_die                = cps_cpu_die,
  #endif
+#ifdef CONFIG_KEXEC
+       .kexec_nonboot_cpu      = cps_kexec_nonboot_cpu,
+#endif
  };
  
  bool mips_cps_smp_in_use(void)
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c

index 9dab0ed..5feef28 100644 (file)
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -29,6 +29,7 @@
  #include <linux/spinlock.h>
  #include <linux/kallsyms.h>
  #include <linux/bootmem.h>
+#include <linux/memblock.h>
  #include <linux/interrupt.h>
  #include <linux/ptrace.h>
  #include <linux/kgdb.h>
@@ -348,7 +349,7 @@ static void __show_regs(const struct pt_regs *regs)
   */
  void show_regs(struct pt_regs *regs)
  {
-       __show_regs((struct pt_regs *)regs);
+       __show_regs(regs);
         dump_stack();
  }
  
@@ -2260,8 +2261,10 @@ void __init trap_init(void)
                 unsigned long size = 0x200 + VECTORSPACING*64;
                 phys_addr_t ebase_pa;
  
+               memblock_set_bottom_up(true);
                 ebase = (unsigned long)
                         __alloc_bootmem(size, 1 << fls(size), 0);
+               memblock_set_bottom_up(false);
  
                 /*
                  * Try to ensure ebase resides in KSeg0 if possible.
diff --git a/arch/mips/kernel/unaligned.c b/arch/mips/kernel/unaligned.c

index 2d0b912..ce446ee 100644 (file)
--- a/arch/mips/kernel/unaligned.c
+++ b/arch/mips/kernel/unaligned.c
@@ -130,7 +130,7 @@ do {                                                        \
                         : "r" (addr), "i" (-EFAULT));       \
  } while(0)
  
-#ifndef CONFIG_CPU_MIPSR6
+#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR
  #define     _LoadW(addr, value, res, type)   \
  do {                                                        \
                 __asm__ __volatile__ (                      \
@@ -151,8 +151,8 @@ do {                                                        \
                         : "r" (addr), "i" (-EFAULT));       \
  } while(0)
  
-#else
-/* MIPSR6 has no lwl instruction */
+#else /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
+/* For CPUs without lwl instruction */
  #define     _LoadW(addr, value, res, type) \
  do {                                                        \
                 __asm__ __volatile__ (                      \
@@ -186,7 +186,7 @@ do {                                                        \
                         : "r" (addr), "i" (-EFAULT));       \
  } while(0)
  
-#endif /* CONFIG_CPU_MIPSR6 */
+#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
  
  #define     _LoadHWU(addr, value, res, type) \
  do {                                                        \
@@ -212,7 +212,7 @@ do {                                                        \
                         : "r" (addr), "i" (-EFAULT));       \
  } while(0)
  
-#ifndef CONFIG_CPU_MIPSR6
+#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR
  #define     _LoadWU(addr, value, res, type)  \
  do {                                                        \
                 __asm__ __volatile__ (                      \
@@ -255,8 +255,8 @@ do {                                                        \
                         : "r" (addr), "i" (-EFAULT));       \
  } while(0)
  
-#else
-/* MIPSR6 has not lwl and ldl instructions */
+#else /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
+/* For CPUs without lwl and ldl instructions */
  #define            _LoadWU(addr, value, res, type) \
  do {                                                        \
                 __asm__ __volatile__ (                      \
@@ -339,7 +339,7 @@ do {                                                        \
                         : "r" (addr), "i" (-EFAULT));       \
  } while(0)
  
-#endif /* CONFIG_CPU_MIPSR6 */
+#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
  
  
  #define     _StoreHW(addr, value, res, type) \
@@ -365,7 +365,7 @@ do {                                                        \
                         : "r" (value), "r" (addr), "i" (-EFAULT));\
  } while(0)
  
-#ifndef CONFIG_CPU_MIPSR6
+#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR
  #define     _StoreW(addr, value, res, type)  \
  do {                                                        \
                 __asm__ __volatile__ (                      \
@@ -406,8 +406,7 @@ do {                                                        \
                 : "r" (value), "r" (addr), "i" (-EFAULT));  \
  } while(0)
  
-#else
-/* MIPSR6 has no swl and sdl instructions */
+#else /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
  #define     _StoreW(addr, value, res, type)  \
  do {                                                        \
                 __asm__ __volatile__ (                      \
@@ -483,7 +482,7 @@ do {                                                        \
                 : "memory");                                \
  } while(0)
  
-#endif /* CONFIG_CPU_MIPSR6 */
+#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
  
  #else /* __BIG_ENDIAN */
  
@@ -509,7 +508,7 @@ do {                                                        \
                         : "r" (addr), "i" (-EFAULT));       \
  } while(0)
  
-#ifndef CONFIG_CPU_MIPSR6
+#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR
  #define     _LoadW(addr, value, res, type)   \
  do {                                                        \
                 __asm__ __volatile__ (                      \
@@ -530,8 +529,8 @@ do {                                                        \
                         : "r" (addr), "i" (-EFAULT));       \
  } while(0)
  
-#else
-/* MIPSR6 has no lwl instruction */
+#else  /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
+/* For CPUs without lwl instruction */
  #define     _LoadW(addr, value, res, type) \
  do {                                                        \
                 __asm__ __volatile__ (                      \
@@ -565,7 +564,7 @@ do {                                                        \
                         : "r" (addr), "i" (-EFAULT));       \
  } while(0)
  
-#endif /* CONFIG_CPU_MIPSR6 */
+#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
  
  
  #define     _LoadHWU(addr, value, res, type) \
@@ -592,7 +591,7 @@ do {                                                        \
                         : "r" (addr), "i" (-EFAULT));       \
  } while(0)
  
-#ifndef CONFIG_CPU_MIPSR6
+#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR
  #define     _LoadWU(addr, value, res, type)  \
  do {                                                        \
                 __asm__ __volatile__ (                      \
@@ -635,8 +634,8 @@ do {                                                        \
                         : "r" (addr), "i" (-EFAULT));       \
  } while(0)
  
-#else
-/* MIPSR6 has not lwl and ldl instructions */
+#else /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
+/* For CPUs without lwl and ldl instructions */
  #define            _LoadWU(addr, value, res, type) \
  do {                                                        \
                 __asm__ __volatile__ (                      \
@@ -718,7 +717,7 @@ do {                                                        \
                         : "=&r" (value), "=r" (res)         \
                         : "r" (addr), "i" (-EFAULT));       \
  } while(0)
-#endif /* CONFIG_CPU_MIPSR6 */
+#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
  
  #define     _StoreHW(addr, value, res, type) \
  do {                                                        \
@@ -743,7 +742,7 @@ do {                                                        \
                         : "r" (value), "r" (addr), "i" (-EFAULT));\
  } while(0)
  
-#ifndef CONFIG_CPU_MIPSR6
+#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR
  #define     _StoreW(addr, value, res, type)  \
  do {                                                        \
                 __asm__ __volatile__ (                      \
@@ -784,8 +783,8 @@ do {                                                        \
                 : "r" (value), "r" (addr), "i" (-EFAULT));  \
  } while(0)
  
-#else
-/* MIPSR6 has no swl and sdl instructions */
+#else /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
+/* For CPUs without swl and sdl instructions */
  #define     _StoreW(addr, value, res, type)  \
  do {                                                        \
                 __asm__ __volatile__ (                      \
@@ -861,7 +860,7 @@ do {                                                        \
                 : "memory");                                \
  } while(0)
  
-#endif /* CONFIG_CPU_MIPSR6 */
+#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
  #endif
  
  #define LoadHWU(addr, value, res)      _LoadHWU(addr, value, res, kernel)
diff --git a/arch/mips/lib/Makefile b/arch/mips/lib/Makefile

index 6537e02..479f505 100644 (file)
--- a/arch/mips/lib/Makefile
+++ b/arch/mips/lib/Makefile
@@ -7,7 +7,7 @@ lib-y   += bitops.o csum_partial.o delay.o memcpy.o memset.o \
            mips-atomic.o strncpy_user.o \
            strnlen_user.o uncached.o
  
-obj-y                  += iomap.o iomap_copy.o
+obj-y                  += iomap_copy.o
  obj-$(CONFIG_PCI)      += iomap-pci.o
  lib-$(CONFIG_GENERIC_CSUM)     := $(filter-out csum_partial.o, $(lib-y))
  
diff --git a/arch/mips/lib/iomap-pci.c b/arch/mips/lib/iomap-pci.c

index 4850509..210f5a9 100644 (file)
--- a/arch/mips/lib/iomap-pci.c
+++ b/arch/mips/lib/iomap-pci.c
@@ -44,10 +44,3 @@ void __iomem *__pci_ioport_map(struct pci_dev *dev,
  }
  
  #endif /* CONFIG_PCI_DRIVERS_LEGACY */
-
-void pci_iounmap(struct pci_dev *dev, void __iomem * addr)
-{
-       iounmap(addr);
-}
-
-EXPORT_SYMBOL(pci_iounmap);
diff --git a/arch/mips/lib/iomap.c b/arch/mips/lib/iomap.c

deleted file mode 100644 (file)

index 9b31653..0000000
--- a/arch/mips/lib/iomap.c
+++ /dev/null
@@ -1,227 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Implement the default iomap interfaces
- *
- * (C) Copyright 2004 Linus Torvalds
- * (C) Copyright 2006 Ralf Baechle <ralf@linux-mips.org>
- * (C) Copyright 2007 MIPS Technologies, Inc.
- *     written by Ralf Baechle <ralf@linux-mips.org>
- */
-#include <linux/export.h>
-#include <asm/io.h>
-
-/*
- * Read/write from/to an (offsettable) iomem cookie. It might be a PIO
- * access or a MMIO access, these functions don't care. The info is
- * encoded in the hardware mapping set up by the mapping functions
- * (or the cookie itself, depending on implementation and hw).
- *
- * The generic routines don't assume any hardware mappings, and just
- * encode the PIO/MMIO as part of the cookie. They coldly assume that
- * the MMIO IO mappings are not in the low address range.
- *
- * Architectures for which this is not true can't use this generic
- * implementation and should do their own copy.
- */
-
-#define PIO_MASK       0x0ffffUL
-
-unsigned int ioread8(void __iomem *addr)
-{
-       return readb(addr);
-}
-
-EXPORT_SYMBOL(ioread8);
-
-unsigned int ioread16(void __iomem *addr)
-{
-       return readw(addr);
-}
-
-EXPORT_SYMBOL(ioread16);
-
-unsigned int ioread16be(void __iomem *addr)
-{
-       return be16_to_cpu(__raw_readw(addr));
-}
-
-EXPORT_SYMBOL(ioread16be);
-
-unsigned int ioread32(void __iomem *addr)
-{
-       return readl(addr);
-}
-
-EXPORT_SYMBOL(ioread32);
-
-unsigned int ioread32be(void __iomem *addr)
-{
-       return be32_to_cpu(__raw_readl(addr));
-}
-
-EXPORT_SYMBOL(ioread32be);
-
-void iowrite8(u8 val, void __iomem *addr)
-{
-       writeb(val, addr);
-}
-
-EXPORT_SYMBOL(iowrite8);
-
-void iowrite16(u16 val, void __iomem *addr)
-{
-       writew(val, addr);
-}
-
-EXPORT_SYMBOL(iowrite16);
-
-void iowrite16be(u16 val, void __iomem *addr)
-{
-       __raw_writew(cpu_to_be16(val), addr);
-}
-
-EXPORT_SYMBOL(iowrite16be);
-
-void iowrite32(u32 val, void __iomem *addr)
-{
-       writel(val, addr);
-}
-
-EXPORT_SYMBOL(iowrite32);
-
-void iowrite32be(u32 val, void __iomem *addr)
-{
-       __raw_writel(cpu_to_be32(val), addr);
-}
-
-EXPORT_SYMBOL(iowrite32be);
-
-/*
- * These are the "repeat MMIO read/write" functions.
- * Note the "__mem" accesses, since we want to convert
- * to CPU byte order if the host bus happens to not match the
- * endianness of PCI/ISA (see mach-generic/mangle-port.h).
- */
-static inline void mmio_insb(void __iomem *addr, u8 *dst, int count)
-{
-       while (--count >= 0) {
-               u8 data = __mem_readb(addr);
-               *dst = data;
-               dst++;
-       }
-}
-
-static inline void mmio_insw(void __iomem *addr, u16 *dst, int count)
-{
-       while (--count >= 0) {
-               u16 data = __mem_readw(addr);
-               *dst = data;
-               dst++;
-       }
-}
-
-static inline void mmio_insl(void __iomem *addr, u32 *dst, int count)
-{
-       while (--count >= 0) {
-               u32 data = __mem_readl(addr);
-               *dst = data;
-               dst++;
-       }
-}
-
-static inline void mmio_outsb(void __iomem *addr, const u8 *src, int count)
-{
-       while (--count >= 0) {
-               __mem_writeb(*src, addr);
-               src++;
-       }
-}
-
-static inline void mmio_outsw(void __iomem *addr, const u16 *src, int count)
-{
-       while (--count >= 0) {
-               __mem_writew(*src, addr);
-               src++;
-       }
-}
-
-static inline void mmio_outsl(void __iomem *addr, const u32 *src, int count)
-{
-       while (--count >= 0) {
-               __mem_writel(*src, addr);
-               src++;
-       }
-}
-
-void ioread8_rep(void __iomem *addr, void *dst, unsigned long count)
-{
-       mmio_insb(addr, dst, count);
-}
-
-EXPORT_SYMBOL(ioread8_rep);
-
-void ioread16_rep(void __iomem *addr, void *dst, unsigned long count)
-{
-       mmio_insw(addr, dst, count);
-}
-
-EXPORT_SYMBOL(ioread16_rep);
-
-void ioread32_rep(void __iomem *addr, void *dst, unsigned long count)
-{
-       mmio_insl(addr, dst, count);
-}
-
-EXPORT_SYMBOL(ioread32_rep);
-
-void iowrite8_rep(void __iomem *addr, const void *src, unsigned long count)
-{
-       mmio_outsb(addr, src, count);
-}
-
-EXPORT_SYMBOL(iowrite8_rep);
-
-void iowrite16_rep(void __iomem *addr, const void *src, unsigned long count)
-{
-       mmio_outsw(addr, src, count);
-}
-
-EXPORT_SYMBOL(iowrite16_rep);
-
-void iowrite32_rep(void __iomem *addr, const void *src, unsigned long count)
-{
-       mmio_outsl(addr, src, count);
-}
-
-EXPORT_SYMBOL(iowrite32_rep);
-
-/*
- * Create a virtual mapping cookie for an IO port range
- *
- * This uses the same mapping are as the in/out family which has to be setup
- * by the platform initialization code.
- *
- * Just to make matters somewhat more interesting on MIPS systems with
- * multiple host bridge each will have it's own ioport address space.
- */
-static void __iomem *ioport_map_legacy(unsigned long port, unsigned int nr)
-{
-       return (void __iomem *) (mips_io_port_base + port);
-}
-
-void __iomem *ioport_map(unsigned long port, unsigned int nr)
-{
-       if (port > PIO_MASK)
-               return NULL;
-
-       return ioport_map_legacy(port, nr);
-}
-
-EXPORT_SYMBOL(ioport_map);
-
-void ioport_unmap(void __iomem *addr)
-{
-       /* Nothing to do */
-}
-
-EXPORT_SYMBOL(ioport_unmap);
diff --git a/arch/mips/lib/memcpy.S b/arch/mips/lib/memcpy.S

index 03e3304..cdd19d8 100644 (file)
--- a/arch/mips/lib/memcpy.S
+++ b/arch/mips/lib/memcpy.S
@@ -204,9 +204,10 @@
  #define LOADB(reg, addr, handler)      EXC(lb, LD_INSN, reg, addr, handler)
  #define STOREB(reg, addr, handler)     EXC(sb, ST_INSN, reg, addr, handler)
  
-#define _PREF(hint, addr, type)                                                \
+#ifdef CONFIG_CPU_HAS_PREFETCH
+# define _PREF(hint, addr, type)                                       \
         .if \mode == LEGACY_MODE;                                       \
-               PREF(hint, addr);                                       \
+               kernel_pref(hint, addr);                                \
         .else;                                                          \
                 .if ((\from == USEROP) && (type == SRC_PREFETCH)) ||    \
                     ((\to == USEROP) && (type == DST_PREFETCH));        \
@@ -218,12 +219,15 @@
                          * used later on. Therefore use $v1.            \
                          */                                             \
                         .set at=v1;                                     \
-                       PREFE(hint, addr);                              \
+                       user_pref(hint, addr);                          \
                         .set noat;                                      \
                 .else;                                                  \
-                       PREF(hint, addr);                               \
+                       kernel_pref(hint, addr);                        \
                 .endif;                                                 \
         .endif
+#else
+# define _PREF(hint, addr, type)
+#endif
  
  #define PREFS(hint, addr) _PREF(hint, addr, SRC_PREFETCH)
  #define PREFD(hint, addr) _PREF(hint, addr, DST_PREFETCH)
@@ -297,7 +301,7 @@
          and    t0, src, ADDRMASK
         PREFS(  0, 2*32(src) )
         PREFD(  1, 2*32(dst) )
-#ifndef CONFIG_CPU_MIPSR6
+#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR
         bnez    t1, .Ldst_unaligned\@
          nop
         bnez    t0, .Lsrc_unaligned_dst_aligned\@
@@ -385,7 +389,7 @@
         bne     rem, len, 1b
         .set    noreorder
  
-#ifndef CONFIG_CPU_MIPSR6
+#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR
         /*
          * src and dst are aligned, need to copy rem bytes (rem < NBYTES)
          * A loop would do only a byte at a time with possible branch
@@ -487,7 +491,7 @@
         bne     len, rem, 1b
         .set    noreorder
  
-#endif /* !CONFIG_CPU_MIPSR6 */
+#endif /* CONFIG_CPU_HAS_LOAD_STORE_LR */
  .Lcopy_bytes_checklen\@:
         beqz    len, .Ldone\@
          nop
@@ -516,7 +520,7 @@
         jr      ra
          nop
  
-#ifdef CONFIG_CPU_MIPSR6
+#ifndef CONFIG_CPU_HAS_LOAD_STORE_LR
  .Lcopy_unaligned_bytes\@:
  1:
         COPY_BYTE(0)
@@ -530,7 +534,7 @@
         ADD     src, src, 8
         b       1b
          ADD    dst, dst, 8
-#endif /* CONFIG_CPU_MIPSR6 */
+#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
         .if __memcpy == 1
         END(memcpy)
         .set __memcpy, 0
diff --git a/arch/mips/lib/memset.S b/arch/mips/lib/memset.S

index 069acec..418611e 100644 (file)
--- a/arch/mips/lib/memset.S
+++ b/arch/mips/lib/memset.S
@@ -78,7 +78,6 @@
  #endif
         .endm
  
-       .set    noreorder
         .align  5
  
         /*
@@ -94,13 +93,16 @@
         .endif
  
         sltiu           t0, a2, STORSIZE        /* very small region? */
+       .set            noreorder
         bnez            t0, .Lsmall_memset\@
          andi           t0, a0, STORMASK        /* aligned? */
+       .set            reorder
  
  #ifdef CONFIG_CPU_MICROMIPS
         move            t8, a1                  /* used by 'swp' instruction */
         move            t9, a1
  #endif
+       .set            noreorder
  #ifndef CONFIG_CPU_DADDI_WORKAROUNDS
         beqz            t0, 1f
          PTR_SUBU       t0, STORSIZE            /* alignment in bytes */
@@ -111,8 +113,9 @@
          PTR_SUBU       t0, AT                  /* alignment in bytes */
         .set            at
  #endif
+       .set            reorder
  
-#ifndef CONFIG_CPU_MIPSR6
+#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR
         R10KCBARRIER(0(ra))
  #ifdef __MIPSEB__
         EX(LONG_S_L, a1, (a0), .Lfirst_fixup\@) /* make word/dword aligned */
@@ -122,11 +125,13 @@
         PTR_SUBU        a0, t0                  /* long align ptr */
         PTR_ADDU        a2, t0                  /* correct size */
  
-#else /* CONFIG_CPU_MIPSR6 */
+#else /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
  #define STORE_BYTE(N)                          \
         EX(sb, a1, N(a0), .Lbyte_fixup\@);      \
+       .set            noreorder;              \
         beqz            t0, 0f;                 \
-       PTR_ADDU        t0, 1;
+        PTR_ADDU       t0, 1;                  \
+       .set            reorder;
  
         PTR_ADDU        a2, t0                  /* correct size */
         PTR_ADDU        t0, 1
@@ -145,19 +150,17 @@
         ori             a0, STORMASK
         xori            a0, STORMASK
         PTR_ADDIU       a0, STORSIZE
-#endif /* CONFIG_CPU_MIPSR6 */
+#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
  1:     ori             t1, a2, 0x3f            /* # of full blocks */
         xori            t1, 0x3f
+       andi            t0, a2, 0x40-STORSIZE
         beqz            t1, .Lmemset_partial\@  /* no block to fill */
-        andi           t0, a2, 0x40-STORSIZE
  
         PTR_ADDU        t1, a0                  /* end address */
-       .set            reorder
  1:     PTR_ADDIU       a0, 64
         R10KCBARRIER(0(ra))
         f_fill64 a0, -64, FILL64RG, .Lfwd_fixup\@, \mode
         bne             t1, a0, 1b
-       .set            noreorder
  
  .Lmemset_partial\@:
         R10KCBARRIER(0(ra))
@@ -173,20 +176,18 @@
         PTR_SUBU        t1, AT
         .set            at
  #endif
+       PTR_ADDU        a0, t0                  /* dest ptr */
         jr              t1
-        PTR_ADDU       a0, t0                  /* dest ptr */
  
-       .set            push
-       .set            noreorder
-       .set            nomacro
         /* ... but first do longs ... */
         f_fill64 a0, -64, FILL64RG, .Lpartial_fixup\@, \mode
-2:     .set            pop
-       andi            a2, STORMASK            /* At most one long to go */
+2:     andi            a2, STORMASK            /* At most one long to go */
  
+       .set            noreorder
         beqz            a2, 1f
-#ifndef CONFIG_CPU_MIPSR6
+#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR
          PTR_ADDU       a0, a2                  /* What's left */
+       .set            reorder
         R10KCBARRIER(0(ra))
  #ifdef __MIPSEB__
         EX(LONG_S_R, a1, -1(a0), .Llast_fixup\@)
@@ -195,6 +196,7 @@
  #endif
  #else
          PTR_SUBU       t0, $0, a2
+       .set            reorder
         move            a2, zero                /* No remaining longs */
         PTR_ADDIU       t0, 1
         STORE_BYTE(0)
@@ -210,41 +212,42 @@
  #endif
  0:
  #endif
-1:     jr              ra
-        move           a2, zero
+1:     move            a2, zero
+       jr              ra
  
  .Lsmall_memset\@:
+       PTR_ADDU        t1, a0, a2
         beqz            a2, 2f
-        PTR_ADDU       t1, a0, a2
  
  1:     PTR_ADDIU       a0, 1                   /* fill bytewise */
         R10KCBARRIER(0(ra))
+       .set            noreorder
         bne             t1, a0, 1b
          EX(sb, a1, -1(a0), .Lsmall_fixup\@)
+       .set            reorder
  
-2:     jr              ra                      /* done */
-        move           a2, zero
+2:     move            a2, zero
+       jr              ra                      /* done */
         .if __memset == 1
         END(memset)
         .set __memset, 0
         .hidden __memset
         .endif
  
-#ifdef CONFIG_CPU_MIPSR6
+#ifndef CONFIG_CPU_HAS_LOAD_STORE_LR
  .Lbyte_fixup\@:
         /*
          * unset_bytes = (#bytes - (#unaligned bytes)) - (-#unaligned bytes remaining + 1) + 1
          *      a2     =             a2                -              t0                   + 1
          */
         PTR_SUBU        a2, t0
+       PTR_ADDIU       a2, 1
         jr              ra
-        PTR_ADDIU      a2, 1
-#endif /* CONFIG_CPU_MIPSR6 */
+#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
  
  .Lfirst_fixup\@:
         /* unset_bytes already in a2 */
         jr      ra
-        nop
  
  .Lfwd_fixup\@:
         /*
@@ -255,8 +258,8 @@
         andi            a2, 0x3f
         LONG_L          t0, THREAD_BUADDR(t0)
         LONG_ADDU       a2, t1
+       LONG_SUBU       a2, t0
         jr              ra
-        LONG_SUBU      a2, t0
  
  .Lpartial_fixup\@:
         /*
@@ -267,24 +270,21 @@
         andi            a2, STORMASK
         LONG_L          t0, THREAD_BUADDR(t0)
         LONG_ADDU       a2, a0
+       LONG_SUBU       a2, t0
         jr              ra
-        LONG_SUBU      a2, t0
  
  .Llast_fixup\@:
         /* unset_bytes already in a2 */
         jr              ra
-        nop
  
  .Lsmall_fixup\@:
         /*
          * unset_bytes = end_addr - current_addr + 1
          *      a2     =    t1    -      a0      + 1
          */
-       .set            reorder
         PTR_SUBU        a2, t1, a0
         PTR_ADDIU       a2, 1
         jr              ra
-       .set            noreorder
  
         .endm
  
@@ -298,8 +298,8 @@
  
  LEAF(memset)
  EXPORT_SYMBOL(memset)
+       move            v0, a0                  /* result */
         beqz            a1, 1f
-        move           v0, a0                  /* result */
  
         andi            a1, 0xff                /* spread fillword */
         LONG_SLL                t1, a1, 8
diff --git a/arch/mips/loongson64/common/Makefile b/arch/mips/loongson64/common/Makefile

index 57ee030..684624f 100644 (file)
--- a/arch/mips/loongson64/common/Makefile
+++ b/arch/mips/loongson64/common/Makefile
@@ -6,7 +6,6 @@
  obj-y += setup.o init.o cmdline.o env.o time.o reset.o irq.o \
      bonito-irq.o mem.o machtype.o platform.o serial.o
  obj-$(CONFIG_PCI) += pci.o
-obj-$(CONFIG_CPU_LOONGSON2) += dma.o
  
  #
  # Serial port support
diff --git a/arch/mips/loongson64/common/dma.c b/arch/mips/loongson64/common/dma.c

deleted file mode 100644 (file)

index 48f0412..0000000
--- a/arch/mips/loongson64/common/dma.c
+++ /dev/null
@@ -1,18 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/dma-direct.h>
-
-dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
-{
-       return paddr | 0x80000000;
-}
-
-phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dma_addr)
-{
-#if defined(CONFIG_CPU_LOONGSON2F) && defined(CONFIG_64BIT)
-       if (dma_addr > 0x8fffffff)
-               return dma_addr;
-       return dma_addr & 0x0fffffff;
-#else
-       return dma_addr & 0x7fffffff;
-#endif
-}
diff --git a/arch/mips/loongson64/fuloong-2e/Makefile b/arch/mips/loongson64/fuloong-2e/Makefile

index b762272..0a9a472 100644 (file)
--- a/arch/mips/loongson64/fuloong-2e/Makefile
+++ b/arch/mips/loongson64/fuloong-2e/Makefile
@@ -2,4 +2,4 @@
  # Makefile for Lemote Fuloong2e mini-PC board.
  #
  
-obj-y += irq.o reset.o
+obj-y += irq.o reset.o dma.o
diff --git a/arch/mips/loongson64/fuloong-2e/dma.c b/arch/mips/loongson64/fuloong-2e/dma.c

new file mode 100644 (file)

index 0000000..e122292
--- /dev/null
+++ b/arch/mips/loongson64/fuloong-2e/dma.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/dma-direct.h>
+
+/* Bit ORed into a physical address to form its DMA address */
+#define FULOONG2E_DMA_BIT      0x80000000
+/* Mask that keeps the low 31 bits of a DMA address */
+#define FULOONG2E_PHYS_MASK    0x7fffffff
+
+/* Map @paddr to a DMA address by setting bit 31; @dev is unused. */
+dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
+{
+       return paddr | FULOONG2E_DMA_BIT;
+}
+
+/* Map @dma_addr back by keeping only bits 0-30; @dev is unused. */
+phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dma_addr)
+{
+       return dma_addr & FULOONG2E_PHYS_MASK;
+}
diff --git a/arch/mips/loongson64/lemote-2f/Makefile b/arch/mips/loongson64/lemote-2f/Makefile

index 08b8abc..b5792c3 100644 (file)
--- a/arch/mips/loongson64/lemote-2f/Makefile
+++ b/arch/mips/loongson64/lemote-2f/Makefile
@@ -2,7 +2,7 @@
  # Makefile for lemote loongson2f family machines
  #
  
-obj-y += clock.o machtype.o irq.o reset.o ec_kb3310b.o
+obj-y += clock.o machtype.o irq.o reset.o dma.o ec_kb3310b.o
  
  #
  # Suspend Support
diff --git a/arch/mips/loongson64/lemote-2f/dma.c b/arch/mips/loongson64/lemote-2f/dma.c

new file mode 100644 (file)

index 0000000..abf0e39
--- /dev/null
+++ b/arch/mips/loongson64/lemote-2f/dma.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/dma-direct.h>
+
+/* Bit ORed into a physical address to form its DMA address */
+#define LEMOTE2F_DMA_BIT       0x80000000
+/* DMA addresses above this value are handed back untranslated */
+#define LEMOTE2F_DMA_LIMIT     0x8fffffff
+/* Mask that keeps the low 28 bits of a DMA address */
+#define LEMOTE2F_PHYS_MASK     0x0fffffff
+
+/* Map @paddr to a DMA address by setting bit 31; @dev is unused. */
+dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
+{
+       return paddr | LEMOTE2F_DMA_BIT;
+}
+
+/*
+ * Map @dma_addr back to a physical address; @dev is unused.
+ * Values up to LEMOTE2F_DMA_LIMIT are reduced to their low 28 bits, anything
+ * above it is returned unchanged.
+ */
+phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dma_addr)
+{
+       if (dma_addr <= LEMOTE2F_DMA_LIMIT)
+               return dma_addr & LEMOTE2F_PHYS_MASK;
+
+       return dma_addr;
+}
diff --git a/arch/mips/loongson64/loongson-3/irq.c b/arch/mips/loongson64/loongson-3/irq.c

index cbeb20f..5605061 100644 (file)
--- a/arch/mips/loongson64/loongson-3/irq.c
+++ b/arch/mips/loongson64/loongson-3/irq.c
@@ -96,51 +96,8 @@ void mach_irq_dispatch(unsigned int pending)
         }
  }
  
-static struct irqaction cascade_irqaction = {
-       .handler = no_action,
-       .flags = IRQF_NO_SUSPEND,
-       .name = "cascade",
-};
-
-static inline void mask_loongson_irq(struct irq_data *d)
-{
-       clear_c0_status(0x100 << (d->irq - MIPS_CPU_IRQ_BASE));
-       irq_disable_hazard();
-
-       /* Workaround: UART IRQ may deliver to any core */
-       if (d->irq == LOONGSON_UART_IRQ) {
-               int cpu = smp_processor_id();
-               int node_id = cpu_logical_map(cpu) / loongson_sysconf.cores_per_node;
-               int core_id = cpu_logical_map(cpu) % loongson_sysconf.cores_per_node;
-               u64 intenclr_addr = smp_group[node_id] |
-                       (u64)(&LOONGSON_INT_ROUTER_INTENCLR);
-               u64 introuter_lpc_addr = smp_group[node_id] |
-                       (u64)(&LOONGSON_INT_ROUTER_LPC);
-
-               *(volatile u32 *)intenclr_addr = 1 << 10;
-               *(volatile u8 *)introuter_lpc_addr = 0x10 + (1<<core_id);
-       }
-}
-
-static inline void unmask_loongson_irq(struct irq_data *d)
-{
-       /* Workaround: UART IRQ may deliver to any core */
-       if (d->irq == LOONGSON_UART_IRQ) {
-               int cpu = smp_processor_id();
-               int node_id = cpu_logical_map(cpu) / loongson_sysconf.cores_per_node;
-               int core_id = cpu_logical_map(cpu) % loongson_sysconf.cores_per_node;
-               u64 intenset_addr = smp_group[node_id] |
-                       (u64)(&LOONGSON_INT_ROUTER_INTENSET);
-               u64 introuter_lpc_addr = smp_group[node_id] |
-                       (u64)(&LOONGSON_INT_ROUTER_LPC);
-
-               *(volatile u32 *)intenset_addr = 1 << 10;
-               *(volatile u8 *)introuter_lpc_addr = 0x10 + (1<<core_id);
-       }
-
-       set_c0_status(0x100 << (d->irq - MIPS_CPU_IRQ_BASE));
-       irq_enable_hazard();
-}
+static inline void mask_loongson_irq(struct irq_data *d) { }
+static inline void unmask_loongson_irq(struct irq_data *d) { }
  
   /* For MIPS IRQs which shared by all cores */
  static struct irq_chip loongson_irq_chip = {
@@ -183,12 +140,11 @@ void __init mach_init_irq(void)
         chip->irq_set_affinity = plat_set_irq_affinity;
  
         irq_set_chip_and_handler(LOONGSON_UART_IRQ,
-                       &loongson_irq_chip, handle_level_irq);
-
-       /* setup HT1 irq */
-       setup_irq(LOONGSON_HT1_IRQ, &cascade_irqaction);
+                       &loongson_irq_chip, handle_percpu_irq);
+       irq_set_chip_and_handler(LOONGSON_BRIDGE_IRQ,
+                       &loongson_irq_chip, handle_percpu_irq);
  
-       set_c0_status(STATUSF_IP2 | STATUSF_IP6);
+       set_c0_status(STATUSF_IP2 | STATUSF_IP3 | STATUSF_IP6);
  }
  
  #ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/mips/loongson64/loongson-3/numa.c b/arch/mips/loongson64/loongson-3/numa.c

index 9717106..c1e6ec5 100644 (file)
--- a/arch/mips/loongson64/loongson-3/numa.c
+++ b/arch/mips/loongson64/loongson-3/numa.c
@@ -180,43 +180,39 @@ static void __init szmem(unsigned int node)
  
  static void __init node_mem_init(unsigned int node)
  {
-       unsigned long bootmap_size;
         unsigned long node_addrspace_offset;
-       unsigned long start_pfn, end_pfn, freepfn;
+       unsigned long start_pfn, end_pfn;
  
         node_addrspace_offset = nid_to_addroffset(node);
         pr_info("Node%d's addrspace_offset is 0x%lx\n",
                         node, node_addrspace_offset);
  
         get_pfn_range_for_nid(node, &start_pfn, &end_pfn);
-       freepfn = start_pfn;
-       if (node == 0)
-               freepfn = PFN_UP(__pa_symbol(&_end)); /* kernel end address */
-       pr_info("Node%d: start_pfn=0x%lx, end_pfn=0x%lx, freepfn=0x%lx\n",
-               node, start_pfn, end_pfn, freepfn);
+       pr_info("Node%d: start_pfn=0x%lx, end_pfn=0x%lx\n",
+               node, start_pfn, end_pfn);
  
         __node_data[node] = prealloc__node_data + node;
  
-       NODE_DATA(node)->bdata = &bootmem_node_data[node];
         NODE_DATA(node)->node_start_pfn = start_pfn;
         NODE_DATA(node)->node_spanned_pages = end_pfn - start_pfn;
  
-       bootmap_size = init_bootmem_node(NODE_DATA(node), freepfn,
-                                       start_pfn, end_pfn);
         free_bootmem_with_active_regions(node, end_pfn);
-       if (node == 0) /* used by finalize_initrd() */
+
+       if (node == 0) {
+               /* kernel end address */
+               unsigned long kernel_end_pfn = PFN_UP(__pa_symbol(&_end));
+
+               /* used by finalize_initrd() */
                 max_low_pfn = end_pfn;
  
-       /* This is reserved for the kernel and bdata->node_bootmem_map */
-       reserve_bootmem_node(NODE_DATA(node), start_pfn << PAGE_SHIFT,
-               ((freepfn - start_pfn) << PAGE_SHIFT) + bootmap_size,
-               BOOTMEM_DEFAULT);
+               /* Reserve the kernel text/data/bss */
+               memblock_reserve(start_pfn << PAGE_SHIFT,
+                                ((kernel_end_pfn - start_pfn) << PAGE_SHIFT));
  
-       if (node == 0 && node_end_pfn(0) >= (0xffffffff >> PAGE_SHIFT)) {
                 /* Reserve 0xfe000000~0xffffffff for RS780E integrated GPU */
-               reserve_bootmem_node(NODE_DATA(node),
-                               (node_addrspace_offset | 0xfe000000),
-                               32 << 20, BOOTMEM_DEFAULT);
+               if (node_end_pfn(0) >= (0xffffffff >> PAGE_SHIFT))
+                       memblock_reserve((node_addrspace_offset | 0xfe000000),
+                                        32 << 20);
         }
  
         sparse_memory_present_with_active_regions(node);
diff --git a/arch/mips/loongson64/loongson-3/smp.c b/arch/mips/loongson64/loongson-3/smp.c

index fea95d0..b5c1e0a 100644 (file)
--- a/arch/mips/loongson64/loongson-3/smp.c
+++ b/arch/mips/loongson64/loongson-3/smp.c
@@ -21,6 +21,7 @@
  #include <linux/sched/task_stack.h>
  #include <linux/smp.h>
  #include <linux/cpufreq.h>
+#include <linux/kexec.h>
  #include <asm/processor.h>
  #include <asm/time.h>
  #include <asm/clock.h>
@@ -349,7 +350,7 @@ static void loongson3_smp_finish(void)
         write_c0_compare(read_c0_count() + mips_hpt_frequency/HZ);
         local_irq_enable();
         loongson3_ipi_write64(0,
-                       (void *)(ipi_mailbox_buf[cpu_logical_map(cpu)]+0x0));
+                       ipi_mailbox_buf[cpu_logical_map(cpu)] + 0x0);
         pr_info("CPU#%d finished, CP0_ST=%x\n",
                         smp_processor_id(), read_c0_status());
  }
@@ -416,13 +417,13 @@ static int loongson3_boot_secondary(int cpu, struct task_struct *idle)
                         cpu, startargs[0], startargs[1], startargs[2]);
  
         loongson3_ipi_write64(startargs[3],
-                       (void *)(ipi_mailbox_buf[cpu_logical_map(cpu)]+0x18));
+                       ipi_mailbox_buf[cpu_logical_map(cpu)] + 0x18);
         loongson3_ipi_write64(startargs[2],
-                       (void *)(ipi_mailbox_buf[cpu_logical_map(cpu)]+0x10));
+                       ipi_mailbox_buf[cpu_logical_map(cpu)] + 0x10);
         loongson3_ipi_write64(startargs[1],
-                       (void *)(ipi_mailbox_buf[cpu_logical_map(cpu)]+0x8));
+                       ipi_mailbox_buf[cpu_logical_map(cpu)] + 0x8);
         loongson3_ipi_write64(startargs[0],
-                       (void *)(ipi_mailbox_buf[cpu_logical_map(cpu)]+0x0));
+                       ipi_mailbox_buf[cpu_logical_map(cpu)] + 0x0);
         return 0;
  }
  
@@ -749,4 +750,7 @@ const struct plat_smp_ops loongson3_smp_ops = {
         .cpu_disable = loongson3_cpu_disable,
         .cpu_die = loongson3_cpu_die,
  #endif
+#ifdef CONFIG_KEXEC
+       .kexec_nonboot_cpu = kexec_nonboot_cpu_jump,
+#endif
  };
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c

index 400676c..15cae0f 100644 (file)
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -32,7 +32,6 @@
  #include <linux/kcore.h>
  #include <linux/initrd.h>
  
-#include <asm/asm-offsets.h>
  #include <asm/bootinfo.h>
  #include <asm/cachectl.h>
  #include <asm/cpu.h>
@@ -521,17 +520,13 @@ unsigned long pgd_current[NR_CPUS];
  #endif
  
  /*
- * gcc 3.3 and older have trouble determining that PTRS_PER_PGD and PGD_ORDER
- * are constants.  So we use the variants from asm-offset.h until that gcc
- * will officially be retired.
- *
   * Align swapper_pg_dir in to 64K, allows its address to be loaded
   * with a single LUI instruction in the TLB handlers.  If we used
   * __aligned(64K), its size would get rounded up to the alignment
   * size, and waste space.  So we place it in its own section and align
   * it in the linker script.
   */
-pgd_t swapper_pg_dir[_PTRS_PER_PGD] __section(.bss..swapper_pg_dir);
+pgd_t swapper_pg_dir[PTRS_PER_PGD] __section(.bss..swapper_pg_dir);
  #ifndef __PAGETABLE_PUD_FOLDED
  pud_t invalid_pud_table[PTRS_PER_PUD] __page_aligned_bss;
  #endif
diff --git a/arch/mips/netlogic/common/irq.c b/arch/mips/netlogic/common/irq.c

index f4961bc..cf33dd8 100644 (file)
--- a/arch/mips/netlogic/common/irq.c
+++ b/arch/mips/netlogic/common/irq.c
@@ -291,7 +291,7 @@ static int __init xlp_of_pic_init(struct device_node *node,
         /* we need a hack to get the PIC's SoC chip id */
         ret = of_address_to_resource(node, 0, &res);
         if (ret < 0) {
-               pr_err("PIC %s: reg property not found!\n", node->name);
+               pr_err("PIC %pOFn: reg property not found!\n", node);
                 return -EINVAL;
         }
  
@@ -304,21 +304,21 @@ static int __init xlp_of_pic_init(struct device_node *node,
                                 break;
                 }
                 if (socid == NLM_NR_NODES) {
-                       pr_err("PIC %s: Node mapping for bus %d not found!\n",
-                                       node->name, bus);
+                       pr_err("PIC %pOFn: Node mapping for bus %d not found!\n",
+                                       node, bus);
                         return -EINVAL;
                 }
         } else {
                 socid = (res.start >> 18) & 0x3;
                 if (!nlm_node_present(socid)) {
-                       pr_err("PIC %s: node %d does not exist!\n",
-                                                       node->name, socid);
+                       pr_err("PIC %pOFn: node %d does not exist!\n",
+                                                       node, socid);
                         return -EINVAL;
                 }
         }
  
         if (!nlm_node_present(socid)) {
-               pr_err("PIC %s: node %d does not exist!\n", node->name, socid);
+               pr_err("PIC %pOFn: node %d does not exist!\n", node, socid);
                 return -EINVAL;
         }
  
@@ -326,7 +326,7 @@ static int __init xlp_of_pic_init(struct device_node *node,
                 nlm_irq_to_xirq(socid, PIC_IRQ_BASE), PIC_IRQ_BASE,
                 &xlp_pic_irq_domain_ops, NULL);
         if (xlp_pic_domain == NULL) {
-               pr_err("PIC %s: Creating legacy domain failed!\n", node->name);
+               pr_err("PIC %pOFn: Creating legacy domain failed!\n", node);
                 return -EINVAL;
         }
         pr_info("Node %d: IRQ domain created for PIC@%pR\n", socid, &res);
diff --git a/arch/mips/pci/ops-loongson3.c b/arch/mips/pci/ops-loongson3.c

index 9e11843..2f6ad36 100644 (file)
--- a/arch/mips/pci/ops-loongson3.c
+++ b/arch/mips/pci/ops-loongson3.c
@@ -18,22 +18,36 @@ static int loongson3_pci_config_access(unsigned char access_type,
                 int where, u32 *data)
  {
         unsigned char busnum = bus->number;
-       u_int64_t addr, type;
-       void *addrp;
-       int device = PCI_SLOT(devfn);
         int function = PCI_FUNC(devfn);
+       int device = PCI_SLOT(devfn);
         int reg = where & ~3;
+       void *addrp;
+       u64 addr;
+
+       if (where < PCI_CFG_SPACE_SIZE) { /* standard config */
+               addr = (busnum << 16) | (device << 11) | (function << 8) | reg;
+               if (busnum == 0) {
+                       if (device > 31)
+                               return PCIBIOS_DEVICE_NOT_FOUND;
+                       addrp = (void *)TO_UNCAC(HT1LO_PCICFG_BASE | addr);
+               } else {
+                       addrp = (void *)TO_UNCAC(HT1LO_PCICFG_BASE_TP1 | addr);
+               }
+       } else if (where < PCI_CFG_SPACE_EXP_SIZE) {  /* extended config */
+               struct pci_dev *rootdev;
+
+               rootdev = pci_get_domain_bus_and_slot(0, 0, 0);
+               if (!rootdev)
+                       return PCIBIOS_DEVICE_NOT_FOUND;
  
-       addr = (busnum << 16) | (device << 11) | (function << 8) | reg;
-       if (busnum == 0) {
-               if (device > 31)
+               addr = pci_resource_start(rootdev, 3);
+               if (!addr)
                         return PCIBIOS_DEVICE_NOT_FOUND;
-               addrp = (void *)(TO_UNCAC(HT1LO_PCICFG_BASE) | (addr & 0xffff));
-               type = 0;
  
+               addr |= busnum << 20 | device << 15 | function << 12 | reg;
+               addrp = (void *)TO_UNCAC(addr);
         } else {
-               addrp = (void *)(TO_UNCAC(HT1LO_PCICFG_BASE_TP1) | (addr));
-               type = 0x10000;
+               return PCIBIOS_DEVICE_NOT_FOUND;
         }
  
         if (access_type == PCI_ACCESS_WRITE)
diff --git a/arch/mips/pci/pci-legacy.c b/arch/mips/pci/pci-legacy.c

index f1e92bf..3c3b1e6 100644 (file)
--- a/arch/mips/pci/pci-legacy.c
+++ b/arch/mips/pci/pci-legacy.c
@@ -127,8 +127,12 @@ static void pcibios_scanbus(struct pci_controller *hose)
         if (pci_has_flag(PCI_PROBE_ONLY)) {
                 pci_bus_claim_resources(bus);
         } else {
+               struct pci_bus *child;
+
                 pci_bus_size_bridges(bus);
                 pci_bus_assign_resources(bus);
+               list_for_each_entry(child, &bus->children, node)
+                       pcie_bus_configure_settings(child);
         }
         pci_bus_add_devices(bus);
  }
diff --git a/arch/mips/pci/pci-rt2880.c b/arch/mips/pci/pci-rt2880.c

index 711cdcc..f376a1d 100644 (file)
--- a/arch/mips/pci/pci-rt2880.c
+++ b/arch/mips/pci/pci-rt2880.c
@@ -246,6 +246,8 @@ static int rt288x_pci_probe(struct platform_device *pdev)
         rt2880_pci_write_u32(PCI_BASE_ADDRESS_0, 0x08000000);
         (void) rt2880_pci_read_u32(PCI_BASE_ADDRESS_0);
  
+       rt2880_pci_controller.of_node = pdev->dev.of_node;
+
         register_pci_controller(&rt2880_pci_controller);
         return 0;
  }
diff --git a/arch/mips/pmcs-msp71xx/msp_usb.c b/arch/mips/pmcs-msp71xx/msp_usb.c

index c87c5f8..d38ac70 100644 (file)
--- a/arch/mips/pmcs-msp71xx/msp_usb.c
+++ b/arch/mips/pmcs-msp71xx/msp_usb.c
@@ -133,13 +133,13 @@ static int __init msp_usb_setup(void)
          * "D" for device-mode.  If it works for Ethernet, why not USB...
          *  -- hammtrev, 2007/03/22
          */
-       snprintf((char *)&envstr[0], sizeof(envstr), "usbmode");
+       snprintf(&envstr[0], sizeof(envstr), "usbmode");
  
         /* set default host mode */
         val = 1;
  
         /* get environment string */
-       strp = prom_getenv((char *)&envstr[0]);
+       strp = prom_getenv(&envstr[0]);
         if (strp) {
                 /* compare string */
                 if (!strcmp(strp, "device"))
diff --git a/arch/mips/ralink/cevt-rt3352.c b/arch/mips/ralink/cevt-rt3352.c

index 92f284d..61a0894 100644 (file)
--- a/arch/mips/ralink/cevt-rt3352.c
+++ b/arch/mips/ralink/cevt-rt3352.c
@@ -134,7 +134,7 @@ static int __init ralink_systick_init(struct device_node *np)
         systick.dev.min_delta_ticks = 0x3;
         systick.dev.irq = irq_of_parse_and_map(np, 0);
         if (!systick.dev.irq) {
-               pr_err("%s: request_irq failed", np->name);
+               pr_err("%pOFn: request_irq failed", np);
                 return -EINVAL;
         }
  
@@ -146,8 +146,8 @@ static int __init ralink_systick_init(struct device_node *np)
  
         clockevents_register_device(&systick.dev);
  
-       pr_info("%s: running - mult: %d, shift: %d\n",
-                       np->name, systick.dev.mult, systick.dev.shift);
+       pr_info("%pOFn: running - mult: %d, shift: %d\n",
+                       np, systick.dev.mult, systick.dev.shift);
  
         return 0;
  }
diff --git a/arch/mips/ralink/ill_acc.c b/arch/mips/ralink/ill_acc.c

index 765d5ba..fc056f2 100644 (file)
--- a/arch/mips/ralink/ill_acc.c
+++ b/arch/mips/ralink/ill_acc.c
@@ -62,7 +62,7 @@ static int __init ill_acc_of_setup(void)
  
         pdev = of_find_device_by_node(np);
         if (!pdev) {
-               pr_err("%s: failed to lookup pdev\n", np->name);
+               pr_err("%pOFn: failed to lookup pdev\n", np);
                 return -EINVAL;
         }
  
diff --git a/arch/mips/ralink/rt305x.c b/arch/mips/ralink/rt305x.c

index 93d472c..0f2264e 100644 (file)
--- a/arch/mips/ralink/rt305x.c
+++ b/arch/mips/ralink/rt305x.c
@@ -49,6 +49,10 @@ static struct rt2880_pmx_func rgmii_func[] = { FUNC("rgmii", 0, 40, 12) };
  static struct rt2880_pmx_func rt3352_lna_func[] = { FUNC("lna", 0, 36, 2) };
  static struct rt2880_pmx_func rt3352_pa_func[] = { FUNC("pa", 0, 38, 2) };
  static struct rt2880_pmx_func rt3352_led_func[] = { FUNC("led", 0, 40, 5) };
+static struct rt2880_pmx_func rt3352_cs1_func[] = {
+       FUNC("spi_cs1", 0, 45, 1),
+       FUNC("wdg_cs1", 1, 45, 1),
+};
  
  static struct rt2880_pmx_group rt3050_pinmux_data[] = {
         GRP("i2c", i2c_func, 1, RT305X_GPIO_MODE_I2C),
@@ -75,6 +79,7 @@ static struct rt2880_pmx_group rt3352_pinmux_data[] = {
         GRP("lna", rt3352_lna_func, 1, RT3352_GPIO_MODE_LNA),
         GRP("pa", rt3352_pa_func, 1, RT3352_GPIO_MODE_PA),
         GRP("led", rt3352_led_func, 1, RT5350_GPIO_MODE_PHY_LED),
+       GRP("spi_cs1", rt3352_cs1_func, 2, RT5350_GPIO_MODE_SPI_CS1),
         { 0 }
  };
  
diff --git a/arch/mips/sgi-ip22/ip28-berr.c b/arch/mips/sgi-ip22/ip28-berr.c

index 2ed8e49..082541d 100644 (file)
--- a/arch/mips/sgi-ip22/ip28-berr.c
+++ b/arch/mips/sgi-ip22/ip28-berr.c
@@ -464,7 +464,7 @@ void ip22_be_interrupt(int irq)
                 die_if_kernel("Oops", regs);
                 force_sig(SIGBUS, current);
         } else if (debug_be_interrupt)
-               show_regs((struct pt_regs *)regs);
+               show_regs(regs);
  }
  
  static int ip28_be_handler(struct pt_regs *regs, int is_fixup)
diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c

index 59133d0..6f7bef0 100644 (file)
--- a/arch/mips/sgi-ip27/ip27-memory.c
+++ b/arch/mips/sgi-ip27/ip27-memory.c
@@ -389,7 +389,6 @@ static void __init node_mem_init(cnodeid_t node)
  {
         unsigned long slot_firstpfn = slot_getbasepfn(node, 0);
         unsigned long slot_freepfn = node_getfirstfree(node);
-       unsigned long bootmap_size;
         unsigned long start_pfn, end_pfn;
  
         get_pfn_range_for_nid(node, &start_pfn, &end_pfn);
@@ -400,7 +399,6 @@ static void __init node_mem_init(cnodeid_t node)
         __node_data[node] = __va(slot_freepfn << PAGE_SHIFT);
         memset(__node_data[node], 0, PAGE_SIZE);
  
-       NODE_DATA(node)->bdata = &bootmem_node_data[node];
         NODE_DATA(node)->node_start_pfn = start_pfn;
         NODE_DATA(node)->node_spanned_pages = end_pfn - start_pfn;
  
@@ -409,12 +407,11 @@ static void __init node_mem_init(cnodeid_t node)
         slot_freepfn += PFN_UP(sizeof(struct pglist_data) +
                                sizeof(struct hub_data));
  
-       bootmap_size = init_bootmem_node(NODE_DATA(node), slot_freepfn,
-                                       start_pfn, end_pfn);
         free_bootmem_with_active_regions(node, end_pfn);
-       reserve_bootmem_node(NODE_DATA(node), slot_firstpfn << PAGE_SHIFT,
-               ((slot_freepfn - slot_firstpfn) << PAGE_SHIFT) + bootmap_size,
-               BOOTMEM_DEFAULT);
+
+       memblock_reserve(slot_firstpfn << PAGE_SHIFT,
+                        ((slot_freepfn - slot_firstpfn) << PAGE_SHIFT));
+
         sparse_memory_present_with_active_regions(node);
  }
  
diff --git a/arch/mips/tools/.gitignore b/arch/mips/tools/.gitignore

new file mode 100644 (file)

index 0000000..56d34cc
--- /dev/null
+++ b/arch/mips/tools/.gitignore
@@ -0,0 +1 @@
+elf-entry
diff --git a/arch/mips/tools/Makefile b/arch/mips/tools/Makefile

new file mode 100644 (file)

index 0000000..3baee4b
--- /dev/null
+++ b/arch/mips/tools/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+hostprogs-y := elf-entry
+PHONY += elf-entry
+elf-entry: $(obj)/elf-entry
+       @:
diff --git a/arch/mips/tools/elf-entry.c b/arch/mips/tools/elf-entry.c

new file mode 100644 (file)

index 0000000..adde79c
--- /dev/null
+++ b/arch/mips/tools/elf-entry.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Print the entry point address of the ELF file named on the command line.
+ *
+ * 32-bit and 64-bit ELF files of either byte order are accepted. The address
+ * is printed as a 64-bit hexadecimal number.
+ */
+#include <byteswap.h>
+#include <elf.h>
+#include <endian.h>
+#include <inttypes.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef be32toh
+/* If libc provides [bl]e{32,64}toh() then we'll use them */
+#elif BYTE_ORDER == LITTLE_ENDIAN
+# define be32toh(x)    bswap_32(x)
+# define le32toh(x)    (x)
+# define be64toh(x)    bswap_64(x)
+# define le64toh(x)    (x)
+#elif BYTE_ORDER == BIG_ENDIAN
+# define be32toh(x)    (x)
+# define le32toh(x)    bswap_32(x)
+# define be64toh(x)    (x)
+# define le64toh(x)    bswap_64(x)
+#endif
+
+/* Write @msg to stderr and terminate with a failure exit status. */
+__attribute__((noreturn))
+static void die(const char *msg)
+{
+       fputs(msg, stderr);
+       exit(EXIT_FAILURE);
+}
+
+/*
+ * Read the ELF header of argv[1] and print its e_entry field.
+ *
+ * Returns EXIT_SUCCESS after printing the entry point. Terminates with
+ * EXIT_FAILURE if the file cannot be opened or fully read, or if its header
+ * is not a recognised ELF class and data encoding.
+ */
+int main(int argc, const char *argv[])
+{
+       uint64_t entry;
+       size_t nread;
+       FILE *file;
+       union {
+               Elf32_Ehdr ehdr32;
+               Elf64_Ehdr ehdr64;
+       } hdr;
+
+       if (argc != 2)
+               die("Usage: elf-entry <elf-file>\n");
+
+       /* The header is raw binary data, so request an untranslated stream */
+       file = fopen(argv[1], "rb");
+       if (!file) {
+               perror("Unable to open input file");
+               return EXIT_FAILURE;
+       }
+
+       nread = fread(&hdr, 1, sizeof(hdr), file);
+       if (nread != sizeof(hdr)) {
+               /* errno is only meaningful if the stream reported an error */
+               if (ferror(file))
+                       perror("Unable to read input file");
+               else
+                       fputs("Unable to read input file: file too short\n",
+                             stderr);
+               fclose(file);
+               return EXIT_FAILURE;
+       }
+
+       /* Only the header is needed, so release the file straight away */
+       fclose(file);
+
+       if (memcmp(hdr.ehdr32.e_ident, ELFMAG, SELFMAG))
+               die("Input is not an ELF\n");
+
+       /* e_ident is read through ehdr32 for both classes */
+       switch (hdr.ehdr32.e_ident[EI_CLASS]) {
+       case ELFCLASS32:
+               switch (hdr.ehdr32.e_ident[EI_DATA]) {
+               case ELFDATA2LSB:
+                       entry = le32toh(hdr.ehdr32.e_entry);
+                       break;
+               case ELFDATA2MSB:
+                       entry = be32toh(hdr.ehdr32.e_entry);
+                       break;
+               default:
+                       die("Invalid ELF encoding\n");
+               }
+
+               /* Sign extend to form a canonical address */
+               entry = (int64_t)(int32_t)entry;
+               break;
+
+       case ELFCLASS64:
+               switch (hdr.ehdr32.e_ident[EI_DATA]) {
+               case ELFDATA2LSB:
+                       entry = le64toh(hdr.ehdr64.e_entry);
+                       break;
+               case ELFDATA2MSB:
+                       entry = be64toh(hdr.ehdr64.e_entry);
+                       break;
+               default:
+                       die("Invalid ELF encoding\n");
+               }
+               break;
+
+       default:
+               die("Invalid ELF class\n");
+       }
+
+       printf("0x%016" PRIx64 "\n", entry);
+       return EXIT_SUCCESS;
+}
diff --git a/arch/mips/txx9/generic/setup.c b/arch/mips/txx9/generic/setup.c

index f6d9182..70a1ab6 100644 (file)
--- a/arch/mips/txx9/generic/setup.c
+++ b/arch/mips/txx9/generic/setup.c
@@ -960,12 +960,11 @@ void __init txx9_sramc_init(struct resource *r)
                 goto exit_put;
         err = sysfs_create_bin_file(&dev->dev.kobj, &dev->bindata_attr);
         if (err) {
-               device_unregister(&dev->dev);
                 iounmap(dev->base);
-               kfree(dev);
+               device_unregister(&dev->dev);
         }
         return;
  exit_put:
+       iounmap(dev->base);
         put_device(&dev->dev);
-       return;
  }
diff --git a/arch/nds32/Makefile b/arch/nds32/Makefile

index 3509fac..9f525ed 100644 (file)
--- a/arch/nds32/Makefile
+++ b/arch/nds32/Makefile
@@ -47,7 +47,7 @@ CHECKFLAGS      += -D__NDS32_EB__
  endif
  
  boot := arch/nds32/boot
-core-$(BUILTIN_DTB) += $(boot)/dts/
+core-y += $(boot)/dts/
  
  .PHONY: FORCE
  
diff --git a/arch/nios2/Makefile b/arch/nios2/Makefile

index 8673a79..52c03e6 100644 (file)
--- a/arch/nios2/Makefile
+++ b/arch/nios2/Makefile
@@ -49,21 +49,13 @@ BOOT_TARGETS = vmImage zImage
  PHONY += $(BOOT_TARGETS) install
  KBUILD_IMAGE := $(nios2-boot)/vmImage
  
-ifneq ($(CONFIG_NIOS2_DTB_SOURCE),"")
-       core-y  += $(nios2-boot)/
-endif
+core-y += $(nios2-boot)/dts/
  
  all: vmImage
  
  archclean:
         $(Q)$(MAKE) $(clean)=$(nios2-boot)
  
-%.dtb: | scripts
-       $(Q)$(MAKE) $(build)=$(nios2-boot) $(nios2-boot)/$@
-
-dtbs:
-       $(Q)$(MAKE) $(build)=$(nios2-boot) $(nios2-boot)/$@
-
  $(BOOT_TARGETS): vmlinux
         $(Q)$(MAKE) $(build)=$(nios2-boot) $(nios2-boot)/$@
  
@@ -76,5 +68,4 @@ define archhelp
    echo  '                     (your) ~/bin/$(INSTALLKERNEL) or'
    echo  '                     (distribution) /sbin/$(INSTALLKERNEL) or'
    echo  '                     install to $$(INSTALL_PATH)'
-  echo  '  dtbs            - Build device tree blobs for enabled boards'
  endef
diff --git a/arch/nios2/boot/Makefile b/arch/nios2/boot/Makefile

index 2ba23a6..37dfc7e 100644 (file)
--- a/arch/nios2/boot/Makefile
+++ b/arch/nios2/boot/Makefile
@@ -31,27 +31,5 @@ $(obj)/zImage: $(obj)/compressed/vmlinux FORCE
  $(obj)/compressed/vmlinux: $(obj)/vmlinux.gz FORCE
         $(Q)$(MAKE) $(build)=$(obj)/compressed $@
  
-# Rule to build device tree blobs
-DTB_SRC := $(patsubst "%",%,$(CONFIG_NIOS2_DTB_SOURCE))
-
-# Make sure the generated dtb gets removed during clean
-extra-$(CONFIG_NIOS2_DTB_SOURCE_BOOL) += system.dtb
-
-$(obj)/system.dtb: $(DTB_SRC) FORCE
-       $(call cmd,dtc)
-
-# Ensure system.dtb exists
-$(obj)/linked_dtb.o: $(obj)/system.dtb
-
-obj-$(CONFIG_NIOS2_DTB_SOURCE_BOOL) += linked_dtb.o
-
-targets += $(dtb-y)
-
-# Rule to build device tree blobs with make command
-$(obj)/%.dtb: $(src)/dts/%.dts FORCE
-       $(call if_changed_dep,dtc)
-
-$(obj)/dtbs: $(addprefix $(obj)/, $(dtb-y))
-
  install:
         sh $(srctree)/$(src)/install.sh $(KERNELRELEASE) $(BOOTIMAGE) System.map "$(INSTALL_PATH)"
diff --git a/arch/nios2/boot/dts/Makefile b/arch/nios2/boot/dts/Makefile

new file mode 100644 (file)

index 0000000..a91a0b0
--- /dev/null
+++ b/arch/nios2/boot/dts/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-y := $(patsubst "%.dts",%.dtb.o,$(CONFIG_NIOS2_DTB_SOURCE))
+
+dtstree                := $(srctree)/$(src)
+dtb-$(CONFIG_OF_ALL_DTBS) := $(patsubst $(dtstree)/%.dts,%.dtb, $(wildcard $(dtstree)/*.dts))
diff --git a/arch/nios2/boot/linked_dtb.S b/arch/nios2/boot/linked_dtb.S

deleted file mode 100644 (file)

index 071f922..0000000
--- a/arch/nios2/boot/linked_dtb.S
+++ /dev/null
@@ -1,19 +0,0 @@
-/*
- * Copyright (C) 2011 Thomas Chou <thomas@wytron.com.tw>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- */
-.section .dtb.init.rodata,"a"
-.incbin "arch/nios2/boot/system.dtb"
diff --git a/arch/nios2/kernel/cpuinfo.c b/arch/nios2/kernel/cpuinfo.c

index 9320771..ccc1d2a 100644 (file)
--- a/arch/nios2/kernel/cpuinfo.c
+++ b/arch/nios2/kernel/cpuinfo.c
@@ -47,7 +47,7 @@ void __init setup_cpuinfo(void)
         const char *str;
         int len;
  
-       cpu = of_find_node_by_type(NULL, "cpu");
+       cpu = of_get_cpu_node(0, NULL);
         if (!cpu)
                 panic("%s: No CPU found in devicetree!\n", __func__);
  
@@ -120,6 +120,8 @@ void __init setup_cpuinfo(void)
         cpuinfo.reset_addr = fcpu(cpu, "altr,reset-addr");
         cpuinfo.exception_addr = fcpu(cpu, "altr,exception-addr");
         cpuinfo.fast_tlb_miss_exc_addr = fcpu(cpu, "altr,fast-tlb-miss-addr");
+
+       of_node_put(cpu);
  }
  
  #ifdef CONFIG_PROC_FS
diff --git a/arch/nios2/kernel/time.c b/arch/nios2/kernel/time.c

index ab88b6d..54467d0 100644 (file)
--- a/arch/nios2/kernel/time.c
+++ b/arch/nios2/kernel/time.c
@@ -214,12 +214,12 @@ static int __init nios2_timer_get_base_and_freq(struct device_node *np,
  {
         *base = of_iomap(np, 0);
         if (!*base) {
-               pr_crit("Unable to map reg for %s\n", np->name);
+               pr_crit("Unable to map reg for %pOFn\n", np);
                 return -ENXIO;
         }
  
         if (of_property_read_u32(np, "clock-frequency", freq)) {
-               pr_crit("Unable to get %s clock frequency\n", np->name);
+               pr_crit("Unable to get %pOFn clock frequency\n", np);
                 return -EINVAL;
         }
  
diff --git a/arch/openrisc/kernel/setup.c b/arch/openrisc/kernel/setup.c

index 9d28ab1..e17fcd8 100644 (file)
--- a/arch/openrisc/kernel/setup.c
+++ b/arch/openrisc/kernel/setup.c
@@ -158,9 +158,8 @@ static struct device_node *setup_find_cpu_node(int cpu)
  {
         u32 hwid;
         struct device_node *cpun;
-       struct device_node *cpus = of_find_node_by_path("/cpus");
  
-       for_each_available_child_of_node(cpus, cpun) {
+       for_each_of_cpu_node(cpun) {
                 if (of_property_read_u32(cpun, "reg", &hwid))
                         continue;
                 if (hwid == cpu)
diff --git a/arch/powerpc/Kbuild b/arch/powerpc/Kbuild

new file mode 100644 (file)

index 0000000..1625a06
--- /dev/null
+++ b/arch/powerpc/Kbuild
@@ -0,0 +1,16 @@
+subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
+
+obj-y += kernel/
+obj-y += mm/
+obj-y += lib/
+obj-y += sysdev/
+obj-y += platforms/
+obj-y += math-emu/
+obj-y += crypto/
+obj-y += net/
+
+obj-$(CONFIG_XMON) += xmon/
+obj-$(CONFIG_KVM)  += kvm/
+
+obj-$(CONFIG_PERF_EVENTS) += perf/
+obj-$(CONFIG_KEXEC_FILE)  += purgatory/
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig

index a806692..e84943d 100644 (file)
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -137,7 +137,7 @@ config PPC
         select ARCH_HAS_PMEM_API                if PPC64
         select ARCH_HAS_PTE_SPECIAL
         select ARCH_HAS_MEMBARRIER_CALLBACKS
-       select ARCH_HAS_SCALED_CPUTIME          if VIRT_CPU_ACCOUNTING_NATIVE
+       select ARCH_HAS_SCALED_CPUTIME          if VIRT_CPU_ACCOUNTING_NATIVE && PPC64
         select ARCH_HAS_SG_CHAIN
         select ARCH_HAS_STRICT_KERNEL_RWX       if ((PPC_BOOK3S_64 || PPC32) && !RELOCATABLE && !HIBERNATION)
         select ARCH_HAS_TICK_BROADCAST          if GENERIC_CLOCKEVENTS_BROADCAST
@@ -180,6 +180,8 @@ config PPC
         select HAVE_ARCH_SECCOMP_FILTER
         select HAVE_ARCH_TRACEHOOK
         select HAVE_CBPF_JIT                    if !PPC64
+       select HAVE_STACKPROTECTOR              if PPC64 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r13)
+       select HAVE_STACKPROTECTOR              if PPC32 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r2)
         select HAVE_CONTEXT_TRACKING            if PPC64
         select HAVE_DEBUG_KMEMLEAK
         select HAVE_DEBUG_STACKOVERFLOW
@@ -188,6 +190,7 @@ config PPC
         select HAVE_EBPF_JIT                    if PPC64
         select HAVE_EFFICIENT_UNALIGNED_ACCESS  if !(CPU_LITTLE_ENDIAN && POWER7_CPU)
         select HAVE_FTRACE_MCOUNT_RECORD
+       select HAVE_FUNCTION_ERROR_INJECTION
         select HAVE_FUNCTION_GRAPH_TRACER
         select HAVE_FUNCTION_TRACER
         select HAVE_GCC_PLUGINS                 if GCC_VERSION >= 50200   # plugin support on gcc <= 5.1 is buggy on PPC
@@ -285,12 +288,10 @@ config ARCH_MAY_HAVE_PC_FDC
  
  config PPC_UDBG_16550
         bool
-       default n
  
  config GENERIC_TBSYNC
         bool
         default y if PPC32 && SMP
-       default n
  
  config AUDIT_ARCH
         bool
@@ -309,13 +310,11 @@ config EPAPR_BOOT
         bool
         help
           Used to allow a board to specify it wants an ePAPR compliant wrapper.
-       default n
  
  config DEFAULT_UIMAGE
         bool
         help
           Used to allow a board to specify it wants a uImage built by default
-       default n
  
  config ARCH_HIBERNATION_POSSIBLE
         bool
@@ -329,11 +328,9 @@ config ARCH_SUSPEND_POSSIBLE
  
  config PPC_DCR_NATIVE
         bool
-       default n
  
  config PPC_DCR_MMIO
         bool
-       default n
  
  config PPC_DCR
         bool
@@ -344,7 +341,6 @@ config PPC_OF_PLATFORM_PCI
         bool
         depends on PCI
         depends on PPC64 # not supported on 32 bits yet
-       default n
  
  config ARCH_SUPPORTS_DEBUG_PAGEALLOC
         depends on PPC32 || PPC_BOOK3S_64
@@ -447,14 +443,12 @@ config PPC_TRANSACTIONAL_MEM
         depends on SMP
         select ALTIVEC
         select VSX
-       default n
         ---help---
           Support user-mode Transactional Memory on POWERPC.
  
  config LD_HEAD_STUB_CATCH
         bool "Reserve 256 bytes to cope with linker stubs in HEAD text" if EXPERT
         depends on PPC64
-       default n
         help
           Very large kernels can cause linker branch stubs to be generated by
           code in head_64.S, which moves the head text sections out of their
@@ -557,7 +551,6 @@ config RELOCATABLE
  config RELOCATABLE_TEST
         bool "Test relocatable kernel"
         depends on (PPC64 && RELOCATABLE)
-       default n
         help
           This runs the relocatable kernel at the address it was initially
           loaded at, which tends to be non-zero and therefore test the
@@ -769,7 +762,6 @@ config PPC_SUBPAGE_PROT
  
  config PPC_COPRO_BASE
         bool
-       default n
  
  config SCHED_SMT
         bool "SMT (Hyperthreading) scheduler support"
@@ -892,7 +884,6 @@ config PPC_INDIRECT_PCI
         bool
         depends on PCI
         default y if 40x || 44x
-       default n
  
  config EISA
         bool
@@ -989,7 +980,6 @@ source "drivers/pcmcia/Kconfig"
  
  config HAS_RAPIDIO
         bool
-       default n
  
  config RAPIDIO
         tristate "RapidIO support"
@@ -1012,7 +1002,6 @@ endmenu
  
  config NONSTATIC_KERNEL
         bool
-       default n
  
  menu "Advanced setup"
         depends on PPC32
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug

index fd63cd9..f4961fb 100644 (file)
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -2,7 +2,6 @@
  
  config PPC_DISABLE_WERROR
         bool "Don't build arch/powerpc code with -Werror"
-       default n
         help
           This option tells the compiler NOT to build the code under
           arch/powerpc with the -Werror flag (which means warnings
@@ -56,7 +55,6 @@ config PPC_EMULATED_STATS
  config CODE_PATCHING_SELFTEST
         bool "Run self-tests of the code-patching code"
         depends on DEBUG_KERNEL
-       default n
  
  config JUMP_LABEL_FEATURE_CHECKS
         bool "Enable use of jump label for cpu/mmu_has_feature()"
@@ -70,7 +68,6 @@ config JUMP_LABEL_FEATURE_CHECKS
  config JUMP_LABEL_FEATURE_CHECK_DEBUG
         bool "Do extra check on feature fixup calls"
         depends on DEBUG_KERNEL && JUMP_LABEL_FEATURE_CHECKS
-       default n
         help
           This tries to catch incorrect usage of cpu_has_feature() and
           mmu_has_feature() in the code.
@@ -80,16 +77,13 @@ config JUMP_LABEL_FEATURE_CHECK_DEBUG
  config FTR_FIXUP_SELFTEST
         bool "Run self-tests of the feature-fixup code"
         depends on DEBUG_KERNEL
-       default n
  
  config MSI_BITMAP_SELFTEST
         bool "Run self-tests of the MSI bitmap code"
         depends on DEBUG_KERNEL
-       default n
  
  config PPC_IRQ_SOFT_MASK_DEBUG
         bool "Include extra checks for powerpc irq soft masking"
-       default n
  
  config XMON
         bool "Include xmon kernel debugger"
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile

index 11a1acb..17be664 100644 (file)
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -112,6 +112,13 @@ KBUILD_LDFLAGS     += -m elf$(BITS)$(LDEMULATION)
  KBUILD_ARFLAGS += --target=elf$(BITS)-$(GNUTARGET)
  endif
  
+cflags-$(CONFIG_STACKPROTECTOR)        += -mstack-protector-guard=tls
+ifdef CONFIG_PPC64
+cflags-$(CONFIG_STACKPROTECTOR)        += -mstack-protector-guard-reg=r13
+else
+cflags-$(CONFIG_STACKPROTECTOR)        += -mstack-protector-guard-reg=r2
+endif
+
  LDFLAGS_vmlinux-y := -Bstatic
  LDFLAGS_vmlinux-$(CONFIG_RELOCATABLE) := -pie
  LDFLAGS_vmlinux        := $(LDFLAGS_vmlinux-y)
@@ -160,8 +167,17 @@ else
  CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=powerpc64
  endif
  
+ifdef CONFIG_FUNCTION_TRACER
+CC_FLAGS_FTRACE := -pg
  ifdef CONFIG_MPROFILE_KERNEL
-       CC_FLAGS_FTRACE := -pg -mprofile-kernel
+CC_FLAGS_FTRACE += -mprofile-kernel
+endif
+# Work around gcc code-gen bugs with -pg / -fno-omit-frame-pointer in gcc <= 4.8
+# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=44199
+# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52828
+ifneq ($(cc-name),clang)
+CC_FLAGS_FTRACE        += $(call cc-ifversion, -lt, 0409, -mno-sched-epilog)
+endif
  endif
  
  CFLAGS-$(CONFIG_TARGET_CPU_BOOL) += $(call cc-option,-mcpu=$(CONFIG_TARGET_CPU))
@@ -229,16 +245,15 @@ ifdef CONFIG_6xx
  KBUILD_CFLAGS          += -mcpu=powerpc
  endif
  
-# Work around a gcc code-gen bug with -fno-omit-frame-pointer.
-ifdef CONFIG_FUNCTION_TRACER
-KBUILD_CFLAGS          += -mno-sched-epilog
-endif
-
  cpu-as-$(CONFIG_4xx)           += -Wa,-m405
  cpu-as-$(CONFIG_ALTIVEC)       += $(call as-option,-Wa$(comma)-maltivec)
  cpu-as-$(CONFIG_E200)          += -Wa,-me200
  cpu-as-$(CONFIG_E500)          += -Wa,-me500
-cpu-as-$(CONFIG_PPC_BOOK3S_64) += -Wa,-mpower4
+
+# When using '-many -mpower4' gas will first try and find a matching power4
+# mnemonic and failing that it will allow any valid mnemonic that GAS knows
+# about. GCC will pass -many to GAS when assembling, clang does not.
+cpu-as-$(CONFIG_PPC_BOOK3S_64) += -Wa,-mpower4 -Wa,-many
  cpu-as-$(CONFIG_PPC_E500MC)    += $(call as-option,-Wa$(comma)-me500mc)
  
  KBUILD_AFLAGS += $(cpu-as-y)
@@ -258,18 +273,8 @@ head-$(CONFIG_PPC_FPU)             += arch/powerpc/kernel/fpu.o
  head-$(CONFIG_ALTIVEC)         += arch/powerpc/kernel/vector.o
  head-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE)  += arch/powerpc/kernel/prom_init.o
  
-core-y                         += arch/powerpc/kernel/ \
-                                  arch/powerpc/mm/ \
-                                  arch/powerpc/lib/ \
-                                  arch/powerpc/sysdev/ \
-                                  arch/powerpc/platforms/ \
-                                  arch/powerpc/math-emu/ \
-                                  arch/powerpc/crypto/ \
-                                  arch/powerpc/net/
-core-$(CONFIG_XMON)            += arch/powerpc/xmon/
-core-$(CONFIG_KVM)             += arch/powerpc/kvm/
-core-$(CONFIG_PERF_EVENTS)     += arch/powerpc/perf/
-core-$(CONFIG_KEXEC_FILE)      += arch/powerpc/purgatory/
+# See arch/powerpc/Kbuild for content of core part of the kernel
+core-y += arch/powerpc/
  
  drivers-$(CONFIG_OPROFILE)     += arch/powerpc/oprofile/
  
@@ -293,9 +298,6 @@ $(BOOT_TARGETS2): vmlinux
  bootwrapper_install:
         $(Q)$(MAKE) $(build)=$(boot) $(patsubst %,$(boot)/%,$@)
  
-%.dtb: scripts
-       $(Q)$(MAKE) $(build)=$(boot) $(patsubst %,$(boot)/%,$@)
-
  # Used to create 'merged defconfigs'
  # To use it $(call) it with the first argument as the base defconfig
  # and the second argument as a space separated list of .config files to merge,
@@ -400,40 +402,20 @@ archclean:
  
  archprepare: checkbin
  
-# Use the file '.tmp_gas_check' for binutils tests, as gas won't output
-# to stdout and these checks are run even on install targets.
-TOUT   := .tmp_gas_check
+ifdef CONFIG_STACKPROTECTOR
+prepare: stack_protector_prepare
+
+stack_protector_prepare: prepare0
+ifdef CONFIG_PPC64
+       $(eval KBUILD_CFLAGS += -mstack-protector-guard-offset=$(shell awk '{if ($$2 == "PACA_CANARY") print $$3;}' include/generated/asm-offsets.h))
+else
+       $(eval KBUILD_CFLAGS += -mstack-protector-guard-offset=$(shell awk '{if ($$2 == "TASK_CANARY") print $$3;}' include/generated/asm-offsets.h))
+endif
+endif
  
-# Check gcc and binutils versions:
-# - gcc-3.4 and binutils-2.14 are a fatal combination
-# - Require gcc 4.0 or above on 64-bit
-# - gcc-4.2.0 has issues compiling modules on 64-bit
+# Check toolchain versions:
+# - gcc-4.6 is the minimum kernel-wide version so nothing required.
  checkbin:
-       @if test "$(cc-name)" != "clang" \
-           && test "$(cc-version)" = "0304" ; then \
-               if ! /bin/echo mftb 5 | $(AS) -v -mppc -many -o $(TOUT) >/dev/null 2>&1 ; then \
-                       echo -n '*** ${VERSION}.${PATCHLEVEL} kernels no longer build '; \
-                       echo 'correctly with gcc-3.4 and your version of binutils.'; \
-                       echo '*** Please upgrade your binutils or downgrade your gcc'; \
-                       false; \
-               fi ; \
-       fi
-       @if test "$(cc-name)" != "clang" \
-           && test "$(cc-version)" -lt "0400" \
-           && test "x${CONFIG_PPC64}" = "xy" ; then \
-                echo -n "Sorry, GCC v4.0 or above is required to build " ; \
-                echo "the 64-bit powerpc kernel." ; \
-                false ; \
-        fi
-       @if test "$(cc-name)" != "clang" \
-           && test "$(cc-fullversion)" = "040200" \
-           && test "x${CONFIG_MODULES}${CONFIG_PPC64}" = "xyy" ; then \
-               echo -n '*** GCC-4.2.0 cannot compile the 64-bit powerpc ' ; \
-               echo 'kernel with modules enabled.' ; \
-               echo -n '*** Please use a different GCC version or ' ; \
-               echo 'disable kernel modules' ; \
-               false ; \
-       fi
         @if test "x${CONFIG_CPU_LITTLE_ENDIAN}" = "xy" \
             && $(LD) --version | head -1 | grep ' 2\.24$$' >/dev/null ; then \
                 echo -n '*** binutils 2.24 miscompiles weak symbols ' ; \
@@ -441,7 +423,3 @@ checkbin:
                 echo -n '*** Please use a different binutils version.' ; \
                 false ; \
         fi
-
-
-CLEAN_FILES += $(TOUT)
-
diff --git a/arch/powerpc/boot/.gitignore b/arch/powerpc/boot/.gitignore

index f92d053..32034a0 100644 (file)
--- a/arch/powerpc/boot/.gitignore
+++ b/arch/powerpc/boot/.gitignore
@@ -44,4 +44,5 @@ fdt_sw.c
  fdt_wip.c
  libfdt.h
  libfdt_internal.h
+autoconf.h
  
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile

index 0fb96c2..3935436 100644 (file)
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -32,8 +32,8 @@ else
  endif
  
  BOOTCFLAGS    := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
-                -fno-strict-aliasing -Os -msoft-float -pipe \
-                -fomit-frame-pointer -fno-builtin -fPIC -nostdinc \
+                -fno-strict-aliasing -O2 -msoft-float -mno-altivec -mno-vsx \
+                -pipe -fomit-frame-pointer -fno-builtin -fPIC -nostdinc \
                  -D$(compress-y)
  
  ifdef CONFIG_PPC64_BOOT_WRAPPER
@@ -197,9 +197,14 @@ $(obj)/empty.c:
  $(obj)/zImage.coff.lds $(obj)/zImage.ps3.lds : $(obj)/%: $(srctree)/$(src)/%.S
         $(Q)cp $< $@
  
+$(obj)/serial.c: $(obj)/autoconf.h
+
+$(obj)/autoconf.h: $(obj)/%: $(objtree)/include/generated/%
+       $(Q)cp $< $@
+
  clean-files := $(zlib-) $(zlibheader-) $(zliblinuxheader-) \
                 $(zlib-decomp-) $(libfdt) $(libfdtheader) \
-               empty.c zImage.coff.lds zImage.ps3.lds zImage.lds
+               autoconf.h empty.c zImage.coff.lds zImage.ps3.lds zImage.lds
  
  quiet_cmd_bootcc = BOOTCC  $@
        cmd_bootcc = $(BOOTCC) -Wp,-MD,$(depfile) $(BOOTCFLAGS) -c -o $@ $<
@@ -304,9 +309,9 @@ image-$(CONFIG_PPC_ADDER875)                += cuImage.adder875-uboot \
                                            dtbImage.adder875-redboot
  
  # Board ports in arch/powerpc/platform/52xx/Kconfig
-image-$(CONFIG_PPC_LITE5200)           += cuImage.lite5200 lite5200.dtb
-image-$(CONFIG_PPC_LITE5200)           += cuImage.lite5200b lite5200b.dtb
-image-$(CONFIG_PPC_MEDIA5200)          += cuImage.media5200 media5200.dtb
+image-$(CONFIG_PPC_LITE5200)           += cuImage.lite5200
+image-$(CONFIG_PPC_LITE5200)           += cuImage.lite5200b
+image-$(CONFIG_PPC_MEDIA5200)          += cuImage.media5200
  
  # Board ports in arch/powerpc/platform/82xx/Kconfig
  image-$(CONFIG_MPC8272_ADS)            += cuImage.mpc8272ads
@@ -381,11 +386,11 @@ $(addprefix $(obj)/, $(sort $(filter zImage.%, $(image-y)))): vmlinux $(wrapperb
         $(call if_changed,wrap,$(subst $(obj)/zImage.,,$@))
  
  # dtbImage% - a dtbImage is a zImage with an embedded device tree blob
-$(obj)/dtbImage.initrd.%: vmlinux $(wrapperbits) $(obj)/%.dtb FORCE
-       $(call if_changed,wrap,$*,,$(obj)/$*.dtb,$(obj)/ramdisk.image.gz)
+$(obj)/dtbImage.initrd.%: vmlinux $(wrapperbits) $(obj)/dts/%.dtb FORCE
+       $(call if_changed,wrap,$*,,$(obj)/dts/$*.dtb,$(obj)/ramdisk.image.gz)
  
-$(obj)/dtbImage.%: vmlinux $(wrapperbits) $(obj)/%.dtb FORCE
-       $(call if_changed,wrap,$*,,$(obj)/$*.dtb)
+$(obj)/dtbImage.%: vmlinux $(wrapperbits) $(obj)/dts/%.dtb FORCE
+       $(call if_changed,wrap,$*,,$(obj)/dts/$*.dtb)
  
  # This cannot be in the root of $(src) as the zImage rule always adds a $(obj)
  # prefix
@@ -395,36 +400,33 @@ $(obj)/vmlinux.strip: vmlinux
  $(obj)/uImage: vmlinux $(wrapperbits) FORCE
         $(call if_changed,wrap,uboot)
  
-$(obj)/uImage.initrd.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE
-       $(call if_changed,wrap,uboot-$*,,$(obj)/$*.dtb,$(obj)/ramdisk.image.gz)
-
-$(obj)/uImage.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE
-       $(call if_changed,wrap,uboot-$*,,$(obj)/$*.dtb)
+$(obj)/uImage.initrd.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE
+       $(call if_changed,wrap,uboot-$*,,$(obj)/dts/$*.dtb,$(obj)/ramdisk.image.gz)
  
-$(obj)/cuImage.initrd.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE
-       $(call if_changed,wrap,cuboot-$*,,$(obj)/$*.dtb,$(obj)/ramdisk.image.gz)
+$(obj)/uImage.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE
+       $(call if_changed,wrap,uboot-$*,,$(obj)/dts/$*.dtb)
  
-$(obj)/cuImage.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE
-       $(call if_changed,wrap,cuboot-$*,,$(obj)/$*.dtb)
+$(obj)/cuImage.initrd.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE
+       $(call if_changed,wrap,cuboot-$*,,$(obj)/dts/$*.dtb,$(obj)/ramdisk.image.gz)
  
-$(obj)/simpleImage.initrd.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE
-       $(call if_changed,wrap,simpleboot-$*,,$(obj)/$*.dtb,$(obj)/ramdisk.image.gz)
+$(obj)/cuImage.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE
+       $(call if_changed,wrap,cuboot-$*,,$(obj)/dts/$*.dtb)
  
-$(obj)/simpleImage.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE
-       $(call if_changed,wrap,simpleboot-$*,,$(obj)/$*.dtb)
+$(obj)/simpleImage.initrd.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE
+       $(call if_changed,wrap,simpleboot-$*,,$(obj)/dts/$*.dtb,$(obj)/ramdisk.image.gz)
  
-$(obj)/treeImage.initrd.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE
-       $(call if_changed,wrap,treeboot-$*,,$(obj)/$*.dtb,$(obj)/ramdisk.image.gz)
+$(obj)/simpleImage.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE
+       $(call if_changed,wrap,simpleboot-$*,,$(obj)/dts/$*.dtb)
  
-$(obj)/treeImage.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE
-       $(call if_changed,wrap,treeboot-$*,,$(obj)/$*.dtb)
+$(obj)/treeImage.initrd.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE
+       $(call if_changed,wrap,treeboot-$*,,$(obj)/dts/$*.dtb,$(obj)/ramdisk.image.gz)
  
-# Rule to build device tree blobs
-$(obj)/%.dtb: $(src)/dts/%.dts FORCE
-       $(call if_changed_dep,dtc)
+$(obj)/treeImage.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE
+       $(call if_changed,wrap,treeboot-$*,,$(obj)/dts/$*.dtb)
  
-$(obj)/%.dtb: $(src)/dts/fsl/%.dts FORCE
-       $(call if_changed_dep,dtc)
+# Needed for the above targets to work with dts/fsl/ files
+$(obj)/dts/%.dtb: $(obj)/dts/fsl/%.dtb
+       @cp $< $@
  
  # If there isn't a platform selected then just strip the vmlinux.
  ifeq (,$(image-y))
diff --git a/arch/powerpc/boot/crt0.S b/arch/powerpc/boot/crt0.S

index dcf2f15..32dfe6d 100644 (file)
--- a/arch/powerpc/boot/crt0.S
+++ b/arch/powerpc/boot/crt0.S
@@ -47,8 +47,10 @@ p_end:               .long   _end
  p_pstack:      .long   _platform_stack_top
  #endif
  
-       .weak   _zimage_start
         .globl  _zimage_start
+       /* Clang appears to require the .weak directive to be after the symbol
+        * is defined. See https://bugs.llvm.org/show_bug.cgi?id=38921  */
+       .weak   _zimage_start
  _zimage_start:
         .globl  _zimage_start_lib
  _zimage_start_lib:
diff --git a/arch/powerpc/boot/dts/Makefile b/arch/powerpc/boot/dts/Makefile

new file mode 100644 (file)

index 0000000..fb335d0
--- /dev/null
+++ b/arch/powerpc/boot/dts/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+
+subdir-y += fsl
+
+dtstree                := $(srctree)/$(src)
+dtb-$(CONFIG_OF_ALL_DTBS) := $(patsubst $(dtstree)/%.dts,%.dtb, $(wildcard $(dtstree)/*.dts))
diff --git a/arch/powerpc/boot/dts/fsl/Makefile b/arch/powerpc/boot/dts/fsl/Makefile

new file mode 100644 (file)

index 0000000..3bae982
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
+
+dtstree                := $(srctree)/$(src)
+dtb-$(CONFIG_OF_ALL_DTBS) := $(patsubst $(dtstree)/%.dts,%.dtb, $(wildcard $(dtstree)/*.dts))
diff --git a/arch/powerpc/boot/libfdt_env.h b/arch/powerpc/boot/libfdt_env.h

index 2a0c8b1..2abc8e8 100644 (file)
--- a/arch/powerpc/boot/libfdt_env.h
+++ b/arch/powerpc/boot/libfdt_env.h
@@ -5,6 +5,8 @@
  #include <types.h>
  #include <string.h>
  
+#define INT_MAX                        ((int)(~0U>>1))
+
  #include "of.h"
  
  typedef unsigned long uintptr_t;
diff --git a/arch/powerpc/boot/opal.c b/arch/powerpc/boot/opal.c

index 0272570..dfb199e 100644 (file)
--- a/arch/powerpc/boot/opal.c
+++ b/arch/powerpc/boot/opal.c
@@ -13,8 +13,6 @@
  #include <libfdt.h>
  #include "../include/asm/opal-api.h"
  
-#ifdef CONFIG_PPC64_BOOT_WRAPPER
-
  /* Global OPAL struct used by opal-call.S */
  struct opal {
         u64 base;
@@ -101,9 +99,3 @@ int opal_console_init(void *devp, struct serial_console_data *scdp)
  
         return 0;
  }
-#else
-int opal_console_init(void *devp, struct serial_console_data *scdp)
-{
-       return -1;
-}
-#endif /* __powerpc64__ */
diff --git a/arch/powerpc/boot/serial.c b/arch/powerpc/boot/serial.c

index 48e3743..f045f84 100644 (file)
--- a/arch/powerpc/boot/serial.c
+++ b/arch/powerpc/boot/serial.c
@@ -18,6 +18,7 @@
  #include "stdio.h"
  #include "io.h"
  #include "ops.h"
+#include "autoconf.h"
  
  static int serial_open(void)
  {
diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig

index 67c39f4..f686cc1 100644 (file)
--- a/arch/powerpc/configs/g5_defconfig
+++ b/arch/powerpc/configs/g5_defconfig
@@ -262,3 +262,4 @@ CONFIG_CRYPTO_SERPENT=m
  CONFIG_CRYPTO_TEA=m
  CONFIG_CRYPTO_TWOFISH=m
  # CONFIG_CRYPTO_HW is not set
+CONFIG_PRINTK_TIME=y
diff --git a/arch/powerpc/configs/maple_defconfig b/arch/powerpc/configs/maple_defconfig

index 59e47ec..f71edda 100644 (file)
--- a/arch/powerpc/configs/maple_defconfig
+++ b/arch/powerpc/configs/maple_defconfig
@@ -112,3 +112,4 @@ CONFIG_PPC_EARLY_DEBUG=y
  CONFIG_CRYPTO_ECB=m
  CONFIG_CRYPTO_PCBC=m
  # CONFIG_CRYPTO_HW is not set
+CONFIG_PRINTK_TIME=y
diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig

index 6ab34e6..ef2ef98 100644 (file)
--- a/arch/powerpc/configs/powernv_defconfig
+++ b/arch/powerpc/configs/powernv_defconfig
@@ -44,6 +44,9 @@ CONFIG_PPC_MEMTRACE=y
  # CONFIG_PPC_PSERIES is not set
  # CONFIG_PPC_OF_BOOT_TRAMPOLINE is not set
  CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
+CONFIG_CPU_FREQ_GOV_POWERSAVE=y
+CONFIG_CPU_FREQ_GOV_USERSPACE=y
+CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
  CONFIG_CPU_IDLE=y
  CONFIG_HZ_100=y
  CONFIG_BINFMT_MISC=m
@@ -350,3 +353,4 @@ CONFIG_VIRTUALIZATION=y
  CONFIG_KVM_BOOK3S_64=m
  CONFIG_KVM_BOOK3S_64_HV=m
  CONFIG_VHOST_NET=m
+CONFIG_PRINTK_TIME=y
diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig

index 5033e63..f251567 100644 (file)
--- a/arch/powerpc/configs/ppc64_defconfig
+++ b/arch/powerpc/configs/ppc64_defconfig
@@ -40,6 +40,9 @@ CONFIG_PS3_LPM=m
  CONFIG_PPC_IBM_CELL_BLADE=y
  CONFIG_RTAS_FLASH=m
  CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
+CONFIG_CPU_FREQ_GOV_POWERSAVE=y
+CONFIG_CPU_FREQ_GOV_USERSPACE=y
+CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
  CONFIG_CPU_FREQ_PMAC64=y
  CONFIG_HZ_100=y
  CONFIG_BINFMT_MISC=m
@@ -365,3 +368,4 @@ CONFIG_VIRTUALIZATION=y
  CONFIG_KVM_BOOK3S_64=m
  CONFIG_KVM_BOOK3S_64_HV=m
  CONFIG_VHOST_NET=m
+CONFIG_PRINTK_TIME=y
diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig

index 187e2f7..cf8d55f 100644 (file)
--- a/arch/powerpc/configs/ps3_defconfig
+++ b/arch/powerpc/configs/ps3_defconfig
@@ -171,3 +171,4 @@ CONFIG_CRYPTO_PCBC=m
  CONFIG_CRYPTO_MICHAEL_MIC=m
  CONFIG_CRYPTO_SALSA20=m
  CONFIG_CRYPTO_LZO=m
+CONFIG_PRINTK_TIME=y
diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig

index 0dd5cf7..5e09a40 100644 (file)
--- a/arch/powerpc/configs/pseries_defconfig
+++ b/arch/powerpc/configs/pseries_defconfig
@@ -325,3 +325,4 @@ CONFIG_VIRTUALIZATION=y
  CONFIG_KVM_BOOK3S_64=m
  CONFIG_KVM_BOOK3S_64_HV=m
  CONFIG_VHOST_NET=m
+CONFIG_PRINTK_TIME=y
diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig

index 6bd5e72..cfdd088 100644 (file)
--- a/arch/powerpc/configs/skiroot_defconfig
+++ b/arch/powerpc/configs/skiroot_defconfig
@@ -3,20 +3,17 @@ CONFIG_ALTIVEC=y
  CONFIG_VSX=y
  CONFIG_NR_CPUS=2048
  CONFIG_CPU_LITTLE_ENDIAN=y
+CONFIG_KERNEL_XZ=y
  # CONFIG_SWAP is not set
  CONFIG_SYSVIPC=y
  CONFIG_POSIX_MQUEUE=y
  # CONFIG_CROSS_MEMORY_ATTACH is not set
  CONFIG_NO_HZ=y
  CONFIG_HIGH_RES_TIMERS=y
-CONFIG_TASKSTATS=y
-CONFIG_TASK_DELAY_ACCT=y
-CONFIG_TASK_XACCT=y
-CONFIG_TASK_IO_ACCOUNTING=y
+# CONFIG_CPU_ISOLATION is not set
  CONFIG_IKCONFIG=y
  CONFIG_IKCONFIG_PROC=y
  CONFIG_LOG_BUF_SHIFT=20
-CONFIG_RELAY=y
  CONFIG_BLK_DEV_INITRD=y
  # CONFIG_RD_GZIP is not set
  # CONFIG_RD_BZIP2 is not set
@@ -24,8 +21,14 @@ CONFIG_BLK_DEV_INITRD=y
  # CONFIG_RD_LZO is not set
  # CONFIG_RD_LZ4 is not set
  CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_EXPERT=y
+# CONFIG_SGETMASK_SYSCALL is not set
+# CONFIG_SYSFS_SYSCALL is not set
+# CONFIG_SHMEM is not set
+# CONFIG_AIO is not set
  CONFIG_PERF_EVENTS=y
  # CONFIG_COMPAT_BRK is not set
+CONFIG_SLAB_FREELIST_HARDENED=y
  CONFIG_JUMP_LABEL=y
  CONFIG_STRICT_KERNEL_RWX=y
  CONFIG_MODULES=y
@@ -35,7 +38,9 @@ CONFIG_MODULE_SIG_FORCE=y
  CONFIG_MODULE_SIG_SHA512=y
  CONFIG_PARTITION_ADVANCED=y
  # CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_PPC_VAS is not set
  # CONFIG_PPC_PSERIES is not set
+# CONFIG_PPC_OF_BOOT_TRAMPOLINE is not set
  CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
  CONFIG_CPU_IDLE=y
  CONFIG_HZ_100=y
@@ -48,8 +53,9 @@ CONFIG_NUMA=y
  CONFIG_PPC_64K_PAGES=y
  CONFIG_SCHED_SMT=y
  CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=tty0 console=hvc0 powersave=off"
+CONFIG_CMDLINE="console=tty0 console=hvc0 ipr.fast_reboot=1 quiet"
  # CONFIG_SECCOMP is not set
+# CONFIG_PPC_MEM_KEYS is not set
  CONFIG_NET=y
  CONFIG_PACKET=y
  CONFIG_UNIX=y
@@ -60,7 +66,6 @@ CONFIG_SYN_COOKIES=y
  # CONFIG_INET_XFRM_MODE_TRANSPORT is not set
  # CONFIG_INET_XFRM_MODE_TUNNEL is not set
  # CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_IPV6 is not set
  CONFIG_DNS_RESOLVER=y
  # CONFIG_WIRELESS is not set
  CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
@@ -73,8 +78,10 @@ CONFIG_BLK_DEV_RAM=y
  CONFIG_BLK_DEV_RAM_SIZE=65536
  CONFIG_VIRTIO_BLK=m
  CONFIG_BLK_DEV_NVME=m
-CONFIG_EEPROM_AT24=y
+CONFIG_NVME_MULTIPATH=y
+CONFIG_EEPROM_AT24=m
  # CONFIG_CXL is not set
+# CONFIG_OCXL is not set
  CONFIG_BLK_DEV_SD=m
  CONFIG_BLK_DEV_SR=m
  CONFIG_BLK_DEV_SR_VENDOR=y
@@ -85,7 +92,6 @@ CONFIG_SCSI_FC_ATTRS=y
  CONFIG_SCSI_CXGB3_ISCSI=m
  CONFIG_SCSI_CXGB4_ISCSI=m
  CONFIG_SCSI_BNX2_ISCSI=m
-CONFIG_BE2ISCSI=m
  CONFIG_SCSI_AACRAID=m
  CONFIG_MEGARAID_NEWGEN=y
  CONFIG_MEGARAID_MM=m
@@ -102,7 +108,7 @@ CONFIG_SCSI_VIRTIO=m
  CONFIG_SCSI_DH=y
  CONFIG_SCSI_DH_ALUA=m
  CONFIG_ATA=y
-CONFIG_SATA_AHCI=y
+CONFIG_SATA_AHCI=m
  # CONFIG_ATA_SFF is not set
  CONFIG_MD=y
  CONFIG_BLK_DEV_MD=m
@@ -119,25 +125,72 @@ CONFIG_DM_SNAPSHOT=m
  CONFIG_DM_MIRROR=m
  CONFIG_DM_ZERO=m
  CONFIG_DM_MULTIPATH=m
+# CONFIG_NET_VENDOR_3COM is not set
+# CONFIG_NET_VENDOR_ADAPTEC is not set
+# CONFIG_NET_VENDOR_AGERE is not set
+# CONFIG_NET_VENDOR_ALACRITECH is not set
  CONFIG_ACENIC=m
  CONFIG_ACENIC_OMIT_TIGON_I=y
-CONFIG_TIGON3=y
+# CONFIG_NET_VENDOR_AMAZON is not set
+# CONFIG_NET_VENDOR_AMD is not set
+# CONFIG_NET_VENDOR_AQUANTIA is not set
+# CONFIG_NET_VENDOR_ARC is not set
+# CONFIG_NET_VENDOR_ATHEROS is not set
+CONFIG_TIGON3=m
  CONFIG_BNX2X=m
-CONFIG_CHELSIO_T1=y
+# CONFIG_NET_VENDOR_BROCADE is not set
+# CONFIG_NET_CADENCE is not set
+# CONFIG_NET_VENDOR_CAVIUM is not set
+CONFIG_CHELSIO_T1=m
+# CONFIG_NET_VENDOR_CISCO is not set
+# CONFIG_NET_VENDOR_CORTINA is not set
+# CONFIG_NET_VENDOR_DEC is not set
+# CONFIG_NET_VENDOR_DLINK is not set
  CONFIG_BE2NET=m
-CONFIG_S2IO=m
-CONFIG_E100=m
+# CONFIG_NET_VENDOR_EZCHIP is not set
+# CONFIG_NET_VENDOR_HP is not set
+# CONFIG_NET_VENDOR_HUAWEI is not set
  CONFIG_E1000=m
-CONFIG_E1000E=m
+CONFIG_IGB=m
  CONFIG_IXGB=m
  CONFIG_IXGBE=m
+CONFIG_I40E=m
+CONFIG_S2IO=m
+# CONFIG_NET_VENDOR_MARVELL is not set
  CONFIG_MLX4_EN=m
+# CONFIG_MLX4_CORE_GEN2 is not set
  CONFIG_MLX5_CORE=m
-CONFIG_MLX5_CORE_EN=y
+# CONFIG_NET_VENDOR_MICREL is not set
  CONFIG_MYRI10GE=m
+# CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_NETRONOME is not set
+# CONFIG_NET_VENDOR_NI is not set
+# CONFIG_NET_VENDOR_NVIDIA is not set
+# CONFIG_NET_VENDOR_OKI is not set
+# CONFIG_NET_PACKET_ENGINE is not set
  CONFIG_QLGE=m
  CONFIG_NETXEN_NIC=m
+# CONFIG_NET_VENDOR_QUALCOMM is not set
+# CONFIG_NET_VENDOR_RDC is not set
+# CONFIG_NET_VENDOR_REALTEK is not set
+# CONFIG_NET_VENDOR_RENESAS is not set
+# CONFIG_NET_VENDOR_ROCKER is not set
+# CONFIG_NET_VENDOR_SAMSUNG is not set
+# CONFIG_NET_VENDOR_SEEQ is not set
  CONFIG_SFC=m
+# CONFIG_NET_VENDOR_SILAN is not set
+# CONFIG_NET_VENDOR_SIS is not set
+# CONFIG_NET_VENDOR_SMSC is not set
+# CONFIG_NET_VENDOR_SOCIONEXT is not set
+# CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_NET_VENDOR_SUN is not set
+# CONFIG_NET_VENDOR_SYNOPSYS is not set
+# CONFIG_NET_VENDOR_TEHUTI is not set
+# CONFIG_NET_VENDOR_TI is not set
+# CONFIG_NET_VENDOR_VIA is not set
+# CONFIG_NET_VENDOR_WIZNET is not set
+# CONFIG_NET_VENDOR_XILINX is not set
+CONFIG_PHYLIB=y
  # CONFIG_USB_NET_DRIVERS is not set
  # CONFIG_WLAN is not set
  CONFIG_INPUT_EVDEV=y
@@ -149,39 +202,51 @@ CONFIG_SERIAL_8250_CONSOLE=y
  CONFIG_IPMI_HANDLER=y
  CONFIG_IPMI_DEVICE_INTERFACE=y
  CONFIG_IPMI_POWERNV=y
+CONFIG_IPMI_WATCHDOG=y
  CONFIG_HW_RANDOM=y
+CONFIG_TCG_TPM=y
  CONFIG_TCG_TIS_I2C_NUVOTON=y
+CONFIG_I2C=y
  # CONFIG_I2C_COMPAT is not set
  CONFIG_I2C_CHARDEV=y
  # CONFIG_I2C_HELPER_AUTO is not set
-CONFIG_DRM=y
-CONFIG_DRM_RADEON=y
+CONFIG_I2C_ALGOBIT=y
+CONFIG_I2C_OPAL=m
+CONFIG_PPS=y
+CONFIG_SENSORS_IBMPOWERNV=m
+CONFIG_DRM=m
  CONFIG_DRM_AST=m
+CONFIG_FB=y
  CONFIG_FIRMWARE_EDID=y
-CONFIG_FB_MODE_HELPERS=y
-CONFIG_FB_OF=y
-CONFIG_FB_MATROX=y
-CONFIG_FB_MATROX_MILLENIUM=y
-CONFIG_FB_MATROX_MYSTIQUE=y
-CONFIG_FB_MATROX_G=y
-# CONFIG_LCD_CLASS_DEVICE is not set
-# CONFIG_BACKLIGHT_GENERIC is not set
  # CONFIG_VGA_CONSOLE is not set
+CONFIG_FRAMEBUFFER_CONSOLE=y
  CONFIG_LOGO=y
  # CONFIG_LOGO_LINUX_MONO is not set
  # CONFIG_LOGO_LINUX_VGA16 is not set
+CONFIG_HID_GENERIC=m
+CONFIG_HID_A4TECH=y
+CONFIG_HID_BELKIN=y
+CONFIG_HID_CHERRY=y
+CONFIG_HID_CHICONY=y
+CONFIG_HID_CYPRESS=y
+CONFIG_HID_EZKEY=y
+CONFIG_HID_ITE=y
+CONFIG_HID_KENSINGTON=y
+CONFIG_HID_LOGITECH=y
+CONFIG_HID_MICROSOFT=y
+CONFIG_HID_MONTEREY=y
  CONFIG_USB_HIDDEV=y
-CONFIG_USB=y
-CONFIG_USB_MON=y
-CONFIG_USB_XHCI_HCD=y
-CONFIG_USB_EHCI_HCD=y
+CONFIG_USB=m
+CONFIG_USB_XHCI_HCD=m
+CONFIG_USB_EHCI_HCD=m
  # CONFIG_USB_EHCI_HCD_PPC_OF is not set
-CONFIG_USB_OHCI_HCD=y
-CONFIG_USB_STORAGE=y
+CONFIG_USB_OHCI_HCD=m
+CONFIG_USB_STORAGE=m
  CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_OPAL=m
  CONFIG_RTC_DRV_GENERIC=m
  CONFIG_VIRT_DRIVERS=y
-CONFIG_VIRTIO_PCI=y
+CONFIG_VIRTIO_PCI=m
  # CONFIG_IOMMU_SUPPORT is not set
  CONFIG_EXT4_FS=m
  CONFIG_EXT4_FS_POSIX_ACL=y
@@ -195,10 +260,9 @@ CONFIG_UDF_FS=m
  CONFIG_MSDOS_FS=m
  CONFIG_VFAT_FS=m
  CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-CONFIG_TMPFS_POSIX_ACL=y
  # CONFIG_MISC_FILESYSTEMS is not set
  # CONFIG_NETWORK_FILESYSTEMS is not set
+CONFIG_NLS=y
  CONFIG_NLS_DEFAULT="utf8"
  CONFIG_NLS_CODEPAGE_437=y
  CONFIG_NLS_ASCII=y
@@ -207,26 +271,24 @@ CONFIG_NLS_UTF8=y
  CONFIG_CRC16=y
  CONFIG_CRC_ITU_T=y
  CONFIG_LIBCRC32C=y
+# CONFIG_XZ_DEC_X86 is not set
+# CONFIG_XZ_DEC_IA64 is not set
+# CONFIG_XZ_DEC_ARM is not set
+# CONFIG_XZ_DEC_ARMTHUMB is not set
+# CONFIG_XZ_DEC_SPARC is not set
  CONFIG_PRINTK_TIME=y
  CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_KERNEL=y
  CONFIG_DEBUG_STACKOVERFLOW=y
  CONFIG_SOFTLOCKUP_DETECTOR=y
+CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y
  CONFIG_HARDLOCKUP_DETECTOR=y
  CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y
-CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y
  CONFIG_WQ_WATCHDOG=y
-CONFIG_SCHEDSTATS=y
+# CONFIG_SCHED_DEBUG is not set
  # CONFIG_FTRACE is not set
+# CONFIG_RUNTIME_TESTING_MENU is not set
  CONFIG_XMON=y
  CONFIG_XMON_DEFAULT=y
-CONFIG_SECURITY=y
-CONFIG_IMA=y
-CONFIG_EVM=y
+CONFIG_ENCRYPTED_KEYS=y
  # CONFIG_CRYPTO_ECHAINIV is not set
-CONFIG_CRYPTO_ECB=y
-CONFIG_CRYPTO_CMAC=y
-CONFIG_CRYPTO_MD4=y
-CONFIG_CRYPTO_ARC4=y
-CONFIG_CRYPTO_DES=y
  # CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/include/asm/accounting.h b/arch/powerpc/include/asm/accounting.h

index 3abcf98..c607c5d 100644 (file)
--- a/arch/powerpc/include/asm/accounting.h
+++ b/arch/powerpc/include/asm/accounting.h
@@ -15,8 +15,10 @@ struct cpu_accounting_data {
         /* Accumulated cputime values to flush on ticks*/
         unsigned long utime;
         unsigned long stime;
+#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
         unsigned long utime_scaled;
         unsigned long stime_scaled;
+#endif
         unsigned long gtime;
         unsigned long hardirq_time;
         unsigned long softirq_time;
@@ -25,8 +27,10 @@ struct cpu_accounting_data {
         /* Internal counters */
         unsigned long starttime;        /* TB value snapshot */
         unsigned long starttime_user;   /* TB value on exit to usermode */
+#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
         unsigned long startspurr;       /* SPURR value snapshot */
         unsigned long utime_sspurr;     /* ->user_time when ->startspurr set */
+#endif
  };
  
  #endif
diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h

index c55ba3b..ec691d4 100644 (file)
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -63,7 +63,6 @@ void program_check_exception(struct pt_regs *regs);
  void alignment_exception(struct pt_regs *regs);
  void slb_miss_bad_addr(struct pt_regs *regs);
  void StackOverflow(struct pt_regs *regs);
-void nonrecoverable_exception(struct pt_regs *regs);
  void kernel_fp_unavailable_exception(struct pt_regs *regs);
  void altivec_unavailable_exception(struct pt_regs *regs);
  void vsx_unavailable_exception(struct pt_regs *regs);
@@ -78,6 +77,8 @@ void kernel_bad_stack(struct pt_regs *regs);
  void system_reset_exception(struct pt_regs *regs);
  void machine_check_exception(struct pt_regs *regs);
  void emulation_assist_interrupt(struct pt_regs *regs);
+long do_slb_fault(struct pt_regs *regs, unsigned long ea);
+void do_bad_slb_fault(struct pt_regs *regs, unsigned long ea, long err);
  
  /* signals, syscalls and interrupts */
  long sys_swapcontext(struct ucontext __user *old_ctx,
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h

index 796d026..c21d337 100644 (file)
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -8,7 +8,97 @@
  #include <asm/book3s/32/hash.h>
  
  /* And here we include common definitions */
-#include <asm/pte-common.h>
+
+#define _PAGE_KERNEL_RO                0
+#define _PAGE_KERNEL_ROX       0
+#define _PAGE_KERNEL_RW                (_PAGE_DIRTY | _PAGE_RW)
+#define _PAGE_KERNEL_RWX       (_PAGE_DIRTY | _PAGE_RW)
+
+#define _PAGE_HPTEFLAGS _PAGE_HASHPTE
+
+#ifndef __ASSEMBLY__
+
+static inline bool pte_user(pte_t pte)
+{
+       return pte_val(pte) & _PAGE_USER;
+}
+#endif /* __ASSEMBLY__ */
+
+/*
+ * Location of the PFN in the PTE. Most 32-bit platforms use the same
+ * as PAGE_SHIFT here (ie, naturally aligned).
+ * Platforms that don't just pre-define the value so we don't override it here.
+ */
+#define PTE_RPN_SHIFT  (PAGE_SHIFT)
+
+/*
+ * The mask covered by the RPN must be a ULL on 32-bit platforms with
+ * 64-bit PTEs.
+ */
+#ifdef CONFIG_PTE_64BIT
+#define PTE_RPN_MASK   (~((1ULL << PTE_RPN_SHIFT) - 1))
+#else
+#define PTE_RPN_MASK   (~((1UL << PTE_RPN_SHIFT) - 1))
+#endif
+
+/*
+ * _PAGE_CHG_MASK masks of bits that are to be preserved across
+ * pgprot changes.
+ */
+#define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HASHPTE | _PAGE_DIRTY | \
+                        _PAGE_ACCESSED | _PAGE_SPECIAL)
+
+/*
+ * We define 2 sets of base prot bits, one for basic pages (ie,
+ * cacheable kernel and user pages) and one for non cacheable
+ * pages. We always set _PAGE_COHERENT when SMP is enabled or
+ * the processor might need it for DMA coherency.
+ */
+#define _PAGE_BASE_NC  (_PAGE_PRESENT | _PAGE_ACCESSED)
+#define _PAGE_BASE     (_PAGE_BASE_NC | _PAGE_COHERENT)
+
+/*
+ * Permission masks used to generate the __P and __S table.
+ *
+ * Note:__pgprot is defined in arch/powerpc/include/asm/page.h
+ *
+ * Write permissions imply read permissions for now.
+ */
+#define PAGE_NONE      __pgprot(_PAGE_BASE)
+#define PAGE_SHARED    __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW)
+#define PAGE_SHARED_X  __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW)
+#define PAGE_COPY      __pgprot(_PAGE_BASE | _PAGE_USER)
+#define PAGE_COPY_X    __pgprot(_PAGE_BASE | _PAGE_USER)
+#define PAGE_READONLY  __pgprot(_PAGE_BASE | _PAGE_USER)
+#define PAGE_READONLY_X        __pgprot(_PAGE_BASE | _PAGE_USER)
+
+/* Permission masks used for kernel mappings */
+#define PAGE_KERNEL    __pgprot(_PAGE_BASE | _PAGE_KERNEL_RW)
+#define PAGE_KERNEL_NC __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | _PAGE_NO_CACHE)
+#define PAGE_KERNEL_NCG        __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
+                                _PAGE_NO_CACHE | _PAGE_GUARDED)
+#define PAGE_KERNEL_X  __pgprot(_PAGE_BASE | _PAGE_KERNEL_RWX)
+#define PAGE_KERNEL_RO __pgprot(_PAGE_BASE | _PAGE_KERNEL_RO)
+#define PAGE_KERNEL_ROX        __pgprot(_PAGE_BASE | _PAGE_KERNEL_ROX)
+
+/*
+ * Protection used for kernel text. We want the debuggers to be able to
+ * set breakpoints anywhere, so don't write protect the kernel text
+ * on platforms where such control is possible.
+ */
+#if defined(CONFIG_KGDB) || defined(CONFIG_XMON) || defined(CONFIG_BDI_SWITCH) ||\
+       defined(CONFIG_KPROBES) || defined(CONFIG_DYNAMIC_FTRACE)
+#define PAGE_KERNEL_TEXT       PAGE_KERNEL_X
+#else
+#define PAGE_KERNEL_TEXT       PAGE_KERNEL_ROX
+#endif
+
+/* Make modules code happy. We don't set RO yet */
+#define PAGE_KERNEL_EXEC       PAGE_KERNEL_X
+
+/* Advertise special mapping type for AGP */
+#define PAGE_AGP               (PAGE_KERNEL_NC)
+#define HAVE_PAGE_AGP
  
  #define PTE_INDEX_SIZE PTE_SHIFT
  #define PMD_INDEX_SIZE 0
@@ -219,7 +309,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
  static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
                                       pte_t *ptep)
  {
-       pte_update(ptep, (_PAGE_RW | _PAGE_HWWRITE), _PAGE_RO);
+       pte_update(ptep, _PAGE_RW, 0);
  }
  
  static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
@@ -228,10 +318,9 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
                                            int psize)
  {
         unsigned long set = pte_val(entry) &
-               (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
-       unsigned long clr = ~pte_val(entry) & _PAGE_RO;
+               (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW);
  
-       pte_update(ptep, clr, set);
+       pte_update(ptep, 0, set);
  
         flush_tlb_page(vma, address);
  }
@@ -286,7 +375,7 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
  #define __pte_to_swp_entry(pte)                ((swp_entry_t) { pte_val(pte) >> 3 })
  #define __swp_entry_to_pte(x)          ((pte_t) { (x).val << 3 })
  
-int map_kernel_page(unsigned long va, phys_addr_t pa, int flags);
+int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot);
  
  /* Generic accessors to PTE bits */
  static inline int pte_write(pte_t pte)         { return !!(pte_val(pte) & _PAGE_RW);}
@@ -295,13 +384,28 @@ static inline int pte_dirty(pte_t pte)            { return !!(pte_val(pte) & _PAGE_DIRTY);
  static inline int pte_young(pte_t pte)         { return !!(pte_val(pte) & _PAGE_ACCESSED); }
  static inline int pte_special(pte_t pte)       { return !!(pte_val(pte) & _PAGE_SPECIAL); }
  static inline int pte_none(pte_t pte)          { return (pte_val(pte) & ~_PTE_NONE_MASK) == 0; }
-static inline pgprot_t pte_pgprot(pte_t pte)   { return __pgprot(pte_val(pte) & PAGE_PROT_BITS); }
+static inline bool pte_exec(pte_t pte)         { return true; }
  
  static inline int pte_present(pte_t pte)
  {
         return pte_val(pte) & _PAGE_PRESENT;
  }
  
+static inline bool pte_hw_valid(pte_t pte)
+{
+       return pte_val(pte) & _PAGE_PRESENT;
+}
+
+static inline bool pte_hashpte(pte_t pte)
+{
+       return !!(pte_val(pte) & _PAGE_HASHPTE);
+}
+
+static inline bool pte_ci(pte_t pte)
+{
+       return !!(pte_val(pte) & _PAGE_NO_CACHE);
+}
+
  /*
   * We only find page table entry in the last level
   * Hence no need for other accessors
@@ -309,17 +413,14 @@ static inline int pte_present(pte_t pte)
  #define pte_access_permitted pte_access_permitted
  static inline bool pte_access_permitted(pte_t pte, bool write)
  {
-       unsigned long pteval = pte_val(pte);
         /*
          * A read-only access is controlled by _PAGE_USER bit.
          * We have _PAGE_READ set for WRITE and EXECUTE
          */
-       unsigned long need_pte_bits = _PAGE_PRESENT | _PAGE_USER;
-
-       if (write)
-               need_pte_bits |= _PAGE_WRITE;
+       if (!pte_present(pte) || !pte_user(pte) || !pte_read(pte))
+               return false;
  
-       if ((pteval & need_pte_bits) != need_pte_bits)
+       if (write && !pte_write(pte))
                 return false;
  
         return true;
@@ -348,6 +449,11 @@ static inline pte_t pte_wrprotect(pte_t pte)
         return __pte(pte_val(pte) & ~_PAGE_RW);
  }
  
+static inline pte_t pte_exprotect(pte_t pte)
+{
+       return pte;
+}
+
  static inline pte_t pte_mkclean(pte_t pte)
  {
         return __pte(pte_val(pte) & ~_PAGE_DIRTY);
@@ -358,6 +464,16 @@ static inline pte_t pte_mkold(pte_t pte)
         return __pte(pte_val(pte) & ~_PAGE_ACCESSED);
  }
  
+static inline pte_t pte_mkexec(pte_t pte)
+{
+       return pte;
+}
+
+static inline pte_t pte_mkpte(pte_t pte)
+{
+       return pte;
+}
+
  static inline pte_t pte_mkwrite(pte_t pte)
  {
         return __pte(pte_val(pte) | _PAGE_RW);
@@ -383,6 +499,16 @@ static inline pte_t pte_mkhuge(pte_t pte)
         return pte;
  }
  
+static inline pte_t pte_mkprivileged(pte_t pte)
+{
+       return __pte(pte_val(pte) & ~_PAGE_USER);
+}
+
+static inline pte_t pte_mkuser(pte_t pte)
+{
+       return __pte(pte_val(pte) | _PAGE_USER);
+}
+
  static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
  {
         return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h

index 9a37986..15bc16b 100644 (file)
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -66,7 +66,7 @@ static inline int hash__hugepd_ok(hugepd_t hpd)
          * if it is not a pte and have hugepd shift mask
          * set, then it is a hugepd directory pointer
          */
-       if (!(hpdval & _PAGE_PTE) &&
+       if (!(hpdval & _PAGE_PTE) && (hpdval & _PAGE_PRESENT) &&
             ((hpdval & HUGEPD_SHIFT_MASK) != 0))
                 return true;
         return false;
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h

index d52a51b..247aff9 100644 (file)
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -18,6 +18,11 @@
  #include <asm/book3s/64/hash-4k.h>
  #endif
  
+/* Valid bit to set in a PMD/PUD/PGD entry */
+#define HASH_PMD_VAL_BITS              (0x8000000000000000UL)
+#define HASH_PUD_VAL_BITS              (0x8000000000000000UL)
+#define HASH_PGD_VAL_BITS              (0x8000000000000000UL)
+
  /*
   * Size of EA range mapped by our pagetables.
   */
@@ -196,8 +201,7 @@ static inline void hpte_do_hugepage_flush(struct mm_struct *mm,
  #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
  
  
-extern int hash__map_kernel_page(unsigned long ea, unsigned long pa,
-                            unsigned long flags);
+int hash__map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot);
  extern int __meminit hash__vmemmap_create_mapping(unsigned long start,
                                               unsigned long page_size,
                                               unsigned long phys);
diff --git a/arch/powerpc/include/asm/book3s/64/hugetlb.h b/arch/powerpc/include/asm/book3s/64/hugetlb.h

index 5088838..5b01777 100644 (file)
--- a/arch/powerpc/include/asm/book3s/64/hugetlb.h
+++ b/arch/powerpc/include/asm/book3s/64/hugetlb.h
@@ -39,4 +39,7 @@ static inline bool gigantic_page_supported(void)
  }
  #endif
  
+/* hugepd entry valid bit */
+#define HUGEPD_VAL_BITS                (0x8000000000000000UL)
+
  #endif
diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h

index 66db23e..12e5228 100644 (file)
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -30,7 +30,7 @@
   * SLB
   */
  
-#define SLB_NUM_BOLTED         3
+#define SLB_NUM_BOLTED         2
  #define SLB_CACHE_ENTRIES      8
  #define SLB_MIN_SIZE           32
  
@@ -499,6 +499,8 @@ int htab_remove_mapping(unsigned long vstart, unsigned long vend,
  extern void pseries_add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages);
  extern void demote_segment_4k(struct mm_struct *mm, unsigned long addr);
  
+extern void hash__setup_new_exec(void);
+
  #ifdef CONFIG_PPC_PSERIES
  void hpte_init_pseries(void);
  #else
@@ -507,11 +509,18 @@ static inline void hpte_init_pseries(void) { }
  
  extern void hpte_init_native(void);
  
+struct slb_entry {
+       u64     esid;
+       u64     vsid;
+};
+
  extern void slb_initialize(void);
-extern void slb_flush_and_rebolt(void);
+void slb_flush_and_restore_bolted(void);
  void slb_flush_all_realmode(void);
  void __slb_restore_bolted_realmode(void);
  void slb_restore_bolted_realmode(void);
+void slb_save_contents(struct slb_entry *slb_ptr);
+void slb_dump_contents(struct slb_entry *slb_ptr);
  
  extern void slb_vmalloc_update(void);
  extern void slb_set_size(u16 size);
@@ -524,13 +533,9 @@ extern void slb_set_size(u16 size);
   * from mmu context id and effective segment id of the address.
   *
   * For user processes max context id is limited to MAX_USER_CONTEXT.
-
- * For kernel space, we use context ids 1-4 to map addresses as below:
- * NOTE: each context only support 64TB now.
- * 0x00001 -  [ 0xc000000000000000 - 0xc0003fffffffffff ]
- * 0x00002 -  [ 0xd000000000000000 - 0xd0003fffffffffff ]
- * 0x00003 -  [ 0xe000000000000000 - 0xe0003fffffffffff ]
- * 0x00004 -  [ 0xf000000000000000 - 0xf0003fffffffffff ]
+ * See get_user_context() for more details.
+ *
+ * For kernel space, see get_kernel_context().
   *
   * The proto-VSIDs are then scrambled into real VSIDs with the
   * multiplicative hash:
@@ -570,6 +575,21 @@ extern void slb_set_size(u16 size);
  #define ESID_BITS_MASK         ((1 << ESID_BITS) - 1)
  #define ESID_BITS_1T_MASK      ((1 << ESID_BITS_1T) - 1)
  
+/*
+ * Certain configs now support MAX_PHYSMEM of more than 512TB. Hence we need
+ * to use more than one context for the kernel linear mapping.
+ * For vmalloc and memmap, we use just one context with 512TB. With 64 byte
+ * struct page size, we need only 32 TB in memmap for 2PB (51 bits (MAX_PHYSMEM_BITS)).
+ */
+#if (MAX_PHYSMEM_BITS > MAX_EA_BITS_PER_CONTEXT)
+#define MAX_KERNEL_CTX_CNT     (1UL << (MAX_PHYSMEM_BITS - MAX_EA_BITS_PER_CONTEXT))
+#else
+#define MAX_KERNEL_CTX_CNT     1
+#endif
+
+#define MAX_VMALLOC_CTX_CNT    1
+#define MAX_MEMMAP_CTX_CNT     1
+
  /*
   * 256MB segment
   * The proto-VSID space has 2^(CONTEX_BITS + ESID_BITS) - 1 segments
@@ -580,12 +600,13 @@ extern void slb_set_size(u16 size);
   * We also need to avoid the last segment of the last context, because that
   * would give a protovsid of 0x1fffffffff. That will result in a VSID 0
   * because of the modulo operation in vsid scramble.
+ *
+ * We add one extra context to MIN_USER_CONTEXT so that we can map kernel
+ * context easily. The +1 is to map the unused 0xe region mapping.
   */
  #define MAX_USER_CONTEXT       ((ASM_CONST(1) << CONTEXT_BITS) - 2)
-#define MIN_USER_CONTEXT       (5)
-
-/* Would be nice to use KERNEL_REGION_ID here */
-#define KERNEL_REGION_CONTEXT_OFFSET   (0xc - 1)
+#define MIN_USER_CONTEXT       (MAX_KERNEL_CTX_CNT + MAX_VMALLOC_CTX_CNT + \
+                                MAX_MEMMAP_CTX_CNT + 2)
  
  /*
   * For platforms that support on 65bit VA we limit the context bits
@@ -745,6 +766,39 @@ static inline unsigned long get_vsid(unsigned long context, unsigned long ea,
         return vsid_scramble(protovsid, VSID_MULTIPLIER_1T, vsid_bits);
  }
  
+/*
+ * For kernel space, we use context ids as shown
+ * below. Range is 512TB per context.
+ *
+ * 0x00001 -  [ 0xc000000000000000 - 0xc001ffffffffffff]
+ * 0x00002 -  [ 0xc002000000000000 - 0xc003ffffffffffff]
+ * 0x00003 -  [ 0xc004000000000000 - 0xc005ffffffffffff]
+ * 0x00004 -  [ 0xc006000000000000 - 0xc007ffffffffffff]
+ *
+ * 0x00005 -  [ 0xd000000000000000 - 0xd001ffffffffffff ]
+ * 0x00006 -  Not used - Can map 0xe000000000000000 range.
+ * 0x00007 -  [ 0xf000000000000000 - 0xf001ffffffffffff ]
+ *
+ * Linear-map (0xc) contexts are 1 + (region offset >> MAX_EA_BITS_PER_CONTEXT);
+ * other regions use (region - 0xc) + MAX_KERNEL_CTX_CNT.
+ */
+static inline unsigned long get_kernel_context(unsigned long ea)
+{
+       unsigned long region_id = REGION_ID(ea);
+       unsigned long ctx;
+       /*
+        * For linear mapping we do support multiple contexts
+        */
+       if (region_id == KERNEL_REGION_ID) {
+               /*
+                * We already verified ea to be not beyond the addr limit.
+                */
+               ctx =  1 + ((ea & ~REGION_MASK) >> MAX_EA_BITS_PER_CONTEXT);
+       } else
+               ctx = (region_id - 0xc) + MAX_KERNEL_CTX_CNT;
+       return ctx;
+}
+
  /*
   * This is only valid for addresses >= PAGE_OFFSET
   */
@@ -755,20 +809,7 @@ static inline unsigned long get_kernel_vsid(unsigned long ea, int ssize)
         if (!is_kernel_addr(ea))
                 return 0;
  
-       /*
-        * For kernel space, we use context ids 1-4 to map the address space as
-        * below:
-        *
-        * 0x00001 -  [ 0xc000000000000000 - 0xc0003fffffffffff ]
-        * 0x00002 -  [ 0xd000000000000000 - 0xd0003fffffffffff ]
-        * 0x00003 -  [ 0xe000000000000000 - 0xe0003fffffffffff ]
-        * 0x00004 -  [ 0xf000000000000000 - 0xf0003fffffffffff ]
-        *
-        * So we can compute the context from the region (top nibble) by
-        * subtracting 11, or 0xc - 1.
-        */
-       context = (ea >> 60) - KERNEL_REGION_CONTEXT_OFFSET;
-
+       context = get_kernel_context(ea);
         return get_vsid(context, ea, ssize);
  }
  
diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h

index 9c8c669..6328857 100644 (file)
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -208,7 +208,7 @@ extern void radix_init_pseries(void);
  static inline void radix_init_pseries(void) { };
  #endif
  
-static inline int get_ea_context(mm_context_t *ctx, unsigned long ea)
+static inline int get_user_context(mm_context_t *ctx, unsigned long ea)
  {
         int index = ea >> MAX_EA_BITS_PER_CONTEXT;
  
@@ -223,7 +223,7 @@ static inline int get_ea_context(mm_context_t *ctx, unsigned long ea)
  static inline unsigned long get_user_vsid(mm_context_t *ctx,
                                           unsigned long ea, int ssize)
  {
-       unsigned long context = get_ea_context(ctx, ea);
+       unsigned long context = get_user_context(ctx, ea);
  
         return get_vsid(context, ea, ssize);
  }
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable-64k.h b/arch/powerpc/include/asm/book3s/64/pgtable-64k.h

index d7ee249..e3d4dd4 100644 (file)
--- a/arch/powerpc/include/asm/book3s/64/pgtable-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable-64k.h
@@ -10,6 +10,9 @@
   *
   * Defined in such a way that we can optimize away code block at build time
   * if CONFIG_HUGETLB_PAGE=n.
+ *
+ * Returns true for pmd migration entries, THP, devmap and hugetlb entries,
+ * but is compile-time dependent on CONFIG_HUGETLB_PAGE.
   */
  static inline int pmd_huge(pmd_t pmd)
  {
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h

index fc69eda..c4a726c 100644 (file)
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -14,10 +14,6 @@
   */
  #define _PAGE_BIT_SWAP_TYPE    0
  
-#define _PAGE_NA               0
-#define _PAGE_RO               0
-#define _PAGE_USER             0
-
  #define _PAGE_EXEC             0x00001 /* execute permission */
  #define _PAGE_WRITE            0x00002 /* write access allowed */
  #define _PAGE_READ             0x00004 /* read access allowed */
@@ -122,10 +118,6 @@
  #define _PAGE_KERNEL_RO                 (_PAGE_PRIVILEGED | _PAGE_READ)
  #define _PAGE_KERNEL_RWX       (_PAGE_PRIVILEGED | _PAGE_DIRTY |       \
                                  _PAGE_RW | _PAGE_EXEC)
-/*
- * No page size encoding in the linux PTE
- */
-#define _PAGE_PSIZE            0
  /*
   * _PAGE_CHG_MASK masks of bits that are to be preserved across
   * pgprot changes
@@ -136,20 +128,13 @@
  
  #define H_PTE_PKEY  (H_PTE_PKEY_BIT0 | H_PTE_PKEY_BIT1 | H_PTE_PKEY_BIT2 | \
                      H_PTE_PKEY_BIT3 | H_PTE_PKEY_BIT4)
-/*
- * Mask of bits returned by pte_pgprot()
- */
-#define PAGE_PROT_BITS  (_PAGE_SAO | _PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT | \
-                        H_PAGE_4K_PFN | _PAGE_PRIVILEGED | _PAGE_ACCESSED | \
-                        _PAGE_READ | _PAGE_WRITE |  _PAGE_DIRTY | _PAGE_EXEC | \
-                        _PAGE_SOFT_DIRTY | H_PTE_PKEY)
  /*
   * We define 2 sets of base prot bits, one for basic pages (ie,
   * cacheable kernel and user pages) and one for non cacheable
   * pages. We always set _PAGE_COHERENT when SMP is enabled or
   * the processor might need it for DMA coherency.
   */
-#define _PAGE_BASE_NC  (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_PSIZE)
+#define _PAGE_BASE_NC  (_PAGE_PRESENT | _PAGE_ACCESSED)
  #define _PAGE_BASE     (_PAGE_BASE_NC)
  
  /* Permission masks used to generate the __P and __S table,
@@ -159,8 +144,6 @@
   * Write permissions imply read permissions for now (we could make write-only
   * pages on BookE but we don't bother for now). Execute permission control is
   * possible on platforms that define _PAGE_EXEC
- *
- * Note due to the way vm flags are laid out, the bits are XWR
   */
  #define PAGE_NONE      __pgprot(_PAGE_BASE | _PAGE_PRIVILEGED)
  #define PAGE_SHARED    __pgprot(_PAGE_BASE | _PAGE_RW)
@@ -170,24 +153,6 @@
  #define PAGE_READONLY  __pgprot(_PAGE_BASE | _PAGE_READ)
  #define PAGE_READONLY_X        __pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC)
  
-#define __P000 PAGE_NONE
-#define __P001 PAGE_READONLY
-#define __P010 PAGE_COPY
-#define __P011 PAGE_COPY
-#define __P100 PAGE_READONLY_X
-#define __P101 PAGE_READONLY_X
-#define __P110 PAGE_COPY_X
-#define __P111 PAGE_COPY_X
-
-#define __S000 PAGE_NONE
-#define __S001 PAGE_READONLY
-#define __S010 PAGE_SHARED
-#define __S011 PAGE_SHARED
-#define __S100 PAGE_READONLY_X
-#define __S101 PAGE_READONLY_X
-#define __S110 PAGE_SHARED_X
-#define __S111 PAGE_SHARED_X
-
  /* Permission masks used for kernel mappings */
  #define PAGE_KERNEL    __pgprot(_PAGE_BASE | _PAGE_KERNEL_RW)
  #define PAGE_KERNEL_NC __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
@@ -520,7 +485,11 @@ static inline int pte_special(pte_t pte)
         return !!(pte_raw(pte) & cpu_to_be64(_PAGE_SPECIAL));
  }
  
-static inline pgprot_t pte_pgprot(pte_t pte)   { return __pgprot(pte_val(pte) & PAGE_PROT_BITS); }
+static inline bool pte_exec(pte_t pte)
+{
+       return !!(pte_raw(pte) & cpu_to_be64(_PAGE_EXEC));
+}
+
  
  #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
  static inline bool pte_soft_dirty(pte_t pte)
@@ -530,12 +499,12 @@ static inline bool pte_soft_dirty(pte_t pte)
  
  static inline pte_t pte_mksoft_dirty(pte_t pte)
  {
-       return __pte(pte_val(pte) | _PAGE_SOFT_DIRTY);
+       return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_SOFT_DIRTY));
  }
  
  static inline pte_t pte_clear_soft_dirty(pte_t pte)
  {
-       return __pte(pte_val(pte) & ~_PAGE_SOFT_DIRTY);
+       return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_SOFT_DIRTY));
  }
  #endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
  
@@ -556,7 +525,7 @@ static inline pte_t pte_mk_savedwrite(pte_t pte)
          */
         VM_BUG_ON((pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT | _PAGE_RWX | _PAGE_PRIVILEGED)) !=
                   cpu_to_be64(_PAGE_PRESENT | _PAGE_PRIVILEGED));
-       return __pte(pte_val(pte) & ~_PAGE_PRIVILEGED);
+       return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_PRIVILEGED));
  }
  
  #define pte_clear_savedwrite pte_clear_savedwrite
@@ -566,14 +535,14 @@ static inline pte_t pte_clear_savedwrite(pte_t pte)
          * Used by KSM subsystem to make a protnone pte readonly.
          */
         VM_BUG_ON(!pte_protnone(pte));
-       return __pte(pte_val(pte) | _PAGE_PRIVILEGED);
+       return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_PRIVILEGED));
  }
  #else
  #define pte_clear_savedwrite pte_clear_savedwrite
  static inline pte_t pte_clear_savedwrite(pte_t pte)
  {
         VM_WARN_ON(1);
-       return __pte(pte_val(pte) & ~_PAGE_WRITE);
+       return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_WRITE));
  }
  #endif /* CONFIG_NUMA_BALANCING */
  
@@ -588,6 +557,11 @@ static inline int pte_present(pte_t pte)
         return !!(pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT | _PAGE_INVALID));
  }
  
+static inline bool pte_hw_valid(pte_t pte)
+{
+       return !!(pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT));
+}
+
  #ifdef CONFIG_PPC_MEM_KEYS
  extern bool arch_pte_access_permitted(u64 pte, bool write, bool execute);
  #else
@@ -597,25 +571,22 @@ static inline bool arch_pte_access_permitted(u64 pte, bool write, bool execute)
  }
  #endif /* CONFIG_PPC_MEM_KEYS */
  
+static inline bool pte_user(pte_t pte)
+{
+       return !(pte_raw(pte) & cpu_to_be64(_PAGE_PRIVILEGED));
+}
+
  #define pte_access_permitted pte_access_permitted
  static inline bool pte_access_permitted(pte_t pte, bool write)
  {
-       unsigned long pteval = pte_val(pte);
-       /* Also check for pte_user */
-       unsigned long clear_pte_bits = _PAGE_PRIVILEGED;
         /*
          * _PAGE_READ is needed for any access and will be
          * cleared for PROT_NONE
          */
-       unsigned long need_pte_bits = _PAGE_PRESENT | _PAGE_READ;
-
-       if (write)
-               need_pte_bits |= _PAGE_WRITE;
-
-       if ((pteval & need_pte_bits) != need_pte_bits)
+       if (!pte_present(pte) || !pte_user(pte) || !pte_read(pte))
                 return false;
  
-       if ((pteval & clear_pte_bits) == clear_pte_bits)
+       if (write && !pte_write(pte))
                 return false;
  
         return arch_pte_access_permitted(pte_val(pte), write, 0);
@@ -644,17 +615,32 @@ static inline pte_t pte_wrprotect(pte_t pte)
  {
         if (unlikely(pte_savedwrite(pte)))
                 return pte_clear_savedwrite(pte);
-       return __pte(pte_val(pte) & ~_PAGE_WRITE);
+       return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_WRITE));
+}
+
+static inline pte_t pte_exprotect(pte_t pte)
+{
+       return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_EXEC));
  }
  
  static inline pte_t pte_mkclean(pte_t pte)
  {
-       return __pte(pte_val(pte) & ~_PAGE_DIRTY);
+       return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_DIRTY));
  }
  
  static inline pte_t pte_mkold(pte_t pte)
  {
-       return __pte(pte_val(pte) & ~_PAGE_ACCESSED);
+       return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_ACCESSED));
+}
+
+static inline pte_t pte_mkexec(pte_t pte)
+{
+       return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_EXEC));
+}
+
+static inline pte_t pte_mkpte(pte_t pte)
+{
+       return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_PTE));
  }
  
  static inline pte_t pte_mkwrite(pte_t pte)
@@ -662,22 +648,22 @@ static inline pte_t pte_mkwrite(pte_t pte)
         /*
          * write implies read, hence set both
          */
-       return __pte(pte_val(pte) | _PAGE_RW);
+       return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_RW));
  }
  
  static inline pte_t pte_mkdirty(pte_t pte)
  {
-       return __pte(pte_val(pte) | _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
+       return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_DIRTY | _PAGE_SOFT_DIRTY));
  }
  
  static inline pte_t pte_mkyoung(pte_t pte)
  {
-       return __pte(pte_val(pte) | _PAGE_ACCESSED);
+       return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_ACCESSED));
  }
  
  static inline pte_t pte_mkspecial(pte_t pte)
  {
-       return __pte(pte_val(pte) | _PAGE_SPECIAL);
+       return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_SPECIAL));
  }
  
  static inline pte_t pte_mkhuge(pte_t pte)
@@ -687,7 +673,17 @@ static inline pte_t pte_mkhuge(pte_t pte)
  
  static inline pte_t pte_mkdevmap(pte_t pte)
  {
-       return __pte(pte_val(pte) | _PAGE_SPECIAL|_PAGE_DEVMAP);
+       return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_SPECIAL | _PAGE_DEVMAP));
+}
+
+static inline pte_t pte_mkprivileged(pte_t pte)
+{
+       return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_PRIVILEGED));
+}
+
+static inline pte_t pte_mkuser(pte_t pte)
+{
+       return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_PRIVILEGED));
  }
  
  /*
@@ -706,12 +702,8 @@ static inline int pte_devmap(pte_t pte)
  static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
  {
         /* FIXME!! check whether this need to be a conditional */
-       return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
-}
-
-static inline bool pte_user(pte_t pte)
-{
-       return !(pte_raw(pte) & cpu_to_be64(_PAGE_PRIVILEGED));
+       return __pte_raw((pte_raw(pte) & cpu_to_be64(_PAGE_CHG_MASK)) |
+                        cpu_to_be64(pgprot_val(newprot)));
  }
  
  /* Encode and de-code a swap entry */
@@ -742,6 +734,8 @@ static inline bool pte_user(pte_t pte)
   */
  #define __pte_to_swp_entry(pte)        ((swp_entry_t) { pte_val((pte)) & ~_PAGE_PTE })
  #define __swp_entry_to_pte(x)  __pte((x).val | _PAGE_PTE)
+#define __pmd_to_swp_entry(pmd)        (__pte_to_swp_entry(pmd_pte(pmd)))
+#define __swp_entry_to_pmd(x)  (pte_pmd(__swp_entry_to_pte(x)))
  
  #ifdef CONFIG_MEM_SOFT_DIRTY
  #define _PAGE_SWP_SOFT_DIRTY   (1UL << (SWP_TYPE_BITS + _PAGE_BIT_SWAP_TYPE))
@@ -752,7 +746,7 @@ static inline bool pte_user(pte_t pte)
  #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
  static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
  {
-       return __pte(pte_val(pte) | _PAGE_SWP_SOFT_DIRTY);
+       return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_SWP_SOFT_DIRTY));
  }
  
  static inline bool pte_swp_soft_dirty(pte_t pte)
@@ -762,7 +756,7 @@ static inline bool pte_swp_soft_dirty(pte_t pte)
  
  static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
  {
-       return __pte(pte_val(pte) & ~_PAGE_SWP_SOFT_DIRTY);
+       return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_SWP_SOFT_DIRTY));
  }
  #endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
  
@@ -851,10 +845,10 @@ static inline pgprot_t pgprot_writecombine(pgprot_t prot)
   */
  static inline bool pte_ci(pte_t pte)
  {
-       unsigned long pte_v = pte_val(pte);
+       __be64 pte_v = pte_raw(pte);
  
-       if (((pte_v & _PAGE_CACHE_CTL) == _PAGE_TOLERANT) ||
-           ((pte_v & _PAGE_CACHE_CTL) == _PAGE_NON_IDEMPOTENT))
+       if (((pte_v & cpu_to_be64(_PAGE_CACHE_CTL)) == cpu_to_be64(_PAGE_TOLERANT)) ||
+           ((pte_v & cpu_to_be64(_PAGE_CACHE_CTL)) == cpu_to_be64(_PAGE_NON_IDEMPOTENT)))
                 return true;
         return false;
  }
@@ -876,8 +870,16 @@ static inline int pmd_none(pmd_t pmd)
  
  static inline int pmd_present(pmd_t pmd)
  {
+       /*
+        * A pmd is considered present if _PAGE_PRESENT is set.
+        * We also need to consider the pmd present which is marked
+        * invalid during a split. Hence we look for _PAGE_INVALID
+        * if we find _PAGE_PRESENT cleared.
+        */
+       if (pmd_raw(pmd) & cpu_to_be64(_PAGE_PRESENT | _PAGE_INVALID))
+               return true;
  
-       return !pmd_none(pmd);
+       return false;
  }
  
  static inline int pmd_bad(pmd_t pmd)
@@ -904,7 +906,7 @@ static inline int pud_none(pud_t pud)
  
  static inline int pud_present(pud_t pud)
  {
-       return !pud_none(pud);
+       return (pud_raw(pud) & cpu_to_be64(_PAGE_PRESENT));
  }
  
  extern struct page *pud_page(pud_t pud);
@@ -951,7 +953,7 @@ static inline int pgd_none(pgd_t pgd)
  
  static inline int pgd_present(pgd_t pgd)
  {
-       return !pgd_none(pgd);
+       return (pgd_raw(pgd) & cpu_to_be64(_PAGE_PRESENT));
  }
  
  static inline pte_t pgd_pte(pgd_t pgd)
@@ -1021,17 +1023,16 @@ extern struct page *pgd_page(pgd_t pgd);
  #define pgd_ERROR(e) \
         pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
  
-static inline int map_kernel_page(unsigned long ea, unsigned long pa,
-                                 unsigned long flags)
+static inline int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot)
  {
         if (radix_enabled()) {
  #if defined(CONFIG_PPC_RADIX_MMU) && defined(DEBUG_VM)
                 unsigned long page_size = 1 << mmu_psize_defs[mmu_io_psize].shift;
                 WARN((page_size != PAGE_SIZE), "I/O page size != PAGE_SIZE");
  #endif
-               return radix__map_kernel_page(ea, pa, __pgprot(flags), PAGE_SIZE);
+               return radix__map_kernel_page(ea, pa, prot, PAGE_SIZE);
         }
-       return hash__map_kernel_page(ea, pa, flags);
+       return hash__map_kernel_page(ea, pa, prot);
  }
  
  static inline int __meminit vmemmap_create_mapping(unsigned long start,
@@ -1083,6 +1084,12 @@ static inline pte_t *pmdp_ptep(pmd_t *pmd)
  #define pmd_soft_dirty(pmd)    pte_soft_dirty(pmd_pte(pmd))
  #define pmd_mksoft_dirty(pmd)  pte_pmd(pte_mksoft_dirty(pmd_pte(pmd)))
  #define pmd_clear_soft_dirty(pmd) pte_pmd(pte_clear_soft_dirty(pmd_pte(pmd)))
+
+#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
+#define pmd_swp_mksoft_dirty(pmd)      pte_pmd(pte_swp_mksoft_dirty(pmd_pte(pmd)))
+#define pmd_swp_soft_dirty(pmd)                pte_swp_soft_dirty(pmd_pte(pmd))
+#define pmd_swp_clear_soft_dirty(pmd)  pte_pmd(pte_swp_clear_soft_dirty(pmd_pte(pmd)))
+#endif
  #endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
  
  #ifdef CONFIG_NUMA_BALANCING
@@ -1128,6 +1135,10 @@ pmd_hugepage_update(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp,
         return hash__pmd_hugepage_update(mm, addr, pmdp, clr, set);
  }
  
+/*
+ * returns true for pmd migration entries, THP, devmap, hugetlb
+ * But compile time dependent on THP config
+ */
  static inline int pmd_large(pmd_t pmd)
  {
         return !!(pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE));
@@ -1162,8 +1173,22 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
                 pmd_hugepage_update(mm, addr, pmdp, 0, _PAGE_PRIVILEGED);
  }
  
+/*
+ * Only returns true for a THP. False for pmd migration entry.
+ * We also need to return true when we come across a pte that is
+ * in the middle of a THP split. While splitting THP, we mark the pmd
+ * invalid (pmdp_invalidate()) before we set it with pte page
+ * address. A pmd_trans_huge() check against a pmd entry during that time
+ * should return true.
+ * We should not call this on a hugetlb entry. We should check for HugeTLB
+ * entry using vma->vm_flags
+ * The page table walk rule is explained in Documentation/vm/transhuge.rst
+ */
  static inline int pmd_trans_huge(pmd_t pmd)
  {
+       if (!pmd_present(pmd))
+               return false;
+
         if (radix_enabled())
                 return radix__pmd_trans_huge(pmd);
         return hash__pmd_trans_huge(pmd);
diff --git a/arch/powerpc/include/asm/cputhreads.h b/arch/powerpc/include/asm/cputhreads.h

index d71a909..deb99fd 100644 (file)
--- a/arch/powerpc/include/asm/cputhreads.h
+++ b/arch/powerpc/include/asm/cputhreads.h
@@ -23,11 +23,13 @@
  extern int threads_per_core;
  extern int threads_per_subcore;
  extern int threads_shift;
+extern bool has_big_cores;
  extern cpumask_t threads_core_mask;
  #else
  #define threads_per_core       1
  #define threads_per_subcore    1
  #define threads_shift          0
+#define has_big_cores          0
  #define threads_core_mask      (*get_cpu_mask(0))
  #endif
  
diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h

index 1336727..ae73dc8 100644 (file)
--- a/arch/powerpc/include/asm/cputime.h
+++ b/arch/powerpc/include/asm/cputime.h
@@ -61,7 +61,6 @@ static inline void arch_vtime_task_switch(struct task_struct *prev)
         struct cpu_accounting_data *acct0 = get_accounting(prev);
  
         acct->starttime = acct0->starttime;
-       acct->startspurr = acct0->startspurr;
  }
  #endif
  
diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h

index ce242b9..7c1d8e7 100644 (file)
--- a/arch/powerpc/include/asm/drmem.h
+++ b/arch/powerpc/include/asm/drmem.h
@@ -99,4 +99,9 @@ void __init walk_drmem_lmbs_early(unsigned long node,
                         void (*func)(struct drmem_lmb *, const __be32 **));
  #endif
  
+static inline void invalidate_lmb_associativity_index(struct drmem_lmb *lmb)
+{
+       lmb->aa_index = 0xffffffff;
+}
+
  #endif /* _ASM_POWERPC_LMB_H */
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h

index 219637e..8b596d0 100644 (file)
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -43,7 +43,6 @@ struct pci_dn;
  #define EEH_VALID_PE_ZERO      0x10    /* PE#0 is valid                     */
  #define EEH_ENABLE_IO_FOR_LOG  0x20    /* Enable IO for log                 */
  #define EEH_EARLY_DUMP_LOG     0x40    /* Dump log immediately              */
-#define EEH_POSTPONED_PROBE    0x80    /* Powernv may postpone device probe */
  
  /*
   * Delay for PE reset, all in ms
@@ -99,13 +98,13 @@ struct eeh_pe {
         atomic_t pass_dev_cnt;          /* Count of passed through devs */
         struct eeh_pe *parent;          /* Parent PE                    */
         void *data;                     /* PE auxillary data            */
-       struct list_head child_list;    /* Link PE to the child list    */
-       struct list_head edevs;         /* Link list of EEH devices     */
-       struct list_head child;         /* Child PEs                    */
+       struct list_head child_list;    /* List of PEs below this PE    */
+       struct list_head child;         /* Memb. child_list/eeh_phb_pe  */
+       struct list_head edevs;         /* List of eeh_dev in this PE   */
  };
  
  #define eeh_pe_for_each_dev(pe, edev, tmp) \
-               list_for_each_entry_safe(edev, tmp, &pe->edevs, list)
+               list_for_each_entry_safe(edev, tmp, &pe->edevs, entry)
  
  #define eeh_for_each_pe(root, pe) \
         for (pe = root; pe; pe = eeh_pe_next(pe, root))
@@ -142,13 +141,12 @@ struct eeh_dev {
         int aer_cap;                    /* Saved AER capability         */
         int af_cap;                     /* Saved AF capability          */
         struct eeh_pe *pe;              /* Associated PE                */
-       struct list_head list;          /* Form link list in the PE     */
-       struct list_head rmv_list;      /* Record the removed edevs     */
+       struct list_head entry;         /* Membership in eeh_pe.edevs   */
+       struct list_head rmv_entry;     /* Membership in rmv_list       */
         struct pci_dn *pdn;             /* Associated PCI device node   */
         struct pci_dev *pdev;           /* Associated PCI device        */
         bool in_error;                  /* Error flag for edev          */
         struct pci_dev *physfn;         /* Associated SRIOV PF          */
-       struct pci_bus *bus;            /* PCI bus for partial hotplug  */
  };
  
  static inline struct pci_dn *eeh_dev_to_pdn(struct eeh_dev *edev)
@@ -207,9 +205,8 @@ struct eeh_ops {
         void* (*probe)(struct pci_dn *pdn, void *data);
         int (*set_option)(struct eeh_pe *pe, int option);
         int (*get_pe_addr)(struct eeh_pe *pe);
-       int (*get_state)(struct eeh_pe *pe, int *state);
+       int (*get_state)(struct eeh_pe *pe, int *delay);
         int (*reset)(struct eeh_pe *pe, int option);
-       int (*wait_state)(struct eeh_pe *pe, int max_wait);
         int (*get_log)(struct eeh_pe *pe, int severity, char *drv_log, unsigned long len);
         int (*configure_bridge)(struct eeh_pe *pe);
         int (*err_inject)(struct eeh_pe *pe, int type, int func,
@@ -243,11 +240,7 @@ static inline bool eeh_has_flag(int flag)
  
  static inline bool eeh_enabled(void)
  {
-       if (eeh_has_flag(EEH_FORCE_DISABLED) ||
-           !eeh_has_flag(EEH_ENABLED))
-               return false;
-
-       return true;
+       return eeh_has_flag(EEH_ENABLED) && !eeh_has_flag(EEH_FORCE_DISABLED);
  }
  
  static inline void eeh_serialize_lock(unsigned long *flags)
@@ -270,6 +263,7 @@ typedef void *(*eeh_edev_traverse_func)(struct eeh_dev *edev, void *flag);
  typedef void *(*eeh_pe_traverse_func)(struct eeh_pe *pe, void *flag);
  void eeh_set_pe_aux_size(int size);
  int eeh_phb_pe_create(struct pci_controller *phb);
+int eeh_wait_state(struct eeh_pe *pe, int max_wait);
  struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb);
  struct eeh_pe *eeh_pe_next(struct eeh_pe *pe, struct eeh_pe *root);
  struct eeh_pe *eeh_pe_get(struct pci_controller *phb,
diff --git a/arch/powerpc/include/asm/error-injection.h b/arch/powerpc/include/asm/error-injection.h

new file mode 100644 (file)

index 0000000..62fd247
--- /dev/null
+++ b/arch/powerpc/include/asm/error-injection.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#ifndef _ASM_ERROR_INJECTION_H
+#define _ASM_ERROR_INJECTION_H
+
+#include <linux/compiler.h>
+#include <linux/linkage.h>
+#include <asm/ptrace.h>
+#include <asm-generic/error-injection.h>
+
+void override_function_with_return(struct pt_regs *regs);
+
+#endif /* _ASM_ERROR_INJECTION_H */
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h

index a86fedd..3b4767e 100644 (file)
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -60,14 +60,6 @@
   */
  #define MAX_MCE_DEPTH  4
  
-/*
- * EX_LR is only used in EXSLB and where it does not overlap with EX_DAR
- * EX_CCR similarly with DSISR, but being 4 byte registers there is a hole
- * in the save area so it's not necessary to overlap them. Could be used
- * for future savings though if another 4 byte register was to be saved.
- */
-#define EX_LR          EX_DAR
-
  /*
   * EX_R3 is only used by the bad_stack handler. bad_stack reloads and
   * saves DAR from SPRN_DAR, and EX_DAR is not used. So EX_R3 can overlap
@@ -236,11 +228,10 @@
   * PPR save/restore macros used in exceptions_64s.S  
   * Used for P7 or later processors
   */
-#define SAVE_PPR(area, ra, rb)                                         \
+#define SAVE_PPR(area, ra)                                             \
  BEGIN_FTR_SECTION_NESTED(940)                                          \
-       ld      ra,PACACURRENT(r13);                                    \
-       ld      rb,area+EX_PPR(r13);    /* Read PPR from paca */        \
-       std     rb,TASKTHREADPPR(ra);                                   \
+       ld      ra,area+EX_PPR(r13);    /* Read PPR from paca */        \
+       std     ra,_PPR(r1);                                            \
  END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,940)
  
  #define RESTORE_PPR_PACA(area, ra)                                     \
@@ -508,7 +499,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
  3:     EXCEPTION_PROLOG_COMMON_1();                                       \
         beq     4f;                     /* if from kernel mode          */ \
         ACCOUNT_CPU_USER_ENTRY(r13, r9, r10);                              \
-       SAVE_PPR(area, r9, r10);                                           \
+       SAVE_PPR(area, r9);                                                \
  4:     EXCEPTION_PROLOG_COMMON_2(area)                                    \
         EXCEPTION_PROLOG_COMMON_3(n)                                       \
         ACCOUNT_STOLEN_TIME
diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h

index 7a051bd..00bc42d 100644 (file)
--- a/arch/powerpc/include/asm/firmware.h
+++ b/arch/powerpc/include/asm/firmware.h
@@ -52,6 +52,8 @@
  #define FW_FEATURE_PRRN                ASM_CONST(0x0000000200000000)
  #define FW_FEATURE_DRMEM_V2    ASM_CONST(0x0000000400000000)
  #define FW_FEATURE_DRC_INFO    ASM_CONST(0x0000000800000000)
+#define FW_FEATURE_BLOCK_REMOVE ASM_CONST(0x0000001000000000)
+#define FW_FEATURE_PAPR_SCM    ASM_CONST(0x0000002000000000)
  
  #ifndef __ASSEMBLY__
  
@@ -69,7 +71,8 @@ enum {
                 FW_FEATURE_SET_MODE | FW_FEATURE_BEST_ENERGY |
                 FW_FEATURE_TYPE1_AFFINITY | FW_FEATURE_PRRN |
                 FW_FEATURE_HPT_RESIZE | FW_FEATURE_DRMEM_V2 |
-               FW_FEATURE_DRC_INFO,
+               FW_FEATURE_DRC_INFO | FW_FEATURE_BLOCK_REMOVE |
+               FW_FEATURE_PAPR_SCM,
         FW_FEATURE_PSERIES_ALWAYS = 0,
         FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL,
         FW_FEATURE_POWERNV_ALWAYS = 0,
diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h

index 41cc15c..b9fbed8 100644 (file)
--- a/arch/powerpc/include/asm/fixmap.h
+++ b/arch/powerpc/include/asm/fixmap.h
@@ -72,7 +72,7 @@ enum fixed_addresses {
  static inline void __set_fixmap(enum fixed_addresses idx,
                                 phys_addr_t phys, pgprot_t flags)
  {
-       map_kernel_page(fix_to_virt(idx), phys, pgprot_val(flags));
+       map_kernel_page(fix_to_virt(idx), phys, flags);
  }
  
  #endif /* !__ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h

index 45e8789..33a4fc8 100644 (file)
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -278,6 +278,7 @@
  #define H_COP                  0x304
  #define H_GET_MPP_X            0x314
  #define H_SET_MODE             0x31C
+#define H_BLOCK_REMOVE         0x328
  #define H_CLEAR_HPT            0x358
  #define H_REQUEST_VMC          0x360
  #define H_RESIZE_HPT_PREPARE   0x36C
@@ -295,7 +296,15 @@
  #define H_INT_ESB               0x3C8
  #define H_INT_SYNC              0x3CC
  #define H_INT_RESET             0x3D0
-#define MAX_HCALL_OPCODE       H_INT_RESET
+#define H_SCM_READ_METADATA     0x3E4
+#define H_SCM_WRITE_METADATA    0x3E8
+#define H_SCM_BIND_MEM          0x3EC
+#define H_SCM_UNBIND_MEM        0x3F0
+#define H_SCM_QUERY_BLOCK_MEM_BINDING 0x3F4
+#define H_SCM_QUERY_LOGICAL_MEM_BINDING 0x3F8
+#define H_SCM_MEM_QUERY                0x3FC
+#define H_SCM_BLOCK_CLEAR       0x400
+#define MAX_HCALL_OPCODE       H_SCM_BLOCK_CLEAR
  
  /* H_VIOCTL functions */
  #define H_GET_VIOA_DUMP_SIZE   0x01
diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h

index e0331e7..3ef40b7 100644 (file)
--- a/arch/powerpc/include/asm/io.h
+++ b/arch/powerpc/include/asm/io.h
@@ -3,6 +3,9 @@
  #ifdef __KERNEL__
  
  #define ARCH_HAS_IOREMAP_WC
+#ifdef CONFIG_PPC32
+#define ARCH_HAS_IOREMAP_WT
+#endif
  
  /*
   * This program is free software; you can redistribute it and/or
@@ -108,25 +111,6 @@ extern bool isa_io_special;
  #define IO_SET_SYNC_FLAG()
  #endif
  
-/* gcc 4.0 and older doesn't have 'Z' constraint */
-#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ == 0)
-#define DEF_MMIO_IN_X(name, size, insn)                                \
-static inline u##size name(const volatile u##size __iomem *addr)       \
-{                                                                      \
-       u##size ret;                                                    \
-       __asm__ __volatile__("sync;"#insn" %0,0,%1;twi 0,%0,0;isync"    \
-               : "=r" (ret) : "r" (addr), "m" (*addr) : "memory");     \
-       return ret;                                                     \
-}
-
-#define DEF_MMIO_OUT_X(name, size, insn)                               \
-static inline void name(volatile u##size __iomem *addr, u##size val)   \
-{                                                                      \
-       __asm__ __volatile__("sync;"#insn" %1,0,%2"                     \
-               : "=m" (*addr) : "r" (val), "r" (addr) : "memory");     \
-       IO_SET_SYNC_FLAG();                                             \
-}
-#else /* newer gcc */
  #define DEF_MMIO_IN_X(name, size, insn)                                \
  static inline u##size name(const volatile u##size __iomem *addr)       \
  {                                                                      \
@@ -143,7 +127,6 @@ static inline void name(volatile u##size __iomem *addr, u##size val)        \
                 : "=Z" (*addr) : "r" (val) : "memory");                 \
         IO_SET_SYNC_FLAG();                                             \
  }
-#endif
  
  #define DEF_MMIO_IN_D(name, size, insn)                                \
  static inline u##size name(const volatile u##size __iomem *addr)       \
@@ -746,6 +729,10 @@ static inline void iosync(void)
   *
   * * ioremap_wc enables write combining
   *
+ * * ioremap_wt enables write through
+ *
+ * * ioremap_coherent maps coherent cached memory
+ *
   * * iounmap undoes such a mapping and can be hooked
   *
   * * __ioremap_at (and the pending __iounmap_at) are low level functions to
@@ -767,6 +754,8 @@ extern void __iomem *ioremap(phys_addr_t address, unsigned long size);
  extern void __iomem *ioremap_prot(phys_addr_t address, unsigned long size,
                                   unsigned long flags);
  extern void __iomem *ioremap_wc(phys_addr_t address, unsigned long size);
+void __iomem *ioremap_wt(phys_addr_t address, unsigned long size);
+void __iomem *ioremap_coherent(phys_addr_t address, unsigned long size);
  #define ioremap_nocache(addr, size)    ioremap((addr), (size))
  #define ioremap_uc(addr, size)         ioremap((addr), (size))
  #define ioremap_cache(addr, size) \
@@ -777,12 +766,12 @@ extern void iounmap(volatile void __iomem *addr);
  extern void __iomem *__ioremap(phys_addr_t, unsigned long size,
                                unsigned long flags);
  extern void __iomem *__ioremap_caller(phys_addr_t, unsigned long size,
-                                     unsigned long flags, void *caller);
+                                     pgprot_t prot, void *caller);
  
  extern void __iounmap(volatile void __iomem *addr);
  
  extern void __iomem * __ioremap_at(phys_addr_t pa, void *ea,
-                                  unsigned long size, unsigned long flags);
+                                  unsigned long size, pgprot_t prot);
  extern void __iounmap_at(void *ea, unsigned long size);
  
  /*
diff --git a/arch/powerpc/include/asm/kgdb.h b/arch/powerpc/include/asm/kgdb.h

index 9db24e7..a9e098a 100644 (file)
--- a/arch/powerpc/include/asm/kgdb.h
+++ b/arch/powerpc/include/asm/kgdb.h
@@ -26,9 +26,12 @@
  #define BREAK_INSTR_SIZE       4
  #define BUFMAX                 ((NUMREGBYTES * 2) + 512)
  #define OUTBUFMAX              ((NUMREGBYTES * 2) + 512)
+
+#define BREAK_INSTR            0x7d821008      /* twge r2, r2 */
+
  static inline void arch_kgdb_breakpoint(void)
  {
-       asm(".long 0x7d821008"); /* twge r2, r2 */
+       asm(stringify_in_c(.long BREAK_INSTR));
  }
  #define CACHE_FLUSH_IS_SAFE    1
  #define DBG_MAX_REG_NUM     70
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h

index a47de82..8311869 100644 (file)
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -35,7 +35,7 @@ struct machdep_calls {
         char            *name;
  #ifdef CONFIG_PPC64
         void __iomem *  (*ioremap)(phys_addr_t addr, unsigned long size,
-                                  unsigned long flags, void *caller);
+                                  pgprot_t prot, void *caller);
         void            (*iounmap)(volatile void __iomem *token);
  
  #ifdef CONFIG_PM
@@ -108,6 +108,7 @@ struct machdep_calls {
  
         /* Early exception handlers called in realmode */
         int             (*hmi_exception_early)(struct pt_regs *regs);
+       long            (*machine_check_early)(struct pt_regs *regs);
  
         /* Called during machine check exception to retrive fixup address. */
         bool            (*mce_check_early_recovery)(struct pt_regs *regs);
diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h

index 3a1226e..a8b8903 100644 (file)
--- a/arch/powerpc/include/asm/mce.h
+++ b/arch/powerpc/include/asm/mce.h
@@ -210,4 +210,7 @@ extern void release_mce_event(void);
  extern void machine_check_queue_event(void);
  extern void machine_check_print_event_info(struct machine_check_event *evt,
                                            bool user_mode);
+#ifdef CONFIG_PPC_BOOK3S_64
+void flush_and_reload_slb(void);
+#endif /* CONFIG_PPC_BOOK3S_64 */
  #endif /* __ASM_PPC64_MCE_H__ */
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h

index 13ea441..eb20eb3 100644 (file)
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -309,6 +309,21 @@ static inline u16 get_mm_addr_key(struct mm_struct *mm, unsigned long address)
   */
  #define MMU_PAGE_COUNT 16
  
+/*
+ * If we store section details in page->flags, we can't increase MAX_PHYSMEM_BITS.
+ * If we increase SECTIONS_WIDTH, we will not store node details in page->flags and
+ * page_to_nid does a page->section->node lookup.
+ * Hence only increase for VMEMMAP. Further, depending on SPARSEMEM_EXTREME reduces
+ * memory requirements with a large number of sections.
+ * 51 bits is the max physical real address on POWER9
+ */
+#if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_SPARSEMEM_EXTREME) &&  \
+       defined (CONFIG_PPC_64K_PAGES)
+#define MAX_PHYSMEM_BITS        51
+#else
+#define MAX_PHYSMEM_BITS        46
+#endif
+
  #ifdef CONFIG_PPC_BOOK3S_64
  #include <asm/book3s/64/mmu.h>
  #else /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h

index b694d6a..0381394 100644 (file)
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -82,7 +82,7 @@ static inline bool need_extra_context(struct mm_struct *mm, unsigned long ea)
  {
         int context_id;
  
-       context_id = get_ea_context(&mm->context, ea);
+       context_id = get_user_context(&mm->context, ea);
         if (!context_id)
                 return true;
         return false;
diff --git a/arch/powerpc/include/asm/mpic.h b/arch/powerpc/include/asm/mpic.h

index fad8ddd..0abf2e7 100644 (file)
--- a/arch/powerpc/include/asm/mpic.h
+++ b/arch/powerpc/include/asm/mpic.h
@@ -393,7 +393,14 @@ extern struct bus_type mpic_subsys;
  #define        MPIC_REGSET_TSI108              MPIC_REGSET(1)  /* Tsi108/109 PIC */
  
  /* Get the version of primary MPIC */
+#ifdef CONFIG_MPIC
  extern u32 fsl_mpic_primary_get_version(void);
+#else
+static inline u32 fsl_mpic_primary_get_version(void)
+{
+       return 0;
+}
+#endif
  
  /* Allocate the controller structure and setup the linux irq descs
   * for the range if interrupts passed in. No HW initialization is
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h

index 6c82b96..3ffb0ff 100644 (file)
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -128,14 +128,65 @@ extern int icache_44x_need_flush;
  #include <asm/nohash/32/pte-8xx.h>
  #endif
  
-/* And here we include common definitions */
-#include <asm/pte-common.h>
+/*
+ * Location of the PFN in the PTE. Most 32-bit platforms use the same
+ * as _PAGE_SHIFT here (ie, naturally aligned).
+ * Platforms that do otherwise just pre-define the value so we don't override it here.
+ */
+#ifndef PTE_RPN_SHIFT
+#define PTE_RPN_SHIFT  (PAGE_SHIFT)
+#endif
+
+/*
+ * The mask covered by the RPN must be a ULL on 32-bit platforms with
+ * 64-bit PTEs.
+ */
+#if defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT)
+#define PTE_RPN_MASK   (~((1ULL << PTE_RPN_SHIFT) - 1))
+#else
+#define PTE_RPN_MASK   (~((1UL << PTE_RPN_SHIFT) - 1))
+#endif
+
+/*
+ * _PAGE_CHG_MASK masks of bits that are to be preserved across
+ * pgprot changes.
+ */
+#define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_SPECIAL)
  
  #ifndef __ASSEMBLY__
  
  #define pte_clear(mm, addr, ptep) \
         do { pte_update(ptep, ~0, 0); } while (0)
  
+#ifndef pte_mkwrite
+static inline pte_t pte_mkwrite(pte_t pte)
+{
+       return __pte(pte_val(pte) | _PAGE_RW);
+}
+#endif
+
+static inline pte_t pte_mkdirty(pte_t pte)
+{
+       return __pte(pte_val(pte) | _PAGE_DIRTY);
+}
+
+static inline pte_t pte_mkyoung(pte_t pte)
+{
+       return __pte(pte_val(pte) | _PAGE_ACCESSED);
+}
+
+#ifndef pte_wrprotect
+static inline pte_t pte_wrprotect(pte_t pte)
+{
+       return __pte(pte_val(pte) & ~_PAGE_RW);
+}
+#endif
+
+static inline pte_t pte_mkexec(pte_t pte)
+{
+       return __pte(pte_val(pte) | _PAGE_EXEC);
+}
+
  #define pmd_none(pmd)          (!pmd_val(pmd))
  #define        pmd_bad(pmd)            (pmd_val(pmd) & _PMD_BAD)
  #define        pmd_present(pmd)        (pmd_val(pmd) & _PMD_PRESENT_MASK)
@@ -244,7 +295,10 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
  static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
                                       pte_t *ptep)
  {
-       pte_update(ptep, (_PAGE_RW | _PAGE_HWWRITE), _PAGE_RO);
+       unsigned long clr = ~pte_val(pte_wrprotect(__pte(~0)));
+       unsigned long set = pte_val(pte_wrprotect(__pte(0)));
+
+       pte_update(ptep, clr, set);
  }
  
  static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
@@ -252,9 +306,10 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
                                            unsigned long address,
                                            int psize)
  {
-       unsigned long set = pte_val(entry) &
-               (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
-       unsigned long clr = ~pte_val(entry) & (_PAGE_RO | _PAGE_NA);
+       pte_t pte_set = pte_mkyoung(pte_mkdirty(pte_mkwrite(pte_mkexec(__pte(0)))));
+       pte_t pte_clr = pte_mkyoung(pte_mkdirty(pte_mkwrite(pte_mkexec(__pte(~0)))));
+       unsigned long set = pte_val(entry) & pte_val(pte_set);
+       unsigned long clr = ~pte_val(entry) & ~pte_val(pte_clr);
  
         pte_update(ptep, clr, set);
  
@@ -317,7 +372,7 @@ static inline int pte_young(pte_t pte)
  #define __pte_to_swp_entry(pte)                ((swp_entry_t) { pte_val(pte) >> 3 })
  #define __swp_entry_to_pte(x)          ((pte_t) { (x).val << 3 })
  
-int map_kernel_page(unsigned long va, phys_addr_t pa, int flags);
+int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot);
  
  #endif /* !__ASSEMBLY__ */
  
diff --git a/arch/powerpc/include/asm/nohash/32/pte-40x.h b/arch/powerpc/include/asm/nohash/32/pte-40x.h

index bb4b3a4..661f459 100644 (file)
--- a/arch/powerpc/include/asm/nohash/32/pte-40x.h
+++ b/arch/powerpc/include/asm/nohash/32/pte-40x.h
@@ -50,13 +50,56 @@
  #define _PAGE_EXEC     0x200   /* hardware: EX permission */
  #define _PAGE_ACCESSED 0x400   /* software: R: page referenced */
  
+/* No page size encoding in the linux PTE */
+#define _PAGE_PSIZE            0
+
+/* Cache-related flags do not exist on 40x */
+#define _PAGE_COHERENT 0
+
+#define _PAGE_KERNEL_RO                0
+#define _PAGE_KERNEL_ROX       _PAGE_EXEC
+#define _PAGE_KERNEL_RW                (_PAGE_DIRTY | _PAGE_RW | _PAGE_HWWRITE)
+#define _PAGE_KERNEL_RWX       (_PAGE_DIRTY | _PAGE_RW | _PAGE_HWWRITE | _PAGE_EXEC)
+
  #define _PMD_PRESENT   0x400   /* PMD points to page of PTEs */
+#define _PMD_PRESENT_MASK      _PMD_PRESENT
  #define _PMD_BAD       0x802
  #define _PMD_SIZE_4M   0x0c0
  #define _PMD_SIZE_16M  0x0e0
+#define _PMD_USER      0
+
+#define _PTE_NONE_MASK 0
  
  /* Until my rework is finished, 40x still needs atomic PTE updates */
  #define PTE_ATOMIC_UPDATES     1
  
+#define _PAGE_BASE_NC  (_PAGE_PRESENT | _PAGE_ACCESSED)
+#define _PAGE_BASE     (_PAGE_BASE_NC)
+
+/* Permission masks used to generate the __P and __S table */
+#define PAGE_NONE      __pgprot(_PAGE_BASE)
+#define PAGE_SHARED    __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW)
+#define PAGE_SHARED_X  __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | _PAGE_EXEC)
+#define PAGE_COPY      __pgprot(_PAGE_BASE | _PAGE_USER)
+#define PAGE_COPY_X    __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
+#define PAGE_READONLY  __pgprot(_PAGE_BASE | _PAGE_USER)
+#define PAGE_READONLY_X        __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
+
+#ifndef __ASSEMBLY__
+static inline pte_t pte_wrprotect(pte_t pte)
+{
+       return __pte(pte_val(pte) & ~(_PAGE_RW | _PAGE_HWWRITE));
+}
+
+#define pte_wrprotect pte_wrprotect
+
+static inline pte_t pte_mkclean(pte_t pte)
+{
+       return __pte(pte_val(pte) & ~(_PAGE_DIRTY | _PAGE_HWWRITE));
+}
+
+#define pte_mkclean pte_mkclean
+#endif
+
  #endif /* __KERNEL__ */
  #endif /*  _ASM_POWERPC_NOHASH_32_PTE_40x_H */
diff --git a/arch/powerpc/include/asm/nohash/32/pte-44x.h b/arch/powerpc/include/asm/nohash/32/pte-44x.h

index f812c02..78bc304 100644 (file)
--- a/arch/powerpc/include/asm/nohash/32/pte-44x.h
+++ b/arch/powerpc/include/asm/nohash/32/pte-44x.h
@@ -85,14 +85,44 @@
  #define _PAGE_NO_CACHE 0x00000400              /* H: I bit */
  #define _PAGE_WRITETHRU        0x00000800              /* H: W bit */
  
+/* No page size encoding in the linux PTE */
+#define _PAGE_PSIZE            0
+
+#define _PAGE_KERNEL_RO                0
+#define _PAGE_KERNEL_ROX       _PAGE_EXEC
+#define _PAGE_KERNEL_RW                (_PAGE_DIRTY | _PAGE_RW)
+#define _PAGE_KERNEL_RWX       (_PAGE_DIRTY | _PAGE_RW | _PAGE_EXEC)
+
  /* TODO: Add large page lowmem mapping support */
  #define _PMD_PRESENT   0
  #define _PMD_PRESENT_MASK (PAGE_MASK)
  #define _PMD_BAD       (~PAGE_MASK)
+#define _PMD_USER      0
  
  /* ERPN in a PTE never gets cleared, ignore it */
  #define _PTE_NONE_MASK 0xffffffff00000000ULL
  
+/*
+ * We define 2 sets of base prot bits, one for basic pages (ie,
+ * cacheable kernel and user pages) and one for non cacheable
+ * pages. We always set _PAGE_COHERENT when SMP is enabled or
+ * the processor might need it for DMA coherency.
+ */
+#define _PAGE_BASE_NC  (_PAGE_PRESENT | _PAGE_ACCESSED)
+#if defined(CONFIG_SMP)
+#define _PAGE_BASE     (_PAGE_BASE_NC | _PAGE_COHERENT)
+#else
+#define _PAGE_BASE     (_PAGE_BASE_NC)
+#endif
+
+/* Permission masks used to generate the __P and __S table */
+#define PAGE_NONE      __pgprot(_PAGE_BASE)
+#define PAGE_SHARED    __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW)
+#define PAGE_SHARED_X  __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | _PAGE_EXEC)
+#define PAGE_COPY      __pgprot(_PAGE_BASE | _PAGE_USER)
+#define PAGE_COPY_X    __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
+#define PAGE_READONLY  __pgprot(_PAGE_BASE | _PAGE_USER)
+#define PAGE_READONLY_X        __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
  
  #endif /* __KERNEL__ */
  #endif /*  _ASM_POWERPC_NOHASH_32_PTE_44x_H */
diff --git a/arch/powerpc/include/asm/nohash/32/pte-8xx.h b/arch/powerpc/include/asm/nohash/32/pte-8xx.h

index f04cb46..6bfe041 100644 (file)
--- a/arch/powerpc/include/asm/nohash/32/pte-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/pte-8xx.h
@@ -29,10 +29,10 @@
   */
  
  /* Definitions for 8xx embedded chips. */
-#define _PAGE_PRESENT  0x0001  /* Page is valid */
-#define _PAGE_NO_CACHE 0x0002  /* I: cache inhibit */
-#define _PAGE_PRIVILEGED       0x0004  /* No ASID (context) compare */
-#define _PAGE_HUGE     0x0008  /* SPS: Small Page Size (1 if 16k, 512k or 8M)*/
+#define _PAGE_PRESENT  0x0001  /* V: Page is valid */
+#define _PAGE_NO_CACHE 0x0002  /* CI: cache inhibit */
+#define _PAGE_SH       0x0004  /* SH: No ASID (context) compare */
+#define _PAGE_SPS      0x0008  /* SPS: Small Page Size (1 if 16k, 512k or 8M)*/
  #define _PAGE_DIRTY    0x0100  /* C: page changed */
  
  /* These 4 software bits must be masked out when the L2 entry is loaded
@@ -46,18 +46,95 @@
  #define _PAGE_NA       0x0200  /* Supervisor NA, User no access */
  #define _PAGE_RO       0x0600  /* Supervisor RO, User no access */
  
+/* Cache-related flags do not exist on 8xx */
+#define _PAGE_COHERENT 0
+#define _PAGE_WRITETHRU        0
+
+#define _PAGE_KERNEL_RO                (_PAGE_SH | _PAGE_RO)
+#define _PAGE_KERNEL_ROX       (_PAGE_SH | _PAGE_RO | _PAGE_EXEC)
+#define _PAGE_KERNEL_RW                (_PAGE_SH | _PAGE_DIRTY)
+#define _PAGE_KERNEL_RWX       (_PAGE_SH | _PAGE_DIRTY | _PAGE_EXEC)
+
  #define _PMD_PRESENT   0x0001
+#define _PMD_PRESENT_MASK      _PMD_PRESENT
  #define _PMD_BAD       0x0fd0
  #define _PMD_PAGE_MASK 0x000c
  #define _PMD_PAGE_8M   0x000c
  #define _PMD_PAGE_512K 0x0004
  #define _PMD_USER      0x0020  /* APG 1 */
  
+#define _PTE_NONE_MASK 0
+
  /* Until my rework is finished, 8xx still needs atomic PTE updates */
  #define PTE_ATOMIC_UPDATES     1
  
  #ifdef CONFIG_PPC_16K_PAGES
-#define _PAGE_PSIZE    _PAGE_HUGE
+#define _PAGE_PSIZE    _PAGE_SPS
+#else
+#define _PAGE_PSIZE            0
+#endif
+
+#define _PAGE_BASE_NC  (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_PSIZE)
+#define _PAGE_BASE     (_PAGE_BASE_NC)
+
+/* Permission masks used to generate the __P and __S table */
+#define PAGE_NONE      __pgprot(_PAGE_BASE | _PAGE_NA)
+#define PAGE_SHARED    __pgprot(_PAGE_BASE)
+#define PAGE_SHARED_X  __pgprot(_PAGE_BASE | _PAGE_EXEC)
+#define PAGE_COPY      __pgprot(_PAGE_BASE | _PAGE_RO)
+#define PAGE_COPY_X    __pgprot(_PAGE_BASE | _PAGE_RO | _PAGE_EXEC)
+#define PAGE_READONLY  __pgprot(_PAGE_BASE | _PAGE_RO)
+#define PAGE_READONLY_X        __pgprot(_PAGE_BASE | _PAGE_RO | _PAGE_EXEC)
+
+#ifndef __ASSEMBLY__
+static inline pte_t pte_wrprotect(pte_t pte)
+{
+       return __pte(pte_val(pte) | _PAGE_RO);
+}
+
+#define pte_wrprotect pte_wrprotect
+
+static inline int pte_write(pte_t pte)
+{
+       return !(pte_val(pte) & _PAGE_RO);
+}
+
+#define pte_write pte_write
+
+static inline pte_t pte_mkwrite(pte_t pte)
+{
+       return __pte(pte_val(pte) & ~_PAGE_RO);
+}
+
+#define pte_mkwrite pte_mkwrite
+
+static inline bool pte_user(pte_t pte)
+{
+       return !(pte_val(pte) & _PAGE_SH);
+}
+
+#define pte_user pte_user
+
+static inline pte_t pte_mkprivileged(pte_t pte)
+{
+       return __pte(pte_val(pte) | _PAGE_SH);
+}
+
+#define pte_mkprivileged pte_mkprivileged
+
+static inline pte_t pte_mkuser(pte_t pte)
+{
+       return __pte(pte_val(pte) & ~_PAGE_SH);
+}
+
+#define pte_mkuser pte_mkuser
+
+static inline pte_t pte_mkhuge(pte_t pte)
+{
+       return __pte(pte_val(pte) | _PAGE_SPS);
+}
+
+#define pte_mkhuge pte_mkhuge
  #endif
  
  #endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/nohash/32/pte-fsl-booke.h b/arch/powerpc/include/asm/nohash/32/pte-fsl-booke.h

index d1ee24e..0fc1bd4 100644 (file)
--- a/arch/powerpc/include/asm/nohash/32/pte-fsl-booke.h
+++ b/arch/powerpc/include/asm/nohash/32/pte-fsl-booke.h
@@ -31,11 +31,44 @@
  #define _PAGE_WRITETHRU        0x00400 /* H: W bit */
  #define _PAGE_SPECIAL  0x00800 /* S: Special page */
  
+#define _PAGE_KERNEL_RO                0
+#define _PAGE_KERNEL_ROX       _PAGE_EXEC
+#define _PAGE_KERNEL_RW                (_PAGE_DIRTY | _PAGE_RW)
+#define _PAGE_KERNEL_RWX       (_PAGE_DIRTY | _PAGE_RW | _PAGE_EXEC)
+
+/* No page size encoding in the linux PTE */
+#define _PAGE_PSIZE            0
+
  #define _PMD_PRESENT   0
  #define _PMD_PRESENT_MASK (PAGE_MASK)
  #define _PMD_BAD       (~PAGE_MASK)
+#define _PMD_USER      0
+
+#define _PTE_NONE_MASK 0
  
  #define PTE_WIMGE_SHIFT (6)
  
+/*
+ * We define 2 sets of base prot bits, one for basic pages (ie,
+ * cacheable kernel and user pages) and one for non cacheable
+ * pages. We always set _PAGE_COHERENT when SMP is enabled or
+ * the processor might need it for DMA coherency.
+ */
+#define _PAGE_BASE_NC  (_PAGE_PRESENT | _PAGE_ACCESSED)
+#if defined(CONFIG_SMP) || defined(CONFIG_PPC_E500MC)
+#define _PAGE_BASE     (_PAGE_BASE_NC | _PAGE_COHERENT)
+#else
+#define _PAGE_BASE     (_PAGE_BASE_NC)
+#endif
+
+/* Permission masks used to generate the __P and __S table */
+#define PAGE_NONE      __pgprot(_PAGE_BASE)
+#define PAGE_SHARED    __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW)
+#define PAGE_SHARED_X  __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | _PAGE_EXEC)
+#define PAGE_COPY      __pgprot(_PAGE_BASE | _PAGE_USER)
+#define PAGE_COPY_X    __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
+#define PAGE_READONLY  __pgprot(_PAGE_BASE | _PAGE_USER)
+#define PAGE_READONLY_X        __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
+
  #endif /* __KERNEL__ */
  #endif /*  _ASM_POWERPC_NOHASH_32_PTE_FSL_BOOKE_H */
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h

index 68283e6..67421f7 100644 (file)
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -89,11 +89,47 @@
   * Include the PTE bits definitions
   */
  #include <asm/nohash/pte-book3e.h>
-#include <asm/pte-common.h>
+
+#define _PAGE_SAO      0
+
+#define PTE_RPN_MASK   (~((1UL << PTE_RPN_SHIFT) - 1))
+
+/*
+ * _PAGE_CHG_MASK is the mask of bits that are to be preserved across
+ * pgprot changes.
+ */
+#define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_SPECIAL)
+
+#define H_PAGE_4K_PFN 0
  
  #ifndef __ASSEMBLY__
  /* pte_clear moved to later in this file */
  
+static inline pte_t pte_mkwrite(pte_t pte)
+{
+       return __pte(pte_val(pte) | _PAGE_RW);
+}
+
+static inline pte_t pte_mkdirty(pte_t pte)
+{
+       return __pte(pte_val(pte) | _PAGE_DIRTY);
+}
+
+static inline pte_t pte_mkyoung(pte_t pte)
+{
+       return __pte(pte_val(pte) | _PAGE_ACCESSED);
+}
+
+static inline pte_t pte_wrprotect(pte_t pte)
+{
+       return __pte(pte_val(pte) & ~_PAGE_RW);
+}
+
+static inline pte_t pte_mkexec(pte_t pte)
+{
+       return __pte(pte_val(pte) | _PAGE_EXEC);
+}
+
  #define PMD_BAD_BITS           (PTE_TABLE_SIZE-1)
  #define PUD_BAD_BITS           (PMD_TABLE_SIZE-1)
  
@@ -328,8 +364,7 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
  #define __pte_to_swp_entry(pte)                ((swp_entry_t) { pte_val((pte)) })
  #define __swp_entry_to_pte(x)          __pte((x).val)
  
-extern int map_kernel_page(unsigned long ea, unsigned long pa,
-                          unsigned long flags);
+int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot);
  extern int __meminit vmemmap_create_mapping(unsigned long start,
                                             unsigned long page_size,
                                             unsigned long phys);
diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h

index b321c82..70ff239 100644 (file)
--- a/arch/powerpc/include/asm/nohash/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/pgtable.h
@@ -8,18 +8,50 @@
  #include <asm/nohash/32/pgtable.h>
  #endif
  
+/* Permission masks used for kernel mappings */
+#define PAGE_KERNEL    __pgprot(_PAGE_BASE | _PAGE_KERNEL_RW)
+#define PAGE_KERNEL_NC __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | _PAGE_NO_CACHE)
+#define PAGE_KERNEL_NCG        __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
+                                _PAGE_NO_CACHE | _PAGE_GUARDED)
+#define PAGE_KERNEL_X  __pgprot(_PAGE_BASE | _PAGE_KERNEL_RWX)
+#define PAGE_KERNEL_RO __pgprot(_PAGE_BASE | _PAGE_KERNEL_RO)
+#define PAGE_KERNEL_ROX        __pgprot(_PAGE_BASE | _PAGE_KERNEL_ROX)
+
+/*
+ * Protection used for kernel text. We want the debuggers to be able to
+ * set breakpoints anywhere, so don't write protect the kernel text
+ * on platforms where such control is possible.
+ */
+#if defined(CONFIG_KGDB) || defined(CONFIG_XMON) || defined(CONFIG_BDI_SWITCH) ||\
+       defined(CONFIG_KPROBES) || defined(CONFIG_DYNAMIC_FTRACE)
+#define PAGE_KERNEL_TEXT       PAGE_KERNEL_X
+#else
+#define PAGE_KERNEL_TEXT       PAGE_KERNEL_ROX
+#endif
+
+/* Make modules code happy. We don't set RO yet */
+#define PAGE_KERNEL_EXEC       PAGE_KERNEL_X
+
+/* Advertise special mapping type for AGP */
+#define PAGE_AGP               (PAGE_KERNEL_NC)
+#define HAVE_PAGE_AGP
+
  #ifndef __ASSEMBLY__
  
  /* Generic accessors to PTE bits */
+#ifndef pte_write
  static inline int pte_write(pte_t pte)
  {
-       return (pte_val(pte) & (_PAGE_RW | _PAGE_RO)) != _PAGE_RO;
+       return pte_val(pte) & _PAGE_RW;
  }
+#endif
  static inline int pte_read(pte_t pte)          { return 1; }
  static inline int pte_dirty(pte_t pte)         { return pte_val(pte) & _PAGE_DIRTY; }
  static inline int pte_special(pte_t pte)       { return pte_val(pte) & _PAGE_SPECIAL; }
  static inline int pte_none(pte_t pte)          { return (pte_val(pte) & ~_PTE_NONE_MASK) == 0; }
-static inline pgprot_t pte_pgprot(pte_t pte)   { return __pgprot(pte_val(pte) & PAGE_PROT_BITS); }
+static inline bool pte_hashpte(pte_t pte)      { return false; }
+static inline bool pte_ci(pte_t pte)           { return pte_val(pte) & _PAGE_NO_CACHE; }
+static inline bool pte_exec(pte_t pte)         { return pte_val(pte) & _PAGE_EXEC; }
  
  #ifdef CONFIG_NUMA_BALANCING
  /*
@@ -29,8 +61,7 @@ static inline pgprot_t pte_pgprot(pte_t pte)  { return __pgprot(pte_val(pte) & PA
   */
  static inline int pte_protnone(pte_t pte)
  {
-       return (pte_val(pte) &
-               (_PAGE_PRESENT | _PAGE_USER)) == _PAGE_PRESENT;
+       return pte_present(pte) && !pte_user(pte);
  }
  
  static inline int pmd_protnone(pmd_t pmd)
@@ -44,6 +75,23 @@ static inline int pte_present(pte_t pte)
         return pte_val(pte) & _PAGE_PRESENT;
  }
  
+static inline bool pte_hw_valid(pte_t pte)
+{
+       return pte_val(pte) & _PAGE_PRESENT;
+}
+
+/*
+ * Don't just check for any non-zero bits in _PAGE_USER, since for book3e
+ * and PTE_64BIT, PAGE_KERNEL_X contains _PAGE_BAP_SR which is also in
+ * _PAGE_USER.  Need to explicitly match _PAGE_BAP_UR bit in that case too.
+ */
+#ifndef pte_user
+static inline bool pte_user(pte_t pte)
+{
+       return (pte_val(pte) & _PAGE_USER) == _PAGE_USER;
+}
+#endif
+
  /*
   * We only find page table entry in the last level
   * Hence no need for other accessors
@@ -77,53 +125,53 @@ static inline unsigned long pte_pfn(pte_t pte)     {
         return pte_val(pte) >> PTE_RPN_SHIFT; }
  
  /* Generic modifiers for PTE bits */
-static inline pte_t pte_wrprotect(pte_t pte)
+static inline pte_t pte_exprotect(pte_t pte)
  {
-       pte_basic_t ptev;
-
-       ptev = pte_val(pte) & ~(_PAGE_RW | _PAGE_HWWRITE);
-       ptev |= _PAGE_RO;
-       return __pte(ptev);
+       return __pte(pte_val(pte) & ~_PAGE_EXEC);
  }
  
+#ifndef pte_mkclean
  static inline pte_t pte_mkclean(pte_t pte)
  {
-       return __pte(pte_val(pte) & ~(_PAGE_DIRTY | _PAGE_HWWRITE));
+       return __pte(pte_val(pte) & ~_PAGE_DIRTY);
  }
+#endif
  
  static inline pte_t pte_mkold(pte_t pte)
  {
         return __pte(pte_val(pte) & ~_PAGE_ACCESSED);
  }
  
-static inline pte_t pte_mkwrite(pte_t pte)
+static inline pte_t pte_mkpte(pte_t pte)
  {
-       pte_basic_t ptev;
-
-       ptev = pte_val(pte) & ~_PAGE_RO;
-       ptev |= _PAGE_RW;
-       return __pte(ptev);
+       return pte;
  }
  
-static inline pte_t pte_mkdirty(pte_t pte)
+static inline pte_t pte_mkspecial(pte_t pte)
  {
-       return __pte(pte_val(pte) | _PAGE_DIRTY);
+       return __pte(pte_val(pte) | _PAGE_SPECIAL);
  }
  
-static inline pte_t pte_mkyoung(pte_t pte)
+#ifndef pte_mkhuge
+static inline pte_t pte_mkhuge(pte_t pte)
  {
-       return __pte(pte_val(pte) | _PAGE_ACCESSED);
+       return __pte(pte_val(pte));
  }
+#endif
  
-static inline pte_t pte_mkspecial(pte_t pte)
+#ifndef pte_mkprivileged
+static inline pte_t pte_mkprivileged(pte_t pte)
  {
-       return __pte(pte_val(pte) | _PAGE_SPECIAL);
+       return __pte(pte_val(pte) & ~_PAGE_USER);
  }
+#endif
  
-static inline pte_t pte_mkhuge(pte_t pte)
+#ifndef pte_mkuser
+static inline pte_t pte_mkuser(pte_t pte)
  {
-       return __pte(pte_val(pte) | _PAGE_HUGE);
+       return __pte(pte_val(pte) | _PAGE_USER);
  }
+#endif
  
  static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
  {
@@ -197,6 +245,8 @@ extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long addre
  #if _PAGE_WRITETHRU != 0
  #define pgprot_cached_wthru(prot) (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \
                                             _PAGE_COHERENT | _PAGE_WRITETHRU))
+#else
+#define pgprot_cached_wthru(prot)      pgprot_noncached(prot)
  #endif
  
  #define pgprot_cached_noncoherent(prot) \
diff --git a/arch/powerpc/include/asm/nohash/pte-book3e.h b/arch/powerpc/include/asm/nohash/pte-book3e.h

index 12730b8..dd40d20 100644 (file)
--- a/arch/powerpc/include/asm/nohash/pte-book3e.h
+++ b/arch/powerpc/include/asm/nohash/pte-book3e.h
@@ -77,7 +77,48 @@
  #define _PMD_PRESENT   0
  #define _PMD_PRESENT_MASK (PAGE_MASK)
  #define _PMD_BAD       (~PAGE_MASK)
+#define _PMD_USER      0
+#else
+#define _PTE_NONE_MASK 0
+#endif
+
+/*
+ * We define 2 sets of base prot bits, one for basic pages (ie,
+ * cacheable kernel and user pages) and one for non cacheable
+ * pages. We always set _PAGE_COHERENT when SMP is enabled or
+ * the processor might need it for DMA coherency.
+ */
+#define _PAGE_BASE_NC  (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_PSIZE)
+#if defined(CONFIG_SMP)
+#define _PAGE_BASE     (_PAGE_BASE_NC | _PAGE_COHERENT)
+#else
+#define _PAGE_BASE     (_PAGE_BASE_NC)
  #endif
  
+/* Permission masks used to generate the __P and __S table */
+#define PAGE_NONE      __pgprot(_PAGE_BASE)
+#define PAGE_SHARED    __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW)
+#define PAGE_SHARED_X  __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | _PAGE_EXEC)
+#define PAGE_COPY      __pgprot(_PAGE_BASE | _PAGE_USER)
+#define PAGE_COPY_X    __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
+#define PAGE_READONLY  __pgprot(_PAGE_BASE | _PAGE_USER)
+#define PAGE_READONLY_X        __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
+
+#ifndef __ASSEMBLY__
+static inline pte_t pte_mkprivileged(pte_t pte)
+{
+       return __pte((pte_val(pte) & ~_PAGE_USER) | _PAGE_PRIVILEGED);
+}
+
+#define pte_mkprivileged pte_mkprivileged
+
+static inline pte_t pte_mkuser(pte_t pte)
+{
+       return __pte((pte_val(pte) & ~_PAGE_PRIVILEGED) | _PAGE_USER);
+}
+
+#define pte_mkuser pte_mkuser
+#endif /* __ASSEMBLY__ */
+
  #endif /* __KERNEL__ */
  #endif /*  _ASM_POWERPC_NOHASH_PTE_BOOK3E_H */
diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h

index 8365353..870fb7b 100644 (file)
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -1050,6 +1050,7 @@ enum OpalSysCooling {
  enum {
         OPAL_REBOOT_NORMAL              = 0,
         OPAL_REBOOT_PLATFORM_ERROR      = 1,
+       OPAL_REBOOT_FULL_IPL            = 2,
  };
  
  /* Argument to OPAL_PCI_TCE_KILL */
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h

index ad4f161..e843bc5 100644 (file)
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -113,7 +113,13 @@ struct paca_struct {
                                  * on the linear mapping */
         /* SLB related definitions */
         u16 vmalloc_sllp;
-       u16 slb_cache_ptr;
+       u8 slb_cache_ptr;
+       u8 stab_rr;                     /* stab/slb round-robin counter */
+#ifdef CONFIG_DEBUG_VM
+       u8 in_kernel_slb_handler;
+#endif
+       u32 slb_used_bitmap;            /* Bitmaps for first 32 SLB entries. */
+       u32 slb_kern_bitmap;
         u32 slb_cache[SLB_CACHE_ENTRIES];
  #endif /* CONFIG_PPC_BOOK3S_64 */
  
@@ -160,7 +166,6 @@ struct paca_struct {
          */
         struct task_struct *__current;  /* Pointer to current */
         u64 kstack;                     /* Saved Kernel stack addr */
-       u64 stab_rr;                    /* stab/slb round-robin counter */
         u64 saved_r1;                   /* r1 save for RTAS calls or PM or EE=0 */
         u64 saved_msr;                  /* MSR saved here by enter_rtas */
         u16 trap_save;                  /* Used when bad stack is encountered */
@@ -250,6 +255,15 @@ struct paca_struct {
  #ifdef CONFIG_PPC_PSERIES
         u8 *mce_data_buf;               /* buffer to hold per cpu rtas errlog */
  #endif /* CONFIG_PPC_PSERIES */
+
+#ifdef CONFIG_PPC_BOOK3S_64
+       /* Capture SLB related old contents in MCE handler. */
+       struct slb_entry *mce_faulty_slbs;
+       u16 slb_save_cache_ptr;
+#endif /* CONFIG_PPC_BOOK3S_64 */
+#ifdef CONFIG_STACKPROTECTOR
+       unsigned long canary;
+#endif
  } ____cacheline_aligned;
  
  extern void copy_mm_to_paca(struct mm_struct *mm);
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h

index 14c79a7..9679b75 100644 (file)
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -20,6 +20,25 @@ struct mm_struct;
  #include <asm/nohash/pgtable.h>
  #endif /* !CONFIG_PPC_BOOK3S */
  
+/* Note: due to the way vm flags are laid out, the bits are XWR */
+#define __P000 PAGE_NONE
+#define __P001 PAGE_READONLY
+#define __P010 PAGE_COPY
+#define __P011 PAGE_COPY
+#define __P100 PAGE_READONLY_X
+#define __P101 PAGE_READONLY_X
+#define __P110 PAGE_COPY_X
+#define __P111 PAGE_COPY_X
+
+#define __S000 PAGE_NONE
+#define __S001 PAGE_READONLY
+#define __S010 PAGE_SHARED
+#define __S011 PAGE_SHARED
+#define __S100 PAGE_READONLY_X
+#define __S101 PAGE_READONLY_X
+#define __S110 PAGE_SHARED_X
+#define __S111 PAGE_SHARED_X
+
  #ifndef __ASSEMBLY__
  
  #include <asm/tlbflush.h>
@@ -27,6 +46,16 @@ struct mm_struct;
  /* Keep these as a macros to avoid include dependency mess */
  #define pte_page(x)            pfn_to_page(pte_pfn(x))
  #define mk_pte(page, pgprot)   pfn_pte(page_to_pfn(page), (pgprot))
+/*
+ * Select all bits except the pfn
+ */
+static inline pgprot_t pte_pgprot(pte_t pte)
+{
+       unsigned long pte_flags;
+
+       pte_flags = pte_val(pte) & ~PTE_RPN_MASK;
+       return __pgprot(pte_flags);
+}
  
  /*
   * ZERO_PAGE is a global shared page that is always zero: used
diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h

index 7262880..f67da27 100644 (file)
--- a/arch/powerpc/include/asm/ppc-pci.h
+++ b/arch/powerpc/include/asm/ppc-pci.h
@@ -58,6 +58,7 @@ void eeh_save_bars(struct eeh_dev *edev);
  int rtas_write_config(struct pci_dn *, int where, int size, u32 val);
  int rtas_read_config(struct pci_dn *, int where, int size, u32 *val);
  void eeh_pe_state_mark(struct eeh_pe *pe, int state);
+void eeh_pe_mark_isolated(struct eeh_pe *pe);
  void eeh_pe_state_clear(struct eeh_pe *pe, int state);
  void eeh_pe_state_mark_with_cfg(struct eeh_pe *pe, int state);
  void eeh_pe_dev_mode_mark(struct eeh_pe *pe, int mode);
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h

index 52fadde..7d04d60 100644 (file)
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -32,9 +32,9 @@
  /* Default SMT priority is set to 3. Use 11- 13bits to save priority. */
  #define PPR_PRIORITY 3
  #ifdef __ASSEMBLY__
-#define INIT_PPR (PPR_PRIORITY << 50)
+#define DEFAULT_PPR (PPR_PRIORITY << 50)
  #else
-#define INIT_PPR ((u64)PPR_PRIORITY << 50)
+#define DEFAULT_PPR ((u64)PPR_PRIORITY << 50)
  #endif /* __ASSEMBLY__ */
  #endif /* CONFIG_PPC64 */
  
@@ -273,6 +273,7 @@ struct thread_struct {
  #endif /* CONFIG_HAVE_HW_BREAKPOINT */
         struct arch_hw_breakpoint hw_brk; /* info on the hardware breakpoint */
         unsigned long   trap_nr;        /* last trap # on this thread */
+       u8 load_slb;                    /* Ages out SLB preload cache entries */
         u8 load_fp;
  #ifdef CONFIG_ALTIVEC
         u8 load_vec;
@@ -341,7 +342,6 @@ struct thread_struct {
          * onwards.
          */
         int             dscr_inherit;
-       unsigned long   ppr;    /* used to save/restore SMT priority */
         unsigned long   tidr;
  #endif
  #ifdef CONFIG_PPC_BOOK3S_64
@@ -389,7 +389,6 @@ struct thread_struct {
         .regs = (struct pt_regs *)INIT_SP - 1, /* XXX bogus, I think */ \
         .addr_limit = KERNEL_DS, \
         .fpexc_mode = 0, \
-       .ppr = INIT_PPR, \
         .fscr = FSCR_TAR | FSCR_EBB \
  }
  #endif
diff --git a/arch/powerpc/include/asm/pte-common.h b/arch/powerpc/include/asm/pte-common.h

deleted file mode 100644 (file)

index bef5614..0000000
--- a/arch/powerpc/include/asm/pte-common.h
+++ /dev/null
@@ -1,219 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Included from asm/pgtable-*.h only ! */
-
-/*
- * Some bits are only used on some cpu families... Make sure that all
- * the undefined gets a sensible default
- */
-#ifndef _PAGE_HASHPTE
-#define _PAGE_HASHPTE  0
-#endif
-#ifndef _PAGE_HWWRITE
-#define _PAGE_HWWRITE  0
-#endif
-#ifndef _PAGE_EXEC
-#define _PAGE_EXEC     0
-#endif
-#ifndef _PAGE_ENDIAN
-#define _PAGE_ENDIAN   0
-#endif
-#ifndef _PAGE_COHERENT
-#define _PAGE_COHERENT 0
-#endif
-#ifndef _PAGE_WRITETHRU
-#define _PAGE_WRITETHRU        0
-#endif
-#ifndef _PAGE_4K_PFN
-#define _PAGE_4K_PFN           0
-#endif
-#ifndef _PAGE_SAO
-#define _PAGE_SAO      0
-#endif
-#ifndef _PAGE_PSIZE
-#define _PAGE_PSIZE            0
-#endif
-/* _PAGE_RO and _PAGE_RW shall not be defined at the same time */
-#ifndef _PAGE_RO
-#define _PAGE_RO 0
-#else
-#define _PAGE_RW 0
-#endif
-
-#ifndef _PAGE_PTE
-#define _PAGE_PTE 0
-#endif
-/* At least one of _PAGE_PRIVILEGED or _PAGE_USER must be defined */
-#ifndef _PAGE_PRIVILEGED
-#define _PAGE_PRIVILEGED 0
-#else
-#ifndef _PAGE_USER
-#define _PAGE_USER 0
-#endif
-#endif
-#ifndef _PAGE_NA
-#define _PAGE_NA 0
-#endif
-#ifndef _PAGE_HUGE
-#define _PAGE_HUGE 0
-#endif
-
-#ifndef _PMD_PRESENT_MASK
-#define _PMD_PRESENT_MASK      _PMD_PRESENT
-#endif
-#ifndef _PMD_USER
-#define _PMD_USER      0
-#endif
-#ifndef _PAGE_KERNEL_RO
-#define _PAGE_KERNEL_RO                (_PAGE_PRIVILEGED | _PAGE_RO)
-#endif
-#ifndef _PAGE_KERNEL_ROX
-#define _PAGE_KERNEL_ROX       (_PAGE_PRIVILEGED | _PAGE_RO | _PAGE_EXEC)
-#endif
-#ifndef _PAGE_KERNEL_RW
-#define _PAGE_KERNEL_RW                (_PAGE_PRIVILEGED | _PAGE_DIRTY | _PAGE_RW | \
-                                _PAGE_HWWRITE)
-#endif
-#ifndef _PAGE_KERNEL_RWX
-#define _PAGE_KERNEL_RWX       (_PAGE_PRIVILEGED | _PAGE_DIRTY | _PAGE_RW | \
-                                _PAGE_HWWRITE | _PAGE_EXEC)
-#endif
-#ifndef _PAGE_HPTEFLAGS
-#define _PAGE_HPTEFLAGS _PAGE_HASHPTE
-#endif
-#ifndef _PTE_NONE_MASK
-#define _PTE_NONE_MASK _PAGE_HPTEFLAGS
-#endif
-
-#ifndef __ASSEMBLY__
-
-/*
- * Don't just check for any non zero bits in __PAGE_USER, since for book3e
- * and PTE_64BIT, PAGE_KERNEL_X contains _PAGE_BAP_SR which is also in
- * _PAGE_USER.  Need to explicitly match _PAGE_BAP_UR bit in that case too.
- */
-static inline bool pte_user(pte_t pte)
-{
-       return (pte_val(pte) & (_PAGE_USER | _PAGE_PRIVILEGED)) == _PAGE_USER;
-}
-#endif /* __ASSEMBLY__ */
-
-/* Location of the PFN in the PTE. Most 32-bit platforms use the same
- * as _PAGE_SHIFT here (ie, naturally aligned).
- * Platform who don't just pre-define the value so we don't override it here
- */
-#ifndef PTE_RPN_SHIFT
-#define PTE_RPN_SHIFT  (PAGE_SHIFT)
-#endif
-
-/* The mask covered by the RPN must be a ULL on 32-bit platforms with
- * 64-bit PTEs
- */
-#if defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT)
-#define PTE_RPN_MASK   (~((1ULL<<PTE_RPN_SHIFT)-1))
-#else
-#define PTE_RPN_MASK   (~((1UL<<PTE_RPN_SHIFT)-1))
-#endif
-
-/* _PAGE_CHG_MASK masks of bits that are to be preserved across
- * pgprot changes
- */
-#define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
-                         _PAGE_ACCESSED | _PAGE_SPECIAL)
-
-/* Mask of bits returned by pte_pgprot() */
-#define PAGE_PROT_BITS (_PAGE_GUARDED | _PAGE_COHERENT | _PAGE_NO_CACHE | \
-                        _PAGE_WRITETHRU | _PAGE_ENDIAN | _PAGE_4K_PFN | \
-                        _PAGE_USER | _PAGE_ACCESSED | _PAGE_RO | _PAGE_NA | \
-                        _PAGE_PRIVILEGED | \
-                        _PAGE_RW | _PAGE_HWWRITE | _PAGE_DIRTY | _PAGE_EXEC)
-
-/*
- * We define 2 sets of base prot bits, one for basic pages (ie,
- * cacheable kernel and user pages) and one for non cacheable
- * pages. We always set _PAGE_COHERENT when SMP is enabled or
- * the processor might need it for DMA coherency.
- */
-#define _PAGE_BASE_NC  (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_PSIZE)
-#if defined(CONFIG_SMP) || defined(CONFIG_PPC_STD_MMU) || \
-       defined(CONFIG_PPC_E500MC)
-#define _PAGE_BASE     (_PAGE_BASE_NC | _PAGE_COHERENT)
-#else
-#define _PAGE_BASE     (_PAGE_BASE_NC)
-#endif
-
-/* Permission masks used to generate the __P and __S table,
- *
- * Note:__pgprot is defined in arch/powerpc/include/asm/page.h
- *
- * Write permissions imply read permissions for now (we could make write-only
- * pages on BookE but we don't bother for now). Execute permission control is
- * possible on platforms that define _PAGE_EXEC
- *
- * Note due to the way vm flags are laid out, the bits are XWR
- */
-#define PAGE_NONE      __pgprot(_PAGE_BASE | _PAGE_NA)
-#define PAGE_SHARED    __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW)
-#define PAGE_SHARED_X  __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | \
-                                _PAGE_EXEC)
-#define PAGE_COPY      __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RO)
-#define PAGE_COPY_X    __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RO | \
-                                _PAGE_EXEC)
-#define PAGE_READONLY  __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RO)
-#define PAGE_READONLY_X        __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RO | \
-                                _PAGE_EXEC)
-
-#define __P000 PAGE_NONE
-#define __P001 PAGE_READONLY
-#define __P010 PAGE_COPY
-#define __P011 PAGE_COPY
-#define __P100 PAGE_READONLY_X
-#define __P101 PAGE_READONLY_X
-#define __P110 PAGE_COPY_X
-#define __P111 PAGE_COPY_X
-
-#define __S000 PAGE_NONE
-#define __S001 PAGE_READONLY
-#define __S010 PAGE_SHARED
-#define __S011 PAGE_SHARED
-#define __S100 PAGE_READONLY_X
-#define __S101 PAGE_READONLY_X
-#define __S110 PAGE_SHARED_X
-#define __S111 PAGE_SHARED_X
-
-/* Permission masks used for kernel mappings */
-#define PAGE_KERNEL    __pgprot(_PAGE_BASE | _PAGE_KERNEL_RW)
-#define PAGE_KERNEL_NC __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
-                                _PAGE_NO_CACHE)
-#define PAGE_KERNEL_NCG        __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
-                                _PAGE_NO_CACHE | _PAGE_GUARDED)
-#define PAGE_KERNEL_X  __pgprot(_PAGE_BASE | _PAGE_KERNEL_RWX)
-#define PAGE_KERNEL_RO __pgprot(_PAGE_BASE | _PAGE_KERNEL_RO)
-#define PAGE_KERNEL_ROX        __pgprot(_PAGE_BASE | _PAGE_KERNEL_ROX)
-
-/* Protection used for kernel text. We want the debuggers to be able to
- * set breakpoints anywhere, so don't write protect the kernel text
- * on platforms where such control is possible.
- */
-#if defined(CONFIG_KGDB) || defined(CONFIG_XMON) || defined(CONFIG_BDI_SWITCH) ||\
-       defined(CONFIG_KPROBES) || defined(CONFIG_DYNAMIC_FTRACE)
-#define PAGE_KERNEL_TEXT       PAGE_KERNEL_X
-#else
-#define PAGE_KERNEL_TEXT       PAGE_KERNEL_ROX
-#endif
-
-/* Make modules code happy. We don't set RO yet */
-#define PAGE_KERNEL_EXEC       PAGE_KERNEL_X
-
-/* Advertise special mapping type for AGP */
-#define PAGE_AGP               (PAGE_KERNEL_NC)
-#define HAVE_PAGE_AGP
-
-#ifndef _PAGE_READ
-/* if not defined, we should not find _PAGE_WRITE too */
-#define _PAGE_READ 0
-#define _PAGE_WRITE _PAGE_RW
-#endif
-
-#ifndef H_PAGE_4K_PFN
-#define H_PAGE_4K_PFN 0
-#endif
diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h

index 5b480e1..f73886a 100644 (file)
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -26,6 +26,37 @@
  #include <uapi/asm/ptrace.h>
  #include <asm/asm-const.h>
  
+#ifndef __ASSEMBLY__
+struct pt_regs
+{
+       union {
+               struct user_pt_regs user_regs;
+               struct {
+                       unsigned long gpr[32];
+                       unsigned long nip;
+                       unsigned long msr;
+                       unsigned long orig_gpr3;
+                       unsigned long ctr;
+                       unsigned long link;
+                       unsigned long xer;
+                       unsigned long ccr;
+#ifdef CONFIG_PPC64
+                       unsigned long softe;
+#else
+                       unsigned long mq;
+#endif
+                       unsigned long trap;
+                       unsigned long dar;
+                       unsigned long dsisr;
+                       unsigned long result;
+               };
+       };
+
+#ifdef CONFIG_PPC64
+       unsigned long ppr;
+#endif
+};
+#endif
  
  #ifdef __powerpc64__
  
@@ -102,6 +133,11 @@ static inline long regs_return_value(struct pt_regs *regs)
                 return -regs->gpr[3];
  }
  
+static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc)
+{
+       regs->gpr[3] = rc;
+}
+
  #ifdef __powerpc64__
  #define user_mode(regs) ((((regs)->msr) >> MSR_PR_LG) & 0x1)
  #else
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h

index c906989..de52c31 100644 (file)
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -118,11 +118,16 @@
  #define MSR_TS_S       __MASK(MSR_TS_S_LG)     /*  Transaction Suspended */
  #define MSR_TS_T       __MASK(MSR_TS_T_LG)     /*  Transaction Transactional */
  #define MSR_TS_MASK    (MSR_TS_T | MSR_TS_S)   /* Transaction State bits */
-#define MSR_TM_ACTIVE(x) (((x) & MSR_TS_MASK) != 0) /* Transaction active? */
  #define MSR_TM_RESV(x) (((x) & MSR_TS_MASK) == MSR_TS_MASK) /* Reserved */
  #define MSR_TM_TRANSACTIONAL(x)        (((x) & MSR_TS_MASK) == MSR_TS_T)
  #define MSR_TM_SUSPENDED(x)    (((x) & MSR_TS_MASK) == MSR_TS_S)
  
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+#define MSR_TM_ACTIVE(x) (((x) & MSR_TS_MASK) != 0) /* Transaction active? */
+#else
+#define MSR_TM_ACTIVE(x) 0
+#endif
+
  #if defined(CONFIG_PPC_BOOK3S_64)
  #define MSR_64BIT      MSR_SF
  
diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h

index 71e393c..bb38dd6 100644 (file)
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -125,6 +125,7 @@ struct rtas_suspend_me_data {
  #define RTAS_TYPE_INFO                 0xE2
  #define RTAS_TYPE_DEALLOC              0xE3
  #define RTAS_TYPE_DUMP                 0xE4
+#define RTAS_TYPE_HOTPLUG              0xE5
  /* I don't add PowerMGM events right now, this is a different topic */ 
  #define RTAS_TYPE_PMGM_POWER_SW_ON     0x60
  #define RTAS_TYPE_PMGM_POWER_SW_OFF    0x61
@@ -185,11 +186,23 @@ static inline uint8_t rtas_error_disposition(const struct rtas_error_log *elog)
         return (elog->byte1 & 0x18) >> 3;
  }
  
+static inline
+void rtas_set_disposition_recovered(struct rtas_error_log *elog)
+{
+       elog->byte1 &= ~0x18;
+       elog->byte1 |= (RTAS_DISP_FULLY_RECOVERED << 3);
+}
+
  static inline uint8_t rtas_error_extended(const struct rtas_error_log *elog)
  {
         return (elog->byte1 & 0x04) >> 2;
  }
  
+static inline uint8_t rtas_error_initiator(const struct rtas_error_log *elog)
+{
+       return (elog->byte2 & 0xf0) >> 4;
+}
+
  #define rtas_error_type(x)     ((x)->byte3)
  
  static inline
@@ -275,6 +288,7 @@ inline uint32_t rtas_ext_event_company_id(struct rtas_ext_event_log_v6 *ext_log)
  #define PSERIES_ELOG_SECT_ID_CALL_HOME         (('C' << 8) | 'H')
  #define PSERIES_ELOG_SECT_ID_USER_DEF          (('U' << 8) | 'D')
  #define PSERIES_ELOG_SECT_ID_HOTPLUG           (('H' << 8) | 'P')
+#define PSERIES_ELOG_SECT_ID_MCE               (('M' << 8) | 'C')
  
  /* Vendor specific Platform Event Log Format, Version 6, section header */
  struct pseries_errorlog {
@@ -316,6 +330,7 @@ struct pseries_hp_errorlog {
  #define PSERIES_HP_ELOG_RESOURCE_MEM   2
  #define PSERIES_HP_ELOG_RESOURCE_SLOT  3
  #define PSERIES_HP_ELOG_RESOURCE_PHB   4
+#define PSERIES_HP_ELOG_RESOURCE_PMEM   6
  
  #define PSERIES_HP_ELOG_ACTION_ADD     1
  #define PSERIES_HP_ELOG_ACTION_REMOVE  2
diff --git a/arch/powerpc/include/asm/slice.h b/arch/powerpc/include/asm/slice.h

index e40406c..a595461 100644 (file)
--- a/arch/powerpc/include/asm/slice.h
+++ b/arch/powerpc/include/asm/slice.h
@@ -32,6 +32,7 @@ void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
                            unsigned long len, unsigned int psize);
  
  void slice_init_new_context_exec(struct mm_struct *mm);
+void slice_setup_new_exec(void);
  
  #endif /* __ASSEMBLY__ */
  
diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h

index 95b66a0..4169574 100644 (file)
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -100,6 +100,7 @@ static inline void set_hard_smp_processor_id(int cpu, int phys)
  DECLARE_PER_CPU(cpumask_var_t, cpu_sibling_map);
  DECLARE_PER_CPU(cpumask_var_t, cpu_l2_cache_map);
  DECLARE_PER_CPU(cpumask_var_t, cpu_core_map);
+DECLARE_PER_CPU(cpumask_var_t, cpu_smallcore_map);
  
  static inline struct cpumask *cpu_sibling_mask(int cpu)
  {
@@ -116,6 +117,11 @@ static inline struct cpumask *cpu_l2_cache_mask(int cpu)
         return per_cpu(cpu_l2_cache_map, cpu);
  }
  
+static inline struct cpumask *cpu_smallcore_mask(int cpu)
+{
+       return per_cpu(cpu_smallcore_map, cpu);
+}
+
  extern int cpu_to_core_id(int cpu);
  
  /* Since OpenPIC has only 4 IPIs, we use slightly different message numbers.
@@ -166,6 +172,11 @@ static inline const struct cpumask *cpu_sibling_mask(int cpu)
         return cpumask_of(cpu);
  }
  
+static inline const struct cpumask *cpu_smallcore_mask(int cpu)
+{
+       return cpumask_of(cpu);
+}
+
  #endif /* CONFIG_SMP */
  
  #ifdef CONFIG_PPC64
diff --git a/arch/powerpc/include/asm/sparsemem.h b/arch/powerpc/include/asm/sparsemem.h

index 28f5dae..68da493 100644 (file)
--- a/arch/powerpc/include/asm/sparsemem.h
+++ b/arch/powerpc/include/asm/sparsemem.h
@@ -9,17 +9,6 @@
   * MAX_PHYSMEM_BITS            2^N: how much memory we can have in that space
   */
  #define SECTION_SIZE_BITS       24
-/*
- * If we store section details in page->flags we can't increase the MAX_PHYSMEM_BITS
- * if we increase SECTIONS_WIDTH we will not store node details in page->flags and
- * page_to_nid does a page->section->node lookup
- * Hence only increase for VMEMMAP.
- */
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
-#define MAX_PHYSMEM_BITS        47
-#else
-#define MAX_PHYSMEM_BITS        46
-#endif
  
  #endif /* CONFIG_SPARSEMEM */
  
diff --git a/arch/powerpc/include/asm/stackprotector.h b/arch/powerpc/include/asm/stackprotector.h

new file mode 100644 (file)

index 0000000..1c8460e
--- /dev/null
+++ b/arch/powerpc/include/asm/stackprotector.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * GCC stack protector support.
+ *
+ */
+
+#ifndef _ASM_STACKPROTECTOR_H
+#define _ASM_STACKPROTECTOR_H
+
+#include <linux/random.h>
+#include <linux/version.h>
+#include <asm/reg.h>
+#include <asm/current.h>
+#include <asm/paca.h>
+
+/*
+ * Initialize the stackprotector canary value.
+ *
+ * NOTE: this must only be called from functions that never return,
+ * and it must always be inlined.
+ */
+static __always_inline void boot_init_stack_canary(void)
+{
+       unsigned long canary;
+
+       /* Try to get a semi random initial value. */
+       canary = get_random_canary();
+       canary ^= mftb();
+       canary ^= LINUX_VERSION_CODE;
+       canary &= CANARY_MASK;
+
+       current->stack_canary = canary;
+#ifdef CONFIG_PPC64
+       get_paca()->canary = canary;
+#endif
+}
+
+#endif /* _ASM_STACKPROTECTOR_H */
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h

index 3c00020..544cac0 100644 (file)
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -29,6 +29,7 @@
  #include <asm/page.h>
  #include <asm/accounting.h>
  
+#define SLB_PRELOAD_NR 16U
  /*
   * low level task data.
   */
@@ -44,6 +45,10 @@ struct thread_info {
  #if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC32)
         struct cpu_accounting_data accounting;
  #endif
+       unsigned char slb_preload_nr;
+       unsigned char slb_preload_tail;
+       u32 slb_preload_esid[SLB_PRELOAD_NR];
+
         /* low level flags - has atomic operations done on it */
         unsigned long   flags ____cacheline_aligned_in_smp;
  };
@@ -72,6 +77,12 @@ static inline struct thread_info *current_thread_info(void)
  }
  
  extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
+
+#ifdef CONFIG_PPC_BOOK3S_64
+void arch_setup_new_exec(void);
+#define arch_setup_new_exec arch_setup_new_exec
+#endif
+
  #endif /* __ASSEMBLY__ */
  
  /*
@@ -81,7 +92,7 @@ extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src
  #define TIF_SIGPENDING         1       /* signal pending */
  #define TIF_NEED_RESCHED       2       /* rescheduling necessary */
  #define TIF_FSCHECK            3       /* Check FS is USER_DS on return */
-#define TIF_32BIT              4       /* 32 bit binary */
+#define TIF_SYSCALL_EMU                4       /* syscall emulation active */
  #define TIF_RESTORE_TM         5       /* need to restore TM FP/VEC/VSX */
  #define TIF_PATCH_PENDING      6       /* pending live patching update */
  #define TIF_SYSCALL_AUDIT      7       /* syscall auditing active */
@@ -100,6 +111,7 @@ extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src
  #define TIF_ELF2ABI            18      /* function descriptors must die! */
  #endif
  #define TIF_POLLING_NRFLAG     19      /* true if poll_idle() is polling TIF_NEED_RESCHED */
+#define TIF_32BIT              20      /* 32 bit binary */
  
  /* as above, but as bit values */
  #define _TIF_SYSCALL_TRACE     (1<<TIF_SYSCALL_TRACE)
@@ -120,9 +132,10 @@ extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src
  #define _TIF_EMULATE_STACK_STORE       (1<<TIF_EMULATE_STACK_STORE)
  #define _TIF_NOHZ              (1<<TIF_NOHZ)
  #define _TIF_FSCHECK           (1<<TIF_FSCHECK)
+#define _TIF_SYSCALL_EMU       (1<<TIF_SYSCALL_EMU)
  #define _TIF_SYSCALL_DOTRACE   (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
                                  _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \
-                                _TIF_NOHZ)
+                                _TIF_NOHZ | _TIF_SYSCALL_EMU)
  
  #define _TIF_USER_WORK_MASK    (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
                                  _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
diff --git a/arch/powerpc/include/asm/trace.h b/arch/powerpc/include/asm/trace.h

index d018e86..58ef8c4 100644 (file)
--- a/arch/powerpc/include/asm/trace.h
+++ b/arch/powerpc/include/asm/trace.h
@@ -201,6 +201,21 @@ TRACE_EVENT(tlbie,
                 __entry->r)
  );
  
+TRACE_EVENT(tlbia,
+
+       TP_PROTO(unsigned long id),
+       TP_ARGS(id),
+       TP_STRUCT__entry(
+               __field(unsigned long, id)
+               ),
+
+       TP_fast_assign(
+               __entry->id = id;
+               ),
+
+       TP_printk("ctx.id=0x%lx", __entry->id)
+);
+
  #endif /* _TRACE_POWERPC_H */
  
  #undef TRACE_INCLUDE_PATH
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h

index bac225b..15bea9a 100644 (file)
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -260,7 +260,7 @@ do {                                                                \
  ({                                                             \
         long __gu_err;                                          \
         __long_type(*(ptr)) __gu_val;                           \
-       const __typeof__(*(ptr)) __user *__gu_addr = (ptr);     \
+       __typeof__(*(ptr)) __user *__gu_addr = (ptr);   \
         __chk_user_ptr(ptr);                                    \
         if (!is_kernel_addr((unsigned long)__gu_addr))          \
                 might_fault();                                  \
@@ -274,7 +274,7 @@ do {                                                                \
  ({                                                                     \
         long __gu_err = -EFAULT;                                        \
         __long_type(*(ptr)) __gu_val = 0;                               \
-       const __typeof__(*(ptr)) __user *__gu_addr = (ptr);             \
+       __typeof__(*(ptr)) __user *__gu_addr = (ptr);           \
         might_fault();                                                  \
         if (access_ok(VERIFY_READ, __gu_addr, (size))) {                \
                 barrier_nospec();                                       \
@@ -288,7 +288,7 @@ do {                                                                \
  ({                                                             \
         long __gu_err;                                          \
         __long_type(*(ptr)) __gu_val;                           \
-       const __typeof__(*(ptr)) __user *__gu_addr = (ptr);     \
+       __typeof__(*(ptr)) __user *__gu_addr = (ptr);   \
         __chk_user_ptr(ptr);                                    \
         barrier_nospec();                                       \
         __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \
diff --git a/arch/powerpc/include/asm/user.h b/arch/powerpc/include/asm/user.h

index 5c0e082..99443b8 100644 (file)
--- a/arch/powerpc/include/asm/user.h
+++ b/arch/powerpc/include/asm/user.h
@@ -31,7 +31,7 @@
   *     to write an integer number of pages.
   */
  struct user {
-       struct pt_regs  regs;                   /* entire machine state */
+       struct user_pt_regs regs;               /* entire machine state */
         size_t          u_tsize;                /* text size (pages) */
         size_t          u_dsize;                /* data size (pages) */
         size_t          u_ssize;                /* stack size (pages) */
diff --git a/arch/powerpc/include/uapi/asm/ptrace.h b/arch/powerpc/include/uapi/asm/ptrace.h

index 5e3edc2..f5f1ccc 100644 (file)
--- a/arch/powerpc/include/uapi/asm/ptrace.h
+++ b/arch/powerpc/include/uapi/asm/ptrace.h
@@ -29,7 +29,12 @@
  
  #ifndef __ASSEMBLY__
  
-struct pt_regs {
+#ifdef __KERNEL__
+struct user_pt_regs
+#else
+struct pt_regs
+#endif
+{
         unsigned long gpr[32];
         unsigned long nip;
         unsigned long msr;
@@ -160,6 +165,10 @@ struct pt_regs {
  #define PTRACE_GETVSRREGS      0x1b
  #define PTRACE_SETVSRREGS      0x1c
  
+/* Syscall emulation defines */
+#define PTRACE_SYSEMU                  0x1d
+#define PTRACE_SYSEMU_SINGLESTEP       0x1e
+
  /*
   * Get or set a debug register. The first 16 are DABR registers and the
   * second 16 are IABR registers.
diff --git a/arch/powerpc/include/uapi/asm/sigcontext.h b/arch/powerpc/include/uapi/asm/sigcontext.h

index 2fbe485..630aeda 100644 (file)
--- a/arch/powerpc/include/uapi/asm/sigcontext.h
+++ b/arch/powerpc/include/uapi/asm/sigcontext.h
@@ -22,7 +22,11 @@ struct sigcontext {
  #endif
         unsigned long   handler;
         unsigned long   oldmask;
-       struct pt_regs  __user *regs;
+#ifdef __KERNEL__
+       struct user_pt_regs __user *regs;
+#else
+       struct pt_regs  *regs;
+#endif
  #ifdef __powerpc64__
         elf_gregset_t   gp_regs;
         elf_fpregset_t  fp_regs;
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile

index 3b66f2c..53d4b8d 100644 (file)
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -5,7 +5,8 @@
  
  CFLAGS_ptrace.o                += -DUTS_MACHINE='"$(UTS_MACHINE)"'
  
-subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
+# Disable clang warning for using setjmp without setjmp.h header
+CFLAGS_crash.o         += $(call cc-disable-warning, builtin-requires-header)
  
  ifdef CONFIG_PPC64
  CFLAGS_prom_init.o     += $(NO_MINIMAL_TOC)
@@ -20,12 +21,14 @@ CFLAGS_prom_init.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
  CFLAGS_btext.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
  CFLAGS_prom.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
  
+CFLAGS_prom_init.o += $(call cc-option, -fno-stack-protector)
+
  ifdef CONFIG_FUNCTION_TRACER
  # Do not trace early boot code
-CFLAGS_REMOVE_cputable.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
-CFLAGS_REMOVE_prom_init.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
-CFLAGS_REMOVE_btext.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
-CFLAGS_REMOVE_prom.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_cputable.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_prom_init.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_btext.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_prom.o = $(CC_FLAGS_FTRACE)
  endif
  
  obj-y                          := cputable.o ptrace.o syscalls.o \
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c

index d68b9ef..9ffc72d 100644 (file)
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -79,11 +79,16 @@ int main(void)
  {
         OFFSET(THREAD, task_struct, thread);
         OFFSET(MM, task_struct, mm);
+#ifdef CONFIG_STACKPROTECTOR
+       OFFSET(TASK_CANARY, task_struct, stack_canary);
+#ifdef CONFIG_PPC64
+       OFFSET(PACA_CANARY, paca_struct, canary);
+#endif
+#endif
         OFFSET(MMCONTEXTID, mm_struct, context.id);
  #ifdef CONFIG_PPC64
         DEFINE(SIGSEGV, SIGSEGV);
         DEFINE(NMI_MASK, NMI_MASK);
-       OFFSET(TASKTHREADPPR, task_struct, thread.ppr);
  #else
         OFFSET(THREAD_INFO, task_struct, stack);
         DEFINE(THREAD_INFO_GAP, _ALIGN_UP(sizeof(struct thread_info), 16));
@@ -173,7 +178,6 @@ int main(void)
         OFFSET(PACAKSAVE, paca_struct, kstack);
         OFFSET(PACACURRENT, paca_struct, __current);
         OFFSET(PACASAVEDMSR, paca_struct, saved_msr);
-       OFFSET(PACASTABRR, paca_struct, stab_rr);
         OFFSET(PACAR1, paca_struct, saved_r1);
         OFFSET(PACATOC, paca_struct, kernel_toc);
         OFFSET(PACAKBASE, paca_struct, kernelbase);
@@ -212,6 +216,7 @@ int main(void)
  #ifdef CONFIG_PPC_BOOK3S_64
         OFFSET(PACASLBCACHE, paca_struct, slb_cache);
         OFFSET(PACASLBCACHEPTR, paca_struct, slb_cache_ptr);
+       OFFSET(PACASTABRR, paca_struct, stab_rr);
         OFFSET(PACAVMALLOCSLLP, paca_struct, vmalloc_sllp);
  #ifdef CONFIG_PPC_MM_SLICES
         OFFSET(MMUPSIZESLLP, mmu_psize_def, sllp);
@@ -274,11 +279,6 @@ int main(void)
         /* Interrupt register frame */
         DEFINE(INT_FRAME_SIZE, STACK_INT_FRAME_SIZE);
         DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs));
-#ifdef CONFIG_PPC64
-       /* Create extra stack space for SRR0 and SRR1 when calling prom/rtas. */
-       DEFINE(PROM_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16);
-       DEFINE(RTAS_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16);
-#endif /* CONFIG_PPC64 */
         STACK_PT_REGS_OFFSET(GPR0, gpr[0]);
         STACK_PT_REGS_OFFSET(GPR1, gpr[1]);
         STACK_PT_REGS_OFFSET(GPR2, gpr[2]);
@@ -322,10 +322,7 @@ int main(void)
         STACK_PT_REGS_OFFSET(_ESR, dsisr);
  #else /* CONFIG_PPC64 */
         STACK_PT_REGS_OFFSET(SOFTE, softe);
-
-       /* These _only_ to be used with {PROM,RTAS}_FRAME_SIZE!!! */
-       DEFINE(_SRR0, STACK_FRAME_OVERHEAD+sizeof(struct pt_regs));
-       DEFINE(_SRR1, STACK_FRAME_OVERHEAD+sizeof(struct pt_regs)+8);
+       STACK_PT_REGS_OFFSET(_PPR, ppr);
  #endif /* CONFIG_PPC64 */
  
  #if defined(CONFIG_PPC32)
diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c

index b2072d5..b4241ed 100644 (file)
--- a/arch/powerpc/kernel/btext.c
+++ b/arch/powerpc/kernel/btext.c
@@ -163,7 +163,7 @@ void btext_map(void)
         offset = ((unsigned long) dispDeviceBase) - base;
         size = dispDeviceRowBytes * dispDeviceRect[3] + offset
                 + dispDeviceRect[0];
-       vbase = __ioremap(base, size, pgprot_val(pgprot_noncached_wc(__pgprot(0))));
+       vbase = ioremap_wc(base, size);
         if (!vbase)
                 return;
         logicalDisplayBase = vbase + offset;
diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c

index a8f20e5..be57bd0 100644 (file)
--- a/arch/powerpc/kernel/cacheinfo.c
+++ b/arch/powerpc/kernel/cacheinfo.c
@@ -20,6 +20,8 @@
  #include <linux/percpu.h>
  #include <linux/slab.h>
  #include <asm/prom.h>
+#include <asm/cputhreads.h>
+#include <asm/smp.h>
  
  #include "cacheinfo.h"
  
@@ -627,17 +629,48 @@ static ssize_t level_show(struct kobject *k, struct kobj_attribute *attr, char *
  static struct kobj_attribute cache_level_attr =
         __ATTR(level, 0444, level_show, NULL);
  
+static unsigned int index_dir_to_cpu(struct cache_index_dir *index)
+{
+       struct kobject *index_dir_kobj = &index->kobj;
+       struct kobject *cache_dir_kobj = index_dir_kobj->parent;
+       struct kobject *cpu_dev_kobj = cache_dir_kobj->parent;
+       struct device *dev = kobj_to_dev(cpu_dev_kobj);
+
+       return dev->id;
+}
+
+/*
+ * On big-core systems, each core has two groups of CPUs each of which
+ * has its own L1-cache. The thread-siblings which share l1-cache with
+ * @cpu can be obtained via cpu_smallcore_mask().
+ */
+static const struct cpumask *get_big_core_shared_cpu_map(int cpu, struct cache *cache)
+{
+       if (cache->level == 1)
+               return cpu_smallcore_mask(cpu);
+
+       return &cache->shared_cpu_map;
+}
+
  static ssize_t shared_cpu_map_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
  {
         struct cache_index_dir *index;
         struct cache *cache;
-       int ret;
+       const struct cpumask *mask;
+       int ret, cpu;
  
         index = kobj_to_cache_index_dir(k);
         cache = index->cache;
  
+       if (has_big_cores) {
+               cpu = index_dir_to_cpu(index);
+               mask = get_big_core_shared_cpu_map(cpu, cache);
+       } else {
+               mask  = &cache->shared_cpu_map;
+       }
+
         ret = scnprintf(buf, PAGE_SIZE - 1, "%*pb\n",
-                       cpumask_pr_args(&cache->shared_cpu_map));
+                       cpumask_pr_args(mask));
         buf[ret++] = '\n';
         buf[ret] = '\0';
         return ret;
diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c

index d10ad25..bbdc470 100644 (file)
--- a/arch/powerpc/kernel/crash_dump.c
+++ b/arch/powerpc/kernel/crash_dump.c
@@ -110,7 +110,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
                 vaddr = __va(paddr);
                 csize = copy_oldmem_vaddr(vaddr, buf, csize, offset, userbuf);
         } else {
-               vaddr = __ioremap(paddr, PAGE_SIZE, 0);
+               vaddr = ioremap_cache(paddr, PAGE_SIZE);
                 csize = copy_oldmem_vaddr(vaddr, buf, csize, offset, userbuf);
                 iounmap(vaddr);
         }
diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c

index 88f3963..5fc335f 100644 (file)
--- a/arch/powerpc/kernel/dma-swiotlb.c
+++ b/arch/powerpc/kernel/dma-swiotlb.c
@@ -11,7 +11,7 @@
   *
   */
  
-#include <linux/dma-mapping.h>
+#include <linux/dma-direct.h>
  #include <linux/memblock.h>
  #include <linux/pfn.h>
  #include <linux/of_platform.h>
@@ -59,7 +59,7 @@ const struct dma_map_ops powerpc_swiotlb_dma_ops = {
         .sync_single_for_device = swiotlb_sync_single_for_device,
         .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
         .sync_sg_for_device = swiotlb_sync_sg_for_device,
-       .mapping_error = swiotlb_dma_mapping_error,
+       .mapping_error = dma_direct_mapping_error,
         .get_required_mask = swiotlb_powerpc_get_required,
  };
  
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c

index 6ebba3e..6cae6b5 100644 (file)
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -169,6 +169,11 @@ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len)
         int n = 0, l = 0;
         char buffer[128];
  
+       if (!pdn) {
+               pr_warn("EEH: Note: No error log for absent device.\n");
+               return 0;
+       }
+
         n += scnprintf(buf+n, len-n, "%04x:%02x:%02x.%01x\n",
                        pdn->phb->global_number, pdn->busno,
                        PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
@@ -399,7 +404,7 @@ static int eeh_phb_check_failure(struct eeh_pe *pe)
         }
  
         /* Isolate the PHB and send event */
-       eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED);
+       eeh_pe_mark_isolated(phb_pe);
         eeh_serialize_unlock(flags);
  
         pr_err("EEH: PHB#%x failure detected, location: %s\n",
@@ -558,7 +563,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
          * with other functions on this device, and functions under
          * bridges.
          */
-       eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
+       eeh_pe_mark_isolated(pe);
         eeh_serialize_unlock(flags);
  
         /* Most EEH events are due to device driver bugs.  Having
@@ -676,7 +681,7 @@ int eeh_pci_enable(struct eeh_pe *pe, int function)
  
         /* Check if the request is finished successfully */
         if (active_flag) {
-               rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
+               rc = eeh_wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
                 if (rc < 0)
                         return rc;
  
@@ -825,7 +830,8 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat
                 eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
                 break;
         case pcie_hot_reset:
-               eeh_pe_state_mark_with_cfg(pe, EEH_PE_ISOLATED);
+               eeh_pe_mark_isolated(pe);
+               eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
                 eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);
                 eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev);
                 if (!(pe->type & EEH_PE_VF))
@@ -833,7 +839,8 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat
                 eeh_ops->reset(pe, EEH_RESET_HOT);
                 break;
         case pcie_warm_reset:
-               eeh_pe_state_mark_with_cfg(pe, EEH_PE_ISOLATED);
+               eeh_pe_mark_isolated(pe);
+               eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
                 eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);
                 eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev);
                 if (!(pe->type & EEH_PE_VF))
@@ -913,16 +920,15 @@ int eeh_pe_reset_full(struct eeh_pe *pe)
                         break;
  
                 /* Wait until the PE is in a functioning state */
-               state = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
-               if (eeh_state_active(state))
-                       break;
-
+               state = eeh_wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
                 if (state < 0) {
                         pr_warn("%s: Unrecoverable slot failure on PHB#%x-PE#%x",
                                 __func__, pe->phb->global_number, pe->addr);
                         ret = -ENOTRECOVERABLE;
                         break;
                 }
+               if (eeh_state_active(state))
+                       break;
  
                 /* Set error in case this is our last attempt */
                 ret = -EIO;
@@ -1036,6 +1042,11 @@ void eeh_probe_devices(void)
                 pdn = hose->pci_data;
                 traverse_pci_dn(pdn, eeh_ops->probe, NULL);
         }
+       if (eeh_enabled())
+               pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n");
+       else
+               pr_info("EEH: No capable adapters found\n");
+
  }
  
  /**
@@ -1079,18 +1090,7 @@ static int eeh_init(void)
                 eeh_dev_phb_init_dynamic(hose);
  
         /* Initialize EEH event */
-       ret = eeh_event_init();
-       if (ret)
-               return ret;
-
-       eeh_probe_devices();
-
-       if (eeh_enabled())
-               pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n");
-       else if (!eeh_has_flag(EEH_POSTPONED_PROBE))
-               pr_info("EEH: No capable adapters found\n");
-
-       return ret;
+       return eeh_event_init();
  }
  
  core_initcall_sync(eeh_init);
diff --git a/arch/powerpc/kernel/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c

index a34e691..d8c90f3 100644 (file)
--- a/arch/powerpc/kernel/eeh_dev.c
+++ b/arch/powerpc/kernel/eeh_dev.c
@@ -60,8 +60,6 @@ struct eeh_dev *eeh_dev_init(struct pci_dn *pdn)
         /* Associate EEH device with OF node */
         pdn->edev = edev;
         edev->pdn = pdn;
-       INIT_LIST_HEAD(&edev->list);
-       INIT_LIST_HEAD(&edev->rmv_list);
  
         return edev;
  }
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c

index 67619b4..9446248 100644 (file)
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -35,8 +35,8 @@
  #include <asm/rtas.h>
  
  struct eeh_rmv_data {
-       struct list_head edev_list;
-       int removed;
+       struct list_head removed_vf_list;
+       int removed_dev_count;
  };
  
  static int eeh_result_priority(enum pci_ers_result result)
@@ -281,6 +281,10 @@ static void eeh_pe_report_edev(struct eeh_dev *edev, eeh_report_fn fn,
         struct pci_driver *driver;
         enum pci_ers_result new_result;
  
+       if (!edev->pdev) {
+               eeh_edev_info(edev, "no device");
+               return;
+       }
         device_lock(&edev->pdev->dev);
         if (eeh_edev_actionable(edev)) {
                 driver = eeh_pcid_get(edev->pdev);
@@ -400,7 +404,7 @@ static void *eeh_dev_restore_state(struct eeh_dev *edev, void *userdata)
          * EEH device is created.
          */
         if (edev->pe && (edev->pe->state & EEH_PE_CFG_RESTRICTED)) {
-               if (list_is_last(&edev->list, &edev->pe->edevs))
+               if (list_is_last(&edev->entry, &edev->pe->edevs))
                         eeh_pe_restore_bars(edev->pe);
  
                 return NULL;
@@ -465,10 +469,9 @@ static enum pci_ers_result eeh_report_failure(struct eeh_dev *edev,
         return rc;
  }
  
-static void *eeh_add_virt_device(void *data, void *userdata)
+static void *eeh_add_virt_device(struct eeh_dev *edev)
  {
         struct pci_driver *driver;
-       struct eeh_dev *edev = (struct eeh_dev *)data;
         struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
         struct pci_dn *pdn = eeh_dev_to_pdn(edev);
  
@@ -499,7 +502,6 @@ static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata)
         struct pci_driver *driver;
         struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
         struct eeh_rmv_data *rmv_data = (struct eeh_rmv_data *)userdata;
-       int *removed = rmv_data ? &rmv_data->removed : NULL;
  
         /*
          * Actually, we should remove the PCI bridges as well.
@@ -521,7 +523,7 @@ static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata)
         if (eeh_dev_removed(edev))
                 return NULL;
  
-       if (removed) {
+       if (rmv_data) {
                 if (eeh_pe_passed(edev->pe))
                         return NULL;
                 driver = eeh_pcid_get(dev);
@@ -539,10 +541,9 @@ static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata)
         /* Remove it from PCI subsystem */
         pr_debug("EEH: Removing %s without EEH sensitive driver\n",
                  pci_name(dev));
-       edev->bus = dev->bus;
         edev->mode |= EEH_DEV_DISCONNECTED;
-       if (removed)
-               (*removed)++;
+       if (rmv_data)
+               rmv_data->removed_dev_count++;
  
         if (edev->physfn) {
  #ifdef CONFIG_PCI_IOV
@@ -558,7 +559,7 @@ static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata)
                 pdn->pe_number = IODA_INVALID_PE;
  #endif
                 if (rmv_data)
-                       list_add(&edev->rmv_list, &rmv_data->edev_list);
+                       list_add(&edev->rmv_entry, &rmv_data->removed_vf_list);
         } else {
                 pci_lock_rescan_remove();
                 pci_stop_and_remove_bus_device(dev);
@@ -727,7 +728,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
          * the device up before the scripts have taken it down,
          * potentially weird things happen.
          */
-       if (!driver_eeh_aware || rmv_data->removed) {
+       if (!driver_eeh_aware || rmv_data->removed_dev_count) {
                 pr_info("EEH: Sleep 5s ahead of %s hotplug\n",
                         (driver_eeh_aware ? "partial" : "complete"));
                 ssleep(5);
@@ -737,10 +738,10 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
                  * PE. We should disconnect it so the binding can be
                  * rebuilt when adding PCI devices.
                  */
-               edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
+               edev = list_first_entry(&pe->edevs, struct eeh_dev, entry);
                 eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
                 if (pe->type & EEH_PE_VF) {
-                       eeh_add_virt_device(edev, NULL);
+                       eeh_add_virt_device(edev);
                 } else {
                         if (!driver_eeh_aware)
                                 eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
@@ -789,7 +790,8 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
         struct eeh_pe *tmp_pe;
         int rc = 0;
         enum pci_ers_result result = PCI_ERS_RESULT_NONE;
-       struct eeh_rmv_data rmv_data = {LIST_HEAD_INIT(rmv_data.edev_list), 0};
+       struct eeh_rmv_data rmv_data =
+               {LIST_HEAD_INIT(rmv_data.removed_vf_list), 0};
  
         bus = eeh_pe_bus_get(pe);
         if (!bus) {
@@ -806,10 +808,8 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
                 pr_err("EEH: PHB#%x-PE#%x has failed %d times in the last hour and has been permanently disabled.\n",
                        pe->phb->global_number, pe->addr,
                        pe->freeze_count);
-               goto hard_fail;
+               result = PCI_ERS_RESULT_DISCONNECT;
         }
-       pr_warn("EEH: This PCI device has failed %d times in the last hour and will be permanently disabled after %d failures.\n",
-               pe->freeze_count, eeh_max_freezes);
  
         /* Walk the various device drivers attached to this slot through
          * a reset sequence, giving each an opportunity to do what it needs
@@ -821,31 +821,39 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
          * the error. Override the result if necessary to have partially
          * hotplug for this case.
          */
-       pr_info("EEH: Notify device drivers to shutdown\n");
-       eeh_set_channel_state(pe, pci_channel_io_frozen);
-       eeh_set_irq_state(pe, false);
-       eeh_pe_report("error_detected(IO frozen)", pe, eeh_report_error,
-                     &result);
-       if ((pe->type & EEH_PE_PHB) &&
-           result != PCI_ERS_RESULT_NONE &&
-           result != PCI_ERS_RESULT_NEED_RESET)
-               result = PCI_ERS_RESULT_NEED_RESET;
+       if (result != PCI_ERS_RESULT_DISCONNECT) {
+               pr_warn("EEH: This PCI device has failed %d times in the last hour and will be permanently disabled after %d failures.\n",
+                       pe->freeze_count, eeh_max_freezes);
+               pr_info("EEH: Notify device drivers to shutdown\n");
+               eeh_set_channel_state(pe, pci_channel_io_frozen);
+               eeh_set_irq_state(pe, false);
+               eeh_pe_report("error_detected(IO frozen)", pe,
+                             eeh_report_error, &result);
+               if ((pe->type & EEH_PE_PHB) &&
+                   result != PCI_ERS_RESULT_NONE &&
+                   result != PCI_ERS_RESULT_NEED_RESET)
+                       result = PCI_ERS_RESULT_NEED_RESET;
+       }
  
         /* Get the current PCI slot state. This can take a long time,
          * sometimes over 300 seconds for certain systems.
          */
-       rc = eeh_ops->wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000);
-       if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) {
-               pr_warn("EEH: Permanent failure\n");
-               goto hard_fail;
+       if (result != PCI_ERS_RESULT_DISCONNECT) {
+               rc = eeh_wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000);
+               if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) {
+                       pr_warn("EEH: Permanent failure\n");
+                       result = PCI_ERS_RESULT_DISCONNECT;
+               }
         }
  
         /* Since rtas may enable MMIO when posting the error log,
          * don't post the error log until after all dev drivers
          * have been informed.
          */
-       pr_info("EEH: Collect temporary log\n");
-       eeh_slot_error_detail(pe, EEH_LOG_TEMP);
+       if (result != PCI_ERS_RESULT_DISCONNECT) {
+               pr_info("EEH: Collect temporary log\n");
+               eeh_slot_error_detail(pe, EEH_LOG_TEMP);
+       }
  
         /* If all device drivers were EEH-unaware, then shut
          * down all of the device drivers, and hope they
@@ -857,7 +865,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
                 if (rc) {
                         pr_warn("%s: Unable to reset, err=%d\n",
                                 __func__, rc);
-                       goto hard_fail;
+                       result = PCI_ERS_RESULT_DISCONNECT;
                 }
         }
  
@@ -866,9 +874,9 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
                 pr_info("EEH: Enable I/O for affected devices\n");
                 rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
  
-               if (rc < 0)
-                       goto hard_fail;
-               if (rc) {
+               if (rc < 0) {
+                       result = PCI_ERS_RESULT_DISCONNECT;
+               } else if (rc) {
                         result = PCI_ERS_RESULT_NEED_RESET;
                 } else {
                         pr_info("EEH: Notify device drivers to resume I/O\n");
@@ -882,9 +890,9 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
                 pr_info("EEH: Enabled DMA for affected devices\n");
                 rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
  
-               if (rc < 0)
-                       goto hard_fail;
-               if (rc) {
+               if (rc < 0) {
+                       result = PCI_ERS_RESULT_DISCONNECT;
+               } else if (rc) {
                         result = PCI_ERS_RESULT_NEED_RESET;
                 } else {
                         /*
@@ -897,12 +905,6 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
                 }
         }
  
-       /* If any device has a hard failure, then shut off everything. */
-       if (result == PCI_ERS_RESULT_DISCONNECT) {
-               pr_warn("EEH: Device driver gave up\n");
-               goto hard_fail;
-       }
-
         /* If any device called out for a reset, then reset the slot */
         if (result == PCI_ERS_RESULT_NEED_RESET) {
                 pr_info("EEH: Reset without hotplug activity\n");
@@ -910,88 +912,81 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
                 if (rc) {
                         pr_warn("%s: Cannot reset, err=%d\n",
                                 __func__, rc);
-                       goto hard_fail;
+                       result = PCI_ERS_RESULT_DISCONNECT;
+               } else {
+                       result = PCI_ERS_RESULT_NONE;
+                       eeh_set_channel_state(pe, pci_channel_io_normal);
+                       eeh_set_irq_state(pe, true);
+                       eeh_pe_report("slot_reset", pe, eeh_report_reset,
+                                     &result);
                 }
-
-               pr_info("EEH: Notify device drivers "
-                       "the completion of reset\n");
-               result = PCI_ERS_RESULT_NONE;
-               eeh_set_channel_state(pe, pci_channel_io_normal);
-               eeh_set_irq_state(pe, true);
-               eeh_pe_report("slot_reset", pe, eeh_report_reset, &result);
-       }
-
-       /* All devices should claim they have recovered by now. */
-       if ((result != PCI_ERS_RESULT_RECOVERED) &&
-           (result != PCI_ERS_RESULT_NONE)) {
-               pr_warn("EEH: Not recovered\n");
-               goto hard_fail;
-       }
-
-       /*
-        * For those hot removed VFs, we should add back them after PF get
-        * recovered properly.
-        */
-       list_for_each_entry_safe(edev, tmp, &rmv_data.edev_list, rmv_list) {
-               eeh_add_virt_device(edev, NULL);
-               list_del(&edev->rmv_list);
         }
  
-       /* Tell all device drivers that they can resume operations */
-       pr_info("EEH: Notify device driver to resume\n");
-       eeh_set_channel_state(pe, pci_channel_io_normal);
-       eeh_set_irq_state(pe, true);
-       eeh_pe_report("resume", pe, eeh_report_resume, NULL);
-       eeh_for_each_pe(pe, tmp_pe) {
-               eeh_pe_for_each_dev(tmp_pe, edev, tmp) {
-                       edev->mode &= ~EEH_DEV_NO_HANDLER;
-                       edev->in_error = false;
+       if ((result == PCI_ERS_RESULT_RECOVERED) ||
+           (result == PCI_ERS_RESULT_NONE)) {
+               /*
+                * Hot-removed VFs must only be added back after the PF
+                * has recovered properly.
+                */
+               list_for_each_entry_safe(edev, tmp, &rmv_data.removed_vf_list,
+                                        rmv_entry) {
+                       eeh_add_virt_device(edev);
+                       list_del(&edev->rmv_entry);
                 }
-       }
  
-       pr_info("EEH: Recovery successful.\n");
-       goto final;
+               /* Tell all device drivers that they can resume operations */
+               pr_info("EEH: Notify device driver to resume\n");
+               eeh_set_channel_state(pe, pci_channel_io_normal);
+               eeh_set_irq_state(pe, true);
+               eeh_pe_report("resume", pe, eeh_report_resume, NULL);
+               eeh_for_each_pe(pe, tmp_pe) {
+                       eeh_pe_for_each_dev(tmp_pe, edev, tmp) {
+                               edev->mode &= ~EEH_DEV_NO_HANDLER;
+                               edev->in_error = false;
+                       }
+               }
  
-hard_fail:
-       /*
-        * About 90% of all real-life EEH failures in the field
-        * are due to poorly seated PCI cards. Only 10% or so are
-        * due to actual, failed cards.
-        */
-       pr_err("EEH: Unable to recover from failure from PHB#%x-PE#%x.\n"
-              "Please try reseating or replacing it\n",
-               pe->phb->global_number, pe->addr);
+               pr_info("EEH: Recovery successful.\n");
+       } else  {
+               /*
+                * About 90% of all real-life EEH failures in the field
+                * are due to poorly seated PCI cards. Only 10% or so are
+                * due to actual, failed cards.
+                */
+               pr_err("EEH: Unable to recover from failure from PHB#%x-PE#%x.\n"
+                      "Please try reseating or replacing it\n",
+                       pe->phb->global_number, pe->addr);
  
-       eeh_slot_error_detail(pe, EEH_LOG_PERM);
+               eeh_slot_error_detail(pe, EEH_LOG_PERM);
  
-       /* Notify all devices that they're about to go down. */
-       eeh_set_channel_state(pe, pci_channel_io_perm_failure);
-       eeh_set_irq_state(pe, false);
-       eeh_pe_report("error_detected(permanent failure)", pe,
-                     eeh_report_failure, NULL);
+               /* Notify all devices that they're about to go down. */
+               eeh_set_channel_state(pe, pci_channel_io_perm_failure);
+               eeh_set_irq_state(pe, false);
+               eeh_pe_report("error_detected(permanent failure)", pe,
+                             eeh_report_failure, NULL);
  
-       /* Mark the PE to be removed permanently */
-       eeh_pe_state_mark(pe, EEH_PE_REMOVED);
+               /* Mark the PE to be removed permanently */
+               eeh_pe_state_mark(pe, EEH_PE_REMOVED);
  
-       /*
-        * Shut down the device drivers for good. We mark
-        * all removed devices correctly to avoid access
-        * the their PCI config any more.
-        */
-       if (pe->type & EEH_PE_VF) {
-               eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
-               eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
-       } else {
-               eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
-               eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
+               /*
+                * Shut down the device drivers for good. We mark
+                * all removed devices correctly to avoid accessing
+                * their PCI config any more.
+                */
+               if (pe->type & EEH_PE_VF) {
+                       eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
+                       eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
+               } else {
+                       eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
+                       eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
  
-               pci_lock_rescan_remove();
-               pci_hp_remove_devices(bus);
-               pci_unlock_rescan_remove();
-               /* The passed PE should no longer be used */
-               return;
+                       pci_lock_rescan_remove();
+                       pci_hp_remove_devices(bus);
+                       pci_unlock_rescan_remove();
+                       /* The passed PE should no longer be used */
+                       return;
+               }
         }
-final:
         eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
  }
  
@@ -1026,7 +1021,7 @@ void eeh_handle_special_event(void)
                                 phb_pe = eeh_phb_pe_get(hose);
                                 if (!phb_pe) continue;
  
-                               eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED);
+                               eeh_pe_mark_isolated(phb_pe);
                         }
  
                         eeh_serialize_unlock(flags);
@@ -1041,11 +1036,9 @@ void eeh_handle_special_event(void)
                         /* Purge all events of the PHB */
                         eeh_remove_event(pe, true);
  
-                       if (rc == EEH_NEXT_ERR_DEAD_PHB)
-                               eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
-                       else
-                               eeh_pe_state_mark(pe,
-                                       EEH_PE_ISOLATED | EEH_PE_RECOVERING);
+                       if (rc != EEH_NEXT_ERR_DEAD_PHB)
+                               eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
+                       eeh_pe_mark_isolated(pe);
  
                         eeh_serialize_unlock(flags);
  
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c

index 1b238ec..6fa2032 100644 (file)
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -75,7 +75,6 @@ static struct eeh_pe *eeh_pe_alloc(struct pci_controller *phb, int type)
         pe->type = type;
         pe->phb = phb;
         INIT_LIST_HEAD(&pe->child_list);
-       INIT_LIST_HEAD(&pe->child);
         INIT_LIST_HEAD(&pe->edevs);
  
         pe->data = (void *)pe + ALIGN(sizeof(struct eeh_pe),
@@ -109,6 +108,57 @@ int eeh_phb_pe_create(struct pci_controller *phb)
         return 0;
  }
  
+/**
+ * eeh_wait_state - Poll until the PE state is available or time runs out
+ * @pe: EEH PE
+ * @max_wait: maximum total time to wait, in milliseconds
+ *
+ * Return: the value from eeh_ops->get_state() once it is no longer
+ * EEH_STATE_UNAVAILABLE, or EEH_STATE_NOT_SUPPORT on timeout.
+ */
+int eeh_wait_state(struct eeh_pe *pe, int max_wait)
+{
+       int ret;
+       int mwait;
+
+       /*
+        * According to PAPR, the state of a PE might be temporarily
+        * unavailable. In that case we have to wait for the time
+        * indicated by firmware before asking again. Each wait is
+        * clamped to at most 5 minutes and at least 1 second; both
+        * limits (in milliseconds below) are carried over from the
+        * original EEH implementation.
+        */
+#define EEH_STATE_MIN_WAIT_TIME        (1000)
+#define EEH_STATE_MAX_WAIT_TIME        (300 * 1000)
+
+       while (1) {
+               ret = eeh_ops->get_state(pe, &mwait);
+
+               if (ret != EEH_STATE_UNAVAILABLE)
+                       return ret;
+
+               if (max_wait <= 0) {
+                       pr_warn("%s: Timeout when getting PE's state (%d)\n",
+                               __func__, max_wait);
+                       return EEH_STATE_NOT_SUPPORT;
+               }
+
+               if (mwait < EEH_STATE_MIN_WAIT_TIME) {
+                       pr_warn("%s: Firmware returned bad wait value %d\n",
+                               __func__, mwait);
+                       mwait = EEH_STATE_MIN_WAIT_TIME;
+               } else if (mwait > EEH_STATE_MAX_WAIT_TIME) {
+                       pr_warn("%s: Firmware returned too long wait value %d\n",
+                               __func__, mwait);
+                       mwait = EEH_STATE_MAX_WAIT_TIME;
+               }
+
+               msleep(min(mwait, max_wait)); /* never sleep past the budget */
+               max_wait -= mwait; /* <= 0 makes the next unavailable poll time out */
+       }
+}
+
  /**
   * eeh_phb_pe_get - Retrieve PHB PE based on the given PHB
   * @phb: PCI controller
@@ -360,7 +410,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
                 edev->pe = pe;
  
                 /* Put the edev to PE */
-               list_add_tail(&edev->list, &pe->edevs);
+               list_add_tail(&edev->entry, &pe->edevs);
                 pr_debug("EEH: Add %04x:%02x:%02x.%01x to Bus PE#%x\n",
                          pdn->phb->global_number,
                          pdn->busno,
@@ -369,7 +419,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
                          pe->addr);
                 return 0;
         } else if (pe && (pe->type & EEH_PE_INVALID)) {
-               list_add_tail(&edev->list, &pe->edevs);
+               list_add_tail(&edev->entry, &pe->edevs);
                 edev->pe = pe;
                 /*
                  * We're running to here because of PCI hotplug caused by
@@ -379,7 +429,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
                 while (parent) {
                         if (!(parent->type & EEH_PE_INVALID))
                                 break;
-                       parent->type &= ~(EEH_PE_INVALID | EEH_PE_KEEP);
+                       parent->type &= ~EEH_PE_INVALID;
                         parent = parent->parent;
                 }
  
@@ -429,7 +479,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
          * link the EEH device accordingly.
          */
         list_add_tail(&pe->child, &parent->child_list);
-       list_add_tail(&edev->list, &pe->edevs);
+       list_add_tail(&edev->entry, &pe->edevs);
         edev->pe = pe;
         pr_debug("EEH: Add %04x:%02x:%02x.%01x to "
                  "Device PE#%x, Parent PE#%x\n",
@@ -457,7 +507,8 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev)
         int cnt;
         struct pci_dn *pdn = eeh_dev_to_pdn(edev);
  
-       if (!edev->pe) {
+       pe = eeh_dev_to_pe(edev);
+       if (!pe) {
                 pr_debug("%s: No PE found for device %04x:%02x:%02x.%01x\n",
                          __func__,  pdn->phb->global_number,
                          pdn->busno,
@@ -467,9 +518,8 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev)
         }
  
         /* Remove the EEH device */
-       pe = eeh_dev_to_pe(edev);
         edev->pe = NULL;
-       list_del(&edev->list);
+       list_del(&edev->entry);
  
         /*
          * Check if the parent PE includes any EEH devices.
@@ -541,56 +591,50 @@ void eeh_pe_update_time_stamp(struct eeh_pe *pe)
  }
  
  /**
- * __eeh_pe_state_mark - Mark the state for the PE
- * @data: EEH PE
- * @flag: state
+ * eeh_pe_state_mark - Mark specified state for a PE and its child PEs
+ * @root: EEH PE at which marking starts
   *
- * The function is used to mark the indicated state for the given
- * PE. Also, the associated PCI devices will be put into IO frozen
- * state as well.
+ * EEH error affects the current PE and its child PEs. The function
+ * ORs @state into each affected PE that is not already marked
+ * EEH_PE_REMOVED; the associated devices are not touched here.
   */
-static void *__eeh_pe_state_mark(struct eeh_pe *pe, void *flag)
+void eeh_pe_state_mark(struct eeh_pe *root, int state)
  {
-       int state = *((int *)flag);
-       struct eeh_dev *edev, *tmp;
-       struct pci_dev *pdev;
-
-       /* Keep the state of permanently removed PE intact */
-       if (pe->state & EEH_PE_REMOVED)
-               return NULL;
-
-       pe->state |= state;
-
-       /* Offline PCI devices if applicable */
-       if (!(state & EEH_PE_ISOLATED))
-               return NULL;
-
-       eeh_pe_for_each_dev(pe, edev, tmp) {
-               pdev = eeh_dev_to_pci_dev(edev);
-               if (pdev)
-                       pdev->error_state = pci_channel_io_frozen;
-       }
-
-       /* Block PCI config access if required */
-       if (pe->state & EEH_PE_CFG_RESTRICTED)
-               pe->state |= EEH_PE_CFG_BLOCKED;
+       struct eeh_pe *pe;
  
-       return NULL;
+       eeh_for_each_pe(root, pe)
+               if (!(pe->state & EEH_PE_REMOVED))
+                       pe->state |= state;
  }
+EXPORT_SYMBOL_GPL(eeh_pe_state_mark);
  
  /**
- * eeh_pe_state_mark - Mark specified state for PE and its associated device
+ * eeh_pe_mark_isolated
   * @pe: EEH PE
   *
- * EEH error affects the current PE and its child PEs. The function
- * is used to mark appropriate state for the affected PEs and the
- * associated devices.
+ * Record that a PE has been isolated by marking the PE and its children as
+ * EEH_PE_ISOLATED (and EEH_PE_CFG_BLOCKED, if required) and their PCI devices
+ * as pci_channel_io_frozen.
   */
-void eeh_pe_state_mark(struct eeh_pe *pe, int state)
+void eeh_pe_mark_isolated(struct eeh_pe *root)
  {
-       eeh_pe_traverse(pe, __eeh_pe_state_mark, &state);
+       struct eeh_pe *pe;
+       struct eeh_dev *edev;
+       struct pci_dev *pdev;
+
+       eeh_pe_state_mark(root, EEH_PE_ISOLATED);
+       eeh_for_each_pe(root, pe) {
+               list_for_each_entry(edev, &pe->edevs, entry) {
+                       pdev = eeh_dev_to_pci_dev(edev);
+                       if (pdev)
+                               pdev->error_state = pci_channel_io_frozen;
+               }
+               /* Block PCI config access if required */
+               if (pe->state & EEH_PE_CFG_RESTRICTED)
+                       pe->state |= EEH_PE_CFG_BLOCKED;
+       }
  }
-EXPORT_SYMBOL_GPL(eeh_pe_state_mark);
+EXPORT_SYMBOL_GPL(eeh_pe_mark_isolated);
  
  static void *__eeh_pe_dev_mode_mark(struct eeh_dev *edev, void *flag)
  {
@@ -671,28 +715,6 @@ void eeh_pe_state_clear(struct eeh_pe *pe, int state)
         eeh_pe_traverse(pe, __eeh_pe_state_clear, &state);
  }
  
-/**
- * eeh_pe_state_mark_with_cfg - Mark PE state with unblocked config space
- * @pe: PE
- * @state: PE state to be set
- *
- * Set specified flag to PE and its child PEs. The PCI config space
- * of some PEs is blocked automatically when EEH_PE_ISOLATED is set,
- * which isn't needed in some situations. The function allows to set
- * the specified flag to indicated PEs without blocking their PCI
- * config space.
- */
-void eeh_pe_state_mark_with_cfg(struct eeh_pe *pe, int state)
-{
-       eeh_pe_traverse(pe, __eeh_pe_state_mark, &state);
-       if (!(state & EEH_PE_ISOLATED))
-               return;
-
-       /* Clear EEH_PE_CFG_BLOCKED, which might be set just now */
-       state = EEH_PE_CFG_BLOCKED;
-       eeh_pe_traverse(pe, __eeh_pe_state_clear, &state);
-}
-
  /*
   * Some PCI bridges (e.g. PLX bridges) have primary/secondary
   * buses assigned explicitly by firmware, and we probably have
@@ -945,7 +967,7 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe)
                 return pe->bus;
  
         /* Retrieve the parent PCI bus of first (top) PCI device */
-       edev = list_first_entry_or_null(&pe->edevs, struct eeh_dev, list);
+       edev = list_first_entry_or_null(&pe->edevs, struct eeh_dev, entry);
         pdev = eeh_dev_to_pci_dev(edev);
         if (pdev)
                 return pdev->bus;
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S

index e58c3f4..77decde 100644 (file)
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -794,7 +794,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_601)
         lis     r10,MSR_KERNEL@h
         ori     r10,r10,MSR_KERNEL@l
         bl      transfer_to_handler_full
-       .long   nonrecoverable_exception
+       .long   unrecoverable_exception
         .long   ret_from_except
  #endif
  
@@ -1297,7 +1297,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_601)
         rlwinm  r3,r3,0,0,30
         stw     r3,_TRAP(r1)
  4:     addi    r3,r1,STACK_FRAME_OVERHEAD
-       bl      nonrecoverable_exception
+       bl      unrecoverable_exception
         /* shouldn't return */
         b       4b
  
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S

index 2206912..7b1693a 100644 (file)
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -171,7 +171,7 @@ system_call:                        /* label this so stack traces look sane */
   * based on caller's run-mode / personality.
   */
         ld      r11,SYS_CALL_TABLE@toc(2)
-       andi.   r10,r10,_TIF_32BIT
+       andis.  r10,r10,_TIF_32BIT@h
         beq     15f
         addi    r11,r11,8       /* use 32-bit syscall entries */
         clrldi  r3,r3,32
@@ -386,10 +386,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
  
  4:     /* Anything else left to do? */
  BEGIN_FTR_SECTION
-       lis     r3,INIT_PPR@highest     /* Set thread.ppr = 3 */
-       ld      r10,PACACURRENT(r13)
+       lis     r3,DEFAULT_PPR@highest  /* Set default PPR */
         sldi    r3,r3,32        /* bits 11-13 are used for ppr */
-       std     r3,TASKTHREADPPR(r10)
+       std     r3,_PPR(r1)
  END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
  
         andi.   r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP)
@@ -624,6 +623,10 @@ _GLOBAL(_switch)
  
         addi    r6,r4,-THREAD   /* Convert THREAD to 'current' */
         std     r6,PACACURRENT(r13)     /* Set new 'current' */
+#if defined(CONFIG_STACKPROTECTOR)
+       ld      r6, TASK_CANARY(r6)
+       std     r6, PACA_CANARY(r13)
+#endif
  
         ld      r8,KSP(r4)      /* new stack pointer */
  #ifdef CONFIG_PPC_BOOK3S_64
@@ -672,7 +675,9 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
  
         isync
         slbie   r6
+BEGIN_FTR_SECTION
         slbie   r6              /* Workaround POWER5 < DD2.1 issue */
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
         slbmte  r7,r0
         isync
  2:
@@ -936,12 +941,6 @@ fast_exception_return:
         andi.   r0,r3,MSR_RI
         beq-    .Lunrecov_restore
  
-       /* Load PPR from thread struct before we clear MSR:RI */
-BEGIN_FTR_SECTION
-       ld      r2,PACACURRENT(r13)
-       ld      r2,TASKTHREADPPR(r2)
-END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
-
         /*
          * Clear RI before restoring r13.  If we are returning to
          * userspace and we take an exception after restoring r13,
@@ -962,7 +961,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
         andi.   r0,r3,MSR_PR
         beq     1f
  BEGIN_FTR_SECTION
-       mtspr   SPRN_PPR,r2     /* Restore PPR */
+       /* Restore PPR */
+       ld      r2,_PPR(r1)
+       mtspr   SPRN_PPR,r2
  END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
         ACCOUNT_CPU_USER_EXIT(r13, r2, r4)
         REST_GPR(13, r1)
@@ -1118,7 +1119,7 @@ _ASM_NOKPROBE_SYMBOL(fast_exception_return);
  _GLOBAL(enter_rtas)
         mflr    r0
         std     r0,16(r1)
-        stdu   r1,-RTAS_FRAME_SIZE(r1) /* Save SP and create stack space. */
+        stdu   r1,-SWITCH_FRAME_SIZE(r1) /* Save SP and create stack space. */
  
         /* Because RTAS is running in 32b mode, it clobbers the high order half
          * of all registers that it saves.  We therefore save those registers
@@ -1250,7 +1251,7 @@ rtas_restore_regs:
         ld      r8,_DSISR(r1)
         mtdsisr r8
  
-        addi   r1,r1,RTAS_FRAME_SIZE   /* Unstack our frame */
+        addi   r1,r1,SWITCH_FRAME_SIZE /* Unstack our frame */
         ld      r0,16(r1)               /* get return address */
  
         mtlr    r0
@@ -1261,7 +1262,7 @@ rtas_restore_regs:
  _GLOBAL(enter_prom)
         mflr    r0
         std     r0,16(r1)
-        stdu   r1,-PROM_FRAME_SIZE(r1) /* Save SP and create stack space */
+        stdu   r1,-SWITCH_FRAME_SIZE(r1) /* Save SP and create stack space */
  
         /* Because PROM is running in 32b mode, it clobbers the high order half
          * of all registers that it saves.  We therefore save those registers
@@ -1318,8 +1319,8 @@ _GLOBAL(enter_prom)
         REST_10GPRS(22, r1)
         ld      r4,_CCR(r1)
         mtcr    r4
-       
-        addi   r1,r1,PROM_FRAME_SIZE
+
+        addi   r1,r1,SWITCH_FRAME_SIZE
         ld      r0,16(r1)
         mtlr    r0
          blr
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S

index 2d8fc8c..89d32bb 100644 (file)
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -244,14 +244,13 @@ EXC_REAL_BEGIN(machine_check, 0x200, 0x100)
         SET_SCRATCH0(r13)               /* save r13 */
         EXCEPTION_PROLOG_0(PACA_EXMC)
  BEGIN_FTR_SECTION
-       b       machine_check_powernv_early
+       b       machine_check_common_early
  FTR_SECTION_ELSE
         b       machine_check_pSeries_0
  ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
  EXC_REAL_END(machine_check, 0x200, 0x100)
  EXC_VIRT_NONE(0x4200, 0x100)
-TRAMP_REAL_BEGIN(machine_check_powernv_early)
-BEGIN_FTR_SECTION
+TRAMP_REAL_BEGIN(machine_check_common_early)
         EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200)
         /*
          * Register contents:
@@ -305,7 +304,9 @@ BEGIN_FTR_SECTION
         /* Save r9 through r13 from EXMC save area to stack frame. */
         EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)
         mfmsr   r11                     /* get MSR value */
+BEGIN_FTR_SECTION
         ori     r11,r11,MSR_ME          /* turn on ME bit */
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
         ori     r11,r11,MSR_RI          /* turn on RI bit */
         LOAD_HANDLER(r12, machine_check_handle_early)
  1:     mtspr   SPRN_SRR0,r12
@@ -324,13 +325,15 @@ BEGIN_FTR_SECTION
         andc    r11,r11,r10             /* Turn off MSR_ME */
         b       1b
         b       .       /* prevent speculative execution */
-END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
  
  TRAMP_REAL_BEGIN(machine_check_pSeries)
         .globl machine_check_fwnmi
  machine_check_fwnmi:
         SET_SCRATCH0(r13)               /* save r13 */
         EXCEPTION_PROLOG_0(PACA_EXMC)
+BEGIN_FTR_SECTION
+       b       machine_check_common_early
+END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
  machine_check_pSeries_0:
         EXCEPTION_PROLOG_1(PACA_EXMC, KVMTEST_PR, 0x200)
         /*
@@ -440,6 +443,9 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
         bl      machine_check_early
         std     r3,RESULT(r1)   /* Save result */
         ld      r12,_MSR(r1)
+BEGIN_FTR_SECTION
+       b       4f
+END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
  
  #ifdef CONFIG_PPC_P7_NAP
         /*
@@ -463,11 +469,12 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
          */
         rldicl. r11,r12,4,63            /* See if MC hit while in HV mode. */
         beq     5f
-       andi.   r11,r12,MSR_PR          /* See if coming from user. */
+4:     andi.   r11,r12,MSR_PR          /* See if coming from user. */
         bne     9f                      /* continue in V mode if we are. */
  
  5:
  #ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+BEGIN_FTR_SECTION
         /*
          * We are coming from kernel context. Check if we are coming from
          * guest. if yes, then we can continue. We will fall through
@@ -476,6 +483,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
         lbz     r11,HSTATE_IN_GUEST(r13)
         cmpwi   r11,0                   /* Check if coming from guest */
         bne     9f                      /* continue if we are. */
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
  #endif
         /*
          * At this point we are not sure about what context we come from.
@@ -510,6 +518,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
         cmpdi   r3,0            /* see if we handled MCE successfully */
  
         beq     1b              /* if !handled then panic */
+BEGIN_FTR_SECTION
         /*
          * Return from MC interrupt.
          * Queue up the MCE event so that we can log it later, while
@@ -518,10 +527,24 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
         bl      machine_check_queue_event
         MACHINE_CHECK_HANDLER_WINDUP
         RFI_TO_USER_OR_KERNEL
+FTR_SECTION_ELSE
+       /*
+        * pSeries: Return from MC interrupt. Before that stay on emergency
+        * stack and call machine_check_exception to log the MCE event.
+        */
+       LOAD_HANDLER(r10,mce_return)
+       mtspr   SPRN_SRR0,r10
+       ld      r10,PACAKMSR(r13)
+       mtspr   SPRN_SRR1,r10
+       RFI_TO_KERNEL
+       b       .
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
  9:
         /* Deliver the machine check to host kernel in V mode. */
         MACHINE_CHECK_HANDLER_WINDUP
-       b       machine_check_pSeries
+       SET_SCRATCH0(r13)               /* save r13 */
+       EXCEPTION_PROLOG_0(PACA_EXMC)
+       b       machine_check_pSeries_0
  
  EXC_COMMON_BEGIN(unrecover_mce)
         /* Invoke machine_check_exception to print MCE event and panic. */
@@ -535,6 +558,13 @@ EXC_COMMON_BEGIN(unrecover_mce)
         bl      unrecoverable_exception
         b       1b
  
+EXC_COMMON_BEGIN(mce_return)
+       /* Invoke machine_check_exception to print MCE event and return. */
+       addi    r3,r1,STACK_FRAME_OVERHEAD
+       bl      machine_check_exception
+       MACHINE_CHECK_HANDLER_WINDUP
+       RFI_TO_KERNEL
+       b       .
  
  EXC_REAL(data_access, 0x300, 0x80)
  EXC_VIRT(data_access, 0x4300, 0x80, 0x300)
@@ -566,28 +596,36 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
  
  
  EXC_REAL_BEGIN(data_access_slb, 0x380, 0x80)
-       SET_SCRATCH0(r13)
-       EXCEPTION_PROLOG_0(PACA_EXSLB)
-       EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x380)
-       mr      r12,r3  /* save r3 */
-       mfspr   r3,SPRN_DAR
-       mfspr   r11,SPRN_SRR1
-       crset   4*cr6+eq
-       BRANCH_TO_COMMON(r10, slb_miss_common)
+EXCEPTION_PROLOG(PACA_EXSLB, data_access_slb_common, EXC_STD, KVMTEST_PR, 0x380);
  EXC_REAL_END(data_access_slb, 0x380, 0x80)
  
  EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x80)
-       SET_SCRATCH0(r13)
-       EXCEPTION_PROLOG_0(PACA_EXSLB)
-       EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x380)
-       mr      r12,r3  /* save r3 */
-       mfspr   r3,SPRN_DAR
-       mfspr   r11,SPRN_SRR1
-       crset   4*cr6+eq
-       BRANCH_TO_COMMON(r10, slb_miss_common)
+EXCEPTION_RELON_PROLOG(PACA_EXSLB, data_access_slb_common, EXC_STD, NOTEST, 0x380);
  EXC_VIRT_END(data_access_slb, 0x4380, 0x80)
+
  TRAMP_KVM_SKIP(PACA_EXSLB, 0x380)
  
+EXC_COMMON_BEGIN(data_access_slb_common)
+       mfspr   r10,SPRN_DAR
+       std     r10,PACA_EXSLB+EX_DAR(r13)
+       EXCEPTION_PROLOG_COMMON(0x380, PACA_EXSLB)
+       ld      r4,PACA_EXSLB+EX_DAR(r13)
+       std     r4,_DAR(r1)
+       addi    r3,r1,STACK_FRAME_OVERHEAD
+       bl      do_slb_fault
+       cmpdi   r3,0
+       bne-    1f
+       b       fast_exception_return
+1:     /* Error case */
+       std     r3,RESULT(r1)
+       bl      save_nvgprs
+       RECONCILE_IRQ_STATE(r10, r11)
+       ld      r4,_DAR(r1)
+       ld      r5,RESULT(r1)
+       addi    r3,r1,STACK_FRAME_OVERHEAD
+       bl      do_bad_slb_fault
+       b       ret_from_except
+
  
  EXC_REAL(instruction_access, 0x400, 0x80)
  EXC_VIRT(instruction_access, 0x4400, 0x80, 0x400)
@@ -610,160 +648,34 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
  
  
  EXC_REAL_BEGIN(instruction_access_slb, 0x480, 0x80)
-       SET_SCRATCH0(r13)
-       EXCEPTION_PROLOG_0(PACA_EXSLB)
-       EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x480)
-       mr      r12,r3  /* save r3 */
-       mfspr   r3,SPRN_SRR0            /* SRR0 is faulting address */
-       mfspr   r11,SPRN_SRR1
-       crclr   4*cr6+eq
-       BRANCH_TO_COMMON(r10, slb_miss_common)
+EXCEPTION_PROLOG(PACA_EXSLB, instruction_access_slb_common, EXC_STD, KVMTEST_PR, 0x480);
  EXC_REAL_END(instruction_access_slb, 0x480, 0x80)
  
  EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x80)
-       SET_SCRATCH0(r13)
-       EXCEPTION_PROLOG_0(PACA_EXSLB)
-       EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x480)
-       mr      r12,r3  /* save r3 */
-       mfspr   r3,SPRN_SRR0            /* SRR0 is faulting address */
-       mfspr   r11,SPRN_SRR1
-       crclr   4*cr6+eq
-       BRANCH_TO_COMMON(r10, slb_miss_common)
+EXCEPTION_RELON_PROLOG(PACA_EXSLB, instruction_access_slb_common, EXC_STD, NOTEST, 0x480);
  EXC_VIRT_END(instruction_access_slb, 0x4480, 0x80)
-TRAMP_KVM(PACA_EXSLB, 0x480)
-
-
-/*
- * This handler is used by the 0x380 and 0x480 SLB miss interrupts, as well as
- * the virtual mode 0x4380 and 0x4480 interrupts if AIL is enabled.
- */
-EXC_COMMON_BEGIN(slb_miss_common)
-       /*
-        * r13 points to the PACA, r9 contains the saved CR,
-        * r12 contains the saved r3,
-        * r11 contain the saved SRR1, SRR0 is still ready for return
-        * r3 has the faulting address
-        * r9 - r13 are saved in paca->exslb.
-        * cr6.eq is set for a D-SLB miss, clear for a I-SLB miss
-        * We assume we aren't going to take any exceptions during this
-        * procedure.
-        */
-       mflr    r10
-       stw     r9,PACA_EXSLB+EX_CCR(r13)       /* save CR in exc. frame */
-       std     r10,PACA_EXSLB+EX_LR(r13)       /* save LR */
-
-       andi.   r9,r11,MSR_PR   // Check for exception from userspace
-       cmpdi   cr4,r9,MSR_PR   // And save the result in CR4 for later
-
-       /*
-        * Test MSR_RI before calling slb_allocate_realmode, because the
-        * MSR in r11 gets clobbered. However we still want to allocate
-        * SLB in case MSR_RI=0, to minimise the risk of getting stuck in
-        * recursive SLB faults. So use cr5 for this, which is preserved.
-        */
-       andi.   r11,r11,MSR_RI  /* check for unrecoverable exception */
-       cmpdi   cr5,r11,MSR_RI
-
-       crset   4*cr0+eq
-#ifdef CONFIG_PPC_BOOK3S_64
-BEGIN_MMU_FTR_SECTION
-       bl      slb_allocate
-END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
-#endif
-
-       ld      r10,PACA_EXSLB+EX_LR(r13)
-       lwz     r9,PACA_EXSLB+EX_CCR(r13)       /* get saved CR */
-       mtlr    r10
-
-       /*
-        * Large address, check whether we have to allocate new contexts.
-        */
-       beq-    8f
  
-       bne-    cr5,2f          /* if unrecoverable exception, oops */
-
-       /* All done -- return from exception. */
-
-       bne     cr4,1f          /* returning to kernel */
-
-       mtcrf   0x80,r9
-       mtcrf   0x08,r9         /* MSR[PR] indication is in cr4 */
-       mtcrf   0x04,r9         /* MSR[RI] indication is in cr5 */
-       mtcrf   0x02,r9         /* I/D indication is in cr6 */
-       mtcrf   0x01,r9         /* slb_allocate uses cr0 and cr7 */
-
-       RESTORE_CTR(r9, PACA_EXSLB)
-       RESTORE_PPR_PACA(PACA_EXSLB, r9)
-       mr      r3,r12
-       ld      r9,PACA_EXSLB+EX_R9(r13)
-       ld      r10,PACA_EXSLB+EX_R10(r13)
-       ld      r11,PACA_EXSLB+EX_R11(r13)
-       ld      r12,PACA_EXSLB+EX_R12(r13)
-       ld      r13,PACA_EXSLB+EX_R13(r13)
-       RFI_TO_USER
-       b       .       /* prevent speculative execution */
-1:
-       mtcrf   0x80,r9
-       mtcrf   0x08,r9         /* MSR[PR] indication is in cr4 */
-       mtcrf   0x04,r9         /* MSR[RI] indication is in cr5 */
-       mtcrf   0x02,r9         /* I/D indication is in cr6 */
-       mtcrf   0x01,r9         /* slb_allocate uses cr0 and cr7 */
-
-       RESTORE_CTR(r9, PACA_EXSLB)
-       RESTORE_PPR_PACA(PACA_EXSLB, r9)
-       mr      r3,r12
-       ld      r9,PACA_EXSLB+EX_R9(r13)
-       ld      r10,PACA_EXSLB+EX_R10(r13)
-       ld      r11,PACA_EXSLB+EX_R11(r13)
-       ld      r12,PACA_EXSLB+EX_R12(r13)
-       ld      r13,PACA_EXSLB+EX_R13(r13)
-       RFI_TO_KERNEL
-       b       .       /* prevent speculative execution */
-
-
-2:     std     r3,PACA_EXSLB+EX_DAR(r13)
-       mr      r3,r12
-       mfspr   r11,SPRN_SRR0
-       mfspr   r12,SPRN_SRR1
-       LOAD_HANDLER(r10,unrecov_slb)
-       mtspr   SPRN_SRR0,r10
-       ld      r10,PACAKMSR(r13)
-       mtspr   SPRN_SRR1,r10
-       RFI_TO_KERNEL
-       b       .
-
-8:     std     r3,PACA_EXSLB+EX_DAR(r13)
-       mr      r3,r12
-       mfspr   r11,SPRN_SRR0
-       mfspr   r12,SPRN_SRR1
-       LOAD_HANDLER(r10, large_addr_slb)
-       mtspr   SPRN_SRR0,r10
-       ld      r10,PACAKMSR(r13)
-       mtspr   SPRN_SRR1,r10
-       RFI_TO_KERNEL
-       b       .
+TRAMP_KVM(PACA_EXSLB, 0x480)
  
-EXC_COMMON_BEGIN(unrecov_slb)
-       EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB)
-       RECONCILE_IRQ_STATE(r10, r11)
+EXC_COMMON_BEGIN(instruction_access_slb_common)
+       EXCEPTION_PROLOG_COMMON(0x480, PACA_EXSLB)
+       ld      r4,_NIP(r1)
+       addi    r3,r1,STACK_FRAME_OVERHEAD
+       bl      do_slb_fault
+       cmpdi   r3,0
+       bne-    1f
+       b       fast_exception_return
+1:     /* Error case */
+       std     r3,RESULT(r1)
         bl      save_nvgprs
-1:     addi    r3,r1,STACK_FRAME_OVERHEAD
-       bl      unrecoverable_exception
-       b       1b
-
-EXC_COMMON_BEGIN(large_addr_slb)
-       EXCEPTION_PROLOG_COMMON(0x380, PACA_EXSLB)
         RECONCILE_IRQ_STATE(r10, r11)
-       ld      r3, PACA_EXSLB+EX_DAR(r13)
-       std     r3, _DAR(r1)
-       beq     cr6, 2f
-       li      r10, 0x481              /* fix trap number for I-SLB miss */
-       std     r10, _TRAP(r1)
-2:     bl      save_nvgprs
-       addi    r3, r1, STACK_FRAME_OVERHEAD
-       bl      slb_miss_large_addr
+       ld      r4,_NIP(r1)
+       ld      r5,RESULT(r1)
+       addi    r3,r1,STACK_FRAME_OVERHEAD
+       bl      do_bad_slb_fault
         b       ret_from_except
  
+
  EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100)
         .globl hardware_interrupt_hv;
  hardware_interrupt_hv:
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c

index a711d22..761b28b 100644 (file)
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -1444,8 +1444,8 @@ static ssize_t fadump_register_store(struct kobject *kobj,
                 break;
         case 1:
                 if (fw_dump.dump_registered == 1) {
-                       ret = -EEXIST;
-                       goto unlock_out;
+                       /* Un-register Firmware-assisted dump */
+                       fadump_unregister_dump(&fdm);
                 }
                 /* Register Firmware-assisted dump */
                 ret = register_fadump();
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S

index 6582f82..134a573 100644 (file)
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -642,7 +642,7 @@ DTLBMissIMMR:
         mtspr   SPRN_MD_TWC, r10
         mfspr   r10, SPRN_IMMR                  /* Get current IMMR */
         rlwinm  r10, r10, 0, 0xfff80000         /* Get 512 kbytes boundary */
-       ori     r10, r10, 0xf0 | MD_SPS16K | _PAGE_PRIVILEGED | _PAGE_DIRTY | \
+       ori     r10, r10, 0xf0 | MD_SPS16K | _PAGE_SH | _PAGE_DIRTY | \
                           _PAGE_PRESENT | _PAGE_NO_CACHE
         mtspr   SPRN_MD_RPN, r10        /* Update TLB entry */
  
@@ -660,7 +660,7 @@ DTLBMissLinear:
         li      r11, MD_PS8MEG | MD_SVALID | M_APG2
         mtspr   SPRN_MD_TWC, r11
         rlwinm  r10, r10, 0, 0x0f800000 /* 8xx supports max 256Mb RAM */
-       ori     r10, r10, 0xf0 | MD_SPS16K | _PAGE_PRIVILEGED | _PAGE_DIRTY | \
+       ori     r10, r10, 0xf0 | MD_SPS16K | _PAGE_SH | _PAGE_DIRTY | \
                           _PAGE_PRESENT
         mtspr   SPRN_MD_RPN, r10        /* Update TLB entry */
  
@@ -679,7 +679,7 @@ ITLBMissLinear:
         li      r11, MI_PS8MEG | MI_SVALID | M_APG2
         mtspr   SPRN_MI_TWC, r11
         rlwinm  r10, r10, 0, 0x0f800000 /* 8xx supports max 256Mb RAM */
-       ori     r10, r10, 0xf0 | MI_SPS16K | _PAGE_PRIVILEGED | _PAGE_DIRTY | \
+       ori     r10, r10, 0xf0 | MI_SPS16K | _PAGE_SH | _PAGE_DIRTY | \
                           _PAGE_PRESENT
         mtspr   SPRN_MI_RPN, r10        /* Update TLB entry */
  
diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c

index aa9f1b8..7e89d02 100644 (file)
--- a/arch/powerpc/kernel/io-workarounds.c
+++ b/arch/powerpc/kernel/io-workarounds.c
@@ -153,10 +153,10 @@ static const struct ppc_pci_io iowa_pci_io = {
  
  #ifdef CONFIG_PPC_INDIRECT_MMIO
  static void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size,
-                                 unsigned long flags, void *caller)
+                                 pgprot_t prot, void *caller)
  {
         struct iowa_bus *bus;
-       void __iomem *res = __ioremap_caller(addr, size, flags, caller);
+       void __iomem *res = __ioremap_caller(addr, size, prot, caller);
         int busno;
  
         bus = iowa_pci_find(0, (unsigned long)addr);
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c

index 19b4c62..f0dc680 100644 (file)
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -785,9 +785,9 @@ dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl,
  
         vaddr = page_address(page) + offset;
         uaddr = (unsigned long)vaddr;
-       npages = iommu_num_pages(uaddr, size, IOMMU_PAGE_SIZE(tbl));
  
         if (tbl) {
+               npages = iommu_num_pages(uaddr, size, IOMMU_PAGE_SIZE(tbl));
                 align = 0;
                 if (tbl->it_page_shift < PAGE_SHIFT && size >= PAGE_SIZE &&
                     ((unsigned long)vaddr & ~PAGE_MASK) == 0)
diff --git a/arch/powerpc/kernel/isa-bridge.c b/arch/powerpc/kernel/isa-bridge.c

index 1df6c74..fda3ae4 100644 (file)
--- a/arch/powerpc/kernel/isa-bridge.c
+++ b/arch/powerpc/kernel/isa-bridge.c
@@ -110,14 +110,14 @@ static void pci_process_ISA_OF_ranges(struct device_node *isa_node,
                 size = 0x10000;
  
         __ioremap_at(phb_io_base_phys, (void *)ISA_IO_BASE,
-                    size, pgprot_val(pgprot_noncached(__pgprot(0))));
+                    size, pgprot_noncached(PAGE_KERNEL));
         return;
  
  inval_range:
         printk(KERN_ERR "no ISA IO ranges or unexpected isa range, "
                "mapping 64k\n");
         __ioremap_at(phb_io_base_phys, (void *)ISA_IO_BASE,
-                    0x10000, pgprot_val(pgprot_noncached(__pgprot(0))));
+                    0x10000, pgprot_noncached(PAGE_KERNEL));
  }
  
  
@@ -253,7 +253,7 @@ void __init isa_bridge_init_non_pci(struct device_node *np)
          */
         isa_io_base = ISA_IO_BASE;
         __ioremap_at(pbase, (void *)ISA_IO_BASE,
-                    size, pgprot_val(pgprot_noncached(__pgprot(0))));
+                    size, pgprot_noncached(PAGE_KERNEL));
  
         pr_debug("ISA: Non-PCI bridge is %pOF\n", np);
  }
diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c

index 35e240a..59c578f 100644 (file)
--- a/arch/powerpc/kernel/kgdb.c
+++ b/arch/powerpc/kernel/kgdb.c
@@ -24,6 +24,7 @@
  #include <asm/processor.h>
  #include <asm/machdep.h>
  #include <asm/debug.h>
+#include <asm/code-patching.h>
  #include <linux/slab.h>
  
  /*
@@ -144,7 +145,7 @@ static int kgdb_handle_breakpoint(struct pt_regs *regs)
         if (kgdb_handle_exception(1, SIGTRAP, 0, regs) != 0)
                 return 0;
  
-       if (*(u32 *) (regs->nip) == *(u32 *) (&arch_kgdb_ops.gdb_bpt_instr))
+       if (*(u32 *)regs->nip == BREAK_INSTR)
                 regs->nip += BREAK_INSTR_SIZE;
  
         return 1;
@@ -441,16 +442,42 @@ int kgdb_arch_handle_exception(int vector, int signo, int err_code,
         return -1;
  }
  
+int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
+{
+       int err;
+       unsigned int instr;
+       unsigned int *addr = (unsigned int *)bpt->bpt_addr;
+
+       err = probe_kernel_address(addr, instr);
+       if (err)
+               return err;
+
+       err = patch_instruction(addr, BREAK_INSTR);
+       if (err)
+               return -EFAULT;
+
+       *(unsigned int *)bpt->saved_instr = instr;
+
+       return 0;
+}
+
+int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt)
+{
+       int err;
+       unsigned int instr = *(unsigned int *)bpt->saved_instr;
+       unsigned int *addr = (unsigned int *)bpt->bpt_addr;
+
+       err = patch_instruction(addr, instr);
+       if (err)
+               return -EFAULT;
+
+       return 0;
+}
+
  /*
   * Global data
   */
-struct kgdb_arch arch_kgdb_ops = {
-#ifdef __LITTLE_ENDIAN__
-       .gdb_bpt_instr = {0x08, 0x10, 0x82, 0x7d},
-#else
-       .gdb_bpt_instr = {0x7d, 0x82, 0x10, 0x08},
-#endif
-};
+struct kgdb_arch arch_kgdb_ops;
  
  static int kgdb_not_implemented(struct pt_regs *regs)
  {
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c

index efdd16a..bd933a7 100644 (file)
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -488,10 +488,11 @@ long machine_check_early(struct pt_regs *regs)
  {
         long handled = 0;
  
-       __this_cpu_inc(irq_stat.mce_exceptions);
-
-       if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
-               handled = cur_cpu_spec->machine_check_early(regs);
+       /*
+        * See if platform is capable of handling machine check.
+        */
+       if (ppc_md.machine_check_early)
+               handled = ppc_md.machine_check_early(regs);
         return handled;
  }
  
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c

index 3497c83..6b800ee 100644 (file)
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -60,7 +60,7 @@ static unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr)
  
  /* flush SLBs and reload */
  #ifdef CONFIG_PPC_BOOK3S_64
-static void flush_and_reload_slb(void)
+void flush_and_reload_slb(void)
  {
         /* Invalidate all SLBs */
         slb_flush_all_realmode();
@@ -89,6 +89,13 @@ static void flush_and_reload_slb(void)
  
  static void flush_erat(void)
  {
+#ifdef CONFIG_PPC_BOOK3S_64
+       if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) {
+               flush_and_reload_slb();
+               return;
+       }
+#endif
+       /* PPC_INVALIDATE_ERAT can only be used on ISA v3 and newer */
         asm volatile(PPC_INVALIDATE_ERAT : : :"memory");
  }
  
diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c

index 77371c9..2d861a3 100644 (file)
--- a/arch/powerpc/kernel/module.c
+++ b/arch/powerpc/kernel/module.c
@@ -74,6 +74,14 @@ int module_finalize(const Elf_Ehdr *hdr,
                                   (void *)sect->sh_addr + sect->sh_size);
  #endif /* CONFIG_PPC64 */
  
+#ifdef PPC64_ELF_ABI_v1
+       sect = find_section(hdr, sechdrs, ".opd");
+       if (sect != NULL) {
+               me->arch.start_opd = sect->sh_addr;
+               me->arch.end_opd = sect->sh_addr + sect->sh_size;
+       }
+#endif /* PPC64_ELF_ABI_v1 */
+
  #ifdef CONFIG_PPC_BARRIER_NOSPEC
         sect = find_section(hdr, sechdrs, "__spec_barrier_fixup");
         if (sect != NULL)
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c

index b8d61e0..8661eea 100644 (file)
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -360,11 +360,6 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr,
                 else if (strcmp(secstrings+sechdrs[i].sh_name,"__versions")==0)
                         dedotify_versions((void *)hdr + sechdrs[i].sh_offset,
                                           sechdrs[i].sh_size);
-               else if (!strcmp(secstrings + sechdrs[i].sh_name, ".opd")) {
-                       me->arch.start_opd = sechdrs[i].sh_addr;
-                       me->arch.end_opd = sechdrs[i].sh_addr +
-                                          sechdrs[i].sh_size;
-               }
  
                 /* We don't handle .init for the moment: rename to _init */
                 while ((p = strstr(secstrings + sechdrs[i].sh_name, ".init")))
@@ -685,7 +680,14 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
  
                 case R_PPC64_REL32:
                         /* 32 bits relative (used by relative exception tables) */
-                       *(u32 *)location = value - (unsigned long)location;
+                       /* Convert value to relative */
+                       value -= (unsigned long)location;
+                       if (value + 0x80000000 > 0xffffffff) {
+                               pr_err("%s: REL32 %li out of range!\n",
+                                      me->name, (long int)value);
+                               return -ENOEXEC;
+                       }
+                       *(u32 *)location = value;
                         break;
  
                 case R_PPC64_TOCSAVE:
diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c

index d63b488..4da8ed5 100644 (file)
--- a/arch/powerpc/kernel/pci_32.c
+++ b/arch/powerpc/kernel/pci_32.c
@@ -17,7 +17,6 @@
  #include <linux/of.h>
  #include <linux/slab.h>
  #include <linux/export.h>
-#include <linux/syscalls.h>
  
  #include <asm/processor.h>
  #include <asm/io.h>
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c

index dff28f9..9d8c10d 100644 (file)
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -159,7 +159,7 @@ static int pcibios_map_phb_io_space(struct pci_controller *hose)
  
         /* Establish the mapping */
         if (__ioremap_at(phys_page, area->addr, size_page,
-                        pgprot_val(pgprot_noncached(__pgprot(0)))) == NULL)
+                        pgprot_noncached(PAGE_KERNEL)) == NULL)
                 return -ENOMEM;
  
         /* Fixup hose IO resource */
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c

index 5d983d8..4d5322c 100644 (file)
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -43,6 +43,7 @@
  #include <linux/uaccess.h>
  #include <linux/elf-randomize.h>
  #include <linux/pkeys.h>
+#include <linux/seq_buf.h>
  
  #include <asm/pgtable.h>
  #include <asm/io.h>
@@ -65,6 +66,7 @@
  #include <asm/livepatch.h>
  #include <asm/cpu_has_feature.h>
  #include <asm/asm-prototypes.h>
+#include <asm/stacktrace.h>
  
  #include <linux/kprobes.h>
  #include <linux/kdebug.h>
@@ -102,24 +104,18 @@ static void check_if_tm_restore_required(struct task_struct *tsk)
         }
  }
  
-static inline bool msr_tm_active(unsigned long msr)
-{
-       return MSR_TM_ACTIVE(msr);
-}
-
  static bool tm_active_with_fp(struct task_struct *tsk)
  {
-       return msr_tm_active(tsk->thread.regs->msr) &&
+       return MSR_TM_ACTIVE(tsk->thread.regs->msr) &&
                 (tsk->thread.ckpt_regs.msr & MSR_FP);
  }
  
  static bool tm_active_with_altivec(struct task_struct *tsk)
  {
-       return msr_tm_active(tsk->thread.regs->msr) &&
+       return MSR_TM_ACTIVE(tsk->thread.regs->msr) &&
                 (tsk->thread.ckpt_regs.msr & MSR_VEC);
  }
  #else
-static inline bool msr_tm_active(unsigned long msr) { return false; }
  static inline void check_if_tm_restore_required(struct task_struct *tsk) { }
  static inline bool tm_active_with_fp(struct task_struct *tsk) { return false; }
  static inline bool tm_active_with_altivec(struct task_struct *tsk) { return false; }
@@ -247,7 +243,8 @@ void enable_kernel_fp(void)
                  * giveup as this would save  to the 'live' structure not the
                  * checkpointed structure.
                  */
-               if(!msr_tm_active(cpumsr) && msr_tm_active(current->thread.regs->msr))
+               if (!MSR_TM_ACTIVE(cpumsr) &&
+                    MSR_TM_ACTIVE(current->thread.regs->msr))
                         return;
                 __giveup_fpu(current);
         }
@@ -311,7 +308,8 @@ void enable_kernel_altivec(void)
                  * giveup as this would save  to the 'live' structure not the
                  * checkpointed structure.
                  */
-               if(!msr_tm_active(cpumsr) && msr_tm_active(current->thread.regs->msr))
+               if (!MSR_TM_ACTIVE(cpumsr) &&
+                    MSR_TM_ACTIVE(current->thread.regs->msr))
                         return;
                 __giveup_altivec(current);
         }
@@ -397,7 +395,8 @@ void enable_kernel_vsx(void)
                  * giveup as this would save  to the 'live' structure not the
                  * checkpointed structure.
                  */
-               if(!msr_tm_active(cpumsr) && msr_tm_active(current->thread.regs->msr))
+               if (!MSR_TM_ACTIVE(cpumsr) &&
+                    MSR_TM_ACTIVE(current->thread.regs->msr))
                         return;
                 __giveup_vsx(current);
         }
@@ -530,7 +529,7 @@ void restore_math(struct pt_regs *regs)
  {
         unsigned long msr;
  
-       if (!msr_tm_active(regs->msr) &&
+       if (!MSR_TM_ACTIVE(regs->msr) &&
                 !current->thread.load_fp && !loadvec(current->thread))
                 return;
  
@@ -1252,17 +1251,16 @@ struct task_struct *__switch_to(struct task_struct *prev,
         return last;
  }
  
-static int instructions_to_print = 16;
+#define NR_INSN_TO_PRINT       16
  
  static void show_instructions(struct pt_regs *regs)
  {
         int i;
-       unsigned long pc = regs->nip - (instructions_to_print * 3 / 4 *
-                       sizeof(int));
+       unsigned long pc = regs->nip - (NR_INSN_TO_PRINT * 3 / 4 * sizeof(int));
  
         printk("Instruction dump:");
  
-       for (i = 0; i < instructions_to_print; i++) {
+       for (i = 0; i < NR_INSN_TO_PRINT; i++) {
                 int instr;
  
                 if (!(i % 8))
@@ -1277,7 +1275,7 @@ static void show_instructions(struct pt_regs *regs)
  #endif
  
                 if (!__kernel_text_address(pc) ||
-                    probe_kernel_address((unsigned int __user *)pc, instr)) {
+                   probe_kernel_address((const void *)pc, instr)) {
                         pr_cont("XXXXXXXX ");
                 } else {
                         if (regs->nip == pc)
@@ -1295,43 +1293,43 @@ static void show_instructions(struct pt_regs *regs)
  void show_user_instructions(struct pt_regs *regs)
  {
         unsigned long pc;
-       int i;
+       int n = NR_INSN_TO_PRINT;
+       struct seq_buf s;
+       char buf[96]; /* enough for 8 times 9 + 2 chars */
  
-       pc = regs->nip - (instructions_to_print * 3 / 4 * sizeof(int));
+       pc = regs->nip - (NR_INSN_TO_PRINT * 3 / 4 * sizeof(int));
  
         /*
          * Make sure the NIP points at userspace, not kernel text/data or
          * elsewhere.
          */
-       if (!__access_ok(pc, instructions_to_print * sizeof(int), USER_DS)) {
+       if (!__access_ok(pc, NR_INSN_TO_PRINT * sizeof(int), USER_DS)) {
                 pr_info("%s[%d]: Bad NIP, not dumping instructions.\n",
                         current->comm, current->pid);
                 return;
         }
  
-       pr_info("%s[%d]: code: ", current->comm, current->pid);
+       seq_buf_init(&s, buf, sizeof(buf));
  
-       for (i = 0; i < instructions_to_print; i++) {
-               int instr;
+       while (n) {
+               int i;
  
-               if (!(i % 8) && (i > 0)) {
-                       pr_cont("\n");
-                       pr_info("%s[%d]: code: ", current->comm, current->pid);
-               }
+               seq_buf_clear(&s);
  
-               if (probe_kernel_address((unsigned int __user *)pc, instr)) {
-                       pr_cont("XXXXXXXX ");
-               } else {
-                       if (regs->nip == pc)
-                               pr_cont("<%08x> ", instr);
-                       else
-                               pr_cont("%08x ", instr);
+               for (i = 0; i < 8 && n; i++, n--, pc += sizeof(int)) {
+                       int instr;
+
+                       if (probe_kernel_address((const void *)pc, instr)) {
+                               seq_buf_printf(&s, "XXXXXXXX ");
+                               continue;
+                       }
+                       seq_buf_printf(&s, regs->nip == pc ? "<%08x> " : "%08x ", instr);
                 }
  
-               pc += sizeof(int);
+               if (!seq_buf_has_overflowed(&s))
+                       pr_info("%s[%d]: code: %s\n", current->comm,
+                               current->pid, s.buffer);
         }
-
-       pr_cont("\n");
  }
  
  struct regbit {
@@ -1485,6 +1483,15 @@ void flush_thread(void)
  #endif /* CONFIG_HAVE_HW_BREAKPOINT */
  }
  
+#ifdef CONFIG_PPC_BOOK3S_64
+void arch_setup_new_exec(void)
+{
+       if (radix_enabled())
+               return;
+       hash__setup_new_exec();
+}
+#endif
+
  int set_thread_uses_vas(void)
  {
  #ifdef CONFIG_PPC_BOOK3S_64
@@ -1705,7 +1712,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
                 p->thread.dscr = mfspr(SPRN_DSCR);
         }
         if (cpu_has_feature(CPU_FTR_HAS_PPR))
-               p->thread.ppr = INIT_PPR;
+               childregs->ppr = DEFAULT_PPR;
  
         p->thread.tidr = 0;
  #endif
@@ -1713,6 +1720,8 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
         return 0;
  }
  
+void preload_new_slb_context(unsigned long start, unsigned long sp);
+
  /*
   * Set up a thread for executing a new program
   */
@@ -1720,6 +1729,10 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
  {
  #ifdef CONFIG_PPC64
         unsigned long load_addr = regs->gpr[2]; /* saved by ELF_PLAT_INIT */
+
+#ifdef CONFIG_PPC_BOOK3S_64
+       preload_new_slb_context(start, sp);
+#endif
  #endif
  
         /*
@@ -1810,6 +1823,7 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
  #ifdef CONFIG_VSX
         current->thread.used_vsr = 0;
  #endif
+       current->thread.load_slb = 0;
         current->thread.load_fp = 0;
         memset(&current->thread.fp_state, 0, sizeof(current->thread.fp_state));
         current->thread.fp_save_area = NULL;
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c

index 9b38a2e..f33ff41 100644 (file)
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -43,11 +43,13 @@
  #include <asm/btext.h>
  #include <asm/sections.h>
  #include <asm/machdep.h>
-#include <asm/opal.h>
  #include <asm/asm-prototypes.h>
  
  #include <linux/linux_logo.h>
  
+/* All of prom_init bss lives here */
+#define __prombss __section(.bss.prominit)
+
  /*
   * Eventually bump that one up
   */
@@ -87,7 +89,7 @@
  #define OF_WORKAROUNDS 0
  #else
  #define OF_WORKAROUNDS of_workarounds
-int of_workarounds;
+static int of_workarounds __prombss;
  #endif
  
  #define OF_WA_CLAIM    1       /* do phys/virt claim separately, then map */
@@ -148,29 +150,31 @@ extern void copy_and_flush(unsigned long dest, unsigned long src,
                            unsigned long size, unsigned long offset);
  
  /* prom structure */
-static struct prom_t __initdata prom;
+static struct prom_t __prombss prom;
  
-static unsigned long prom_entry __initdata;
+static unsigned long __prombss prom_entry;
  
  #define PROM_SCRATCH_SIZE 256
  
-static char __initdata of_stdout_device[256];
-static char __initdata prom_scratch[PROM_SCRATCH_SIZE];
+static char __prombss of_stdout_device[256];
+static char __prombss prom_scratch[PROM_SCRATCH_SIZE];
  
-static unsigned long __initdata dt_header_start;
-static unsigned long __initdata dt_struct_start, dt_struct_end;
-static unsigned long __initdata dt_string_start, dt_string_end;
+static unsigned long __prombss dt_header_start;
+static unsigned long __prombss dt_struct_start, dt_struct_end;
+static unsigned long __prombss dt_string_start, dt_string_end;
  
-static unsigned long __initdata prom_initrd_start, prom_initrd_end;
+static unsigned long __prombss prom_initrd_start, prom_initrd_end;
  
  #ifdef CONFIG_PPC64
-static int __initdata prom_iommu_force_on;
-static int __initdata prom_iommu_off;
-static unsigned long __initdata prom_tce_alloc_start;
-static unsigned long __initdata prom_tce_alloc_end;
+static int __prombss prom_iommu_force_on;
+static int __prombss prom_iommu_off;
+static unsigned long __prombss prom_tce_alloc_start;
+static unsigned long __prombss prom_tce_alloc_end;
  #endif
  
-static bool prom_radix_disable __initdata = !IS_ENABLED(CONFIG_PPC_RADIX_MMU_DEFAULT);
+#ifdef CONFIG_PPC_PSERIES
+static bool __prombss prom_radix_disable;
+#endif
  
  struct platform_support {
         bool hash_mmu;
@@ -188,26 +192,25 @@ struct platform_support {
  #define PLATFORM_LPAR          0x0001
  #define PLATFORM_POWERMAC      0x0400
  #define PLATFORM_GENERIC       0x0500
-#define PLATFORM_OPAL          0x0600
  
-static int __initdata of_platform;
+static int __prombss of_platform;
  
-static char __initdata prom_cmd_line[COMMAND_LINE_SIZE];
+static char __prombss prom_cmd_line[COMMAND_LINE_SIZE];
  
-static unsigned long __initdata prom_memory_limit;
+static unsigned long __prombss prom_memory_limit;
  
-static unsigned long __initdata alloc_top;
-static unsigned long __initdata alloc_top_high;
-static unsigned long __initdata alloc_bottom;
-static unsigned long __initdata rmo_top;
-static unsigned long __initdata ram_top;
+static unsigned long __prombss alloc_top;
+static unsigned long __prombss alloc_top_high;
+static unsigned long __prombss alloc_bottom;
+static unsigned long __prombss rmo_top;
+static unsigned long __prombss ram_top;
  
-static struct mem_map_entry __initdata mem_reserve_map[MEM_RESERVE_MAP_SIZE];
-static int __initdata mem_reserve_cnt;
+static struct mem_map_entry __prombss mem_reserve_map[MEM_RESERVE_MAP_SIZE];
+static int __prombss mem_reserve_cnt;
  
-static cell_t __initdata regbuf[1024];
+static cell_t __prombss regbuf[1024];
  
-static bool rtas_has_query_cpu_stopped;
+static bool  __prombss rtas_has_query_cpu_stopped;
  
  
  /*
@@ -522,8 +525,8 @@ static void add_string(char **str, const char *q)
  
  static char *tohex(unsigned int x)
  {
-       static char digits[] = "0123456789abcdef";
-       static char result[9];
+       static const char digits[] __initconst = "0123456789abcdef";
+       static char result[9] __prombss;
         int i;
  
         result[8] = 0;
@@ -664,6 +667,8 @@ static void __init early_cmdline_parse(void)
  #endif
         }
  
+#ifdef CONFIG_PPC_PSERIES
+       prom_radix_disable = !IS_ENABLED(CONFIG_PPC_RADIX_MMU_DEFAULT);
         opt = strstr(prom_cmd_line, "disable_radix");
         if (opt) {
                 opt += 13;
@@ -679,9 +684,10 @@ static void __init early_cmdline_parse(void)
         }
         if (prom_radix_disable)
                 prom_debug("Radix disabled from cmdline\n");
+#endif /* CONFIG_PPC_PSERIES */
  }
  
-#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
+#ifdef CONFIG_PPC_PSERIES
  /*
   * The architecture vector has an array of PVR mask/value pairs,
   * followed by # option vectors - 1, followed by the option vectors.
@@ -782,7 +788,7 @@ struct ibm_arch_vec {
         struct option_vector6 vec6;
  } __packed;
  
-struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = {
+static const struct ibm_arch_vec ibm_architecture_vec_template __initconst = {
         .pvrs = {
                 {
                         .mask = cpu_to_be32(0xfffe0000), /* POWER5/POWER5+ */
@@ -920,9 +926,11 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = {
         },
  };
  
+static struct ibm_arch_vec __prombss ibm_architecture_vec  ____cacheline_aligned;
+
  /* Old method - ELF header with PT_NOTE sections only works on BE */
  #ifdef __BIG_ENDIAN__
-static struct fake_elf {
+static const struct fake_elf {
         Elf32_Ehdr      elfhdr;
         Elf32_Phdr      phdr[2];
         struct chrpnote {
@@ -955,7 +963,7 @@ static struct fake_elf {
                         u32     ignore_me;
                 } rpadesc;
         } rpanote;
-} fake_elf = {
+} fake_elf __initconst = {
         .elfhdr = {
                 .e_ident = { 0x7f, 'E', 'L', 'F',
                              ELFCLASS32, ELFDATA2MSB, EV_CURRENT },
@@ -1129,14 +1137,21 @@ static void __init prom_check_platform_support(void)
         };
         int prop_len = prom_getproplen(prom.chosen,
                                        "ibm,arch-vec-5-platform-support");
+
+       /* First copy the architecture vec template */
+       ibm_architecture_vec = ibm_architecture_vec_template;
+
         if (prop_len > 1) {
                 int i;
-               u8 vec[prop_len];
+               u8 vec[8];
                 prom_debug("Found ibm,arch-vec-5-platform-support, len: %d\n",
                            prop_len);
+               if (prop_len > sizeof(vec))
+                       prom_printf("WARNING: ibm,arch-vec-5-platform-support longer than expected (len: %d)\n",
+                                   prop_len);
                 prom_getprop(prom.chosen, "ibm,arch-vec-5-platform-support",
                              &vec, sizeof(vec));
-               for (i = 0; i < prop_len; i += 2) {
+               for (i = 0; i < sizeof(vec); i += 2) {
                         prom_debug("%d: index = 0x%x val = 0x%x\n", i / 2
                                                                   , vec[i]
                                                                   , vec[i + 1]);
@@ -1225,7 +1240,7 @@ static void __init prom_send_capabilities(void)
         }
  #endif /* __BIG_ENDIAN__ */
  }
-#endif /* #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) */
+#endif /* CONFIG_PPC_PSERIES */
  
  /*
   * Memory allocation strategy... our layout is normally:
@@ -1562,88 +1577,6 @@ static void __init prom_close_stdin(void)
         }
  }
  
-#ifdef CONFIG_PPC_POWERNV
-
-#ifdef CONFIG_PPC_EARLY_DEBUG_OPAL
-static u64 __initdata prom_opal_base;
-static u64 __initdata prom_opal_entry;
-#endif
-
-/*
- * Allocate room for and instantiate OPAL
- */
-static void __init prom_instantiate_opal(void)
-{
-       phandle opal_node;
-       ihandle opal_inst;
-       u64 base, entry;
-       u64 size = 0, align = 0x10000;
-       __be64 val64;
-       u32 rets[2];
-
-       prom_debug("prom_instantiate_opal: start...\n");
-
-       opal_node = call_prom("finddevice", 1, 1, ADDR("/ibm,opal"));
-       prom_debug("opal_node: %x\n", opal_node);
-       if (!PHANDLE_VALID(opal_node))
-               return;
-
-       val64 = 0;
-       prom_getprop(opal_node, "opal-runtime-size", &val64, sizeof(val64));
-       size = be64_to_cpu(val64);
-       if (size == 0)
-               return;
-       val64 = 0;
-       prom_getprop(opal_node, "opal-runtime-alignment", &val64,sizeof(val64));
-       align = be64_to_cpu(val64);
-
-       base = alloc_down(size, align, 0);
-       if (base == 0) {
-               prom_printf("OPAL allocation failed !\n");
-               return;
-       }
-
-       opal_inst = call_prom("open", 1, 1, ADDR("/ibm,opal"));
-       if (!IHANDLE_VALID(opal_inst)) {
-               prom_printf("opening opal package failed (%x)\n", opal_inst);
-               return;
-       }
-
-       prom_printf("instantiating opal at 0x%llx...", base);
-
-       if (call_prom_ret("call-method", 4, 3, rets,
-                         ADDR("load-opal-runtime"),
-                         opal_inst,
-                         base >> 32, base & 0xffffffff) != 0
-           || (rets[0] == 0 && rets[1] == 0)) {
-               prom_printf(" failed\n");
-               return;
-       }
-       entry = (((u64)rets[0]) << 32) | rets[1];
-
-       prom_printf(" done\n");
-
-       reserve_mem(base, size);
-
-       prom_debug("opal base     = 0x%llx\n", base);
-       prom_debug("opal align    = 0x%llx\n", align);
-       prom_debug("opal entry    = 0x%llx\n", entry);
-       prom_debug("opal size     = 0x%llx\n", size);
-
-       prom_setprop(opal_node, "/ibm,opal", "opal-base-address",
-                    &base, sizeof(base));
-       prom_setprop(opal_node, "/ibm,opal", "opal-entry-address",
-                    &entry, sizeof(entry));
-
-#ifdef CONFIG_PPC_EARLY_DEBUG_OPAL
-       prom_opal_base = base;
-       prom_opal_entry = entry;
-#endif
-       prom_debug("prom_instantiate_opal: end...\n");
-}
-
-#endif /* CONFIG_PPC_POWERNV */
-
  /*
   * Allocate room for and instantiate RTAS
   */
@@ -2150,10 +2083,6 @@ static int __init prom_find_machine_type(void)
                 }
         }
  #ifdef CONFIG_PPC64
-       /* Try to detect OPAL */
-       if (PHANDLE_VALID(call_prom("finddevice", 1, 1, ADDR("/ibm,opal"))))
-               return PLATFORM_OPAL;
-
         /* Try to figure out if it's an IBM pSeries or any other
          * PAPR compliant platform. We assume it is if :
          *  - /device_type is "chrp" (please, do NOT use that for future
@@ -2202,7 +2131,7 @@ static void __init prom_check_displays(void)
         ihandle ih;
         int i;
  
-       static unsigned char default_colors[] = {
+       static const unsigned char default_colors[] __initconst = {
                 0x00, 0x00, 0x00,
                 0x00, 0x00, 0xaa,
                 0x00, 0xaa, 0x00,
@@ -2398,7 +2327,7 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start,
         char *namep, *prev_name, *sstart, *p, *ep, *lp, *path;
         unsigned long soff;
         unsigned char *valp;
-       static char pname[MAX_PROPERTY_NAME];
+       static char pname[MAX_PROPERTY_NAME] __prombss;
         int l, room, has_phandle = 0;
  
         dt_push_token(OF_DT_BEGIN_NODE, mem_start, mem_end);
@@ -2481,14 +2410,11 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start,
                         has_phandle = 1;
         }
  
-       /* Add a "linux,phandle" property if no "phandle" property already
-        * existed (can happen with OPAL)
-        */
+       /* Add a "phandle" property if none already exists */
         if (!has_phandle) {
-               soff = dt_find_string("linux,phandle");
+               soff = dt_find_string("phandle");
                 if (soff == 0)
-                       prom_printf("WARNING: Can't find string index for"
-                                   " <linux-phandle> node %s\n", path);
+                       prom_printf("WARNING: Can't find string index for <phandle> node %s\n", path);
                 else {
                         dt_push_token(OF_DT_PROP, mem_start, mem_end);
                         dt_push_token(4, mem_start, mem_end);
@@ -2548,9 +2474,9 @@ static void __init flatten_device_tree(void)
         dt_string_start = mem_start;
         mem_start += 4; /* hole */
  
-       /* Add "linux,phandle" in there, we'll need it */
+       /* Add "phandle" in there, we'll need it */
         namep = make_room(&mem_start, &mem_end, 16, 1);
-       strcpy(namep, "linux,phandle");
+       strcpy(namep, "phandle");
         mem_start = (unsigned long)namep + strlen(namep) + 1;
  
         /* Build string array */
@@ -3172,7 +3098,7 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
          */
         early_cmdline_parse();
  
-#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
+#ifdef CONFIG_PPC_PSERIES
         /*
          * On pSeries, inform the firmware about our capabilities
          */
@@ -3216,15 +3142,9 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
          * On non-powermacs, try to instantiate RTAS. PowerMacs don't
          * have a usable RTAS implementation.
          */
-       if (of_platform != PLATFORM_POWERMAC &&
-           of_platform != PLATFORM_OPAL)
+       if (of_platform != PLATFORM_POWERMAC)
                 prom_instantiate_rtas();
  
-#ifdef CONFIG_PPC_POWERNV
-       if (of_platform == PLATFORM_OPAL)
-               prom_instantiate_opal();
-#endif /* CONFIG_PPC_POWERNV */
-
  #ifdef CONFIG_PPC64
         /* instantiate sml */
         prom_instantiate_sml();
@@ -3237,8 +3157,7 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
          *
          * (This must be done after instanciating RTAS)
          */
-       if (of_platform != PLATFORM_POWERMAC &&
-           of_platform != PLATFORM_OPAL)
+       if (of_platform != PLATFORM_POWERMAC)
                 prom_hold_cpus();
  
         /*
@@ -3282,11 +3201,9 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
         /*
          * in case stdin is USB and still active on IBM machines...
          * Unfortunately quiesce crashes on some powermacs if we have
-        * closed stdin already (in particular the powerbook 101). It
-        * appears that the OPAL version of OFW doesn't like it either.
+        * closed stdin already (in particular the powerbook 101).
          */
-       if (of_platform != PLATFORM_POWERMAC &&
-           of_platform != PLATFORM_OPAL)
+       if (of_platform != PLATFORM_POWERMAC)
                 prom_close_stdin();
  
         /*
@@ -3304,10 +3221,8 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
         hdr = dt_header_start;
  
         /* Don't print anything after quiesce under OPAL, it crashes OFW */
-       if (of_platform != PLATFORM_OPAL) {
-               prom_printf("Booting Linux via __start() @ 0x%lx ...\n", kbase);
-               prom_debug("->dt_header_start=0x%lx\n", hdr);
-       }
+       prom_printf("Booting Linux via __start() @ 0x%lx ...\n", kbase);
+       prom_debug("->dt_header_start=0x%lx\n", hdr);
  
  #ifdef CONFIG_PPC32
         reloc_got2(-offset);
@@ -3315,13 +3230,7 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
         unreloc_toc();
  #endif
  
-#ifdef CONFIG_PPC_EARLY_DEBUG_OPAL
-       /* OPAL early debug gets the OPAL base & entry in r8 and r9 */
-       __start(hdr, kbase, 0, 0, 0,
-               prom_opal_base, prom_opal_entry);
-#else
         __start(hdr, kbase, 0, 0, 0, 0, 0);
-#endif
  
         return 0;
  }
diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh

index acb6b92..667df97 100644 (file)
--- a/arch/powerpc/kernel/prom_init_check.sh
+++ b/arch/powerpc/kernel/prom_init_check.sh
@@ -28,6 +28,18 @@ OBJ="$2"
  
  ERROR=0
  
+function check_section()
+{
+    file=$1
+    section=$2
+    size=$(objdump -h -j $section $file 2>/dev/null | awk "\$2 == \"$section\" {print \$3}")
+    size=${size:-0}
+    if [ $size -ne 0 ]; then
+       ERROR=1
+       echo "Error: Section $section not empty in prom_init.c" >&2
+    fi
+}
+
  for UNDEF in $($NM -u $OBJ | awk '{print $2}')
  do
         # On 64-bit nm gives us the function descriptors, which have
@@ -66,4 +78,8 @@ do
         fi
  done
  
+check_section $OBJ .data
+check_section $OBJ .bss
+check_section $OBJ .init.data
+
  exit $ERROR
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c

index 9667666..afb819f 100644 (file)
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -297,7 +297,7 @@ int ptrace_get_reg(struct task_struct *task, int regno, unsigned long *data)
         }
  #endif
  
-       if (regno < (sizeof(struct pt_regs) / sizeof(unsigned long))) {
+       if (regno < (sizeof(struct user_pt_regs) / sizeof(unsigned long))) {
                 *data = ((unsigned long *)task->thread.regs)[regno];
                 return 0;
         }
@@ -360,10 +360,10 @@ static int gpr_get(struct task_struct *target, const struct user_regset *regset,
                 ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
                                           &target->thread.regs->orig_gpr3,
                                           offsetof(struct pt_regs, orig_gpr3),
-                                         sizeof(struct pt_regs));
+                                         sizeof(struct user_pt_regs));
         if (!ret)
                 ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
-                                              sizeof(struct pt_regs), -1);
+                                              sizeof(struct user_pt_regs), -1);
  
         return ret;
  }
@@ -853,10 +853,10 @@ static int tm_cgpr_get(struct task_struct *target,
                 ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
                                           &target->thread.ckpt_regs.orig_gpr3,
                                           offsetof(struct pt_regs, orig_gpr3),
-                                         sizeof(struct pt_regs));
+                                         sizeof(struct user_pt_regs));
         if (!ret)
                 ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
-                                              sizeof(struct pt_regs), -1);
+                                              sizeof(struct user_pt_regs), -1);
  
         return ret;
  }
@@ -1609,7 +1609,7 @@ static int ppr_get(struct task_struct *target,
                       void *kbuf, void __user *ubuf)
  {
         return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
-                                  &target->thread.ppr, 0, sizeof(u64));
+                                  &target->thread.regs->ppr, 0, sizeof(u64));
  }
  
  static int ppr_set(struct task_struct *target,
@@ -1618,7 +1618,7 @@ static int ppr_set(struct task_struct *target,
                       const void *kbuf, const void __user *ubuf)
  {
         return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
-                                 &target->thread.ppr, 0, sizeof(u64));
+                                 &target->thread.regs->ppr, 0, sizeof(u64));
  }
  
  static int dscr_get(struct task_struct *target,
@@ -2508,6 +2508,7 @@ void ptrace_disable(struct task_struct *child)
  {
         /* make sure the single step bit is not set. */
         user_disable_single_step(child);
+       clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
  }
  
  #ifdef CONFIG_PPC_ADV_DEBUG_REGS
@@ -3130,7 +3131,7 @@ long arch_ptrace(struct task_struct *child, long request,
         case PTRACE_GETREGS:    /* Get all pt_regs from the child. */
                 return copy_regset_to_user(child, &user_ppc_native_view,
                                            REGSET_GPR,
-                                          0, sizeof(struct pt_regs),
+                                          0, sizeof(struct user_pt_regs),
                                            datavp);
  
  #ifdef CONFIG_PPC64
@@ -3139,7 +3140,7 @@ long arch_ptrace(struct task_struct *child, long request,
         case PTRACE_SETREGS:    /* Set all gp regs in the child. */
                 return copy_regset_from_user(child, &user_ppc_native_view,
                                              REGSET_GPR,
-                                            0, sizeof(struct pt_regs),
+                                            0, sizeof(struct user_pt_regs),
                                              datavp);
  
         case PTRACE_GETFPREGS: /* Get the child FPU state (FPR0...31 + FPSCR) */
@@ -3264,6 +3265,16 @@ long do_syscall_trace_enter(struct pt_regs *regs)
  {
         user_exit();
  
+       if (test_thread_flag(TIF_SYSCALL_EMU)) {
+               ptrace_report_syscall(regs);
+               /*
+                * Returning -1 will skip the syscall execution. We also want
+                * to avoid clobbering any registers, so we return directly
+                * rather than jumping to the skip label.
+                */
+               return -1;
+       }
+
         /*
          * The tracer may decide to abort the syscall, if so tracehook
          * will return !0. Note that the tracer may also just change
@@ -3324,3 +3335,42 @@ void do_syscall_trace_leave(struct pt_regs *regs)
  
         user_enter();
  }
+
+void __init pt_regs_check(void)
+{
+       BUILD_BUG_ON(offsetof(struct pt_regs, gpr) !=
+                    offsetof(struct user_pt_regs, gpr));
+       BUILD_BUG_ON(offsetof(struct pt_regs, nip) !=
+                    offsetof(struct user_pt_regs, nip));
+       BUILD_BUG_ON(offsetof(struct pt_regs, msr) !=
+                    offsetof(struct user_pt_regs, msr));
+       BUILD_BUG_ON(offsetof(struct pt_regs, msr) !=
+                    offsetof(struct user_pt_regs, msr));
+       BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) !=
+                    offsetof(struct user_pt_regs, orig_gpr3));
+       BUILD_BUG_ON(offsetof(struct pt_regs, ctr) !=
+                    offsetof(struct user_pt_regs, ctr));
+       BUILD_BUG_ON(offsetof(struct pt_regs, link) !=
+                    offsetof(struct user_pt_regs, link));
+       BUILD_BUG_ON(offsetof(struct pt_regs, xer) !=
+                    offsetof(struct user_pt_regs, xer));
+       BUILD_BUG_ON(offsetof(struct pt_regs, ccr) !=
+                    offsetof(struct user_pt_regs, ccr));
+#ifdef __powerpc64__
+       BUILD_BUG_ON(offsetof(struct pt_regs, softe) !=
+                    offsetof(struct user_pt_regs, softe));
+#else
+       BUILD_BUG_ON(offsetof(struct pt_regs, mq) !=
+                    offsetof(struct user_pt_regs, mq));
+#endif
+       BUILD_BUG_ON(offsetof(struct pt_regs, trap) !=
+                    offsetof(struct user_pt_regs, trap));
+       BUILD_BUG_ON(offsetof(struct pt_regs, dar) !=
+                    offsetof(struct user_pt_regs, dar));
+       BUILD_BUG_ON(offsetof(struct pt_regs, dsisr) !=
+                    offsetof(struct user_pt_regs, dsisr));
+       BUILD_BUG_ON(offsetof(struct pt_regs, result) !=
+                    offsetof(struct user_pt_regs, result));
+
+       BUILD_BUG_ON(sizeof(struct user_pt_regs) > sizeof(struct pt_regs));
+}
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c

index 8afd146..de35bd8 100644 (file)
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -981,7 +981,15 @@ int rtas_ibm_suspend_me(u64 handle)
                 goto out;
         }
  
-       stop_topology_update();
+       cpu_hotplug_disable();
+
+       /* Check if we raced with a CPU-Offline Operation */
+       if (unlikely(!cpumask_equal(cpu_present_mask, cpu_online_mask))) {
+               pr_err("%s: Raced against a concurrent CPU-Offline\n",
+                      __func__);
+               atomic_set(&data.error, -EBUSY);
+               goto out_hotplug_enable;
+       }
  
         /* Call function on all CPUs.  One of us will make the
          * rtas call
@@ -994,7 +1002,8 @@ int rtas_ibm_suspend_me(u64 handle)
         if (atomic_read(&data.error) != 0)
                 printk(KERN_ERR "Error doing global join\n");
  
-       start_topology_update();
+out_hotplug_enable:
+       cpu_hotplug_enable();
  
         /* Take down CPUs not online prior to suspend */
         cpuret = rtas_offline_cpus_mask(offline_mask);
diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c

index 44d66c3..38cadae 100644 (file)
--- a/arch/powerpc/kernel/rtasd.c
+++ b/arch/powerpc/kernel/rtasd.c
@@ -91,6 +91,8 @@ static char *rtas_event_type(int type)
                         return "Dump Notification Event";
                 case RTAS_TYPE_PRRN:
                         return "Platform Resource Reassignment Event";
+               case RTAS_TYPE_HOTPLUG:
+                       return "Hotplug Event";
         }
  
         return rtas_type[0];
@@ -150,8 +152,10 @@ static void printk_log_rtas(char *buf, int len)
         } else {
                 struct rtas_error_log *errlog = (struct rtas_error_log *)buf;
  
-               printk(RTAS_DEBUG "event: %d, Type: %s, Severity: %d\n",
-                      error_log_cnt, rtas_event_type(rtas_error_type(errlog)),
+               printk(RTAS_DEBUG "event: %d, Type: %s (%d), Severity: %d\n",
+                      error_log_cnt,
+                      rtas_event_type(rtas_error_type(errlog)),
+                      rtas_error_type(errlog),
                        rtas_error_severity(errlog));
         }
  }
@@ -274,27 +278,16 @@ void pSeries_log_error(char *buf, unsigned int err_type, int fatal)
  }
  
  #ifdef CONFIG_PPC_PSERIES
-static s32 prrn_update_scope;
-
-static void prrn_work_fn(struct work_struct *work)
+static void handle_prrn_event(s32 scope)
  {
         /*
          * For PRRN, we must pass the negative of the scope value in
          * the RTAS event.
          */
-       pseries_devicetree_update(-prrn_update_scope);
+       pseries_devicetree_update(-scope);
         numa_update_cpu_topology(false);
  }
  
-static DECLARE_WORK(prrn_work, prrn_work_fn);
-
-static void prrn_schedule_update(u32 scope)
-{
-       flush_work(&prrn_work);
-       prrn_update_scope = scope;
-       schedule_work(&prrn_work);
-}
-
  static void handle_rtas_event(const struct rtas_error_log *log)
  {
         if (rtas_error_type(log) != RTAS_TYPE_PRRN || !prrn_is_enabled())
@@ -303,7 +296,7 @@ static void handle_rtas_event(const struct rtas_error_log *log)
         /* For PRRN Events the extended log length is used to denote
          * the scope for calling rtas update-nodes.
          */
-       prrn_schedule_update(rtas_error_extended_log_length(log));
+       handle_prrn_event(rtas_error_extended_log_length(log));
  }
  
  #else
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c

index 93fa0c9..9ca9db7 100644 (file)
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -33,6 +33,7 @@
  #include <linux/serial_8250.h>
  #include <linux/percpu.h>
  #include <linux/memblock.h>
+#include <linux/bootmem.h>
  #include <linux/of_platform.h>
  #include <linux/hugetlb.h>
  #include <asm/debugfs.h>
@@ -966,6 +967,8 @@ void __init setup_arch(char **cmdline_p)
  
         initmem_init();
  
+       early_memtest(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT);
+
  #ifdef CONFIG_DUMMY_CONSOLE
         conswitchp = &dummy_con;
  #endif
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c

index 6a501b2..faf0022 100644 (file)
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -243,13 +243,19 @@ static void cpu_ready_for_interrupts(void)
         }
  
         /*
-        * Fixup HFSCR:TM based on CPU features. The bit is set by our
-        * early asm init because at that point we haven't updated our
-        * CPU features from firmware and device-tree. Here we have,
-        * so let's do it.
+        * Set HFSCR:TM based on CPU features:
+        * In the special case of TM no suspend (P9N DD2.1), Linux is
+        * told TM is off via the dt-ftrs but told to (partially) use
+        * it via OPAL_REINIT_CPUS_TM_SUSPEND_DISABLED. So HFSCR[TM]
+        * will be off from dt-ftrs but we need to turn it on for the
+        * no suspend case.
          */
-       if (cpu_has_feature(CPU_FTR_HVMODE) && !cpu_has_feature(CPU_FTR_TM_COMP))
-               mtspr(SPRN_HFSCR, mfspr(SPRN_HFSCR) & ~HFSCR_TM);
+       if (cpu_has_feature(CPU_FTR_HVMODE)) {
+               if (cpu_has_feature(CPU_FTR_TM_COMP))
+                       mtspr(SPRN_HFSCR, mfspr(SPRN_HFSCR) | HFSCR_TM);
+               else
+                       mtspr(SPRN_HFSCR, mfspr(SPRN_HFSCR) & ~HFSCR_TM);
+       }
  
         /* Set IR and DR in PACA MSR */
         get_paca()->kernel_msr = MSR_KERNEL;
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c

index 61c1fad..3f15edf 100644 (file)
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -34,6 +34,8 @@
  #include <linux/topology.h>
  #include <linux/profile.h>
  #include <linux/processor.h>
+#include <linux/random.h>
+#include <linux/stackprotector.h>
  
  #include <asm/ptrace.h>
  #include <linux/atomic.h>
@@ -74,14 +76,32 @@ static DEFINE_PER_CPU(int, cpu_state) = { 0 };
  #endif
  
  struct thread_info *secondary_ti;
+bool has_big_cores;
  
  DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
+DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map);
  DEFINE_PER_CPU(cpumask_var_t, cpu_l2_cache_map);
  DEFINE_PER_CPU(cpumask_var_t, cpu_core_map);
  
  EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
  EXPORT_PER_CPU_SYMBOL(cpu_l2_cache_map);
  EXPORT_PER_CPU_SYMBOL(cpu_core_map);
+EXPORT_SYMBOL_GPL(has_big_cores);
+
+#define MAX_THREAD_LIST_SIZE   8
+#define THREAD_GROUP_SHARE_L1   1
+struct thread_groups {
+       unsigned int property;
+       unsigned int nr_groups;
+       unsigned int threads_per_group;
+       unsigned int thread_list[MAX_THREAD_LIST_SIZE];
+};
+
+/*
+ * On a big-core system, cpu_l1_cache_map for each CPU corresponds to
+ * the set of its siblings that share the L1-cache.
+ */
+DEFINE_PER_CPU(cpumask_var_t, cpu_l1_cache_map);
  
  /* SMP operations for this machine */
  struct smp_ops_t *smp_ops;
@@ -674,6 +694,185 @@ static void set_cpus_unrelated(int i, int j,
  }
  #endif
  
+/*
+ * parse_thread_groups: Parses the "ibm,thread-groups" device tree
+ *                      property for the CPU device node @dn and stores
+ *                      the parsed output in the thread_groups
+ *                      structure @tg if the ibm,thread-groups[0]
+ *                      matches @property.
+ *
+ * @dn: The device node of the CPU device.
+ * @tg: Pointer to a thread group structure into which the parsed
+ *      output of "ibm,thread-groups" is stored.
+ * @property: The property of the thread-group that the caller is
+ *            interested in.
+ *
+ * ibm,thread-groups[0..N-1] array defines which group of threads in
+ * the CPU-device node can be grouped together based on the property.
+ *
+ * ibm,thread-groups[0] tells us the property based on which the
+ * threads are being grouped together. If this value is 1, it implies
+ * that the threads in the same group share L1, translation cache.
+ *
+ * ibm,thread-groups[1] tells us how many such thread groups exist.
+ *
+ * ibm,thread-groups[2] tells us the number of threads in each such
+ * group.
+ *
+ * ibm,thread-groups[3..N-1] is the list of threads identified by
+ * "ibm,ppc-interrupt-server#s" arranged as per their membership in
+ * the grouping.
+ *
+ * Example: If ibm,thread-groups = [1,2,4,5,6,7,8,9,10,11,12] it
+ * implies that there are 2 groups of 4 threads each, where each group
+ * of threads share L1, translation cache.
+ *
+ * The "ibm,ppc-interrupt-server#s" of the first group is {5,6,7,8}
+ * and the "ibm,ppc-interrupt-server#s" of the second group is {9, 10,
+ * 11, 12}.
+ *
+ * Returns 0 on success, -EINVAL if the property does not exist,
+ * -ENODATA if the property does not have a value, describes a
+ * grouping other than @property, or declares zero groups or zero
+ * threads per group, -E2BIG if it lists more than
+ * MAX_THREAD_LIST_SIZE threads, and -EOVERFLOW if the property data
+ * isn't large enough.
+ */
+static int parse_thread_groups(struct device_node *dn,
+                              struct thread_groups *tg,
+                              unsigned int property)
+{
+       u32 thread_group_array[3 + MAX_THREAD_LIST_SIZE];
+       u32 *thread_list;
+       size_t total_threads;
+       size_t i;
+       int ret;
+
+       /* Read only the three-word header first to learn the list length. */
+       ret = of_property_read_u32_array(dn, "ibm,thread-groups",
+                                        thread_group_array, 3);
+       if (ret)
+               return ret;
+
+       tg->property = thread_group_array[0];
+       tg->nr_groups = thread_group_array[1];
+       tg->threads_per_group = thread_group_array[2];
+       if (tg->property != property ||
+           tg->nr_groups < 1 ||
+           tg->threads_per_group < 1)
+               return -ENODATA;
+
+       /*
+        * The header is supplied by firmware, so bound it before it is
+        * used as a length: thread_group_array[] and tg->thread_list[]
+        * each have room for at most MAX_THREAD_LIST_SIZE thread ids.
+        * Dividing instead of multiplying keeps the check safe against
+        * 32-bit wrap-around (nr_groups is known to be non-zero here).
+        */
+       if (tg->threads_per_group > MAX_THREAD_LIST_SIZE / tg->nr_groups)
+               return -E2BIG;
+
+       total_threads = tg->nr_groups * tg->threads_per_group;
+
+       /* Re-read the property, this time including the thread list. */
+       ret = of_property_read_u32_array(dn, "ibm,thread-groups",
+                                        thread_group_array,
+                                        3 + total_threads);
+       if (ret)
+               return ret;
+
+       thread_list = &thread_group_array[3];
+
+       for (i = 0; i < total_threads; i++)
+               tg->thread_list[i] = thread_list[i];
+
+       return 0;
+}
+
+/*
+ * get_cpu_thread_group_start : Locates, within tg->thread_list, the
+ *                              thread group that contains @cpu.
+ *
+ * @cpu : The logical CPU to look up.
+ * @tg : The thread-group structure of the CPU node which @cpu belongs
+ *       to.
+ *
+ * Returns the index into tg->thread_list at which the thread group
+ * containing @cpu starts.
+ *
+ * Returns -1 if the hardware id of @cpu is found in none of the groups
+ * listed in tg->thread_list.
+ */
+static int get_cpu_thread_group_start(int cpu, struct thread_groups *tg)
+{
+       int hw_cpu_id = get_hard_smp_processor_id(cpu);
+       int grp = 0;
+
+       while (grp < tg->nr_groups) {
+               /* thread_list is laid out one whole group after another */
+               int base = grp * tg->threads_per_group;
+               int slot;
+
+               for (slot = 0; slot < tg->threads_per_group; slot++) {
+                       if (tg->thread_list[base + slot] != hw_cpu_id)
+                               continue;
+
+                       return base;
+               }
+
+               grp++;
+       }
+
+       return -1;
+}
+
+/*
+ * init_cpu_l1_cache_map : Builds cpu_l1_cache_map for @cpu from the
+ *                         "ibm,thread-groups" property of its CPU node.
+ *
+ * @cpu : The logical CPU whose L1-cache sharing map is being set up.
+ *
+ * Walks the threads_per_core CPUs starting at @cpu's first thread
+ * sibling and marks in the map those that lie in the same thread group
+ * as @cpu.
+ *
+ * Returns 0 on success, -ENODATA if @cpu has no device node or one of
+ * the walked CPUs is missing from the thread-group list, -ENOMEM if
+ * the cpumask cannot be allocated, or the error reported by
+ * parse_thread_groups().
+ */
+static int init_cpu_l1_cache_map(int cpu)
+{
+       struct device_node *dn = of_get_cpu_node(cpu, NULL);
+       struct thread_groups tg = {.property = 0,
+                                  .nr_groups = 0,
+                                  .threads_per_group = 0};
+       int first_thread = cpu_first_thread_sibling(cpu);
+       int i, cpu_group_start = -1, err = 0;
+
+       if (!dn)
+               return -ENODATA;
+
+       err = parse_thread_groups(dn, &tg, THREAD_GROUP_SHARE_L1);
+       if (err)
+               goto out;
+
+       /* The mask is written below, so an allocation failure must stop us. */
+       if (!zalloc_cpumask_var_node(&per_cpu(cpu_l1_cache_map, cpu),
+                                    GFP_KERNEL,
+                                    cpu_to_node(cpu))) {
+               err = -ENOMEM;
+               goto out;
+       }
+
+       cpu_group_start = get_cpu_thread_group_start(cpu, &tg);
+
+       if (unlikely(cpu_group_start == -1)) {
+               WARN_ON_ONCE(1);
+               err = -ENODATA;
+               goto out;
+       }
+
+       for (i = first_thread; i < first_thread + threads_per_core; i++) {
+               int i_group_start = get_cpu_thread_group_start(i, &tg);
+
+               if (unlikely(i_group_start == -1)) {
+                       WARN_ON_ONCE(1);
+                       err = -ENODATA;
+                       goto out;
+               }
+
+               /* Same group start index means same thread group as @cpu. */
+               if (i_group_start == cpu_group_start)
+                       cpumask_set_cpu(i, per_cpu(cpu_l1_cache_map, cpu));
+       }
+
+out:
+       /* Drop the reference taken by of_get_cpu_node(). */
+       of_node_put(dn);
+       return err;
+}
+
+/*
+ * init_big_cores : Sets up the per-CPU L1-cache maps and allocates the
+ *                  per-CPU small-core masks for every possible CPU.
+ *
+ * has_big_cores is set only if this succeeds for all possible CPUs; on
+ * the first failure the function returns immediately and leaves
+ * has_big_cores untouched.
+ *
+ * Returns 0 on success, the error from init_cpu_l1_cache_map(), or
+ * -ENOMEM if a small-core mask cannot be allocated.
+ */
+static int init_big_cores(void)
+{
+       int cpu;
+
+       for_each_possible_cpu(cpu) {
+               int err = init_cpu_l1_cache_map(cpu);
+
+               if (err)
+                       return err;
+
+               /*
+                * cpu_smallcore_map is written once has_big_cores is
+                * set, so do not report success without the mask.
+                */
+               if (!zalloc_cpumask_var_node(&per_cpu(cpu_smallcore_map, cpu),
+                                            GFP_KERNEL,
+                                            cpu_to_node(cpu)))
+                       return -ENOMEM;
+       }
+
+       has_big_cores = true;
+       return 0;
+}
+
  void __init smp_prepare_cpus(unsigned int max_cpus)
  {
         unsigned int cpu;
@@ -712,6 +911,12 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
         cpumask_set_cpu(boot_cpuid, cpu_l2_cache_mask(boot_cpuid));
         cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid));
  
+       init_big_cores();
+       if (has_big_cores) {
+               cpumask_set_cpu(boot_cpuid,
+                               cpu_smallcore_mask(boot_cpuid));
+       }
+
         if (smp_ops && smp_ops->probe)
                 smp_ops->probe();
  }
@@ -995,10 +1200,28 @@ static void remove_cpu_from_masks(int cpu)
                 set_cpus_unrelated(cpu, i, cpu_core_mask);
                 set_cpus_unrelated(cpu, i, cpu_l2_cache_mask);
                 set_cpus_unrelated(cpu, i, cpu_sibling_mask);
+               if (has_big_cores)
+                       set_cpus_unrelated(cpu, i, cpu_smallcore_mask);
         }
  }
  #endif
  
+/*
+ * Adds @cpu to its own small-core mask and links it, in both
+ * directions, with every online CPU among the threads_per_core CPUs
+ * starting at its first thread sibling that is also set in @cpu's
+ * cpu_l1_cache_map. Does nothing unless has_big_cores is set.
+ */
+static inline void add_cpu_to_smallcore_masks(int cpu)
+{
+       struct cpumask *l1_peers = per_cpu(cpu_l1_cache_map, cpu);
+       int base = cpu_first_thread_sibling(cpu);
+       int off;
+
+       if (has_big_cores) {
+               cpumask_set_cpu(cpu, cpu_smallcore_mask(cpu));
+
+               for (off = 0; off < threads_per_core; off++) {
+                       int sibling = base + off;
+
+                       if (!cpu_online(sibling))
+                               continue;
+                       if (!cpumask_test_cpu(sibling, l1_peers))
+                               continue;
+
+                       set_cpus_related(sibling, cpu, cpu_smallcore_mask);
+               }
+       }
+}
+
  static void add_cpu_to_masks(int cpu)
  {
         int first_thread = cpu_first_thread_sibling(cpu);
@@ -1015,6 +1238,7 @@ static void add_cpu_to_masks(int cpu)
                 if (cpu_online(i))
                         set_cpus_related(i, cpu, cpu_sibling_mask);
  
+       add_cpu_to_smallcore_masks(cpu);
         /*
          * Copy the thread sibling mask into the cache sibling mask
          * and mark any CPUs that share an L2 with this CPU.
@@ -1044,6 +1268,7 @@ static bool shared_caches;
  void start_secondary(void *unused)
  {
         unsigned int cpu = smp_processor_id();
+       struct cpumask *(*sibling_mask)(int) = cpu_sibling_mask;
  
         mmgrab(&init_mm);
         current->active_mm = &init_mm;
@@ -1069,11 +1294,13 @@ void start_secondary(void *unused)
         /* Update topology CPU masks */
         add_cpu_to_masks(cpu);
  
+       if (has_big_cores)
+               sibling_mask = cpu_smallcore_mask;
         /*
          * Check for any shared caches. Note that this must be done on a
          * per-core basis because one core in the pair might be disabled.
          */
-       if (!cpumask_equal(cpu_l2_cache_mask(cpu), cpu_sibling_mask(cpu)))
+       if (!cpumask_equal(cpu_l2_cache_mask(cpu), sibling_mask(cpu)))
                 shared_caches = true;
  
         set_numa_node(numa_cpu_lookup_table[cpu]);
@@ -1083,6 +1310,8 @@ void start_secondary(void *unused)
         notify_cpu_starting(cpu);
         set_cpu_online(cpu, true);
  
+       boot_init_stack_canary();
+
         local_irq_enable();
  
         /* We can enable ftrace for secondary cpus now */
@@ -1140,6 +1369,13 @@ static const struct cpumask *shared_cache_mask(int cpu)
         return cpu_l2_cache_mask(cpu);
  }
  
+#ifdef CONFIG_SCHED_SMT
+/*
+ * Mask callback returning the small-core mask of @cpu; smp_cpus_done()
+ * installs it as the mask of level 0 of power9_topology and
+ * powerpc_topology when has_big_cores is set.
+ */
+static const struct cpumask *smallcore_smt_mask(int cpu)
+{
+       const struct cpumask *smallcore = cpu_smallcore_mask(cpu);
+
+       return smallcore;
+}
+#endif
+
  static struct sched_domain_topology_level power9_topology[] = {
  #ifdef CONFIG_SCHED_SMT
         { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
@@ -1167,6 +1403,13 @@ void __init smp_cpus_done(unsigned int max_cpus)
         shared_proc_topology_init();
         dump_numa_cpu_topology();
  
+#ifdef CONFIG_SCHED_SMT
+       if (has_big_cores) {
+               pr_info("Using small cores at SMT level\n");
+               power9_topology[0].mask = smallcore_smt_mask;
+               powerpc_topology[0].mask = smallcore_smt_mask;
+       }
+#endif
         /*
          * If any CPU detects that it's sharing a cache with another CPU then
          * use the deeper topology that is aware of this sharing.
diff --git a/arch/powerpc/kernel/swsusp_asm64.S b/arch/powerpc/kernel/swsusp_asm64.S

index f83bf6f..185216b 100644 (file)
--- a/arch/powerpc/kernel/swsusp_asm64.S
+++ b/arch/powerpc/kernel/swsusp_asm64.S
@@ -262,7 +262,7 @@ END_FW_FTR_SECTION_IFCLR(FW_FEATURE_LPAR)
  
         addi    r1,r1,-128
  #ifdef CONFIG_PPC_BOOK3S_64
-       bl      slb_flush_and_rebolt
+       bl      slb_flush_and_restore_bolted
  #endif
         bl      do_after_copyback
         addi    r1,r1,128
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c

index 70f145e..3646aff 100644 (file)
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -111,6 +111,7 @@ struct clock_event_device decrementer_clockevent = {
         .rating                 = 200,
         .irq                    = 0,
         .set_next_event         = decrementer_set_next_event,
+       .set_state_oneshot_stopped = decrementer_shutdown,
         .set_state_shutdown     = decrementer_shutdown,
         .tick_resume            = decrementer_shutdown,
         .features               = CLOCK_EVT_FEAT_ONESHOT |
@@ -175,7 +176,7 @@ static void calc_cputime_factors(void)
   * Read the SPURR on systems that have it, otherwise the PURR,
   * or if that doesn't exist return the timebase value passed in.
   */
-static unsigned long read_spurr(unsigned long tb)
+static inline unsigned long read_spurr(unsigned long tb)
  {
         if (cpu_has_feature(CPU_FTR_SPURR))
                 return mfspr(SPRN_SPURR);
@@ -281,26 +282,17 @@ static inline u64 calculate_stolen_time(u64 stop_tb)
   * Account time for a transition between system, hard irq
   * or soft irq state.
   */
-static unsigned long vtime_delta(struct task_struct *tsk,
-                                unsigned long *stime_scaled,
-                                unsigned long *steal_time)
+static unsigned long vtime_delta_scaled(struct cpu_accounting_data *acct,
+                                       unsigned long now, unsigned long stime)
  {
-       unsigned long now, nowscaled, deltascaled;
-       unsigned long stime;
+       unsigned long stime_scaled = 0;
+#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
+       unsigned long nowscaled, deltascaled;
         unsigned long utime, utime_scaled;
-       struct cpu_accounting_data *acct = get_accounting(tsk);
  
-       WARN_ON_ONCE(!irqs_disabled());
-
-       now = mftb();
         nowscaled = read_spurr(now);
-       stime = now - acct->starttime;
-       acct->starttime = now;
         deltascaled = nowscaled - acct->startspurr;
         acct->startspurr = nowscaled;
-
-       *steal_time = calculate_stolen_time(now);
-
         utime = acct->utime - acct->utime_sspurr;
         acct->utime_sspurr = acct->utime;
  
@@ -314,17 +306,38 @@ static unsigned long vtime_delta(struct task_struct *tsk,
          * the user ticks get saved up in paca->user_time_scaled to be
          * used by account_process_tick.
          */
-       *stime_scaled = stime;
+       stime_scaled = stime;
         utime_scaled = utime;
         if (deltascaled != stime + utime) {
                 if (utime) {
-                       *stime_scaled = deltascaled * stime / (stime + utime);
-                       utime_scaled = deltascaled - *stime_scaled;
+                       stime_scaled = deltascaled * stime / (stime + utime);
+                       utime_scaled = deltascaled - stime_scaled;
                 } else {
-                       *stime_scaled = deltascaled;
+                       stime_scaled = deltascaled;
                 }
         }
         acct->utime_scaled += utime_scaled;
+#endif
+
+       return stime_scaled;
+}
+
+/*
+ * Advance @tsk's accounting snapshot to the current timebase value.
+ *
+ * @tsk: task whose cpu_accounting_data is updated.
+ * @stime_scaled: out, the value returned by vtime_delta_scaled() for
+ *                this interval.
+ * @steal_time: out, the value returned by calculate_stolen_time() for
+ *              the current timebase value.
+ *
+ * Returns the unscaled time elapsed since acct->starttime. Warns once
+ * if called with interrupts enabled.
+ */
+static unsigned long vtime_delta(struct task_struct *tsk,
+                                unsigned long *stime_scaled,
+                                unsigned long *steal_time)
+{
+       unsigned long now, stime;
+       struct cpu_accounting_data *acct = get_accounting(tsk);
+
+       WARN_ON_ONCE(!irqs_disabled());
+
+       /* Elapsed time since the previous snapshot; then move the snapshot. */
+       now = mftb();
+       stime = now - acct->starttime;
+       acct->starttime = now;
+
+       *stime_scaled = vtime_delta_scaled(acct, now, stime);
+
+       *steal_time = calculate_stolen_time(now);
  
         return stime;
  }
@@ -341,7 +354,9 @@ void vtime_account_system(struct task_struct *tsk)
  
         if ((tsk->flags & PF_VCPU) && !irq_count()) {
                 acct->gtime += stime;
+#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
                 acct->utime_scaled += stime_scaled;
+#endif
         } else {
                 if (hardirq_count())
                         acct->hardirq_time += stime;
@@ -350,7 +365,9 @@ void vtime_account_system(struct task_struct *tsk)
                 else
                         acct->stime += stime;
  
+#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
                 acct->stime_scaled += stime_scaled;
+#endif
         }
  }
  EXPORT_SYMBOL_GPL(vtime_account_system);
@@ -364,6 +381,21 @@ void vtime_account_idle(struct task_struct *tsk)
         acct->idle_time += stime + steal_time;
  }
  
+/*
+ * Transfer the scaled user and system time accumulated in @acct to
+ * @tsk (converted with cputime_to_nsecs()) and reset the scaled
+ * accumulators. Compiles to nothing without
+ * CONFIG_ARCH_HAS_SCALED_CPUTIME.
+ */
+static void vtime_flush_scaled(struct task_struct *tsk,
+                              struct cpu_accounting_data *acct)
+{
+#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
+       if (acct->utime_scaled) {
+               tsk->utimescaled += cputime_to_nsecs(acct->utime_scaled);
+               acct->utime_scaled = 0;
+       }
+
+       if (acct->stime_scaled) {
+               tsk->stimescaled += cputime_to_nsecs(acct->stime_scaled);
+               acct->stime_scaled = 0;
+       }
+
+       acct->utime_sspurr = 0;
+#endif
+}
+
  /*
   * Account the whole cputime accumulated in the paca
   * Must be called with interrupts disabled.
@@ -378,14 +410,13 @@ void vtime_flush(struct task_struct *tsk)
         if (acct->utime)
                 account_user_time(tsk, cputime_to_nsecs(acct->utime));
  
-       if (acct->utime_scaled)
-               tsk->utimescaled += cputime_to_nsecs(acct->utime_scaled);
-
         if (acct->gtime)
                 account_guest_time(tsk, cputime_to_nsecs(acct->gtime));
  
-       if (acct->steal_time)
+       if (IS_ENABLED(CONFIG_PPC_SPLPAR) && acct->steal_time) {
                 account_steal_time(cputime_to_nsecs(acct->steal_time));
+               acct->steal_time = 0;
+       }
  
         if (acct->idle_time)
                 account_idle_time(cputime_to_nsecs(acct->idle_time));
@@ -393,8 +424,6 @@ void vtime_flush(struct task_struct *tsk)
         if (acct->stime)
                 account_system_index_time(tsk, cputime_to_nsecs(acct->stime),
                                           CPUTIME_SYSTEM);
-       if (acct->stime_scaled)
-               tsk->stimescaled += cputime_to_nsecs(acct->stime_scaled);
  
         if (acct->hardirq_time)
                 account_system_index_time(tsk, cputime_to_nsecs(acct->hardirq_time),
@@ -403,14 +432,12 @@ void vtime_flush(struct task_struct *tsk)
                 account_system_index_time(tsk, cputime_to_nsecs(acct->softirq_time),
                                           CPUTIME_SOFTIRQ);
  
+       vtime_flush_scaled(tsk, acct);
+
         acct->utime = 0;
-       acct->utime_scaled = 0;
-       acct->utime_sspurr = 0;
         acct->gtime = 0;
-       acct->steal_time = 0;
         acct->idle_time = 0;
         acct->stime = 0;
-       acct->stime_scaled = 0;
         acct->hardirq_time = 0;
         acct->softirq_time = 0;
  }
@@ -984,10 +1011,14 @@ static void register_decrementer_clockevent(int cpu)
         *dec = decrementer_clockevent;
         dec->cpumask = cpumask_of(cpu);
  
+       clockevents_config_and_register(dec, ppc_tb_freq, 2, decrementer_max);
+
         printk_once(KERN_DEBUG "clockevent: %s mult[%x] shift[%d] cpu[%d]\n",
                     dec->name, dec->mult, dec->shift, cpu);
  
-       clockevents_register_device(dec);
+       /* Set values for KVM, see kvm_emulate_dec() */
+       decrementer_clockevent.mult = dec->mult;
+       decrementer_clockevent.shift = dec->shift;
  }
  
  static void enable_large_decrementer(void)
@@ -1035,18 +1066,7 @@ static void __init set_decrementer_max(void)
  
  static void __init init_decrementer_clockevent(void)
  {
-       int cpu = smp_processor_id();
-
-       clockevents_calc_mult_shift(&decrementer_clockevent, ppc_tb_freq, 4);
-
-       decrementer_clockevent.max_delta_ns =
-               clockevent_delta2ns(decrementer_max, &decrementer_clockevent);
-       decrementer_clockevent.max_delta_ticks = decrementer_max;
-       decrementer_clockevent.min_delta_ns =
-               clockevent_delta2ns(2, &decrementer_clockevent);
-       decrementer_clockevent.min_delta_ticks = 2;
-
-       register_decrementer_clockevent(cpu);
+       register_decrementer_clockevent(smp_processor_id());
  }
  
  void secondary_cpu_time_init(void)
diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S

index 7716374..9fabdce 100644 (file)
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -92,13 +92,14 @@ _GLOBAL(tm_abort)
         blr
  EXPORT_SYMBOL_GPL(tm_abort);
  
-/* void tm_reclaim(struct thread_struct *thread,
+/*
+ * void tm_reclaim(struct thread_struct *thread,
   *                uint8_t cause)
   *
   *     - Performs a full reclaim.  This destroys outstanding
- *       transactions and updates thread->regs.tm_ckpt_* with the
- *       original checkpointed state.  Note that thread->regs is
- *       unchanged.
+ *       transactions and updates thread.ckpt_regs, thread.ckfp_state and
+ *       thread.ckvr_state with the original checkpointed state.  Note that
+ *       thread->regs is unchanged.
   *
   * Purpose is to both abort transactions of, and preserve the state of,
   * a transactions at a context switch. We preserve/restore both sets of process
@@ -163,15 +164,16 @@ _GLOBAL(tm_reclaim)
          */
         TRECLAIM(R4)                            /* Cause in r4 */
  
-       /* ******************** GPRs ******************** */
-       /* Stash the checkpointed r13 away in the scratch SPR and get the real
-        *  paca
+       /*
+        * ******************** GPRs ********************
+        * Stash the checkpointed r13 in the scratch SPR and get the real paca.
          */
         SET_SCRATCH0(r13)
         GET_PACA(r13)
  
-       /* Stash the checkpointed r1 away in paca tm_scratch and get the real
-        * stack pointer back
+       /*
+        * Stash the checkpointed r1 away in paca->tm_scratch and get the real
+        * stack pointer back into r1.
          */
         std     r1, PACATMSCRATCH(r13)
         ld      r1, PACAR1(r13)
@@ -209,14 +211,15 @@ _GLOBAL(tm_reclaim)
  
         addi    r7, r12, PT_CKPT_REGS           /* Thread's ckpt_regs */
  
-       /* Make r7 look like an exception frame so that we
-        * can use the neat GPRx(n) macros.  r7 is NOT a pt_regs ptr!
+       /*
+        * Make r7 look like an exception frame so that we can use the neat
+        * GPRx(n) macros. r7 is NOT a pt_regs ptr!
          */
         subi    r7, r7, STACK_FRAME_OVERHEAD
  
         /* Sync the userland GPRs 2-12, 14-31 to thread->regs: */
         SAVE_GPR(0, r7)                         /* user r0 */
-       SAVE_GPR(2, r7)                 /* user r2 */
+       SAVE_GPR(2, r7)                         /* user r2 */
         SAVE_4GPRS(3, r7)                       /* user r3-r6 */
         SAVE_GPR(8, r7)                         /* user r8 */
         SAVE_GPR(9, r7)                         /* user r9 */
@@ -237,7 +240,8 @@ _GLOBAL(tm_reclaim)
         /* ******************** NIP ******************** */
         mfspr   r3, SPRN_TFHAR
         std     r3, _NIP(r7)                    /* Returns to failhandler */
-       /* The checkpointed NIP is ignored when rescheduling/rechkpting,
+       /*
+        * The checkpointed NIP is ignored when rescheduling/rechkpting,
          * but is used in signal return to 'wind back' to the abort handler.
          */
  
@@ -260,12 +264,13 @@ _GLOBAL(tm_reclaim)
         std     r3, THREAD_TM_TAR(r12)
         std     r4, THREAD_TM_DSCR(r12)
  
-       /* MSR and flags:  We don't change CRs, and we don't need to alter
-        * MSR.
+       /*
+        * MSR and flags: We don't change CRs, and we don't need to alter MSR.
          */
  
  
-       /* ******************** FPR/VR/VSRs ************
+       /*
+        * ******************** FPR/VR/VSRs ************
          * After reclaiming, capture the checkpointed FPRs/VRs.
          *
          * We enabled VEC/FP/VSX in the msr above, so we can execute these
@@ -275,7 +280,7 @@ _GLOBAL(tm_reclaim)
  
         /* Altivec (VEC/VMX/VR)*/
         addi    r7, r3, THREAD_CKVRSTATE
-       SAVE_32VRS(0, r6, r7)   /* r6 scratch, r7 transact vr state */
+       SAVE_32VRS(0, r6, r7)   /* r6 scratch, r7 ckvr_state */
         mfvscr  v0
         li      r6, VRSTATE_VSCR
         stvx    v0, r7, r6
@@ -286,12 +291,13 @@ _GLOBAL(tm_reclaim)
  
         /* Floating Point (FP) */
         addi    r7, r3, THREAD_CKFPSTATE
-       SAVE_32FPRS_VSRS(0, R6, R7)     /* r6 scratch, r7 transact fp state */
+       SAVE_32FPRS_VSRS(0, R6, R7)     /* r6 scratch, r7 ckfp_state */
         mffs    fr0
         stfd    fr0,FPSTATE_FPSCR(r7)
  
  
-       /* TM regs, incl TEXASR -- these live in thread_struct.  Note they've
+       /*
+        * TM regs, incl TEXASR -- these live in thread_struct.  Note they've
          * been updated by the treclaim, to explain to userland the failure
          * cause (aborted).
          */
@@ -327,7 +333,7 @@ _GLOBAL(tm_reclaim)
         blr
  
  
-       /*
+       /*
          * void __tm_recheckpoint(struct thread_struct *thread)
          *      - Restore the checkpointed register state saved by tm_reclaim
          *        when we switch_to a process.
@@ -343,7 +349,8 @@ _GLOBAL(__tm_recheckpoint)
         std     r2, STK_GOT(r1)
         stdu    r1, -TM_FRAME_SIZE(r1)
  
-       /* We've a struct pt_regs at [r1+STACK_FRAME_OVERHEAD].
+       /*
+        * We've a struct pt_regs at [r1+STACK_FRAME_OVERHEAD].
          * This is used for backing up the NVGPRs:
          */
         SAVE_NVGPRS(r1)
@@ -352,8 +359,9 @@ _GLOBAL(__tm_recheckpoint)
  
         addi    r7, r3, PT_CKPT_REGS            /* Thread's ckpt_regs */
  
-       /* Make r7 look like an exception frame so that we
-        * can use the neat GPRx(n) macros.  r7 is now NOT a pt_regs ptr!
+       /*
+        * Make r7 look like an exception frame so that we can use the neat
+        * GPRx(n) macros. r7 is now NOT a pt_regs ptr!
          */
         subi    r7, r7, STACK_FRAME_OVERHEAD
  
@@ -421,14 +429,15 @@ restore_gprs:
  
         REST_NVGPRS(r7)                         /* GPR14-31 */
  
-       /* Load up PPR and DSCR here so we don't run with user values for long
-        */
+       /* Load up PPR and DSCR here so we don't run with user values for long */
         mtspr   SPRN_DSCR, r5
         mtspr   SPRN_PPR, r6
  
-       /* Do final sanity check on TEXASR to make sure FS is set.  Do this
+       /*
+        * Do final sanity check on TEXASR to make sure FS is set. Do this
          * here before we load up the userspace r1 so any bugs we hit will get
-        * a call chain */
+        * a call chain.
+        */
         mfspr   r5, SPRN_TEXASR
         srdi    r5, r5, 16
         li      r6, (TEXASR_FS)@h
@@ -436,8 +445,9 @@ restore_gprs:
  1:     tdeqi   r6, 0
         EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0
  
-       /* Do final sanity check on MSR to make sure we are not transactional
-        * or suspended
+       /*
+        * Do final sanity check on MSR to make sure we are not transactional
+        * or suspended.
          */
         mfmsr   r6
         li      r5, (MSR_TS_MASK)@higher
@@ -453,8 +463,8 @@ restore_gprs:
         REST_GPR(6, r7)
  
         /*
-        * Store r1 and r5 on the stack so that we can access them
-        * after we clear MSR RI.
+        * Store r1 and r5 on the stack so that we can access them after we
+        * clear MSR RI.
          */
  
         REST_GPR(5, r7)
@@ -484,7 +494,8 @@ restore_gprs:
  
         HMT_MEDIUM
  
-       /* Our transactional state has now changed.
+       /*
+        * Our transactional state has now changed.
          *
          * Now just get out of here.  Transactional (current) state will be
          * updated once restore is called on the return path in the _switch-ed
diff --git a/arch/powerpc/kernel/trace/Makefile b/arch/powerpc/kernel/trace/Makefile

index d22d8ba..b1725ad 100644 (file)
--- a/arch/powerpc/kernel/trace/Makefile
+++ b/arch/powerpc/kernel/trace/Makefile
@@ -3,11 +3,9 @@
  # Makefile for the powerpc trace subsystem
  #
  
-subdir-ccflags-$(CONFIG_PPC_WERROR)    := -Werror
-
  ifdef CONFIG_FUNCTION_TRACER
  # do not trace tracer code
-CFLAGS_REMOVE_ftrace.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE)
  endif
  
  obj32-$(CONFIG_FUNCTION_TRACER)                += ftrace_32.o
diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c

index 4bfbb54..4bf051d 100644 (file)
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -30,6 +30,16 @@
  
  
  #ifdef CONFIG_DYNAMIC_FTRACE
+
+/*
+ * We generally only have a single long_branch tramp and at most 2 or 3 plt
+ * tramps generated. But, we don't use the plt tramps currently. We also allot
+ * 2 tramps after .text and .init.text. So, we only end up with around 3 usable
+ * tramps in total. Set aside 8 just to be sure.
+ */
+#define        NUM_FTRACE_TRAMPS       8
+static unsigned long ftrace_tramps[NUM_FTRACE_TRAMPS];
+
  static unsigned int
  ftrace_call_replace(unsigned long ip, unsigned long addr, int link)
  {
@@ -85,13 +95,16 @@ static int test_24bit_addr(unsigned long ip, unsigned long addr)
         return create_branch((unsigned int *)ip, addr, 0);
  }
  
-#ifdef CONFIG_MODULES
-
  static int is_bl_op(unsigned int op)
  {
         return (op & 0xfc000003) == 0x48000001;
  }
  
+/*
+ * Returns non-zero iff @op, once reduced to its top six bits and its
+ * low two bits, equals 0x48000000, i.e. the same encoding is_bl_op()
+ * accepts but with the lowest bit clear.
+ */
+static int is_b_op(unsigned int op)
+{
+       const unsigned int masked = op & 0xfc000003;
+
+       return masked == 0x48000000;
+}
+
  static unsigned long find_bl_target(unsigned long ip, unsigned int op)
  {
         static int offset;
@@ -104,6 +117,7 @@ static unsigned long find_bl_target(unsigned long ip, unsigned int op)
         return ip + (long)offset;
  }
  
+#ifdef CONFIG_MODULES
  #ifdef CONFIG_PPC64
  static int
  __ftrace_make_nop(struct module *mod,
@@ -270,6 +284,146 @@ __ftrace_make_nop(struct module *mod,
  #endif /* PPC64 */
  #endif /* CONFIG_MODULES */
  
+/*
+ * Returns the address of a recorded ftrace trampoline for which
+ * create_branch() succeeds from @ip, or 0 if there is none.
+ */
+static unsigned long find_ftrace_tramp(unsigned long ip)
+{
+       int slot = NUM_FTRACE_TRAMPS;
+
+       /*
+        * Walk the table backwards: the compiler generated long_branch
+        * tramps are at the end and those are preferred.
+        */
+       while (--slot >= 0) {
+               unsigned long tramp = ftrace_tramps[slot];
+
+               /* Skip empty slots and tramps not reachable from @ip */
+               if (tramp && create_branch((void *)ip, tramp, 0))
+                       return tramp;
+       }
+
+       return 0;
+}
+
+/*
+ * Records @tramp in the first free (zero) slot of ftrace_tramps[].
+ * Returns 0 on success, -1 if every slot is already taken.
+ */
+static int add_ftrace_tramp(unsigned long tramp)
+{
+       unsigned long *slot = ftrace_tramps;
+       unsigned long *end = ftrace_tramps + NUM_FTRACE_TRAMPS;
+
+       /* Skip over the slots that are already in use */
+       while (slot < end && *slot)
+               slot++;
+
+       if (slot == end)
+               return -1;
+
+       *slot = tramp;
+       return 0;
+}
+
+/*
+ * If this is a compiler generated long_branch trampoline (essentially, a
+ * trampoline that has a branch to _mcount()), we re-write the branch to
+ * instead go to ftrace_[regs_]caller() and note down the location of this
+ * trampoline.
+ *
+ * @tramp: address of the candidate trampoline (the target of a call site's
+ *         bl).
+ *
+ * Returns 0 if @tramp is already recorded in ftrace_tramps[] or was
+ * successfully re-written and recorded; returns -1 in every other case
+ * (unreadable, not a plain branch, not targeting _mcount, new target out
+ * of range, patching failed, or no free slot left).
+ *
+ * Note that the slot is claimed only after the branch has been patched, so
+ * when ftrace_tramps[] is full the trampoline is left re-written but
+ * unrecorded.
+ */
+static int setup_mcount_compiler_tramp(unsigned long tramp)
+{
+       int i, op;
+       unsigned long ptr;
+       static unsigned long ftrace_plt_tramps[NUM_FTRACE_TRAMPS];
+
+       /*
+        * Is this a known long jump tramp? The table is filled from index 0
+        * by add_ftrace_tramp(), so the first zero entry ends the search.
+        */
+       for (i = 0; i < NUM_FTRACE_TRAMPS; i++)
+               if (!ftrace_tramps[i])
+                       break;
+               else if (ftrace_tramps[i] == tramp)
+                       return 0;
+
+       /*
+        * Is this a known plt tramp?
+        * NOTE(review): ftrace_plt_tramps is local to this function and is
+        * never stored to here, so this loop currently always stops at its
+        * first (zero) entry.
+        */
+       for (i = 0; i < NUM_FTRACE_TRAMPS; i++)
+               if (!ftrace_plt_tramps[i])
+                       break;
+               else if (ftrace_plt_tramps[i] == tramp)
+                       return -1;
+
+       /* New trampoline -- read where this goes */
+       if (probe_kernel_read(&op, (void *)tramp, sizeof(int))) {
+               pr_debug("Fetching opcode failed.\n");
+               return -1;
+       }
+
+       /* Is this a 24 bit branch? */
+       if (!is_b_op(op)) {
+               pr_debug("Trampoline is not a long branch tramp.\n");
+               return -1;
+       }
+
+       /* Let's find where the pointer goes */
+       ptr = find_bl_target(tramp, op);
+
+       if (ptr != ppc_global_function_entry((void *)_mcount)) {
+               pr_debug("Trampoline target %p is not _mcount\n", (void *)ptr);
+               return -1;
+       }
+
+       /* Let's re-write the tramp to go to ftrace_[regs_]caller */
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+       ptr = ppc_global_function_entry((void *)ftrace_regs_caller);
+#else
+       ptr = ppc_global_function_entry((void *)ftrace_caller);
+#endif
+       if (!create_branch((void *)tramp, ptr, 0)) {
+               pr_debug("%ps is not reachable from existing mcount tramp\n",
+                               (void *)ptr);
+               return -1;
+       }
+
+       if (patch_branch((unsigned int *)tramp, ptr, 0)) {
+               pr_debug("REL24 out of range!\n");
+               return -1;
+       }
+
+       if (add_ftrace_tramp(tramp)) {
+               pr_debug("No tramp locations left\n");
+               return -1;
+       }
+
+       return 0;
+}
+
+/*
+ * Replace the mcount call at @rec->ip, a call site that ftrace_make_nop()
+ * found to be in core kernel text, with a nop.
+ *
+ * The site must currently hold a bl. Its target is offered to
+ * setup_mcount_compiler_tramp(); if that fails, some other recorded
+ * trampoline has to be reachable from the site, otherwise the site is
+ * left untouched.
+ *
+ * @rec: ftrace record describing the call site.
+ * @addr: not used by this function.
+ *
+ * Returns 0 on success, -EFAULT if the instruction cannot be read,
+ * -EINVAL if it is not a bl or no ftrace trampoline is reachable from the
+ * site, and -EPERM if writing the nop fails.
+ */
+static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr)
+{
+       unsigned long tramp, ip = rec->ip;
+       unsigned int op;
+
+       /* Read where this goes */
+       if (probe_kernel_read(&op, (void *)ip, sizeof(op))) {
+               pr_err("Fetching opcode failed.\n");
+               return -EFAULT;
+       }
+
+       /* Make sure that this is still a 24bit jump */
+       if (!is_bl_op(op)) {
+               pr_err("Not expected bl: opcode is %x\n", op);
+               return -EINVAL;
+       }
+
+       /* Let's find where the pointer goes */
+       tramp = find_bl_target(ip, op);
+
+       pr_devel("ip:%lx jumps to %lx\n", ip, tramp);
+
+       if (setup_mcount_compiler_tramp(tramp)) {
+               /* Are other trampolines reachable? */
+               if (!find_ftrace_tramp(ip)) {
+                       pr_err("No ftrace trampolines reachable from %ps\n",
+                                       (void *)ip);
+                       return -EINVAL;
+               }
+       }
+
+       if (patch_instruction((unsigned int *)ip, PPC_INST_NOP)) {
+               pr_err("Patching NOP failed.\n");
+               return -EPERM;
+       }
+
+       return 0;
+}
+
  int ftrace_make_nop(struct module *mod,
                     struct dyn_ftrace *rec, unsigned long addr)
  {
@@ -286,7 +440,8 @@ int ftrace_make_nop(struct module *mod,
                 old = ftrace_call_replace(ip, addr, 1);
                 new = PPC_INST_NOP;
                 return ftrace_modify_code(ip, old, new);
-       }
+       } else if (core_kernel_text(ip))
+               return __ftrace_make_nop_kernel(rec, addr);
  
  #ifdef CONFIG_MODULES
         /*
@@ -456,6 +611,53 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
  #endif /* CONFIG_PPC64 */
  #endif /* CONFIG_MODULES */
  
+/*
+ * Turn the nop at a core-kernel ftrace location into a 'bl' to a reachable
+ * ftrace trampoline. @addr must resolve to ftrace_caller, or to
+ * ftrace_regs_caller when CONFIG_DYNAMIC_FTRACE_WITH_REGS is enabled.
+ */
+static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr)
+{
+       void *loc = (void *)rec->ip;
+       unsigned long stub, target = ppc_global_function_entry((void *)addr);
+       unsigned int insn;
+       int ok;
+
+       /* Make sure we're being asked to patch branch to a known ftrace addr */
+       ok = target == ppc_global_function_entry((void *)ftrace_caller);
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+       ok |= target == ppc_global_function_entry((void *)ftrace_regs_caller);
+#endif
+       if (!ok) {
+               pr_err("Unknown ftrace addr to patch: %ps\n", (void *)target);
+               return -EINVAL;
+       }
+
+       /* Make sure we have a nop */
+       if (probe_kernel_read(&insn, loc, sizeof(insn))) {
+               pr_err("Unable to read ftrace location %p\n", loc);
+               return -EFAULT;
+       }
+
+       if (insn != PPC_INST_NOP) {
+               pr_err("Unexpected call sequence at %p: %x\n", loc, insn);
+               return -EINVAL;
+       }
+
+       stub = find_ftrace_tramp((unsigned long)loc);
+       if (!stub) {
+               pr_err("No ftrace trampolines reachable from %ps\n", loc);
+               return -EINVAL;
+       }
+
+       if (patch_branch(loc, stub, BRANCH_SET_LINK)) {
+               pr_err("Error patching branch to ftrace tramp!\n");
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
  int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
  {
         unsigned long ip = rec->ip;
@@ -471,7 +673,8 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
                 old = PPC_INST_NOP;
                 new = ftrace_call_replace(ip, addr, 1);
                 return ftrace_modify_code(ip, old, new);
-       }
+       } else if (core_kernel_text(ip))
+               return __ftrace_make_call_kernel(rec, addr);
  
  #ifdef CONFIG_MODULES
         /*
@@ -603,6 +806,12 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
                 old = ftrace_call_replace(ip, old_addr, 1);
                 new = ftrace_call_replace(ip, addr, 1);
                 return ftrace_modify_code(ip, old, new);
+       } else if (core_kernel_text(ip)) {
+               /*
+                * We always patch out of range locations to go to the regs
+                * variant, so there is nothing to do here
+                */
+               return 0;
         }
  
  #ifdef CONFIG_MODULES
@@ -654,10 +863,54 @@ void arch_ftrace_update_code(int command)
         ftrace_modify_all_code(command);
  }
  
+#ifdef CONFIG_PPC64
+#define PACATOC offsetof(struct paca_struct, kernel_toc)
+
+#define PPC_LO(v) ((v) & 0xffff)
+#define PPC_HI(v) (((v) >> 16) & 0xffff)
+#define PPC_HA(v) PPC_HI ((v) + 0x8000)
+
+extern unsigned int ftrace_tramp_text[], ftrace_tramp_init[];
+
+/*
+ * Fill the two reserved stubs (ftrace_tramp_text and ftrace_tramp_init) with
+ * a sequence that loads the kernel TOC pointer from the PACA, adds the
+ * TOC-relative offset of ftrace_regs_caller (ftrace_caller without
+ * CONFIG_DYNAMIC_FTRACE_WITH_REGS) and branches there through CTR, then
+ * record both stubs with add_ftrace_tramp().
+ *
+ * Returns 0 on success, or -1 if the target cannot be encoded as an
+ * addis/addi offset from the kernel TOC.
+ */
+int __init ftrace_dyn_arch_init(void)
+{
+       int i;
+       unsigned int *tramp[] = { ftrace_tramp_text, ftrace_tramp_init };
+       u32 stub_insns[] = {
+               0xe98d0000 | PACATOC,   /* ld      r12,PACATOC(r13)     */
+               0x3d8c0000,             /* addis   r12,r12,<high>       */
+               0x398c0000,             /* addi    r12,r12,<low>        */
+               0x7d8903a6,             /* mtctr   r12                  */
+               0x4e800420,             /* bctr                         */
+       };
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+       unsigned long addr = ppc_global_function_entry((void *)ftrace_regs_caller);
+#else
+       unsigned long addr = ppc_global_function_entry((void *)ftrace_caller);
+#endif
+       long reladdr = addr - kernel_toc_addr();
+
+       /*
+        * addis and addi both sign-extend their 16-bit immediate. For offsets
+        * above 0x7fff7fff PPC_HA() evaluates to 0x8000, which addis treats
+        * as a negative displacement, so the stub would land 4GB below the
+        * intended target. Reject those offsets too.
+        */
+       if (reladdr > 0x7FFF7FFF || reladdr < -(0x80000000L)) {
+               pr_err("Address of %ps out of range of kernel_toc.\n",
+                               (void *)addr);
+               return -1;
+       }
+
+       for (i = 0; i < 2; i++) {
+               memcpy(tramp[i], stub_insns, sizeof(stub_insns));
+               tramp[i][1] |= PPC_HA(reladdr);
+               tramp[i][2] |= PPC_LO(reladdr);
+               add_ftrace_tramp((unsigned long)tramp[i]);
+       }
+
+       return 0;
+}
+#else
  int __init ftrace_dyn_arch_init(void)
  {
         return 0;
  }
+#endif
  #endif /* CONFIG_DYNAMIC_FTRACE */
  
  #ifdef CONFIG_FUNCTION_GRAPH_TRACER
diff --git a/arch/powerpc/kernel/trace/ftrace_64.S b/arch/powerpc/kernel/trace/ftrace_64.S

index e25f77c..1782af2 100644 (file)
--- a/arch/powerpc/kernel/trace/ftrace_64.S
+++ b/arch/powerpc/kernel/trace/ftrace_64.S
@@ -14,6 +14,18 @@
  #include <asm/ppc-opcode.h>
  #include <asm/export.h>
  
+.pushsection ".tramp.ftrace.text","aw",@progbits;
+.globl ftrace_tramp_text
+ftrace_tramp_text:
+       .space 64
+.popsection
+
+.pushsection ".tramp.ftrace.init","aw",@progbits;
+.globl ftrace_tramp_init
+ftrace_tramp_init:
+       .space 64
+.popsection
+
  _GLOBAL(mcount)
  _GLOBAL(_mcount)
  EXPORT_SYMBOL(_mcount)
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c

index ab1bd06..9a86572 100644 (file)
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -247,8 +247,6 @@ static void oops_end(unsigned long flags, struct pt_regs *regs,
                 mdelay(MSEC_PER_SEC);
         }
  
-       if (in_interrupt())
-               panic("Fatal exception in interrupt");
         if (panic_on_oops)
                 panic("Fatal exception");
         do_exit(signr);
@@ -535,10 +533,10 @@ int machine_check_e500mc(struct pt_regs *regs)
         printk("Caused by (from MCSR=%lx): ", reason);
  
         if (reason & MCSR_MCP)
-               printk("Machine Check Signal\n");
+               pr_cont("Machine Check Signal\n");
  
         if (reason & MCSR_ICPERR) {
-               printk("Instruction Cache Parity Error\n");
+               pr_cont("Instruction Cache Parity Error\n");
  
                 /*
                  * This is recoverable by invalidating the i-cache.
@@ -556,7 +554,7 @@ int machine_check_e500mc(struct pt_regs *regs)
         }
  
         if (reason & MCSR_DCPERR_MC) {
-               printk("Data Cache Parity Error\n");
+               pr_cont("Data Cache Parity Error\n");
  
                 /*
                  * In write shadow mode we auto-recover from the error, but it
@@ -575,38 +573,38 @@ int machine_check_e500mc(struct pt_regs *regs)
         }
  
         if (reason & MCSR_L2MMU_MHIT) {
-               printk("Hit on multiple TLB entries\n");
+               pr_cont("Hit on multiple TLB entries\n");
                 recoverable = 0;
         }
  
         if (reason & MCSR_NMI)
-               printk("Non-maskable interrupt\n");
+               pr_cont("Non-maskable interrupt\n");
  
         if (reason & MCSR_IF) {
-               printk("Instruction Fetch Error Report\n");
+               pr_cont("Instruction Fetch Error Report\n");
                 recoverable = 0;
         }
  
         if (reason & MCSR_LD) {
-               printk("Load Error Report\n");
+               pr_cont("Load Error Report\n");
                 recoverable = 0;
         }
  
         if (reason & MCSR_ST) {
-               printk("Store Error Report\n");
+               pr_cont("Store Error Report\n");
                 recoverable = 0;
         }
  
         if (reason & MCSR_LDG) {
-               printk("Guarded Load Error Report\n");
+               pr_cont("Guarded Load Error Report\n");
                 recoverable = 0;
         }
  
         if (reason & MCSR_TLBSYNC)
-               printk("Simultaneous tlbsync operations\n");
+               pr_cont("Simultaneous tlbsync operations\n");
  
         if (reason & MCSR_BSL2_ERR) {
-               printk("Level 2 Cache Error\n");
+               pr_cont("Level 2 Cache Error\n");
                 recoverable = 0;
         }
  
@@ -616,7 +614,7 @@ int machine_check_e500mc(struct pt_regs *regs)
                 addr = mfspr(SPRN_MCAR);
                 addr |= (u64)mfspr(SPRN_MCARU) << 32;
  
-               printk("Machine Check %s Address: %#llx\n",
+               pr_cont("Machine Check %s Address: %#llx\n",
                        reason & MCSR_MEA ? "Effective" : "Physical", addr);
         }
  
@@ -640,29 +638,29 @@ int machine_check_e500(struct pt_regs *regs)
         printk("Caused by (from MCSR=%lx): ", reason);
  
         if (reason & MCSR_MCP)
-               printk("Machine Check Signal\n");
+               pr_cont("Machine Check Signal\n");
         if (reason & MCSR_ICPERR)
-               printk("Instruction Cache Parity Error\n");
+               pr_cont("Instruction Cache Parity Error\n");
         if (reason & MCSR_DCP_PERR)
-               printk("Data Cache Push Parity Error\n");
+               pr_cont("Data Cache Push Parity Error\n");
         if (reason & MCSR_DCPERR)
-               printk("Data Cache Parity Error\n");
+               pr_cont("Data Cache Parity Error\n");
         if (reason & MCSR_BUS_IAERR)
-               printk("Bus - Instruction Address Error\n");
+               pr_cont("Bus - Instruction Address Error\n");
         if (reason & MCSR_BUS_RAERR)
-               printk("Bus - Read Address Error\n");
+               pr_cont("Bus - Read Address Error\n");
         if (reason & MCSR_BUS_WAERR)
-               printk("Bus - Write Address Error\n");
+               pr_cont("Bus - Write Address Error\n");
         if (reason & MCSR_BUS_IBERR)
-               printk("Bus - Instruction Data Error\n");
+               pr_cont("Bus - Instruction Data Error\n");
         if (reason & MCSR_BUS_RBERR)
-               printk("Bus - Read Data Bus Error\n");
+               pr_cont("Bus - Read Data Bus Error\n");
         if (reason & MCSR_BUS_WBERR)
-               printk("Bus - Write Data Bus Error\n");
+               pr_cont("Bus - Write Data Bus Error\n");
         if (reason & MCSR_BUS_IPERR)
-               printk("Bus - Instruction Parity Error\n");
+               pr_cont("Bus - Instruction Parity Error\n");
         if (reason & MCSR_BUS_RPERR)
-               printk("Bus - Read Parity Error\n");
+               pr_cont("Bus - Read Parity Error\n");
  
         return 0;
  }
@@ -680,19 +678,19 @@ int machine_check_e200(struct pt_regs *regs)
         printk("Caused by (from MCSR=%lx): ", reason);
  
         if (reason & MCSR_MCP)
-               printk("Machine Check Signal\n");
+               pr_cont("Machine Check Signal\n");
         if (reason & MCSR_CP_PERR)
-               printk("Cache Push Parity Error\n");
+               pr_cont("Cache Push Parity Error\n");
         if (reason & MCSR_CPERR)
-               printk("Cache Parity Error\n");
+               pr_cont("Cache Parity Error\n");
         if (reason & MCSR_EXCP_ERR)
-               printk("ISI, ITLB, or Bus Error on first instruction fetch for an exception handler\n");
+               pr_cont("ISI, ITLB, or Bus Error on first instruction fetch for an exception handler\n");
         if (reason & MCSR_BUS_IRERR)
-               printk("Bus - Read Bus Error on instruction fetch\n");
+               pr_cont("Bus - Read Bus Error on instruction fetch\n");
         if (reason & MCSR_BUS_DRERR)
-               printk("Bus - Read Bus Error on data load\n");
+               pr_cont("Bus - Read Bus Error on data load\n");
         if (reason & MCSR_BUS_WRERR)
-               printk("Bus - Write Bus Error on buffered store or cache line push\n");
+               pr_cont("Bus - Write Bus Error on buffered store or cache line push\n");
  
         return 0;
  }
@@ -705,30 +703,30 @@ int machine_check_generic(struct pt_regs *regs)
         printk("Caused by (from SRR1=%lx): ", reason);
         switch (reason & 0x601F0000) {
         case 0x80000:
-               printk("Machine check signal\n");
+               pr_cont("Machine check signal\n");
                 break;
         case 0:         /* for 601 */
         case 0x40000:
         case 0x140000:  /* 7450 MSS error and TEA */
-               printk("Transfer error ack signal\n");
+               pr_cont("Transfer error ack signal\n");
                 break;
         case 0x20000:
-               printk("Data parity error signal\n");
+               pr_cont("Data parity error signal\n");
                 break;
         case 0x10000:
-               printk("Address parity error signal\n");
+               pr_cont("Address parity error signal\n");
                 break;
         case 0x20000000:
-               printk("L1 Data Cache error\n");
+               pr_cont("L1 Data Cache error\n");
                 break;
         case 0x40000000:
-               printk("L1 Instruction Cache error\n");
+               pr_cont("L1 Instruction Cache error\n");
                 break;
         case 0x00100000:
-               printk("L2 data cache parity error\n");
+               pr_cont("L2 data cache parity error\n");
                 break;
         default:
-               printk("Unknown values in msr\n");
+               pr_cont("Unknown values in msr\n");
         }
         return 0;
  }
@@ -741,9 +739,7 @@ void machine_check_exception(struct pt_regs *regs)
         if (!nested)
                 nmi_enter();
  
-       /* 64s accounts the mce in machine_check_early when in HVMODE */
-       if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !cpu_has_feature(CPU_FTR_HVMODE))
-               __this_cpu_inc(irq_stat.mce_exceptions);
+       __this_cpu_inc(irq_stat.mce_exceptions);
  
         add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
  
@@ -767,12 +763,17 @@ void machine_check_exception(struct pt_regs *regs)
         if (check_io_access(regs))
                 goto bail;
  
-       die("Machine check", regs, SIGBUS);
-
         /* Must die if the interrupt is not recoverable */
         if (!(regs->msr & MSR_RI))
                 nmi_panic(regs, "Unrecoverable Machine check");
  
+       if (!nested)
+               nmi_exit();
+
+       die("Machine check", regs, SIGBUS);
+
+       return;
+
  bail:
         if (!nested)
                 nmi_exit();
@@ -1433,7 +1434,7 @@ void program_check_exception(struct pt_regs *regs)
                         goto bail;
                 } else {
                         printk(KERN_EMERG "Unexpected TM Bad Thing exception "
-                              "at %lx (msr 0x%x)\n", regs->nip, reason);
+                              "at %lx (msr 0x%lx)\n", regs->nip, regs->msr);
                         die("Unrecoverable exception", regs, SIGABRT);
                 }
         }
@@ -1547,14 +1548,6 @@ void StackOverflow(struct pt_regs *regs)
         panic("kernel stack overflow");
  }
  
-void nonrecoverable_exception(struct pt_regs *regs)
-{
-       printk(KERN_ERR "Non-recoverable exception at PC=%lx MSR=%lx\n",
-              regs->nip, regs->msr);
-       debugger(regs);
-       die("nonrecoverable exception", regs, SIGKILL);
-}
-
  void kernel_fp_unavailable_exception(struct pt_regs *regs)
  {
         enum ctx_state prev_state = exception_enter();
@@ -1750,16 +1743,20 @@ void fp_unavailable_tm(struct pt_regs *regs)
           * checkpointed FP registers need to be loaded.
          */
         tm_reclaim_current(TM_CAUSE_FAC_UNAV);
-       /* Reclaim didn't save out any FPRs to transact_fprs. */
+
+       /*
+        * Reclaim initially saved out bogus (lazy) FPRs to ckfp_state, and
+        * then it was overwritten with thr->fp_state by tm_reclaim_thread().
+        *
+        * At this point, ck{fp,vr}_state contains the exact values we want to
+        * recheckpoint.
+        */
  
         /* Enable FP for the task: */
         current->thread.load_fp = 1;
  
-       /* This loads and recheckpoints the FP registers from
-        * thread.fpr[].  They will remain in registers after the
-        * checkpoint so we don't need to reload them after.
-        * If VMX is in use, the VRs now hold checkpointed values,
-        * so we don't want to load the VRs from the thread_struct.
+       /*
+        * Recheckpoint all the checkpointed ckpt, ck{fp, vr}_state registers.
          */
         tm_recheckpoint(&current->thread);
  }
@@ -2086,8 +2083,8 @@ void SPEFloatingPointRoundException(struct pt_regs *regs)
   */
  void unrecoverable_exception(struct pt_regs *regs)
  {
-       printk(KERN_EMERG "Unrecoverable exception %lx at %lx\n",
-              regs->trap, regs->nip);
+       pr_emerg("Unrecoverable exception %lx at %lx (msr=%lx)\n",
+                regs->trap, regs->nip, regs->msr);
         die("Unrecoverable exception", regs, SIGABRT);
  }
  NOKPROBE_SYMBOL(unrecoverable_exception);
diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso32/datapage.S

index 3745113..2a7eb54 100644 (file)
--- a/arch/powerpc/kernel/vdso32/datapage.S
+++ b/arch/powerpc/kernel/vdso32/datapage.S
@@ -37,6 +37,7 @@ data_page_branch:
         mtlr    r0
         addi    r3, r3, __kernel_datapage_offset-data_page_branch
         lwz     r0,0(r3)
+  .cfi_restore lr
         add     r3,r0,r3
         blr
    .cfi_endproc
diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S

index 769c262..1e0bc59 100644 (file)
--- a/arch/powerpc/kernel/vdso32/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
@@ -139,6 +139,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
          */
  99:
         li      r0,__NR_clock_gettime
+  .cfi_restore lr
         sc
         blr
    .cfi_endproc
diff --git a/arch/powerpc/kernel/vdso64/datapage.S b/arch/powerpc/kernel/vdso64/datapage.S

index abf17fe..bf96686 100644 (file)
--- a/arch/powerpc/kernel/vdso64/datapage.S
+++ b/arch/powerpc/kernel/vdso64/datapage.S
@@ -37,6 +37,7 @@ data_page_branch:
         mtlr    r0
         addi    r3, r3, __kernel_datapage_offset-data_page_branch
         lwz     r0,0(r3)
+  .cfi_restore lr
         add     r3,r0,r3
         blr
    .cfi_endproc
diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S

index c002adc..a4ed9ed 100644 (file)
--- a/arch/powerpc/kernel/vdso64/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso64/gettimeofday.S
@@ -169,6 +169,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime)
          */
  99:
         li      r0,__NR_clock_gettime
+  .cfi_restore lr
         sc
         blr
    .cfi_endproc
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S

index 105a976..434581b 100644 (file)
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -4,6 +4,9 @@
  #else
  #define PROVIDE32(x)   PROVIDE(x)
  #endif
+
+#define BSS_FIRST_SECTIONS *(.bss.prominit)
+
  #include <asm/page.h>
  #include <asm-generic/vmlinux.lds.h>
  #include <asm/cache.h>
@@ -99,6 +102,9 @@ SECTIONS
  #endif
                 /* careful! __ftr_alt_* sections need to be close to .text */
                 *(.text.hot TEXT_MAIN .text.fixup .text.unlikely .fixup __ftr_alt_* .ref.text);
+#ifdef CONFIG_PPC64
+               *(.tramp.ftrace.text);
+#endif
                 SCHED_TEXT
                 CPUIDLE_TEXT
                 LOCK_TEXT
@@ -181,7 +187,15 @@ SECTIONS
   */
         . = ALIGN(STRICT_ALIGN_SIZE);
         __init_begin = .;
-       INIT_TEXT_SECTION(PAGE_SIZE) :kernel
+       . = ALIGN(PAGE_SIZE);
+       .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
+               _sinittext = .;
+               INIT_TEXT
+               _einittext = .;
+#ifdef CONFIG_PPC64
+               *(.tramp.ftrace.init);
+#endif
+       } :kernel
  
         /* .exit.text is discarded at runtime, not link time,
          * to deal with references from __bug_table
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile

index e814f40..64f1135 100644 (file)
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -3,8 +3,6 @@
  # Makefile for Kernel-based Virtual Machine module
  #
  
-subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
-
  ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm
  KVM := ../../../virt/kvm
  
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile

index 6702868..3bf9fc6 100644 (file)
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -3,8 +3,6 @@
  # Makefile for ppc-specific library files..
  #
  
-subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
-
  ccflags-$(CONFIG_PPC64)        := $(NO_MINIMAL_TOC)
  
  CFLAGS_REMOVE_code-patching.o = $(CC_FLAGS_FTRACE)
@@ -14,6 +12,8 @@ obj-y += string.o alloc.o code-patching.o feature-fixups.o
  
  obj-$(CONFIG_PPC32)    += div64.o copy_32.o crtsavres.o strlen_32.o
  
+obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
+
  # See corresponding test in arch/powerpc/Makefile
  # 64-bit linker creates .sfpr on demand for final link (vmlinux),
  # so it is only needed for modules, and only for older linkers which
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c

index 5ffee29..89502cb 100644 (file)
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -98,8 +98,7 @@ static int map_patch_area(void *addr, unsigned long text_poke_addr)
         else
                 pfn = __pa_symbol(addr) >> PAGE_SHIFT;
  
-       err = map_kernel_page(text_poke_addr, (pfn << PAGE_SHIFT),
-                               pgprot_val(PAGE_KERNEL));
+       err = map_kernel_page(text_poke_addr, (pfn << PAGE_SHIFT), PAGE_KERNEL);
  
         pr_devel("Mapped addr %lx with pfn %lx:%d\n", text_poke_addr, pfn, err);
         if (err)
diff --git a/arch/powerpc/lib/error-inject.c b/arch/powerpc/lib/error-inject.c

new file mode 100644 (file)

index 0000000..407b992
--- /dev/null
+++ b/arch/powerpc/lib/error-inject.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <linux/error-injection.h>
+#include <linux/kprobes.h>
+#include <linux/uaccess.h>
+
+/*
+ * Make the probed function return to its caller straight away: the saved
+ * next-instruction pointer is replaced with the saved link register value.
+ */
+void override_function_with_return(struct pt_regs *regs)
+{
+       /*
+        * Emulate 'blr'. 'regs' represents the state on entry of a predefined
+        * function in the kernel/module, captured on a kprobe. We don't need
+        * to worry about 32-bit userspace on a 64-bit kernel.
+        */
+       regs->nip = regs->link;
+}
+NOKPROBE_SYMBOL(override_function_with_return);
diff --git a/arch/powerpc/lib/mem_64.S b/arch/powerpc/lib/mem_64.S

index ec531de..3c3be02 100644 (file)
--- a/arch/powerpc/lib/mem_64.S
+++ b/arch/powerpc/lib/mem_64.S
@@ -40,7 +40,7 @@ _GLOBAL(memset)
  .Lms:  PPC_MTOCRF(1,r0)
         mr      r6,r3
         blt     cr1,8f
-       beq+    3f                      /* if already 8-byte aligned */
+       beq     3f                      /* if already 8-byte aligned */
         subf    r5,r0,r5
         bf      31,1f
         stb     r4,0(r6)
@@ -85,7 +85,7 @@ _GLOBAL(memset)
         addi    r6,r6,8
  8:     cmpwi   r5,0
         PPC_MTOCRF(1,r5)
-       beqlr+
+       beqlr
         bf      29,9f
         stw     r4,0(r6)
         addi    r6,r6,4
diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c

index cf77d75..36484a2 100644 (file)
--- a/arch/powerpc/mm/8xx_mmu.c
+++ b/arch/powerpc/mm/8xx_mmu.c
@@ -67,7 +67,7 @@ void __init MMU_init_hw(void)
         /* PIN up to the 3 first 8Mb after IMMR in DTLB table */
  #ifdef CONFIG_PIN_TLB_DATA
         unsigned long ctr = mfspr(SPRN_MD_CTR) & 0xfe000000;
-       unsigned long flags = 0xf0 | MD_SPS16K | _PAGE_PRIVILEGED | _PAGE_DIRTY;
+       unsigned long flags = 0xf0 | MD_SPS16K | _PAGE_SH | _PAGE_DIRTY;
  #ifdef CONFIG_PIN_TLB_IMMR
         int i = 29;
  #else
@@ -91,11 +91,10 @@ static void __init mmu_mapin_immr(void)
  {
         unsigned long p = PHYS_IMMR_BASE;
         unsigned long v = VIRT_IMMR_BASE;
-       unsigned long f = pgprot_val(PAGE_KERNEL_NCG);
         int offset;
  
         for (offset = 0; offset < IMMR_SIZE; offset += PAGE_SIZE)
-               map_kernel_page(v + offset, p + offset, f);
+               map_kernel_page(v + offset, p + offset, PAGE_KERNEL_NCG);
  }
  
  /* Address of instructions to patch */
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile

index cdf6a99..ca96e7b 100644 (file)
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -3,10 +3,10 @@
  # Makefile for the linux ppc-specific parts of the memory manager.
  #
  
-subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
-
  ccflags-$(CONFIG_PPC64)        := $(NO_MINIMAL_TOC)
  
+CFLAGS_REMOVE_slb.o = $(CC_FLAGS_FTRACE)
+
  obj-y                          := fault.o mem.o pgtable.o mmap.o \
                                    init_$(BITS).o pgtable_$(BITS).o \
                                    init-common.o mmu_context.o drmem.o
@@ -15,7 +15,7 @@ obj-$(CONFIG_PPC_MMU_NOHASH)  += mmu_context_nohash.o tlb_nohash.o \
  obj-$(CONFIG_PPC_BOOK3E)       += tlb_low_$(BITS)e.o
  hash64-$(CONFIG_PPC_NATIVE)    := hash_native_64.o
  obj-$(CONFIG_PPC_BOOK3E_64)   += pgtable-book3e.o
-obj-$(CONFIG_PPC_BOOK3S_64)    += pgtable-hash64.o hash_utils_64.o slb_low.o slb.o $(hash64-y) mmu_context_book3s64.o pgtable-book3s64.o
+obj-$(CONFIG_PPC_BOOK3S_64)    += pgtable-hash64.o hash_utils_64.o slb.o $(hash64-y) mmu_context_book3s64.o pgtable-book3s64.o
  obj-$(CONFIG_PPC_RADIX_MMU)    += pgtable-radix.o tlb-radix.o
  obj-$(CONFIG_PPC_STD_MMU_32)   += ppc_mmu_32.o hash_low_32.o mmu_context_hash32.o
  obj-$(CONFIG_PPC_STD_MMU)      += tlb_hash$(BITS).o
@@ -43,5 +43,12 @@ obj-$(CONFIG_HIGHMEM)                += highmem.o
  obj-$(CONFIG_PPC_COPRO_BASE)   += copro_fault.o
  obj-$(CONFIG_SPAPR_TCE_IOMMU)  += mmu_context_iommu.o
  obj-$(CONFIG_PPC_PTDUMP)       += dump_linuxpagetables.o
+ifdef CONFIG_PPC_PTDUMP
+obj-$(CONFIG_4xx)              += dump_linuxpagetables-generic.o
+obj-$(CONFIG_PPC_8xx)          += dump_linuxpagetables-8xx.o
+obj-$(CONFIG_PPC_BOOK3E_MMU)   += dump_linuxpagetables-generic.o
+obj-$(CONFIG_PPC_BOOK3S_32)    += dump_linuxpagetables-generic.o
+obj-$(CONFIG_PPC_BOOK3S_64)    += dump_linuxpagetables-book3s64.o
+endif
  obj-$(CONFIG_PPC_HTDUMP)       += dump_hashpagetable.o
  obj-$(CONFIG_PPC_MEM_KEYS)     += pkeys.o
diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c

index 3825284..b6e7b59 100644 (file)
--- a/arch/powerpc/mm/dma-noncoherent.c
+++ b/arch/powerpc/mm/dma-noncoherent.c
@@ -228,7 +228,7 @@ __dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t
                 do {
                         SetPageReserved(page);
                         map_kernel_page(vaddr, page_to_phys(page),
-                                pgprot_val(pgprot_noncached(PAGE_KERNEL)));
+                                       pgprot_noncached(PAGE_KERNEL));
                         page++;
                         vaddr += PAGE_SIZE;
                 } while (size -= PAGE_SIZE);
diff --git a/arch/powerpc/mm/dump_linuxpagetables-8xx.c b/arch/powerpc/mm/dump_linuxpagetables-8xx.c

new file mode 100644 (file)

index 0000000..ab9e3f2
--- /dev/null
+++ b/arch/powerpc/mm/dump_linuxpagetables-8xx.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * From split of dump_linuxpagetables.c
+ * Copyright 2016, Rashmica Gupta, IBM Corp.
+ *
+ */
+#include <linux/kernel.h>
+#include <asm/pgtable.h>
+
+#include "dump_linuxpagetables.h"
+
+/*
+ * Page flag descriptions used by the 8xx page table dump. Each entry pairs a
+ * .mask/.val with the text for a match (.set) and, where given, the text for
+ * a non-match (.clear).
+ * NOTE(review): presumably .set is printed when (pte & .mask) == .val and
+ * .clear otherwise -- confirm against the shared dumper, which is not part
+ * of this file.
+ */
+static const struct flag_info flag_array[] = {
+       {
+               /* .val is 0, so "user" keys on _PAGE_SH being clear */
+               .mask   = _PAGE_SH,
+               .val    = 0,
+               .set    = "user",
+               .clear  = "    ",
+       }, {
+               /* three entries share the _PAGE_RO | _PAGE_NA mask */
+               .mask   = _PAGE_RO | _PAGE_NA,
+               .val    = 0,
+               .set    = "rw",
+       }, {
+               .mask   = _PAGE_RO | _PAGE_NA,
+               .val    = _PAGE_RO,
+               .set    = "r ",
+       }, {
+               .mask   = _PAGE_RO | _PAGE_NA,
+               .val    = _PAGE_NA,
+               .set    = "  ",
+       }, {
+               .mask   = _PAGE_EXEC,
+               .val    = _PAGE_EXEC,
+               .set    = " X ",
+               .clear  = "   ",
+       }, {
+               .mask   = _PAGE_PRESENT,
+               .val    = _PAGE_PRESENT,
+               .set    = "present",
+               .clear  = "       ",
+       }, {
+               .mask   = _PAGE_GUARDED,
+               .val    = _PAGE_GUARDED,
+               .set    = "guarded",
+               .clear  = "       ",
+       }, {
+               .mask   = _PAGE_DIRTY,
+               .val    = _PAGE_DIRTY,
+               .set    = "dirty",
+               .clear  = "     ",
+       }, {
+               .mask   = _PAGE_ACCESSED,
+               .val    = _PAGE_ACCESSED,
+               .set    = "accessed",
+               .clear  = "        ",
+       }, {
+               .mask   = _PAGE_NO_CACHE,
+               .val    = _PAGE_NO_CACHE,
+               .set    = "no cache",
+               .clear  = "        ",
+       }, {
+               /* no .clear text is given for this entry */
+               .mask   = _PAGE_SPECIAL,
+               .val    = _PAGE_SPECIAL,
+               .set    = "special",
+       }
+};
+
+/*
+ * Flag tables per page table level. Slot 0 is left empty; slots 1-4 (pgd,
+ * pud, pmd, pte) all point at the same flag_array.
+ */
+struct pgtable_level pg_level[5] = {
+       {
+       }, { /* pgd */
+               .flag   = flag_array,
+               .num    = ARRAY_SIZE(flag_array),
+       }, { /* pud */
+               .flag   = flag_array,
+               .num    = ARRAY_SIZE(flag_array),
+       }, { /* pmd */
+               .flag   = flag_array,
+               .num    = ARRAY_SIZE(flag_array),
+       }, { /* pte */
+               .flag   = flag_array,
+               .num    = ARRAY_SIZE(flag_array),
+       },
+};
diff --git a/arch/powerpc/mm/dump_linuxpagetables-book3s64.c b/arch/powerpc/mm/dump_linuxpagetables-book3s64.c

new file mode 100644 (file)

index 0000000..ed6fcf7
--- /dev/null
+++ b/arch/powerpc/mm/dump_linuxpagetables-book3s64.c
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * From split of dump_linuxpagetables.c
+ * Copyright 2016, Rashmica Gupta, IBM Corp.
+ *
+ */
+#include <linux/kernel.h>
+#include <asm/pgtable.h>
+
+#include "dump_linuxpagetables.h"
+
+static const struct flag_info flag_array[] = { /* flag rows for the Book3S 64 page table dump (per the file name); used by every pg_level[] slot in this file */
+       {
+               .mask   = _PAGE_PRIVILEGED,
+               .val    = 0,    /* val is 0: NOTE(review): presumably "user" is shown when _PAGE_PRIVILEGED is clear - confirm in dump_flag_info() */
+               .set    = "user",
+               .clear  = "    ",       /* blank of the same width as the .set label */
+       }, {
+               .mask   = _PAGE_READ,
+               .val    = _PAGE_READ,
+               .set    = "r",
+               .clear  = " ",
+       }, {
+               .mask   = _PAGE_WRITE,
+               .val    = _PAGE_WRITE,
+               .set    = "w",
+               .clear  = " ",
+       }, {
+               .mask   = _PAGE_EXEC,
+               .val    = _PAGE_EXEC,
+               .set    = " X ",
+               .clear  = "   ",
+       }, {
+               .mask   = _PAGE_PTE,
+               .val    = _PAGE_PTE,
+               .set    = "pte",
+               .clear  = "   ",
+       }, {
+               .mask   = _PAGE_PRESENT,
+               .val    = _PAGE_PRESENT,
+               .set    = "valid",      /* _PAGE_PRESENT alone is labelled "valid"; the "present" label belongs to the next row */
+               .clear  = "     ",
+       }, {
+               .mask   = _PAGE_PRESENT | _PAGE_INVALID,
+               .val    = 0,    /* val is 0 and the strings are swapped relative to other rows */
+               .set    = "       ",    /* the blank is the .set string here */
+               .clear  = "present",    /* the label is the .clear string here */
+       }, {
+               .mask   = H_PAGE_HASHPTE,
+               .val    = H_PAGE_HASHPTE,
+               .set    = "hpte",
+               .clear  = "    ",
+       }, {
+               .mask   = _PAGE_DIRTY,
+               .val    = _PAGE_DIRTY,
+               .set    = "dirty",
+               .clear  = "     ",
+       }, {
+               .mask   = _PAGE_ACCESSED,
+               .val    = _PAGE_ACCESSED,
+               .set    = "accessed",
+               .clear  = "        ",
+       }, {
+               .mask   = _PAGE_NON_IDEMPOTENT,
+               .val    = _PAGE_NON_IDEMPOTENT,
+               .set    = "non-idempotent",
+               .clear  = "              ",
+       }, {
+               .mask   = _PAGE_TOLERANT,
+               .val    = _PAGE_TOLERANT,
+               .set    = "tolerant",
+               .clear  = "        ",
+       }, {
+               .mask   = H_PAGE_BUSY,
+               .val    = H_PAGE_BUSY,
+               .set    = "busy",       /* no .clear initialiser from this row onwards, so .clear is NULL */
+       }, {
+#ifdef CONFIG_PPC_64K_PAGES
+               .mask   = H_PAGE_COMBO, /* "combo" and "4K_pfn" rows exist only with CONFIG_PPC_64K_PAGES */
+               .val    = H_PAGE_COMBO,
+               .set    = "combo",
+       }, {
+               .mask   = H_PAGE_4K_PFN,
+               .val    = H_PAGE_4K_PFN,
+               .set    = "4K_pfn",
+       }, {
+#else /* CONFIG_PPC_64K_PAGES */
+               .mask   = H_PAGE_F_GIX, /* "f_gix" and "f_second" rows exist only without CONFIG_PPC_64K_PAGES */
+               .val    = H_PAGE_F_GIX,
+               .set    = "f_gix",
+               .is_val = true, /* only row in this table that sets is_val/shift; NOTE(review): presumably the field is printed as a number - confirm */
+               .shift  = H_PAGE_F_GIX_SHIFT,
+       }, {
+               .mask   = H_PAGE_F_SECOND,
+               .val    = H_PAGE_F_SECOND,
+               .set    = "f_second",
+       }, {
+#endif /* CONFIG_PPC_64K_PAGES */
+               .mask   = _PAGE_SPECIAL,
+               .val    = _PAGE_SPECIAL,
+               .set    = "special",
+       }
+};
+
+struct pgtable_level pg_level[5] = {   /* Book3S 64 definition (per the file name); pgd, pud, pmd and pte all share flag_array */
+       {       /* index 0: left without initialisers, i.e. zeroed */
+       }, { /* pgd */
+               .flag   = flag_array,
+               .num    = ARRAY_SIZE(flag_array),       /* number of rows in flag_array */
+       }, { /* pud */
+               .flag   = flag_array,
+               .num    = ARRAY_SIZE(flag_array),
+       }, { /* pmd */
+               .flag   = flag_array,
+               .num    = ARRAY_SIZE(flag_array),
+       }, { /* pte */
+               .flag   = flag_array,
+               .num    = ARRAY_SIZE(flag_array),
+       },
+};
diff --git a/arch/powerpc/mm/dump_linuxpagetables-generic.c b/arch/powerpc/mm/dump_linuxpagetables-generic.c

new file mode 100644 (file)

index 0000000..1e3829e
--- /dev/null
+++ b/arch/powerpc/mm/dump_linuxpagetables-generic.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * From split of dump_linuxpagetables.c
+ * Copyright 2016, Rashmica Gupta, IBM Corp.
+ *
+ */
+#include <linux/kernel.h>
+#include <asm/pgtable.h>
+
+#include "dump_linuxpagetables.h"
+
+static const struct flag_info flag_array[] = { /* flag rows for the "generic" page table dump (per the file name); used by every pg_level[] slot in this file */
+       {
+               .mask   = _PAGE_USER,
+               .val    = _PAGE_USER,
+               .set    = "user",
+               .clear  = "    ",       /* blank of the same width as the .set label */
+       }, {
+               .mask   = _PAGE_RW,
+               .val    = _PAGE_RW,
+               .set    = "rw",
+               .clear  = "r ", /* unlike the other rows, the .clear string is "r " rather than blanks */
+       }, {
+#ifndef CONFIG_PPC_BOOK3S_32
+               .mask   = _PAGE_EXEC,   /* this row is compiled out when CONFIG_PPC_BOOK3S_32 is defined */
+               .val    = _PAGE_EXEC,
+               .set    = " X ",
+               .clear  = "   ",
+       }, {
+#endif
+               .mask   = _PAGE_PRESENT,
+               .val    = _PAGE_PRESENT,
+               .set    = "present",
+               .clear  = "       ",
+       }, {
+               .mask   = _PAGE_GUARDED,
+               .val    = _PAGE_GUARDED,
+               .set    = "guarded",
+               .clear  = "       ",
+       }, {
+               .mask   = _PAGE_DIRTY,
+               .val    = _PAGE_DIRTY,
+               .set    = "dirty",
+               .clear  = "     ",
+       }, {
+               .mask   = _PAGE_ACCESSED,
+               .val    = _PAGE_ACCESSED,
+               .set    = "accessed",
+               .clear  = "        ",
+       }, {
+               .mask   = _PAGE_WRITETHRU,
+               .val    = _PAGE_WRITETHRU,
+               .set    = "write through",
+               .clear  = "             ",
+       }, {
+               .mask   = _PAGE_NO_CACHE,
+               .val    = _PAGE_NO_CACHE,
+               .set    = "no cache",
+               .clear  = "        ",
+       }, {
+               .mask   = _PAGE_SPECIAL,
+               .val    = _PAGE_SPECIAL,
+               .set    = "special",    /* last row; no .clear initialiser, so .clear is NULL */
+       }
+};
+
+struct pgtable_level pg_level[5] = {   /* "generic" definition (per the file name); all four named levels reuse flag_array */
+       {       /* index 0: empty initialiser, so the slot is zeroed */
+       }, { /* pgd */
+               .flag   = flag_array,
+               .num    = ARRAY_SIZE(flag_array),       /* number of rows in flag_array */
+       }, { /* pud */
+               .flag   = flag_array,
+               .num    = ARRAY_SIZE(flag_array),
+       }, { /* pmd */
+               .flag   = flag_array,
+               .num    = ARRAY_SIZE(flag_array),
+       }, { /* pte */
+               .flag   = flag_array,
+               .num    = ARRAY_SIZE(flag_array),
+       },
+};
diff --git a/arch/powerpc/mm/dump_linuxpagetables.c b/arch/powerpc/mm/dump_linuxpagetables.c

index 876e2a3..2b74f8a 100644 (file)
--- a/arch/powerpc/mm/dump_linuxpagetables.c
+++ b/arch/powerpc/mm/dump_linuxpagetables.c
@@ -27,6 +27,8 @@
  #include <asm/page.h>
  #include <asm/pgalloc.h>
  
+#include "dump_linuxpagetables.h"
+
  #ifdef CONFIG_PPC32
  #define KERN_VIRT_START        0
  #endif
@@ -101,159 +103,6 @@ static struct addr_marker address_markers[] = {
         { -1,   NULL },
  };
  
-struct flag_info {
-       u64             mask;
-       u64             val;
-       const char      *set;
-       const char      *clear;
-       bool            is_val;
-       int             shift;
-};
-
-static const struct flag_info flag_array[] = {
-       {
-               .mask   = _PAGE_USER | _PAGE_PRIVILEGED,
-               .val    = _PAGE_USER,
-               .set    = "user",
-               .clear  = "    ",
-       }, {
-               .mask   = _PAGE_RW | _PAGE_RO | _PAGE_NA,
-               .val    = _PAGE_RW,
-               .set    = "rw",
-       }, {
-               .mask   = _PAGE_RW | _PAGE_RO | _PAGE_NA,
-               .val    = _PAGE_RO,
-               .set    = "ro",
-       }, {
-#if _PAGE_NA != 0
-               .mask   = _PAGE_RW | _PAGE_RO | _PAGE_NA,
-               .val    = _PAGE_RO,
-               .set    = "na",
-       }, {
-#endif
-               .mask   = _PAGE_EXEC,
-               .val    = _PAGE_EXEC,
-               .set    = " X ",
-               .clear  = "   ",
-       }, {
-               .mask   = _PAGE_PTE,
-               .val    = _PAGE_PTE,
-               .set    = "pte",
-               .clear  = "   ",
-       }, {
-               .mask   = _PAGE_PRESENT,
-               .val    = _PAGE_PRESENT,
-               .set    = "present",
-               .clear  = "       ",
-       }, {
-#ifdef CONFIG_PPC_BOOK3S_64
-               .mask   = H_PAGE_HASHPTE,
-               .val    = H_PAGE_HASHPTE,
-#else
-               .mask   = _PAGE_HASHPTE,
-               .val    = _PAGE_HASHPTE,
-#endif
-               .set    = "hpte",
-               .clear  = "    ",
-       }, {
-#ifndef CONFIG_PPC_BOOK3S_64
-               .mask   = _PAGE_GUARDED,
-               .val    = _PAGE_GUARDED,
-               .set    = "guarded",
-               .clear  = "       ",
-       }, {
-#endif
-               .mask   = _PAGE_DIRTY,
-               .val    = _PAGE_DIRTY,
-               .set    = "dirty",
-               .clear  = "     ",
-       }, {
-               .mask   = _PAGE_ACCESSED,
-               .val    = _PAGE_ACCESSED,
-               .set    = "accessed",
-               .clear  = "        ",
-       }, {
-#ifndef CONFIG_PPC_BOOK3S_64
-               .mask   = _PAGE_WRITETHRU,
-               .val    = _PAGE_WRITETHRU,
-               .set    = "write through",
-               .clear  = "             ",
-       }, {
-#endif
-#ifndef CONFIG_PPC_BOOK3S_64
-               .mask   = _PAGE_NO_CACHE,
-               .val    = _PAGE_NO_CACHE,
-               .set    = "no cache",
-               .clear  = "        ",
-       }, {
-#else
-               .mask   = _PAGE_NON_IDEMPOTENT,
-               .val    = _PAGE_NON_IDEMPOTENT,
-               .set    = "non-idempotent",
-               .clear  = "              ",
-       }, {
-               .mask   = _PAGE_TOLERANT,
-               .val    = _PAGE_TOLERANT,
-               .set    = "tolerant",
-               .clear  = "        ",
-       }, {
-#endif
-#ifdef CONFIG_PPC_BOOK3S_64
-               .mask   = H_PAGE_BUSY,
-               .val    = H_PAGE_BUSY,
-               .set    = "busy",
-       }, {
-#ifdef CONFIG_PPC_64K_PAGES
-               .mask   = H_PAGE_COMBO,
-               .val    = H_PAGE_COMBO,
-               .set    = "combo",
-       }, {
-               .mask   = H_PAGE_4K_PFN,
-               .val    = H_PAGE_4K_PFN,
-               .set    = "4K_pfn",
-       }, {
-#else /* CONFIG_PPC_64K_PAGES */
-               .mask   = H_PAGE_F_GIX,
-               .val    = H_PAGE_F_GIX,
-               .set    = "f_gix",
-               .is_val = true,
-               .shift  = H_PAGE_F_GIX_SHIFT,
-       }, {
-               .mask   = H_PAGE_F_SECOND,
-               .val    = H_PAGE_F_SECOND,
-               .set    = "f_second",
-       }, {
-#endif /* CONFIG_PPC_64K_PAGES */
-#endif
-               .mask   = _PAGE_SPECIAL,
-               .val    = _PAGE_SPECIAL,
-               .set    = "special",
-       }
-};
-
-struct pgtable_level {
-       const struct flag_info *flag;
-       size_t num;
-       u64 mask;
-};
-
-static struct pgtable_level pg_level[] = {
-       {
-       }, { /* pgd */
-               .flag   = flag_array,
-               .num    = ARRAY_SIZE(flag_array),
-       }, { /* pud */
-               .flag   = flag_array,
-               .num    = ARRAY_SIZE(flag_array),
-       }, { /* pmd */
-               .flag   = flag_array,
-               .num    = ARRAY_SIZE(flag_array),
-       }, { /* pte */
-               .flag   = flag_array,
-               .num    = ARRAY_SIZE(flag_array),
-       },
-};
-
  static void dump_flag_info(struct pg_state *st, const struct flag_info
                 *flag, u64 pte, int num)
  {
@@ -418,12 +267,13 @@ static void walk_pagetables(struct pg_state *st)
         unsigned int i;
         unsigned long addr;
  
+       addr = st->start_address;
+
         /*
          * Traverse the linux pagetable structure and dump pages that are in
          * the hash pagetable.
          */
-       for (i = 0; i < PTRS_PER_PGD; i++, pgd++) {
-               addr = KERN_VIRT_START + i * PGDIR_SIZE;
+       for (i = 0; i < PTRS_PER_PGD; i++, pgd++, addr += PGDIR_SIZE) {
                 if (!pgd_none(*pgd) && !pgd_huge(*pgd))
                         /* pgd exists */
                         walk_pud(st, pgd, addr);
@@ -472,9 +322,14 @@ static int ptdump_show(struct seq_file *m, void *v)
  {
         struct pg_state st = {
                 .seq = m,
-               .start_address = KERN_VIRT_START,
                 .marker = address_markers,
         };
+
+       if (radix_enabled())
+               st.start_address = PAGE_OFFSET;
+       else
+               st.start_address = KERN_VIRT_START;
+
         /* Traverse kernel page tables */
         walk_pagetables(&st);
         note_page(&st, 0, 0, 0);
diff --git a/arch/powerpc/mm/dump_linuxpagetables.h b/arch/powerpc/mm/dump_linuxpagetables.h

new file mode 100644 (file)

index 0000000..5d51363
--- /dev/null
+++ b/arch/powerpc/mm/dump_linuxpagetables.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/types.h>
+
+struct flag_info {     /* one row of a flag_array table; NOTE(review): field roles below are inferred from those tables, confirm against dump_flag_info() */
+       u64             mask;   /* bit(s) of the page table entry this row concerns */
+       u64             val;    /* presumably the value compared with (entry & mask) - TODO confirm */
+       const char      *set;   /* label string; presumably shown when the comparison matches */
+       const char      *clear; /* alternative string (blanks in most rows); left NULL by some rows */
+       bool            is_val; /* set to true only by the "f_gix" row in the visible tables */
+       int             shift;  /* given together with is_val (H_PAGE_F_GIX_SHIFT); presumably a right shift for the printed field */
+};
+
+struct pgtable_level { /* per-level description held in pg_level[] */
+       const struct flag_info *flag;   /* flag table for this level */
+       size_t num;     /* number of rows in flag[]; the pg_level[] definitions set it with ARRAY_SIZE() */
+       u64 mask;       /* not initialised by the pg_level[] definitions; NOTE(review): presumably filled in at run time - confirm */
+};
+
+extern struct pgtable_level pg_level[5];       /* defined in the per-platform dump_linuxpagetables-*.c files: index 0 empty, then pgd, pud, pmd, pte */
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c

index 729f02d..aaa28fd 100644 (file)
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -115,6 +115,8 @@ static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
         tlbiel_hash_set_isa300(0, is, 0, 2, 1);
  
         asm volatile("ptesync": : :"memory");
+
+       asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
  }
  
  void hash__tlbiel_all(unsigned int action)
@@ -140,8 +142,6 @@ void hash__tlbiel_all(unsigned int action)
                 tlbiel_all_isa206(POWER7_TLB_SETS, is);
         else
                 WARN(1, "%s called on pre-POWER7 CPU\n", __func__);
-
-       asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
  }
  
  static inline unsigned long  ___tlbie(unsigned long vpn, int psize,
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c

index f23a89d..0cc7fbc 100644 (file)
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -1001,9 +1001,9 @@ void __init hash__early_init_mmu(void)
          * 4k use hugepd format, so for hash set then to
          * zero
          */
-       __pmd_val_bits = 0;
-       __pud_val_bits = 0;
-       __pgd_val_bits = 0;
+       __pmd_val_bits = HASH_PMD_VAL_BITS;
+       __pud_val_bits = HASH_PUD_VAL_BITS;
+       __pgd_val_bits = HASH_PGD_VAL_BITS;
  
         __kernel_virt_start = H_KERN_VIRT_START;
         __kernel_virt_size = H_KERN_VIRT_SIZE;
@@ -1125,7 +1125,7 @@ void demote_segment_4k(struct mm_struct *mm, unsigned long addr)
         if ((get_paca_psize(addr) != MMU_PAGE_4K) && (current->mm == mm)) {
  
                 copy_mm_to_paca(mm);
-               slb_flush_and_rebolt();
+               slb_flush_and_restore_bolted();
         }
  }
  #endif /* CONFIG_PPC_64K_PAGES */
@@ -1197,7 +1197,7 @@ static void check_paca_psize(unsigned long ea, struct mm_struct *mm,
         if (user_region) {
                 if (psize != get_paca_psize(ea)) {
                         copy_mm_to_paca(mm);
-                       slb_flush_and_rebolt();
+                       slb_flush_and_restore_bolted();
                 }
         } else if (get_paca()->vmalloc_sllp !=
                    mmu_psize_defs[mmu_vmalloc_psize].sllp) {
@@ -1482,7 +1482,7 @@ static bool should_hash_preload(struct mm_struct *mm, unsigned long ea)
  #endif
  
  void hash_preload(struct mm_struct *mm, unsigned long ea,
-                 unsigned long access, unsigned long trap)
+                 bool is_exec, unsigned long trap)
  {
         int hugepage_shift;
         unsigned long vsid;
@@ -1490,6 +1490,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
         pte_t *ptep;
         unsigned long flags;
         int rc, ssize, update_flags = 0;
+       unsigned long access = _PAGE_PRESENT | _PAGE_READ | (is_exec ? _PAGE_EXEC : 0);
  
         BUG_ON(REGION_ID(ea) != USER_REGION_ID);
  
diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c

index 01f213d..dfbc3b3 100644 (file)
--- a/arch/powerpc/mm/hugepage-hash64.c
+++ b/arch/powerpc/mm/hugepage-hash64.c
@@ -51,6 +51,12 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
                         new_pmd |= _PAGE_DIRTY;
         } while (!pmd_xchg(pmdp, __pmd(old_pmd), __pmd(new_pmd)));
  
+       /*
+        * Make sure this is thp or devmap entry
+        */
+       if (!(old_pmd & (H_PAGE_THP_HUGE | _PAGE_DEVMAP)))
+               return 0;
+
         rflags = htab_convert_pte_flags(new_pmd);
  
  #if 0
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c

index b320f50..2e6a8f9 100644 (file)
--- a/arch/powerpc/mm/hugetlbpage-hash64.c
+++ b/arch/powerpc/mm/hugetlbpage-hash64.c
@@ -62,6 +62,10 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
                         new_pte |= _PAGE_DIRTY;
         } while(!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
  
+       /* Make sure this is a hugetlb entry */
+       if (old_pte & (H_PAGE_THP_HUGE | _PAGE_DEVMAP))
+               return 0;
+
         rflags = htab_convert_pte_flags(new_pte);
         if (unlikely(mmu_psize == MMU_PAGE_16G))
                 offset = PTRS_PER_PUD;
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c

index e87f9ef..a7226ed 100644 (file)
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -19,6 +19,7 @@
  #include <linux/moduleparam.h>
  #include <linux/swap.h>
  #include <linux/swapops.h>
+#include <linux/kmemleak.h>
  #include <asm/pgtable.h>
  #include <asm/pgalloc.h>
  #include <asm/tlb.h>
@@ -95,7 +96,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
                         break;
                 else {
  #ifdef CONFIG_PPC_BOOK3S_64
-                       *hpdp = __hugepd(__pa(new) |
+                       *hpdp = __hugepd(__pa(new) | HUGEPD_VAL_BITS |
                                          (shift_to_mmu_psize(pshift) << 2));
  #elif defined(CONFIG_PPC_8xx)
                         *hpdp = __hugepd(__pa(new) | _PMD_USER |
@@ -112,6 +113,8 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
                 for (i = i - 1 ; i >= 0; i--, hpdp--)
                         *hpdp = __hugepd(0);
                 kmem_cache_free(cachep, new);
+       } else {
+               kmemleak_ignore(new);
         }
         spin_unlock(ptl);
         return 0;
@@ -837,8 +840,12 @@ pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
                                 ret_pte = (pte_t *) pmdp;
                                 goto out;
                         }
-
-                       if (pmd_huge(pmd)) {
+                       /*
+                        * The pmd_large() check below also handles a swap pmd
+                        * entry. Both checks are needed because each of them
+                        * is config dependent.
+                        */
+                       if (pmd_huge(pmd) || pmd_large(pmd)) {
                                 ret_pte = (pte_t *) pmdp;
                                 goto out;
                         } else if (is_hugepd(__hugepd(pmd_val(pmd))))
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c

index 04ccb27..dd949d6 100644 (file)
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -309,11 +309,11 @@ void __init paging_init(void)
         unsigned long end = __fix_to_virt(FIX_HOLE);
  
         for (; v < end; v += PAGE_SIZE)
-               map_kernel_page(v, 0, 0); /* XXX gross */
+               map_kernel_page(v, 0, __pgprot(0)); /* XXX gross */
  #endif
  
  #ifdef CONFIG_HIGHMEM
-       map_kernel_page(PKMAP_BASE, 0, 0);      /* XXX gross */
+       map_kernel_page(PKMAP_BASE, 0, __pgprot(0));    /* XXX gross */
         pkmap_page_table = virt_to_kpte(PKMAP_BASE);
  
         kmap_pte = virt_to_kpte(__fix_to_virt(FIX_KMAP_BEGIN));
@@ -509,7 +509,8 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
          * We don't need to worry about _PAGE_PRESENT here because we are
          * called with either mm->page_table_lock held or ptl lock held
          */
-       unsigned long access, trap;
+       unsigned long trap;
+       bool is_exec;
  
         if (radix_enabled()) {
                 prefetch((void *)address);
@@ -531,16 +532,16 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
         trap = current->thread.regs ? TRAP(current->thread.regs) : 0UL;
         switch (trap) {
         case 0x300:
-               access = 0UL;
+               is_exec = false;
                 break;
         case 0x400:
-               access = _PAGE_EXEC;
+               is_exec = true;
                 break;
         default:
                 return;
         }
  
-       hash_preload(vma->vm_mm, address, access, trap);
+       hash_preload(vma->vm_mm, address, is_exec, trap);
  #endif /* CONFIG_PPC_STD_MMU */
  #if (defined(CONFIG_PPC_BOOK3E_64) || defined(CONFIG_PPC_FSL_BOOK3E)) \
         && defined(CONFIG_HUGETLB_PAGE)
diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c

index dbd8f76..510f103 100644 (file)
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -53,6 +53,8 @@ int hash__alloc_context_id(void)
  }
  EXPORT_SYMBOL_GPL(hash__alloc_context_id);
  
+void slb_setup_new_exec(void);
+
  static int hash__init_new_context(struct mm_struct *mm)
  {
         int index;
@@ -84,6 +86,13 @@ static int hash__init_new_context(struct mm_struct *mm)
         return index;
  }
  
+void hash__setup_new_exec(void)        /* new-exec setup for the hash MMU: calls the slice hook, then the SLB hook */
+{
+       slice_setup_new_exec(); /* slice part first */
+
+       slb_setup_new_exec();   /* SLB part second; its prototype is declared earlier in this file */
+}
+
  static int radix__init_new_context(struct mm_struct *mm)
  {
         unsigned long rts_field;
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h

index e5d779e..8574fbb 100644 (file)
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -22,6 +22,7 @@
  #include <asm/mmu.h>
  
  #ifdef CONFIG_PPC_MMU_NOHASH
+#include <asm/trace.h>
  
  /*
   * On 40x and 8xx, we directly inline tlbia and tlbivax
@@ -30,10 +31,12 @@
  static inline void _tlbil_all(void)
  {
         asm volatile ("sync; tlbia; isync" : : : "memory");
+       trace_tlbia(MMU_NO_CONTEXT);
  }
  static inline void _tlbil_pid(unsigned int pid)
  {
         asm volatile ("sync; tlbia; isync" : : : "memory");
+       trace_tlbia(pid);
  }
  #define _tlbil_pid_noind(pid)  _tlbil_pid(pid)
  
@@ -55,6 +58,7 @@ static inline void _tlbil_va(unsigned long address, unsigned int pid,
                              unsigned int tsize, unsigned int ind)
  {
         asm volatile ("tlbie %0; sync" : : "r" (address) : "memory");
+       trace_tlbie(0, 0, address, pid, 0, 0, 0);
  }
  #elif defined(CONFIG_PPC_BOOK3E)
  extern void _tlbil_va(unsigned long address, unsigned int pid,
@@ -82,7 +86,7 @@ static inline void _tlbivax_bcast(unsigned long address, unsigned int pid,
  #else /* CONFIG_PPC_MMU_NOHASH */
  
  extern void hash_preload(struct mm_struct *mm, unsigned long ea,
-                        unsigned long access, unsigned long trap);
+                        bool is_exec, unsigned long trap);
  
  
  extern void _tlbie(unsigned long address);
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c

index 055b211..693ae1c 100644 (file)
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -1521,6 +1521,10 @@ int start_topology_update(void)
                 }
         }
  
+       pr_info("Starting topology update%s%s\n",
+               (prrn_enabled ? " prrn_enabled" : ""),
+               (vphn_enabled ? " vphn_enabled" : ""));
+
         return rc;
  }
  
@@ -1542,6 +1546,8 @@ int stop_topology_update(void)
                 rc = del_timer_sync(&topology_timer);
         }
  
+       pr_info("Stopping topology update\n");
+
         return rc;
  }
  
diff --git a/arch/powerpc/mm/pgtable-book3e.c b/arch/powerpc/mm/pgtable-book3e.c

index a229893..e0ccf36 100644 (file)
--- a/arch/powerpc/mm/pgtable-book3e.c
+++ b/arch/powerpc/mm/pgtable-book3e.c
@@ -42,7 +42,7 @@ int __meminit vmemmap_create_mapping(unsigned long start,
          * thus must have the low bits clear
          */
         for (i = 0; i < page_size; i += PAGE_SIZE)
-               BUG_ON(map_kernel_page(start + i, phys, flags));
+               BUG_ON(map_kernel_page(start + i, phys, __pgprot(flags)));
  
         return 0;
  }
@@ -70,7 +70,7 @@ static __ref void *early_alloc_pgtable(unsigned long size)
   * map_kernel_page adds an entry to the ioremap page table
   * and adds an entry to the HPT, possibly bolting it
   */
-int map_kernel_page(unsigned long ea, unsigned long pa, unsigned long flags)
+int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot)
  {
         pgd_t *pgdp;
         pud_t *pudp;
@@ -89,8 +89,6 @@ int map_kernel_page(unsigned long ea, unsigned long pa, unsigned long flags)
                 ptep = pte_alloc_kernel(pmdp, ea);
                 if (!ptep)
                         return -ENOMEM;
-               set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
-                                                         __pgprot(flags)));
         } else {
                 pgdp = pgd_offset_k(ea);
  #ifndef __PAGETABLE_PUD_FOLDED
@@ -113,9 +111,8 @@ int map_kernel_page(unsigned long ea, unsigned long pa, unsigned long flags)
                         pmd_populate_kernel(&init_mm, pmdp, ptep);
                 }
                 ptep = pte_offset_kernel(pmdp, ea);
-               set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
-                                                         __pgprot(flags)));
         }
+       set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, prot));
  
         smp_wmb();
         return 0;
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c

index 01d7c0f..9f93c9f 100644 (file)
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -69,9 +69,14 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
                 pmd_t *pmdp, pmd_t pmd)
  {
  #ifdef CONFIG_DEBUG_VM
-       WARN_ON(pte_present(pmd_pte(*pmdp)) && !pte_protnone(pmd_pte(*pmdp)));
+       /*
+        * Make sure hardware valid bit is not set. We don't do
+        * tlb flush for this update.
+        */
+
+       WARN_ON(pte_hw_valid(pmd_pte(*pmdp)) && !pte_protnone(pmd_pte(*pmdp)));
         assert_spin_locked(pmd_lockptr(mm, pmdp));
-       WARN_ON(!(pmd_trans_huge(pmd) || pmd_devmap(pmd)));
+       WARN_ON(!(pmd_large(pmd) || pmd_devmap(pmd)));
  #endif
         trace_hugepage_set_pmd(addr, pmd_val(pmd));
         return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));
@@ -106,7 +111,7 @@ pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
  {
         unsigned long old_pmd;
  
-       old_pmd = pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, 0);
+       old_pmd = pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, _PAGE_INVALID);
         flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
         /*
          * This ensures that generic code that rely on IRQ disabling
diff --git a/arch/powerpc/mm/pgtable-hash64.c b/arch/powerpc/mm/pgtable-hash64.c

index 692bfc9..c08d490 100644 (file)
--- a/arch/powerpc/mm/pgtable-hash64.c
+++ b/arch/powerpc/mm/pgtable-hash64.c
@@ -142,7 +142,7 @@ void hash__vmemmap_remove_mapping(unsigned long start,
   * map_kernel_page adds an entry to the ioremap page table
   * and adds an entry to the HPT, possibly bolting it
   */
-int hash__map_kernel_page(unsigned long ea, unsigned long pa, unsigned long flags)
+int hash__map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot)
  {
         pgd_t *pgdp;
         pud_t *pudp;
@@ -161,8 +161,7 @@ int hash__map_kernel_page(unsigned long ea, unsigned long pa, unsigned long flag
                 ptep = pte_alloc_kernel(pmdp, ea);
                 if (!ptep)
                         return -ENOMEM;
-               set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
-                                                         __pgprot(flags)));
+               set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, prot));
         } else {
                 /*
                  * If the mm subsystem is not fully up, we cannot create a
@@ -170,7 +169,7 @@ int hash__map_kernel_page(unsigned long ea, unsigned long pa, unsigned long flag
                  * entry in the hardware page table.
                  *
                  */
-               if (htab_bolt_mapping(ea, ea + PAGE_SIZE, pa, flags,
+               if (htab_bolt_mapping(ea, ea + PAGE_SIZE, pa, pgprot_val(prot),
                                       mmu_io_psize, mmu_kernel_ssize)) {
                         printk(KERN_ERR "Failed to do bolted mapping IO "
                                "memory at %016lx !\n", pa);
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c

index c879979..9311560 100644 (file)
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -241,9 +241,8 @@ void radix__mark_initmem_nx(void)
  }
  #endif /* CONFIG_STRICT_KERNEL_RWX */
  
-static inline void __meminit print_mapping(unsigned long start,
-                                          unsigned long end,
-                                          unsigned long size)
+static inline void __meminit
+print_mapping(unsigned long start, unsigned long end, unsigned long size, bool exec)
  {
         char buf[10];
  
@@ -252,7 +251,17 @@ static inline void __meminit print_mapping(unsigned long start,
  
         string_get_size(size, 1, STRING_UNITS_2, buf, sizeof(buf));
  
-       pr_info("Mapped 0x%016lx-0x%016lx with %s pages\n", start, end, buf);
+       pr_info("Mapped 0x%016lx-0x%016lx with %s pages%s\n", start, end, buf,
+               exec ? " (exec)" : "");
+}
+
+static unsigned long next_boundary(unsigned long addr, unsigned long end)      /* returns the address at which the mapping run starting at addr must stop */
+{
+#ifdef CONFIG_STRICT_KERNEL_RWX
+       if (addr < __pa_symbol(__init_begin))   /* addr is still below the physical address of __init_begin */
+               return __pa_symbol(__init_begin);       /* NOTE(review): returned without clamping to end - confirm callers never pass end below this address */
+#endif
+       return end;     /* at/after __init_begin, or without CONFIG_STRICT_KERNEL_RWX: the boundary is simply end */
+}
  
  static int __meminit create_physical_mapping(unsigned long start,
@@ -260,13 +269,8 @@ static int __meminit create_physical_mapping(unsigned long start,
                                              int nid)
  {
         unsigned long vaddr, addr, mapping_size = 0;
+       bool prev_exec, exec = false;
         pgprot_t prot;
-       unsigned long max_mapping_size;
-#ifdef CONFIG_STRICT_KERNEL_RWX
-       int split_text_mapping = 1;
-#else
-       int split_text_mapping = 0;
-#endif
         int psize;
  
         start = _ALIGN_UP(start, PAGE_SIZE);
@@ -274,14 +278,12 @@ static int __meminit create_physical_mapping(unsigned long start,
                 unsigned long gap, previous_size;
                 int rc;
  
-               gap = end - addr;
+               gap = next_boundary(addr, end) - addr;
                 previous_size = mapping_size;
-               max_mapping_size = PUD_SIZE;
+               prev_exec = exec;
  
-retry:
                 if (IS_ALIGNED(addr, PUD_SIZE) && gap >= PUD_SIZE &&
-                   mmu_psize_defs[MMU_PAGE_1G].shift &&
-                   PUD_SIZE <= max_mapping_size) {
+                   mmu_psize_defs[MMU_PAGE_1G].shift) {
                         mapping_size = PUD_SIZE;
                         psize = MMU_PAGE_1G;
                 } else if (IS_ALIGNED(addr, PMD_SIZE) && gap >= PMD_SIZE &&
@@ -293,32 +295,21 @@ retry:
                         psize = mmu_virtual_psize;
                 }
  
-               if (split_text_mapping && (mapping_size == PUD_SIZE) &&
-                       (addr <= __pa_symbol(__init_begin)) &&
-                       (addr + mapping_size) >= __pa_symbol(_stext)) {
-                       max_mapping_size = PMD_SIZE;
-                       goto retry;
-               }
-
-               if (split_text_mapping && (mapping_size == PMD_SIZE) &&
-                   (addr <= __pa_symbol(__init_begin)) &&
-                   (addr + mapping_size) >= __pa_symbol(_stext)) {
-                       mapping_size = PAGE_SIZE;
-                       psize = mmu_virtual_psize;
-               }
-
-               if (mapping_size != previous_size) {
-                       print_mapping(start, addr, previous_size);
-                       start = addr;
-               }
-
                 vaddr = (unsigned long)__va(addr);
  
                 if (overlaps_kernel_text(vaddr, vaddr + mapping_size) ||
-                   overlaps_interrupt_vector_text(vaddr, vaddr + mapping_size))
+                   overlaps_interrupt_vector_text(vaddr, vaddr + mapping_size)) {
                         prot = PAGE_KERNEL_X;
-               else
+                       exec = true;
+               } else {
                         prot = PAGE_KERNEL;
+                       exec = false;
+               }
+
+               if (mapping_size != previous_size || exec != prev_exec) {
+                       print_mapping(start, addr, previous_size, prev_exec);
+                       start = addr;
+               }
  
                 rc = __map_kernel_page(vaddr, addr, prot, mapping_size, nid, start, end);
                 if (rc)
@@ -327,7 +318,7 @@ retry:
                 update_page_count(psize, 1);
         }
  
-       print_mapping(start, addr, mapping_size);
+       print_mapping(start, addr, mapping_size, exec);
         return 0;
  }
  
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c

index d71c777..010e1c6 100644 (file)
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -44,20 +44,13 @@ static inline int is_exec_fault(void)
  static inline int pte_looks_normal(pte_t pte)
  {
  
-#if defined(CONFIG_PPC_BOOK3S_64)
-       if ((pte_val(pte) & (_PAGE_PRESENT | _PAGE_SPECIAL)) == _PAGE_PRESENT) {
+       if (pte_present(pte) && !pte_special(pte)) {
                 if (pte_ci(pte))
                         return 0;
                 if (pte_user(pte))
                         return 1;
         }
         return 0;
-#else
-       return (pte_val(pte) &
-               (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE | _PAGE_USER |
-                _PAGE_PRIVILEGED)) ==
-               (_PAGE_PRESENT | _PAGE_USER);
-#endif
  }
  
  static struct page *maybe_pte_to_page(pte_t pte)
@@ -73,7 +66,7 @@ static struct page *maybe_pte_to_page(pte_t pte)
         return page;
  }
  
-#if defined(CONFIG_PPC_STD_MMU) || _PAGE_EXEC == 0
+#ifdef CONFIG_PPC_BOOK3S
  
  /* Server-style MMU handles coherency when hashing if HW exec permission
   * is supposed per page (currently 64-bit only). If not, then, we always
@@ -106,7 +99,7 @@ static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma,
         return pte;
  }
  
-#else /* defined(CONFIG_PPC_STD_MMU) || _PAGE_EXEC == 0 */
+#else /* CONFIG_PPC_BOOK3S */
  
  /* Embedded type MMU with HW exec support. This is a bit more complicated
   * as we don't have two bits to spare for _PAGE_EXEC and _PAGE_HWEXEC so
@@ -117,7 +110,7 @@ static pte_t set_pte_filter(pte_t pte)
         struct page *pg;
  
         /* No exec permission in the first place, move on */
-       if (!(pte_val(pte) & _PAGE_EXEC) || !pte_looks_normal(pte))
+       if (!pte_exec(pte) || !pte_looks_normal(pte))
                 return pte;
  
         /* If you set _PAGE_EXEC on weird pages you're on your own */
@@ -137,7 +130,7 @@ static pte_t set_pte_filter(pte_t pte)
         }
  
         /* Else, we filter out _PAGE_EXEC */
-       return __pte(pte_val(pte) & ~_PAGE_EXEC);
+       return pte_exprotect(pte);
  }
  
  static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma,
@@ -150,7 +143,7 @@ static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma,
          * if necessary. Also if _PAGE_EXEC is already set, same deal,
          * we just bail out
          */
-       if (dirty || (pte_val(pte) & _PAGE_EXEC) || !is_exec_fault())
+       if (dirty || pte_exec(pte) || !is_exec_fault())
                 return pte;
  
  #ifdef CONFIG_DEBUG_VM
@@ -176,10 +169,10 @@ static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma,
         set_bit(PG_arch_1, &pg->flags);
  
   bail:
-       return __pte(pte_val(pte) | _PAGE_EXEC);
+       return pte_mkexec(pte);
  }
  
-#endif /* !(defined(CONFIG_PPC_STD_MMU) || _PAGE_EXEC == 0) */
+#endif /* CONFIG_PPC_BOOK3S */
  
  /*
   * set_pte stores a linux PTE into the linux page table.
@@ -188,14 +181,13 @@ void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
                 pte_t pte)
  {
         /*
-        * When handling numa faults, we already have the pte marked
-        * _PAGE_PRESENT, but we can be sure that it is not in hpte.
-        * Hence we can use set_pte_at for them.
+        * Make sure hardware valid bit is not set. We don't do
+        * tlb flush for this update.
          */
-       VM_WARN_ON(pte_present(*ptep) && !pte_protnone(*ptep));
+       VM_WARN_ON(pte_hw_valid(*ptep) && !pte_protnone(*ptep));
  
         /* Add the pte bit when trying to set a pte */
-       pte = __pte(pte_val(pte) | _PAGE_PTE);
+       pte = pte_mkpte(pte);
  
         /* Note: mm->context.id might not yet have been assigned as
          * this context might not have been activated yet when this
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c

index 120a49b..5877f5a 100644 (file)
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -76,56 +76,69 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
  void __iomem *
  ioremap(phys_addr_t addr, unsigned long size)
  {
-       return __ioremap_caller(addr, size, _PAGE_NO_CACHE | _PAGE_GUARDED,
-                               __builtin_return_address(0));
+       pgprot_t prot = pgprot_noncached(PAGE_KERNEL);
+
+       return __ioremap_caller(addr, size, prot, __builtin_return_address(0));
  }
  EXPORT_SYMBOL(ioremap);
  
  void __iomem *
  ioremap_wc(phys_addr_t addr, unsigned long size)
  {
-       return __ioremap_caller(addr, size, _PAGE_NO_CACHE,
-                               __builtin_return_address(0));
+       pgprot_t prot = pgprot_noncached_wc(PAGE_KERNEL);
+
+       return __ioremap_caller(addr, size, prot, __builtin_return_address(0));
  }
  EXPORT_SYMBOL(ioremap_wc);
  
+void __iomem *
+ioremap_wt(phys_addr_t addr, unsigned long size)
+{
+       pgprot_t prot = pgprot_cached_wthru(PAGE_KERNEL);
+
+       return __ioremap_caller(addr, size, prot, __builtin_return_address(0));
+}
+EXPORT_SYMBOL(ioremap_wt);
+
+void __iomem *
+ioremap_coherent(phys_addr_t addr, unsigned long size)
+{
+       pgprot_t prot = pgprot_cached(PAGE_KERNEL);
+
+       return __ioremap_caller(addr, size, prot, __builtin_return_address(0));
+}
+EXPORT_SYMBOL(ioremap_coherent);
+
  void __iomem *
  ioremap_prot(phys_addr_t addr, unsigned long size, unsigned long flags)
  {
+       pte_t pte = __pte(flags);
+
         /* writeable implies dirty for kernel addresses */
-       if ((flags & (_PAGE_RW | _PAGE_RO)) != _PAGE_RO)
-               flags |= _PAGE_DIRTY | _PAGE_HWWRITE;
+       if (pte_write(pte))
+               pte = pte_mkdirty(pte);
  
         /* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */
-       flags &= ~(_PAGE_USER | _PAGE_EXEC);
-       flags |= _PAGE_PRIVILEGED;
+       pte = pte_exprotect(pte);
+       pte = pte_mkprivileged(pte);
  
-       return __ioremap_caller(addr, size, flags, __builtin_return_address(0));
+       return __ioremap_caller(addr, size, pte_pgprot(pte), __builtin_return_address(0));
  }
  EXPORT_SYMBOL(ioremap_prot);
  
  void __iomem *
  __ioremap(phys_addr_t addr, unsigned long size, unsigned long flags)
  {
-       return __ioremap_caller(addr, size, flags, __builtin_return_address(0));
+       return __ioremap_caller(addr, size, __pgprot(flags), __builtin_return_address(0));
  }
  
  void __iomem *
-__ioremap_caller(phys_addr_t addr, unsigned long size, unsigned long flags,
-                void *caller)
+__ioremap_caller(phys_addr_t addr, unsigned long size, pgprot_t prot, void *caller)
  {
         unsigned long v, i;
         phys_addr_t p;
         int err;
  
-       /* Make sure we have the base flags */
-       if ((flags & _PAGE_PRESENT) == 0)
-               flags |= pgprot_val(PAGE_KERNEL);
-
-       /* Non-cacheable page cannot be coherent */
-       if (flags & _PAGE_NO_CACHE)
-               flags &= ~_PAGE_COHERENT;
-
         /*
          * Choose an address to map it to.
          * Once the vmalloc system is running, we use it.
@@ -183,7 +196,7 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, unsigned long flags,
  
         err = 0;
         for (i = 0; i < size && err == 0; i += PAGE_SIZE)
-               err = map_kernel_page(v+i, p+i, flags);
+               err = map_kernel_page(v + i, p + i, prot);
         if (err) {
                 if (slab_is_available())
                         vunmap((void *)v);
@@ -209,7 +222,7 @@ void iounmap(volatile void __iomem *addr)
  }
  EXPORT_SYMBOL(iounmap);
  
-int map_kernel_page(unsigned long va, phys_addr_t pa, int flags)
+int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot)
  {
         pmd_t *pd;
         pte_t *pg;
@@ -224,10 +237,8 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, int flags)
                 /* The PTE should never be already set nor present in the
                  * hash table
                  */
-               BUG_ON((pte_val(*pg) & (_PAGE_PRESENT | _PAGE_HASHPTE)) &&
-                      flags);
-               set_pte_at(&init_mm, va, pg, pfn_pte(pa >> PAGE_SHIFT,
-                                                    __pgprot(flags)));
+               BUG_ON((pte_present(*pg) | pte_hashpte(*pg)) && pgprot_val(prot));
+               set_pte_at(&init_mm, va, pg, pfn_pte(pa >> PAGE_SHIFT, prot));
         }
         smp_wmb();
         return err;
@@ -238,7 +249,7 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, int flags)
   */
  static void __init __mapin_ram_chunk(unsigned long offset, unsigned long top)
  {
-       unsigned long v, s, f;
+       unsigned long v, s;
         phys_addr_t p;
         int ktext;
  
@@ -248,11 +259,10 @@ static void __init __mapin_ram_chunk(unsigned long offset, unsigned long top)
         for (; s < top; s += PAGE_SIZE) {
                 ktext = ((char *)v >= _stext && (char *)v < etext) ||
                         ((char *)v >= _sinittext && (char *)v < _einittext);
-               f = ktext ? pgprot_val(PAGE_KERNEL_TEXT) : pgprot_val(PAGE_KERNEL);
-               map_kernel_page(v, p, f);
+               map_kernel_page(v, p, ktext ? PAGE_KERNEL_TEXT : PAGE_KERNEL);
  #ifdef CONFIG_PPC_STD_MMU_32
                 if (ktext)
-                       hash_preload(&init_mm, v, 0, 0x300);
+                       hash_preload(&init_mm, v, false, 0x300);
  #endif
                 v += PAGE_SIZE;
                 p += PAGE_SIZE;
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c

index 53e9eee..fb1375c 100644 (file)
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -113,17 +113,12 @@ unsigned long ioremap_bot = IOREMAP_BASE;
   * __ioremap_at - Low level function to establish the page tables
   *                for an IO mapping
   */
-void __iomem * __ioremap_at(phys_addr_t pa, void *ea, unsigned long size,
-                           unsigned long flags)
+void __iomem *__ioremap_at(phys_addr_t pa, void *ea, unsigned long size, pgprot_t prot)
  {
         unsigned long i;
  
-       /* Make sure we have the base flags */
-       if ((flags & _PAGE_PRESENT) == 0)
-               flags |= pgprot_val(PAGE_KERNEL);
-
         /* We don't support the 4K PFN hack with ioremap */
-       if (flags & H_PAGE_4K_PFN)
+       if (pgprot_val(prot) & H_PAGE_4K_PFN)
                 return NULL;
  
         WARN_ON(pa & ~PAGE_MASK);
@@ -131,7 +126,7 @@ void __iomem * __ioremap_at(phys_addr_t pa, void *ea, unsigned long size,
         WARN_ON(size & ~PAGE_MASK);
  
         for (i = 0; i < size; i += PAGE_SIZE)
-               if (map_kernel_page((unsigned long)ea+i, pa+i, flags))
+               if (map_kernel_page((unsigned long)ea + i, pa + i, prot))
                         return NULL;
  
         return (void __iomem *)ea;
@@ -152,7 +147,7 @@ void __iounmap_at(void *ea, unsigned long size)
  }
  
  void __iomem * __ioremap_caller(phys_addr_t addr, unsigned long size,
-                               unsigned long flags, void *caller)
+                               pgprot_t prot, void *caller)
  {
         phys_addr_t paligned;
         void __iomem *ret;
@@ -182,11 +177,11 @@ void __iomem * __ioremap_caller(phys_addr_t addr, unsigned long size,
                         return NULL;
  
                 area->phys_addr = paligned;
-               ret = __ioremap_at(paligned, area->addr, size, flags);
+               ret = __ioremap_at(paligned, area->addr, size, prot);
                 if (!ret)
                         vunmap(area->addr);
         } else {
-               ret = __ioremap_at(paligned, (void *)ioremap_bot, size, flags);
+               ret = __ioremap_at(paligned, (void *)ioremap_bot, size, prot);
                 if (ret)
                         ioremap_bot += size;
         }
@@ -199,49 +194,59 @@ void __iomem * __ioremap_caller(phys_addr_t addr, unsigned long size,
  void __iomem * __ioremap(phys_addr_t addr, unsigned long size,
                          unsigned long flags)
  {
-       return __ioremap_caller(addr, size, flags, __builtin_return_address(0));
+       return __ioremap_caller(addr, size, __pgprot(flags), __builtin_return_address(0));
  }
  
  void __iomem * ioremap(phys_addr_t addr, unsigned long size)
  {
-       unsigned long flags = pgprot_val(pgprot_noncached(__pgprot(0)));
+       pgprot_t prot = pgprot_noncached(PAGE_KERNEL);
         void *caller = __builtin_return_address(0);
  
         if (ppc_md.ioremap)
-               return ppc_md.ioremap(addr, size, flags, caller);
-       return __ioremap_caller(addr, size, flags, caller);
+               return ppc_md.ioremap(addr, size, prot, caller);
+       return __ioremap_caller(addr, size, prot, caller);
  }
  
  void __iomem * ioremap_wc(phys_addr_t addr, unsigned long size)
  {
-       unsigned long flags = pgprot_val(pgprot_noncached_wc(__pgprot(0)));
+       pgprot_t prot = pgprot_noncached_wc(PAGE_KERNEL);
+       void *caller = __builtin_return_address(0);
+
+       if (ppc_md.ioremap)
+               return ppc_md.ioremap(addr, size, prot, caller);
+       return __ioremap_caller(addr, size, prot, caller);
+}
+
+void __iomem *ioremap_coherent(phys_addr_t addr, unsigned long size)
+{
+       pgprot_t prot = pgprot_cached(PAGE_KERNEL);
         void *caller = __builtin_return_address(0);
  
         if (ppc_md.ioremap)
-               return ppc_md.ioremap(addr, size, flags, caller);
-       return __ioremap_caller(addr, size, flags, caller);
+               return ppc_md.ioremap(addr, size, prot, caller);
+       return __ioremap_caller(addr, size, prot, caller);
  }
  
  void __iomem * ioremap_prot(phys_addr_t addr, unsigned long size,
                              unsigned long flags)
  {
+       pte_t pte = __pte(flags);
         void *caller = __builtin_return_address(0);
  
         /* writeable implies dirty for kernel addresses */
-       if (flags & _PAGE_WRITE)
-               flags |= _PAGE_DIRTY;
+       if (pte_write(pte))
+               pte = pte_mkdirty(pte);
  
         /* we don't want to let _PAGE_EXEC leak out */
-       flags &= ~_PAGE_EXEC;
+       pte = pte_exprotect(pte);
         /*
          * Force kernel mapping.
          */
-       flags &= ~_PAGE_USER;
-       flags |= _PAGE_PRIVILEGED;
+       pte = pte_mkprivileged(pte);
  
         if (ppc_md.ioremap)
-               return ppc_md.ioremap(addr, size, flags, caller);
-       return __ioremap_caller(addr, size, flags, caller);
+               return ppc_md.ioremap(addr, size, pte_pgprot(pte), caller);
+       return __ioremap_caller(addr, size, pte_pgprot(pte), caller);
  }
  
  
@@ -306,7 +311,7 @@ struct page *pud_page(pud_t pud)
   */
  struct page *pmd_page(pmd_t pmd)
  {
-       if (pmd_trans_huge(pmd) || pmd_huge(pmd) || pmd_devmap(pmd))
+       if (pmd_large(pmd) || pmd_huge(pmd) || pmd_devmap(pmd))
                 return pte_page(pmd_pte(pmd));
         return virt_to_page(pmd_page_vaddr(pmd));
  }
diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c

index bea6c54..38a793b 100644 (file)
--- a/arch/powerpc/mm/ppc_mmu_32.c
+++ b/arch/powerpc/mm/ppc_mmu_32.c
@@ -163,7 +163,7 @@ void __init setbat(int index, unsigned long virt, phys_addr_t phys,
   * Preload a translation in the hash table
   */
  void hash_preload(struct mm_struct *mm, unsigned long ea,
-                 unsigned long access, unsigned long trap)
+                 bool is_exec, unsigned long trap)
  {
         pmd_t *pmd;
  
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c

index 9f574e5..c3fdf29 100644 (file)
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -14,6 +14,7 @@
   *      2 of the License, or (at your option) any later version.
   */
  
+#include <asm/asm-prototypes.h>
  #include <asm/pgtable.h>
  #include <asm/mmu.h>
  #include <asm/mmu_context.h>
@@ -30,11 +31,10 @@
  
  enum slb_index {
         LINEAR_INDEX    = 0, /* Kernel linear map  (0xc000000000000000) */
-       VMALLOC_INDEX   = 1, /* Kernel virtual map (0xd000000000000000) */
-       KSTACK_INDEX    = 2, /* Kernel stack map */
+       KSTACK_INDEX    = 1, /* Kernel stack map */
  };
  
-extern void slb_allocate(unsigned long ea);
+static long slb_allocate_user(struct mm_struct *mm, unsigned long ea);
  
  #define slb_esid_mask(ssize)   \
         (((ssize) == MMU_SEGSIZE_256M)? ESID_MASK: ESID_MASK_1T)
@@ -45,13 +45,43 @@ static inline unsigned long mk_esid_data(unsigned long ea, int ssize,
         return (ea & slb_esid_mask(ssize)) | SLB_ESID_V | index;
  }
  
-static inline unsigned long mk_vsid_data(unsigned long ea, int ssize,
+static inline unsigned long __mk_vsid_data(unsigned long vsid, int ssize,
                                          unsigned long flags)
  {
-       return (get_kernel_vsid(ea, ssize) << slb_vsid_shift(ssize)) | flags |
+       return (vsid << slb_vsid_shift(ssize)) | flags |
                 ((unsigned long) ssize << SLB_VSID_SSIZE_SHIFT);
  }
  
+static inline unsigned long mk_vsid_data(unsigned long ea, int ssize,
+                                        unsigned long flags)
+{
+       return __mk_vsid_data(get_kernel_vsid(ea, ssize), ssize, flags);
+}
+
+static void assert_slb_exists(unsigned long ea)
+{
+#ifdef CONFIG_DEBUG_VM
+       unsigned long tmp;
+
+       WARN_ON_ONCE(mfmsr() & MSR_EE);
+
+       asm volatile("slbfee. %0, %1" : "=r"(tmp) : "r"(ea) : "cr0");
+       WARN_ON(tmp == 0);
+#endif
+}
+
+static void assert_slb_notexists(unsigned long ea)
+{
+#ifdef CONFIG_DEBUG_VM
+       unsigned long tmp;
+
+       WARN_ON_ONCE(mfmsr() & MSR_EE);
+
+       asm volatile("slbfee. %0, %1" : "=r"(tmp) : "r"(ea) : "cr0");
+       WARN_ON(tmp != 0);
+#endif
+}
+
  static inline void slb_shadow_update(unsigned long ea, int ssize,
                                      unsigned long flags,
                                      enum slb_index index)
@@ -84,6 +114,7 @@ static inline void create_shadowed_slbe(unsigned long ea, int ssize,
          */
         slb_shadow_update(ea, ssize, flags, index);
  
+       assert_slb_notexists(ea);
         asm volatile("slbmte  %0,%1" :
                      : "r" (mk_vsid_data(ea, ssize, flags)),
                        "r" (mk_esid_data(ea, ssize, index))
@@ -105,17 +136,20 @@ void __slb_restore_bolted_realmode(void)
                      : "r" (be64_to_cpu(p->save_area[index].vsid)),
                        "r" (be64_to_cpu(p->save_area[index].esid)));
         }
+
+       assert_slb_exists(local_paca->kstack);
  }
  
  /*
   * Insert the bolted entries into an empty SLB.
- * This is not the same as rebolt because the bolted segments are not
- * changed, just loaded from the shadow area.
   */
  void slb_restore_bolted_realmode(void)
  {
         __slb_restore_bolted_realmode();
         get_paca()->slb_cache_ptr = 0;
+
+       get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
+       get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;
  }
  
  /*
@@ -123,113 +157,262 @@ void slb_restore_bolted_realmode(void)
   */
  void slb_flush_all_realmode(void)
  {
-       /*
-        * This flushes all SLB entries including 0, so it must be realmode.
-        */
         asm volatile("slbmte %0,%0; slbia" : : "r" (0));
  }
  
-static void __slb_flush_and_rebolt(void)
+/*
+ * This flushes non-bolted entries, it can be run in virtual mode. Must
+ * be called with interrupts disabled.
+ */
+void slb_flush_and_restore_bolted(void)
  {
-       /* If you change this make sure you change SLB_NUM_BOLTED
-        * and PR KVM appropriately too. */
-       unsigned long linear_llp, vmalloc_llp, lflags, vflags;
-       unsigned long ksp_esid_data, ksp_vsid_data;
+       struct slb_shadow *p = get_slb_shadow();
  
-       linear_llp = mmu_psize_defs[mmu_linear_psize].sllp;
-       vmalloc_llp = mmu_psize_defs[mmu_vmalloc_psize].sllp;
-       lflags = SLB_VSID_KERNEL | linear_llp;
-       vflags = SLB_VSID_KERNEL | vmalloc_llp;
+       BUILD_BUG_ON(SLB_NUM_BOLTED != 2);
  
-       ksp_esid_data = mk_esid_data(get_paca()->kstack, mmu_kernel_ssize, KSTACK_INDEX);
-       if ((ksp_esid_data & ~0xfffffffUL) <= PAGE_OFFSET) {
-               ksp_esid_data &= ~SLB_ESID_V;
-               ksp_vsid_data = 0;
-               slb_shadow_clear(KSTACK_INDEX);
-       } else {
-               /* Update stack entry; others don't change */
-               slb_shadow_update(get_paca()->kstack, mmu_kernel_ssize, lflags, KSTACK_INDEX);
-               ksp_vsid_data =
-                       be64_to_cpu(get_slb_shadow()->save_area[KSTACK_INDEX].vsid);
-       }
+       WARN_ON(!irqs_disabled());
+
+       /*
+        * We can't take a PMU exception in the following code, so hard
+        * disable interrupts.
+        */
+       hard_irq_disable();
  
-       /* We need to do this all in asm, so we're sure we don't touch
-        * the stack between the slbia and rebolting it. */
         asm volatile("isync\n"
                      "slbia\n"
-                    /* Slot 1 - first VMALLOC segment */
-                    "slbmte    %0,%1\n"
-                    /* Slot 2 - kernel stack */
-                    "slbmte    %2,%3\n"
-                    "isync"
-                    :: "r"(mk_vsid_data(VMALLOC_START, mmu_kernel_ssize, vflags)),
-                       "r"(mk_esid_data(VMALLOC_START, mmu_kernel_ssize, VMALLOC_INDEX)),
-                       "r"(ksp_vsid_data),
-                       "r"(ksp_esid_data)
+                    "slbmte  %0, %1\n"
+                    "isync\n"
+                    :: "r" (be64_to_cpu(p->save_area[KSTACK_INDEX].vsid)),
+                       "r" (be64_to_cpu(p->save_area[KSTACK_INDEX].esid))
                      : "memory");
+       assert_slb_exists(get_paca()->kstack);
+
+       get_paca()->slb_cache_ptr = 0;
+
+       get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
+       get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;
  }
  
-void slb_flush_and_rebolt(void)
+void slb_save_contents(struct slb_entry *slb_ptr)
  {
+       int i;
+       unsigned long e, v;
  
-       WARN_ON(!irqs_disabled());
+       /* Save slb_cache_ptr value. */
+       get_paca()->slb_save_cache_ptr = get_paca()->slb_cache_ptr;
+
+       if (!slb_ptr)
+               return;
+
+       for (i = 0; i < mmu_slb_size; i++) {
+               asm volatile("slbmfee  %0,%1" : "=r" (e) : "r" (i));
+               asm volatile("slbmfev  %0,%1" : "=r" (v) : "r" (i));
+               slb_ptr->esid = e;
+               slb_ptr->vsid = v;
+               slb_ptr++;
+       }
+}
+
+void slb_dump_contents(struct slb_entry *slb_ptr)
+{
+       int i, n;
+       unsigned long e, v;
+       unsigned long llp;
+
+       if (!slb_ptr)
+               return;
+
+       pr_err("SLB contents of cpu 0x%x\n", smp_processor_id());
+       pr_err("Last SLB entry inserted at slot %d\n", get_paca()->stab_rr);
+
+       for (i = 0; i < mmu_slb_size; i++) {
+               e = slb_ptr->esid;
+               v = slb_ptr->vsid;
+               slb_ptr++;
+
+               if (!e && !v)
+                       continue;
+
+               pr_err("%02d %016lx %016lx\n", i, e, v);
+
+               if (!(e & SLB_ESID_V)) {
+                       pr_err("\n");
+                       continue;
+               }
+               llp = v & SLB_VSID_LLP;
+               if (v & SLB_VSID_B_1T) {
+                       pr_err("  1T  ESID=%9lx  VSID=%13lx LLP:%3lx\n",
+                              GET_ESID_1T(e),
+                              (v & ~SLB_VSID_B) >> SLB_VSID_SHIFT_1T, llp);
+               } else {
+                       pr_err(" 256M ESID=%9lx  VSID=%13lx LLP:%3lx\n",
+                              GET_ESID(e),
+                              (v & ~SLB_VSID_B) >> SLB_VSID_SHIFT, llp);
+               }
+       }
+       pr_err("----------------------------------\n");
+
+       /* Dump slb cache entries as well. */
+       pr_err("SLB cache ptr value = %d\n", get_paca()->slb_save_cache_ptr);
+       pr_err("Valid SLB cache entries:\n");
+       n = min_t(int, get_paca()->slb_save_cache_ptr, SLB_CACHE_ENTRIES);
+       for (i = 0; i < n; i++)
+               pr_err("%02d EA[0-35]=%9x\n", i, get_paca()->slb_cache[i]);
+       pr_err("Rest of SLB cache entries:\n");
+       for (i = n; i < SLB_CACHE_ENTRIES; i++)
+               pr_err("%02d EA[0-35]=%9x\n", i, get_paca()->slb_cache[i]);
+}
  
+void slb_vmalloc_update(void)
+{
         /*
-        * We can't take a PMU exception in the following code, so hard
-        * disable interrupts.
+        * vmalloc is not bolted, so just have to flush non-bolted.
          */
-       hard_irq_disable();
+       slb_flush_and_restore_bolted();
+}
  
-       __slb_flush_and_rebolt();
-       get_paca()->slb_cache_ptr = 0;
+static bool preload_hit(struct thread_info *ti, unsigned long esid)
+{
+       unsigned char i;
+
+       for (i = 0; i < ti->slb_preload_nr; i++) {
+               unsigned char idx;
+
+               idx = (ti->slb_preload_tail + i) % SLB_PRELOAD_NR;
+               if (esid == ti->slb_preload_esid[idx])
+                       return true;
+       }
+       return false;
  }
  
-void slb_vmalloc_update(void)
+static bool preload_add(struct thread_info *ti, unsigned long ea)
  {
-       unsigned long vflags;
+       unsigned char idx;
+       unsigned long esid;
+
+       if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) {
+               /* EAs are stored >> 28 so 256MB segments don't need clearing */
+               if (ea & ESID_MASK_1T)
+                       ea &= ESID_MASK_1T;
+       }
+
+       esid = ea >> SID_SHIFT;
  
-       vflags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_vmalloc_psize].sllp;
-       slb_shadow_update(VMALLOC_START, mmu_kernel_ssize, vflags, VMALLOC_INDEX);
-       slb_flush_and_rebolt();
+       if (preload_hit(ti, esid))
+               return false;
+
+       idx = (ti->slb_preload_tail + ti->slb_preload_nr) % SLB_PRELOAD_NR;
+       ti->slb_preload_esid[idx] = esid;
+       if (ti->slb_preload_nr == SLB_PRELOAD_NR)
+               ti->slb_preload_tail = (ti->slb_preload_tail + 1) % SLB_PRELOAD_NR;
+       else
+               ti->slb_preload_nr++;
+
+       return true;
  }
  
-/* Helper function to compare esids.  There are four cases to handle.
- * 1. The system is not 1T segment size capable.  Use the GET_ESID compare.
- * 2. The system is 1T capable, both addresses are < 1T, use the GET_ESID compare.
- * 3. The system is 1T capable, only one of the two addresses is > 1T.  This is not a match.
- * 4. The system is 1T capable, both addresses are > 1T, use the GET_ESID_1T macro to compare.
- */
-static inline int esids_match(unsigned long addr1, unsigned long addr2)
+static void preload_age(struct thread_info *ti)
  {
-       int esid_1t_count;
+       if (!ti->slb_preload_nr)
+               return;
+       ti->slb_preload_nr--;
+       ti->slb_preload_tail = (ti->slb_preload_tail + 1) % SLB_PRELOAD_NR;
+}
  
-       /* System is not 1T segment size capable. */
-       if (!mmu_has_feature(MMU_FTR_1T_SEGMENT))
-               return (GET_ESID(addr1) == GET_ESID(addr2));
+void slb_setup_new_exec(void)
+{
+       struct thread_info *ti = current_thread_info();
+       struct mm_struct *mm = current->mm;
+       unsigned long exec = 0x10000000;
  
-       esid_1t_count = (((addr1 >> SID_SHIFT_1T) != 0) +
-                               ((addr2 >> SID_SHIFT_1T) != 0));
+       WARN_ON(irqs_disabled());
  
-       /* both addresses are < 1T */
-       if (esid_1t_count == 0)
-               return (GET_ESID(addr1) == GET_ESID(addr2));
+       /*
+        * preload cache can only be used to determine whether a SLB
+        * entry exists if it does not start to overflow.
+        */
+       if (ti->slb_preload_nr + 2 > SLB_PRELOAD_NR)
+               return;
  
-       /* One address < 1T, the other > 1T.  Not a match */
-       if (esid_1t_count == 1)
-               return 0;
+       hard_irq_disable();
  
-       /* Both addresses are > 1T. */
-       return (GET_ESID_1T(addr1) == GET_ESID_1T(addr2));
+       /*
+        * We have no good place to clear the slb preload cache on exec,
+        * flush_thread is about the earliest arch hook but that happens
+        * after we switch to the mm and have already preloaded the SLBEs.
+        *
+        * For the most part that's probably okay to use entries from the
+        * previous exec, they will age out if unused. It may turn out to
+        * be an advantage to clear the cache before switching to it,
+        * however.
+        */
+
+       /*
+        * preload some userspace segments into the SLB.
+        * Almost all 32 and 64bit PowerPC executables are linked at
+        * 0x10000000 so it makes sense to preload this segment.
+        */
+       if (!is_kernel_addr(exec)) {
+               if (preload_add(ti, exec))
+                       slb_allocate_user(mm, exec);
+       }
+
+       /* Libraries and mmaps. */
+       if (!is_kernel_addr(mm->mmap_base)) {
+               if (preload_add(ti, mm->mmap_base))
+                       slb_allocate_user(mm, mm->mmap_base);
+       }
+
+       /* see switch_slb */
+       asm volatile("isync" : : : "memory");
+
+       local_irq_enable();
  }
  
+void preload_new_slb_context(unsigned long start, unsigned long sp)
+{
+       struct thread_info *ti = current_thread_info();
+       struct mm_struct *mm = current->mm;
+       unsigned long heap = mm->start_brk;
+
+       WARN_ON(irqs_disabled());
+
+       /* see above */
+       if (ti->slb_preload_nr + 3 > SLB_PRELOAD_NR)
+               return;
+
+       hard_irq_disable();
+
+       /* Userspace entry address. */
+       if (!is_kernel_addr(start)) {
+               if (preload_add(ti, start))
+                       slb_allocate_user(mm, start);
+       }
+
+       /* Top of stack, grows down. */
+       if (!is_kernel_addr(sp)) {
+               if (preload_add(ti, sp))
+                       slb_allocate_user(mm, sp);
+       }
+
+       /* Bottom of heap, grows up. */
+       if (heap && !is_kernel_addr(heap)) {
+               if (preload_add(ti, heap))
+                       slb_allocate_user(mm, heap);
+       }
+
+       /* see switch_slb */
+       asm volatile("isync" : : : "memory");
+
+       local_irq_enable();
+}
+
+
  /* Flush all user entries from the segment table of the current processor. */
  void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
  {
-       unsigned long offset;
-       unsigned long slbie_data = 0;
-       unsigned long pc = KSTK_EIP(tsk);
-       unsigned long stack = KSTK_ESP(tsk);
-       unsigned long exec_base;
+       struct thread_info *ti = task_thread_info(tsk);
+       unsigned char i;
  
         /*
          * We need interrupts hard-disabled here, not just soft-disabled,
@@ -238,91 +421,107 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
          * which would update the slb_cache/slb_cache_ptr fields in the PACA.
          */
         hard_irq_disable();
-       offset = get_paca()->slb_cache_ptr;
-       if (!mmu_has_feature(MMU_FTR_NO_SLBIE_B) &&
-           offset <= SLB_CACHE_ENTRIES) {
-               int i;
-               asm volatile("isync" : : : "memory");
-               for (i = 0; i < offset; i++) {
-                       slbie_data = (unsigned long)get_paca()->slb_cache[i]
-                               << SID_SHIFT; /* EA */
-                       slbie_data |= user_segment_size(slbie_data)
-                               << SLBIE_SSIZE_SHIFT;
-                       slbie_data |= SLBIE_C; /* C set for user addresses */
-                       asm volatile("slbie %0" : : "r" (slbie_data));
-               }
-               asm volatile("isync" : : : "memory");
+       asm volatile("isync" : : : "memory");
+       if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+               /*
+                * SLBIA IH=3 invalidates all Class=1 SLBEs and their
+                * associated lookaside structures, which matches what
+                * switch_slb wants. So ARCH_300 does not use the slb
+                * cache.
+                */
+               asm volatile(PPC_SLBIA(3));
         } else {
-               __slb_flush_and_rebolt();
-       }
+               unsigned long offset = get_paca()->slb_cache_ptr;
+
+               if (!mmu_has_feature(MMU_FTR_NO_SLBIE_B) &&
+                   offset <= SLB_CACHE_ENTRIES) {
+                       unsigned long slbie_data = 0;
+
+                       for (i = 0; i < offset; i++) {
+                               unsigned long ea;
+
+                               ea = (unsigned long)
+                                       get_paca()->slb_cache[i] << SID_SHIFT;
+                               /*
+                                * Could assert_slb_exists here, but hypervisor
+                                * or machine check could have come in and
+                                * removed the entry at this point.
+                                */
+
+                               slbie_data = ea;
+                               slbie_data |= user_segment_size(slbie_data)
+                                               << SLBIE_SSIZE_SHIFT;
+                               slbie_data |= SLBIE_C; /* user slbs have C=1 */
+                               asm volatile("slbie %0" : : "r" (slbie_data));
+                       }
+
+                       /* Workaround POWER5 < DD2.1 issue */
+                       if (!cpu_has_feature(CPU_FTR_ARCH_207S) && offset == 1)
+                               asm volatile("slbie %0" : : "r" (slbie_data));
+
+               } else {
+                       struct slb_shadow *p = get_slb_shadow();
+                       unsigned long ksp_esid_data =
+                               be64_to_cpu(p->save_area[KSTACK_INDEX].esid);
+                       unsigned long ksp_vsid_data =
+                               be64_to_cpu(p->save_area[KSTACK_INDEX].vsid);
+
+                       asm volatile(PPC_SLBIA(1) "\n"
+                                    "slbmte    %0,%1\n"
+                                    "isync"
+                                    :: "r"(ksp_vsid_data),
+                                       "r"(ksp_esid_data));
+
+                       get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
+               }
  
-       /* Workaround POWER5 < DD2.1 issue */
-       if (offset == 1 || offset > SLB_CACHE_ENTRIES)
-               asm volatile("slbie %0" : : "r" (slbie_data));
+               get_paca()->slb_cache_ptr = 0;
+       }
+       get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;
  
-       get_paca()->slb_cache_ptr = 0;
         copy_mm_to_paca(mm);
  
         /*
-        * preload some userspace segments into the SLB.
-        * Almost all 32 and 64bit PowerPC executables are linked at
-        * 0x10000000 so it makes sense to preload this segment.
+        * We gradually age out SLBs after a number of context switches to
+        * reduce reload overhead of unused entries (like we do with FP/VEC
+        * reload). Each time we wrap 256 switches, take an entry out of the
+        * SLB preload cache.
          */
-       exec_base = 0x10000000;
-
-       if (is_kernel_addr(pc) || is_kernel_addr(stack) ||
-           is_kernel_addr(exec_base))
-               return;
+       tsk->thread.load_slb++;
+       if (!tsk->thread.load_slb) {
+               unsigned long pc = KSTK_EIP(tsk);
  
-       slb_allocate(pc);
+               preload_age(ti);
+               preload_add(ti, pc);
+       }
  
-       if (!esids_match(pc, stack))
-               slb_allocate(stack);
+       for (i = 0; i < ti->slb_preload_nr; i++) {
+               unsigned char idx;
+               unsigned long ea;
  
-       if (!esids_match(pc, exec_base) &&
-           !esids_match(stack, exec_base))
-               slb_allocate(exec_base);
-}
+               idx = (ti->slb_preload_tail + i) % SLB_PRELOAD_NR;
+               ea = (unsigned long)ti->slb_preload_esid[idx] << SID_SHIFT;
  
-static inline void patch_slb_encoding(unsigned int *insn_addr,
-                                     unsigned int immed)
-{
+               slb_allocate_user(mm, ea);
+       }
  
         /*
-        * This function patches either an li or a cmpldi instruction with
-        * a new immediate value. This relies on the fact that both li
-        * (which is actually addi) and cmpldi both take a 16-bit immediate
-        * value, and it is situated in the same location in the instruction,
-        * ie. bits 16-31 (Big endian bit order) or the lower 16 bits.
-        * The signedness of the immediate operand differs between the two
-        * instructions however this code is only ever patching a small value,
-        * much less than 1 << 15, so we can get away with it.
-        * To patch the value we read the existing instruction, clear the
-        * immediate value, and or in our new value, then write the instruction
-        * back.
+        * Synchronize slbmte preloads with possible subsequent user memory
+        * address accesses by the kernel (user mode won't happen until
+        * rfid, which is safe).
          */
-       unsigned int insn = (*insn_addr & 0xffff0000) | immed;
-       patch_instruction(insn_addr, insn);
+       asm volatile("isync" : : : "memory");
  }
  
-extern u32 slb_miss_kernel_load_linear[];
-extern u32 slb_miss_kernel_load_io[];
-extern u32 slb_compare_rr_to_size[];
-extern u32 slb_miss_kernel_load_vmemmap[];
-
  void slb_set_size(u16 size)
  {
-       if (mmu_slb_size == size)
-               return;
-
         mmu_slb_size = size;
-       patch_slb_encoding(slb_compare_rr_to_size, mmu_slb_size);
  }
  
  void slb_initialize(void)
  {
         unsigned long linear_llp, vmalloc_llp, io_llp;
-       unsigned long lflags, vflags;
+       unsigned long lflags;
         static int slb_encoding_inited;
  #ifdef CONFIG_SPARSEMEM_VMEMMAP
         unsigned long vmemmap_llp;
@@ -338,34 +537,24 @@ void slb_initialize(void)
  #endif
         if (!slb_encoding_inited) {
                 slb_encoding_inited = 1;
-               patch_slb_encoding(slb_miss_kernel_load_linear,
-                                  SLB_VSID_KERNEL | linear_llp);
-               patch_slb_encoding(slb_miss_kernel_load_io,
-                                  SLB_VSID_KERNEL | io_llp);
-               patch_slb_encoding(slb_compare_rr_to_size,
-                                  mmu_slb_size);
-
                 pr_devel("SLB: linear  LLP = %04lx\n", linear_llp);
                 pr_devel("SLB: io      LLP = %04lx\n", io_llp);
-
  #ifdef CONFIG_SPARSEMEM_VMEMMAP
-               patch_slb_encoding(slb_miss_kernel_load_vmemmap,
-                                  SLB_VSID_KERNEL | vmemmap_llp);
                 pr_devel("SLB: vmemmap LLP = %04lx\n", vmemmap_llp);
  #endif
         }
  
-       get_paca()->stab_rr = SLB_NUM_BOLTED;
+       get_paca()->stab_rr = SLB_NUM_BOLTED - 1;
+       get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
+       get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;
  
         lflags = SLB_VSID_KERNEL | linear_llp;
-       vflags = SLB_VSID_KERNEL | vmalloc_llp;
  
         /* Invalidate the entire SLB (even entry 0) & all the ERATS */
         asm volatile("isync":::"memory");
         asm volatile("slbmte  %0,%0"::"r" (0) : "memory");
         asm volatile("isync; slbia; isync":::"memory");
         create_shadowed_slbe(PAGE_OFFSET, mmu_kernel_ssize, lflags, LINEAR_INDEX);
-       create_shadowed_slbe(VMALLOC_START, mmu_kernel_ssize, vflags, VMALLOC_INDEX);
  
         /* For the boot cpu, we're running on the stack in init_thread_union,
          * which is in the first segment of the linear mapping, and also
@@ -381,122 +570,259 @@ void slb_initialize(void)
         asm volatile("isync":::"memory");
  }
  
-static void insert_slb_entry(unsigned long vsid, unsigned long ea,
-                            int bpsize, int ssize)
+static void slb_cache_update(unsigned long esid_data)
  {
-       unsigned long flags, vsid_data, esid_data;
-       enum slb_index index;
         int slb_cache_index;
  
-       /*
-        * We are irq disabled, hence should be safe to access PACA.
-        */
-       VM_WARN_ON(!irqs_disabled());
-
-       /*
-        * We can't take a PMU exception in the following code, so hard
-        * disable interrupts.
-        */
-       hard_irq_disable();
-
-       index = get_paca()->stab_rr;
-
-       /*
-        * simple round-robin replacement of slb starting at SLB_NUM_BOLTED.
-        */
-       if (index < (mmu_slb_size - 1))
-               index++;
-       else
-               index = SLB_NUM_BOLTED;
-
-       get_paca()->stab_rr = index;
-
-       flags = SLB_VSID_USER | mmu_psize_defs[bpsize].sllp;
-       vsid_data = (vsid << slb_vsid_shift(ssize)) | flags |
-                   ((unsigned long) ssize << SLB_VSID_SSIZE_SHIFT);
-       esid_data = mk_esid_data(ea, ssize, index);
-
-       /*
-        * No need for an isync before or after this slbmte. The exception
-        * we enter with and the rfid we exit with are context synchronizing.
-        * Also we only handle user segments here.
-        */
-       asm volatile("slbmte %0, %1" : : "r" (vsid_data), "r" (esid_data)
-                    : "memory");
+       if (cpu_has_feature(CPU_FTR_ARCH_300))
+               return; /* ISAv3.0B and later does not use slb_cache */
  
         /*
          * Now update slb cache entries
+        * (esid_data >> 28 is appended to the PACA slb_cache array.
+        * switch_slb() walks that array to slbie each recorded ESID, and
+        * takes its slbia path instead once slb_cache_ptr is above
+        * SLB_CACHE_ENTRIES.)
          */
-       slb_cache_index = get_paca()->slb_cache_ptr;
+       slb_cache_index = local_paca->slb_cache_ptr;
         if (slb_cache_index < SLB_CACHE_ENTRIES) {
                 /*
                  * We have space in slb cache for optimized switch_slb().
                  * Top 36 bits from esid_data as per ISA
                  */
-               get_paca()->slb_cache[slb_cache_index++] = esid_data >> 28;
-               get_paca()->slb_cache_ptr++;
+               local_paca->slb_cache[slb_cache_index++] = esid_data >> 28;
+               local_paca->slb_cache_ptr++;    /* local index is not read again */
         } else {
                 /*
                  * Our cache is full and the current cache content strictly
                  * doesn't indicate the active SLB conents. Bump the ptr
                  * so that switch_slb() will ignore the cache.
                  */
-               get_paca()->slb_cache_ptr = SLB_CACHE_ENTRIES + 1;
+               local_paca->slb_cache_ptr = SLB_CACHE_ENTRIES + 1;
         }
  }
  
-static void handle_multi_context_slb_miss(int context_id, unsigned long ea)
+static enum slb_index alloc_slb_index(bool kernel)
  {
-       struct mm_struct *mm = current->mm;
-       unsigned long vsid;
-       int bpsize;
+       enum slb_index index;
  
         /*
-        * We are always above 1TB, hence use high user segment size.
+        * Choose the SLB slot for a new entry and return its index. The
+        * kernel argument says whether the entry is a kernel one, which is
+        * tracked per slot in slb_kern_bitmap. The returned index is never
+        * below SLB_NUM_BOLTED (enforced by the BUG_ON at the end).
+        *
+        * The allocation bitmaps can become out of synch with the SLB
+        * when the _switch code does slbie when bolting a new stack
+        * segment and it must not be anywhere else in the SLB. This leaves
+        * a kernel allocated entry that is unused in the SLB. With very
+        * large systems or small segment sizes, the bitmaps could slowly
+        * fill with these entries. They will eventually be cleared out
+        * by the round robin allocator in that case, so it's probably not
+        * worth accounting for.
          */
-       vsid = get_vsid(context_id, ea, mmu_highuser_ssize);
-       bpsize = get_slice_psize(mm, ea);
-       insert_slb_entry(vsid, ea, bpsize, mmu_highuser_ssize);
+
+       /*
+        * SLBs beyond 32 entries are allocated with stab_rr only.
+        * POWER7/8/9 have 32 SLB entries, this could be expanded if a
+        * future CPU has more.
+        *
+        * While slb_used_bitmap still has a clear bit, ffz() picks a clear
+        * index, which is then marked used (and marked in slb_kern_bitmap
+        * for kernel entries). Once all 32 bits are set, slots are recycled
+        * round-robin through stab_rr, wrapping from mmu_slb_size - 1 back
+        * to SLB_NUM_BOLTED; for indexes below 32 the slb_kern_bitmap bit
+        * is set or cleared to match the new owner of the slot.
+        */
+       if (local_paca->slb_used_bitmap != U32_MAX) {
+               index = ffz(local_paca->slb_used_bitmap);
+               local_paca->slb_used_bitmap |= 1U << index;
+               if (kernel)
+                       local_paca->slb_kern_bitmap |= 1U << index;
+       } else {
+               /* round-robin replacement of slb starting at SLB_NUM_BOLTED. */
+               index = local_paca->stab_rr;
+               if (index < (mmu_slb_size - 1))
+                       index++;
+               else
+                       index = SLB_NUM_BOLTED;
+               local_paca->stab_rr = index;
+               if (index < 32) {
+                       if (kernel)
+                               local_paca->slb_kern_bitmap |= 1U << index;
+                       else
+                               local_paca->slb_kern_bitmap &= ~(1U << index);
+               }
+       }
+       BUG_ON(index < SLB_NUM_BOLTED);
+
+       return index;
  }
  
-void slb_miss_large_addr(struct pt_regs *regs)
+static long slb_insert_entry(unsigned long ea, unsigned long context,
+                               unsigned long flags, int ssize, bool kernel)
  {
-       enum ctx_state prev_state = exception_enter();
-       unsigned long ea = regs->dar;
-       int context;
+       unsigned long vsid;
+       unsigned long vsid_data, esid_data;
+       enum slb_index index;   /* slot chosen by alloc_slb_index() */
  
-       if (REGION_ID(ea) != USER_REGION_ID)
-               goto slb_bad_addr;
+       vsid = get_vsid(context, ea, ssize);
+       if (!vsid)
+               return -EFAULT; /* a zero VSID is treated as a bad address */
  
         /*
-        * Are we beyound what the page table layout supports ?
+        * Insert one SLB entry for ea: pick a slot, build the VSID and
+        * ESID words, issue slbmte and, for user entries only, record the
+        * ESID in the slb cache. Returns 0 on success, or -EFAULT (above)
+        * when get_vsid() returns 0.
+        *
+        * There must not be a kernel SLB fault in alloc_slb_index or before
+        * slbmte here or the allocation bitmaps could get out of whack with
+        * the SLB.
+        *
+        * User SLB faults or preloads take this path which might get inlined
+        * into the caller, so add compiler barriers here to ensure unsafe
+        * memory accesses do not come between.
          */
-       if ((ea & ~REGION_MASK) >= H_PGTABLE_RANGE)
-               goto slb_bad_addr;
+       barrier();
  
-       /* Lower address should have been handled by asm code */
-       if (ea < (1UL << MAX_EA_BITS_PER_CONTEXT))
-               goto slb_bad_addr;
+       index = alloc_slb_index(kernel);
+
+       vsid_data = __mk_vsid_data(vsid, ssize, flags);
+       esid_data = mk_esid_data(ea, ssize, index);
+
+       /*
+        * No need for an isync before or after this slbmte. The exception
+        * we enter with and the rfid we exit with are context synchronizing.
+        * User preloads should add isync afterwards in case the kernel
+        * accesses user memory before it returns to userspace with rfid.
+        */
+       assert_slb_notexists(ea);
+       asm volatile("slbmte %0, %1" : : "r" (vsid_data), "r" (esid_data));
+
+       barrier();
+
+       if (!kernel)
+               slb_cache_update(esid_data);    /* user entries only */
+
+       return 0;
+}
+
+static long slb_allocate_kernel(unsigned long ea, unsigned long id)
+{      /*
+        * Build and insert an SLB entry for kernel address ea, whose region
+        * id the caller passes in. The region selects the page-size flags:
+        * linear mapping, vmemmap (only with CONFIG_SPARSEMEM_VMEMMAP), or
+        * the vmalloc region id, which is split at H_VMALLOC_END into
+        * vmalloc and I/O flags. Returns the result of slb_insert_entry(),
+        * or -EFAULT when the region id is not handled here or the offset
+        * within the region fails the range check.
+        */
+       unsigned long context;
+       unsigned long flags;
+       int ssize;
+
+       if (id == KERNEL_REGION_ID) {
+
+               /*
+                * We only support up to MAX_PHYSMEM_BITS
+                * NOTE(review): this bound uses > while the vmemmap and
+                * vmalloc checks below use >=; confirm that an offset equal
+                * to 1UL << MAX_PHYSMEM_BITS is meant to be accepted.
+                */
+               if ((ea & ~REGION_MASK) > (1UL << MAX_PHYSMEM_BITS))
+                       return -EFAULT;
+
+               flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_linear_psize].sllp;
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+       } else if (id == VMEMMAP_REGION_ID) {
+
+               if ((ea & ~REGION_MASK) >= (1ULL << MAX_EA_BITS_PER_CONTEXT))
+                       return -EFAULT;
+
+               flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_vmemmap_psize].sllp;
+#endif
+       } else if (id == VMALLOC_REGION_ID) {
+
+               if ((ea & ~REGION_MASK) >= (1ULL << MAX_EA_BITS_PER_CONTEXT))
+                       return -EFAULT;
+
+               /*
+                * Below H_VMALLOC_END the flags come from the PACA copy
+                * (vmalloc_sllp); at or above it the I/O page size is used.
+                * NOTE(review): this reads the PACA through get_paca() while
+                * slb_cache_update() and alloc_slb_index() use local_paca;
+                * confirm which accessor is intended on this path.
+                */
+               if (ea < H_VMALLOC_END)
+                       flags = get_paca()->vmalloc_sllp;
+               else
+                       flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_io_psize].sllp;
+       } else {
+               return -EFAULT;
+       }
+
+       /* 1T segments when the MMU feature is present, otherwise 256M. */
+       ssize = MMU_SEGSIZE_1T;
+       if (!mmu_has_feature(MMU_FTR_1T_SEGMENT))
+               ssize = MMU_SEGSIZE_256M;
+
+       context = get_kernel_context(ea);
+       return slb_insert_entry(ea, context, flags, ssize, true);
+}
+
+static long slb_allocate_user(struct mm_struct *mm, unsigned long ea)
+{      /*
+        * Build and insert a user SLB entry for address ea in mm.
+        * Returns the result of slb_insert_entry(), or -EFAULT when ea is
+        * at or above mm->context.slb_addr_limit, when get_user_context()
+        * returns 0, or when ea is at or above H_PGTABLE_RANGE (that last
+        * case also triggers WARN_ON).
+        */
+       unsigned long context;
+       unsigned long flags;
+       int bpsize;
+       int ssize;
  
         /*
          * consider this as bad access if we take a SLB miss
          * on an address above addr limit.
          */
-       if (ea >= current->mm->context.slb_addr_limit)
-               goto slb_bad_addr;
+       if (ea >= mm->context.slb_addr_limit)
+               return -EFAULT;
  
-       context = get_ea_context(&current->mm->context, ea);
+       context = get_user_context(&mm->context, ea);
         if (!context)
-               goto slb_bad_addr;
+               return -EFAULT;
+
+       if (unlikely(ea >= H_PGTABLE_RANGE)) {
+               WARN_ON(1);
+               return -EFAULT;
+       }
  
-       handle_multi_context_slb_miss(context, ea);
-       exception_exit(prev_state);
-       return;
+       ssize = user_segment_size(ea);
  
-slb_bad_addr:
-       if (user_mode(regs))
-               _exception(SIGSEGV, regs, SEGV_BNDERR, ea);
-       else
-               bad_page_fault(regs, ea, SIGSEGV);
-       exception_exit(prev_state);
+       bpsize = get_slice_psize(mm, ea);       /* page size index for ea in mm */
+       flags = SLB_VSID_USER | mmu_psize_defs[bpsize].sllp;
+
+       return slb_insert_entry(ea, context, flags, ssize, false);
+}
+
+long do_slb_fault(struct pt_regs *regs, unsigned long ea)
+{      /*
+        * Handle an SLB fault on effective address ea.
+        * Returns 0 when an entry was inserted, -EINVAL when regs->msr has
+        * MSR_RI clear, and otherwise the error from slb_allocate_kernel()
+        * or slb_allocate_user() (-EFAULT in this file). A user-region
+        * fault with no current->mm also returns -EFAULT.
+        */
+       unsigned long id = REGION_ID(ea);
+
+       /* IRQs are not reconciled here, so can't check irqs_disabled */
+       VM_WARN_ON(mfmsr() & MSR_EE);
+
+       if (unlikely(!(regs->msr & MSR_RI)))
+               return -EINVAL;
+
+       /*
+        * SLB kernel faults must be very careful not to touch anything
+        * that is not bolted. E.g., PACA and global variables are okay,
+        * mm->context stuff is not.
+        *
+        * SLB user faults can access all of kernel memory, but must be
+        * careful not to touch things like IRQ state because it is not
+        * "reconciled" here. The difficulty is that we must use
+        * fast_exception_return to return from kernel SLB faults without
+        * looking at possible non-bolted memory. We could test user vs
+        * kernel faults in the interrupt handler asm and do a full fault,
+        * reconcile, ret_from_except for user faults which would make them
+        * first class kernel code. But for performance it's probably nicer
+        * if they go via fast_exception_return too.
+        *
+        * NOTE(review): the branch below is chosen from the region id of
+        * the address alone, without looking at user_mode(regs); confirm
+        * that this is intended for user-mode accesses to kernel regions.
+        */
+       if (id >= KERNEL_REGION_ID) {
+               long err;
+#ifdef CONFIG_DEBUG_VM
+               /* Catch recursive kernel SLB faults. */
+               BUG_ON(local_paca->in_kernel_slb_handler);
+               local_paca->in_kernel_slb_handler = 1;
+#endif
+               err = slb_allocate_kernel(ea, id);
+#ifdef CONFIG_DEBUG_VM
+               local_paca->in_kernel_slb_handler = 0;
+#endif
+               return err;
+       } else {
+               struct mm_struct *mm = current->mm;
+               long err;
+
+               if (unlikely(!mm))
+                       return -EFAULT;
+
+               /*
+                * On success the address is also handed to preload_add()
+                * for the current thread (defined outside this hunk;
+                * presumably it records the segment for later preloading
+                * -- TODO confirm). Its return value is not used here.
+                */
+               err = slb_allocate_user(mm, ea);
+               if (!err)
+                       preload_add(current_thread_info(), ea);
+
+               return err;
+       }
+}
+
+void do_bad_slb_fault(struct pt_regs *regs, unsigned long ea, long err)
+{      /*
+        * Act on a non-zero error code for an SLB fault at ea:
+        *   -EFAULT: SIGSEGV with SEGV_BNDERR when regs is user mode,
+        *            bad_page_fault() otherwise;
+        *   -EINVAL: unrecoverable_exception();
+        *   anything else: BUG().
+        * The two handled codes are the ones do_slb_fault() returns.
+        */
+       if (err == -EFAULT) {
+               if (user_mode(regs))
+                       _exception(SIGSEGV, regs, SEGV_BNDERR, ea);
+               else
+                       bad_page_fault(regs, ea, SIGSEGV);
+       } else if (err == -EINVAL) {
+               unrecoverable_exception(regs);
+       } else {
+               BUG();
+       }
  }
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S

deleted file mode 100644 (file)

index 4ac5057..0000000
--- a/arch/powerpc/mm/slb_low.S
+++ /dev/null
@@ -1,335 +0,0 @@
-/*
- * Low-level SLB routines
- *
- * Copyright (C) 2004 David Gibson <dwg@au.ibm.com>, IBM
- *
- * Based on earlier C version:
- * Dave Engebretsen and Mike Corrigan {engebret|mikejc}@us.ibm.com
- *    Copyright (c) 2001 Dave Engebretsen
- * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM
- *
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License
- *  as published by the Free Software Foundation; either version
- *  2 of the License, or (at your option) any later version.
- */
-
-#include <asm/processor.h>
-#include <asm/ppc_asm.h>
-#include <asm/asm-offsets.h>
-#include <asm/cputable.h>
-#include <asm/page.h>
-#include <asm/mmu.h>
-#include <asm/pgtable.h>
-#include <asm/firmware.h>
-#include <asm/feature-fixups.h>
-
-/*
- * This macro generates asm code to compute the VSID scramble
- * function.  Used in slb_allocate() and do_stab_bolted.  The function
- * computed is: (protovsid*VSID_MULTIPLIER) % VSID_MODULUS
- *
- *     rt = register containing the proto-VSID and into which the
- *             VSID will be stored
- *     rx = scratch register (clobbered)
- *     rf = flags
- *
- *     - rt and rx must be different registers
- *     - The answer will end up in the low VSID_BITS bits of rt.  The higher
- *       bits may contain other garbage, so you may need to mask the
- *       result.
- */
-#define ASM_VSID_SCRAMBLE(rt, rx, rf, size)                            \
-       lis     rx,VSID_MULTIPLIER_##size@h;                            \
-       ori     rx,rx,VSID_MULTIPLIER_##size@l;                         \
-       mulld   rt,rt,rx;               /* rt = rt * MULTIPLIER */      \
-/*                                                                     \
- * powermac get slb fault before feature fixup, so make 65 bit part     \
- * the default part of feature fixup                                   \
- */                                                                    \
-BEGIN_MMU_FTR_SECTION                                                  \
-       srdi    rx,rt,VSID_BITS_65_##size;                              \
-       clrldi  rt,rt,(64-VSID_BITS_65_##size);                         \
-       add     rt,rt,rx;                                               \
-       addi    rx,rt,1;                                                \
-       srdi    rx,rx,VSID_BITS_65_##size;                              \
-       add     rt,rt,rx;                                               \
-       rldimi  rf,rt,SLB_VSID_SHIFT_##size,(64 - (SLB_VSID_SHIFT_##size + VSID_BITS_65_##size)); \
-MMU_FTR_SECTION_ELSE                                                   \
-       srdi    rx,rt,VSID_BITS_##size;                                 \
-       clrldi  rt,rt,(64-VSID_BITS_##size);                            \
-       add     rt,rt,rx;               /* add high and low bits */     \
-       addi    rx,rt,1;                                                \
-       srdi    rx,rx,VSID_BITS_##size; /* extract 2^VSID_BITS bit */   \
-       add     rt,rt,rx;                                               \
-       rldimi  rf,rt,SLB_VSID_SHIFT_##size,(64 - (SLB_VSID_SHIFT_##size + VSID_BITS_##size)); \
-ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_68_BIT_VA)
-
-
-/* void slb_allocate(unsigned long ea);
- *
- * Create an SLB entry for the given EA (user or kernel).
- *     r3 = faulting address, r13 = PACA
- *     r9, r10, r11 are clobbered by this function
- *     r3 is preserved.
- * No other registers are examined or changed.
- */
-_GLOBAL(slb_allocate)
-       /*
-        * Check if the address falls within the range of the first context, or
-        * if we may need to handle multi context. For the first context we
-        * allocate the slb entry via the fast path below. For large address we
-        * branch out to C-code and see if additional contexts have been
-        * allocated.
-        * The test here is:
-        *   (ea & ~REGION_MASK) >= (1ull << MAX_EA_BITS_PER_CONTEXT)
-        */
-       rldicr. r9,r3,4,(63 - MAX_EA_BITS_PER_CONTEXT - 4)
-       bne-    8f
-
-       srdi    r9,r3,60                /* get region */
-       srdi    r10,r3,SID_SHIFT        /* get esid */
-       cmpldi  cr7,r9,0xc              /* cmp PAGE_OFFSET for later use */
-
-       /* r3 = address, r10 = esid, cr7 = <> PAGE_OFFSET */
-       blt     cr7,0f                  /* user or kernel? */
-
-       /* Check if hitting the linear mapping or some other kernel space
-       */
-       bne     cr7,1f
-
-       /* Linear mapping encoding bits, the "li" instruction below will
-        * be patched by the kernel at boot
-        */
-.globl slb_miss_kernel_load_linear
-slb_miss_kernel_load_linear:
-       li      r11,0
-       /*
-        * context = (ea >> 60) - (0xc - 1)
-        * r9 = region id.
-        */
-       subi    r9,r9,KERNEL_REGION_CONTEXT_OFFSET
-
-BEGIN_FTR_SECTION
-       b       .Lslb_finish_load
-END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
-       b       .Lslb_finish_load_1T
-
-1:
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
-       cmpldi  cr0,r9,0xf
-       bne     1f
-/* Check virtual memmap region. To be patched at kernel boot */
-.globl slb_miss_kernel_load_vmemmap
-slb_miss_kernel_load_vmemmap:
-       li      r11,0
-       b       6f
-1:
-#endif /* CONFIG_SPARSEMEM_VMEMMAP */
-
-       /*
-        * r10 contains the ESID, which is the original faulting EA shifted
-        * right by 28 bits. We need to compare that with (H_VMALLOC_END >> 28)
-        * which is 0xd00038000. That can't be used as an immediate, even if we
-        * ignored the 0xd, so we have to load it into a register, and we only
-        * have one register free. So we must load all of (H_VMALLOC_END >> 28)
-        * into a register and compare ESID against that.
-        */
-       lis     r11,(H_VMALLOC_END >> 32)@h     // r11 = 0xffffffffd0000000
-       ori     r11,r11,(H_VMALLOC_END >> 32)@l // r11 = 0xffffffffd0003800
-       // Rotate left 4, then mask with 0xffffffff0
-       rldic   r11,r11,4,28                    // r11 = 0xd00038000
-       cmpld   r10,r11                         // if r10 >= r11
-       bge     5f                              //   goto io_mapping
-
-       /*
-        * vmalloc mapping gets the encoding from the PACA as the mapping
-        * can be demoted from 64K -> 4K dynamically on some machines.
-        */
-       lhz     r11,PACAVMALLOCSLLP(r13)
-       b       6f
-5:
-       /* IO mapping */
-.globl slb_miss_kernel_load_io
-slb_miss_kernel_load_io:
-       li      r11,0
-6:
-       /*
-        * context = (ea >> 60) - (0xc - 1)
-        * r9 = region id.
-        */
-       subi    r9,r9,KERNEL_REGION_CONTEXT_OFFSET
-
-BEGIN_FTR_SECTION
-       b       .Lslb_finish_load
-END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
-       b       .Lslb_finish_load_1T
-
-0:     /*
-        * For userspace addresses, make sure this is region 0.
-        */
-       cmpdi   r9, 0
-       bne-    8f
-        /*
-         * user space make sure we are within the allowed limit
-        */
-       ld      r11,PACA_SLB_ADDR_LIMIT(r13)
-       cmpld   r3,r11
-       bge-    8f
-
-       /* when using slices, we extract the psize off the slice bitmaps
-        * and then we need to get the sllp encoding off the mmu_psize_defs
-        * array.
-        *
-        * XXX This is a bit inefficient especially for the normal case,
-        * so we should try to implement a fast path for the standard page
-        * size using the old sllp value so we avoid the array. We cannot
-        * really do dynamic patching unfortunately as processes might flip
-        * between 4k and 64k standard page size
-        */
-#ifdef CONFIG_PPC_MM_SLICES
-       /* r10 have esid */
-       cmpldi  r10,16
-       /* below SLICE_LOW_TOP */
-       blt     5f
-       /*
-        * Handle hpsizes,
-        * r9 is get_paca()->context.high_slices_psize[index], r11 is mask_index
-        */
-       srdi    r11,r10,(SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT + 1) /* index */
-       addi    r9,r11,PACAHIGHSLICEPSIZE
-       lbzx    r9,r13,r9               /* r9 is hpsizes[r11] */
-       /* r11 = (r10 >> (SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT)) & 0x1 */
-       rldicl  r11,r10,(64 - (SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT)),63
-       b       6f
-
-5:
-       /*
-        * Handle lpsizes
-        * r9 is get_paca()->context.low_slices_psize[index], r11 is mask_index
-        */
-       srdi    r11,r10,1 /* index */
-       addi    r9,r11,PACALOWSLICESPSIZE
-       lbzx    r9,r13,r9               /* r9 is lpsizes[r11] */
-       rldicl  r11,r10,0,63            /* r11 = r10 & 0x1 */
-6:
-       sldi    r11,r11,2  /* index * 4 */
-       /* Extract the psize and multiply to get an array offset */
-       srd     r9,r9,r11
-       andi.   r9,r9,0xf
-       mulli   r9,r9,MMUPSIZEDEFSIZE
-
-       /* Now get to the array and obtain the sllp
-        */
-       ld      r11,PACATOC(r13)
-       ld      r11,mmu_psize_defs@got(r11)
-       add     r11,r11,r9
-       ld      r11,MMUPSIZESLLP(r11)
-       ori     r11,r11,SLB_VSID_USER
-#else
-       /* paca context sllp already contains the SLB_VSID_USER bits */
-       lhz     r11,PACACONTEXTSLLP(r13)
-#endif /* CONFIG_PPC_MM_SLICES */
-
-       ld      r9,PACACONTEXTID(r13)
-BEGIN_FTR_SECTION
-       cmpldi  r10,0x1000
-       bge     .Lslb_finish_load_1T
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
-       b       .Lslb_finish_load
-
-8:     /* invalid EA - return an error indication */
-       crset   4*cr0+eq                /* indicate failure */
-       blr
-
-/*
- * Finish loading of an SLB entry and return
- *
- * r3 = EA, r9 = context, r10 = ESID, r11 = flags, clobbers r9, cr7 = <> PAGE_OFFSET
- */
-.Lslb_finish_load:
-       rldimi  r10,r9,ESID_BITS,0
-       ASM_VSID_SCRAMBLE(r10,r9,r11,256M)
-       /* r3 = EA, r11 = VSID data */
-       /*
-        * Find a slot, round robin. Previously we tried to find a
-        * free slot first but that took too long. Unfortunately we
-        * dont have any LRU information to help us choose a slot.
-        */
-
-       mr      r9,r3
-
-       /* slb_finish_load_1T continues here. r9=EA with non-ESID bits clear */
-7:     ld      r10,PACASTABRR(r13)
-       addi    r10,r10,1
-       /* This gets soft patched on boot. */
-.globl slb_compare_rr_to_size
-slb_compare_rr_to_size:
-       cmpldi  r10,0
-
-       blt+    4f
-       li      r10,SLB_NUM_BOLTED
-
-4:
-       std     r10,PACASTABRR(r13)
-
-3:
-       rldimi  r9,r10,0,36             /* r9  = EA[0:35] | entry */
-       oris    r10,r9,SLB_ESID_V@h     /* r10 = r9 | SLB_ESID_V */
-
-       /* r9 = ESID data, r11 = VSID data */
-
-       /*
-        * No need for an isync before or after this slbmte. The exception
-        * we enter with and the rfid we exit with are context synchronizing.
-        */
-       slbmte  r11,r10
-
-       /* we're done for kernel addresses */
-       crclr   4*cr0+eq                /* set result to "success" */
-       bgelr   cr7
-
-       /* Update the slb cache */
-       lhz     r9,PACASLBCACHEPTR(r13) /* offset = paca->slb_cache_ptr */
-       cmpldi  r9,SLB_CACHE_ENTRIES
-       bge     1f
-
-       /* still room in the slb cache */
-       sldi    r11,r9,2                /* r11 = offset * sizeof(u32) */
-       srdi    r10,r10,28              /* get the 36 bits of the ESID */
-       add     r11,r11,r13             /* r11 = (u32 *)paca + offset */
-       stw     r10,PACASLBCACHE(r11)   /* paca->slb_cache[offset] = esid */
-       addi    r9,r9,1                 /* offset++ */
-       b       2f
-1:                                     /* offset >= SLB_CACHE_ENTRIES */
-       li      r9,SLB_CACHE_ENTRIES+1
-2:
-       sth     r9,PACASLBCACHEPTR(r13) /* paca->slb_cache_ptr = offset */
-       crclr   4*cr0+eq                /* set result to "success" */
-       blr
-
-/*
- * Finish loading of a 1T SLB entry (for the kernel linear mapping) and return.
- *
- * r3 = EA, r9 = context, r10 = ESID(256MB), r11 = flags, clobbers r9
- */
-.Lslb_finish_load_1T:
-       srdi    r10,r10,(SID_SHIFT_1T - SID_SHIFT)      /* get 1T ESID */
-       rldimi  r10,r9,ESID_BITS_1T,0
-       ASM_VSID_SCRAMBLE(r10,r9,r11,1T)
-
-       li      r10,MMU_SEGSIZE_1T
-       rldimi  r11,r10,SLB_VSID_SSIZE_SHIFT,0  /* insert segment size */
-
-       /* r3 = EA, r11 = VSID data */
-       clrrdi  r9,r3,SID_SHIFT_1T      /* clear out non-ESID bits */
-       b       7b
-
-
-_ASM_NOKPROBE_SYMBOL(slb_allocate)
-_ASM_NOKPROBE_SYMBOL(slb_miss_kernel_load_linear)
-_ASM_NOKPROBE_SYMBOL(slb_miss_kernel_load_io)
-_ASM_NOKPROBE_SYMBOL(slb_compare_rr_to_size)
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
-_ASM_NOKPROBE_SYMBOL(slb_miss_kernel_load_vmemmap)
-#endif
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c

index 205fe55..06898c1 100644 (file)
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -31,6 +31,7 @@
  #include <linux/spinlock.h>
  #include <linux/export.h>
  #include <linux/hugetlb.h>
+#include <linux/sched/mm.h>
  #include <asm/mman.h>
  #include <asm/mmu.h>
  #include <asm/copro.h>
@@ -61,6 +62,13 @@ static void slice_print_mask(const char *label, const struct slice_mask *mask) {
  
  #endif
  
+static inline bool slice_addr_is_low(unsigned long addr)
+{
+       u64 tmp = (u64)addr;
+
+       return tmp < SLICE_LOW_TOP;
+}
+
  static void slice_range_to_mask(unsigned long start, unsigned long len,
                                 struct slice_mask *ret)
  {
@@ -70,7 +78,7 @@ static void slice_range_to_mask(unsigned long start, unsigned long len,
         if (SLICE_NUM_HIGH)
                 bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
  
-       if (start < SLICE_LOW_TOP) {
+       if (slice_addr_is_low(start)) {
                 unsigned long mend = min(end,
                                          (unsigned long)(SLICE_LOW_TOP - 1));
  
@@ -78,7 +86,7 @@ static void slice_range_to_mask(unsigned long start, unsigned long len,
                         - (1u << GET_LOW_SLICE_INDEX(start));
         }
  
-       if ((start + len) > SLICE_LOW_TOP) {
+       if (SLICE_NUM_HIGH && !slice_addr_is_low(end)) {
                 unsigned long start_index = GET_HIGH_SLICE_INDEX(start);
                 unsigned long align_end = ALIGN(end, (1UL << SLICE_HIGH_SHIFT));
                 unsigned long count = GET_HIGH_SLICE_INDEX(align_end) - start_index;
@@ -133,7 +141,7 @@ static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret,
                 if (!slice_low_has_vma(mm, i))
                         ret->low_slices |= 1u << i;
  
-       if (high_limit <= SLICE_LOW_TOP)
+       if (slice_addr_is_low(high_limit - 1))
                 return;
  
         for (i = 0; i < GET_HIGH_SLICE_INDEX(high_limit); i++)
@@ -182,7 +190,7 @@ static bool slice_check_range_fits(struct mm_struct *mm,
         unsigned long end = start + len - 1;
         u64 low_slices = 0;
  
-       if (start < SLICE_LOW_TOP) {
+       if (slice_addr_is_low(start)) {
                 unsigned long mend = min(end,
                                          (unsigned long)(SLICE_LOW_TOP - 1));
  
@@ -192,7 +200,7 @@ static bool slice_check_range_fits(struct mm_struct *mm,
         if ((low_slices & available->low_slices) != low_slices)
                 return false;
  
-       if (SLICE_NUM_HIGH && ((start + len) > SLICE_LOW_TOP)) {
+       if (SLICE_NUM_HIGH && !slice_addr_is_low(end)) {
                 unsigned long start_index = GET_HIGH_SLICE_INDEX(start);
                 unsigned long align_end = ALIGN(end, (1UL << SLICE_HIGH_SHIFT));
                 unsigned long count = GET_HIGH_SLICE_INDEX(align_end) - start_index;
@@ -219,7 +227,7 @@ static void slice_flush_segments(void *parm)
         copy_mm_to_paca(current->active_mm);
  
         local_irq_save(flags);
-       slb_flush_and_rebolt();
+       slb_flush_and_restore_bolted();
         local_irq_restore(flags);
  #endif
  }
@@ -303,7 +311,7 @@ static bool slice_scan_available(unsigned long addr,
                                  int end, unsigned long *boundary_addr)
  {
         unsigned long slice;
-       if (addr < SLICE_LOW_TOP) {
+       if (slice_addr_is_low(addr)) {
                 slice = GET_LOW_SLICE_INDEX(addr);
                 *boundary_addr = (slice + end) << SLICE_LOW_SHIFT;
                 return !!(available->low_slices & (1u << slice));
@@ -706,7 +714,7 @@ unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr)
  
         VM_BUG_ON(radix_enabled());
  
-       if (addr < SLICE_LOW_TOP) {
+       if (slice_addr_is_low(addr)) {
                 psizes = mm->context.low_slices_psize;
                 index = GET_LOW_SLICE_INDEX(addr);
         } else {
@@ -757,6 +765,20 @@ void slice_init_new_context_exec(struct mm_struct *mm)
                 bitmap_fill(mask->high_slices, SLICE_NUM_HIGH);
  }
  
+#ifdef CONFIG_PPC_BOOK3S_64
+void slice_setup_new_exec(void)
+{
+       struct mm_struct *mm = current->mm;
+
+       slice_dbg("slice_setup_new_exec(mm=%p)\n", mm);
+
+       if (!is_32bit_task())
+               return;
+
+       mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW;
+}
+#endif
+
  void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
                            unsigned long len, unsigned int psize)
  {
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c

index 4c4dfc4..6a23b9e 100644 (file)
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -366,6 +366,7 @@ static inline void _tlbiel_lpid_guest(unsigned long lpid, unsigned long ric)
                 __tlbiel_lpid_guest(lpid, set, RIC_FLUSH_TLB);
  
         asm volatile("ptesync": : :"memory");
+       asm volatile(PPC_INVALIDATE_ERAT : : :"memory");
  }
  
  
@@ -1016,7 +1017,6 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
                         goto local;
                 }
                 _tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
-               goto local;
         } else {
  local:
                 _tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c

index 15fe5f0..ae5d568 100644 (file)
--- a/arch/powerpc/mm/tlb_nohash.c
+++ b/arch/powerpc/mm/tlb_nohash.c
@@ -503,6 +503,9 @@ static void setup_page_sizes(void)
                 for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
                         struct mmu_psize_def *def = &mmu_psize_defs[psize];
  
+                       if (!def->shift)
+                               continue;
+
                         if (tlb1ps & (1U << (def->shift - 10))) {
                                 def->flags |= MMU_PAGE_SIZE_DIRECT;
  
diff --git a/arch/powerpc/oprofile/Makefile b/arch/powerpc/oprofile/Makefile

index 7a7834c..8d26d74 100644 (file)
--- a/arch/powerpc/oprofile/Makefile
+++ b/arch/powerpc/oprofile/Makefile
@@ -1,5 +1,4 @@
  # SPDX-License-Identifier: GPL-2.0
-subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
  
  ccflags-$(CONFIG_PPC64)        := $(NO_MINIMAL_TOC)
  
diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile

index 82986d2..ab26df5 100644 (file)
--- a/arch/powerpc/perf/Makefile
+++ b/arch/powerpc/perf/Makefile
@@ -1,5 +1,4 @@
  # SPDX-License-Identifier: GPL-2.0
-subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
  
  obj-$(CONFIG_PERF_EVENTS)      += callchain.o perf_regs.o
  
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c

index 1fafc32..6954636 100644 (file)
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -1392,7 +1392,7 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id
         if (ret)
                 goto err_free_cpuhp_mem;
  
-       pr_info("%s performance monitor hardware support registered\n",
+       pr_debug("%s performance monitor hardware support registered\n",
                                                         pmu_ptr->pmu.name);
  
         return 0;
diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c

index 7963658..6dbae98 100644 (file)
--- a/arch/powerpc/perf/power7-pmu.c
+++ b/arch/powerpc/perf/power7-pmu.c
@@ -238,6 +238,7 @@ static int power7_marked_instr_event(u64 event)
         case 6:
                 if (psel == 0x64)
                         return pmc >= 3;
+               break;
         case 8:
                 return unit == 0xd;
         }
diff --git a/arch/powerpc/platforms/40x/Kconfig b/arch/powerpc/platforms/40x/Kconfig

index 60254a3..2a9d662 100644 (file)
--- a/arch/powerpc/platforms/40x/Kconfig
+++ b/arch/powerpc/platforms/40x/Kconfig
@@ -2,7 +2,6 @@
  config ACADIA
         bool "Acadia"
         depends on 40x
-       default n
         select PPC40x_SIMPLE
         select 405EZ
         help
@@ -11,7 +10,6 @@ config ACADIA
  config EP405
         bool "EP405/EP405PC"
         depends on 40x
-       default n
         select 405GP
         select PCI
         help
@@ -20,7 +18,6 @@ config EP405
  config HOTFOOT
          bool "Hotfoot"
         depends on 40x
-       default n
         select PPC40x_SIMPLE
         select PCI
          help
@@ -29,7 +26,6 @@ config HOTFOOT
  config KILAUEA
         bool "Kilauea"
         depends on 40x
-       default n
         select 405EX
         select PPC40x_SIMPLE
         select PPC4xx_PCI_EXPRESS
@@ -41,7 +37,6 @@ config KILAUEA
  config MAKALU
         bool "Makalu"
         depends on 40x
-       default n
         select 405EX
         select PCI
         select PPC4xx_PCI_EXPRESS
@@ -62,7 +57,6 @@ config WALNUT
  config XILINX_VIRTEX_GENERIC_BOARD
         bool "Generic Xilinx Virtex board"
         depends on 40x
-       default n
         select XILINX_VIRTEX_II_PRO
         select XILINX_VIRTEX_4_FX
         select XILINX_INTC
@@ -80,7 +74,6 @@ config XILINX_VIRTEX_GENERIC_BOARD
  config OBS600
         bool "OpenBlockS 600"
         depends on 40x
-       default n
         select 405EX
         select PPC40x_SIMPLE
         help
@@ -90,7 +83,6 @@ config OBS600
  config PPC40x_SIMPLE
         bool "Simple PowerPC 40x board support"
         depends on 40x
-       default n
         help
           This option enables the simple PowerPC 40x platform support.
  
@@ -156,7 +148,6 @@ config IBM405_ERR51
  config APM8018X
         bool "APM8018X"
         depends on 40x
-       default n
         select PPC40x_SIMPLE
         help
           This option enables support for the AppliedMicro APM8018X evaluation
diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig

index a601142..f024efd 100644 (file)
--- a/arch/powerpc/platforms/44x/Kconfig
+++ b/arch/powerpc/platforms/44x/Kconfig
@@ -2,7 +2,6 @@
  config PPC_47x
         bool "Support for 47x variant"
         depends on 44x
-       default n
         select MPIC
         help
           This option enables support for the 47x family of processors and is
@@ -11,7 +10,6 @@ config PPC_47x
  config BAMBOO
         bool "Bamboo"
         depends on 44x
-       default n
         select PPC44x_SIMPLE
         select 440EP
         select PCI
@@ -21,7 +19,6 @@ config BAMBOO
  config BLUESTONE
         bool "Bluestone"
         depends on 44x
-       default n
         select PPC44x_SIMPLE
         select APM821xx
         select PCI_MSI
@@ -44,7 +41,6 @@ config EBONY
  config SAM440EP
          bool "Sam440ep"
         depends on 44x
-        default n
          select 440EP
          select PCI
          help
@@ -53,7 +49,6 @@ config SAM440EP
  config SEQUOIA
         bool "Sequoia"
         depends on 44x
-       default n
         select PPC44x_SIMPLE
         select 440EPX
         help
@@ -62,7 +57,6 @@ config SEQUOIA
  config TAISHAN
         bool "Taishan"
         depends on 44x
-       default n
         select PPC44x_SIMPLE
         select 440GX
         select PCI
@@ -73,7 +67,6 @@ config TAISHAN
  config KATMAI
         bool "Katmai"
         depends on 44x
-       default n
         select PPC44x_SIMPLE
         select 440SPe
         select PCI
@@ -86,7 +79,6 @@ config KATMAI
  config RAINIER
         bool "Rainier"
         depends on 44x
-       default n
         select PPC44x_SIMPLE
         select 440GRX
         select PCI
@@ -96,7 +88,6 @@ config RAINIER
  config WARP
         bool "PIKA Warp"
         depends on 44x
-       default n
         select 440EP
         help
           This option enables support for the PIKA Warp(tm) Appliance. The Warp
@@ -109,7 +100,6 @@ config WARP
  config ARCHES
         bool "Arches"
         depends on 44x
-       default n
         select PPC44x_SIMPLE
         select 460EX # Odd since it uses 460GT but the effects are the same
         select PCI
@@ -120,7 +110,6 @@ config ARCHES
  config CANYONLANDS
         bool "Canyonlands"
         depends on 44x
-       default n
         select 460EX
         select PCI
         select PPC4xx_PCI_EXPRESS
@@ -134,7 +123,6 @@ config CANYONLANDS
  config GLACIER
         bool "Glacier"
         depends on 44x
-       default n
         select PPC44x_SIMPLE
         select 460EX # Odd since it uses 460GT but the effects are the same
         select PCI
@@ -147,7 +135,6 @@ config GLACIER
  config REDWOOD
         bool "Redwood"
         depends on 44x
-       default n
         select PPC44x_SIMPLE
         select 460SX
         select PCI
@@ -160,7 +147,6 @@ config REDWOOD
  config EIGER
         bool "Eiger"
         depends on 44x
-       default n
         select PPC44x_SIMPLE
         select 460SX
         select PCI
@@ -172,7 +158,6 @@ config EIGER
  config YOSEMITE
         bool "Yosemite"
         depends on 44x
-       default n
         select PPC44x_SIMPLE
         select 440EP
         select PCI
@@ -182,7 +167,6 @@ config YOSEMITE
  config ISS4xx
         bool "ISS 4xx Simulator"
         depends on (44x || 40x)
-       default n
         select 405GP if 40x
         select 440GP if 44x && !PPC_47x
         select PPC_FPU
@@ -193,7 +177,6 @@ config ISS4xx
  config CURRITUCK
         bool "IBM Currituck (476fpe) Support"
         depends on PPC_47x
-       default n
         select SWIOTLB
         select 476FPE
         select PPC4xx_PCI_EXPRESS
@@ -203,7 +186,6 @@ config CURRITUCK
  config FSP2
         bool "IBM FSP2 (476fpe) Support"
         depends on PPC_47x
-       default n
         select 476FPE
         select IBM_EMAC_EMAC4 if IBM_EMAC
         select IBM_EMAC_RGMII if IBM_EMAC
@@ -215,7 +197,6 @@ config FSP2
  config AKEBONO
         bool "IBM Akebono (476gtr) Support"
         depends on PPC_47x
-       default n
         select SWIOTLB
         select 476FPE
         select PPC4xx_PCI_EXPRESS
@@ -241,7 +222,6 @@ config AKEBONO
  config ICON
         bool "Icon"
         depends on 44x
-       default n
         select PPC44x_SIMPLE
         select 440SPe
         select PCI
@@ -252,7 +232,6 @@ config ICON
  config XILINX_VIRTEX440_GENERIC_BOARD
         bool "Generic Xilinx Virtex 5 FXT board support"
         depends on 44x
-       default n
         select XILINX_VIRTEX_5_FXT
         select XILINX_INTC
         help
@@ -280,7 +259,6 @@ config XILINX_ML510
  config PPC44x_SIMPLE
         bool "Simple PowerPC 44x board support"
         depends on 44x
-       default n
         help
           This option enables the simple PowerPC 44x platform support.
  
diff --git a/arch/powerpc/platforms/44x/fsp2.c b/arch/powerpc/platforms/44x/fsp2.c

index 04f0c73..7a507f7 100644 (file)
--- a/arch/powerpc/platforms/44x/fsp2.c
+++ b/arch/powerpc/platforms/44x/fsp2.c
@@ -210,15 +210,15 @@ static void node_irq_request(const char *compat, irq_handler_t errirq_handler)
         for_each_compatible_node(np, NULL, compat) {
                 irq = irq_of_parse_and_map(np, 0);
                 if (irq == NO_IRQ) {
-                       pr_err("device tree node %s is missing a interrupt",
-                             np->name);
+                       pr_err("device tree node %pOFn is missing a interrupt",
+                             np);
                         return;
                 }
  
                 rc = request_irq(irq, errirq_handler, 0, np->name, np);
                 if (rc) {
-                       pr_err("fsp_of_probe: request_irq failed: np=%s rc=%d",
-                             np->full_name, rc);
+                       pr_err("fsp_of_probe: request_irq failed: np=%pOF rc=%d",
+                             np, rc);
                         return;
                 }
         }
diff --git a/arch/powerpc/platforms/4xx/ocm.c b/arch/powerpc/platforms/4xx/ocm.c

index 69d9f60..f5bbd45 100644 (file)
--- a/arch/powerpc/platforms/4xx/ocm.c
+++ b/arch/powerpc/platforms/4xx/ocm.c
@@ -113,7 +113,6 @@ static void __init ocm_init_node(int count, struct device_node *node)
         int len;
  
         struct resource rsrc;
-       int ioflags;
  
         ocm = ocm_get_node(count);
  
@@ -179,9 +178,8 @@ static void __init ocm_init_node(int count, struct device_node *node)
  
         /* ioremap the non-cached region */
         if (ocm->nc.memtotal) {
-               ioflags = _PAGE_NO_CACHE | _PAGE_GUARDED | _PAGE_EXEC;
                 ocm->nc.virt = __ioremap(ocm->nc.phys, ocm->nc.memtotal,
-                                         ioflags);
+                                        _PAGE_EXEC | PAGE_KERNEL_NCG);
  
                 if (!ocm->nc.virt) {
                         printk(KERN_ERR
@@ -195,9 +193,8 @@ static void __init ocm_init_node(int count, struct device_node *node)
         /* ioremap the cached region */
  
         if (ocm->c.memtotal) {
-               ioflags = _PAGE_EXEC;
                 ocm->c.virt = __ioremap(ocm->c.phys, ocm->c.memtotal,
-                                        ioflags);
+                                       _PAGE_EXEC | PAGE_KERNEL);
  
                 if (!ocm->c.virt) {
                         printk(KERN_ERR
diff --git a/arch/powerpc/platforms/4xx/soc.c b/arch/powerpc/platforms/4xx/soc.c

index 5e36508..1844bf5 100644 (file)
--- a/arch/powerpc/platforms/4xx/soc.c
+++ b/arch/powerpc/platforms/4xx/soc.c
@@ -200,7 +200,7 @@ void ppc4xx_reset_system(char *cmd)
         u32 reset_type = DBCR0_RST_SYSTEM;
         const u32 *prop;
  
-       np = of_find_node_by_type(NULL, "cpu");
+       np = of_get_cpu_node(0, NULL);
         if (np) {
                 prop = of_get_property(np, "reset-type", NULL);
  
diff --git a/arch/powerpc/platforms/82xx/Kconfig b/arch/powerpc/platforms/82xx/Kconfig

index 6e04099..1947a88 100644 (file)
--- a/arch/powerpc/platforms/82xx/Kconfig
+++ b/arch/powerpc/platforms/82xx/Kconfig
@@ -51,7 +51,6 @@ endif
  
  config PQ2ADS
         bool
-       default n
  
  config 8260
         bool
diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c

index 7e966f4..fff7242 100644 (file)
--- a/arch/powerpc/platforms/85xx/smp.c
+++ b/arch/powerpc/platforms/85xx/smp.c
@@ -216,8 +216,8 @@ static int smp_85xx_start_cpu(int cpu)
  
         /* Map the spin table */
         if (ioremappable)
-               spin_table = ioremap_prot(*cpu_rel_addr,
-                       sizeof(struct epapr_spin_table), _PAGE_COHERENT);
+               spin_table = ioremap_coherent(*cpu_rel_addr,
+                                             sizeof(struct epapr_spin_table));
         else
                 spin_table = phys_to_virt(*cpu_rel_addr);
  
diff --git a/arch/powerpc/platforms/8xx/m8xx_setup.c b/arch/powerpc/platforms/8xx/m8xx_setup.c

index 027c42d..f1c805c 100644 (file)
--- a/arch/powerpc/platforms/8xx/m8xx_setup.c
+++ b/arch/powerpc/platforms/8xx/m8xx_setup.c
@@ -66,7 +66,7 @@ static int __init get_freq(char *name, unsigned long *val)
         int found = 0;
  
         /* The cpu node should have timebase and clock frequency properties */
-       cpu = of_find_node_by_type(NULL, "cpu");
+       cpu = of_get_cpu_node(0, NULL);
  
         if (cpu) {
                 fp = of_get_property(cpu, name, NULL);
@@ -147,8 +147,9 @@ void __init mpc8xx_calibrate_decr(void)
          * we have to enable the timebase).  The decrementer interrupt
          * is wired into the vector table, nothing to do here for that.
          */
-       cpu = of_find_node_by_type(NULL, "cpu");
+       cpu = of_get_cpu_node(0, NULL);
         virq= irq_of_parse_and_map(cpu, 0);
+       of_node_put(cpu);
         irq = virq_to_hw(virq);
  
         sys_tmr2 = immr_map(im_sit);
diff --git a/arch/powerpc/platforms/8xx/machine_check.c b/arch/powerpc/platforms/8xx/machine_check.c

index 4020167..9944fc3 100644 (file)
--- a/arch/powerpc/platforms/8xx/machine_check.c
+++ b/arch/powerpc/platforms/8xx/machine_check.c
@@ -18,9 +18,9 @@ int machine_check_8xx(struct pt_regs *regs)
         pr_err("Machine check in kernel mode.\n");
         pr_err("Caused by (from SRR1=%lx): ", reason);
         if (reason & 0x40000000)
-               pr_err("Fetch error at address %lx\n", regs->nip);
+               pr_cont("Fetch error at address %lx\n", regs->nip);
         else
-               pr_err("Data access error at address %lx\n", regs->dar);
+               pr_cont("Data access error at address %lx\n", regs->dar);
  
  #ifdef CONFIG_PCI
         /* the qspan pci read routines can cause machine checks -- Cort
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig

index 14ef17e..260a56b 100644 (file)
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -23,7 +23,6 @@ source "arch/powerpc/platforms/amigaone/Kconfig"
  
  config KVM_GUEST
         bool "KVM Guest support"
-       default n
         select EPAPR_PARAVIRT
         ---help---
           This option enables various optimizations for running under the KVM
@@ -34,7 +33,6 @@ config KVM_GUEST
  
  config EPAPR_PARAVIRT
         bool "ePAPR para-virtualization support"
-       default n
         help
           Enables ePAPR para-virtualization support for guests.
  
@@ -74,7 +72,6 @@ config PPC_DT_CPU_FTRS
  config UDBG_RTAS_CONSOLE
         bool "RTAS based debug console"
         depends on PPC_RTAS
-       default n
  
  config PPC_SMP_MUXED_IPI
         bool
@@ -86,16 +83,13 @@ config PPC_SMP_MUXED_IPI
  
  config IPIC
         bool
-       default n
  
  config MPIC
         bool
-       default n
  
  config MPIC_TIMER
         bool "MPIC Global Timer"
         depends on MPIC && FSL_SOC
-       default n
         help
           The MPIC global timer is a hardware timer inside the
           Freescale PIC complying with OpenPIC standard. When the
@@ -107,7 +101,6 @@ config MPIC_TIMER
  config FSL_MPIC_TIMER_WAKEUP
         tristate "Freescale MPIC global timer wakeup driver"
         depends on FSL_SOC &&  MPIC_TIMER && PM
-       default n
         help
           The driver provides a way to wake up the system by MPIC
           timer.
@@ -115,43 +108,35 @@ config FSL_MPIC_TIMER_WAKEUP
  
  config PPC_EPAPR_HV_PIC
         bool
-       default n
         select EPAPR_PARAVIRT
  
  config MPIC_WEIRD
         bool
-       default n
  
  config MPIC_MSGR
         bool "MPIC message register support"
         depends on MPIC
-       default n
         help
           Enables support for the MPIC message registers.  These
           registers are used for inter-processor communication.
  
  config PPC_I8259
         bool
-       default n
  
  config U3_DART
         bool
         depends on PPC64
-       default n
  
  config PPC_RTAS
         bool
-       default n
  
  config RTAS_ERROR_LOGGING
         bool
         depends on PPC_RTAS
-       default n
  
  config PPC_RTAS_DAEMON
         bool
         depends on PPC_RTAS
-       default n
  
  config RTAS_PROC
         bool "Proc interface to RTAS"
@@ -164,11 +149,9 @@ config RTAS_FLASH
  
  config MMIO_NVRAM
         bool
-       default n
  
  config MPIC_U3_HT_IRQS
         bool
-       default n
  
  config MPIC_BROKEN_REGREAD
         bool
@@ -187,15 +170,12 @@ config EEH
  
  config PPC_MPC106
         bool
-       default n
  
  config PPC_970_NAP
         bool
-       default n
  
  config PPC_P7_NAP
         bool
-       default n
  
  config PPC_INDIRECT_PIO
         bool
@@ -295,7 +275,6 @@ config CPM2
  
  config FSL_ULI1575
         bool
-       default n
         select GENERIC_ISA_DMA
         help
           Supports for the ULI1575 PCIe south bridge that exists on some
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype

index 6c6a7c7..f4e2c57 100644 (file)
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -1,7 +1,6 @@
  # SPDX-License-Identifier: GPL-2.0
  config PPC64
         bool "64-bit kernel"
-       default n
         select ZLIB_DEFLATE
         help
           This option selects whether a 32-bit or a 64-bit kernel
@@ -72,6 +71,7 @@ config PPC_BOOK3S_64
         select PPC_HAVE_PMU_SUPPORT
         select SYS_SUPPORTS_HUGETLBFS
         select HAVE_ARCH_TRANSPARENT_HUGEPAGE
+       select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
         select ARCH_SUPPORTS_NUMA_BALANCING
         select IRQ_WORK
  
@@ -368,7 +368,6 @@ config PPC_MM_SLICES
         bool
         default y if PPC_BOOK3S_64
         default y if PPC_8xx && HUGETLB_PAGE
-       default n
  
  config PPC_HAVE_PMU_SUPPORT
         bool
@@ -382,7 +381,6 @@ config PPC_PERF_CTRS
  config FORCE_SMP
         # Allow platforms to force SMP=y by selecting this
         bool
-       default n
         select SMP
  
  config SMP
@@ -423,7 +421,6 @@ config CHECK_CACHE_COHERENCY
  
  config PPC_DOORBELL
         bool
-       default n
  
  endmenu
  
diff --git a/arch/powerpc/platforms/Makefile b/arch/powerpc/platforms/Makefile

index e46bb7e..143d441 100644 (file)
--- a/arch/powerpc/platforms/Makefile
+++ b/arch/powerpc/platforms/Makefile
@@ -1,7 +1,5 @@
  # SPDX-License-Identifier: GPL-2.0
  
-subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
-
  obj-$(CONFIG_FSL_ULI1575)      += fsl_uli1575.o
  
  obj-$(CONFIG_PPC_PMAC)         += powermac/
diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig

index 9f5958f..4b2f114 100644 (file)
--- a/arch/powerpc/platforms/cell/Kconfig
+++ b/arch/powerpc/platforms/cell/Kconfig
@@ -1,7 +1,6 @@
  # SPDX-License-Identifier: GPL-2.0
  config PPC_CELL
         bool
-       default n
  
  config PPC_CELL_COMMON
         bool
@@ -22,7 +21,6 @@ config PPC_CELL_NATIVE
         select IBM_EMAC_RGMII if IBM_EMAC
         select IBM_EMAC_ZMII if IBM_EMAC #test only
         select IBM_EMAC_TAH if IBM_EMAC  #test only
-       default n
  
  config PPC_IBM_CELL_BLADE
         bool "IBM Cell Blade"
@@ -54,7 +52,6 @@ config SPU_FS
  
  config SPU_BASE
         bool
-       default n
         select PPC_COPRO_BASE
  
  config CBE_RAS
diff --git a/arch/powerpc/platforms/cell/spu_manage.c b/arch/powerpc/platforms/cell/spu_manage.c

index 5c409c9..f7e3637 100644 (file)
--- a/arch/powerpc/platforms/cell/spu_manage.c
+++ b/arch/powerpc/platforms/cell/spu_manage.c
@@ -180,35 +180,22 @@ out:
  
  static int __init spu_map_interrupts(struct spu *spu, struct device_node *np)
  {
-       struct of_phandle_args oirq;
-       int ret;
         int i;
  
         for (i=0; i < 3; i++) {
-               ret = of_irq_parse_one(np, i, &oirq);
-               if (ret) {
-                       pr_debug("spu_new: failed to get irq %d\n", i);
-                       goto err;
-               }
-               ret = -EINVAL;
-               pr_debug("  irq %d no 0x%x on %pOF\n", i, oirq.args[0],
-                        oirq.np);
-               spu->irqs[i] = irq_create_of_mapping(&oirq);
-               if (!spu->irqs[i]) {
-                       pr_debug("spu_new: failed to map it !\n");
+               spu->irqs[i] = irq_of_parse_and_map(np, i);
+               if (!spu->irqs[i])
                         goto err;
-               }
         }
         return 0;
  
  err:
-       pr_debug("failed to map irq %x for spu %s\n", *oirq.args,
-               spu->name);
+       pr_debug("failed to map irq %x for spu %s\n", i, spu->name);
         for (; i >= 0; i--) {
                 if (spu->irqs[i])
                         irq_dispose_mapping(spu->irqs[i]);
         }
-       return ret;
+       return -EINVAL;
  }
  
  static int spu_map_resource(struct spu *spu, int nr,
@@ -295,8 +282,8 @@ static int __init of_enumerate_spus(int (*fn)(void *data))
         for_each_node_by_type(node, "spe") {
                 ret = fn(node);
                 if (ret) {
-                       printk(KERN_WARNING "%s: Error initializing %s\n",
-                               __func__, node->name);
+                       printk(KERN_WARNING "%s: Error initializing %pOFn\n",
+                               __func__, node);
                         of_node_put(node);
                         break;
                 }
diff --git a/arch/powerpc/platforms/embedded6xx/wii.c b/arch/powerpc/platforms/embedded6xx/wii.c

index 403523c..ecf703e 100644 (file)
--- a/arch/powerpc/platforms/embedded6xx/wii.c
+++ b/arch/powerpc/platforms/embedded6xx/wii.c
@@ -112,7 +112,7 @@ static void __iomem *wii_ioremap_hw_regs(char *name, char *compatible)
         }
         error = of_address_to_resource(np, 0, &res);
         if (error) {
-               pr_err("no valid reg found for %s\n", np->name);
+               pr_err("no valid reg found for %pOFn\n", np);
                 goto out_put;
         }
  
diff --git a/arch/powerpc/platforms/maple/Kconfig b/arch/powerpc/platforms/maple/Kconfig

index 376d0be..2601fac 100644 (file)
--- a/arch/powerpc/platforms/maple/Kconfig
+++ b/arch/powerpc/platforms/maple/Kconfig
@@ -13,7 +13,6 @@ config PPC_MAPLE
         select PPC_RTAS
         select MMIO_NVRAM
         select ATA_NONSTANDARD if ATA
-       default n
         help
            This option enables support for the Maple 970FX Evaluation Board.
           For more information, refer to <http://www.970eval.com>
diff --git a/arch/powerpc/platforms/pasemi/Kconfig b/arch/powerpc/platforms/pasemi/Kconfig

index d458a79..98e3bc2 100644 (file)
--- a/arch/powerpc/platforms/pasemi/Kconfig
+++ b/arch/powerpc/platforms/pasemi/Kconfig
@@ -2,7 +2,6 @@
  config PPC_PASEMI
         depends on PPC64 && PPC_BOOK3S && CPU_BIG_ENDIAN
         bool "PA Semi SoC-based platforms"
-       default n
         select MPIC
         select PCI
         select PPC_UDBG_16550
diff --git a/arch/powerpc/platforms/pasemi/dma_lib.c b/arch/powerpc/platforms/pasemi/dma_lib.c

index c80f72c..53384eb 100644 (file)
--- a/arch/powerpc/platforms/pasemi/dma_lib.c
+++ b/arch/powerpc/platforms/pasemi/dma_lib.c
@@ -576,7 +576,7 @@ int pasemi_dma_init(void)
                 res.start = 0xfd800000;
                 res.end = res.start + 0x1000;
         }
-       dma_status = __ioremap(res.start, resource_size(&res), 0);
+       dma_status = ioremap_cache(res.start, resource_size(&res));
         pci_dev_put(iob_pdev);
  
         for (i = 0; i < MAX_TXCH; i++)
diff --git a/arch/powerpc/platforms/powermac/Makefile b/arch/powerpc/platforms/powermac/Makefile

index f2839ee..923bfb3 100644 (file)
--- a/arch/powerpc/platforms/powermac/Makefile
+++ b/arch/powerpc/platforms/powermac/Makefile
@@ -1,9 +1,10 @@
  # SPDX-License-Identifier: GPL-2.0
  CFLAGS_bootx_init.o            += -fPIC
+CFLAGS_bootx_init.o            += $(call cc-option, -fno-stack-protector)
  
  ifdef CONFIG_FUNCTION_TRACER
  # Do not trace early boot code
-CFLAGS_REMOVE_bootx_init.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_bootx_init.o = $(CC_FLAGS_FTRACE)
  endif
  
  obj-y                          += pic.o setup.o time.o feature.o pci.o \
diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c

index 4eb8cb3..ed2f54b 100644 (file)
--- a/arch/powerpc/platforms/powermac/feature.c
+++ b/arch/powerpc/platforms/powermac/feature.c
@@ -1049,7 +1049,6 @@ core99_reset_cpu(struct device_node *node, long param, long value)
         unsigned long flags;
         struct macio_chip *macio;
         struct device_node *np;
-       struct device_node *cpus;
         const int dflt_reset_lines[] = {        KL_GPIO_RESET_CPU0,
                                                 KL_GPIO_RESET_CPU1,
                                                 KL_GPIO_RESET_CPU2,
@@ -1059,10 +1058,7 @@ core99_reset_cpu(struct device_node *node, long param, long value)
         if (macio->type != macio_keylargo)
                 return -ENODEV;
  
-       cpus = of_find_node_by_path("/cpus");
-       if (cpus == NULL)
-               return -ENODEV;
-       for (np = cpus->child; np != NULL; np = np->sibling) {
+       for_each_of_cpu_node(np) {
                 const u32 *num = of_get_property(np, "reg", NULL);
                 const u32 *rst = of_get_property(np, "soft-reset", NULL);
                 if (num == NULL || rst == NULL)
@@ -1072,7 +1068,6 @@ core99_reset_cpu(struct device_node *node, long param, long value)
                         break;
                 }
         }
-       of_node_put(cpus);
         if (np == NULL || reset_io == 0)
                 reset_io = dflt_reset_lines[param];
  
@@ -1504,16 +1499,12 @@ static long g5_reset_cpu(struct device_node *node, long param, long value)
         unsigned long flags;
         struct macio_chip *macio;
         struct device_node *np;
-       struct device_node *cpus;
  
         macio = &macio_chips[0];
         if (macio->type != macio_keylargo2 && macio->type != macio_shasta)
                 return -ENODEV;
  
-       cpus = of_find_node_by_path("/cpus");
-       if (cpus == NULL)
-               return -ENODEV;
-       for (np = cpus->child; np != NULL; np = np->sibling) {
+       for_each_of_cpu_node(np) {
                 const u32 *num = of_get_property(np, "reg", NULL);
                 const u32 *rst = of_get_property(np, "soft-reset", NULL);
                 if (num == NULL || rst == NULL)
@@ -1523,7 +1514,6 @@ static long g5_reset_cpu(struct device_node *node, long param, long value)
                         break;
                 }
         }
-       of_node_put(cpus);
         if (np == NULL || reset_io == 0)
                 return -ENODEV;
  
@@ -2515,31 +2505,26 @@ found:
          * supposed to be set when not supported, but I'm not very confident
          * that all Apple OF revs did it properly, I do it the paranoid way.
          */
-       while (uninorth_base && uninorth_rev > 3) {
-               struct device_node *cpus = of_find_node_by_path("/cpus");
+       if (uninorth_base && uninorth_rev > 3) {
                 struct device_node *np;
  
-               if (!cpus || !cpus->child) {
-                       printk(KERN_WARNING "Can't find CPU(s) in device tree !\n");
-                       of_node_put(cpus);
-                       break;
-               }
-               np = cpus->child;
-               /* Nap mode not supported on SMP */
-               if (np->sibling) {
-                       of_node_put(cpus);
-                       break;
-               }
-               /* Nap mode not supported if flush-on-lock property is present */
-               if (of_get_property(np, "flush-on-lock", NULL)) {
-                       of_node_put(cpus);
-                       break;
+               int cpu_count = 1;
+
+               for_each_of_cpu_node(np) {
+                       /* Nap mode not supported on SMP or with flush-on-lock */
+                       if (of_get_property(np, "flush-on-lock", NULL) ||
+                           (cpu_count > 1)) {
+                               powersave_nap = 0;
+                               of_node_put(np);
+                               break;
+                       }
+
+                       cpu_count++;
+                       powersave_nap = 1;
                 }
-               of_node_put(cpus);
-               powersave_nap = 1;
-               printk(KERN_DEBUG "Processor NAP mode on idle enabled.\n");
-               break;
         }
+       if (powersave_nap)
+               printk(KERN_DEBUG "Processor NAP mode on idle enabled.\n");
  
         /* On CPUs that support it (750FX), lowspeed by default during
          * NAP mode
diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c

index 3a529fc..2f00e3d 100644 (file)
--- a/arch/powerpc/platforms/powermac/setup.c
+++ b/arch/powerpc/platforms/powermac/setup.c
@@ -243,10 +243,9 @@ static void __init l2cr_init(void)
  {
         /* Checks "l2cr-value" property in the registry */
         if (cpu_has_feature(CPU_FTR_L2CR)) {
-               struct device_node *np = of_find_node_by_name(NULL, "cpus");
-               if (!np)
-                       np = of_find_node_by_type(NULL, "cpu");
-               if (np) {
+               struct device_node *np;
+
+               for_each_of_cpu_node(np) {
                         const unsigned int *l2cr =
                                 of_get_property(np, "l2cr-value", NULL);
                         if (l2cr) {
@@ -256,6 +255,7 @@ static void __init l2cr_init(void)
                                 _set_L2CR(ppc_override_l2cr_value);
                         }
                         of_node_put(np);
+                       break;
                 }
         }
  
@@ -279,8 +279,8 @@ static void __init pmac_setup_arch(void)
         /* Set loops_per_jiffy to a half-way reasonable value,
            for use until calibrate_delay gets called. */
         loops_per_jiffy = 50000000 / HZ;
-       cpu = of_find_node_by_type(NULL, "cpu");
-       if (cpu != NULL) {
+
+       for_each_of_cpu_node(cpu) {
                 fp = of_get_property(cpu, "clock-frequency", NULL);
                 if (fp != NULL) {
                         if (pvr >= 0x30 && pvr < 0x80)
@@ -292,8 +292,9 @@ static void __init pmac_setup_arch(void)
                         else
                                 /* 601, 603, etc. */
                                 loops_per_jiffy = *fp / (2 * HZ);
+                       of_node_put(cpu);
+                       break;
                 }
-               of_node_put(cpu);
         }
  
         /* See if newworld or oldworld */
diff --git a/arch/powerpc/platforms/powermac/time.c b/arch/powerpc/platforms/powermac/time.c

index f92c191..f157e3d 100644 (file)
--- a/arch/powerpc/platforms/powermac/time.c
+++ b/arch/powerpc/platforms/powermac/time.c
@@ -44,13 +44,6 @@
  #define DBG(x...)
  #endif
  
-/*
- * Offset between Unix time (1970-based) and Mac time (1904-based). Cuda and PMU
- * times wrap in 2040. If we need to handle later times, the read_time functions
- * need to be changed to interpret wrapped times as post-2040.
- */
-#define RTC_OFFSET     2082844800
-
  /*
   * Calibrate the decrementer frequency with the VIA timer 1.
   */
@@ -90,98 +83,6 @@ long __init pmac_time_init(void)
         return delta;
  }
  
-#ifdef CONFIG_ADB_CUDA
-static time64_t cuda_get_time(void)
-{
-       struct adb_request req;
-       time64_t now;
-
-       if (cuda_request(&req, NULL, 2, CUDA_PACKET, CUDA_GET_TIME) < 0)
-               return 0;
-       while (!req.complete)
-               cuda_poll();
-       if (req.reply_len != 7)
-               printk(KERN_ERR "cuda_get_time: got %d byte reply\n",
-                      req.reply_len);
-       now = (u32)((req.reply[3] << 24) + (req.reply[4] << 16) +
-                   (req.reply[5] << 8) + req.reply[6]);
-       /* it's either after year 2040, or the RTC has gone backwards */
-       WARN_ON(now < RTC_OFFSET);
-
-       return now - RTC_OFFSET;
-}
-
-#define cuda_get_rtc_time(tm)  rtc_time64_to_tm(cuda_get_time(), (tm))
-
-static int cuda_set_rtc_time(struct rtc_time *tm)
-{
-       u32 nowtime;
-       struct adb_request req;
-
-       nowtime = lower_32_bits(rtc_tm_to_time64(tm) + RTC_OFFSET);
-       if (cuda_request(&req, NULL, 6, CUDA_PACKET, CUDA_SET_TIME,
-                        nowtime >> 24, nowtime >> 16, nowtime >> 8,
-                        nowtime) < 0)
-               return -ENXIO;
-       while (!req.complete)
-               cuda_poll();
-       if ((req.reply_len != 3) && (req.reply_len != 7))
-               printk(KERN_ERR "cuda_set_rtc_time: got %d byte reply\n",
-                      req.reply_len);
-       return 0;
-}
-
-#else
-#define cuda_get_time()                0
-#define cuda_get_rtc_time(tm)
-#define cuda_set_rtc_time(tm)  0
-#endif
-
-#ifdef CONFIG_ADB_PMU
-static time64_t pmu_get_time(void)
-{
-       struct adb_request req;
-       time64_t now;
-
-       if (pmu_request(&req, NULL, 1, PMU_READ_RTC) < 0)
-               return 0;
-       pmu_wait_complete(&req);
-       if (req.reply_len != 4)
-               printk(KERN_ERR "pmu_get_time: got %d byte reply from PMU\n",
-                      req.reply_len);
-       now = (u32)((req.reply[0] << 24) + (req.reply[1] << 16) +
-                   (req.reply[2] << 8) + req.reply[3]);
-
-       /* it's either after year 2040, or the RTC has gone backwards */
-       WARN_ON(now < RTC_OFFSET);
-
-       return now - RTC_OFFSET;
-}
-
-#define pmu_get_rtc_time(tm)   rtc_time64_to_tm(pmu_get_time(), (tm))
-
-static int pmu_set_rtc_time(struct rtc_time *tm)
-{
-       u32 nowtime;
-       struct adb_request req;
-
-       nowtime = lower_32_bits(rtc_tm_to_time64(tm) + RTC_OFFSET);
-       if (pmu_request(&req, NULL, 5, PMU_SET_RTC, nowtime >> 24,
-                       nowtime >> 16, nowtime >> 8, nowtime) < 0)
-               return -ENXIO;
-       pmu_wait_complete(&req);
-       if (req.reply_len != 0)
-               printk(KERN_ERR "pmu_set_rtc_time: %d byte reply from PMU\n",
-                      req.reply_len);
-       return 0;
-}
-
-#else
-#define pmu_get_time()         0
-#define pmu_get_rtc_time(tm)
-#define pmu_set_rtc_time(tm)   0
-#endif
-
  #ifdef CONFIG_PMAC_SMU
  static time64_t smu_get_time(void)
  {
@@ -191,11 +92,6 @@ static time64_t smu_get_time(void)
                 return 0;
         return rtc_tm_to_time64(&tm);
  }
-
-#else
-#define smu_get_time()                 0
-#define smu_get_rtc_time(tm, spin)
-#define smu_set_rtc_time(tm, spin)     0
  #endif
  
  /* Can't be __init, it's called when suspending and resuming */
@@ -203,12 +99,18 @@ time64_t pmac_get_boot_time(void)
  {
         /* Get the time from the RTC, used only at boot time */
         switch (sys_ctrler) {
+#ifdef CONFIG_ADB_CUDA
         case SYS_CTRLER_CUDA:
                 return cuda_get_time();
+#endif
+#ifdef CONFIG_ADB_PMU
         case SYS_CTRLER_PMU:
                 return pmu_get_time();
+#endif
+#ifdef CONFIG_PMAC_SMU
         case SYS_CTRLER_SMU:
                 return smu_get_time();
+#endif
         default:
                 return 0;
         }
@@ -218,15 +120,21 @@ void pmac_get_rtc_time(struct rtc_time *tm)
  {
         /* Get the time from the RTC, used only at boot time */
         switch (sys_ctrler) {
+#ifdef CONFIG_ADB_CUDA
         case SYS_CTRLER_CUDA:
-               cuda_get_rtc_time(tm);
+               rtc_time64_to_tm(cuda_get_time(), tm);
                 break;
+#endif
+#ifdef CONFIG_ADB_PMU
         case SYS_CTRLER_PMU:
-               pmu_get_rtc_time(tm);
+               rtc_time64_to_tm(pmu_get_time(), tm);
                 break;
+#endif
+#ifdef CONFIG_PMAC_SMU
         case SYS_CTRLER_SMU:
                 smu_get_rtc_time(tm, 1);
                 break;
+#endif
         default:
                 ;
         }
@@ -235,12 +143,18 @@ void pmac_get_rtc_time(struct rtc_time *tm)
  int pmac_set_rtc_time(struct rtc_time *tm)
  {
         switch (sys_ctrler) {
+#ifdef CONFIG_ADB_CUDA
         case SYS_CTRLER_CUDA:
                 return cuda_set_rtc_time(tm);
+#endif
+#ifdef CONFIG_ADB_PMU
         case SYS_CTRLER_PMU:
                 return pmu_set_rtc_time(tm);
+#endif
+#ifdef CONFIG_PMAC_SMU
         case SYS_CTRLER_SMU:
                 return smu_set_rtc_time(tm, 1);
+#endif
         default:
                 return -ENODEV;
         }
diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig

index f8dc98d..99083fe 100644 (file)
--- a/arch/powerpc/platforms/powernv/Kconfig
+++ b/arch/powerpc/platforms/powernv/Kconfig
@@ -15,11 +15,6 @@ config PPC_POWERNV
         select PPC_SCOM
         select ARCH_RANDOM
         select CPU_FREQ
-       select CPU_FREQ_GOV_PERFORMANCE
-       select CPU_FREQ_GOV_POWERSAVE
-       select CPU_FREQ_GOV_USERSPACE
-       select CPU_FREQ_GOV_ONDEMAND
-       select CPU_FREQ_GOV_CONSERVATIVE
         select PPC_DOORBELL
         select MMU_NOTIFIER
         select FORCE_SMP
@@ -35,7 +30,6 @@ config OPAL_PRD
  config PPC_MEMTRACE
         bool "Enable removal of RAM from kernel mappings for tracing"
         depends on PPC_POWERNV && MEMORY_HOTREMOVE
-       default n
         help
           Enabling this option allows for the removal of memory (RAM)
           from the kernel mappings to be used for hardware tracing.
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c

index 3c1beae..abc0be7 100644 (file)
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -223,14 +223,6 @@ int pnv_eeh_post_init(void)
         eeh_probe_devices();
         eeh_addr_cache_build();
  
-       if (eeh_has_flag(EEH_POSTPONED_PROBE)) {
-               eeh_clear_flag(EEH_POSTPONED_PROBE);
-               if (eeh_enabled())
-                       pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n");
-               else
-                       pr_info("EEH: No capable adapters found\n");
-       }
-
         /* Register OPAL event notifier */
         eeh_event_irq = opal_event_request(ilog2(OPAL_EVENT_PCI_ERROR));
         if (eeh_event_irq < 0) {
@@ -391,12 +383,6 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data)
         if ((pdn->class_code >> 8) == PCI_CLASS_BRIDGE_ISA)
                 return NULL;
  
-       /* Skip if we haven't probed yet */
-       if (phb->ioda.pe_rmap[config_addr] == IODA_INVALID_PE) {
-               eeh_add_flag(EEH_POSTPONED_PROBE);
-               return NULL;
-       }
-
         /* Initialize eeh device */
         edev->class_code = pdn->class_code;
         edev->mode      &= 0xFFFFFF00;
@@ -604,7 +590,7 @@ static int pnv_eeh_get_phb_state(struct eeh_pe *pe)
                           EEH_STATE_MMIO_ENABLED |
                           EEH_STATE_DMA_ENABLED);
         } else if (!(pe->state & EEH_PE_ISOLATED)) {
-               eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
+               eeh_pe_mark_isolated(pe);
                 pnv_eeh_get_phb_diag(pe);
  
                 if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
@@ -706,7 +692,7 @@ static int pnv_eeh_get_pe_state(struct eeh_pe *pe)
                 if (phb->freeze_pe)
                         phb->freeze_pe(phb, pe->addr);
  
-               eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
+               eeh_pe_mark_isolated(pe);
                 pnv_eeh_get_phb_diag(pe);
  
                 if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
@@ -1054,7 +1040,7 @@ static int pnv_eeh_reset_vf_pe(struct eeh_pe *pe, int option)
         int ret;
  
         /* The VF PE should have only one child device */
-       edev = list_first_entry_or_null(&pe->edevs, struct eeh_dev, list);
+       edev = list_first_entry_or_null(&pe->edevs, struct eeh_dev, entry);
         pdn = eeh_dev_to_pdn(edev);
         if (!pdn)
                 return -ENXIO;
@@ -1147,43 +1133,6 @@ static int pnv_eeh_reset(struct eeh_pe *pe, int option)
         return pnv_eeh_bridge_reset(bus->self, option);
  }
  
-/**
- * pnv_eeh_wait_state - Wait for PE state
- * @pe: EEH PE
- * @max_wait: maximal period in millisecond
- *
- * Wait for the state of associated PE. It might take some time
- * to retrieve the PE's state.
- */
-static int pnv_eeh_wait_state(struct eeh_pe *pe, int max_wait)
-{
-       int ret;
-       int mwait;
-
-       while (1) {
-               ret = pnv_eeh_get_state(pe, &mwait);
-
-               /*
-                * If the PE's state is temporarily unavailable,
-                * we have to wait for the specified time. Otherwise,
-                * the PE's state will be returned immediately.
-                */
-               if (ret != EEH_STATE_UNAVAILABLE)
-                       return ret;
-
-               if (max_wait <= 0) {
-                       pr_warn("%s: Timeout getting PE#%x's state (%d)\n",
-                               __func__, pe->addr, max_wait);
-                       return EEH_STATE_NOT_SUPPORT;
-               }
-
-               max_wait -= mwait;
-               msleep(mwait);
-       }
-
-       return EEH_STATE_NOT_SUPPORT;
-}
-
  /**
   * pnv_eeh_get_log - Retrieve error log
   * @pe: EEH PE
@@ -1611,7 +1560,7 @@ static int pnv_eeh_next_error(struct eeh_pe **pe)
                 if ((ret == EEH_NEXT_ERR_FROZEN_PE  ||
                     ret == EEH_NEXT_ERR_FENCED_PHB) &&
                     !((*pe)->state & EEH_PE_ISOLATED)) {
-                       eeh_pe_state_mark(*pe, EEH_PE_ISOLATED);
+                       eeh_pe_mark_isolated(*pe);
                         pnv_eeh_get_phb_diag(*pe);
  
                         if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
@@ -1640,7 +1589,7 @@ static int pnv_eeh_next_error(struct eeh_pe **pe)
                         }
  
                         /* We possibly migrate to another PE */
-                       eeh_pe_state_mark(*pe, EEH_PE_ISOLATED);
+                       eeh_pe_mark_isolated(*pe);
                 }
  
                 /*
@@ -1702,7 +1651,6 @@ static struct eeh_ops pnv_eeh_ops = {
         .get_pe_addr            = pnv_eeh_get_pe_addr,
         .get_state              = pnv_eeh_get_state,
         .reset                  = pnv_eeh_reset,
-       .wait_state             = pnv_eeh_wait_state,
         .get_log                = pnv_eeh_get_log,
         .configure_bridge       = pnv_eeh_configure_bridge,
         .err_inject             = pnv_eeh_err_inject,
diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c

index 51dc398..a29fdf8 100644 (file)
--- a/arch/powerpc/platforms/powernv/memtrace.c
+++ b/arch/powerpc/platforms/powernv/memtrace.c
@@ -90,17 +90,15 @@ static bool memtrace_offline_pages(u32 nid, u64 start_pfn, u64 nr_pages)
         walk_memory_range(start_pfn, end_pfn, (void *)MEM_OFFLINE,
                           change_memblock_state);
  
-       lock_device_hotplug();
-       remove_memory(nid, start_pfn << PAGE_SHIFT, nr_pages << PAGE_SHIFT);
-       unlock_device_hotplug();
  
         return true;
  }
  
  static u64 memtrace_alloc_node(u32 nid, u64 size)
  {
-       u64 start_pfn, end_pfn, nr_pages;
+       u64 start_pfn, end_pfn, nr_pages, pfn;
         u64 base_pfn;
+       u64 bytes = memory_block_size_bytes();
  
         if (!node_spanned_pages(nid))
                 return 0;
@@ -113,8 +111,21 @@ static u64 memtrace_alloc_node(u32 nid, u64 size)
         end_pfn = round_down(end_pfn - nr_pages, nr_pages);
  
         for (base_pfn = end_pfn; base_pfn > start_pfn; base_pfn -= nr_pages) {
-               if (memtrace_offline_pages(nid, base_pfn, nr_pages) == true)
+               if (memtrace_offline_pages(nid, base_pfn, nr_pages) == true) {
+                       /*
+                        * Remove memory in memory block size chunks so that
+                        * iomem resources are always split to the same size and
+                        * we never try to remove memory that spans two iomem
+                        * resources.
+                        */
+                       lock_device_hotplug();
+                       end_pfn = base_pfn + nr_pages;
+                       for (pfn = base_pfn; pfn < end_pfn; pfn += bytes>> PAGE_SHIFT) {
+                               remove_memory(nid, pfn << PAGE_SHIFT, bytes);
+                       }
+                       unlock_device_hotplug();
                         return base_pfn << PAGE_SHIFT;
+               }
         }
  
         return 0;
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c

index 8006c54..6f60e09 100644 (file)
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -17,7 +17,7 @@
  #include <linux/pci.h>
  #include <linux/memblock.h>
  #include <linux/iommu.h>
-#include <linux/debugfs.h>
+#include <linux/sizes.h>
  
  #include <asm/debugfs.h>
  #include <asm/tlb.h>
@@ -41,14 +41,6 @@
   */
  static DEFINE_SPINLOCK(npu_context_lock);
  
-/*
- * When an address shootdown range exceeds this threshold we invalidate the
- * entire TLB on the GPU for the given PID rather than each specific address in
- * the range.
- */
-static uint64_t atsd_threshold = 2 * 1024 * 1024;
-static struct dentry *atsd_threshold_dentry;
-
  /*
   * Other types of TCE cache invalidation are not functional in the
   * hardware.
@@ -454,79 +446,73 @@ static void put_mmio_atsd_reg(struct npu *npu, int reg)
  }
  
  /* MMIO ATSD register offsets */
-#define XTS_ATSD_AVA  1
-#define XTS_ATSD_STAT 2
-
-static void mmio_launch_invalidate(struct mmio_atsd_reg *mmio_atsd_reg,
-                               unsigned long launch, unsigned long va)
-{
-       struct npu *npu = mmio_atsd_reg->npu;
-       int reg = mmio_atsd_reg->reg;
-
-       __raw_writeq_be(va, npu->mmio_atsd_regs[reg] + XTS_ATSD_AVA);
-       eieio();
-       __raw_writeq_be(launch, npu->mmio_atsd_regs[reg]);
-}
+#define XTS_ATSD_LAUNCH 0
+#define XTS_ATSD_AVA    1
+#define XTS_ATSD_STAT   2
  
-static void mmio_invalidate_pid(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS],
-                               unsigned long pid, bool flush)
+static unsigned long get_atsd_launch_val(unsigned long pid, unsigned long psize)
  {
-       int i;
-       unsigned long launch;
-
-       for (i = 0; i <= max_npu2_index; i++) {
-               if (mmio_atsd_reg[i].reg < 0)
-                       continue;
+       unsigned long launch = 0;
  
-               /* IS set to invalidate matching PID */
-               launch = PPC_BIT(12);
-
-               /* PRS set to process-scoped */
-               launch |= PPC_BIT(13);
+       if (psize == MMU_PAGE_COUNT) {
+               /* IS set to invalidate entire matching PID */
+               launch |= PPC_BIT(12);
+       } else {
+               /* AP set to invalidate region of psize */
+               launch |= (u64)mmu_get_ap(psize) << PPC_BITLSHIFT(17);
+       }
  
-               /* AP */
-               launch |= (u64)
-                       mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17);
+       /* PRS set to process-scoped */
+       launch |= PPC_BIT(13);
  
-               /* PID */
-               launch |= pid << PPC_BITLSHIFT(38);
+       /* PID */
+       launch |= pid << PPC_BITLSHIFT(38);
  
-               /* No flush */
-               launch |= !flush << PPC_BITLSHIFT(39);
+       /* Leave "No flush" (bit 39) 0 so every ATSD performs a flush */
  
-               /* Invalidating the entire process doesn't use a va */
-               mmio_launch_invalidate(&mmio_atsd_reg[i], launch, 0);
-       }
+       return launch;
  }
  
-static void mmio_invalidate_va(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS],
-                       unsigned long va, unsigned long pid, bool flush)
+static void mmio_atsd_regs_write(struct mmio_atsd_reg
+                       mmio_atsd_reg[NV_MAX_NPUS], unsigned long offset,
+                       unsigned long val)
  {
-       int i;
-       unsigned long launch;
+       struct npu *npu;
+       int i, reg;
  
         for (i = 0; i <= max_npu2_index; i++) {
-               if (mmio_atsd_reg[i].reg < 0)
+               reg = mmio_atsd_reg[i].reg;
+               if (reg < 0)
                         continue;
  
-               /* IS set to invalidate target VA */
-               launch = 0;
+               npu = mmio_atsd_reg[i].npu;
+               __raw_writeq_be(val, npu->mmio_atsd_regs[reg] + offset);
+       }
+}
+
+static void mmio_invalidate_pid(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS],
+                               unsigned long pid)
+{
+       unsigned long launch = get_atsd_launch_val(pid, MMU_PAGE_COUNT);
  
-               /* PRS set to process scoped */
-               launch |= PPC_BIT(13);
+       /* Invalidating the entire process doesn't use a va */
+       mmio_atsd_regs_write(mmio_atsd_reg, XTS_ATSD_LAUNCH, launch);
+}
  
-               /* AP */
-               launch |= (u64)
-                       mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17);
+static void mmio_invalidate_range(struct mmio_atsd_reg
+                       mmio_atsd_reg[NV_MAX_NPUS], unsigned long pid,
+                       unsigned long start, unsigned long psize)
+{
+       unsigned long launch = get_atsd_launch_val(pid, psize);
  
-               /* PID */
-               launch |= pid << PPC_BITLSHIFT(38);
+       /* Write all VAs first */
+       mmio_atsd_regs_write(mmio_atsd_reg, XTS_ATSD_AVA, start);
  
-               /* No flush */
-               launch |= !flush << PPC_BITLSHIFT(39);
+       /* Issue one barrier for all address writes */
+       eieio();
  
-               mmio_launch_invalidate(&mmio_atsd_reg[i], launch, va);
-       }
+       /* Launch */
+       mmio_atsd_regs_write(mmio_atsd_reg, XTS_ATSD_LAUNCH, launch);
  }
  
  #define mn_to_npu_context(x) container_of(x, struct npu_context, mn)
@@ -612,14 +598,36 @@ static void release_atsd_reg(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS])
  }
  
  /*
- * Invalidate either a single address or an entire PID depending on
- * the value of va.
+ * Invalidate a virtual address range
   */
-static void mmio_invalidate(struct npu_context *npu_context, int va,
-                       unsigned long address, bool flush)
+static void mmio_invalidate(struct npu_context *npu_context,
+                       unsigned long start, unsigned long size)
  {
         struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS];
         unsigned long pid = npu_context->mm->context.id;
+       unsigned long atsd_start = 0;
+       unsigned long end = start + size - 1;
+       int atsd_psize = MMU_PAGE_COUNT;
+
+       /*
+        * Convert the input range into one of the supported sizes. If the range
+        * doesn't fit, use the next larger supported size. Invalidation latency
+        * is high, so over-invalidation is preferred to issuing multiple
+        * invalidates.
+        *
+        * A 4K page size isn't supported by NPU/GPU ATS, so that case is
+        * ignored.
+        */
+       if (size == SZ_64K) {
+               atsd_start = start;
+               atsd_psize = MMU_PAGE_64K;
+       } else if (ALIGN_DOWN(start, SZ_2M) == ALIGN_DOWN(end, SZ_2M)) {
+               atsd_start = ALIGN_DOWN(start, SZ_2M);
+               atsd_psize = MMU_PAGE_2M;
+       } else if (ALIGN_DOWN(start, SZ_1G) == ALIGN_DOWN(end, SZ_1G)) {
+               atsd_start = ALIGN_DOWN(start, SZ_1G);
+               atsd_psize = MMU_PAGE_1G;
+       }
  
         if (npu_context->nmmu_flush)
                 /*
@@ -634,23 +642,25 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
          * an invalidate.
          */
         acquire_atsd_reg(npu_context, mmio_atsd_reg);
-       if (va)
-               mmio_invalidate_va(mmio_atsd_reg, address, pid, flush);
+
+       if (atsd_psize == MMU_PAGE_COUNT)
+               mmio_invalidate_pid(mmio_atsd_reg, pid);
         else
-               mmio_invalidate_pid(mmio_atsd_reg, pid, flush);
+               mmio_invalidate_range(mmio_atsd_reg, pid, atsd_start,
+                                       atsd_psize);
  
         mmio_invalidate_wait(mmio_atsd_reg);
-       if (flush) {
-               /*
-                * The GPU requires two flush ATSDs to ensure all entries have
-                * been flushed. We use PID 0 as it will never be used for a
-                * process on the GPU.
-                */
-               mmio_invalidate_pid(mmio_atsd_reg, 0, true);
-               mmio_invalidate_wait(mmio_atsd_reg);
-               mmio_invalidate_pid(mmio_atsd_reg, 0, true);
-               mmio_invalidate_wait(mmio_atsd_reg);
-       }
+
+       /*
+        * The GPU requires two flush ATSDs to ensure all entries have been
+        * flushed. We use PID 0 as it will never be used for a process on the
+        * GPU.
+        */
+       mmio_invalidate_pid(mmio_atsd_reg, 0);
+       mmio_invalidate_wait(mmio_atsd_reg);
+       mmio_invalidate_pid(mmio_atsd_reg, 0);
+       mmio_invalidate_wait(mmio_atsd_reg);
+
         release_atsd_reg(mmio_atsd_reg);
  }
  
@@ -667,7 +677,7 @@ static void pnv_npu2_mn_release(struct mmu_notifier *mn,
          * There should be no more translation requests for this PID, but we
          * need to ensure any entries for it are removed from the TLB.
          */
-       mmio_invalidate(npu_context, 0, 0, true);
+       mmio_invalidate(npu_context, 0, ~0UL);
  }
  
  static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
@@ -676,8 +686,7 @@ static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
                                 pte_t pte)
  {
         struct npu_context *npu_context = mn_to_npu_context(mn);
-
-       mmio_invalidate(npu_context, 1, address, true);
+       mmio_invalidate(npu_context, address, PAGE_SIZE);
  }
  
  static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
@@ -685,21 +694,7 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
                                         unsigned long start, unsigned long end)
  {
         struct npu_context *npu_context = mn_to_npu_context(mn);
-       unsigned long address;
-
-       if (end - start > atsd_threshold) {
-               /*
-                * Just invalidate the entire PID if the address range is too
-                * large.
-                */
-               mmio_invalidate(npu_context, 0, 0, true);
-       } else {
-               for (address = start; address < end; address += PAGE_SIZE)
-                       mmio_invalidate(npu_context, 1, address, false);
-
-               /* Do the flush only on the final addess == end */
-               mmio_invalidate(npu_context, 1, address, true);
-       }
+       mmio_invalidate(npu_context, start, end - start);
  }
  
  static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
@@ -962,11 +957,6 @@ int pnv_npu2_init(struct pnv_phb *phb)
         static int npu_index;
         uint64_t rc = 0;
  
-       if (!atsd_threshold_dentry) {
-               atsd_threshold_dentry = debugfs_create_x64("atsd_threshold",
-                                  0600, powerpc_debugfs_root, &atsd_threshold);
-       }
-
         phb->npu.nmmu_flush =
                 of_property_read_bool(phb->hose->dn, "ibm,nmmu-flush");
         for_each_child_of_node(phb->hose->dn, dn) {
diff --git a/arch/powerpc/platforms/powernv/opal-powercap.c b/arch/powerpc/platforms/powernv/opal-powercap.c

index badb29b..d90ee4f 100644 (file)
--- a/arch/powerpc/platforms/powernv/opal-powercap.c
+++ b/arch/powerpc/platforms/powernv/opal-powercap.c
@@ -199,7 +199,7 @@ void __init opal_powercap_init(void)
                 }
  
                 j = 0;
-               pcaps[i].pg.name = node->name;
+               pcaps[i].pg.name = kasprintf(GFP_KERNEL, "%pOFn", node);
                 if (has_min) {
                         powercap_add_attr(min, "powercap-min",
                                           &pcaps[i].pattrs[j]);
@@ -237,6 +237,7 @@ out_pcaps_pattrs:
         while (--i >= 0) {
                 kfree(pcaps[i].pattrs);
                 kfree(pcaps[i].pg.attrs);
+               kfree(pcaps[i].pg.name);
         }
         kobject_put(powercap_kobj);
  out_pcaps:
diff --git a/arch/powerpc/platforms/powernv/opal-sensor-groups.c b/arch/powerpc/platforms/powernv/opal-sensor-groups.c

index f7d04b6..1796092 100644 (file)
--- a/arch/powerpc/platforms/powernv/opal-sensor-groups.c
+++ b/arch/powerpc/platforms/powernv/opal-sensor-groups.c
@@ -214,9 +214,9 @@ void __init opal_sensor_groups_init(void)
                 }
  
                 if (!of_property_read_u32(node, "ibm,chip-id", &chipid))
-                       sprintf(sgs[i].name, "%s%d", node->name, chipid);
+                       sprintf(sgs[i].name, "%pOFn%d", node, chipid);
                 else
-                       sprintf(sgs[i].name, "%s", node->name);
+                       sprintf(sgs[i].name, "%pOFn", node);
  
                 sgs[i].sg.name = sgs[i].name;
                 if (add_attr_group(ops, len, &sgs[i], sgid)) {
diff --git a/arch/powerpc/platforms/powernv/opal-sysparam.c b/arch/powerpc/platforms/powernv/opal-sysparam.c

index 9aa87df..916a4b7 100644 (file)
--- a/arch/powerpc/platforms/powernv/opal-sysparam.c
+++ b/arch/powerpc/platforms/powernv/opal-sysparam.c
@@ -194,7 +194,7 @@ void __init opal_sys_param_init(void)
         count = of_property_count_strings(sysparam, "param-name");
         if (count < 0) {
                 pr_err("SYSPARAM: No string found of property param-name in "
-                               "the node %s\n", sysparam->name);
+                               "the node %pOFn\n", sysparam);
                 goto out_param_buf;
         }
  
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c

index 38fe408..a464151 100644 (file)
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -535,7 +535,7 @@ static int opal_recover_mce(struct pt_regs *regs,
         return recovered;
  }
  
-void pnv_platform_error_reboot(struct pt_regs *regs, const char *msg)
+void __noreturn pnv_platform_error_reboot(struct pt_regs *regs, const char *msg)
  {
         panic_flush_kmsg_start();
  
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c

index adddde0..14befee 100644 (file)
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -219,17 +219,41 @@ static void pnv_prepare_going_down(void)
  
  static void  __noreturn pnv_restart(char *cmd)
  {
-       long rc = OPAL_BUSY;
+       long rc;
  
         pnv_prepare_going_down();
  
-       while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
-               rc = opal_cec_reboot();
-               if (rc == OPAL_BUSY_EVENT)
-                       opal_poll_events(NULL);
+       do {
+               if (!cmd)
+                       rc = opal_cec_reboot();
+               else if (strcmp(cmd, "full") == 0)
+                       rc = opal_cec_reboot2(OPAL_REBOOT_FULL_IPL, NULL);
                 else
+                       rc = OPAL_UNSUPPORTED;
+
+               if (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
+                       /* OPAL is busy: wait for some time and retry */
+                       opal_poll_events(NULL);
                         mdelay(10);
-       }
+
+               } else  if (cmd && rc) {
+                       /* Unknown error while issuing reboot */
+                       if (rc == OPAL_UNSUPPORTED)
+                               pr_err("Unsupported '%s' reboot.\n", cmd);
+                       else
+                               pr_err("Unable to issue '%s' reboot. Err=%ld\n",
+                                      cmd, rc);
+                       pr_info("Forcing a cec-reboot\n");
+                       cmd = NULL;
+                       rc = OPAL_BUSY;
+
+               } else if (rc != OPAL_SUCCESS) {
+                       /* Unknown error while issuing cec-reboot */
+                       pr_err("Unable to reboot. Err=%ld\n", rc);
+               }
+
+       } while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT);
+
         for (;;)
                 opal_poll_events(NULL);
  }
@@ -437,6 +461,16 @@ static unsigned long pnv_get_proc_freq(unsigned int cpu)
         return ret_freq;
  }
  
+static long pnv_machine_check_early(struct pt_regs *regs)
+{
+       long handled = 0;
+
+       if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
+               handled = cur_cpu_spec->machine_check_early(regs);
+
+       return handled;
+}
+
  define_machine(powernv) {
         .name                   = "PowerNV",
         .probe                  = pnv_probe,
@@ -448,6 +482,7 @@ define_machine(powernv) {
         .machine_shutdown       = pnv_shutdown,
         .power_save             = NULL,
         .calibrate_decr         = generic_calibrate_decr,
+       .machine_check_early    = pnv_machine_check_early,
  #ifdef CONFIG_KEXEC_CORE
         .kexec_cpu_down         = pnv_kexec_cpu_down,
  #endif
diff --git a/arch/powerpc/platforms/ps3/Kconfig b/arch/powerpc/platforms/ps3/Kconfig

index 6f75255..24864b8 100644 (file)
--- a/arch/powerpc/platforms/ps3/Kconfig
+++ b/arch/powerpc/platforms/ps3/Kconfig
@@ -49,7 +49,6 @@ config PS3_HTAB_SIZE
  config PS3_DYNAMIC_DMA
         depends on PPC_PS3
         bool "PS3 Platform dynamic DMA page table management"
-       default n
         help
           This option will enable kernel support to take advantage of the
           per device dynamic DMA page table management provided by the Cell
@@ -89,7 +88,6 @@ config PS3_SYS_MANAGER
  config PS3_REPOSITORY_WRITE
         bool "PS3 Repository write support" if PS3_ADVANCED
         depends on PPC_PS3
-       default n
         help
           Enables support for writing to the PS3 System Repository.
  
diff --git a/arch/powerpc/platforms/ps3/os-area.c b/arch/powerpc/platforms/ps3/os-area.c

index cdbfc5c..f5387ad 100644 (file)
--- a/arch/powerpc/platforms/ps3/os-area.c
+++ b/arch/powerpc/platforms/ps3/os-area.c
@@ -664,7 +664,7 @@ static int update_flash_db(void)
         db_set_64(db, &os_area_db_id_rtc_diff, saved_params.rtc_diff);
  
         count = os_area_flash_write(db, sizeof(struct os_area_db), pos);
-       if (count < sizeof(struct os_area_db)) {
+       if (count < 0 || count < sizeof(struct os_area_db)) {
                 pr_debug("%s: os_area_flash_write failed %zd\n", __func__,
                          count);
                 error = count < 0 ? count : -EIO;
diff --git a/arch/powerpc/platforms/ps3/spu.c b/arch/powerpc/platforms/ps3/spu.c

index b548508..7746c2a 100644 (file)
--- a/arch/powerpc/platforms/ps3/spu.c
+++ b/arch/powerpc/platforms/ps3/spu.c
@@ -215,8 +215,7 @@ static int __init setup_areas(struct spu *spu)
                 goto fail_ioremap;
         }
  
-       spu->local_store = (__force void *)ioremap_prot(spu->local_store_phys,
-               LS_SIZE, pgprot_val(pgprot_noncached_wc(__pgprot(0))));
+       spu->local_store = (__force void *)ioremap_wc(spu->local_store_phys, LS_SIZE);
  
         if (!spu->local_store) {
                 pr_debug("%s:%d: ioremap local_store failed\n",
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig

index 0c698fd..2e4bd32 100644 (file)
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -28,7 +28,6 @@ config PPC_PSERIES
  config PPC_SPLPAR
         depends on PPC_PSERIES
         bool "Support for shared-processor logical partitions"
-       default n
         help
           Enabling this option will make the kernel run more efficiently
           on logically-partitioned pSeries systems which use shared
@@ -99,7 +98,6 @@ config PPC_SMLPAR
         bool "Support for shared-memory logical partitions"
         depends on PPC_PSERIES
         select LPARCFG
-       default n
         help
           Select this option to enable shared memory partition support.
           With this option a system running in an LPAR can be given more
@@ -140,3 +138,10 @@ config IBMEBUS
         bool "Support for GX bus based adapters"
         help
           Bus device driver for GX bus based adapters.
+
+config PAPR_SCM
+       depends on PPC_PSERIES && MEMORY_HOTPLUG
+       select LIBNVDIMM
+       tristate "Support for the PAPR Storage Class Memory interface"
+       help
+         Enable access to hypervisor-provided storage class memory.
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile

index 7e89d5c..a43ec84 100644 (file)
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -13,7 +13,7 @@ obj-$(CONFIG_KEXEC_CORE)      += kexec.o
  obj-$(CONFIG_PSERIES_ENERGY)   += pseries_energy.o
  
  obj-$(CONFIG_HOTPLUG_CPU)      += hotplug-cpu.o
-obj-$(CONFIG_MEMORY_HOTPLUG)   += hotplug-memory.o
+obj-$(CONFIG_MEMORY_HOTPLUG)   += hotplug-memory.o pmem.o
  
  obj-$(CONFIG_HVC_CONSOLE)      += hvconsole.o
  obj-$(CONFIG_HVCS)             += hvcserver.o
@@ -24,6 +24,7 @@ obj-$(CONFIG_IO_EVENT_IRQ)    += io_event_irq.o
  obj-$(CONFIG_LPARCFG)          += lparcfg.o
  obj-$(CONFIG_IBMVIO)           += vio.o
  obj-$(CONFIG_IBMEBUS)          += ibmebus.o
+obj-$(CONFIG_PAPR_SCM)         += papr_scm.o
  
  ifdef CONFIG_PPC_PSERIES
  obj-$(CONFIG_SUSPEND)          += suspend.o
diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c

index a0b20c0..7625546 100644 (file)
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -32,8 +32,6 @@ static struct workqueue_struct *pseries_hp_wq;
  struct pseries_hp_work {
         struct work_struct work;
         struct pseries_hp_errorlog *errlog;
-       struct completion *hp_completion;
-       int *rc;
  };
  
  struct cc_workarea {
@@ -329,7 +327,7 @@ int dlpar_release_drc(u32 drc_index)
         return 0;
  }
  
-static int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog)
+int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog)
  {
         int rc;
  
@@ -357,6 +355,10 @@ static int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog)
         case PSERIES_HP_ELOG_RESOURCE_CPU:
                 rc = dlpar_cpu(hp_elog);
                 break;
+       case PSERIES_HP_ELOG_RESOURCE_PMEM:
+               rc = dlpar_hp_pmem(hp_elog);
+               break;
+
         default:
                 pr_warn_ratelimited("Invalid resource (%d) specified\n",
                                     hp_elog->resource);
@@ -371,20 +373,13 @@ static void pseries_hp_work_fn(struct work_struct *work)
         struct pseries_hp_work *hp_work =
                         container_of(work, struct pseries_hp_work, work);
  
-       if (hp_work->rc)
-               *(hp_work->rc) = handle_dlpar_errorlog(hp_work->errlog);
-       else
-               handle_dlpar_errorlog(hp_work->errlog);
-
-       if (hp_work->hp_completion)
-               complete(hp_work->hp_completion);
+       handle_dlpar_errorlog(hp_work->errlog);
  
         kfree(hp_work->errlog);
         kfree((void *)work);
  }
  
-void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog,
-                        struct completion *hotplug_done, int *rc)
+void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog)
  {
         struct pseries_hp_work *work;
         struct pseries_hp_errorlog *hp_errlog_copy;
@@ -397,13 +392,9 @@ void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog,
         if (work) {
                 INIT_WORK((struct work_struct *)work, pseries_hp_work_fn);
                 work->errlog = hp_errlog_copy;
-               work->hp_completion = hotplug_done;
-               work->rc = rc;
                 queue_work(pseries_hp_wq, (struct work_struct *)work);
         } else {
-               *rc = -ENOMEM;
                 kfree(hp_errlog_copy);
-               complete(hotplug_done);
         }
  }
  
@@ -521,18 +512,15 @@ static int dlpar_parse_id_type(char **cmd, struct pseries_hp_errorlog *hp_elog)
  static ssize_t dlpar_store(struct class *class, struct class_attribute *attr,
                            const char *buf, size_t count)
  {
-       struct pseries_hp_errorlog *hp_elog;
-       struct completion hotplug_done;
+       struct pseries_hp_errorlog hp_elog;
         char *argbuf;
         char *args;
         int rc;
  
         args = argbuf = kstrdup(buf, GFP_KERNEL);
-       hp_elog = kzalloc(sizeof(*hp_elog), GFP_KERNEL);
-       if (!hp_elog || !argbuf) {
+       if (!argbuf) {
                 pr_info("Could not allocate resources for DLPAR operation\n");
                 kfree(argbuf);
-               kfree(hp_elog);
                 return -ENOMEM;
         }
  
@@ -540,25 +528,22 @@ static ssize_t dlpar_store(struct class *class, struct class_attribute *attr,
          * Parse out the request from the user, this will be in the form:
          * <resource> <action> <id_type> <id>
          */
-       rc = dlpar_parse_resource(&args, hp_elog);
+       rc = dlpar_parse_resource(&args, &hp_elog);
         if (rc)
                 goto dlpar_store_out;
  
-       rc = dlpar_parse_action(&args, hp_elog);
+       rc = dlpar_parse_action(&args, &hp_elog);
         if (rc)
                 goto dlpar_store_out;
  
-       rc = dlpar_parse_id_type(&args, hp_elog);
+       rc = dlpar_parse_id_type(&args, &hp_elog);
         if (rc)
                 goto dlpar_store_out;
  
-       init_completion(&hotplug_done);
-       queue_hotplug_event(hp_elog, &hotplug_done, &rc);
-       wait_for_completion(&hotplug_done);
+       rc = handle_dlpar_errorlog(&hp_elog);
  
  dlpar_store_out:
         kfree(argbuf);
-       kfree(hp_elog);
  
         if (rc)
                 pr_err("Could not handle DLPAR request \"%s\"\n", buf);
diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c

index 18014cd..ef65951 100644 (file)
--- a/arch/powerpc/platforms/pseries/dtl.c
+++ b/arch/powerpc/platforms/pseries/dtl.c
@@ -149,7 +149,7 @@ static int dtl_start(struct dtl *dtl)
  
         /* Register our dtl buffer with the hypervisor. The HV expects the
          * buffer size to be passed in the second word of the buffer */
-       ((u32 *)dtl->buf)[1] = DISPATCH_LOG_BYTES;
+       ((u32 *)dtl->buf)[1] = cpu_to_be32(DISPATCH_LOG_BYTES);
  
         hwcpu = get_hard_smp_processor_id(dtl->cpu);
         addr = __pa(dtl->buf);
@@ -184,7 +184,7 @@ static void dtl_stop(struct dtl *dtl)
  
  static u64 dtl_current_index(struct dtl *dtl)
  {
-       return lppaca_of(dtl->cpu).dtl_idx;
+       return be64_to_cpu(lppaca_of(dtl->cpu).dtl_idx);
  }
  #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
  
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c

index 823cb27..c9e5ca4 100644 (file)
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -438,7 +438,7 @@ static int pseries_eeh_get_pe_addr(struct eeh_pe *pe)
  /**
   * pseries_eeh_get_state - Retrieve PE state
   * @pe: EEH PE
- * @state: return value
+ * @delay: suggested time to wait if state is unavailable
   *
   * Retrieve the state of the specified PE. On RTAS compliant
   * pseries platform, there already has one dedicated RTAS function
@@ -448,7 +448,7 @@ static int pseries_eeh_get_pe_addr(struct eeh_pe *pe)
   * RTAS calls for the purpose, we need to try the new one and back
   * to the old one if the new one couldn't work properly.
   */
-static int pseries_eeh_get_state(struct eeh_pe *pe, int *state)
+static int pseries_eeh_get_state(struct eeh_pe *pe, int *delay)
  {
         int config_addr;
         int ret;
@@ -499,7 +499,8 @@ static int pseries_eeh_get_state(struct eeh_pe *pe, int *state)
                 break;
         case 5:
                 if (rets[2]) {
-                       if (state) *state = rets[2];
+                       if (delay)
+                               *delay = rets[2];
                         result = EEH_STATE_UNAVAILABLE;
                 } else {
                         result = EEH_STATE_NOT_SUPPORT;
@@ -553,64 +554,6 @@ static int pseries_eeh_reset(struct eeh_pe *pe, int option)
         return ret;
  }
  
-/**
- * pseries_eeh_wait_state - Wait for PE state
- * @pe: EEH PE
- * @max_wait: maximal period in millisecond
- *
- * Wait for the state of associated PE. It might take some time
- * to retrieve the PE's state.
- */
-static int pseries_eeh_wait_state(struct eeh_pe *pe, int max_wait)
-{
-       int ret;
-       int mwait;
-
-       /*
-        * According to PAPR, the state of PE might be temporarily
-        * unavailable. Under the circumstance, we have to wait
-        * for indicated time determined by firmware. The maximal
-        * wait time is 5 minutes, which is acquired from the original
-        * EEH implementation. Also, the original implementation
-        * also defined the minimal wait time as 1 second.
-        */
-#define EEH_STATE_MIN_WAIT_TIME        (1000)
-#define EEH_STATE_MAX_WAIT_TIME        (300 * 1000)
-
-       while (1) {
-               ret = pseries_eeh_get_state(pe, &mwait);
-
-               /*
-                * If the PE's state is temporarily unavailable,
-                * we have to wait for the specified time. Otherwise,
-                * the PE's state will be returned immediately.
-                */
-               if (ret != EEH_STATE_UNAVAILABLE)
-                       return ret;
-
-               if (max_wait <= 0) {
-                       pr_warn("%s: Timeout when getting PE's state (%d)\n",
-                               __func__, max_wait);
-                       return EEH_STATE_NOT_SUPPORT;
-               }
-
-               if (mwait <= 0) {
-                       pr_warn("%s: Firmware returned bad wait value %d\n",
-                               __func__, mwait);
-                       mwait = EEH_STATE_MIN_WAIT_TIME;
-               } else if (mwait > EEH_STATE_MAX_WAIT_TIME) {
-                       pr_warn("%s: Firmware returned too long wait value %d\n",
-                               __func__, mwait);
-                       mwait = EEH_STATE_MAX_WAIT_TIME;
-               }
-
-               max_wait -= mwait;
-               msleep(mwait);
-       }
-
-       return EEH_STATE_NOT_SUPPORT;
-}
-
  /**
   * pseries_eeh_get_log - Retrieve error log
   * @pe: EEH PE
@@ -849,7 +792,6 @@ static struct eeh_ops pseries_eeh_ops = {
         .get_pe_addr            = pseries_eeh_get_pe_addr,
         .get_state              = pseries_eeh_get_state,
         .reset                  = pseries_eeh_reset,
-       .wait_state             = pseries_eeh_wait_state,
         .get_log                = pseries_eeh_get_log,
         .configure_bridge       = pseries_eeh_configure_bridge,
         .err_inject             = NULL,
diff --git a/arch/powerpc/platforms/pseries/event_sources.c b/arch/powerpc/platforms/pseries/event_sources.c

index 6eeb0d4..446ef10 100644 (file)
--- a/arch/powerpc/platforms/pseries/event_sources.c
+++ b/arch/powerpc/platforms/pseries/event_sources.c
@@ -16,7 +16,8 @@
   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
   */
  
-#include <asm/prom.h>
+#include <linux/interrupt.h>
+#include <linux/of_irq.h>
  
  #include "pseries.h"
  
@@ -24,34 +25,19 @@ void request_event_sources_irqs(struct device_node *np,
                                 irq_handler_t handler,
                                 const char *name)
  {
-       int i, index, count = 0;
-       struct of_phandle_args oirq;
-       unsigned int virqs[16];
+       int i, virq, rc;
  
-       /* First try to do a proper OF tree parsing */
-       for (index = 0; of_irq_parse_one(np, index, &oirq) == 0;
-            index++) {
-               if (count > 15)
-                       break;
-               virqs[count] = irq_create_of_mapping(&oirq);
-               if (!virqs[count]) {
-                       pr_err("event-sources: Unable to allocate "
-                              "interrupt number for %pOF\n",
-                              np);
-                       WARN_ON(1);
-               } else {
-                       count++;
-               }
-       }
+       for (i = 0; i < 16; i++) {
+               virq = of_irq_get(np, i);
+               if (virq < 0)
+                       return;
+               if (WARN(!virq, "event-sources: Unable to allocate "
+                               "interrupt number for %pOF\n", np))
+                       continue;
  
-       /* Now request them */
-       for (i = 0; i < count; i++) {
-               if (request_irq(virqs[i], handler, 0, name, NULL)) {
-                       pr_err("event-sources: Unable to request interrupt "
-                              "%d for %pOF\n", virqs[i], np);
-                       WARN_ON(1);
+               rc = request_irq(virq, handler, 0, name, NULL);
+               if (WARN(rc, "event-sources: Unable to request interrupt %d for %pOF\n",
+                   virq, np))
                         return;
-               }
         }
  }
-
diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c

index a3bbeb4..608ecad 100644 (file)
--- a/arch/powerpc/platforms/pseries/firmware.c
+++ b/arch/powerpc/platforms/pseries/firmware.c
@@ -65,6 +65,8 @@ hypertas_fw_features_table[] = {
         {FW_FEATURE_SET_MODE,           "hcall-set-mode"},
         {FW_FEATURE_BEST_ENERGY,        "hcall-best-energy-1*"},
         {FW_FEATURE_HPT_RESIZE,         "hcall-hpt-resize"},
+       {FW_FEATURE_BLOCK_REMOVE,       "hcall-block-remove"},
+       {FW_FEATURE_PAPR_SCM,           "hcall-scm"},
  };
  
  /* Build up the firmware features bitmask using the contents of
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c

index 6ef77ca..2f8e621 100644 (file)
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -287,7 +287,7 @@ static int pseries_add_processor(struct device_node *np)
  
         if (cpumask_empty(tmp)) {
                 printk(KERN_ERR "Unable to find space in cpu_present_mask for"
-                      " processor %s with %d thread(s)\n", np->name,
+                      " processor %pOFn with %d thread(s)\n", np,
                        nthreads);
                 goto out_unlock;
         }
@@ -481,8 +481,8 @@ static ssize_t dlpar_cpu_add(u32 drc_index)
  
         if (rc) {
                 saved_rc = rc;
-               pr_warn("Failed to attach node %s, rc: %d, drc index: %x\n",
-                       dn->name, rc, drc_index);
+               pr_warn("Failed to attach node %pOFn, rc: %d, drc index: %x\n",
+                       dn, rc, drc_index);
  
                 rc = dlpar_release_drc(drc_index);
                 if (!rc)
@@ -494,8 +494,8 @@ static ssize_t dlpar_cpu_add(u32 drc_index)
         rc = dlpar_online_cpu(dn);
         if (rc) {
                 saved_rc = rc;
-               pr_warn("Failed to online cpu %s, rc: %d, drc index: %x\n",
-                       dn->name, rc, drc_index);
+               pr_warn("Failed to online cpu %pOFn, rc: %d, drc index: %x\n",
+                       dn, rc, drc_index);
  
                 rc = dlpar_detach_node(dn);
                 if (!rc)
@@ -504,7 +504,7 @@ static ssize_t dlpar_cpu_add(u32 drc_index)
                 return saved_rc;
         }
  
-       pr_debug("Successfully added CPU %s, drc index: %x\n", dn->name,
+       pr_debug("Successfully added CPU %pOFn, drc index: %x\n", dn,
                  drc_index);
         return rc;
  }
@@ -570,19 +570,19 @@ static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index)
  {
         int rc;
  
-       pr_debug("Attempting to remove CPU %s, drc index: %x\n",
-                dn->name, drc_index);
+       pr_debug("Attempting to remove CPU %pOFn, drc index: %x\n",
+                dn, drc_index);
  
         rc = dlpar_offline_cpu(dn);
         if (rc) {
-               pr_warn("Failed to offline CPU %s, rc: %d\n", dn->name, rc);
+               pr_warn("Failed to offline CPU %pOFn, rc: %d\n", dn, rc);
                 return -EINVAL;
         }
  
         rc = dlpar_release_drc(drc_index);
         if (rc) {
-               pr_warn("Failed to release drc (%x) for CPU %s, rc: %d\n",
-                       drc_index, dn->name, rc);
+               pr_warn("Failed to release drc (%x) for CPU %pOFn, rc: %d\n",
+                       drc_index, dn, rc);
                 dlpar_online_cpu(dn);
                 return rc;
         }
@@ -591,7 +591,7 @@ static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index)
         if (rc) {
                 int saved_rc = rc;
  
-               pr_warn("Failed to detach CPU %s, rc: %d", dn->name, rc);
+               pr_warn("Failed to detach CPU %pOFn, rc: %d", dn, rc);
  
                 rc = dlpar_acquire_drc(drc_index);
                 if (!rc)
@@ -662,8 +662,8 @@ static int find_dlpar_cpus_to_remove(u32 *cpu_drcs, int cpus_to_remove)
                 rc = of_property_read_u32(dn, "ibm,my-drc-index",
                                           &cpu_drcs[cpus_found - 1]);
                 if (rc) {
-                       pr_warn("Error occurred getting drc-index for %s\n",
-                               dn->name);
+                       pr_warn("Error occurred getting drc-index for %pOFn\n",
+                               dn);
                         of_node_put(dn);
                         return -1;
                 }
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c

index c1578f5..2b796da 100644 (file)
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -101,11 +101,12 @@ static struct property *dlpar_clone_property(struct property *prop,
         return new_prop;
  }
  
-static u32 find_aa_index(struct device_node *dr_node,
-                        struct property *ala_prop, const u32 *lmb_assoc)
+static bool find_aa_index(struct device_node *dr_node,
+                        struct property *ala_prop,
+                        const u32 *lmb_assoc, u32 *aa_index)
  {
-       u32 *assoc_arrays;
-       u32 aa_index;
+       u32 *assoc_arrays, new_prop_size;
+       struct property *new_prop;
         int aa_arrays, aa_array_entries, aa_array_sz;
         int i, index;
  
@@ -121,54 +122,48 @@ static u32 find_aa_index(struct device_node *dr_node,
         aa_array_entries = be32_to_cpu(assoc_arrays[1]);
         aa_array_sz = aa_array_entries * sizeof(u32);
  
-       aa_index = -1;
         for (i = 0; i < aa_arrays; i++) {
                 index = (i * aa_array_entries) + 2;
  
                 if (memcmp(&assoc_arrays[index], &lmb_assoc[1], aa_array_sz))
                         continue;
  
-               aa_index = i;
-               break;
+               *aa_index = i;
+               return true;
         }
  
-       if (aa_index == -1) {
-               struct property *new_prop;
-               u32 new_prop_size;
-
-               new_prop_size = ala_prop->length + aa_array_sz;
-               new_prop = dlpar_clone_property(ala_prop, new_prop_size);
-               if (!new_prop)
-                       return -1;
-
-               assoc_arrays = new_prop->value;
+       new_prop_size = ala_prop->length + aa_array_sz;
+       new_prop = dlpar_clone_property(ala_prop, new_prop_size);
+       if (!new_prop)
+               return false;
  
-               /* increment the number of entries in the lookup array */
-               assoc_arrays[0] = cpu_to_be32(aa_arrays + 1);
+       assoc_arrays = new_prop->value;
  
-               /* copy the new associativity into the lookup array */
-               index = aa_arrays * aa_array_entries + 2;
-               memcpy(&assoc_arrays[index], &lmb_assoc[1], aa_array_sz);
+       /* increment the number of entries in the lookup array */
+       assoc_arrays[0] = cpu_to_be32(aa_arrays + 1);
  
-               of_update_property(dr_node, new_prop);
+       /* copy the new associativity into the lookup array */
+       index = aa_arrays * aa_array_entries + 2;
+       memcpy(&assoc_arrays[index], &lmb_assoc[1], aa_array_sz);
  
-               /*
-                * The associativity lookup array index for this lmb is
-                * number of entries - 1 since we added its associativity
-                * to the end of the lookup array.
-                */
-               aa_index = be32_to_cpu(assoc_arrays[0]) - 1;
-       }
+       of_update_property(dr_node, new_prop);
  
-       return aa_index;
+       /*
+        * The associativity lookup array index for this lmb is
+        * number of entries - 1 since we added its associativity
+        * to the end of the lookup array.
+        */
+       *aa_index = be32_to_cpu(assoc_arrays[0]) - 1;
+       return true;
  }
  
-static u32 lookup_lmb_associativity_index(struct drmem_lmb *lmb)
+static int update_lmb_associativity_index(struct drmem_lmb *lmb)
  {
         struct device_node *parent, *lmb_node, *dr_node;
         struct property *ala_prop;
         const u32 *lmb_assoc;
         u32 aa_index;
+       bool found;
  
         parent = of_find_node_by_path("/");
         if (!parent)
@@ -200,46 +195,17 @@ static u32 lookup_lmb_associativity_index(struct drmem_lmb *lmb)
                 return -ENODEV;
         }
  
-       aa_index = find_aa_index(dr_node, ala_prop, lmb_assoc);
+       found = find_aa_index(dr_node, ala_prop, lmb_assoc, &aa_index);
  
         dlpar_free_cc_nodes(lmb_node);
-       return aa_index;
-}
  
-static int dlpar_add_device_tree_lmb(struct drmem_lmb *lmb)
-{
-       int rc, aa_index;
-
-       lmb->flags |= DRCONF_MEM_ASSIGNED;
-
-       aa_index = lookup_lmb_associativity_index(lmb);
-       if (aa_index < 0) {
-               pr_err("Couldn't find associativity index for drc index %x\n",
-                      lmb->drc_index);
-               return aa_index;
+       if (!found) {
+               pr_err("Could not find LMB associativity\n");
+               return -1;
         }
  
         lmb->aa_index = aa_index;
-
-       rtas_hp_event = true;
-       rc = drmem_update_dt();
-       rtas_hp_event = false;
-
-       return rc;
-}
-
-static int dlpar_remove_device_tree_lmb(struct drmem_lmb *lmb)
-{
-       int rc;
-
-       lmb->flags &= ~DRCONF_MEM_ASSIGNED;
-       lmb->aa_index = 0xffffffff;
-
-       rtas_hp_event = true;
-       rc = drmem_update_dt();
-       rtas_hp_event = false;
-
-       return rc;
+       return 0;
  }
  
  static struct memory_block *lmb_to_memblock(struct drmem_lmb *lmb)
@@ -428,7 +394,9 @@ static int dlpar_remove_lmb(struct drmem_lmb *lmb)
         /* Update memory regions for memory remove */
         memblock_remove(lmb->base_addr, block_sz);
  
-       dlpar_remove_device_tree_lmb(lmb);
+       invalidate_lmb_associativity_index(lmb);
+       lmb->flags &= ~DRCONF_MEM_ASSIGNED;
+
         return 0;
  }
  
@@ -688,10 +656,8 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb)
         if (lmb->flags & DRCONF_MEM_ASSIGNED)
                 return -EINVAL;
  
-       rc = dlpar_add_device_tree_lmb(lmb);
+       rc = update_lmb_associativity_index(lmb);
         if (rc) {
-               pr_err("Couldn't update device tree for drc index %x\n",
-                      lmb->drc_index);
                 dlpar_release_drc(lmb->drc_index);
                 return rc;
         }
@@ -704,14 +670,14 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb)
         /* Add the memory */
         rc = add_memory(nid, lmb->base_addr, block_sz);
         if (rc) {
-               dlpar_remove_device_tree_lmb(lmb);
+               invalidate_lmb_associativity_index(lmb);
                 return rc;
         }
  
         rc = dlpar_online_lmb(lmb);
         if (rc) {
                 remove_memory(nid, lmb->base_addr, block_sz);
-               dlpar_remove_device_tree_lmb(lmb);
+               invalidate_lmb_associativity_index(lmb);
         } else {
                 lmb->flags |= DRCONF_MEM_ASSIGNED;
         }
@@ -958,6 +924,12 @@ int dlpar_memory(struct pseries_hp_errorlog *hp_elog)
                 break;
         }
  
+       if (!rc) {
+               rtas_hp_event = true;
+               rc = drmem_update_dt();
+               rtas_hp_event = false;
+       }
+
         unlock_device_hotplug();
         return rc;
  }
diff --git a/arch/powerpc/platforms/pseries/ibmebus.c b/arch/powerpc/platforms/pseries/ibmebus.c

index c7c1140..5b4a561 100644 (file)
--- a/arch/powerpc/platforms/pseries/ibmebus.c
+++ b/arch/powerpc/platforms/pseries/ibmebus.c
@@ -404,7 +404,7 @@ static ssize_t name_show(struct device *dev,
         struct platform_device *ofdev;
  
         ofdev = to_platform_device(dev);
-       return sprintf(buf, "%s\n", ofdev->dev.of_node->name);
+       return sprintf(buf, "%pOFn\n", ofdev->dev.of_node);
  }
  static DEVICE_ATTR_RO(name);
  
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c

index d3992ce..32d4452 100644 (file)
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -48,6 +48,7 @@
  #include <asm/kexec.h>
  #include <asm/fadump.h>
  #include <asm/asm-prototypes.h>
+#include <asm/debugfs.h>
  
  #include "pseries.h"
  
@@ -417,6 +418,79 @@ static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn,
         BUG_ON(lpar_rc != H_SUCCESS);
  }
  
+
+/*
+ * As defined in the PAPR's section 14.5.4.1.8
+ * The control mask doesn't include the returned reference and change bit from
+ * the processed PTE.
+ */
+#define HBLKR_AVPN             0x0100000000000000UL
+#define HBLKR_CTRL_MASK                0xf800000000000000UL
+#define HBLKR_CTRL_SUCCESS     0x8000000000000000UL
+#define HBLKR_CTRL_ERRNOTFOUND 0x8800000000000000UL
+#define HBLKR_CTRL_ERRBUSY     0xa000000000000000UL
+
+/**
+ * call_block_remove() - issue H_BLOCK_REMOVE for the entries held in @param.
+ * @idx: number of used slots in @param: param[0] holds the AVA and
+ *       param[1] .. param[idx - 1] hold the translation specifiers.
+ * @param: hcall argument buffer, PLPAR_HCALL9_BUFSIZE entries long.
+ * @retry_busy: if true, entries reported busy are retried until none remain.
+ *
+ * When fewer than PLPAR_HCALL9_BUFSIZE slots are used, the list is terminated
+ * with HBR_END before the hcall is made.  Entries the hypervisor reports as
+ * busy are packed back into @param starting at index 1.  If every one of the
+ * PLPAR_HCALL9_BUFSIZE - 1 specifiers came back busy, the batch is retried
+ * even when @retry_busy is false.
+ *
+ * Return: 0 when no entry is left to process, otherwise the number of busy
+ * entries now stored in @param from index 1 onwards.
+ */
+static unsigned long call_block_remove(unsigned long idx, unsigned long *param,
+                                      bool retry_busy)
+{
+       unsigned long i, rc, new_idx;
+       unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
+
+       if (idx < 2) {
+               /* only the AVA slot is set: there is nothing to remove */
+               pr_warn("Unexpected empty call to H_BLOCK_REMOVE\n");
+               return 0;
+       }
+again:
+       new_idx = 0;
+       if (idx > PLPAR_HCALL9_BUFSIZE) {
+               pr_err("Too many PTEs (%lu) for H_BLOCK_REMOVE\n", idx);
+               idx = PLPAR_HCALL9_BUFSIZE;
+       } else if (idx < PLPAR_HCALL9_BUFSIZE)
+               param[idx] = HBR_END;
+
+       rc = plpar_hcall9(H_BLOCK_REMOVE, retbuf,
+                         param[0], /* AVA */
+                         param[1],  param[2],  param[3],  param[4], /* TS0-7 */
+                         param[5],  param[6],  param[7],  param[8]);
+       if (rc == H_SUCCESS)
+               return 0;
+
+       BUG_ON(rc != H_PARTIAL);
+
+       /* Check that the unprocessed entries were 'not found' or 'busy' */
+       for (i = 0; i < idx-1; i++) {
+               unsigned long ctrl = retbuf[i] & HBLKR_CTRL_MASK;
+
+               if (ctrl == HBLKR_CTRL_ERRBUSY) {
+                       /* compact the busy entry towards the buffer start */
+                       param[++new_idx] = param[i+1];
+                       continue;
+               }
+
+               BUG_ON(ctrl != HBLKR_CTRL_SUCCESS
+                      && ctrl != HBLKR_CTRL_ERRNOTFOUND);
+       }
+
+       /*
+        * If there were entries found busy, retry these entries if requested,
+        * or if all the entries have to be retried.
+        */
+       if (new_idx && (retry_busy || new_idx == (PLPAR_HCALL9_BUFSIZE-1))) {
+               idx = new_idx + 1;
+               goto again;
+       }
+
+       return new_idx;
+}
+
  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
  /*
   * Limit iterations holding pSeries_lpar_tlbie_lock to 3. We also need
@@ -424,17 +498,57 @@ static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn,
   */
  #define PPC64_HUGE_HPTE_BATCH 12
  
-static void __pSeries_lpar_hugepage_invalidate(unsigned long *slot,
-                                            unsigned long *vpn, int count,
-                                            int psize, int ssize)
+/*
+ * Invalidate @count huge page HPTEs, given by their @slot and @vpn arrays,
+ * using H_BLOCK_REMOVE.  Entries are grouped per naturally aligned block of
+ * 8 virtual pages; a pending group is flushed through call_block_remove()
+ * whenever the block changes, the parameter buffer fills up, or the input
+ * is exhausted.
+ */
+static void hugepage_block_invalidate(unsigned long *slot, unsigned long *vpn,
+                                     int count, int psize, int ssize)
  {
         unsigned long param[PLPAR_HCALL9_BUFSIZE];
-       int i = 0, pix = 0, rc;
-       unsigned long flags = 0;
-       int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
+       unsigned long shift, current_vpgb, vpgb;
+       int i, pix = 0;
  
-       if (lock_tlbie)
-               spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
+       shift = mmu_psize_defs[psize].shift;
+
+       for (i = 0; i < count; i++) {
+               /*
+                * Shifting 3 bits more on the right to get an
+                * 8 pages aligned virtual address.
+                */
+               vpgb = (vpn[i] >> (shift - VPN_SHIFT + 3));
+               /* current_vpgb is only read once pix is non-zero */
+               if (!pix || vpgb != current_vpgb) {
+                       /*
+                        * Need to start a new 8 pages block, flush
+                        * the current one if needed.
+                        */
+                       if (pix)
+                               (void)call_block_remove(pix, param, true);
+                       current_vpgb = vpgb;
+                       param[0] = hpte_encode_avpn(vpn[i], psize, ssize);
+                       pix = 1;
+               }
+
+               param[pix++] = HBR_REQUEST | HBLKR_AVPN | slot[i];
+               if (pix == PLPAR_HCALL9_BUFSIZE) {
+                       pix = call_block_remove(pix, param, false);
+                       /*
+                        * pix = 0 means that all the entries were
+                        * removed, we can start a new block.
+                        * Otherwise, this means that there are entries
+                        * to retry, and pix points to latest one, so
+                        * we should increment it and try to continue
+                        * the same block.
+                        */
+                       if (pix)
+                               pix++;
+               }
+       }
+       /* flush whatever is still pending, retrying busy entries */
+       if (pix)
+               (void)call_block_remove(pix, param, true);
+}
+
+static void hugepage_bulk_invalidate(unsigned long *slot, unsigned long *vpn,
+                                    int count, int psize, int ssize)
+{
+       unsigned long param[PLPAR_HCALL9_BUFSIZE];
+       int i = 0, pix = 0, rc;
  
         for (i = 0; i < count; i++) {
  
@@ -462,6 +576,23 @@ static void __pSeries_lpar_hugepage_invalidate(unsigned long *slot,
                                   param[6], param[7]);
                 BUG_ON(rc != H_SUCCESS);
         }
+}
+
+static inline void __pSeries_lpar_hugepage_invalidate(unsigned long *slot,
+                                                     unsigned long *vpn,
+                                                     int count, int psize,
+                                                     int ssize)
+{
+       unsigned long flags = 0;
+       int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
+
+       if (lock_tlbie)
+               spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
+
+       if (firmware_has_feature(FW_FEATURE_BLOCK_REMOVE))
+               hugepage_block_invalidate(slot, vpn, count, psize, ssize);
+       else
+               hugepage_bulk_invalidate(slot, vpn, count, psize, ssize);
  
         if (lock_tlbie)
                 spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags);
@@ -546,6 +677,86 @@ static int pSeries_lpar_hpte_removebolted(unsigned long ea,
         return 0;
  }
  
+
+/*
+ * Return the hash page table slot of one sub-page of @pte: the hash of @vpn
+ * (inverted when the hidx marks the secondary group) selects the PTE group
+ * and the low hidx bits select the entry inside that group.
+ */
+static inline unsigned long compute_slot(real_pte_t pte,
+                                        unsigned long vpn,
+                                        unsigned long index,
+                                        unsigned long shift,
+                                        int ssize)
+{
+       unsigned long group_hash = hpt_hash(vpn, shift, ssize);
+       unsigned long hidx = __rpte_to_hidx(pte, index);
+       unsigned long group_base;
+
+       if (hidx & _PTEIDX_SECONDARY)
+               group_hash = ~group_hash;
+
+       group_base = (group_hash & htab_hash_mask) * HPTES_PER_GROUP;
+
+       return group_base + (hidx & _PTEIDX_GROUP_IX);
+}
+
+/*
+ * Flush the first @number entries of @batch with H_BLOCK_REMOVE, using
+ * @param as the hcall argument buffer.
+ *
+ * The hcall H_BLOCK_REMOVE implies that the virtual pages to be processed are
+ * "all within the same naturally aligned 8 page virtual address block", so
+ * the pending entries are flushed through call_block_remove() each time the
+ * block changes, the buffer fills up, or the batch is exhausted.
+ */
+static void do_block_remove(unsigned long number, struct ppc64_tlb_batch *batch,
+                           unsigned long *param)
+{
+       unsigned long vpn;
+       unsigned long i, pix = 0;
+       unsigned long index, shift, slot, current_vpgb, vpgb;
+       real_pte_t pte;
+       int psize, ssize;
+
+       psize = batch->psize;
+       ssize = batch->ssize;
+
+       for (i = 0; i < number; i++) {
+               vpn = batch->vpn[i];
+               pte = batch->pte[i];
+               pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
+                       /*
+                        * Shifting 3 bits more on the right to get an
+                        * 8 pages aligned virtual address.
+                        */
+                       vpgb = (vpn >> (shift - VPN_SHIFT + 3));
+                       /* current_vpgb is only read once pix is non-zero */
+                       if (!pix || vpgb != current_vpgb) {
+                               /*
+                                * Need to start a new 8 pages block, flush
+                                * the current one if needed.
+                                */
+                               if (pix)
+                                       (void)call_block_remove(pix, param,
+                                                               true);
+                               current_vpgb = vpgb;
+                               param[0] = hpte_encode_avpn(vpn, psize,
+                                                           ssize);
+                               pix = 1;
+                       }
+
+                       slot = compute_slot(pte, vpn, index, shift, ssize);
+                       param[pix++] = HBR_REQUEST | HBLKR_AVPN | slot;
+
+                       if (pix == PLPAR_HCALL9_BUFSIZE) {
+                               pix = call_block_remove(pix, param, false);
+                               /*
+                                * pix = 0 means that all the entries were
+                                * removed, we can start a new block.
+                                * Otherwise, this means that there are entries
+                                * to retry, and pix points to latest one, so
+                                * we should increment it and try to continue
+                                * the same block.
+                                */
+                               if (pix)
+                                       pix++;
+                       }
+               } pte_iterate_hashed_end();
+       }
+
+       /* flush whatever is still pending, retrying busy entries */
+       if (pix)
+               (void)call_block_remove(pix, param, true);
+}
+
  /*
   * Take a spinlock around flushes to avoid bouncing the hypervisor tlbie
   * lock.
@@ -558,13 +769,18 @@ static void pSeries_lpar_flush_hash_range(unsigned long number, int local)
         struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch);
         int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
         unsigned long param[PLPAR_HCALL9_BUFSIZE];
-       unsigned long hash, index, shift, hidx, slot;
+       unsigned long index, shift, slot;
         real_pte_t pte;
         int psize, ssize;
  
         if (lock_tlbie)
                 spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
  
+       if (firmware_has_feature(FW_FEATURE_BLOCK_REMOVE)) {
+               do_block_remove(number, batch, param);
+               goto out;
+       }
+
         psize = batch->psize;
         ssize = batch->ssize;
         pix = 0;
@@ -572,12 +788,7 @@ static void pSeries_lpar_flush_hash_range(unsigned long number, int local)
                 vpn = batch->vpn[i];
                 pte = batch->pte[i];
                 pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
-                       hash = hpt_hash(vpn, shift, ssize);
-                       hidx = __rpte_to_hidx(pte, index);
-                       if (hidx & _PTEIDX_SECONDARY)
-                               hash = ~hash;
-                       slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
-                       slot += hidx & _PTEIDX_GROUP_IX;
+                       slot = compute_slot(pte, vpn, index, shift, ssize);
                         if (!firmware_has_feature(FW_FEATURE_BULK_REMOVE)) {
                                 /*
                                  * lpar doesn't use the passed actual page size
@@ -608,6 +819,7 @@ static void pSeries_lpar_flush_hash_range(unsigned long number, int local)
                 BUG_ON(rc != H_SUCCESS);
         }
  
+out:
         if (lock_tlbie)
                 spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags);
  }
@@ -1028,3 +1240,56 @@ static int __init reserve_vrma_context_id(void)
         return 0;
  }
  machine_device_initcall(pseries, reserve_vrma_context_id);
+
+#ifdef CONFIG_DEBUG_FS
+/*
+ * debugfs read handler for the per-cpu vpa files: copies the raw contents
+ * of the lppaca of the CPU identified by the file's private data to @buf.
+ */
+static ssize_t vpa_file_read(struct file *filp, char __user *buf, size_t len,
+                             loff_t *pos)
+{
+       /* private_data is interpreted as the CPU number */
+       int cpu_id = (long)filp->private_data;
+       struct lppaca *vpa = &lppaca_of(cpu_id);
+
+       return simple_read_from_buffer(buf, len, pos, vpa, sizeof(*vpa));
+}
+
+/* File operations of the read-only per-cpu vpa debugfs files */
+static const struct file_operations vpa_fops = {
+       .open           = simple_open,
+       .read           = vpa_file_read,
+       .llseek         = default_llseek,
+};
+
+/*
+ * Create the "vpa" debugfs directory under powerpc_debugfs_root and populate
+ * it with one read-only "cpu-<n>" file per possible CPU.  Nothing is created
+ * when the SPLPAR firmware feature is absent.
+ *
+ * Returns 0 on success (or when skipped) and -ENOMEM if a directory or file
+ * could not be created.
+ */
+static int __init vpa_debugfs_init(void)
+{
+       static struct dentry *vpa_dir;
+       char fname[16];
+       long cpu;
+
+       if (!firmware_has_feature(FW_FEATURE_SPLPAR))
+               return 0;
+
+       vpa_dir = debugfs_create_dir("vpa", powerpc_debugfs_root);
+       if (vpa_dir == NULL) {
+               pr_warn("%s: can't create vpa root dir\n", __func__);
+               return -ENOMEM;
+       }
+
+       /* one file per possible CPU; the CPU number is passed as file data */
+       for_each_possible_cpu(cpu) {
+               sprintf(fname, "cpu-%ld", cpu);
+
+               if (debugfs_create_file(fname, 0400, vpa_dir, (void *)cpu,
+                                       &vpa_fops))
+                       continue;
+
+               pr_warn("%s: can't create per-cpu vpa file\n",
+                               __func__);
+               return -ENOMEM;
+       }
+
+       return 0;
+}
+machine_arch_initcall(pseries, vpa_debugfs_init);
+#endif /* CONFIG_DEBUG_FS */
diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c

index 7c872dc..8bd590a 100644 (file)
--- a/arch/powerpc/platforms/pseries/lparcfg.c
+++ b/arch/powerpc/platforms/pseries/lparcfg.c
@@ -585,8 +585,7 @@ static ssize_t update_mpp(u64 *entitlement, u8 *weight)
  static ssize_t lparcfg_write(struct file *file, const char __user * buf,
                              size_t count, loff_t * off)
  {
-       int kbuf_sz = 64;
-       char kbuf[kbuf_sz];
+       char kbuf[64];
         char *tmp;
         u64 new_entitled, *new_entitled_ptr = &new_entitled;
         u8 new_weight, *new_weight_ptr = &new_weight;
@@ -595,7 +594,7 @@ static ssize_t lparcfg_write(struct file *file, const char __user * buf,
         if (!firmware_has_feature(FW_FEATURE_SPLPAR))
                 return -EINVAL;
  
-       if (count > kbuf_sz)
+       if (count > sizeof(kbuf))
                 return -EINVAL;
  
         if (copy_from_user(kbuf, buf, count))
diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c

index f0e30dc..88925f8 100644 (file)
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -242,7 +242,7 @@ static int add_dt_node(__be32 parent_phandle, __be32 drc_index)
  
  static void prrn_update_node(__be32 phandle)
  {
-       struct pseries_hp_errorlog *hp_elog;
+       struct pseries_hp_errorlog hp_elog;
         struct device_node *dn;
  
         /*
@@ -255,18 +255,12 @@ static void prrn_update_node(__be32 phandle)
                 return;
         }
  
-       hp_elog = kzalloc(sizeof(*hp_elog), GFP_KERNEL);
-       if(!hp_elog)
-               return;
-
-       hp_elog->resource = PSERIES_HP_ELOG_RESOURCE_MEM;
-       hp_elog->action = PSERIES_HP_ELOG_ACTION_READD;
-       hp_elog->id_type = PSERIES_HP_ELOG_ID_DRC_INDEX;
-       hp_elog->_drc_u.drc_index = phandle;
-
-       queue_hotplug_event(hp_elog, NULL, NULL);
+       hp_elog.resource = PSERIES_HP_ELOG_RESOURCE_MEM;
+       hp_elog.action = PSERIES_HP_ELOG_ACTION_READD;
+       hp_elog.id_type = PSERIES_HP_ELOG_ID_DRC_INDEX;
+       hp_elog._drc_u.drc_index = phandle;
  
-       kfree(hp_elog);
+       handle_dlpar_errorlog(&hp_elog);
  }
  
  int pseries_devicetree_update(s32 scope)
@@ -366,6 +360,8 @@ static ssize_t migration_store(struct class *class,
         if (rc)
                 return rc;
  
+       stop_topology_update();
+
         do {
                 rc = rtas_ibm_suspend_me(streamid);
                 if (rc == -EAGAIN)
@@ -376,6 +372,9 @@ static ssize_t migration_store(struct class *class,
                 return rc;
  
         post_mobility_fixup();
+
+       start_topology_update();
+
         return count;
  }
  
diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c

index b749694..8011b41 100644 (file)
--- a/arch/powerpc/platforms/pseries/msi.c
+++ b/arch/powerpc/platforms/pseries/msi.c
@@ -203,7 +203,8 @@ static struct device_node *find_pe_dn(struct pci_dev *dev, int *total)
         /* Get the top level device in the PE */
         edev = pdn_to_eeh_dev(PCI_DN(dn));
         if (edev->pe)
-               edev = list_first_entry(&edev->pe->edevs, struct eeh_dev, list);
+               edev = list_first_entry(&edev->pe->edevs, struct eeh_dev,
+                                       entry);
         dn = pci_device_to_OF_node(edev->pdev);
         if (!dn)
                 return NULL;
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c

new file mode 100644 (file)

index 0000000..ee9372b
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -0,0 +1,345 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define pr_fmt(fmt)    "papr-scm: " fmt
+
+#include <linux/of.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/ioport.h>
+#include <linux/slab.h>
+#include <linux/ndctl.h>
+#include <linux/sched.h>
+#include <linux/libnvdimm.h>
+#include <linux/platform_device.h>
+
+#include <asm/plpar_wrappers.h>
+
+#define BIND_ANY_ADDR (~0ul)
+
+#define PAPR_SCM_DIMM_CMD_MASK \
+       ((1ul << ND_CMD_GET_CONFIG_SIZE) | \
+        (1ul << ND_CMD_GET_CONFIG_DATA) | \
+        (1ul << ND_CMD_SET_CONFIG_DATA))
+
+/* Per-device driver state, allocated in papr_scm_probe() */
+struct papr_scm_priv {
+       struct platform_device *pdev;
+       struct device_node *dn;
+       /* value of the "ibm,my-drc-index" DT property */
+       uint32_t drc_index;
+       /* second and first cell of the "ibm,unit-capacity" DT property */
+       uint64_t blocks;
+       uint64_t block_size;
+       /* value of the optional "ibm,metadata-size" DT property */
+       int metadata_size;
+
+       /* address handed back by the H_SCM_BIND_MEM hcall */
+       uint64_t bound_addr;
+
+       /* libnvdimm objects registered by papr_scm_nvdimm_init() */
+       struct nvdimm_bus_descriptor bus_desc;
+       struct nvdimm_bus *bus;
+       struct nvdimm *nvdimm;
+       struct resource res;
+       struct nd_region *region;
+       struct nd_interleave_set nd_set;
+};
+
+/*
+ * Bind the region described by @p with the H_SCM_BIND_MEM hcall and store
+ * the address returned by the hypervisor in p->bound_addr.
+ *
+ * Returns 0 on success and -ENXIO when the hcall finishes with an error.
+ */
+static int drc_pmem_bind(struct papr_scm_priv *p)
+{
+       unsigned long ret[PLPAR_HCALL_BUFSIZE];
+       uint64_t rc, token;
+
+       /*
+        * When the hypervisor cannot map all the requested memory in a single
+        * hcall it returns H_BUSY and we call again with the token until
+        * we get H_SUCCESS. Aborting the retry loop before getting H_SUCCESS
+        * leaves the system in an undefined state, so we wait.
+        */
+       token = 0;
+
+       do {
+               rc = plpar_hcall(H_SCM_BIND_MEM, ret, p->drc_index, 0,
+                               p->blocks, BIND_ANY_ADDR, token);
+               /*
+                * NOTE(review): presumably the hcall return buffer is already
+                * in CPU byte order, which would make this conversion wrong on
+                * little-endian -- TODO confirm.
+                */
+               token = be64_to_cpu(ret[0]);
+               cond_resched();
+       } while (rc == H_BUSY);
+
+       if (rc) {
+               dev_err(&p->pdev->dev, "bind err: %lld\n", rc);
+               return -ENXIO;
+       }
+
+       p->bound_addr = be64_to_cpu(ret[1]);
+
+       /*
+        * NOTE(review): papr_scm_probe() fills p->res only after this function
+        * has returned, so the range printed here looks unset -- confirm.
+        */
+       dev_dbg(&p->pdev->dev, "bound drc %x to %pR\n", p->drc_index, &p->res);
+
+       return 0;
+}
+
+/*
+ * Unbind the region described by @p with the H_SCM_UNBIND_MEM hcall.
+ *
+ * The hcall is reissued with the continuation token from the previous call
+ * for as long as it reports H_BUSY.
+ *
+ * Returns 0 on success and 1 if the hcall finished with an error.
+ */
+static int drc_pmem_unbind(struct papr_scm_priv *p)
+{
+       unsigned long ret[PLPAR_HCALL_BUFSIZE];
+       uint64_t rc, token;
+
+       token = 0;
+
+       /* NB: like the bind, the unbind must be retried until it completes */
+       do {
+               rc = plpar_hcall(H_SCM_UNBIND_MEM, ret, p->drc_index,
+                               p->bound_addr, p->blocks, token);
+               /*
+                * The token is the first return value; passing the array
+                * itself would convert its address instead.
+                */
+               token = be64_to_cpu(ret[0]);
+               cond_resched();
+       } while (rc == H_BUSY);
+
+       if (rc)
+               dev_err(&p->pdev->dev, "unbind error: %lld\n", rc);
+
+       return !!rc;
+}
+
+/*
+ * Read one metadata byte at hdr->in_offset with the H_SCM_READ_METADATA hcall
+ * and store it in hdr->out_buf[0].
+ *
+ * Returns 0 on success, -EINVAL for a request that is not a single byte
+ * inside the metadata area or for a generic hcall failure, and -ENODEV when
+ * the hcall reports H_PARAMETER.
+ */
+static int papr_scm_meta_get(struct papr_scm_priv *p,
+                       struct nd_cmd_get_config_data_hdr *hdr)
+{
+       unsigned long hret[PLPAR_HCALL_BUFSIZE];
+       int64_t hrc;
+
+       /* only one byte at a time, and only inside the metadata area */
+       if (hdr->in_length != 1 || hdr->in_offset >= p->metadata_size)
+               return -EINVAL;
+
+       hrc = plpar_hcall(H_SCM_READ_METADATA, hret, p->drc_index,
+                       hdr->in_offset, 1);
+
+       /* H_PARAMETER: bad DRC index; anything else: other invalid parameter */
+       if (hrc)
+               return hrc == H_PARAMETER ? -ENODEV : -EINVAL;
+
+       hdr->out_buf[0] = hret[0] & 0xff;
+
+       return 0;
+}
+
+/*
+ * Write the byte hdr->in_buf[0] to metadata offset hdr->in_offset with the
+ * H_SCM_WRITE_METADATA hcall.
+ *
+ * Returns 0 on success, -EINVAL for a request that is not a single byte
+ * inside the metadata area or for a generic hcall failure, and -ENODEV when
+ * the hcall reports H_PARAMETER.
+ */
+static int papr_scm_meta_set(struct papr_scm_priv *p,
+                       struct nd_cmd_set_config_hdr *hdr)
+{
+       int64_t hrc;
+
+       /* only one byte at a time, and only inside the metadata area */
+       if (hdr->in_length != 1 || hdr->in_offset >= p->metadata_size)
+               return -EINVAL;
+
+       hrc = plpar_hcall_norets(H_SCM_WRITE_METADATA,
+                       p->drc_index, hdr->in_offset, hdr->in_buf[0], 1);
+
+       /* H_PARAMETER: bad DRC index; anything else: other invalid parameter */
+       if (hrc)
+               return hrc == H_PARAMETER ? -ENODEV : -EINVAL;
+
+       return 0;
+}
+
+/*
+ * ndctl entry point installed in the bus descriptor.  Handles the three
+ * config-area commands for a DIMM; the per-command status is reported through
+ * @cmd_rc.
+ *
+ * Returns 0 when the command was dispatched, -EINVAL for bus-level calls
+ * (@nvdimm is NULL) and for unsupported commands.
+ */
+int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
+               unsigned int cmd, void *buf, unsigned int buf_len, int *cmd_rc)
+{
+       struct papr_scm_priv *priv;
+
+       /* Only dimm-specific calls are supported atm */
+       if (!nvdimm)
+               return -EINVAL;
+
+       priv = nvdimm_provider_data(nvdimm);
+
+       if (cmd == ND_CMD_GET_CONFIG_SIZE) {
+               struct nd_cmd_get_config_size *size_hdr = buf;
+
+               /* metadata is transferred one byte per command */
+               size_hdr->status = 0;
+               size_hdr->max_xfer = 1;
+               size_hdr->config_size = priv->metadata_size;
+               *cmd_rc = 0;
+       } else if (cmd == ND_CMD_GET_CONFIG_DATA) {
+               *cmd_rc = papr_scm_meta_get(priv, buf);
+       } else if (cmd == ND_CMD_SET_CONFIG_DATA) {
+               *cmd_rc = papr_scm_meta_set(priv, buf);
+       } else {
+               return -EINVAL;
+       }
+
+       dev_dbg(&priv->pdev->dev, "returned with cmd_rc = %d\n", *cmd_rc);
+
+       return 0;
+}
+
+/* sysfs attribute groups passed to the region descriptor */
+static const struct attribute_group *region_attr_groups[] = {
+       &nd_region_attribute_group,
+       &nd_device_attribute_group,
+       &nd_mapping_attribute_group,
+       &nd_numa_attribute_group,
+       NULL,
+};
+
+/* sysfs attribute groups passed to the bus descriptor */
+static const struct attribute_group *bus_attr_groups[] = {
+       &nvdimm_bus_attribute_group,
+       NULL,
+};
+
+/* sysfs attribute groups passed to nvdimm_create() */
+static const struct attribute_group *papr_scm_dimm_groups[] = {
+       &nvdimm_attribute_group,
+       &nd_device_attribute_group,
+       NULL,
+};
+
+/*
+ * Register the libnvdimm objects for @p: a bus, one DIMM on that bus and a
+ * pmem region covering p->res.
+ *
+ * Returns 0 on success, -ENOMEM if the provider name cannot be duplicated and
+ * -ENXIO if any registration fails.  On failure everything set up here,
+ * including the duplicated provider name, is released again.
+ */
+static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
+{
+       struct device *dev = &p->pdev->dev;
+       struct nd_mapping_desc mapping;
+       struct nd_region_desc ndr_desc;
+       unsigned long dimm_flags;
+
+       p->bus_desc.ndctl = papr_scm_ndctl;
+       p->bus_desc.module = THIS_MODULE;
+       p->bus_desc.of_node = p->pdev->dev.of_node;
+       p->bus_desc.attr_groups = bus_attr_groups;
+       p->bus_desc.provider_name = kstrdup(p->pdev->name, GFP_KERNEL);
+
+       if (!p->bus_desc.provider_name)
+               return -ENOMEM;
+
+       p->bus = nvdimm_bus_register(NULL, &p->bus_desc);
+       if (!p->bus) {
+               dev_err(dev, "Error creating nvdimm bus %pOF\n", p->dn);
+               /* no bus to unregister yet, but the name must not leak */
+               kfree(p->bus_desc.provider_name);
+               return -ENXIO;
+       }
+
+       dimm_flags = 0;
+       set_bit(NDD_ALIASING, &dimm_flags);
+
+       p->nvdimm = nvdimm_create(p->bus, p, papr_scm_dimm_groups,
+                               dimm_flags, PAPR_SCM_DIMM_CMD_MASK, 0, NULL);
+       if (!p->nvdimm) {
+               dev_err(dev, "Error creating DIMM object for %pOF\n", p->dn);
+               goto err;
+       }
+
+       /* now add the region */
+
+       memset(&mapping, 0, sizeof(mapping));
+       mapping.nvdimm = p->nvdimm;
+       mapping.start = 0;
+       mapping.size = p->blocks * p->block_size; // XXX: potential overflow?
+
+       memset(&ndr_desc, 0, sizeof(ndr_desc));
+       ndr_desc.attr_groups = region_attr_groups;
+       ndr_desc.numa_node = dev_to_node(&p->pdev->dev);
+       ndr_desc.res = &p->res;
+       ndr_desc.of_node = p->dn;
+       ndr_desc.provider_data = p;
+       ndr_desc.mapping = &mapping;
+       ndr_desc.num_mappings = 1;
+       ndr_desc.nd_set = &p->nd_set;
+       set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags);
+
+       p->region = nvdimm_pmem_region_create(p->bus, &ndr_desc);
+       if (!p->region) {
+               dev_err(dev, "Error registering region %pR from %pOF\n",
+                               ndr_desc.res, p->dn);
+               goto err;
+       }
+
+       return 0;
+
+err:   nvdimm_bus_unregister(p->bus);
+       kfree(p->bus_desc.provider_name);
+       return -ENXIO;
+}
+
+/*
+ * Probe one "ibm,pmemory" platform device: read its DT properties, bind the
+ * region through the hypervisor and register it with libnvdimm.
+ *
+ * Returns 0 on success, -ENODEV when a required DT property is missing,
+ * -ENOMEM on allocation failure, or the error of the bind/registration step.
+ */
+static int papr_scm_probe(struct platform_device *pdev)
+{
+       /*
+        * "ibm,metadata-size" is optional and of_property_read_u32() leaves
+        * its output untouched on failure, so start from zero.
+        */
+       uint32_t drc_index, metadata_size = 0, unit_cap[2];
+       struct device_node *dn = pdev->dev.of_node;
+       struct papr_scm_priv *p;
+       int rc;
+
+       /* check we have all the required DT properties */
+       if (of_property_read_u32(dn, "ibm,my-drc-index", &drc_index)) {
+               dev_err(&pdev->dev, "%pOF: missing drc-index!\n", dn);
+               return -ENODEV;
+       }
+
+       if (of_property_read_u32_array(dn, "ibm,unit-capacity", unit_cap, 2)) {
+               dev_err(&pdev->dev, "%pOF: missing unit-capacity!\n", dn);
+               return -ENODEV;
+       }
+
+       p = kzalloc(sizeof(*p), GFP_KERNEL);
+       if (!p)
+               return -ENOMEM;
+
+       /* optional DT properties */
+       of_property_read_u32(dn, "ibm,metadata-size", &metadata_size);
+
+       p->dn = dn;
+       p->drc_index = drc_index;
+       p->block_size = unit_cap[0];
+       p->blocks     = unit_cap[1];
+
+       /* might be zero */
+       p->metadata_size = metadata_size;
+       p->pdev = pdev;
+
+       /* request the hypervisor to bind this region to somewhere in memory */
+       rc = drc_pmem_bind(p);
+       if (rc)
+               goto err;
+
+       /*
+        * setup the resource for the newly bound range; resource ends are
+        * inclusive, hence the -1
+        */
+       p->res.start = p->bound_addr;
+       p->res.end   = p->bound_addr + p->blocks * p->block_size - 1;
+       p->res.name  = pdev->name;
+       p->res.flags = IORESOURCE_MEM;
+
+       rc = papr_scm_nvdimm_init(p);
+       if (rc)
+               goto err2;
+
+       platform_set_drvdata(pdev, p);
+
+       return 0;
+
+err2:  drc_pmem_unbind(p);
+err:   kfree(p);
+       return rc;
+}
+
+/*
+ * Tear down a probed device: unregister the nvdimm bus, unbind the region
+ * from the hypervisor and free the driver state, including the provider name
+ * duplicated by papr_scm_nvdimm_init().  Always returns 0.
+ */
+static int papr_scm_remove(struct platform_device *pdev)
+{
+       struct papr_scm_priv *p = platform_get_drvdata(pdev);
+
+       nvdimm_bus_unregister(p->bus);
+       drc_pmem_unbind(p);
+       /* kstrdup()'d at init time and owned by this driver */
+       kfree(p->bus_desc.provider_name);
+       kfree(p);
+
+       return 0;
+}
+
+/* Device tree nodes handled by this driver */
+static const struct of_device_id papr_scm_match[] = {
+       { .compatible = "ibm,pmemory" },
+       { },
+};
+
+/* Platform driver glue; registered by module_platform_driver() below */
+static struct platform_driver papr_scm_driver = {
+       .probe = papr_scm_probe,
+       .remove = papr_scm_remove,
+       .driver = {
+               .name = "papr_scm",
+               .owner = THIS_MODULE,
+               .of_match_table = papr_scm_match,
+       },
+};
+
+module_platform_driver(papr_scm_driver);
+MODULE_DEVICE_TABLE(of, papr_scm_match);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("IBM Corporation");
diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c

index eab9663..41d8a4d 100644 (file)
--- a/arch/powerpc/platforms/pseries/pci.c
+++ b/arch/powerpc/platforms/pseries/pci.c
@@ -239,6 +239,7 @@ void __init pSeries_final_fixup(void)
  {
         pSeries_request_regions();
  
+       eeh_probe_devices();
         eeh_addr_cache_build();
  
  #ifdef CONFIG_PCI_IOV
diff --git a/arch/powerpc/platforms/pseries/pmem.c b/arch/powerpc/platforms/pseries/pmem.c

new file mode 100644 (file)

index 0000000..a27f40e
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/pmem.c
@@ -0,0 +1,164 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Handles hot and cold plug of persistent memory regions on pseries.
+ */
+
+#define pr_fmt(fmt)     "pseries-pmem: " fmt
+
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/sched.h>       /* for idle_task_exit */
+#include <linux/sched/hotplug.h>
+#include <linux/cpu.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/slab.h>
+#include <asm/prom.h>
+#include <asm/rtas.h>
+#include <asm/firmware.h>
+#include <asm/machdep.h>
+#include <asm/vdso_datapage.h>
+#include <asm/plpar_wrappers.h>
+#include <asm/topology.h>
+
+#include "pseries.h"
+#include "offline_states.h"
+
+static struct device_node *pmem_node;
+
+/*
+ * Hot-add the pmem device identified by @drc_index: acquire the DRC,
+ * configure its connector to obtain the device node and attach that node
+ * below pmem_node.
+ *
+ * Returns 0 on success, -EINVAL if the DRC cannot be acquired or configured,
+ * or the error returned by dlpar_attach_node().
+ */
+static ssize_t pmem_drc_add_node(u32 drc_index)
+{
+       struct device_node *dn;
+       int rc;
+
+       pr_debug("Attempting to add pmem node, drc index: %x\n", drc_index);
+
+       rc = dlpar_acquire_drc(drc_index);
+       if (rc) {
+               pr_err("Failed to acquire DRC, rc: %d, drc index: %x\n",
+                       rc, drc_index);
+               return -EINVAL;
+       }
+
+       dn = dlpar_configure_connector(cpu_to_be32(drc_index), pmem_node);
+       if (!dn) {
+               pr_err("configure-connector failed for drc %x\n", drc_index);
+               dlpar_release_drc(drc_index);
+               return -EINVAL;
+       }
+
+       /* NB: The of reconfig notifier creates platform device from the node */
+       rc = dlpar_attach_node(dn, pmem_node);
+       if (rc) {
+               pr_err("Failed to attach node %s, rc: %d, drc index: %x\n",
+                       dn->name, rc, drc_index);
+
+               /*
+                * NOTE(review): the nodes are freed only when releasing the
+                * DRC fails; confirm this is not an inverted check.
+                */
+               if (dlpar_release_drc(drc_index))
+                       dlpar_free_cc_nodes(dn);
+
+               return rc;
+       }
+
+       pr_info("Successfully added %pOF, drc index: %x\n", dn, drc_index);
+
+       return 0;
+}
+
+/*
+ * Hot-remove the pmem device identified by @drc_index: look up the child of
+ * pmem_node whose "ibm,my-drc-index" matches, detach it and release the DRC.
+ * If the DRC cannot be released the node is attached again.
+ *
+ * Returns 0 on success, -ENODEV if no child uses @drc_index, or the error
+ * returned by dlpar_detach_node() / dlpar_release_drc().
+ */
+static ssize_t pmem_drc_remove_node(u32 drc_index)
+{
+       struct device_node *dn;
+       uint32_t index;
+       int rc;
+
+       /*
+        * NOTE(review): leaving for_each_child_of_node() with break presumably
+        * keeps a reference on dn -- confirm who drops it.
+        */
+       for_each_child_of_node(pmem_node, dn) {
+               if (of_property_read_u32(dn, "ibm,my-drc-index", &index))
+                       continue;
+               if (index == drc_index)
+                       break;
+       }
+
+       if (!dn) {
+               pr_err("Attempting to remove unused DRC index %x\n", drc_index);
+               return -ENODEV;
+       }
+
+       pr_debug("Attempting to remove %pOF, drc index: %x\n", dn, drc_index);
+
+       /* NB: tears down the ibm,pmemory device as a side-effect */
+       rc = dlpar_detach_node(dn);
+       if (rc)
+               return rc;
+
+       rc = dlpar_release_drc(drc_index);
+       if (rc) {
+               /* this is a pmem node, not a CPU */
+               pr_err("Failed to release drc (%x) for PMEM %s, rc: %d\n",
+                       drc_index, dn->name, rc);
+               dlpar_attach_node(dn, pmem_node);
+               return rc;
+       }
+
+       pr_info("Successfully removed PMEM with drc index: %x\n", drc_index);
+
+       return 0;
+}
+
+/*
+ * Handle a hotplug event for a persistent memory device.
+ *
+ * Only events identified by DRC index are supported, and only the ADD and
+ * REMOVE actions.  The add/remove itself runs under the device hotplug lock.
+ *
+ * Returns 0 on success, -ENODEV when there is no "ibm,persistent-memory"
+ * node, -EINVAL for unsupported event types or actions, or the error of the
+ * add/remove helper.
+ */
+int dlpar_hp_pmem(struct pseries_hp_errorlog *hp_elog)
+{
+       u32 drc_index;
+       int rc;
+
+       /* slim chance, but we might get a hotplug event while booting */
+       if (!pmem_node)
+               pmem_node = of_find_node_by_type(NULL, "ibm,persistent-memory");
+       if (!pmem_node) {
+               pr_err("Hotplug event for a pmem device, but none exists\n");
+               return -ENODEV;
+       }
+
+       if (hp_elog->id_type != PSERIES_HP_ELOG_ID_DRC_INDEX) {
+               pr_err("Unsupported hotplug event type %d\n",
+                               hp_elog->id_type);
+               return -EINVAL;
+       }
+
+       drc_index = hp_elog->_drc_u.drc_index;
+
+       lock_device_hotplug();
+
+       if (hp_elog->action == PSERIES_HP_ELOG_ACTION_ADD) {
+               rc = pmem_drc_add_node(drc_index);
+       } else if (hp_elog->action == PSERIES_HP_ELOG_ACTION_REMOVE) {
+               rc = pmem_drc_remove_node(drc_index);
+       } else {
+               pr_err("Unsupported hotplug action (%d)\n", hp_elog->action);
+               rc = -EINVAL;
+       }
+
+       unlock_device_hotplug();
+       return rc;
+}
+
+/* Match table handed to of_platform_bus_probe() for the pmem parent node */
+const struct of_device_id drc_pmem_match[] = {
+       { .type = "ibm,persistent-memory", },
+       {}
+};
+
+/*
+ * Look up the "ibm,persistent-memory" node at boot and, when it exists, have
+ * the OF platform code probe it.  Always returns 0.
+ */
+static int pseries_pmem_init(void)
+{
+       pmem_node = of_find_node_by_type(NULL, "ibm,persistent-memory");
+
+       /*
+        * The generic OF bus probe/populate handles creating platform devices
+        * from the child (ibm,pmemory) nodes. The generic code registers an of
+        * reconfig notifier to handle the hot-add/remove cases too.
+        */
+       if (pmem_node)
+               of_platform_bus_probe(pmem_node, drc_pmem_match, NULL);
+
+       return 0;
+}
+machine_arch_initcall(pseries, pseries_pmem_init);
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h

index 60db2ee..7dee8c5 100644 (file)
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -24,6 +24,7 @@ struct pt_regs;
  
  extern int pSeries_system_reset_exception(struct pt_regs *regs);
  extern int pSeries_machine_check_exception(struct pt_regs *regs);
+extern long pseries_machine_check_realmode(struct pt_regs *regs);
  
  #ifdef CONFIG_SMP
  extern void smp_init_pseries(void);
@@ -59,15 +60,21 @@ extern int dlpar_detach_node(struct device_node *);
  extern int dlpar_acquire_drc(u32 drc_index);
  extern int dlpar_release_drc(u32 drc_index);
  
-void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog,
-                        struct completion *hotplug_done, int *rc);
+void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog);
+int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_errlog);
+
  #ifdef CONFIG_MEMORY_HOTPLUG
  int dlpar_memory(struct pseries_hp_errorlog *hp_elog);
+int dlpar_hp_pmem(struct pseries_hp_errorlog *hp_elog);
  #else
  static inline int dlpar_memory(struct pseries_hp_errorlog *hp_elog)
  {
         return -EOPNOTSUPP;
  }
+static inline int dlpar_hp_pmem(struct pseries_hp_errorlog *hp_elog)
+{
+       return -EOPNOTSUPP;
+}
  #endif
  
  #ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c

index 851ce32..d97d527 100644 (file)
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -27,6 +27,7 @@
  #include <asm/machdep.h>
  #include <asm/rtas.h>
  #include <asm/firmware.h>
+#include <asm/mce.h>
  
  #include "pseries.h"
  
@@ -50,6 +51,101 @@ static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id);
  static irqreturn_t ras_epow_interrupt(int irq, void *dev_id);
  static irqreturn_t ras_error_interrupt(int irq, void *dev_id);
  
+/* RTAS pseries MCE errorlog section. */
+struct pseries_mc_errorlog {
+       __be32  fru_id;
+       __be32  proc_id;
+       u8      error_type;
+       /*
+        * sub_err_type (1 byte). Bit fields depend on error_type
+        *
+        *   MSB0
+        *   |
+        *   V
+        *   01234567
+        *   XXXXXXXX
+        *
+        * For error_type == MC_ERROR_TYPE_UE
+        *   XXXXXXXX
+        *   X          1: Permanent or Transient UE.
+        *    X         1: Effective address provided.
+        *     X        1: Logical address provided.
+        *      XX      2: Reserved.
+        *        XXX   3: Type of UE error.
+        *
+        * For error_type != MC_ERROR_TYPE_UE
+        *   XXXXXXXX
+        *   X          1: Effective address provided.
+        *    XXXXX     5: Reserved.
+        *         XX   2: Type of SLB/ERAT/TLB error.
+        */
+       u8      sub_err_type;
+       u8      reserved_1[6];
+       __be64  effective_address;
+       __be64  logical_address;
+} __packed;
+
+/* RTAS pseries MCE error types */
+#define MC_ERROR_TYPE_UE               0x00
+#define MC_ERROR_TYPE_SLB              0x01
+#define MC_ERROR_TYPE_ERAT             0x02
+#define MC_ERROR_TYPE_TLB              0x04
+#define MC_ERROR_TYPE_D_CACHE          0x05
+#define MC_ERROR_TYPE_I_CACHE          0x07
+
+/* RTAS pseries MCE error sub types */
+#define MC_ERROR_UE_INDETERMINATE              0
+#define MC_ERROR_UE_IFETCH                     1
+#define MC_ERROR_UE_PAGE_TABLE_WALK_IFETCH     2
+#define MC_ERROR_UE_LOAD_STORE                 3
+#define MC_ERROR_UE_PAGE_TABLE_WALK_LOAD_STORE 4
+
+#define MC_ERROR_SLB_PARITY            0
+#define MC_ERROR_SLB_MULTIHIT          1
+#define MC_ERROR_SLB_INDETERMINATE     2
+
+#define MC_ERROR_ERAT_PARITY           1
+#define MC_ERROR_ERAT_MULTIHIT         2
+#define MC_ERROR_ERAT_INDETERMINATE    3
+
+#define MC_ERROR_TLB_PARITY            1
+#define MC_ERROR_TLB_MULTIHIT          2
+#define MC_ERROR_TLB_INDETERMINATE     3
+
+static inline u8 rtas_mc_error_sub_type(const struct pseries_mc_errorlog *mlog)
+{
+       switch (mlog->error_type) {
+       case    MC_ERROR_TYPE_UE:
+               return (mlog->sub_err_type & 0x07);
+       case    MC_ERROR_TYPE_SLB:
+       case    MC_ERROR_TYPE_ERAT:
+       case    MC_ERROR_TYPE_TLB:
+               return (mlog->sub_err_type & 0x03);
+       default:
+               return 0;
+       }
+}
+
+static
+inline u64 rtas_mc_get_effective_addr(const struct pseries_mc_errorlog *mlog)
+{
+       __be64 addr = 0;
+
+       switch (mlog->error_type) {
+       case    MC_ERROR_TYPE_UE:
+               if (mlog->sub_err_type & 0x40)
+                       addr = mlog->effective_address;
+               break;
+       case    MC_ERROR_TYPE_SLB:
+       case    MC_ERROR_TYPE_ERAT:
+       case    MC_ERROR_TYPE_TLB:
+               if (mlog->sub_err_type & 0x80)
+                       addr = mlog->effective_address;
+       default:
+               break;
+       }
+       return be64_to_cpu(addr);
+}
  
  /*
   * Enable the hotplug interrupt late because processing them may touch other
@@ -237,8 +333,9 @@ static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id)
          * hotplug events on the ras_log_buf to be handled by rtas_errd.
          */
         if (hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_MEM ||
-           hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_CPU)
-               queue_hotplug_event(hp_elog, NULL, NULL);
+           hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_CPU ||
+           hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_PMEM)
+               queue_hotplug_event(hp_elog);
         else
                 log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0);
  
@@ -427,6 +524,188 @@ int pSeries_system_reset_exception(struct pt_regs *regs)
         return 0; /* need to perform reset */
  }
  
+#define VAL_TO_STRING(ar, val) \
+       (((val) < ARRAY_SIZE(ar)) ? ar[(val)] : "Unknown")
+
+static void pseries_print_mce_info(struct pt_regs *regs,
+                                  struct rtas_error_log *errp)
+{
+       const char *level, *sevstr;
+       struct pseries_errorlog *pseries_log;
+       struct pseries_mc_errorlog *mce_log;
+       u8 error_type, err_sub_type;
+       u64 addr;
+       u8 initiator = rtas_error_initiator(errp);
+       int disposition = rtas_error_disposition(errp);
+
+       static const char * const initiators[] = {
+               "Unknown",
+               "CPU",
+               "PCI",
+               "ISA",
+               "Memory",
+               "Power Mgmt",
+       };
+       static const char * const mc_err_types[] = {
+               "UE",
+               "SLB",
+               "ERAT",
+               "TLB",
+               "D-Cache",
+               "Unknown",
+               "I-Cache",
+       };
+       static const char * const mc_ue_types[] = {
+               "Indeterminate",
+               "Instruction fetch",
+               "Page table walk ifetch",
+               "Load/Store",
+               "Page table walk Load/Store",
+       };
+
+       /* SLB sub errors valid values are 0x0, 0x1, 0x2 */
+       static const char * const mc_slb_types[] = {
+               "Parity",
+               "Multihit",
+               "Indeterminate",
+       };
+
+       /* TLB and ERAT sub errors valid values are 0x1, 0x2, 0x3 */
+       static const char * const mc_soft_types[] = {
+               "Unknown",
+               "Parity",
+               "Multihit",
+               "Indeterminate",
+       };
+
+       if (!rtas_error_extended(errp)) {
+               pr_err("Machine check interrupt: Missing extended error log\n");
+               return;
+       }
+
+       pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
+       if (pseries_log == NULL)
+               return;
+
+       mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
+
+       error_type = mce_log->error_type;
+       err_sub_type = rtas_mc_error_sub_type(mce_log);
+
+       switch (rtas_error_severity(errp)) {
+       case RTAS_SEVERITY_NO_ERROR:
+               level = KERN_INFO;
+               sevstr = "Harmless";
+               break;
+       case RTAS_SEVERITY_WARNING:
+               level = KERN_WARNING;
+               sevstr = "";
+               break;
+       case RTAS_SEVERITY_ERROR:
+       case RTAS_SEVERITY_ERROR_SYNC:
+               level = KERN_ERR;
+               sevstr = "Severe";
+               break;
+       case RTAS_SEVERITY_FATAL:
+       default:
+               level = KERN_ERR;
+               sevstr = "Fatal";
+               break;
+       }
+
+#ifdef CONFIG_PPC_BOOK3S_64
+       /* Display faulty slb contents for SLB errors. */
+       if (error_type == MC_ERROR_TYPE_SLB)
+               slb_dump_contents(local_paca->mce_faulty_slbs);
+#endif
+
+       printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
+              disposition == RTAS_DISP_FULLY_RECOVERED ?
+              "Recovered" : "Not recovered");
+       if (user_mode(regs)) {
+               printk("%s  NIP: [%016lx] PID: %d Comm: %s\n", level,
+                      regs->nip, current->pid, current->comm);
+       } else {
+               printk("%s  NIP [%016lx]: %pS\n", level, regs->nip,
+                      (void *)regs->nip);
+       }
+       printk("%s  Initiator: %s\n", level,
+              VAL_TO_STRING(initiators, initiator));
+
+       switch (error_type) {
+       case MC_ERROR_TYPE_UE:
+               printk("%s  Error type: %s [%s]\n", level,
+                      VAL_TO_STRING(mc_err_types, error_type),
+                      VAL_TO_STRING(mc_ue_types, err_sub_type));
+               break;
+       case MC_ERROR_TYPE_SLB:
+               printk("%s  Error type: %s [%s]\n", level,
+                      VAL_TO_STRING(mc_err_types, error_type),
+                      VAL_TO_STRING(mc_slb_types, err_sub_type));
+               break;
+       case MC_ERROR_TYPE_ERAT:
+       case MC_ERROR_TYPE_TLB:
+               printk("%s  Error type: %s [%s]\n", level,
+                      VAL_TO_STRING(mc_err_types, error_type),
+                      VAL_TO_STRING(mc_soft_types, err_sub_type));
+               break;
+       default:
+               printk("%s  Error type: %s\n", level,
+                      VAL_TO_STRING(mc_err_types, error_type));
+               break;
+       }
+
+       addr = rtas_mc_get_effective_addr(mce_log);
+       if (addr)
+               printk("%s    Effective address: %016llx\n", level, addr);
+}
+
+static int mce_handle_error(struct rtas_error_log *errp)
+{
+       struct pseries_errorlog *pseries_log;
+       struct pseries_mc_errorlog *mce_log;
+       int disposition = rtas_error_disposition(errp);
+       u8 error_type;
+
+       if (!rtas_error_extended(errp))
+               goto out;
+
+       pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
+       if (pseries_log == NULL)
+               goto out;
+
+       mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
+       error_type = mce_log->error_type;
+
+#ifdef CONFIG_PPC_BOOK3S_64
+       if (disposition == RTAS_DISP_NOT_RECOVERED) {
+               switch (error_type) {
+               case    MC_ERROR_TYPE_SLB:
+               case    MC_ERROR_TYPE_ERAT:
+                       /*
+                        * Store the old slb content in paca before flushing.
+                        * Print this when we go to virtual mode.
+                        * We may hit another MCE if there is a parity error
+                        * on the SLB entry we are trying to read for saving.
+                        * Hence limit the SLB saving to a single level of
+                        * recursion.
+                        */
+                       if (local_paca->in_mce == 1)
+                               slb_save_contents(local_paca->mce_faulty_slbs);
+                       flush_and_reload_slb();
+                       disposition = RTAS_DISP_FULLY_RECOVERED;
+                       rtas_set_disposition_recovered(errp);
+                       break;
+               default:
+                       break;
+               }
+       }
+#endif
+
+out:
+       return disposition;
+}
+
  /*
   * Process MCE rtas errlog event.
   */
@@ -452,8 +731,11 @@ static int recover_mce(struct pt_regs *regs, struct rtas_error_log *err)
         int recovered = 0;
         int disposition = rtas_error_disposition(err);
  
+       pseries_print_mce_info(regs, err);
+
         if (!(regs->msr & MSR_RI)) {
                 /* If MSR_RI isn't set, we cannot recover */
+               pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n");
                 recovered = 0;
  
         } else if (disposition == RTAS_DISP_FULLY_RECOVERED) {
@@ -503,11 +785,31 @@ int pSeries_machine_check_exception(struct pt_regs *regs)
         struct rtas_error_log *errp;
  
         if (fwnmi_active) {
-               errp = fwnmi_get_errinfo(regs);
                 fwnmi_release_errinfo();
+               errp = fwnmi_get_errlog();
                 if (errp && recover_mce(regs, errp))
                         return 1;
         }
  
         return 0;
  }
+
+long pseries_machine_check_realmode(struct pt_regs *regs)
+{
+       struct rtas_error_log *errp;
+       int disposition;
+
+       if (fwnmi_active) {
+               errp = fwnmi_get_errinfo(regs);
+               /*
+                * Calling fwnmi_release_errinfo() in real mode causes the
+                * kernel to panic. Hence we call it as soon as we go into
+                * virtual mode.
+                */
+               disposition = mce_handle_error(errp);
+               if (disposition == RTAS_DISP_FULLY_RECOVERED)
+                       return 1;
+       }
+
+       return 0;
+}
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c

index ba1791f..0f553dc 100644 (file)
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -107,6 +107,10 @@ static void __init fwnmi_init(void)
         u8 *mce_data_buf;
         unsigned int i;
         int nr_cpus = num_possible_cpus();
+#ifdef CONFIG_PPC_BOOK3S_64
+       struct slb_entry *slb_ptr;
+       size_t size;
+#endif
  
         int ibm_nmi_register = rtas_token("ibm,nmi-register");
         if (ibm_nmi_register == RTAS_UNKNOWN_SERVICE)
@@ -132,6 +136,15 @@ static void __init fwnmi_init(void)
                 paca_ptrs[i]->mce_data_buf = mce_data_buf +
                                                 (RTAS_ERROR_LOG_MAX * i);
         }
+
+#ifdef CONFIG_PPC_BOOK3S_64
+       /* Allocate per cpu slb area to save old slb contents during MCE */
+       size = sizeof(struct slb_entry) * mmu_slb_size * nr_cpus;
+       slb_ptr = __va(memblock_alloc_base(size, sizeof(struct slb_entry),
+                                          ppc64_rma_size));
+       for_each_possible_cpu(i)
+               paca_ptrs[i]->mce_faulty_slbs = slb_ptr + (mmu_slb_size * i);
+#endif
  }
  
  static void pseries_8259_cascade(struct irq_desc *desc)
@@ -1017,6 +1030,7 @@ define_machine(pseries) {
         .calibrate_decr         = generic_calibrate_decr,
         .progress               = rtas_progress,
         .system_reset_exception = pSeries_system_reset_exception,
+       .machine_check_early    = pseries_machine_check_realmode,
         .machine_check_exception = pSeries_machine_check_exception,
  #ifdef CONFIG_KEXEC_CORE
         .machine_kexec          = pSeries_machine_kexec,
diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c

index 49e04ec..88f1ad1 100644 (file)
--- a/arch/powerpc/platforms/pseries/vio.c
+++ b/arch/powerpc/platforms/pseries/vio.c
@@ -1349,7 +1349,6 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node)
         struct device_node *parent_node;
         const __be32 *prop;
         enum vio_dev_family family;
-       const char *of_node_name = of_node->name ? of_node->name : "<unknown>";
  
         /*
          * Determine if this node is a under the /vdevice node or under the
@@ -1362,24 +1361,24 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node)
                 else if (!strcmp(parent_node->type, "vdevice"))
                         family = VDEVICE;
                 else {
-                       pr_warn("%s: parent(%pOF) of %s not recognized.\n",
+                       pr_warn("%s: parent(%pOF) of %pOFn not recognized.\n",
                                         __func__,
                                         parent_node,
-                                       of_node_name);
+                                       of_node);
                         of_node_put(parent_node);
                         return NULL;
                 }
                 of_node_put(parent_node);
         } else {
-               pr_warn("%s: could not determine the parent of node %s.\n",
-                               __func__, of_node_name);
+               pr_warn("%s: could not determine the parent of node %pOFn.\n",
+                               __func__, of_node);
                 return NULL;
         }
  
         if (family == PFO) {
                 if (of_get_property(of_node, "interrupt-controller", NULL)) {
-                       pr_debug("%s: Skipping the interrupt controller %s.\n",
-                                       __func__, of_node_name);
+                       pr_debug("%s: Skipping the interrupt controller %pOFn.\n",
+                                       __func__, of_node);
                         return NULL;
                 }
         }
@@ -1399,15 +1398,15 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node)
                 if (of_node->type != NULL)
                         viodev->type = of_node->type;
                 else {
-                       pr_warn("%s: node %s is missing the 'device_type' "
-                                       "property.\n", __func__, of_node_name);
+                       pr_warn("%s: node %pOFn is missing the 'device_type' "
+                                       "property.\n", __func__, of_node);
                         goto out;
                 }
  
                 prop = of_get_property(of_node, "reg", NULL);
                 if (prop == NULL) {
-                       pr_warn("%s: node %s missing 'reg'\n",
-                                       __func__, of_node_name);
+                       pr_warn("%s: node %pOFn missing 'reg'\n",
+                                       __func__, of_node);
                         goto out;
                 }
                 unit_address = of_read_number(prop, 1);
@@ -1422,8 +1421,8 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node)
                 if (prop != NULL)
                         viodev->resource_id = of_read_number(prop, 1);
  
-               dev_set_name(&viodev->dev, "%s", of_node_name);
-               viodev->type = of_node_name;
+               dev_set_name(&viodev->dev, "%pOFn", of_node);
+               viodev->type = dev_name(&viodev->dev);
                 viodev->irq = 0;
         }
  
@@ -1694,7 +1693,7 @@ struct vio_dev *vio_find_node(struct device_node *vnode)
                 snprintf(kobj_name, sizeof(kobj_name), "%x",
                          (uint32_t)of_read_number(prop, 1));
         } else if (!strcmp(dev_type, "ibm,platform-facilities"))
-               snprintf(kobj_name, sizeof(kobj_name), "%s", vnode->name);
+               snprintf(kobj_name, sizeof(kobj_name), "%pOFn", vnode);
         else
                 return NULL;
  
diff --git a/arch/powerpc/sysdev/Kconfig b/arch/powerpc/sysdev/Kconfig

index bcef2ac..e0dbec7 100644 (file)
--- a/arch/powerpc/sysdev/Kconfig
+++ b/arch/powerpc/sysdev/Kconfig
@@ -6,19 +6,16 @@
  config PPC4xx_PCI_EXPRESS
         bool
         depends on PCI && 4xx
-       default n
  
  config PPC4xx_HSTA_MSI
         bool
         depends on PCI_MSI
         depends on PCI && 4xx
-       default n
  
  config PPC4xx_MSI
         bool
         depends on PCI_MSI
         depends on PCI && 4xx
-       default n
  
  config PPC_MSI_BITMAP
         bool
@@ -37,11 +34,9 @@ config PPC_SCOM
  config SCOM_DEBUGFS
         bool "Expose SCOM controllers via debugfs"
         depends on PPC_SCOM && DEBUG_FS
-       default n
  
  config GE_FPGA
         bool
-       default n
  
  config FSL_CORENET_RCPM
         bool
diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile

index f730539..2caa4de 100644 (file)
--- a/arch/powerpc/sysdev/Makefile
+++ b/arch/powerpc/sysdev/Makefile
@@ -1,5 +1,4 @@
  # SPDX-License-Identifier: GPL-2.0
-subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
  
  ccflags-$(CONFIG_PPC64)                := $(NO_MINIMAL_TOC)
  
@@ -56,8 +55,6 @@ obj-$(CONFIG_PPC_SCOM)                += scom.o
  
  obj-$(CONFIG_PPC_EARLY_DEBUG_MEMCONS)  += udbg_memcons.o
  
-subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
-
  obj-$(CONFIG_PPC_XICS)         += xics/
  obj-$(CONFIG_PPC_XIVE)         += xive/
  
diff --git a/arch/powerpc/sysdev/fsl_85xx_cache_sram.c b/arch/powerpc/sysdev/fsl_85xx_cache_sram.c

index 00ccf3e..15cbdd4 100644 (file)
--- a/arch/powerpc/sysdev/fsl_85xx_cache_sram.c
+++ b/arch/powerpc/sysdev/fsl_85xx_cache_sram.c
@@ -107,11 +107,11 @@ int __init instantiate_cache_sram(struct platform_device *dev,
                 goto out_free;
         }
  
-       cache_sram->base_virt = ioremap_prot(cache_sram->base_phys,
-                               cache_sram->size, _PAGE_COHERENT | PAGE_KERNEL);
+       cache_sram->base_virt = ioremap_coherent(cache_sram->base_phys,
+                                                cache_sram->size);
         if (!cache_sram->base_virt) {
-               dev_err(&dev->dev, "%pOF: ioremap_prot failed\n",
-                               dev->dev.of_node);
+               dev_err(&dev->dev, "%pOF: ioremap_coherent failed\n",
+                       dev->dev.of_node);
                 ret = -ENOMEM;
                 goto out_release;
         }
diff --git a/arch/powerpc/sysdev/ipic.c b/arch/powerpc/sysdev/ipic.c

index 535cf1f..6300123 100644 (file)
--- a/arch/powerpc/sysdev/ipic.c
+++ b/arch/powerpc/sysdev/ipic.c
@@ -846,7 +846,7 @@ void ipic_disable_mcp(enum ipic_mcp_irq mcp_irq)
  
  u32 ipic_get_mcp_status(void)
  {
-       return ipic_read(primary_ipic->regs, IPIC_SERSR);
+       return primary_ipic ? ipic_read(primary_ipic->regs, IPIC_SERSR) : 0;
  }
  
  void ipic_clear_mcp_status(u32 mask)
diff --git a/arch/powerpc/sysdev/xics/Makefile b/arch/powerpc/sysdev/xics/Makefile

index 5d438d9..ba1e311 100644 (file)
--- a/arch/powerpc/sysdev/xics/Makefile
+++ b/arch/powerpc/sysdev/xics/Makefile
@@ -1,5 +1,4 @@
  # SPDX-License-Identifier: GPL-2.0
-subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
  
  obj-y                          += xics-common.o
  obj-$(CONFIG_PPC_ICP_NATIVE)   += icp-native.o
diff --git a/arch/powerpc/sysdev/xive/Kconfig b/arch/powerpc/sysdev/xive/Kconfig

index 70ee976..785c292 100644 (file)
--- a/arch/powerpc/sysdev/xive/Kconfig
+++ b/arch/powerpc/sysdev/xive/Kconfig
@@ -1,17 +1,14 @@
  # SPDX-License-Identifier: GPL-2.0
  config PPC_XIVE
         bool
-       default n
         select PPC_SMP_MUXED_IPI
         select HARDIRQS_SW_RESEND
  
  config PPC_XIVE_NATIVE
         bool
-       default n
         select PPC_XIVE
         depends on PPC_POWERNV
  
  config PPC_XIVE_SPAPR
         bool
-       default n
         select PPC_XIVE
diff --git a/arch/powerpc/sysdev/xive/Makefile b/arch/powerpc/sysdev/xive/Makefile

index 536d6e5..dea2abc 100644 (file)
--- a/arch/powerpc/sysdev/xive/Makefile
+++ b/arch/powerpc/sysdev/xive/Makefile
@@ -1,4 +1,3 @@
-subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
  
  obj-y                          += common.o
  obj-$(CONFIG_PPC_XIVE_NATIVE)  += native.o
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c

index 959a2a6..9824074 100644 (file)
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -1010,12 +1010,13 @@ static void xive_ipi_eoi(struct irq_data *d)
  {
         struct xive_cpu *xc = __this_cpu_read(xive_cpu);
  
-       DBG_VERBOSE("IPI eoi: irq=%d [0x%lx] (HW IRQ 0x%x) pending=%02x\n",
-                   d->irq, irqd_to_hwirq(d), xc->hw_ipi, xc->pending_prio);
-
         /* Handle possible race with unplug and drop stale IPIs */
         if (!xc)
                 return;
+
+       DBG_VERBOSE("IPI eoi: irq=%d [0x%lx] (HW IRQ 0x%x) pending=%02x\n",
+                   d->irq, irqd_to_hwirq(d), xc->hw_ipi, xc->pending_prio);
+
         xive_do_source_eoi(xc->hw_ipi, &xc->ipi_data);
         xive_do_queue_eoi(xc);
  }
diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c

index 5b20a67..1ca127d 100644 (file)
--- a/arch/powerpc/sysdev/xive/native.c
+++ b/arch/powerpc/sysdev/xive/native.c
@@ -238,20 +238,11 @@ static bool xive_native_match(struct device_node *node)
  #ifdef CONFIG_SMP
  static int xive_native_get_ipi(unsigned int cpu, struct xive_cpu *xc)
  {
-       struct device_node *np;
-       unsigned int chip_id;
         s64 irq;
  
-       /* Find the chip ID */
-       np = of_get_cpu_node(cpu, NULL);
-       if (np) {
-               if (of_property_read_u32(np, "ibm,chip-id", &chip_id) < 0)
-                       chip_id = 0;
-       }
-
         /* Allocate an IPI and populate info about it */
         for (;;) {
-               irq = opal_xive_allocate_irq(chip_id);
+               irq = opal_xive_allocate_irq(xc->chip_id);
                 if (irq == OPAL_BUSY) {
                         msleep(OPAL_BUSY_DELAY_MS);
                         continue;
diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile

index 1bc3abb..69e7fb4 100644 (file)
--- a/arch/powerpc/xmon/Makefile
+++ b/arch/powerpc/xmon/Makefile
@@ -1,14 +1,15 @@
  # SPDX-License-Identifier: GPL-2.0
  # Makefile for xmon
  
-subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
+# Disable clang warning for using setjmp without setjmp.h header
+subdir-ccflags-y := $(call cc-disable-warning, builtin-requires-header)
  
  GCOV_PROFILE := n
  UBSAN_SANITIZE := n
  
  # Disable ftrace for the entire directory
  ORIG_CFLAGS := $(KBUILD_CFLAGS)
-KBUILD_CFLAGS = $(subst -mno-sched-epilog,,$(subst $(CC_FLAGS_FTRACE),,$(ORIG_CFLAGS)))
+KBUILD_CFLAGS = $(subst $(CC_FLAGS_FTRACE),,$(ORIG_CFLAGS))
  
  ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
  
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c

index 4264aed..36b8dc4 100644 (file)
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -2378,25 +2378,33 @@ static void dump_one_paca(int cpu)
         DUMP(p, cpu_start, "%#-*x");
         DUMP(p, kexec_state, "%#-*x");
  #ifdef CONFIG_PPC_BOOK3S_64
-       for (i = 0; i < SLB_NUM_BOLTED; i++) {
-               u64 esid, vsid;
+       if (!early_radix_enabled()) {
+               for (i = 0; i < SLB_NUM_BOLTED; i++) {
+                       u64 esid, vsid;
  
-               if (!p->slb_shadow_ptr)
-                       continue;
+                       if (!p->slb_shadow_ptr)
+                               continue;
+
+                       esid = be64_to_cpu(p->slb_shadow_ptr->save_area[i].esid);
+                       vsid = be64_to_cpu(p->slb_shadow_ptr->save_area[i].vsid);
  
-               esid = be64_to_cpu(p->slb_shadow_ptr->save_area[i].esid);
-               vsid = be64_to_cpu(p->slb_shadow_ptr->save_area[i].vsid);
+                       if (esid || vsid) {
+                               printf(" %-*s[%d] = 0x%016llx 0x%016llx\n",
+                                      22, "slb_shadow", i, esid, vsid);
+                       }
+               }
+               DUMP(p, vmalloc_sllp, "%#-*x");
+               DUMP(p, stab_rr, "%#-*x");
+               DUMP(p, slb_used_bitmap, "%#-*x");
+               DUMP(p, slb_kern_bitmap, "%#-*x");
  
-               if (esid || vsid) {
-                       printf(" %-*s[%d] = 0x%016llx 0x%016llx\n",
-                              22, "slb_shadow", i, esid, vsid);
+               if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) {
+                       DUMP(p, slb_cache_ptr, "%#-*x");
+                       for (i = 0; i < SLB_CACHE_ENTRIES; i++)
+                               printf(" %-*s[%d] = 0x%016x\n",
+                                      22, "slb_cache", i, p->slb_cache[i]);
                 }
         }
-       DUMP(p, vmalloc_sllp, "%#-*x");
-       DUMP(p, slb_cache_ptr, "%#-*x");
-       for (i = 0; i < SLB_CACHE_ENTRIES; i++)
-               printf(" %-*s[%d] = 0x%016x\n",
-                      22, "slb_cache", i, p->slb_cache[i]);
  
         DUMP(p, rfi_flush_fallback_area, "%-*px");
  #endif
@@ -2412,7 +2420,9 @@ static void dump_one_paca(int cpu)
         DUMP(p, __current, "%-*px");
         DUMP(p, kstack, "%#-*llx");
         printf(" %-*s = 0x%016llx\n", 25, "kstack_base", p->kstack & ~(THREAD_SIZE - 1));
-       DUMP(p, stab_rr, "%#-*llx");
+#ifdef CONFIG_STACKPROTECTOR
+       DUMP(p, canary, "%#-*lx");
+#endif
         DUMP(p, saved_r1, "%#-*llx");
         DUMP(p, trap_save, "%#-*x");
         DUMP(p, irq_soft_mask, "%#-*x");
@@ -2444,11 +2454,15 @@ static void dump_one_paca(int cpu)
  
         DUMP(p, accounting.utime, "%#-*lx");
         DUMP(p, accounting.stime, "%#-*lx");
+#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
         DUMP(p, accounting.utime_scaled, "%#-*lx");
+#endif
         DUMP(p, accounting.starttime, "%#-*lx");
         DUMP(p, accounting.starttime_user, "%#-*lx");
+#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
         DUMP(p, accounting.startspurr, "%#-*lx");
         DUMP(p, accounting.utime_sspurr, "%#-*lx");
+#endif
         DUMP(p, accounting.steal_time, "%#-*lx");
  #undef DUMP
  
@@ -2988,15 +3002,17 @@ static void show_task(struct task_struct *tsk)
  #ifdef CONFIG_PPC_BOOK3S_64
  void format_pte(void *ptep, unsigned long pte)
  {
+       pte_t entry = __pte(pte);
+
         printf("ptep @ 0x%016lx = 0x%016lx\n", (unsigned long)ptep, pte);
         printf("Maps physical address = 0x%016lx\n", pte & PTE_RPN_MASK);
  
         printf("Flags = %s%s%s%s%s\n",
-              (pte & _PAGE_ACCESSED) ? "Accessed " : "",
-              (pte & _PAGE_DIRTY)    ? "Dirty " : "",
-              (pte & _PAGE_READ)     ? "Read " : "",
-              (pte & _PAGE_WRITE)    ? "Write " : "",
-              (pte & _PAGE_EXEC)     ? "Exec " : "");
+              pte_young(entry) ? "Accessed " : "",
+              pte_dirty(entry) ? "Dirty " : "",
+              pte_read(entry)  ? "Read " : "",
+              pte_write(entry) ? "Write " : "",
+              pte_exec(entry)  ? "Exec " : "");
  }
  
  static void show_pte(unsigned long addr)
diff --git a/arch/sh/boards/of-generic.c b/arch/sh/boards/of-generic.c

index 26789ad..cde370c 100644 (file)
--- a/arch/sh/boards/of-generic.c
+++ b/arch/sh/boards/of-generic.c
@@ -64,7 +64,7 @@ static void sh_of_smp_probe(void)
  
         init_cpu_possible(cpumask_of(0));
  
-       for_each_node_by_type(np, "cpu") {
+       for_each_of_cpu_node(np) {
                 const __be32 *cell = of_get_property(np, "reg", NULL);
                 u64 id = -1;
                 if (cell) id = of_read_number(cell, of_n_addr_cells(np));
diff --git a/arch/sparc/include/asm/cmpxchg_64.h b/arch/sparc/include/asm/cmpxchg_64.h

index f71ef37..316faa0 100644 (file)
--- a/arch/sparc/include/asm/cmpxchg_64.h
+++ b/arch/sparc/include/asm/cmpxchg_64.h
@@ -52,7 +52,12 @@ static inline unsigned long xchg64(__volatile__ unsigned long *m, unsigned long
         return val;
  }
  
-#define xchg(ptr,x) ((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr))))
+#define xchg(ptr,x)                                                    \
+({     __typeof__(*(ptr)) __ret;                                       \
+       __ret = (__typeof__(*(ptr)))                                    \
+               __xchg((unsigned long)(x), (ptr), sizeof(*(ptr)));      \
+       __ret;                                                          \
+})
  
  void __xchg_called_with_bad_pointer(void);
  
diff --git a/arch/sparc/include/asm/prom.h b/arch/sparc/include/asm/prom.h

index d955c8d..1902db2 100644 (file)
--- a/arch/sparc/include/asm/prom.h
+++ b/arch/sparc/include/asm/prom.h
@@ -24,9 +24,6 @@
  #include <linux/atomic.h>
  #include <linux/irqdomain.h>
  
-#define OF_ROOT_NODE_ADDR_CELLS_DEFAULT        2
-#define OF_ROOT_NODE_SIZE_CELLS_DEFAULT        1
-
  #define of_compat_cmp(s1, s2, l)       strncmp((s1), (s2), (l))
  #define of_prop_cmp(s1, s2)            strcasecmp((s1), (s2))
  #define of_node_cmp(s1, s2)            strcmp((s1), (s2))
diff --git a/arch/sparc/include/asm/switch_to_64.h b/arch/sparc/include/asm/switch_to_64.h

index 4ff29b1..b1d4e2e 100644 (file)
--- a/arch/sparc/include/asm/switch_to_64.h
+++ b/arch/sparc/include/asm/switch_to_64.h
@@ -67,6 +67,7 @@ do {  save_and_clear_fpu();                                           \
  } while(0)
  
  void synchronize_user_stack(void);
-void fault_in_user_windows(void);
+struct pt_regs;
+void fault_in_user_windows(struct pt_regs *);
  
  #endif /* __SPARC64_SWITCH_TO_64_H */
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c

index 6c08608..59eaf62 100644 (file)
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -36,6 +36,7 @@
  #include <linux/sysrq.h>
  #include <linux/nmi.h>
  #include <linux/context_tracking.h>
+#include <linux/signal.h>
  
  #include <linux/uaccess.h>
  #include <asm/page.h>
@@ -521,7 +522,12 @@ static void stack_unaligned(unsigned long sp)
         force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *) sp, 0, current);
  }
  
-void fault_in_user_windows(void)
+static const char uwfault32[] = KERN_INFO \
+       "%s[%d]: bad register window fault: SP %08lx (orig_sp %08lx) TPC %08lx O7 %08lx\n";
+static const char uwfault64[] = KERN_INFO \
+       "%s[%d]: bad register window fault: SP %016lx (orig_sp %016lx) TPC %08lx O7 %016lx\n";
+
+void fault_in_user_windows(struct pt_regs *regs)
  {
         struct thread_info *t = current_thread_info();
         unsigned long window;
@@ -534,9 +540,9 @@ void fault_in_user_windows(void)
                 do {
                         struct reg_window *rwin = &t->reg_window[window];
                         int winsize = sizeof(struct reg_window);
-                       unsigned long sp;
+                       unsigned long sp, orig_sp;
  
-                       sp = t->rwbuf_stkptrs[window];
+                       orig_sp = sp = t->rwbuf_stkptrs[window];
  
                         if (test_thread_64bit_stack(sp))
                                 sp += STACK_BIAS;
@@ -547,8 +553,16 @@ void fault_in_user_windows(void)
                                 stack_unaligned(sp);
  
                         if (unlikely(copy_to_user((char __user *)sp,
-                                                 rwin, winsize)))
+                                                 rwin, winsize))) {
+                               if (show_unhandled_signals)
+                                       printk_ratelimited(is_compat_task() ?
+                                                          uwfault32 : uwfault64,
+                                                          current->comm, current->pid,
+                                                          sp, orig_sp,
+                                                          regs->tpc,
+                                                          regs->u_regs[UREG_I7]);
                                 goto barf;
+                       }
                 } while (window--);
         }
         set_thread_wsaved(0);
@@ -556,8 +570,7 @@ void fault_in_user_windows(void)
  
  barf:
         set_thread_wsaved(window + 1);
-       user_exit();
-       do_exit(SIGILL);
+       force_sig(SIGSEGV, current);
  }
  
  asmlinkage long sparc_do_fork(unsigned long clone_flags,
diff --git a/arch/sparc/kernel/rtrap_64.S b/arch/sparc/kernel/rtrap_64.S

index 4073e2b..29aa34f 100644 (file)
--- a/arch/sparc/kernel/rtrap_64.S
+++ b/arch/sparc/kernel/rtrap_64.S
@@ -39,6 +39,7 @@ __handle_preemption:
                  wrpr                   %g0, RTRAP_PSTATE_IRQOFF, %pstate
  
  __handle_user_windows:
+               add                     %sp, PTREGS_OFF, %o0
                 call                    fault_in_user_windows
  661:            wrpr                   %g0, RTRAP_PSTATE, %pstate
                 /* If userspace is using ADI, it could potentially pass
diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c

index 44d379d..4c5b3fc 100644 (file)
--- a/arch/sparc/kernel/signal32.c
+++ b/arch/sparc/kernel/signal32.c
@@ -371,7 +371,11 @@ static int setup_frame32(struct ksignal *ksig, struct pt_regs *regs,
                 get_sigframe(ksig, regs, sigframe_size);
         
         if (invalid_frame_pointer(sf, sigframe_size)) {
-               do_exit(SIGILL);
+               if (show_unhandled_signals)
+                       pr_info("%s[%d] bad frame in setup_frame32: %08lx TPC %08lx O7 %08lx\n",
+                               current->comm, current->pid, (unsigned long)sf,
+                               regs->tpc, regs->u_regs[UREG_I7]);
+               force_sigsegv(ksig->sig, current);
                 return -EINVAL;
         }
  
@@ -501,7 +505,11 @@ static int setup_rt_frame32(struct ksignal *ksig, struct pt_regs *regs,
                 get_sigframe(ksig, regs, sigframe_size);
         
         if (invalid_frame_pointer(sf, sigframe_size)) {
-               do_exit(SIGILL);
+               if (show_unhandled_signals)
+                       pr_info("%s[%d] bad frame in setup_rt_frame32: %08lx TPC %08lx O7 %08lx\n",
+                               current->comm, current->pid, (unsigned long)sf,
+                               regs->tpc, regs->u_regs[UREG_I7]);
+               force_sigsegv(ksig->sig, current);
                 return -EINVAL;
         }
  
diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c

index 48366e5..e9de180 100644 (file)
--- a/arch/sparc/kernel/signal_64.c
+++ b/arch/sparc/kernel/signal_64.c
@@ -370,7 +370,11 @@ setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
                 get_sigframe(ksig, regs, sf_size);
  
         if (invalid_frame_pointer (sf)) {
-               do_exit(SIGILL);        /* won't return, actually */
+               if (show_unhandled_signals)
+                       pr_info("%s[%d] bad frame in setup_rt_frame: %016lx TPC %016lx O7 %016lx\n",
+                               current->comm, current->pid, (unsigned long)sf,
+                               regs->tpc, regs->u_regs[UREG_I7]);
+               force_sigsegv(ksig->sig, current);
                 return -EINVAL;
         }
  
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c

index f396048..39822f6 100644 (file)
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -1383,6 +1383,7 @@ int __node_distance(int from, int to)
         }
         return numa_latency[from][to];
  }
+EXPORT_SYMBOL(__node_distance);
  
  static int __init find_best_numa_node_for_mlgroup(struct mdesc_mlgroup *grp)
  {
diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h

index 5f26962..67ed72f 100644 (file)
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -45,6 +45,8 @@ struct vcpu_data {
  
  #ifdef CONFIG_IRQ_REMAP
  
+extern raw_spinlock_t irq_2_ir_lock;
+
  extern bool irq_remapping_cap(enum irq_remap_cap cap);
  extern void set_irq_remapping_broken(void);
  extern int irq_remapping_prepare(void);
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c

index f39f3a0..7299dcb 100644 (file)
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -140,7 +140,7 @@ static void __init dtb_cpu_setup(void)
         int ret;
  
         version = GET_APIC_VERSION(apic_read(APIC_LVR));
-       for_each_node_by_type(dn, "cpu") {
+       for_each_of_cpu_node(dn) {
                 ret = of_property_read_u32(dn, "reg", &apic_id);
                 if (ret < 0) {
                         pr_warn("%pOF: missing local APIC ID\n", dn);
diff --git a/arch/xtensa/Makefile b/arch/xtensa/Makefile

index d67e30f..be060df 100644 (file)
--- a/arch/xtensa/Makefile
+++ b/arch/xtensa/Makefile
@@ -80,28 +80,18 @@ LIBGCC := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name)
  head-y         := arch/xtensa/kernel/head.o
  core-y         += arch/xtensa/kernel/ arch/xtensa/mm/
  core-y         += $(buildvar) $(buildplf)
+core-y                 += arch/xtensa/boot/dts/
  
  libs-y         += arch/xtensa/lib/ $(LIBGCC)
  drivers-$(CONFIG_OPROFILE)     += arch/xtensa/oprofile/
  
-ifneq ($(CONFIG_BUILTIN_DTB),"")
-core-$(CONFIG_OF) += arch/xtensa/boot/dts/
-endif
-
  boot           := arch/xtensa/boot
  
  all Image zImage uImage: vmlinux
         $(Q)$(MAKE) $(build)=$(boot) $@
  
-%.dtb:
-       $(Q)$(MAKE) $(build)=$(boot)/dts $(boot)/dts/$@
-
-dtbs: scripts
-       $(Q)$(MAKE) $(build)=$(boot)/dts
-
  define archhelp
    @echo '* Image       - Kernel ELF image with reset vector'
    @echo '* zImage      - Compressed kernel image (arch/xtensa/boot/images/zImage.*)'
    @echo '* uImage      - U-Boot wrapped image'
-  @echo '  dtbs        - Build device tree blobs for enabled boards'
  endef
diff --git a/arch/xtensa/platforms/xtfpga/setup.c b/arch/xtensa/platforms/xtfpga/setup.c

index 42285f3..820e873 100644 (file)
--- a/arch/xtensa/platforms/xtfpga/setup.c
+++ b/arch/xtensa/platforms/xtfpga/setup.c
@@ -94,7 +94,7 @@ static void __init xtfpga_clk_setup(struct device_node *np)
         u32 freq;
  
         if (!base) {
-               pr_err("%s: invalid address\n", np->name);
+               pr_err("%pOFn: invalid address\n", np);
                 return;
         }
  
@@ -103,12 +103,12 @@ static void __init xtfpga_clk_setup(struct device_node *np)
         clk = clk_register_fixed_rate(NULL, np->name, NULL, 0, freq);
  
         if (IS_ERR(clk)) {
-               pr_err("%s: clk registration failed\n", np->name);
+               pr_err("%pOFn: clk registration failed\n", np);
                 return;
         }
  
         if (of_clk_add_provider(np, of_clk_src_simple_get, clk)) {
-               pr_err("%s: clk provider registration failed\n", np->name);
+               pr_err("%pOFn: clk provider registration failed\n", np);
                 return;
         }
  }
diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c

index 476b5a9..4b0d5fb 100644 (file)
--- a/block/bfq-wf2q.c
+++ b/block/bfq-wf2q.c
@@ -792,24 +792,18 @@ __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st,
                  * queue, remove the entity from its old weight counter (if
                  * there is a counter associated with the entity).
                  */
-               if (prev_weight != new_weight) {
-                       if (bfqq) {
-                               root = &bfqd->queue_weights_tree;
-                               __bfq_weights_tree_remove(bfqd, bfqq, root);
-                       } else
-                               bfqd->num_active_groups--;
+               if (prev_weight != new_weight && bfqq) {
+                       root = &bfqd->queue_weights_tree;
+                       __bfq_weights_tree_remove(bfqd, bfqq, root);
                 }
                 entity->weight = new_weight;
                 /*
                  * Add the entity, if it is not a weight-raised queue,
                  * to the counter associated with its new weight.
                  */
-               if (prev_weight != new_weight) {
-                       if (bfqq && bfqq->wr_coeff == 1) {
-                               /* If we get here, root has been initialized. */
-                               bfq_weights_tree_add(bfqd, bfqq, root);
-                       } else
-                               bfqd->num_active_groups++;
+               if (prev_weight != new_weight && bfqq && bfqq->wr_coeff == 1) {
+                       /* If we get here, root has been initialized. */
+                       bfq_weights_tree_add(bfqd, bfqq, root);
                 }
  
                 new_st->wsum += entity->weight;
diff --git a/block/blk-core.c b/block/blk-core.c

index 3ed6072..bc6ea87 100644 (file)
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2300,7 +2300,6 @@ generic_make_request_checks(struct bio *bio)
                 if (!q->limits.max_write_same_sectors)
                         goto not_supported;
                 break;
-       case REQ_OP_ZONE_REPORT:
         case REQ_OP_ZONE_RESET:
                 if (!blk_queue_is_zoned(q))
                         goto not_supported;
diff --git a/block/blk-lib.c b/block/blk-lib.c

index bbd4466..76f867e 100644 (file)
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -10,8 +10,7 @@
  
  #include "blk.h"
  
-static struct bio *next_bio(struct bio *bio, unsigned int nr_pages,
-               gfp_t gfp)
+struct bio *blk_next_bio(struct bio *bio, unsigned int nr_pages, gfp_t gfp)
  {
         struct bio *new = bio_alloc(gfp, nr_pages);
  
@@ -63,7 +62,7 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
  
                 end_sect = sector + req_sects;
  
-               bio = next_bio(bio, 0, gfp_mask);
+               bio = blk_next_bio(bio, 0, gfp_mask);
                 bio->bi_iter.bi_sector = sector;
                 bio_set_dev(bio, bdev);
                 bio_set_op_attrs(bio, op, 0);
@@ -165,7 +164,7 @@ static int __blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
         max_write_same_sectors = UINT_MAX >> 9;
  
         while (nr_sects) {
-               bio = next_bio(bio, 1, gfp_mask);
+               bio = blk_next_bio(bio, 1, gfp_mask);
                 bio->bi_iter.bi_sector = sector;
                 bio_set_dev(bio, bdev);
                 bio->bi_vcnt = 1;
@@ -241,7 +240,7 @@ static int __blkdev_issue_write_zeroes(struct block_device *bdev,
                 return -EOPNOTSUPP;
  
         while (nr_sects) {
-               bio = next_bio(bio, 0, gfp_mask);
+               bio = blk_next_bio(bio, 0, gfp_mask);
                 bio->bi_iter.bi_sector = sector;
                 bio_set_dev(bio, bdev);
                 bio->bi_opf = REQ_OP_WRITE_ZEROES;
@@ -292,8 +291,8 @@ static int __blkdev_issue_zero_pages(struct block_device *bdev,
                 return -EPERM;
  
         while (nr_sects != 0) {
-               bio = next_bio(bio, __blkdev_sectors_to_bio_pages(nr_sects),
-                              gfp_mask);
+               bio = blk_next_bio(bio, __blkdev_sectors_to_bio_pages(nr_sects),
+                                  gfp_mask);
                 bio->bi_iter.bi_sector = sector;
                 bio_set_dev(bio, bdev);
                 bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c

index 41b86f5..10b284a 100644 (file)
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -283,7 +283,6 @@ static const char *const op_name[] = {
         REQ_OP_NAME(WRITE),
         REQ_OP_NAME(FLUSH),
         REQ_OP_NAME(DISCARD),
-       REQ_OP_NAME(ZONE_REPORT),
         REQ_OP_NAME(SECURE_ERASE),
         REQ_OP_NAME(ZONE_RESET),
         REQ_OP_NAME(WRITE_SAME),
diff --git a/block/blk-mq.c b/block/blk-mq.c

index dcf10e3..3f91c6e 100644 (file)
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1850,8 +1850,6 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
  
         rq_qos_throttle(q, bio, NULL);
  
-       trace_block_getrq(q, bio, bio->bi_opf);
-
         rq = blk_mq_get_request(q, bio, bio->bi_opf, &data);
         if (unlikely(!rq)) {
                 rq_qos_cleanup(q, bio);
@@ -1860,6 +1858,8 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
                 return BLK_QC_T_NONE;
         }
  
+       trace_block_getrq(q, bio, bio->bi_opf);
+
         rq_qos_track(q, rq, bio);
  
         cookie = request_to_qc_t(data.hctx, rq);
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c

index 3772671..0641533 100644 (file)
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -300,6 +300,11 @@ static ssize_t queue_zoned_show(struct request_queue *q, char *page)
         }
  }
  
+static ssize_t queue_nr_zones_show(struct request_queue *q, char *page)
+{ /* "show" handler: format blk_queue_nr_zones(q) into @page via queue_var_show(). */
+       return queue_var_show(blk_queue_nr_zones(q), page);
+}
+
  static ssize_t queue_nomerges_show(struct request_queue *q, char *page)
  {
         return queue_var_show((blk_queue_nomerges(q) << 1) |
@@ -637,6 +642,11 @@ static struct queue_sysfs_entry queue_zoned_entry = {
         .show = queue_zoned_show,
  };
  
+static struct queue_sysfs_entry queue_nr_zones_entry = {
+       .attr = {.name = "nr_zones", .mode = 0444 },    /* read-only attribute */
+       .show = queue_nr_zones_show,    /* no .store handler is provided */
+};
+
  static struct queue_sysfs_entry queue_nomerges_entry = {
         .attr = {.name = "nomerges", .mode = 0644 },
         .show = queue_nomerges_show,
@@ -727,6 +737,7 @@ static struct attribute *default_attrs[] = {
         &queue_write_zeroes_max_entry.attr,
         &queue_nonrot_entry.attr,
         &queue_zoned_entry.attr,
+       &queue_nr_zones_entry.attr,
         &queue_nomerges_entry.attr,
         &queue_rq_affinity_entry.attr,
         &queue_iostats_entry.attr,
@@ -841,6 +852,8 @@ static void __blk_release_queue(struct work_struct *work)
         if (q->queue_tags)
                 __blk_queue_free_tags(q);
  
+       blk_queue_free_zone_bitmaps(q);
+
         if (!q->mq_ops) {
                 if (q->exit_rq_fn)
                         q->exit_rq_fn(q, q->fq->flush_rq);
diff --git a/block/blk-zoned.c b/block/blk-zoned.c

index c461cf6..13ba201 100644 (file)
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -12,6 +12,9 @@
  #include <linux/module.h>
  #include <linux/rbtree.h>
  #include <linux/blkdev.h>
+#include <linux/blk-mq.h>
+
+#include "blk.h"
  
  static inline sector_t blk_zone_start(struct request_queue *q,
                                       sector_t sector)
@@ -63,14 +66,38 @@ void __blk_req_zone_write_unlock(struct request *rq)
  }
  EXPORT_SYMBOL_GPL(__blk_req_zone_write_unlock);
  
+static inline unsigned int __blkdev_nr_zones(struct request_queue *q,
+                                            sector_t nr_sectors)
+{ /* Number of zones needed to cover @nr_sectors; a partial last zone counts. */
+       unsigned long zone_sectors = blk_queue_zone_sectors(q);
+       /* NOTE(review): the shift assumes zone_sectors is a nonzero power of 2. */
+       return (nr_sectors + zone_sectors - 1) >> ilog2(zone_sectors);
+}
+
+/**
+ * blkdev_nr_zones - Get number of zones
+ * @bdev:      Target block device
+ *
+ * Description:
+ *    Return the number of zones needed to cover the bdev->bd_part->nr_sects
+ *    sectors of a zoned block device. For a regular block device, return 0.
+ */
+unsigned int blkdev_nr_zones(struct block_device *bdev)
+{
+       struct request_queue *q = bdev_get_queue(bdev);
+
+       if (!blk_queue_is_zoned(q))
+               return 0;
+
+       return __blkdev_nr_zones(q, bdev->bd_part->nr_sects);
+}
+EXPORT_SYMBOL_GPL(blkdev_nr_zones);
+
  /*
- * Check that a zone report belongs to the partition.
- * If yes, fix its start sector and write pointer, copy it in the
- * zone information array and return true. Return false otherwise.
+ * Check that a zone report belongs to this partition, and if yes, fix its start
+ * sector and write pointer and return true. Return false otherwise.
   */
-static bool blkdev_report_zone(struct block_device *bdev,
-                              struct blk_zone *rep,
-                              struct blk_zone *zone)
+static bool blkdev_report_zone(struct block_device *bdev, struct blk_zone *rep)
  {
         sector_t offset = get_start_sect(bdev);
  
@@ -85,11 +112,36 @@ static bool blkdev_report_zone(struct block_device *bdev,
                 rep->wp = rep->start + rep->len;
         else
                 rep->wp -= offset;
-       memcpy(zone, rep, sizeof(struct blk_zone));
-
         return true;
  }
  
+static int blk_report_zones(struct gendisk *disk, sector_t sector,
+                           struct blk_zone *zones, unsigned int *nr_zones,
+                           gfp_t gfp_mask)
+{ /* Fill @zones starting at @sector; on success *@nr_zones is the count filled. */
+       struct request_queue *q = disk->queue;
+       unsigned int z = 0, n, nrz = *nr_zones;
+       sector_t capacity = get_capacity(disk);
+       int ret;
+       /* Call ->report_zones() until @zones is full or @sector passes the capacity. */
+       while (z < nrz && sector < capacity) {
+               n = nrz - z;    /* room left in @zones; the callee updates n */
+               ret = disk->fops->report_zones(disk, sector, &zones[z], &n,
+                                              gfp_mask);
+               if (ret)
+                       return ret;
+               if (!n) /* nothing more was reported: stop early */
+                       break;
+               sector += blk_queue_zone_sectors(q) * n; /* advance by n whole zones */
+               z += n;
+       }
+
+       WARN_ON(z > *nr_zones);
+       *nr_zones = z;  /* number of entries actually written to @zones */
+
+       return 0;
+}
+
  /**
   * blkdev_report_zones - Get zones information
   * @bdev:      Target block device
@@ -104,130 +156,46 @@ static bool blkdev_report_zone(struct block_device *bdev,
   *    requested by @nr_zones. The number of zones actually reported is
   *    returned in @nr_zones.
   */
-int blkdev_report_zones(struct block_device *bdev,
-                       sector_t sector,
-                       struct blk_zone *zones,
-                       unsigned int *nr_zones,
+int blkdev_report_zones(struct block_device *bdev, sector_t sector,
+                       struct blk_zone *zones, unsigned int *nr_zones,
                         gfp_t gfp_mask)
  {
         struct request_queue *q = bdev_get_queue(bdev);
-       struct blk_zone_report_hdr *hdr;
-       unsigned int nrz = *nr_zones;
-       struct page *page;
-       unsigned int nr_rep;
-       size_t rep_bytes;
-       unsigned int nr_pages;
-       struct bio *bio;
-       struct bio_vec *bv;
-       unsigned int i, n, nz;
-       unsigned int ofst;
-       void *addr;
+       unsigned int i, nrz;
         int ret;
  
-       if (!q)
-               return -ENXIO;
-
         if (!blk_queue_is_zoned(q))
                 return -EOPNOTSUPP;
  
-       if (!nrz)
-               return 0;
-
-       if (sector > bdev->bd_part->nr_sects) {
-               *nr_zones = 0;
-               return 0;
-       }
-
         /*
-        * The zone report has a header. So make room for it in the
-        * payload. Also make sure that the report fits in a single BIO
-        * that will not be split down the stack.
+        * A block device that advertised itself as zoned must have a
+        * report_zones method. If it does not have one defined, the device
+        * driver has a bug. So warn about that.
          */
-       rep_bytes = sizeof(struct blk_zone_report_hdr) +
-               sizeof(struct blk_zone) * nrz;
-       rep_bytes = (rep_bytes + PAGE_SIZE - 1) & PAGE_MASK;
-       if (rep_bytes > (queue_max_sectors(q) << 9))
-               rep_bytes = queue_max_sectors(q) << 9;
-
-       nr_pages = min_t(unsigned int, BIO_MAX_PAGES,
-                        rep_bytes >> PAGE_SHIFT);
-       nr_pages = min_t(unsigned int, nr_pages,
-                        queue_max_segments(q));
-
-       bio = bio_alloc(gfp_mask, nr_pages);
-       if (!bio)
-               return -ENOMEM;
-
-       bio_set_dev(bio, bdev);
-       bio->bi_iter.bi_sector = blk_zone_start(q, sector);
-       bio_set_op_attrs(bio, REQ_OP_ZONE_REPORT, 0);
+       if (WARN_ON_ONCE(!bdev->bd_disk->fops->report_zones))
+               return -EOPNOTSUPP;
  
-       for (i = 0; i < nr_pages; i++) {
-               page = alloc_page(gfp_mask);
-               if (!page) {
-                       ret = -ENOMEM;
-                       goto out;
-               }
-               if (!bio_add_page(bio, page, PAGE_SIZE, 0)) {
-                       __free_page(page);
-                       break;
-               }
+       if (!*nr_zones || sector >= bdev->bd_part->nr_sects) {
+               *nr_zones = 0;
+               return 0;
         }
  
-       if (i == 0)
-               ret = -ENOMEM;
-       else
-               ret = submit_bio_wait(bio);
+       nrz = min(*nr_zones,
+                 __blkdev_nr_zones(q, bdev->bd_part->nr_sects - sector));
+       ret = blk_report_zones(bdev->bd_disk, get_start_sect(bdev) + sector,
+                              zones, &nrz, gfp_mask);
         if (ret)
-               goto out;
-
-       /*
-        * Process the report result: skip the header and go through the
-        * reported zones to fixup and fixup the zone information for
-        * partitions. At the same time, return the zone information into
-        * the zone array.
-        */
-       n = 0;
-       nz = 0;
-       nr_rep = 0;
-       bio_for_each_segment_all(bv, bio, i) {
-
-               if (!bv->bv_page)
-                       break;
-
-               addr = kmap_atomic(bv->bv_page);
-
-               /* Get header in the first page */
-               ofst = 0;
-               if (!nr_rep) {
-                       hdr = addr;
-                       nr_rep = hdr->nr_zones;
-                       ofst = sizeof(struct blk_zone_report_hdr);
-               }
-
-               /* Fixup and report zones */
-               while (ofst < bv->bv_len &&
-                      n < nr_rep && nz < nrz) {
-                       if (blkdev_report_zone(bdev, addr + ofst, &zones[nz]))
-                               nz++;
-                       ofst += sizeof(struct blk_zone);
-                       n++;
-               }
-
-               kunmap_atomic(addr);
+               return ret;
  
-               if (n >= nr_rep || nz >= nrz)
+       for (i = 0; i < nrz; i++) {
+               if (!blkdev_report_zone(bdev, zones))
                         break;
-
+               zones++;
         }
  
-       *nr_zones = nz;
-out:
-       bio_for_each_segment_all(bv, bio, i)
-               __free_page(bv->bv_page);
-       bio_put(bio);
+       *nr_zones = i;
  
-       return ret;
+       return 0;
  }
  EXPORT_SYMBOL_GPL(blkdev_report_zones);
  
@@ -250,16 +218,17 @@ int blkdev_reset_zones(struct block_device *bdev,
         struct request_queue *q = bdev_get_queue(bdev);
         sector_t zone_sectors;
         sector_t end_sector = sector + nr_sectors;
-       struct bio *bio;
+       struct bio *bio = NULL;
+       struct blk_plug plug;
         int ret;
  
-       if (!q)
-               return -ENXIO;
-
         if (!blk_queue_is_zoned(q))
                 return -EOPNOTSUPP;
  
-       if (end_sector > bdev->bd_part->nr_sects)
+       if (bdev_read_only(bdev))
+               return -EPERM;
+
+       if (!nr_sectors || end_sector > bdev->bd_part->nr_sects)
                 /* Out of range */
                 return -EINVAL;
  
@@ -272,19 +241,14 @@ int blkdev_reset_zones(struct block_device *bdev,
             end_sector != bdev->bd_part->nr_sects)
                 return -EINVAL;
  
+       blk_start_plug(&plug);
         while (sector < end_sector) {
  
-               bio = bio_alloc(gfp_mask, 0);
+               bio = blk_next_bio(bio, 0, gfp_mask);
                 bio->bi_iter.bi_sector = sector;
                 bio_set_dev(bio, bdev);
                 bio_set_op_attrs(bio, REQ_OP_ZONE_RESET, 0);
  
-               ret = submit_bio_wait(bio);
-               bio_put(bio);
-
-               if (ret)
-                       return ret;
-
                 sector += zone_sectors;
  
                 /* This may take a while, so be nice to others */
@@ -292,7 +256,12 @@ int blkdev_reset_zones(struct block_device *bdev,
  
         }
  
-       return 0;
+       ret = submit_bio_wait(bio);
+       bio_put(bio);
+
+       blk_finish_plug(&plug);
+
+       return ret;
  }
  EXPORT_SYMBOL_GPL(blkdev_reset_zones);
  
@@ -328,8 +297,7 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
         if (!rep.nr_zones)
                 return -EINVAL;
  
-       if (rep.nr_zones > INT_MAX / sizeof(struct blk_zone))
-               return -ERANGE;
+       rep.nr_zones = min(blkdev_nr_zones(bdev), rep.nr_zones);
  
         zones = kvmalloc_array(rep.nr_zones, sizeof(struct blk_zone),
                                GFP_KERNEL | __GFP_ZERO);
@@ -392,3 +360,138 @@ int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
         return blkdev_reset_zones(bdev, zrange.sector, zrange.nr_sectors,
                                   GFP_KERNEL);
  }
+
+static inline unsigned long *blk_alloc_zone_bitmap(int node,
+                                                  unsigned int nr_zones)
+{ /* Bitmap with one bit per zone (BITS_TO_LONGS(nr_zones) longs), GFP_NOIO on @node. */
+       return kcalloc_node(BITS_TO_LONGS(nr_zones), sizeof(unsigned long),
+                           GFP_NOIO, node);
+}
+
+/*
+ * Allocate a zeroed array of struct blk_zone for up to *nr_zones zones, trying
+ * smaller page orders down to one page; *nr_zones is capped to what fits.
+ */
+static struct blk_zone *blk_alloc_zones(int node, unsigned int *nr_zones)
+{
+       size_t size = *nr_zones * sizeof(struct blk_zone);
+       struct page *page;
+       int order;
+       /* Order 0 must be tried too, else an array fitting in one page always fails. */
+       for (order = get_order(size); order >= 0; order--) {
+               page = alloc_pages_node(node, GFP_NOIO | __GFP_ZERO, order);
+               if (page) {
+                       *nr_zones = min_t(unsigned int, *nr_zones,
+                               (PAGE_SIZE << order) / sizeof(struct blk_zone));
+                       return page_address(page);
+               }
+       }
+
+       return NULL;    /* no order could be allocated */
+}
+
+void blk_queue_free_zone_bitmaps(struct request_queue *q)
+{ /* Free both per-queue zone bitmaps and clear the pointers to the freed memory. */
+       kfree(q->seq_zones_bitmap);
+       q->seq_zones_bitmap = NULL;
+       kfree(q->seq_zones_wlock);
+       q->seq_zones_wlock = NULL;
+}
+
+/**
+ * blk_revalidate_disk_zones - (re)allocate and initialize zone bitmaps
+ * @disk:      Target disk
+ *
+ * Helper function for low-level device drivers to (re)allocate and initialize
+ * the zone bitmaps of a disk request queue. This function should normally be
+ * called within the disk ->revalidate method. For BIO based queues, no zone
+ * bitmap is allocated. Return 0 on success, an error code otherwise.
+ */
+int blk_revalidate_disk_zones(struct gendisk *disk)
+{
+       struct request_queue *q = disk->queue;
+       unsigned int nr_zones = __blkdev_nr_zones(q, get_capacity(disk));
+       unsigned long *seq_zones_wlock = NULL, *seq_zones_bitmap = NULL;
+       unsigned int i, rep_nr_zones = 0, z = 0, nrz;
+       struct blk_zone *zones = NULL;
+       sector_t sector = 0;
+       int ret = 0;
+       /* NOTE(review): nr_zones is computed above, before the zoned check below. */
+       /*
+        * BIO based queues do not use a scheduler so only q->nr_zones
+        * needs to be updated so that the sysfs exposed value is correct.
+        */
+       if (!queue_is_rq_based(q)) {
+               q->nr_zones = nr_zones;
+               return 0;
+       }
+
+       if (!blk_queue_is_zoned(q) || !nr_zones) {
+               nr_zones = 0;
+               goto update;    /* installs NULL bitmaps; the old ones are freed at out: */
+       }
+
+       /* Allocate bitmaps */
+       ret = -ENOMEM;
+       seq_zones_wlock = blk_alloc_zone_bitmap(q->node, nr_zones);
+       if (!seq_zones_wlock)
+               goto out;
+       seq_zones_bitmap = blk_alloc_zone_bitmap(q->node, nr_zones);
+       if (!seq_zones_bitmap)
+               goto out;
+
+       /* Get zone information and initialize seq_zones_bitmap */
+       rep_nr_zones = nr_zones;
+       zones = blk_alloc_zones(q->node, &rep_nr_zones);
+       if (!zones)
+               goto out;
+
+       while (z < nr_zones) {
+               nrz = min(nr_zones - z, rep_nr_zones);  /* at most one buffer per call */
+               ret = blk_report_zones(disk, sector, zones, &nrz, GFP_NOIO);
+               if (ret)
+                       goto out;
+               if (!nrz)       /* short report: caught by the WARN_ON below */
+                       break;
+               for (i = 0; i < nrz; i++) {
+                       if (zones[i].type != BLK_ZONE_TYPE_CONVENTIONAL)
+                               set_bit(z, seq_zones_bitmap);
+                       z++;
+               }
+               sector += nrz * blk_queue_zone_sectors(q);
+       }
+
+       if (WARN_ON(z != nr_zones)) {
+               ret = -EIO;
+               goto out;
+       }
+
+update:
+       /*
+        * Install the new bitmaps, making sure the queue is stopped and
+        * all I/Os are completed (i.e. a scheduler is not referencing the
+        * bitmaps).
+        */
+       blk_mq_freeze_queue(q);
+       q->nr_zones = nr_zones;
+       swap(q->seq_zones_wlock, seq_zones_wlock);
+       swap(q->seq_zones_bitmap, seq_zones_bitmap);
+       blk_mq_unfreeze_queue(q);
+
+out:   /* after the swaps above, the locals hold the previous bitmaps */
+       free_pages((unsigned long)zones,
+                  get_order(rep_nr_zones * sizeof(struct blk_zone)));
+       kfree(seq_zones_wlock);
+       kfree(seq_zones_bitmap);
+
+       if (ret) {      /* on failure, also drop the bitmaps the queue currently holds */
+               pr_warn("%s: failed to revalidate zones\n", disk->disk_name);
+               blk_mq_freeze_queue(q);
+               blk_queue_free_zone_bitmaps(q);
+               blk_mq_unfreeze_queue(q);
+       }
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(blk_revalidate_disk_zones);
+
diff --git a/block/blk.h b/block/blk.h

index 3d2aecb..a1841b8 100644 (file)
--- a/block/blk.h
+++ b/block/blk.h
@@ -488,4 +488,12 @@ extern int blk_iolatency_init(struct request_queue *q);
  static inline int blk_iolatency_init(struct request_queue *q) { return 0; }
  #endif
  
+struct bio *blk_next_bio(struct bio *bio, unsigned int nr_pages, gfp_t gfp);
+
+#ifdef CONFIG_BLK_DEV_ZONED
+void blk_queue_free_zone_bitmaps(struct request_queue *q);
+#else
+static inline void blk_queue_free_zone_bitmaps(struct request_queue *q) {}
+#endif
+
  #endif /* BLK_INTERNAL_H */
diff --git a/block/ioctl.c b/block/ioctl.c

index 3884d81..4825c78 100644 (file)
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -532,6 +532,10 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
                 return blkdev_report_zones_ioctl(bdev, mode, cmd, arg);
         case BLKRESETZONE:
                 return blkdev_reset_zones_ioctl(bdev, mode, cmd, arg);
+       case BLKGETZONESZ:
+               return put_uint(arg, bdev_zone_sectors(bdev));
+       case BLKGETNRZONES:
+               return put_uint(arg, blkdev_nr_zones(bdev));
         case HDIO_GETGEO:
                 return blkdev_getgeo(bdev, argp);
         case BLKRAGET:
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c

index a9dd4ea..6e59464 100644 (file)
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4553,6 +4553,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
         /* These specific Samsung models/firmware-revs do not handle LPM well */
         { "SAMSUNG MZMPC128HBFU-000MV", "CXM14M1Q", ATA_HORKAGE_NOLPM, },
         { "SAMSUNG SSD PM830 mSATA *",  "CXM13D1Q", ATA_HORKAGE_NOLPM, },
+       { "SAMSUNG MZ7TD256HAFV-000L9", "DXT02L5Q", ATA_HORKAGE_NOLPM, },
  
         /* devices that don't properly handle queued TRIM commands */
         { "Micron_M500IT_*",            "MU01", ATA_HORKAGE_NO_NCQ_TRIM |
diff --git a/drivers/block/null_blk.h b/drivers/block/null_blk.h

index 34e0030..7685df4 100644 (file)
--- a/drivers/block/null_blk.h
+++ b/drivers/block/null_blk.h
@@ -87,7 +87,9 @@ struct nullb {
  #ifdef CONFIG_BLK_DEV_ZONED
  int null_zone_init(struct nullb_device *dev);
  void null_zone_exit(struct nullb_device *dev);
-blk_status_t null_zone_report(struct nullb *nullb, struct bio *bio);
+int null_zone_report(struct gendisk *disk, sector_t sector,
+                    struct blk_zone *zones, unsigned int *nr_zones,
+                    gfp_t gfp_mask);
  void null_zone_write(struct nullb_cmd *cmd, sector_t sector,
                         unsigned int nr_sectors);
  void null_zone_reset(struct nullb_cmd *cmd, sector_t sector);
@@ -97,10 +99,11 @@ static inline int null_zone_init(struct nullb_device *dev)
         return -EINVAL;
  }
  static inline void null_zone_exit(struct nullb_device *dev) {}
-static inline blk_status_t null_zone_report(struct nullb *nullb,
-                                           struct bio *bio)
+static inline int null_zone_report(struct gendisk *disk, sector_t sector,
+                                  struct blk_zone *zones,
+                                  unsigned int *nr_zones, gfp_t gfp_mask)
  {
-       return BLK_STS_NOTSUPP;
+       return -EOPNOTSUPP;
  }
  static inline void null_zone_write(struct nullb_cmd *cmd, sector_t sector,
                                    unsigned int nr_sectors)
diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c

index e945910..0933920 100644 (file)
--- a/drivers/block/null_blk_main.c
+++ b/drivers/block/null_blk_main.c
@@ -1129,34 +1129,12 @@ static void null_restart_queue_async(struct nullb *nullb)
                 blk_mq_start_stopped_hw_queues(q, true);
  }
  
-static bool cmd_report_zone(struct nullb *nullb, struct nullb_cmd *cmd)
-{
-       struct nullb_device *dev = cmd->nq->dev;
-
-       if (dev->queue_mode == NULL_Q_BIO) {
-               if (bio_op(cmd->bio) == REQ_OP_ZONE_REPORT) {
-                       cmd->error = null_zone_report(nullb, cmd->bio);
-                       return true;
-               }
-       } else {
-               if (req_op(cmd->rq) == REQ_OP_ZONE_REPORT) {
-                       cmd->error = null_zone_report(nullb, cmd->rq->bio);
-                       return true;
-               }
-       }
-
-       return false;
-}
-
  static blk_status_t null_handle_cmd(struct nullb_cmd *cmd)
  {
         struct nullb_device *dev = cmd->nq->dev;
         struct nullb *nullb = dev->nullb;
         int err = 0;
  
-       if (cmd_report_zone(nullb, cmd))
-               goto out;
-
         if (test_bit(NULLB_DEV_FL_THROTTLED, &dev->flags)) {
                 struct request *rq = cmd->rq;
  
@@ -1443,6 +1421,7 @@ static const struct block_device_operations null_fops = {
         .owner =        THIS_MODULE,
         .open =         null_open,
         .release =      null_release,
+       .report_zones = null_zone_report,
  };
  
  static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
@@ -1549,6 +1528,13 @@ static int null_gendisk_register(struct nullb *nullb)
         disk->queue             = nullb->q;
         strncpy(disk->disk_name, nullb->disk_name, DISK_NAME_LEN);
  
+       if (nullb->dev->zoned) {
+               int ret = blk_revalidate_disk_zones(disk);
+
+               if (ret != 0)
+                       return ret;
+       }
+
         add_disk(disk);
         return 0;
  }
diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk_zoned.c

index 7c6b86d..c0b0e4a 100644 (file)
--- a/drivers/block/null_blk_zoned.c
+++ b/drivers/block/null_blk_zoned.c
@@ -48,54 +48,27 @@ void null_zone_exit(struct nullb_device *dev)
         kvfree(dev->zones);
  }
  
-static void null_zone_fill_bio(struct nullb_device *dev, struct bio *bio,
-                              unsigned int zno, unsigned int nr_zones)
+int null_zone_report(struct gendisk *disk, sector_t sector,
+                    struct blk_zone *zones, unsigned int *nr_zones,
+                    gfp_t gfp_mask)
  {
-       struct blk_zone_report_hdr *hdr = NULL;
-       struct bio_vec bvec;
-       struct bvec_iter iter;
-       void *addr;
-       unsigned int zones_to_cpy;
-
-       bio_for_each_segment(bvec, bio, iter) {
-               addr = kmap_atomic(bvec.bv_page);
-
-               zones_to_cpy = bvec.bv_len / sizeof(struct blk_zone);
-
-               if (!hdr) {
-                       hdr = (struct blk_zone_report_hdr *)addr;
-                       hdr->nr_zones = nr_zones;
-                       zones_to_cpy--;
-                       addr += sizeof(struct blk_zone_report_hdr);
-               }
-
-               zones_to_cpy = min_t(unsigned int, zones_to_cpy, nr_zones);
-
-               memcpy(addr, &dev->zones[zno],
-                               zones_to_cpy * sizeof(struct blk_zone));
-
-               kunmap_atomic(addr);
+       struct nullb *nullb = disk->private_data;
+       struct nullb_device *dev = nullb->dev;
+       unsigned int zno, nrz = 0;
  
-               nr_zones -= zones_to_cpy;
-               zno += zones_to_cpy;
+       if (!dev->zoned)
+               /* Not a zoned null device */
+               return -EOPNOTSUPP;
  
-               if (!nr_zones)
-                       break;
+       zno = null_zone_no(dev, sector);
+       if (zno < dev->nr_zones) {
+               nrz = min_t(unsigned int, *nr_zones, dev->nr_zones - zno);
+               memcpy(zones, &dev->zones[zno], nrz * sizeof(struct blk_zone));
         }
-}
  
-blk_status_t null_zone_report(struct nullb *nullb, struct bio *bio)
-{
-       struct nullb_device *dev = nullb->dev;
-       unsigned int zno = null_zone_no(dev, bio->bi_iter.bi_sector);
-       unsigned int nr_zones = dev->nr_zones - zno;
-       unsigned int max_zones;
+       *nr_zones = nrz;
  
-       max_zones = (bio->bi_iter.bi_size / sizeof(struct blk_zone)) - 1;
-       nr_zones = min_t(unsigned int, nr_zones, max_zones);
-       null_zone_fill_bio(nullb->dev, bio, zno, nr_zones);
-
-       return BLK_STS_OK;
+       return 0;
  }
  
  void null_zone_write(struct nullb_cmd *cmd, sector_t sector,
diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c

index 7c5fc69..2459dcc 100644 (file)
--- a/drivers/block/skd_main.c
+++ b/drivers/block/skd_main.c
@@ -3175,7 +3175,7 @@ static int skd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
                 goto err_out;
         rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
         if (rc)
-               dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+               rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
         if (rc) {
                 dev_err(&pdev->dev, "DMA mask error %d\n", rc);
                 goto err_out_regions;
@@ -3364,7 +3364,7 @@ static int skd_pci_resume(struct pci_dev *pdev)
                 goto err_out;
         rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
         if (rc)
-               dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+               rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
         if (rc) {
                 dev_err(&pdev->dev, "DMA mask error %d\n", rc);
                 goto err_out_regions;
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c

index 9eea83a..56452ca 100644 (file)
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -2493,6 +2493,9 @@ static int blkfront_remove(struct xenbus_device *xbdev)
  
         dev_dbg(&xbdev->dev, "%s removed", xbdev->nodename);
  
+       if (!info)
+               return 0;
+
         blkif_free(info, 0);
  
         mutex_lock(&info->mutex);
diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c

index 1106c07..6004306 100644 (file)
--- a/drivers/block/z2ram.c
+++ b/drivers/block/z2ram.c
@@ -191,8 +191,7 @@ static int z2_open(struct block_device *bdev, fmode_t mode)
                         vfree(vmalloc (size));
                 }
  
-               vaddr = (unsigned long) __ioremap (paddr, size, 
-                                                  _PAGE_WRITETHRU);
+               vaddr = (unsigned long)ioremap_wt(paddr, size);
  
  #else
                 vaddr = (unsigned long)z_remap_nocache_nonser(paddr, size);
diff --git a/drivers/bus/fsl-mc/fsl-mc-bus.c b/drivers/bus/fsl-mc/fsl-mc-bus.c

index 4552b06..f0404c6 100644 (file)
--- a/drivers/bus/fsl-mc/fsl-mc-bus.c
+++ b/drivers/bus/fsl-mc/fsl-mc-bus.c
@@ -127,6 +127,16 @@ static int fsl_mc_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
         return 0;
  }
  
+static int fsl_mc_dma_configure(struct device *dev)
+{
+       struct device *dma_dev = dev;
+
+       while (dev_is_fsl_mc(dma_dev))
+               dma_dev = dma_dev->parent;
+
+       return of_dma_configure(dev, dma_dev->of_node, 0);
+}
+
  static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
                              char *buf)
  {
@@ -148,6 +158,7 @@ struct bus_type fsl_mc_bus_type = {
         .name = "fsl-mc",
         .match = fsl_mc_bus_match,
         .uevent = fsl_mc_bus_uevent,
+       .dma_configure  = fsl_mc_dma_configure,
         .dev_groups = fsl_mc_dev_groups,
  };
  EXPORT_SYMBOL_GPL(fsl_mc_bus_type);
@@ -621,6 +632,7 @@ int fsl_mc_device_add(struct fsl_mc_obj_desc *obj_desc,
                 mc_dev->icid = parent_mc_dev->icid;
                 mc_dev->dma_mask = FSL_MC_DEFAULT_DMA_MASK;
                 mc_dev->dev.dma_mask = &mc_dev->dma_mask;
+               mc_dev->dev.coherent_dma_mask = mc_dev->dma_mask;
                 dev_set_msi_domain(&mc_dev->dev,
                                    dev_get_msi_domain(&parent_mc_dev->dev));
         }
@@ -638,10 +650,6 @@ int fsl_mc_device_add(struct fsl_mc_obj_desc *obj_desc,
                         goto error_cleanup_dev;
         }
  
-       /* Objects are coherent, unless 'no shareability' flag set. */
-       if (!(obj_desc->flags & FSL_MC_OBJ_FLAG_NO_MEM_SHAREABILITY))
-               arch_setup_dma_ops(&mc_dev->dev, 0, 0, NULL, true);
-
         /*
          * The device-specific probe callback will get invoked by device_add()
          */
@@ -698,8 +706,8 @@ static int parse_mc_ranges(struct device *dev,
         *ranges_start = of_get_property(mc_node, "ranges", &ranges_len);
         if (!(*ranges_start) || !ranges_len) {
                 dev_warn(dev,
-                        "missing or empty ranges property for device tree node '%s'\n",
-                        mc_node->name);
+                        "missing or empty ranges property for device tree node '%pOFn'\n",
+                        mc_node);
                 return 0;
         }
  
@@ -722,7 +730,7 @@ static int parse_mc_ranges(struct device *dev,
  
         tuple_len = range_tuple_cell_count * sizeof(__be32);
         if (ranges_len % tuple_len != 0) {
-               dev_err(dev, "malformed ranges property '%s'\n", mc_node->name);
+               dev_err(dev, "malformed ranges property '%pOFn'\n", mc_node);
                 return -EINVAL;
         }
  
diff --git a/drivers/bus/mvebu-mbus.c b/drivers/bus/mvebu-mbus.c

index 70db4d5..5b2a11a 100644 (file)
--- a/drivers/bus/mvebu-mbus.c
+++ b/drivers/bus/mvebu-mbus.c
@@ -1229,7 +1229,7 @@ mbus_parse_ranges(struct device_node *node,
         tuple_len = (*cell_count) * sizeof(__be32);
  
         if (ranges_len % tuple_len) {
-               pr_warn("malformed ranges entry '%s'\n", node->name);
+               pr_warn("malformed ranges entry '%pOFn'\n", node);
                 return -EINVAL;
         }
         return 0;
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c

index 757e85b..a5b8afe 100644 (file)
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -327,15 +327,15 @@ static int get_entry_track(int track)
  static int gdrom_get_last_session(struct cdrom_device_info *cd_info,
         struct cdrom_multisession *ms_info)
  {
-       int fentry, lentry, track, data, tocuse, err;
+       int fentry, lentry, track, data, err;
+
         if (!gd.toc)
                 return -ENOMEM;
-       tocuse = 1;
+
         /* Check if GD-ROM */
         err = gdrom_readtoc_cmd(gd.toc, 1);
         /* Not a GD-ROM so check if standard CD-ROM */
         if (err) {
-               tocuse = 0;
                 err = gdrom_readtoc_cmd(gd.toc, 0);
                 if (err) {
                         pr_info("Could not get CD table of contents\n");
@@ -794,7 +794,7 @@ static int probe_gdrom(struct platform_device *devptr)
         gd.gdrom_rq = blk_mq_init_sq_queue(&gd.tag_set, &gdrom_mq_ops, 1,
                                 BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING);
         if (IS_ERR(gd.gdrom_rq)) {
-               rc = PTR_ERR(gd.gdrom_rq);
+               err = PTR_ERR(gd.gdrom_rq);
                 gd.gdrom_rq = NULL;
                 goto probe_fail_requestq;
         }
diff --git a/drivers/clk/mvebu/clk-cpu.c b/drivers/clk/mvebu/clk-cpu.c

index 072aa38..3045067 100644 (file)
--- a/drivers/clk/mvebu/clk-cpu.c
+++ b/drivers/clk/mvebu/clk-cpu.c
@@ -183,7 +183,7 @@ static void __init of_cpu_clk_setup(struct device_node *node)
                 pr_warn("%s: pmu-dfs base register not set, dynamic frequency scaling not available\n",
                         __func__);
  
-       for_each_node_by_type(dn, "cpu")
+       for_each_of_cpu_node(dn)
                 ncpus++;
  
         cpuclk = kcalloc(ncpus, sizeof(*cpuclk), GFP_KERNEL);
@@ -194,7 +194,7 @@ static void __init of_cpu_clk_setup(struct device_node *node)
         if (WARN_ON(!clks))
                 goto clks_out;
  
-       for_each_node_by_type(dn, "cpu") {
+       for_each_of_cpu_node(dn) {
                 struct clk_init_data init;
                 struct clk *clk;
                 char *clk_name = kzalloc(5, GFP_KERNEL);
diff --git a/drivers/edac/cpc925_edac.c b/drivers/edac/cpc925_edac.c

index 2c98e02..3c0881a 100644 (file)
--- a/drivers/edac/cpc925_edac.c
+++ b/drivers/edac/cpc925_edac.c
@@ -593,8 +593,7 @@ static void cpc925_mc_check(struct mem_ctl_info *mci)
  /******************** CPU err device********************************/
  static u32 cpc925_cpu_mask_disabled(void)
  {
-       struct device_node *cpus;
-       struct device_node *cpunode = NULL;
+       struct device_node *cpunode;
         static u32 mask = 0;
  
         /* use cached value if available */
@@ -603,20 +602,8 @@ static u32 cpc925_cpu_mask_disabled(void)
  
         mask = APIMASK_ADI0 | APIMASK_ADI1;
  
-       cpus = of_find_node_by_path("/cpus");
-       if (cpus == NULL) {
-               cpc925_printk(KERN_DEBUG, "No /cpus node !\n");
-               return 0;
-       }
-
-       while ((cpunode = of_get_next_child(cpus, cpunode)) != NULL) {
+       for_each_of_cpu_node(cpunode) {
                 const u32 *reg = of_get_property(cpunode, "reg", NULL);
-
-               if (strcmp(cpunode->type, "cpu")) {
-                       cpc925_printk(KERN_ERR, "Not a cpu node in /cpus: %s\n", cpunode->name);
-                       continue;
-               }
-
                 if (reg == NULL || *reg > 2) {
                         cpc925_printk(KERN_ERR, "Bad reg value at %pOF\n", cpunode);
                         continue;
@@ -633,9 +620,6 @@ static u32 cpc925_cpu_mask_disabled(void)
                                 "Assuming PI id is equal to CPU MPIC id!\n");
         }
  
-       of_node_put(cpunode);
-       of_node_put(cpus);
-
         return mask;
  }
  
diff --git a/drivers/firmware/scpi_pm_domain.c b/drivers/firmware/scpi_pm_domain.c

index f395dec..390aa13 100644 (file)
--- a/drivers/firmware/scpi_pm_domain.c
+++ b/drivers/firmware/scpi_pm_domain.c
@@ -121,7 +121,7 @@ static int scpi_pm_domain_probe(struct platform_device *pdev)
  
                 scpi_pd->domain = i;
                 scpi_pd->ops = scpi_ops;
-               sprintf(scpi_pd->name, "%s.%d", np->name, i);
+               sprintf(scpi_pd->name, "%pOFn.%d", np, i);
                 scpi_pd->genpd.name = scpi_pd->name;
                 scpi_pd->genpd.power_off = scpi_pd_power_off;
                 scpi_pd->genpd.power_on = scpi_pd_power_on;
diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi.c b/drivers/gpu/drm/mediatek/mtk_hdmi.c

index 2d45d1d..643f5ed 100644 (file)
--- a/drivers/gpu/drm/mediatek/mtk_hdmi.c
+++ b/drivers/gpu/drm/mediatek/mtk_hdmi.c
@@ -1446,8 +1446,7 @@ static int mtk_hdmi_dt_parse_pdata(struct mtk_hdmi *hdmi,
         }
  
         /* The CEC module handles HDMI hotplug detection */
-       cec_np = of_find_compatible_node(np->parent, NULL,
-                                        "mediatek,mt8173-cec");
+       cec_np = of_get_compatible_child(np->parent, "mediatek,mt8173-cec");
         if (!cec_np) {
                 dev_err(dev, "Failed to find CEC node\n");
                 return -EINVAL;
@@ -1457,8 +1456,10 @@ static int mtk_hdmi_dt_parse_pdata(struct mtk_hdmi *hdmi,
         if (!cec_pdev) {
                 dev_err(hdmi->dev, "Waiting for CEC device %pOF\n",
                         cec_np);
+               of_node_put(cec_np);
                 return -EPROBE_DEFER;
         }
+       of_node_put(cec_np);
         hdmi->cec_dev = &cec_pdev->dev;
  
         /*
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c

index da1363a..93d70f4 100644 (file)
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -633,8 +633,7 @@ static int adreno_get_legacy_pwrlevels(struct device *dev)
         struct device_node *child, *node;
         int ret;
  
-       node = of_find_compatible_node(dev->of_node, NULL,
-               "qcom,gpu-pwrlevels");
+       node = of_get_compatible_child(dev->of_node, "qcom,gpu-pwrlevels");
         if (!node) {
                 dev_err(dev, "Could not find the GPU powerlevels\n");
                 return -ENXIO;
@@ -655,6 +654,8 @@ static int adreno_get_legacy_pwrlevels(struct device *dev)
                         dev_pm_opp_add(dev, val, 0);
         }
  
+       of_node_put(node);
+
         return 0;
  }
  
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig

index 83e6d99..d9a2571 100644 (file)
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -186,6 +186,19 @@ config INTEL_IOMMU
           and include PCI device scope covered by these DMA
           remapping devices.
  
+config INTEL_IOMMU_DEBUGFS
+       bool "Export Intel IOMMU internals in Debugfs"
+       depends on INTEL_IOMMU && IOMMU_DEBUGFS
+       help
+         !!!WARNING!!!
+
+         DO NOT ENABLE THIS OPTION UNLESS YOU REALLY KNOW WHAT YOU ARE DOING!!!
+
+         Expose Intel IOMMU internals in Debugfs.
+
+         This option is -NOT- intended for production environments, and should
+         only be enabled for debugging Intel IOMMU.
+
  config INTEL_IOMMU_SVM
         bool "Support for Shared Virtual Memory with Intel IOMMU"
         depends on INTEL_IOMMU && X86
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile

index ab5eba6..a158a68 100644 (file)
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -17,6 +17,7 @@ obj-$(CONFIG_ARM_SMMU) += arm-smmu.o
  obj-$(CONFIG_ARM_SMMU_V3) += arm-smmu-v3.o
  obj-$(CONFIG_DMAR_TABLE) += dmar.o
  obj-$(CONFIG_INTEL_IOMMU) += intel-iommu.o intel-pasid.o
+obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += intel-iommu-debugfs.o
  obj-$(CONFIG_INTEL_IOMMU_SVM) += intel-svm.o
  obj-$(CONFIG_IPMMU_VMSA) += ipmmu-vmsa.o
  obj-$(CONFIG_IRQ_REMAP) += intel_irq_remapping.o irq_remapping.o
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c

index bee0dfb..1167ff0 100644 (file)
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -3083,6 +3083,8 @@ static bool amd_iommu_capable(enum iommu_cap cap)
                 return (irq_remapping_enabled == 1);
         case IOMMU_CAP_NOEXEC:
                 return false;
+       default:
+               break;
         }
  
         return false;
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c

index 3931c7d..bb2cd29 100644 (file)
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -1719,7 +1719,7 @@ static const struct attribute_group *amd_iommu_groups[] = {
         NULL,
  };
  
-static int iommu_init_pci(struct amd_iommu *iommu)
+static int __init iommu_init_pci(struct amd_iommu *iommu)
  {
         int cap_ptr = iommu->cap_ptr;
         u32 range, misc, low, high;
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c

index 5059d09..6947ccf 100644 (file)
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -1,18 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
  /*
   * IOMMU API for ARM architected SMMUv3 implementations.
   *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
   * Copyright (C) 2015 ARM Limited
   *
   * Author: Will Deacon <will.deacon@arm.com>
@@ -567,7 +556,8 @@ struct arm_smmu_device {
  
         int                             gerr_irq;
         int                             combined_irq;
-       atomic_t                        sync_nr;
+       u32                             sync_nr;
+       u8                              prev_cmd_opcode;
  
         unsigned long                   ias; /* IPA */
         unsigned long                   oas; /* PA */
@@ -611,6 +601,7 @@ struct arm_smmu_domain {
         struct mutex                    init_mutex; /* Protects smmu pointer */
  
         struct io_pgtable_ops           *pgtbl_ops;
+       bool                            non_strict;
  
         enum arm_smmu_domain_stage      stage;
         union {
@@ -708,7 +699,7 @@ static void queue_inc_prod(struct arm_smmu_queue *q)
  }
  
  /*
- * Wait for the SMMU to consume items. If drain is true, wait until the queue
+ * Wait for the SMMU to consume items. If sync is true, wait until the queue
   * is empty. Otherwise, wait until there is at least one free slot.
   */
  static int queue_poll_cons(struct arm_smmu_queue *q, bool sync, bool wfe)
@@ -901,6 +892,8 @@ static void arm_smmu_cmdq_insert_cmd(struct arm_smmu_device *smmu, u64 *cmd)
         struct arm_smmu_queue *q = &smmu->cmdq.q;
         bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
  
+       smmu->prev_cmd_opcode = FIELD_GET(CMDQ_0_OP, cmd[0]);
+
         while (queue_insert_raw(q, cmd) == -ENOSPC) {
                 if (queue_poll_cons(q, false, wfe))
                         dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
@@ -948,15 +941,21 @@ static int __arm_smmu_cmdq_issue_sync_msi(struct arm_smmu_device *smmu)
         struct arm_smmu_cmdq_ent ent = {
                 .opcode = CMDQ_OP_CMD_SYNC,
                 .sync   = {
-                       .msidata = atomic_inc_return_relaxed(&smmu->sync_nr),
                         .msiaddr = virt_to_phys(&smmu->sync_count),
                 },
         };
  
-       arm_smmu_cmdq_build_cmd(cmd, &ent);
-
         spin_lock_irqsave(&smmu->cmdq.lock, flags);
-       arm_smmu_cmdq_insert_cmd(smmu, cmd);
+
+       /* Piggy-back on the previous command if it's a SYNC */
+       if (smmu->prev_cmd_opcode == CMDQ_OP_CMD_SYNC) {
+               ent.sync.msidata = smmu->sync_nr;
+       } else {
+               ent.sync.msidata = ++smmu->sync_nr;
+               arm_smmu_cmdq_build_cmd(cmd, &ent);
+               arm_smmu_cmdq_insert_cmd(smmu, cmd);
+       }
+
         spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
  
         return __arm_smmu_sync_poll_msi(smmu, ent.sync.msidata);
@@ -1372,15 +1371,11 @@ static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
  }
  
  /* IO_PGTABLE API */
-static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
-{
-       arm_smmu_cmdq_issue_sync(smmu);
-}
-
  static void arm_smmu_tlb_sync(void *cookie)
  {
         struct arm_smmu_domain *smmu_domain = cookie;
-       __arm_smmu_tlb_sync(smmu_domain->smmu);
+
+       arm_smmu_cmdq_issue_sync(smmu_domain->smmu);
  }
  
  static void arm_smmu_tlb_inv_context(void *cookie)
@@ -1398,8 +1393,14 @@ static void arm_smmu_tlb_inv_context(void *cookie)
                 cmd.tlbi.vmid   = smmu_domain->s2_cfg.vmid;
         }
  
+       /*
+        * NOTE: when io-pgtable is in non-strict mode, we may get here with
+        * PTEs previously cleared by unmaps on the current CPU not yet visible
+        * to the SMMU. We are relying on the DSB implicit in queue_inc_prod()
+        * to guarantee those are observed before the TLBI. Do be careful, 007.
+        */
         arm_smmu_cmdq_issue_cmd(smmu, &cmd);
-       __arm_smmu_tlb_sync(smmu);
+       arm_smmu_cmdq_issue_sync(smmu);
  }
  
  static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
@@ -1624,6 +1625,9 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain)
         if (smmu->features & ARM_SMMU_FEAT_COHERENCY)
                 pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;
  
+       if (smmu_domain->non_strict)
+               pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
+
         pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
         if (!pgtbl_ops)
                 return -ENOMEM;
@@ -1772,12 +1776,20 @@ arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
         return ops->unmap(ops, iova, size);
  }
  
+static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
+{
+       struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+
+       if (smmu_domain->smmu)
+               arm_smmu_tlb_inv_context(smmu_domain);
+}
+
  static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
  {
         struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
  
         if (smmu)
-               __arm_smmu_tlb_sync(smmu);
+               arm_smmu_cmdq_issue_sync(smmu);
  }
  
  static phys_addr_t
@@ -1917,15 +1929,27 @@ static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
  {
         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
  
-       if (domain->type != IOMMU_DOMAIN_UNMANAGED)
-               return -EINVAL;
-
-       switch (attr) {
-       case DOMAIN_ATTR_NESTING:
-               *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
-               return 0;
+       switch (domain->type) {
+       case IOMMU_DOMAIN_UNMANAGED:
+               switch (attr) {
+               case DOMAIN_ATTR_NESTING:
+                       *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
+                       return 0;
+               default:
+                       return -ENODEV;
+               }
+               break;
+       case IOMMU_DOMAIN_DMA:
+               switch (attr) {
+               case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
+                       *(int *)data = smmu_domain->non_strict;
+                       return 0;
+               default:
+                       return -ENODEV;
+               }
+               break;
         default:
-               return -ENODEV;
+               return -EINVAL;
         }
  }
  
@@ -1935,26 +1959,37 @@ static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
         int ret = 0;
         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
  
-       if (domain->type != IOMMU_DOMAIN_UNMANAGED)
-               return -EINVAL;
-
         mutex_lock(&smmu_domain->init_mutex);
  
-       switch (attr) {
-       case DOMAIN_ATTR_NESTING:
-               if (smmu_domain->smmu) {
-                       ret = -EPERM;
-                       goto out_unlock;
+       switch (domain->type) {
+       case IOMMU_DOMAIN_UNMANAGED:
+               switch (attr) {
+               case DOMAIN_ATTR_NESTING:
+                       if (smmu_domain->smmu) {
+                               ret = -EPERM;
+                               goto out_unlock;
+                       }
+
+                       if (*(int *)data)
+                               smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
+                       else
+                               smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
+                       break;
+               default:
+                       ret = -ENODEV;
+               }
+               break;
+       case IOMMU_DOMAIN_DMA:
+               switch(attr) {
+               case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
+                       smmu_domain->non_strict = *(int *)data;
+                       break;
+               default:
+                       ret = -ENODEV;
                 }
-
-               if (*(int *)data)
-                       smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
-               else
-                       smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
-
                 break;
         default:
-               ret = -ENODEV;
+               ret = -EINVAL;
         }
  
  out_unlock:
@@ -1999,7 +2034,7 @@ static struct iommu_ops arm_smmu_ops = {
         .attach_dev             = arm_smmu_attach_dev,
         .map                    = arm_smmu_map,
         .unmap                  = arm_smmu_unmap,
-       .flush_iotlb_all        = arm_smmu_iotlb_sync,
+       .flush_iotlb_all        = arm_smmu_flush_iotlb_all,
         .iotlb_sync             = arm_smmu_iotlb_sync,
         .iova_to_phys           = arm_smmu_iova_to_phys,
         .add_device             = arm_smmu_add_device,
@@ -2180,7 +2215,6 @@ static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
  {
         int ret;
  
-       atomic_set(&smmu->sync_nr, 0);
         ret = arm_smmu_init_queues(smmu);
         if (ret)
                 return ret;
@@ -2353,8 +2387,8 @@ static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
         irq = smmu->combined_irq;
         if (irq) {
                 /*
-                * Cavium ThunderX2 implementation doesn't not support unique
-                * irq lines. Use single irq line for all the SMMUv3 interrupts.
+                * Cavium ThunderX2 implementation doesn't support unique irq
+                * lines. Use a single irq line for all the SMMUv3 interrupts.
                  */
                 ret = devm_request_threaded_irq(smmu->dev, irq,
                                         arm_smmu_combined_irq_handler,
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c

index fd1b80e..5a28ae8 100644 (file)
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -52,6 +52,7 @@
  #include <linux/spinlock.h>
  
  #include <linux/amba/bus.h>
+#include <linux/fsl/mc.h>
  
  #include "io-pgtable.h"
  #include "arm-smmu-regs.h"
@@ -246,6 +247,7 @@ struct arm_smmu_domain {
         const struct iommu_gather_ops   *tlb_ops;
         struct arm_smmu_cfg             cfg;
         enum arm_smmu_domain_stage      stage;
+       bool                            non_strict;
         struct mutex                    init_mutex; /* Protects smmu pointer */
         spinlock_t                      cb_lock; /* Serialises ATS1* ops and TLB syncs */
         struct iommu_domain             domain;
@@ -447,7 +449,11 @@ static void arm_smmu_tlb_inv_context_s1(void *cookie)
         struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
         void __iomem *base = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
  
-       writel_relaxed(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID);
+       /*
+        * NOTE: this is not a relaxed write; it needs to guarantee that PTEs
+        * cleared by the current CPU are visible to the SMMU before the TLBI.
+        */
+       writel(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID);
         arm_smmu_tlb_sync_context(cookie);
  }
  
@@ -457,7 +463,8 @@ static void arm_smmu_tlb_inv_context_s2(void *cookie)
         struct arm_smmu_device *smmu = smmu_domain->smmu;
         void __iomem *base = ARM_SMMU_GR0(smmu);
  
-       writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
+       /* NOTE: see above */
+       writel(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
         arm_smmu_tlb_sync_global(smmu);
  }
  
@@ -469,6 +476,9 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
         bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
         void __iomem *reg = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
  
+       if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
+               wmb();
+
         if (stage1) {
                 reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;
  
@@ -510,6 +520,9 @@ static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size,
         struct arm_smmu_domain *smmu_domain = cookie;
         void __iomem *base = ARM_SMMU_GR0(smmu_domain->smmu);
  
+       if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
+               wmb();
+
         writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
  }
  
@@ -863,6 +876,9 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
         if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
                 pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;
  
+       if (smmu_domain->non_strict)
+               pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
+
         smmu_domain->smmu = smmu;
         pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
         if (!pgtbl_ops) {
@@ -1252,6 +1268,14 @@ static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
         return ops->unmap(ops, iova, size);
  }
  
+static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
+{
+       struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+
+       if (smmu_domain->tlb_ops)
+               smmu_domain->tlb_ops->tlb_flush_all(smmu_domain);
+}
+
  static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
  {
         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
@@ -1459,6 +1483,8 @@ static struct iommu_group *arm_smmu_device_group(struct device *dev)
  
         if (dev_is_pci(dev))
                 group = pci_device_group(dev);
+       else if (dev_is_fsl_mc(dev))
+               group = fsl_mc_device_group(dev);
         else
                 group = generic_device_group(dev);
  
@@ -1470,15 +1496,27 @@ static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
  {
         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
  
-       if (domain->type != IOMMU_DOMAIN_UNMANAGED)
-               return -EINVAL;
-
-       switch (attr) {
-       case DOMAIN_ATTR_NESTING:
-               *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
-               return 0;
+       switch(domain->type) {
+       case IOMMU_DOMAIN_UNMANAGED:
+               switch (attr) {
+               case DOMAIN_ATTR_NESTING:
+                       *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
+                       return 0;
+               default:
+                       return -ENODEV;
+               }
+               break;
+       case IOMMU_DOMAIN_DMA:
+               switch (attr) {
+               case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
+                       *(int *)data = smmu_domain->non_strict;
+                       return 0;
+               default:
+                       return -ENODEV;
+               }
+               break;
         default:
-               return -ENODEV;
+               return -EINVAL;
         }
  }
  
@@ -1488,28 +1526,38 @@ static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
         int ret = 0;
         struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
  
-       if (domain->type != IOMMU_DOMAIN_UNMANAGED)
-               return -EINVAL;
-
         mutex_lock(&smmu_domain->init_mutex);
  
-       switch (attr) {
-       case DOMAIN_ATTR_NESTING:
-               if (smmu_domain->smmu) {
-                       ret = -EPERM;
-                       goto out_unlock;
+       switch(domain->type) {
+       case IOMMU_DOMAIN_UNMANAGED:
+               switch (attr) {
+               case DOMAIN_ATTR_NESTING:
+                       if (smmu_domain->smmu) {
+                               ret = -EPERM;
+                               goto out_unlock;
+                       }
+
+                       if (*(int *)data)
+                               smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
+                       else
+                               smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
+                       break;
+               default:
+                       ret = -ENODEV;
+               }
+               break;
+       case IOMMU_DOMAIN_DMA:
+               switch (attr) {
+               case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
+                       smmu_domain->non_strict = *(int *)data;
+                       break;
+               default:
+                       ret = -ENODEV;
                 }
-
-               if (*(int *)data)
-                       smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
-               else
-                       smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
-
                 break;
         default:
-               ret = -ENODEV;
+               ret = -EINVAL;
         }
-
  out_unlock:
         mutex_unlock(&smmu_domain->init_mutex);
         return ret;
@@ -1562,7 +1610,7 @@ static struct iommu_ops arm_smmu_ops = {
         .attach_dev             = arm_smmu_attach_dev,
         .map                    = arm_smmu_map,
         .unmap                  = arm_smmu_unmap,
-       .flush_iotlb_all        = arm_smmu_iotlb_sync,
+       .flush_iotlb_all        = arm_smmu_flush_iotlb_all,
         .iotlb_sync             = arm_smmu_iotlb_sync,
         .iova_to_phys           = arm_smmu_iova_to_phys,
         .add_device             = arm_smmu_add_device,
@@ -2036,6 +2084,10 @@ static void arm_smmu_bus_init(void)
                 bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
         }
  #endif
+#ifdef CONFIG_FSL_MC_BUS
+       if (!iommu_present(&fsl_mc_bus_type))
+               bus_set_iommu(&fsl_mc_bus_type, &arm_smmu_ops);
+#endif
  }
  
  static int arm_smmu_device_probe(struct platform_device *pdev)
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c

index 511ff9a..d1b0475 100644 (file)
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -55,6 +55,9 @@ struct iommu_dma_cookie {
         };
         struct list_head                msi_page_list;
         spinlock_t                      msi_lock;
+
+       /* Domain for flush queue callback; NULL if flush queue not in use */
+       struct iommu_domain             *fq_domain;
  };
  
  static inline size_t cookie_msi_granule(struct iommu_dma_cookie *cookie)
@@ -257,6 +260,20 @@ static int iova_reserve_iommu_regions(struct device *dev,
         return ret;
  }
  
+static void iommu_dma_flush_iotlb_all(struct iova_domain *iovad)
+{
+       struct iommu_dma_cookie *cookie;
+       struct iommu_domain *domain;
+
+       cookie = container_of(iovad, struct iommu_dma_cookie, iovad);
+       domain = cookie->fq_domain;
+       /*
+        * The IOMMU driver supporting DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE
+        * implies that ops->flush_iotlb_all must be non-NULL.
+        */
+       domain->ops->flush_iotlb_all(domain);
+}
+
  /**
   * iommu_dma_init_domain - Initialise a DMA mapping domain
   * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()
@@ -275,6 +292,7 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
         struct iommu_dma_cookie *cookie = domain->iova_cookie;
         struct iova_domain *iovad = &cookie->iovad;
         unsigned long order, base_pfn, end_pfn;
+       int attr;
  
         if (!cookie || cookie->type != IOMMU_DMA_IOVA_COOKIE)
                 return -EINVAL;
@@ -308,6 +326,13 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
         }
  
         init_iova_domain(iovad, 1UL << order, base_pfn);
+
+       if (!cookie->fq_domain && !iommu_domain_get_attr(domain,
+                       DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE, &attr) && attr) {
+               cookie->fq_domain = domain;
+               init_iova_flush_queue(iovad, iommu_dma_flush_iotlb_all, NULL);
+       }
+
         if (!dev)
                 return 0;
  
@@ -393,6 +418,9 @@ static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
         /* The MSI case is only ever cleaning up its most recent allocation */
         if (cookie->type == IOMMU_DMA_MSI_COOKIE)
                 cookie->msi_iova -= size;
+       else if (cookie->fq_domain)     /* non-strict mode */
+               queue_iova(iovad, iova_pfn(iovad, iova),
+                               size >> iova_shift(iovad), 0);
         else
                 free_iova_fast(iovad, iova_pfn(iovad, iova),
                                 size >> iova_shift(iovad));
@@ -408,7 +436,9 @@ static void __iommu_dma_unmap(struct iommu_domain *domain, dma_addr_t dma_addr,
         dma_addr -= iova_off;
         size = iova_align(iovad, size + iova_off);
  
-       WARN_ON(iommu_unmap(domain, dma_addr, size) != size);
+       WARN_ON(iommu_unmap_fast(domain, dma_addr, size) != size);
+       if (!cookie->fq_domain)
+               iommu_tlb_sync(domain);
         iommu_dma_free_iova(cookie, dma_addr, size);
  }
  
@@ -491,7 +521,7 @@ static struct page **__iommu_dma_alloc_pages(unsigned int count,
  void iommu_dma_free(struct device *dev, struct page **pages, size_t size,
                 dma_addr_t *handle)
  {
-       __iommu_dma_unmap(iommu_get_domain_for_dev(dev), *handle, size);
+       __iommu_dma_unmap(iommu_get_dma_domain(dev), *handle, size);
         __iommu_dma_free_pages(pages, PAGE_ALIGN(size) >> PAGE_SHIFT);
         *handle = IOMMU_MAPPING_ERROR;
  }
@@ -518,7 +548,7 @@ struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp,
                 unsigned long attrs, int prot, dma_addr_t *handle,
                 void (*flush_page)(struct device *, const void *, phys_addr_t))
  {
-       struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
+       struct iommu_domain *domain = iommu_get_dma_domain(dev);
         struct iommu_dma_cookie *cookie = domain->iova_cookie;
         struct iova_domain *iovad = &cookie->iovad;
         struct page **pages;
@@ -606,9 +636,8 @@ int iommu_dma_mmap(struct page **pages, size_t size, struct vm_area_struct *vma)
  }
  
  static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
-               size_t size, int prot)
+               size_t size, int prot, struct iommu_domain *domain)
  {
-       struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
         struct iommu_dma_cookie *cookie = domain->iova_cookie;
         size_t iova_off = 0;
         dma_addr_t iova;
@@ -632,13 +661,14 @@ static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
  dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
                 unsigned long offset, size_t size, int prot)
  {
-       return __iommu_dma_map(dev, page_to_phys(page) + offset, size, prot);
+       return __iommu_dma_map(dev, page_to_phys(page) + offset, size, prot,
+                       iommu_get_dma_domain(dev));
  }
  
  void iommu_dma_unmap_page(struct device *dev, dma_addr_t handle, size_t size,
                 enum dma_data_direction dir, unsigned long attrs)
  {
-       __iommu_dma_unmap(iommu_get_domain_for_dev(dev), handle, size);
+       __iommu_dma_unmap(iommu_get_dma_domain(dev), handle, size);
  }
  
  /*
@@ -726,7 +756,7 @@ static void __invalidate_sg(struct scatterlist *sg, int nents)
  int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
                 int nents, int prot)
  {
-       struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
+       struct iommu_domain *domain = iommu_get_dma_domain(dev);
         struct iommu_dma_cookie *cookie = domain->iova_cookie;
         struct iova_domain *iovad = &cookie->iovad;
         struct scatterlist *s, *prev = NULL;
@@ -811,20 +841,21 @@ void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
                 sg = tmp;
         }
         end = sg_dma_address(sg) + sg_dma_len(sg);
-       __iommu_dma_unmap(iommu_get_domain_for_dev(dev), start, end - start);
+       __iommu_dma_unmap(iommu_get_dma_domain(dev), start, end - start);
  }
  
  dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys,
                 size_t size, enum dma_data_direction dir, unsigned long attrs)
  {
         return __iommu_dma_map(dev, phys, size,
-                       dma_info_to_prot(dir, false, attrs) | IOMMU_MMIO);
+                       dma_info_to_prot(dir, false, attrs) | IOMMU_MMIO,
+                       iommu_get_dma_domain(dev));
  }
  
  void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle,
                 size_t size, enum dma_data_direction dir, unsigned long attrs)
  {
-       __iommu_dma_unmap(iommu_get_domain_for_dev(dev), handle, size);
+       __iommu_dma_unmap(iommu_get_dma_domain(dev), handle, size);
  }
  
  int iommu_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
@@ -850,7 +881,7 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
         if (!msi_page)
                 return NULL;
  
-       iova = __iommu_dma_map(dev, msi_addr, size, prot);
+       iova = __iommu_dma_map(dev, msi_addr, size, prot, domain);
         if (iommu_dma_mapping_error(dev, iova))
                 goto out_free_page;
  
diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c

index 8540625..1b955ae 100644 (file)
--- a/drivers/iommu/fsl_pamu.c
+++ b/drivers/iommu/fsl_pamu.c
@@ -543,7 +543,7 @@ u32 get_stash_id(u32 stash_dest_hint, u32 vcpu)
                 return ~(u32)0;
         }
  
-       for_each_node_by_type(node, "cpu") {
+       for_each_of_cpu_node(node) {
                 prop = of_get_property(node, "reg", &len);
                 for (i = 0; i < len / sizeof(u32); i++) {
                         if (be32_to_cpup(&prop[i]) == vcpu) {
diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c

index f089136..9b528cf 100644 (file)
--- a/drivers/iommu/fsl_pamu_domain.c
+++ b/drivers/iommu/fsl_pamu_domain.c
@@ -814,6 +814,55 @@ static int configure_domain_dma_state(struct fsl_dma_domain *dma_domain, bool en
         return 0;
  }
  
+static int fsl_pamu_set_windows(struct iommu_domain *domain, u32 w_count)
+{
+       struct fsl_dma_domain *dma_domain = to_fsl_dma_domain(domain);
+       unsigned long flags;
+       int ret;
+
+       spin_lock_irqsave(&dma_domain->domain_lock, flags);
+       /* Ensure domain is inactive i.e. DMA should be disabled for the domain */
+       if (dma_domain->enabled) {
+               pr_debug("Can't set geometry attributes as domain is active\n");
+               spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+               return  -EBUSY;
+       }
+
+       /* Ensure that the geometry has been set for the domain */
+       if (!dma_domain->geom_size) {
+               pr_debug("Please configure geometry before setting the number of windows\n");
+               spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+               return -EINVAL;
+       }
+
+       /*
+        * Ensure we have valid window count i.e. it should be less than
+        * maximum permissible limit and should be a power of two.
+        */
+       if (w_count > pamu_get_max_subwin_cnt() || !is_power_of_2(w_count)) {
+               pr_debug("Invalid window count\n");
+               spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+               return -EINVAL;
+       }
+
+       ret = pamu_set_domain_geometry(dma_domain, &domain->geometry,
+                                      w_count > 1 ? w_count : 0);
+       if (!ret) {
+               kfree(dma_domain->win_arr);
+               dma_domain->win_arr = kcalloc(w_count,
+                                             sizeof(*dma_domain->win_arr),
+                                             GFP_ATOMIC);
+               if (!dma_domain->win_arr) {
+                       spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+                       return -ENOMEM;
+               }
+               dma_domain->win_cnt = w_count;
+       }
+       spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+
+       return ret;
+}
+
  static int fsl_pamu_set_domain_attr(struct iommu_domain *domain,
                                     enum iommu_attr attr_type, void *data)
  {
@@ -830,6 +879,9 @@ static int fsl_pamu_set_domain_attr(struct iommu_domain *domain,
         case DOMAIN_ATTR_FSL_PAMU_ENABLE:
                 ret = configure_domain_dma_state(dma_domain, *(int *)data);
                 break;
+       case DOMAIN_ATTR_WINDOWS:
+               ret = fsl_pamu_set_windows(domain, *(u32 *)data);
+               break;
         default:
                 pr_debug("Unsupported attribute type\n");
                 ret = -EINVAL;
@@ -856,6 +908,9 @@ static int fsl_pamu_get_domain_attr(struct iommu_domain *domain,
         case DOMAIN_ATTR_FSL_PAMUV1:
                 *(int *)data = DOMAIN_ATTR_FSL_PAMUV1;
                 break;
+       case DOMAIN_ATTR_WINDOWS:
+               *(u32 *)data = dma_domain->win_cnt;
+               break;
         default:
                 pr_debug("Unsupported attribute type\n");
                 ret = -EINVAL;
@@ -916,13 +971,13 @@ static struct iommu_group *get_shared_pci_device_group(struct pci_dev *pdev)
  static struct iommu_group *get_pci_device_group(struct pci_dev *pdev)
  {
         struct pci_controller *pci_ctl;
-       bool pci_endpt_partioning;
+       bool pci_endpt_partitioning;
         struct iommu_group *group = NULL;
  
         pci_ctl = pci_bus_to_host(pdev->bus);
-       pci_endpt_partioning = check_pci_ctl_endpt_part(pci_ctl);
+       pci_endpt_partitioning = check_pci_ctl_endpt_part(pci_ctl);
         /* We can partition PCIe devices so assign device group to the device */
-       if (pci_endpt_partioning) {
+       if (pci_endpt_partitioning) {
                 group = pci_device_group(&pdev->dev);
  
                 /*
@@ -994,62 +1049,6 @@ static void fsl_pamu_remove_device(struct device *dev)
         iommu_group_remove_device(dev);
  }
  
-static int fsl_pamu_set_windows(struct iommu_domain *domain, u32 w_count)
-{
-       struct fsl_dma_domain *dma_domain = to_fsl_dma_domain(domain);
-       unsigned long flags;
-       int ret;
-
-       spin_lock_irqsave(&dma_domain->domain_lock, flags);
-       /* Ensure domain is inactive i.e. DMA should be disabled for the domain */
-       if (dma_domain->enabled) {
-               pr_debug("Can't set geometry attributes as domain is active\n");
-               spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
-               return  -EBUSY;
-       }
-
-       /* Ensure that the geometry has been set for the domain */
-       if (!dma_domain->geom_size) {
-               pr_debug("Please configure geometry before setting the number of windows\n");
-               spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
-               return -EINVAL;
-       }
-
-       /*
-        * Ensure we have valid window count i.e. it should be less than
-        * maximum permissible limit and should be a power of two.
-        */
-       if (w_count > pamu_get_max_subwin_cnt() || !is_power_of_2(w_count)) {
-               pr_debug("Invalid window count\n");
-               spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
-               return -EINVAL;
-       }
-
-       ret = pamu_set_domain_geometry(dma_domain, &domain->geometry,
-                                      w_count > 1 ? w_count : 0);
-       if (!ret) {
-               kfree(dma_domain->win_arr);
-               dma_domain->win_arr = kcalloc(w_count,
-                                             sizeof(*dma_domain->win_arr),
-                                             GFP_ATOMIC);
-               if (!dma_domain->win_arr) {
-                       spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
-                       return -ENOMEM;
-               }
-               dma_domain->win_cnt = w_count;
-       }
-       spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
-
-       return ret;
-}
-
-static u32 fsl_pamu_get_windows(struct iommu_domain *domain)
-{
-       struct fsl_dma_domain *dma_domain = to_fsl_dma_domain(domain);
-
-       return dma_domain->win_cnt;
-}
-
  static const struct iommu_ops fsl_pamu_ops = {
         .capable        = fsl_pamu_capable,
         .domain_alloc   = fsl_pamu_domain_alloc,
@@ -1058,8 +1057,6 @@ static const struct iommu_ops fsl_pamu_ops = {
         .detach_dev     = fsl_pamu_detach_device,
         .domain_window_enable = fsl_pamu_window_enable,
         .domain_window_disable = fsl_pamu_window_disable,
-       .domain_get_windows = fsl_pamu_get_windows,
-       .domain_set_windows = fsl_pamu_set_windows,
         .iova_to_phys   = fsl_pamu_iova_to_phys,
         .domain_set_attr = fsl_pamu_set_domain_attr,
         .domain_get_attr = fsl_pamu_get_domain_attr,
diff --git a/drivers/iommu/intel-iommu-debugfs.c b/drivers/iommu/intel-iommu-debugfs.c

new file mode 100644 (file)

index 0000000..7fabf9b
--- /dev/null
+++ b/drivers/iommu/intel-iommu-debugfs.c
@@ -0,0 +1,314 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright © 2018 Intel Corporation.
+ *
+ * Authors: Gayatri Kammela <gayatri.kammela@intel.com>
+ *         Sohil Mehta <sohil.mehta@intel.com>
+ *         Jacob Pan <jacob.jun.pan@linux.intel.com>
+ */
+
+#include <linux/debugfs.h>
+#include <linux/dmar.h>
+#include <linux/intel-iommu.h>
+#include <linux/pci.h>
+
+#include <asm/irq_remapping.h>
+
+struct iommu_regset {
+       int offset;
+       const char *regs;
+};
+
+#define IOMMU_REGSET_ENTRY(_reg_)                                      \
+       { DMAR_##_reg_##_REG, __stringify(_reg_) }
+static const struct iommu_regset iommu_regs[] = {
+       IOMMU_REGSET_ENTRY(VER),
+       IOMMU_REGSET_ENTRY(CAP),
+       IOMMU_REGSET_ENTRY(ECAP),
+       IOMMU_REGSET_ENTRY(GCMD),
+       IOMMU_REGSET_ENTRY(GSTS),
+       IOMMU_REGSET_ENTRY(RTADDR),
+       IOMMU_REGSET_ENTRY(CCMD),
+       IOMMU_REGSET_ENTRY(FSTS),
+       IOMMU_REGSET_ENTRY(FECTL),
+       IOMMU_REGSET_ENTRY(FEDATA),
+       IOMMU_REGSET_ENTRY(FEADDR),
+       IOMMU_REGSET_ENTRY(FEUADDR),
+       IOMMU_REGSET_ENTRY(AFLOG),
+       IOMMU_REGSET_ENTRY(PMEN),
+       IOMMU_REGSET_ENTRY(PLMBASE),
+       IOMMU_REGSET_ENTRY(PLMLIMIT),
+       IOMMU_REGSET_ENTRY(PHMBASE),
+       IOMMU_REGSET_ENTRY(PHMLIMIT),
+       IOMMU_REGSET_ENTRY(IQH),
+       IOMMU_REGSET_ENTRY(IQT),
+       IOMMU_REGSET_ENTRY(IQA),
+       IOMMU_REGSET_ENTRY(ICS),
+       IOMMU_REGSET_ENTRY(IRTA),
+       IOMMU_REGSET_ENTRY(PQH),
+       IOMMU_REGSET_ENTRY(PQT),
+       IOMMU_REGSET_ENTRY(PQA),
+       IOMMU_REGSET_ENTRY(PRS),
+       IOMMU_REGSET_ENTRY(PECTL),
+       IOMMU_REGSET_ENTRY(PEDATA),
+       IOMMU_REGSET_ENTRY(PEADDR),
+       IOMMU_REGSET_ENTRY(PEUADDR),
+       IOMMU_REGSET_ENTRY(MTRRCAP),
+       IOMMU_REGSET_ENTRY(MTRRDEF),
+       IOMMU_REGSET_ENTRY(MTRR_FIX64K_00000),
+       IOMMU_REGSET_ENTRY(MTRR_FIX16K_80000),
+       IOMMU_REGSET_ENTRY(MTRR_FIX16K_A0000),
+       IOMMU_REGSET_ENTRY(MTRR_FIX4K_C0000),
+       IOMMU_REGSET_ENTRY(MTRR_FIX4K_C8000),
+       IOMMU_REGSET_ENTRY(MTRR_FIX4K_D0000),
+       IOMMU_REGSET_ENTRY(MTRR_FIX4K_D8000),
+       IOMMU_REGSET_ENTRY(MTRR_FIX4K_E0000),
+       IOMMU_REGSET_ENTRY(MTRR_FIX4K_E8000),
+       IOMMU_REGSET_ENTRY(MTRR_FIX4K_F0000),
+       IOMMU_REGSET_ENTRY(MTRR_FIX4K_F8000),
+       IOMMU_REGSET_ENTRY(MTRR_PHYSBASE0),
+       IOMMU_REGSET_ENTRY(MTRR_PHYSMASK0),
+       IOMMU_REGSET_ENTRY(MTRR_PHYSBASE1),
+       IOMMU_REGSET_ENTRY(MTRR_PHYSMASK1),
+       IOMMU_REGSET_ENTRY(MTRR_PHYSBASE2),
+       IOMMU_REGSET_ENTRY(MTRR_PHYSMASK2),
+       IOMMU_REGSET_ENTRY(MTRR_PHYSBASE3),
+       IOMMU_REGSET_ENTRY(MTRR_PHYSMASK3),
+       IOMMU_REGSET_ENTRY(MTRR_PHYSBASE4),
+       IOMMU_REGSET_ENTRY(MTRR_PHYSMASK4),
+       IOMMU_REGSET_ENTRY(MTRR_PHYSBASE5),
+       IOMMU_REGSET_ENTRY(MTRR_PHYSMASK5),
+       IOMMU_REGSET_ENTRY(MTRR_PHYSBASE6),
+       IOMMU_REGSET_ENTRY(MTRR_PHYSMASK6),
+       IOMMU_REGSET_ENTRY(MTRR_PHYSBASE7),
+       IOMMU_REGSET_ENTRY(MTRR_PHYSMASK7),
+       IOMMU_REGSET_ENTRY(MTRR_PHYSBASE8),
+       IOMMU_REGSET_ENTRY(MTRR_PHYSMASK8),
+       IOMMU_REGSET_ENTRY(MTRR_PHYSBASE9),
+       IOMMU_REGSET_ENTRY(MTRR_PHYSMASK9),
+       IOMMU_REGSET_ENTRY(VCCAP),
+       IOMMU_REGSET_ENTRY(VCMD),
+       IOMMU_REGSET_ENTRY(VCRSP),
+};
+
+static int iommu_regset_show(struct seq_file *m, void *unused)
+{
+       struct dmar_drhd_unit *drhd;
+       struct intel_iommu *iommu;
+       unsigned long flag;
+       int i, ret = 0;
+       u64 value;
+
+       rcu_read_lock();
+       for_each_active_iommu(iommu, drhd) {
+               if (!drhd->reg_base_addr) {
+                       seq_puts(m, "IOMMU: Invalid base address\n");
+                       ret = -EINVAL;
+                       goto out;
+               }
+
+               seq_printf(m, "IOMMU: %s Register Base Address: %llx\n",
+                          iommu->name, drhd->reg_base_addr);
+               seq_puts(m, "Name\t\t\tOffset\t\tContents\n");
+               /*
+                * Publish the contents of the 64-bit hardware registers
+                * by adding the offset to the pointer (virtual address).
+                */
+               raw_spin_lock_irqsave(&iommu->register_lock, flag);
+               for (i = 0 ; i < ARRAY_SIZE(iommu_regs); i++) {
+                       value = dmar_readq(iommu->reg + iommu_regs[i].offset);
+                       seq_printf(m, "%-16s\t0x%02x\t\t0x%016llx\n",
+                                  iommu_regs[i].regs, iommu_regs[i].offset,
+                                  value);
+               }
+               raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
+               seq_putc(m, '\n');
+       }
+out:
+       rcu_read_unlock();
+
+       return ret;
+}
+DEFINE_SHOW_ATTRIBUTE(iommu_regset);
+
+static void ctx_tbl_entry_show(struct seq_file *m, struct intel_iommu *iommu,
+                              int bus)
+{
+       struct context_entry *context;
+       int devfn;
+
+       seq_printf(m, " Context Table Entries for Bus: %d\n", bus);
+       seq_puts(m, "  Entry\tB:D.F\tHigh\tLow\n");
+
+       for (devfn = 0; devfn < 256; devfn++) {
+               context = iommu_context_addr(iommu, bus, devfn, 0);
+               if (!context)
+                       return;
+
+               if (!context_present(context))
+                       continue;
+
+               seq_printf(m, "  %-5d\t%02x:%02x.%x\t%-6llx\t%llx\n", devfn,
+                          bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+                          context[0].hi, context[0].lo);
+       }
+}
+
+static void root_tbl_entry_show(struct seq_file *m, struct intel_iommu *iommu)
+{
+       unsigned long flags;
+       int bus;
+
+       spin_lock_irqsave(&iommu->lock, flags);
+       seq_printf(m, "IOMMU %s: Root Table Address:%llx\n", iommu->name,
+                  (u64)virt_to_phys(iommu->root_entry));
+       seq_puts(m, "Root Table Entries:\n");
+
+       for (bus = 0; bus < 256; bus++) {
+               if (!(iommu->root_entry[bus].lo & 1))
+                       continue;
+
+               seq_printf(m, " Bus: %d H: %llx L: %llx\n", bus,
+                          iommu->root_entry[bus].hi,
+                          iommu->root_entry[bus].lo);
+
+               ctx_tbl_entry_show(m, iommu, bus);
+               seq_putc(m, '\n');
+       }
+       spin_unlock_irqrestore(&iommu->lock, flags);
+}
+
+static int dmar_translation_struct_show(struct seq_file *m, void *unused)
+{
+       struct dmar_drhd_unit *drhd;
+       struct intel_iommu *iommu;
+
+       rcu_read_lock();
+       for_each_active_iommu(iommu, drhd) {
+               root_tbl_entry_show(m, iommu);
+               seq_putc(m, '\n');
+       }
+       rcu_read_unlock();
+
+       return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(dmar_translation_struct);
+
+#ifdef CONFIG_IRQ_REMAP
+static void ir_tbl_remap_entry_show(struct seq_file *m,
+                                   struct intel_iommu *iommu)
+{
+       struct irte *ri_entry;
+       unsigned long flags;
+       int idx;
+
+       seq_puts(m, " Entry SrcID   DstID    Vct IRTE_high\t\tIRTE_low\n");
+
+       raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
+       for (idx = 0; idx < INTR_REMAP_TABLE_ENTRIES; idx++) {
+               ri_entry = &iommu->ir_table->base[idx];
+               if (!ri_entry->present || ri_entry->p_pst)
+                       continue;
+
+               seq_printf(m, " %-5d %02x:%02x.%01x %08x %02x  %016llx\t%016llx\n",
+                          idx, PCI_BUS_NUM(ri_entry->sid),
+                          PCI_SLOT(ri_entry->sid), PCI_FUNC(ri_entry->sid),
+                          ri_entry->dest_id, ri_entry->vector,
+                          ri_entry->high, ri_entry->low);
+       }
+       raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
+}
+
+static void ir_tbl_posted_entry_show(struct seq_file *m,
+                                    struct intel_iommu *iommu)
+{
+       struct irte *pi_entry;
+       unsigned long flags;
+       int idx;
+
+       seq_puts(m, " Entry SrcID   PDA_high PDA_low  Vct IRTE_high\t\tIRTE_low\n");
+
+       raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
+       for (idx = 0; idx < INTR_REMAP_TABLE_ENTRIES; idx++) {
+               pi_entry = &iommu->ir_table->base[idx];
+               if (!pi_entry->present || !pi_entry->p_pst)
+                       continue;
+
+               seq_printf(m, " %-5d %02x:%02x.%01x %08x %08x %02x  %016llx\t%016llx\n",
+                          idx, PCI_BUS_NUM(pi_entry->sid),
+                          PCI_SLOT(pi_entry->sid), PCI_FUNC(pi_entry->sid),
+                          pi_entry->pda_h, pi_entry->pda_l << 6,
+                          pi_entry->vector, pi_entry->high,
+                          pi_entry->low);
+       }
+       raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
+}
+
+/*
+ * For active IOMMUs go through the Interrupt remapping
+ * table and print valid entries in a table format for
+ * Remapped and Posted Interrupts.
+ */
+static int ir_translation_struct_show(struct seq_file *m, void *unused)
+{
+       struct dmar_drhd_unit *drhd;
+       struct intel_iommu *iommu;
+       u64 irta;
+
+       rcu_read_lock();
+       for_each_active_iommu(iommu, drhd) {
+               if (!ecap_ir_support(iommu->ecap))
+                       continue;
+
+               seq_printf(m, "Remapped Interrupt supported on IOMMU: %s\n",
+                          iommu->name);
+
+               if (iommu->ir_table) {
+                       irta = virt_to_phys(iommu->ir_table->base);
+                       seq_printf(m, " IR table address:%llx\n", irta);
+                       ir_tbl_remap_entry_show(m, iommu);
+               } else {
+                       seq_puts(m, "Interrupt Remapping is not enabled\n");
+               }
+               seq_putc(m, '\n');
+       }
+
+       seq_puts(m, "****\n\n");
+
+       for_each_active_iommu(iommu, drhd) {
+               if (!cap_pi_support(iommu->cap))
+                       continue;
+
+               seq_printf(m, "Posted Interrupt supported on IOMMU: %s\n",
+                          iommu->name);
+
+               if (iommu->ir_table) {
+                       irta = virt_to_phys(iommu->ir_table->base);
+                       seq_printf(m, " IR table address:%llx\n", irta);
+                       ir_tbl_posted_entry_show(m, iommu);
+               } else {
+                       seq_puts(m, "Interrupt Remapping is not enabled\n");
+               }
+               seq_putc(m, '\n');
+       }
+       rcu_read_unlock();
+
+       return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(ir_translation_struct);
+#endif
+
+void __init intel_iommu_debugfs_init(void)
+{
+       struct dentry *intel_iommu_debug = debugfs_create_dir("intel",
+                                               iommu_debugfs_dir);
+
+       debugfs_create_file("iommu_regset", 0444, intel_iommu_debug, NULL,
+                           &iommu_regset_fops);
+       debugfs_create_file("dmar_translation_struct", 0444, intel_iommu_debug,
+                           NULL, &dmar_translation_struct_fops);
+#ifdef CONFIG_IRQ_REMAP
+       debugfs_create_file("ir_translation_struct", 0444, intel_iommu_debug,
+                           NULL, &ir_translation_struct_fops);
+#endif
+}
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c

index 76f0a5d..f3ccf02 100644 (file)
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -185,16 +185,6 @@ static int rwbf_quirk;
  static int force_on = 0;
  int intel_iommu_tboot_noforce;
  
-/*
- * 0: Present
- * 1-11: Reserved
- * 12-63: Context Ptr (12 - (haw-1))
- * 64-127: Reserved
- */
-struct root_entry {
-       u64     lo;
-       u64     hi;
-};
  #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
  
  /*
@@ -220,21 +210,6 @@ static phys_addr_t root_entry_uctp(struct root_entry *re)
  
         return re->hi & VTD_PAGE_MASK;
  }
-/*
- * low 64 bits:
- * 0: present
- * 1: fault processing disable
- * 2-3: translation type
- * 12-63: address space root
- * high 64 bits:
- * 0-2: address width
- * 3-6: aval
- * 8-23: domain id
- */
-struct context_entry {
-       u64 lo;
-       u64 hi;
-};
  
  static inline void context_clear_pasid_enable(struct context_entry *context)
  {
@@ -261,7 +236,7 @@ static inline bool __context_present(struct context_entry *context)
         return (context->lo & 1);
  }
  
-static inline bool context_present(struct context_entry *context)
+bool context_present(struct context_entry *context)
  {
         return context_pasid_enabled(context) ?
              __context_present(context) :
@@ -788,8 +763,8 @@ static void domain_update_iommu_cap(struct dmar_domain *domain)
         domain->iommu_superpage = domain_update_iommu_superpage(NULL);
  }
  
-static inline struct context_entry *iommu_context_addr(struct intel_iommu *iommu,
-                                                      u8 bus, u8 devfn, int alloc)
+struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
+                                        u8 devfn, int alloc)
  {
         struct root_entry *root = &iommu->root_entry[bus];
         struct context_entry *context;
@@ -4860,6 +4835,7 @@ int __init intel_iommu_init(void)
         cpuhp_setup_state(CPUHP_IOMMU_INTEL_DEAD, "iommu/intel:dead", NULL,
                           intel_iommu_cpu_dead);
         intel_iommu_enabled = 1;
+       intel_iommu_debugfs_init();
  
         return 0;
  
diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c

index 967450b..c2d6c11 100644 (file)
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -76,7 +76,7 @@ static struct hpet_scope ir_hpet[MAX_HPET_TBS];
   * in single-threaded environment with interrupt disabled, so no need to take
   * the dmar_global_lock.
   */
-static DEFINE_RAW_SPINLOCK(irq_2_ir_lock);
+DEFINE_RAW_SPINLOCK(irq_2_ir_lock);
  static const struct irq_domain_ops intel_ir_domain_ops;
  
  static void iommu_disable_irq_remapping(struct intel_iommu *iommu);
diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c

index b5948ba..445c3bd 100644 (file)
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -587,6 +587,7 @@ static size_t arm_v7s_split_blk_unmap(struct arm_v7s_io_pgtable *data,
         }
  
         io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true);
+       io_pgtable_tlb_sync(&data->iop);
         return size;
  }
  
@@ -642,6 +643,13 @@ static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
                                 io_pgtable_tlb_sync(iop);
                                 ptep = iopte_deref(pte[i], lvl);
                                 __arm_v7s_free_table(ptep, lvl + 1, data);
+                       } else if (iop->cfg.quirks & IO_PGTABLE_QUIRK_NON_STRICT) {
+                               /*
+                                * Order the PTE update against queueing the IOVA, to
+                                * guarantee that a flush callback from a different CPU
+                                * has observed it before the TLBIALL can be issued.
+                                */
+                               smp_wmb();
                         } else {
                                 io_pgtable_tlb_add_flush(iop, iova, blk_size,
                                                          blk_size, true);
@@ -712,7 +720,8 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
                             IO_PGTABLE_QUIRK_NO_PERMS |
                             IO_PGTABLE_QUIRK_TLBI_ON_MAP |
                             IO_PGTABLE_QUIRK_ARM_MTK_4GB |
-                           IO_PGTABLE_QUIRK_NO_DMA))
+                           IO_PGTABLE_QUIRK_NO_DMA |
+                           IO_PGTABLE_QUIRK_NON_STRICT))
                 return NULL;
  
         /* If ARM_MTK_4GB is enabled, the NO_PERMS is also expected. */
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c

index 88641b4..237cacd 100644 (file)
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -574,13 +574,13 @@ static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
                         return 0;
  
                 tablep = iopte_deref(pte, data);
+       } else if (unmap_idx >= 0) {
+               io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true);
+               io_pgtable_tlb_sync(&data->iop);
+               return size;
         }
  
-       if (unmap_idx < 0)
-               return __arm_lpae_unmap(data, iova, size, lvl, tablep);
-
-       io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true);
-       return size;
+       return __arm_lpae_unmap(data, iova, size, lvl, tablep);
  }
  
  static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
@@ -610,6 +610,13 @@ static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
                         io_pgtable_tlb_sync(iop);
                         ptep = iopte_deref(pte, data);
                         __arm_lpae_free_pgtable(data, lvl + 1, ptep);
+               } else if (iop->cfg.quirks & IO_PGTABLE_QUIRK_NON_STRICT) {
+                       /*
+                        * Order the PTE update against queueing the IOVA, to
+                        * guarantee that a flush callback from a different CPU
+                        * has observed it before the TLBIALL can be issued.
+                        */
+                       smp_wmb();
                 } else {
                         io_pgtable_tlb_add_flush(iop, iova, size, size, true);
                 }
@@ -772,7 +779,8 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
         u64 reg;
         struct arm_lpae_io_pgtable *data;
  
-       if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_DMA))
+       if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_DMA |
+                           IO_PGTABLE_QUIRK_NON_STRICT))
                 return NULL;
  
         data = arm_lpae_alloc_pgtable(cfg);
@@ -864,7 +872,8 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie)
         struct arm_lpae_io_pgtable *data;
  
         /* The NS quirk doesn't apply at stage 2 */
-       if (cfg->quirks & ~IO_PGTABLE_QUIRK_NO_DMA)
+       if (cfg->quirks & ~(IO_PGTABLE_QUIRK_NO_DMA |
+                           IO_PGTABLE_QUIRK_NON_STRICT))
                 return NULL;
  
         data = arm_lpae_alloc_pgtable(cfg);
diff --git a/drivers/iommu/io-pgtable.h b/drivers/iommu/io-pgtable.h

index 2df7909..47d5ae5 100644 (file)
--- a/drivers/iommu/io-pgtable.h
+++ b/drivers/iommu/io-pgtable.h
@@ -71,12 +71,17 @@ struct io_pgtable_cfg {
          *      be accessed by a fully cache-coherent IOMMU or CPU (e.g. for a
          *      software-emulated IOMMU), such that pagetable updates need not
          *      be treated as explicit DMA data.
+        *
+        * IO_PGTABLE_QUIRK_NON_STRICT: Skip issuing synchronous leaf TLBIs
+        *      on unmap, for DMA domains using the flush queue mechanism for
+        *      delayed invalidation.
          */
         #define IO_PGTABLE_QUIRK_ARM_NS         BIT(0)
         #define IO_PGTABLE_QUIRK_NO_PERMS       BIT(1)
         #define IO_PGTABLE_QUIRK_TLBI_ON_MAP    BIT(2)
         #define IO_PGTABLE_QUIRK_ARM_MTK_4GB    BIT(3)
         #define IO_PGTABLE_QUIRK_NO_DMA         BIT(4)
+       #define IO_PGTABLE_QUIRK_NON_STRICT     BIT(5)
         unsigned long                   quirks;
         unsigned long                   pgsize_bitmap;
         unsigned int                    ias;
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c

index 8c15c59..edbdf5d 100644 (file)
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -32,6 +32,7 @@
  #include <linux/pci.h>
  #include <linux/bitops.h>
  #include <linux/property.h>
+#include <linux/fsl/mc.h>
  #include <trace/events/iommu.h>
  
  static struct kset *iommu_group_kset;
@@ -41,6 +42,7 @@ static unsigned int iommu_def_domain_type = IOMMU_DOMAIN_IDENTITY;
  #else
  static unsigned int iommu_def_domain_type = IOMMU_DOMAIN_DMA;
  #endif
+static bool iommu_dma_strict __read_mostly = true;
  
  struct iommu_callback_data {
         const struct iommu_ops *ops;
@@ -131,6 +133,12 @@ static int __init iommu_set_def_domain_type(char *str)
  }
  early_param("iommu.passthrough", iommu_set_def_domain_type);
  
+/*
+ * Handler for the "iommu.strict" early parameter: parse @str as a boolean
+ * into iommu_dma_strict with kstrtobool() and hand its return value
+ * straight back to the caller.
+ */
+static int __init iommu_dma_setup(char *str)
+{
+       return kstrtobool(str, &iommu_dma_strict);
+}
+early_param("iommu.strict", iommu_dma_setup);
+
  static ssize_t iommu_group_attr_show(struct kobject *kobj,
                                      struct attribute *__attr, char *buf)
  {
@@ -1024,6 +1032,18 @@ struct iommu_group *pci_device_group(struct device *dev)
         return iommu_group_alloc();
  }
  
+/*
+ * Get the IOMMU group for device on fsl-mc bus.
+ *
+ * Looks up the group via iommu_group_get() on the device returned by
+ * fsl_mc_cont_dev(dev); if that yields no group, a new one is obtained
+ * from iommu_group_alloc() and returned as-is (its result is not checked
+ * here).
+ */
+struct iommu_group *fsl_mc_device_group(struct device *dev)
+{
+       struct device *cont_dev = fsl_mc_cont_dev(dev);
+       struct iommu_group *group;
+
+       group = iommu_group_get(cont_dev);
+       if (!group)
+               group = iommu_group_alloc();
+       return group;
+}
+
  /**
   * iommu_group_get_for_dev - Find or create the IOMMU group for a device
   * @dev: target device
@@ -1072,6 +1092,13 @@ struct iommu_group *iommu_group_get_for_dev(struct device *dev)
                 group->default_domain = dom;
                 if (!group->domain)
                         group->domain = dom;
+
+               if (dom && !iommu_dma_strict) {
+                       int attr = 1;
+                       iommu_domain_set_attr(dom,
+                                             DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE,
+                                             &attr);
+               }
         }
  
         ret = iommu_group_add_device(group, dev);
@@ -1416,7 +1443,16 @@ struct iommu_domain *iommu_get_domain_for_dev(struct device *dev)
  EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev);
  
  /*
- * IOMMU groups are really the natrual working unit of the IOMMU, but
+ * For IOMMU_DOMAIN_DMA implementations which already provide their own
+ * guarantees that the group and its default domain are valid and correct.
+ *
+ * Returns dev->iommu_group->default_domain directly: dev->iommu_group is
+ * dereferenced without a NULL check, so the caller must uphold the
+ * guarantee described above.
+ */
+struct iommu_domain *iommu_get_dma_domain(struct device *dev)
+{
+       return dev->iommu_group->default_domain;
+}
+
+/*
+ * IOMMU groups are really the natural working unit of the IOMMU, but
   * the IOMMU API works on domains and devices.  Bridge that gap by
   * iterating over the devices in a group.  Ideally we'd have a single
   * device which represents the requestor ID of the group, but we also
@@ -1796,7 +1832,6 @@ int iommu_domain_get_attr(struct iommu_domain *domain,
         struct iommu_domain_geometry *geometry;
         bool *paging;
         int ret = 0;
-       u32 *count;
  
         switch (attr) {
         case DOMAIN_ATTR_GEOMETRY:
@@ -1807,15 +1842,6 @@ int iommu_domain_get_attr(struct iommu_domain *domain,
         case DOMAIN_ATTR_PAGING:
                 paging  = data;
                 *paging = (domain->pgsize_bitmap != 0UL);
-               break;
-       case DOMAIN_ATTR_WINDOWS:
-               count = data;
-
-               if (domain->ops->domain_get_windows != NULL)
-                       *count = domain->ops->domain_get_windows(domain);
-               else
-                       ret = -ENODEV;
-
                 break;
         default:
                 if (!domain->ops->domain_get_attr)
@@ -1832,18 +1858,8 @@ int iommu_domain_set_attr(struct iommu_domain *domain,
                           enum iommu_attr attr, void *data)
  {
         int ret = 0;
-       u32 *count;
  
         switch (attr) {
-       case DOMAIN_ATTR_WINDOWS:
-               count = data;
-
-               if (domain->ops->domain_set_windows != NULL)
-                       ret = domain->ops->domain_set_windows(domain, *count);
-               else
-                       ret = -ENODEV;
-
-               break;
         default:
                 if (domain->ops->domain_set_attr == NULL)
                         return -EINVAL;
diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c

index 83fe262..f8d3ba2 100644 (file)
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -56,6 +56,7 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule,
         iovad->granule = granule;
         iovad->start_pfn = start_pfn;
         iovad->dma_32bit_pfn = 1UL << (32 - iova_shift(iovad));
+       iovad->max32_alloc_size = iovad->dma_32bit_pfn;
         iovad->flush_cb = NULL;
         iovad->fq = NULL;
         iovad->anchor.pfn_lo = iovad->anchor.pfn_hi = IOVA_ANCHOR;
@@ -139,8 +140,10 @@ __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free)
  
         cached_iova = rb_entry(iovad->cached32_node, struct iova, node);
         if (free->pfn_hi < iovad->dma_32bit_pfn &&
-           free->pfn_lo >= cached_iova->pfn_lo)
+           free->pfn_lo >= cached_iova->pfn_lo) {
                 iovad->cached32_node = rb_next(&free->node);
+               iovad->max32_alloc_size = iovad->dma_32bit_pfn;
+       }
  
         cached_iova = rb_entry(iovad->cached_node, struct iova, node);
         if (free->pfn_lo >= cached_iova->pfn_lo)
@@ -190,6 +193,10 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
  
         /* Walk the tree backwards */
         spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
+       if (limit_pfn <= iovad->dma_32bit_pfn &&
+                       size >= iovad->max32_alloc_size)
+               goto iova32_full;
+
         curr = __get_cached_rbnode(iovad, limit_pfn);
         curr_iova = rb_entry(curr, struct iova, node);
         do {
@@ -200,10 +207,8 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
                 curr_iova = rb_entry(curr, struct iova, node);
         } while (curr && new_pfn <= curr_iova->pfn_hi);
  
-       if (limit_pfn < size || new_pfn < iovad->start_pfn) {
-               spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
-               return -ENOMEM;
-       }
+       if (limit_pfn < size || new_pfn < iovad->start_pfn)
+               goto iova32_full;
  
         /* pfn_lo will point to size aligned address if size_aligned is set */
         new->pfn_lo = new_pfn;
@@ -214,9 +219,12 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
         __cached_rbnode_insert_update(iovad, new);
  
         spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
-
-
         return 0;
+
+iova32_full:
+       iovad->max32_alloc_size = size;
+       spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
+       return -ENOMEM;
  }
  
  static struct kmem_cache *iova_cache;
diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c

index 22b94f8..b98a031 100644 (file)
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
  /*
   * IPMMU VMSA
   *
   * Copyright (C) 2014 Renesas Electronics Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
   */
  
  #include <linux/bitmap.h>
diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c

index f7787e7..c5dd630 100644 (file)
--- a/drivers/iommu/of_iommu.c
+++ b/drivers/iommu/of_iommu.c
@@ -24,6 +24,7 @@
  #include <linux/of_iommu.h>
  #include <linux/of_pci.h>
  #include <linux/slab.h>
+#include <linux/fsl/mc.h>
  
  #define NO_IOMMU       1
  
@@ -132,9 +133,8 @@ static int of_pci_iommu_init(struct pci_dev *pdev, u16 alias, void *data)
         struct of_phandle_args iommu_spec = { .args_count = 1 };
         int err;
  
-       err = of_pci_map_rid(info->np, alias, "iommu-map",
-                            "iommu-map-mask", &iommu_spec.np,
-                            iommu_spec.args);
+       err = of_map_rid(info->np, alias, "iommu-map", "iommu-map-mask",
+                        &iommu_spec.np, iommu_spec.args);
         if (err)
                 return err == -ENODEV ? NO_IOMMU : err;
  
@@ -143,6 +143,23 @@ static int of_pci_iommu_init(struct pci_dev *pdev, u16 alias, void *data)
         return err;
  }
  
+/*
+ * Resolve the IOMMU specifier for an fsl-mc device.
+ *
+ * Maps mc_dev->icid through of_map_rid() using the "iommu-map" and
+ * "iommu-map-mask" properties of @master_np.  If the mapping fails,
+ * -ENODEV is reported as NO_IOMMU and any other error is returned
+ * unchanged.  Otherwise the specifier is handed to of_iommu_xlate() for
+ * &mc_dev->dev, of_node_put() is called on iommu_spec.np, and the
+ * of_iommu_xlate() result is returned.
+ */
+static int of_fsl_mc_iommu_init(struct fsl_mc_device *mc_dev,
+                               struct device_node *master_np)
+{
+       struct of_phandle_args iommu_spec = { .args_count = 1 };
+       int err;
+
+       err = of_map_rid(master_np, mc_dev->icid, "iommu-map",
+                        "iommu-map-mask", &iommu_spec.np,
+                        iommu_spec.args);
+       if (err)
+               return err == -ENODEV ? NO_IOMMU : err;
+
+       err = of_iommu_xlate(&mc_dev->dev, &iommu_spec);
+       of_node_put(iommu_spec.np);
+       return err;
+}
+
  const struct iommu_ops *of_iommu_configure(struct device *dev,
                                            struct device_node *master_np)
  {
@@ -174,6 +191,8 @@ const struct iommu_ops *of_iommu_configure(struct device *dev,
  
                 err = pci_for_each_dma_alias(to_pci_dev(dev),
                                              of_pci_iommu_init, &info);
+       } else if (dev_is_fsl_mc(dev)) {
+               err = of_fsl_mc_iommu_init(to_fsl_mc_device(dev), master_np);
         } else {
                 struct of_phandle_args iommu_spec;
                 int idx = 0;
diff --git a/drivers/macintosh/adb-iop.c b/drivers/macintosh/adb-iop.c

index ca623e6..fca3164 100644 (file)
--- a/drivers/macintosh/adb-iop.c
+++ b/drivers/macintosh/adb-iop.c
@@ -20,13 +20,13 @@
  #include <linux/init.h>
  #include <linux/proc_fs.h>
  
-#include <asm/macintosh.h> 
-#include <asm/macints.h> 
+#include <asm/macintosh.h>
+#include <asm/macints.h>
  #include <asm/mac_iop.h>
  #include <asm/mac_oss.h>
  #include <asm/adb_iop.h>
  
-#include <linux/adb.h> 
+#include <linux/adb.h>
  
  /*#define DEBUG_ADB_IOP*/
  
@@ -38,9 +38,9 @@ static unsigned char *reply_ptr;
  #endif
  
  static enum adb_iop_state {
-    idle,
-    sending,
-    awaiting_reply
+       idle,
+       sending,
+       awaiting_reply
  } adb_iop_state;
  
  static void adb_iop_start(void);
@@ -66,7 +66,8 @@ static void adb_iop_end_req(struct adb_request *req, int state)
  {
         req->complete = 1;
         current_req = req->next;
-       if (req->done) (*req->done)(req);
+       if (req->done)
+               (*req->done)(req);
         adb_iop_state = state;
  }
  
@@ -100,7 +101,7 @@ static void adb_iop_complete(struct iop_msg *msg)
  
  static void adb_iop_listen(struct iop_msg *msg)
  {
-       struct adb_iopmsg *amsg = (struct adb_iopmsg *) msg->message;
+       struct adb_iopmsg *amsg = (struct adb_iopmsg *)msg->message;
         struct adb_request *req;
         unsigned long flags;
  #ifdef DEBUG_ADB_IOP
@@ -113,9 +114,9 @@ static void adb_iop_listen(struct iop_msg *msg)
  
  #ifdef DEBUG_ADB_IOP
         printk("adb_iop_listen %p: rcvd packet, %d bytes: %02X %02X", req,
-               (uint) amsg->count + 2, (uint) amsg->flags, (uint) amsg->cmd);
+              (uint)amsg->count + 2, (uint)amsg->flags, (uint)amsg->cmd);
         for (i = 0; i < amsg->count; i++)
-               printk(" %02X", (uint) amsg->data[i]);
+               printk(" %02X", (uint)amsg->data[i]);
         printk("\n");
  #endif
  
@@ -168,14 +169,15 @@ static void adb_iop_start(void)
  
         /* get the packet to send */
         req = current_req;
-       if (!req) return;
+       if (!req)
+               return;
  
         local_irq_save(flags);
  
  #ifdef DEBUG_ADB_IOP
         printk("adb_iop_start %p: sending packet, %d bytes:", req, req->nbytes);
-       for (i = 0 ; i < req->nbytes ; i++)
-               printk(" %02X", (uint) req->data[i]);
+       for (i = 0; i < req->nbytes; i++)
+               printk(" %02X", (uint)req->data[i]);
         printk("\n");
  #endif
  
@@ -196,19 +198,20 @@ static void adb_iop_start(void)
         /* Now send it. The IOP manager will call adb_iop_complete */
         /* when the packet has been sent.                          */
  
-       iop_send_message(ADB_IOP, ADB_CHAN, req,
-                        sizeof(amsg), (__u8 *) &amsg, adb_iop_complete);
+       iop_send_message(ADB_IOP, ADB_CHAN, req, sizeof(amsg), (__u8 *)&amsg,
+                        adb_iop_complete);
  }
  
  int adb_iop_probe(void)
  {
-       if (!iop_ism_present) return -ENODEV;
+       if (!iop_ism_present)
+               return -ENODEV;
         return 0;
  }
  
  int adb_iop_init(void)
  {
-       printk("adb: IOP ISM driver v0.4 for Unified ADB.\n");
+       pr_info("adb: IOP ISM driver v0.4 for Unified ADB\n");
         iop_listen(ADB_IOP, ADB_CHAN, adb_iop_listen, "ADB");
         return 0;
  }
@@ -218,10 +221,12 @@ int adb_iop_send_request(struct adb_request *req, int sync)
         int err;
  
         err = adb_iop_write(req);
-       if (err) return err;
+       if (err)
+               return err;
  
         if (sync) {
-               while (!req->complete) adb_iop_poll();
+               while (!req->complete)
+                       adb_iop_poll();
         }
         return 0;
  }
@@ -251,7 +256,9 @@ static int adb_iop_write(struct adb_request *req)
         }
  
         local_irq_restore(flags);
-       if (adb_iop_state == idle) adb_iop_start();
+
+       if (adb_iop_state == idle)
+               adb_iop_start();
         return 0;
  }
  
@@ -263,7 +270,8 @@ int adb_iop_autopoll(int devs)
  
  void adb_iop_poll(void)
  {
-       if (adb_iop_state == idle) adb_iop_start();
+       if (adb_iop_state == idle)
+               adb_iop_start();
         iop_ism_irq_poll(ADB_IOP);
  }
  
diff --git a/drivers/macintosh/adb.c b/drivers/macintosh/adb.c

index 76e98f0..e49d1f2 100644 (file)
--- a/drivers/macintosh/adb.c
+++ b/drivers/macintosh/adb.c
@@ -203,15 +203,15 @@ static int adb_scan_bus(void)
         }
  
         /* Now fill in the handler_id field of the adb_handler entries. */
-       pr_debug("adb devices:\n");
         for (i = 1; i < 16; i++) {
                 if (adb_handler[i].original_address == 0)
                         continue;
                 adb_request(&req, NULL, ADBREQ_SYNC | ADBREQ_REPLY, 1,
                             (i << 4) | 0xf);
                 adb_handler[i].handler_id = req.reply[2];
-               pr_debug(" [%d]: %d %x\n", i, adb_handler[i].original_address,
-                        adb_handler[i].handler_id);
+               printk(KERN_DEBUG "adb device [%d]: %d 0x%X\n", i,
+                      adb_handler[i].original_address,
+                      adb_handler[i].handler_id);
                 devmask |= 1 << i;
         }
         return devmask;
@@ -579,6 +579,8 @@ adb_try_handler_change(int address, int new_id)
         mutex_lock(&adb_handler_mutex);
         ret = try_handler_change(address, new_id);
         mutex_unlock(&adb_handler_mutex);
+       if (ret)
+               pr_debug("adb handler change: [%d] 0x%X\n", address, new_id);
         return ret;
  }
  EXPORT_SYMBOL(adb_try_handler_change);
diff --git a/drivers/macintosh/adbhid.c b/drivers/macintosh/adbhid.c

index a261892..75482ee 100644 (file)
--- a/drivers/macintosh/adbhid.c
+++ b/drivers/macintosh/adbhid.c
@@ -757,6 +757,7 @@ adbhid_input_register(int id, int default_id, int original_handler_id,
         struct input_dev *input_dev;
         int err;
         int i;
+       char *keyboard_type;
  
         if (adbhid[id]) {
                 pr_err("Trying to reregister ADB HID on ID %d\n", id);
@@ -798,24 +799,23 @@ adbhid_input_register(int id, int default_id, int original_handler_id,
  
                 memcpy(hid->keycode, adb_to_linux_keycodes, sizeof(adb_to_linux_keycodes));
  
-               pr_info("Detected ADB keyboard, type ");
                 switch (original_handler_id) {
                 default:
-                       pr_cont("<unknown>.\n");
+                       keyboard_type = "<unknown>";
                         input_dev->id.version = ADB_KEYBOARD_UNKNOWN;
                         break;
  
                 case 0x01: case 0x02: case 0x03: case 0x06: case 0x08:
                 case 0x0C: case 0x10: case 0x18: case 0x1B: case 0x1C:
                 case 0xC0: case 0xC3: case 0xC6:
-                       pr_cont("ANSI.\n");
+                       keyboard_type = "ANSI";
                         input_dev->id.version = ADB_KEYBOARD_ANSI;
                         break;
  
                 case 0x04: case 0x05: case 0x07: case 0x09: case 0x0D:
                 case 0x11: case 0x14: case 0x19: case 0x1D: case 0xC1:
                 case 0xC4: case 0xC7:
-                       pr_cont("ISO, swapping keys.\n");
+                       keyboard_type = "ISO, swapping keys";
                         input_dev->id.version = ADB_KEYBOARD_ISO;
                         i = hid->keycode[10];
                         hid->keycode[10] = hid->keycode[50];
@@ -824,10 +824,11 @@ adbhid_input_register(int id, int default_id, int original_handler_id,
  
                 case 0x12: case 0x15: case 0x16: case 0x17: case 0x1A:
                 case 0x1E: case 0xC2: case 0xC5: case 0xC8: case 0xC9:
-                       pr_cont("JIS.\n");
+                       keyboard_type = "JIS";
                         input_dev->id.version = ADB_KEYBOARD_JIS;
                         break;
                 }
+               pr_info("Detected ADB keyboard, type %s.\n", keyboard_type);
  
                 for (i = 0; i < 128; i++)
                         if (hid->keycode[i])
@@ -972,16 +973,13 @@ adbhid_probe(void)
                    ->get it to send separate codes for left and right shift,
                    control, option keys */
  #if 0          /* handler 5 doesn't send separate codes for R modifiers */
-               if (adb_try_handler_change(id, 5))
-                       printk("ADB keyboard at %d, handler set to 5\n", id);
-               else
+               if (!adb_try_handler_change(id, 5))
  #endif
-               if (adb_try_handler_change(id, 3))
-                       printk("ADB keyboard at %d, handler set to 3\n", id);
-               else
-                       printk("ADB keyboard at %d, handler 1\n", id);
+               adb_try_handler_change(id, 3);
  
                 adb_get_infos(id, &default_id, &cur_handler_id);
+               printk(KERN_DEBUG "ADB keyboard at %d has handler 0x%X\n",
+                      id, cur_handler_id);
                 reg |= adbhid_input_reregister(id, default_id, org_handler_id,
                                                cur_handler_id, 0);
         }
@@ -999,48 +997,44 @@ adbhid_probe(void)
         for (i = 0; i < mouse_ids.nids; i++) {
                 int id = mouse_ids.id[i];
                 int mouse_kind;
+               char *desc = "standard";
  
                 adb_get_infos(id, &default_id, &org_handler_id);
  
                 if (adb_try_handler_change(id, 4)) {
-                       printk("ADB mouse at %d, handler set to 4", id);
                         mouse_kind = ADBMOUSE_EXTENDED;
                 }
                 else if (adb_try_handler_change(id, 0x2F)) {
-                       printk("ADB mouse at %d, handler set to 0x2F", id);
                         mouse_kind = ADBMOUSE_MICROSPEED;
                 }
                 else if (adb_try_handler_change(id, 0x42)) {
-                       printk("ADB mouse at %d, handler set to 0x42", id);
                         mouse_kind = ADBMOUSE_TRACKBALLPRO;
                 }
                 else if (adb_try_handler_change(id, 0x66)) {
-                       printk("ADB mouse at %d, handler set to 0x66", id);
                         mouse_kind = ADBMOUSE_MICROSPEED;
                 }
                 else if (adb_try_handler_change(id, 0x5F)) {
-                       printk("ADB mouse at %d, handler set to 0x5F", id);
                         mouse_kind = ADBMOUSE_MICROSPEED;
                 }
                 else if (adb_try_handler_change(id, 3)) {
-                       printk("ADB mouse at %d, handler set to 3", id);
                         mouse_kind = ADBMOUSE_MS_A3;
                 }
                 else if (adb_try_handler_change(id, 2)) {
-                       printk("ADB mouse at %d, handler set to 2", id);
                         mouse_kind = ADBMOUSE_STANDARD_200;
                 }
                 else {
-                       printk("ADB mouse at %d, handler 1", id);
                         mouse_kind = ADBMOUSE_STANDARD_100;
                 }
  
                 if ((mouse_kind == ADBMOUSE_TRACKBALLPRO)
                     || (mouse_kind == ADBMOUSE_MICROSPEED)) {
+                       desc = "Microspeed/MacPoint or compatible";
                         init_microspeed(id);
                 } else if (mouse_kind == ADBMOUSE_MS_A3) {
+                       desc = "Mouse Systems A3 Mouse or compatible";
                         init_ms_a3(id);
                 } else if (mouse_kind ==  ADBMOUSE_EXTENDED) {
+                       desc = "extended";
                         /*
                          * Register 1 is usually used for device
                          * identification.  Here, we try to identify
@@ -1054,32 +1048,36 @@ adbhid_probe(void)
                             (req.reply[1] == 0x9a) && ((req.reply[2] == 0x21)
                                 || (req.reply[2] == 0x20))) {
                                 mouse_kind = ADBMOUSE_TRACKBALL;
+                               desc = "trackman/mouseman";
                                 init_trackball(id);
                         }
                         else if ((req.reply_len >= 4) &&
                             (req.reply[1] == 0x74) && (req.reply[2] == 0x70) &&
                             (req.reply[3] == 0x61) && (req.reply[4] == 0x64)) {
                                 mouse_kind = ADBMOUSE_TRACKPAD;
+                               desc = "trackpad";
                                 init_trackpad(id);
                         }
                         else if ((req.reply_len >= 4) &&
                             (req.reply[1] == 0x4b) && (req.reply[2] == 0x4d) &&
                             (req.reply[3] == 0x4c) && (req.reply[4] == 0x31)) {
                                 mouse_kind = ADBMOUSE_TURBOMOUSE5;
+                               desc = "TurboMouse 5";
                                 init_turbomouse(id);
                         }
                         else if ((req.reply_len == 9) &&
                             (req.reply[1] == 0x4b) && (req.reply[2] == 0x4f) &&
                             (req.reply[3] == 0x49) && (req.reply[4] == 0x54)) {
                                 if (adb_try_handler_change(id, 0x42)) {
-                                       pr_cont("\nADB MacAlly 2-button mouse at %d, handler set to 0x42", id);
                                         mouse_kind = ADBMOUSE_MACALLY2;
+                                       desc = "MacAlly 2-button";
                                 }
                         }
                 }
-               pr_cont("\n");
  
                 adb_get_infos(id, &default_id, &cur_handler_id);
+               printk(KERN_DEBUG "ADB mouse (%s) at %d has handler 0x%X\n",
+                      desc, id, cur_handler_id);
                 reg |= adbhid_input_reregister(id, default_id, org_handler_id,
                                                cur_handler_id, mouse_kind);
         }
@@ -1092,12 +1090,10 @@ init_trackpad(int id)
         struct adb_request req;
         unsigned char r1_buffer[8];
  
-       pr_cont(" (trackpad)");
-
         adb_request(&req, NULL, ADBREQ_SYNC | ADBREQ_REPLY, 1,
                     ADB_READREG(id,1));
         if (req.reply_len < 8)
-           pr_cont("bad length for reg. 1\n");
+               pr_err("%s: bad length for reg. 1\n", __func__);
         else
         {
             memcpy(r1_buffer, &req.reply[1], 8);
@@ -1145,8 +1141,6 @@ init_trackball(int id)
  {
         struct adb_request req;
  
-       pr_cont(" (trackman/mouseman)");
-
         adb_request(&req, NULL, ADBREQ_SYNC, 3,
         ADB_WRITEREG(id,1), 00,0x81);
  
@@ -1177,8 +1171,6 @@ init_turbomouse(int id)
  {
         struct adb_request req;
  
-       pr_cont(" (TurboMouse 5)");
-
         adb_request(&req, NULL, ADBREQ_SYNC, 1, ADB_FLUSH(id));
  
         adb_request(&req, NULL, ADBREQ_SYNC, 1, ADB_FLUSH(3));
@@ -1213,8 +1205,6 @@ init_microspeed(int id)
  {
         struct adb_request req;
  
-       pr_cont(" (Microspeed/MacPoint or compatible)");
-
         adb_request(&req, NULL, ADBREQ_SYNC, 1, ADB_FLUSH(id));
  
         /* This will initialize mice using the Microspeed, MacPoint and
@@ -1253,7 +1243,6 @@ init_ms_a3(int id)
  {
         struct adb_request req;
  
-       pr_cont(" (Mouse Systems A3 Mouse, or compatible)");
         adb_request(&req, NULL, ADBREQ_SYNC, 3,
         ADB_WRITEREG(id, 0x2),
             0x00,
diff --git a/drivers/macintosh/macio_asic.c b/drivers/macintosh/macio_asic.c

index 0707482..17d3bc9 100644 (file)
--- a/drivers/macintosh/macio_asic.c
+++ b/drivers/macintosh/macio_asic.c
@@ -360,9 +360,10 @@ static struct macio_dev * macio_add_one_device(struct macio_chip *chip,
                                                struct macio_dev *in_bay,
                                                struct resource *parent_res)
  {
+       char name[MAX_NODE_NAME_SIZE + 1];
         struct macio_dev *dev;
         const u32 *reg;
-       
+
         if (np == NULL)
                 return NULL;
  
@@ -402,6 +403,7 @@ static struct macio_dev * macio_add_one_device(struct macio_chip *chip,
  #endif
  
         /* MacIO itself has a different reg, we use it's PCI base */
+       snprintf(name, sizeof(name), "%pOFn", np);
         if (np == chip->of_node) {
                 dev_set_name(&dev->ofdev.dev, "%1d.%08x:%.*s",
                              chip->lbus.index,
@@ -410,12 +412,12 @@ static struct macio_dev * macio_add_one_device(struct macio_chip *chip,
  #else
                         0, /* NuBus may want to do something better here */
  #endif
-                       MAX_NODE_NAME_SIZE, np->name);
+                       MAX_NODE_NAME_SIZE, name);
         } else {
                 reg = of_get_property(np, "reg", NULL);
                 dev_set_name(&dev->ofdev.dev, "%1d.%08x:%.*s",
                              chip->lbus.index,
-                            reg ? *reg : 0, MAX_NODE_NAME_SIZE, np->name);
+                            reg ? *reg : 0, MAX_NODE_NAME_SIZE, name);
         }
  
         /* Setup interrupts & resources */
diff --git a/drivers/macintosh/macio_sysfs.c b/drivers/macintosh/macio_sysfs.c

index ca4fcff..d2451e5 100644 (file)
--- a/drivers/macintosh/macio_sysfs.c
+++ b/drivers/macintosh/macio_sysfs.c
@@ -58,7 +58,13 @@ static ssize_t devspec_show(struct device *dev,
  static DEVICE_ATTR_RO(modalias);
  static DEVICE_ATTR_RO(devspec);
  
-macio_config_of_attr (name, "%s\n");
+static ssize_t name_show(struct device *dev,
+                        struct device_attribute *attr, char *buf)
+{
+       return sprintf(buf, "%pOFn\n", dev->of_node);
+}
+static DEVICE_ATTR_RO(name);
+
  macio_config_of_attr (type, "%s\n");
  
  static struct attribute *macio_dev_attrs[] = {
diff --git a/drivers/macintosh/via-cuda.c b/drivers/macintosh/via-cuda.c

index 98dd702..bbec6ac 100644 (file)
--- a/drivers/macintosh/via-cuda.c
+++ b/drivers/macintosh/via-cuda.c
@@ -766,3 +766,38 @@ cuda_input(unsigned char *buf, int nb)
                        buf, nb, false);
      }
  }
+
+/* Offset between Unix time (1970-based) and Mac time (1904-based) */
+#define RTC_OFFSET     2082844800
+
+time64_t cuda_get_time(void)
+{
+       struct adb_request req;
+       u32 now;
+
+       if (cuda_request(&req, NULL, 2, CUDA_PACKET, CUDA_GET_TIME) < 0)
+               return 0;
+       while (!req.complete)
+               cuda_poll();
+       if (req.reply_len != 7)
+               pr_err("%s: got %d byte reply\n", __func__, req.reply_len);
+       now = (req.reply[3] << 24) + (req.reply[4] << 16) +
+             (req.reply[5] << 8) + req.reply[6];
+       return (time64_t)now - RTC_OFFSET;
+}
+
+int cuda_set_rtc_time(struct rtc_time *tm)
+{
+       u32 now;
+       struct adb_request req;
+
+       now = lower_32_bits(rtc_tm_to_time64(tm) + RTC_OFFSET);
+       if (cuda_request(&req, NULL, 6, CUDA_PACKET, CUDA_SET_TIME,
+                        now >> 24, now >> 16, now >> 8, now) < 0)
+               return -ENXIO;
+       while (!req.complete)
+               cuda_poll();
+       if ((req.reply_len != 3) && (req.reply_len != 7))
+               pr_err("%s: got %d byte reply\n", __func__, req.reply_len);
+       return 0;
+}
diff --git a/drivers/macintosh/via-macii.c b/drivers/macintosh/via-macii.c

index cf6f7d5..ac824d7 100644 (file)
--- a/drivers/macintosh/via-macii.c
+++ b/drivers/macintosh/via-macii.c
@@ -12,7 +12,7 @@
   *
   * 1999-08-02 (jmt) - Initial rewrite for Unified ADB.
   * 2000-03-29 Tony Mantler <tonym@mac.linux-m68k.org>
- *                             - Big overhaul, should actually work now.
+ *            - Big overhaul, should actually work now.
   * 2006-12-31 Finn Thain - Another overhaul.
   *
   * Suggested reading:
@@ -23,7 +23,7 @@
   * Apple's "ADB Analyzer" bus sniffer is invaluable:
   *   ftp://ftp.apple.com/developer/Tool_Chest/Devices_-_Hardware/Apple_Desktop_Bus/
   */
- 
+
  #include <stdarg.h>
  #include <linux/types.h>
  #include <linux/errno.h>
@@ -77,7 +77,7 @@ static volatile unsigned char *via;
  #define ST_ODD         0x20            /* ADB state: odd data byte */
  #define ST_IDLE                0x30            /* ADB state: idle, nothing to send */
  
-static int  macii_init_via(void);
+static int macii_init_via(void);
  static void macii_start(void);
  static irqreturn_t macii_interrupt(int irq, void *arg);
  static void macii_queue_poll(void);
@@ -120,31 +120,15 @@ static int srq_asserted;     /* have to poll for the device that asserted it */
  static int command_byte;         /* the most recent command byte transmitted */
  static int autopoll_devs;      /* bits set are device addresses to be polled */
  
-/* Sanity check for request queue. Doesn't check for cycles. */
-static int request_is_queued(struct adb_request *req) {
-       struct adb_request *cur;
-       unsigned long flags;
-       local_irq_save(flags);
-       cur = current_req;
-       while (cur) {
-               if (cur == req) {
-                       local_irq_restore(flags);
-                       return 1;
-               }
-               cur = cur->next;
-       }
-       local_irq_restore(flags);
-       return 0;
-}
-
  /* Check for MacII style ADB */
  static int macii_probe(void)
  {
-       if (macintosh_config->adb_type != MAC_ADB_II) return -ENODEV;
+       if (macintosh_config->adb_type != MAC_ADB_II)
+               return -ENODEV;
  
         via = via1;
  
-       printk("adb: Mac II ADB Driver v1.0 for Unified ADB\n");
+       pr_info("adb: Mac II ADB Driver v1.0 for Unified ADB\n");
         return 0;
  }
  
@@ -153,15 +137,17 @@ int macii_init(void)
  {
         unsigned long flags;
         int err;
-       
+
         local_irq_save(flags);
-       
+
         err = macii_init_via();
-       if (err) goto out;
+       if (err)
+               goto out;
  
         err = request_irq(IRQ_MAC_ADB, macii_interrupt, 0, "ADB",
                           macii_interrupt);
-       if (err) goto out;
+       if (err)
+               goto out;
  
         macii_state = idle;
  out:
@@ -169,7 +155,7 @@ out:
         return err;
  }
  
-/* initialize the hardware */  
+/* initialize the hardware */
  static int macii_init_via(void)
  {
         unsigned char x;
@@ -179,7 +165,7 @@ static int macii_init_via(void)
  
         /* Set up state: idle */
         via[B] |= ST_IDLE;
-       last_status = via[B] & (ST_MASK|CTLR_IRQ);
+       last_status = via[B] & (ST_MASK | CTLR_IRQ);
  
         /* Shift register on input */
         via[ACR] = (via[ACR] & ~SR_CTRL) | SR_EXT;
@@ -205,7 +191,8 @@ static void macii_queue_poll(void)
         int next_device;
         static struct adb_request req;
  
-       if (!autopoll_devs) return;
+       if (!autopoll_devs)
+               return;
  
         device_mask = (1 << (((command_byte & 0xF0) >> 4) + 1)) - 1;
         if (autopoll_devs & ~device_mask)
@@ -213,10 +200,7 @@ static void macii_queue_poll(void)
         else
                 next_device = ffs(autopoll_devs) - 1;
  
-       BUG_ON(request_is_queued(&req));
-
-       adb_request(&req, NULL, ADBREQ_NOSEND, 1,
-                   ADB_READREG(next_device, 0));
+       adb_request(&req, NULL, ADBREQ_NOSEND, 1, ADB_READREG(next_device, 0));
  
         req.sent = 0;
         req.complete = 0;
@@ -235,45 +219,47 @@ static void macii_queue_poll(void)
  static int macii_send_request(struct adb_request *req, int sync)
  {
         int err;
-       unsigned long flags;
  
-       BUG_ON(request_is_queued(req));
-
-       local_irq_save(flags);
         err = macii_write(req);
-       local_irq_restore(flags);
+       if (err)
+               return err;
  
-       if (!err && sync) {
-               while (!req->complete) {
+       if (sync)
+               while (!req->complete)
                         macii_poll();
-               }
-               BUG_ON(request_is_queued(req));
-       }
  
-       return err;
+       return 0;
  }
  
  /* Send an ADB request (append to request queue) */
  static int macii_write(struct adb_request *req)
  {
+       unsigned long flags;
+
         if (req->nbytes < 2 || req->data[0] != ADB_PACKET || req->nbytes > 15) {
                 req->complete = 1;
                 return -EINVAL;
         }
-       
+
         req->next = NULL;
         req->sent = 0;
         req->complete = 0;
         req->reply_len = 0;
  
+       local_irq_save(flags);
+
         if (current_req != NULL) {
                 last_req->next = req;
                 last_req = req;
         } else {
                 current_req = req;
                 last_req = req;
-               if (macii_state == idle) macii_start();
+               if (macii_state == idle)
+                       macii_start();
         }
+
+       local_irq_restore(flags);
+
         return 0;
  }
  
@@ -287,7 +273,8 @@ static int macii_autopoll(int devs)
         /* bit 1 == device 1, and so on. */
         autopoll_devs = devs & 0xFFFE;
  
-       if (!autopoll_devs) return 0;
+       if (!autopoll_devs)
+               return 0;
  
         local_irq_save(flags);
  
@@ -304,7 +291,8 @@ static int macii_autopoll(int devs)
         return err;
  }
  
-static inline int need_autopoll(void) {
+static inline int need_autopoll(void)
+{
         /* Was the last command Talk Reg 0
          * and is the target on the autopoll list?
          */
@@ -317,21 +305,17 @@ static inline int need_autopoll(void) {
  /* Prod the chip without interrupts */
  static void macii_poll(void)
  {
-       disable_irq(IRQ_MAC_ADB);
         macii_interrupt(0, NULL);
-       enable_irq(IRQ_MAC_ADB);
  }
  
  /* Reset the bus */
  static int macii_reset_bus(void)
  {
         static struct adb_request req;
-       
-       if (request_is_queued(&req))
-               return 0;
  
         /* Command = 0, Address = ignored */
-       adb_request(&req, NULL, 0, 1, ADB_BUSRESET);
+       adb_request(&req, NULL, ADBREQ_NOSEND, 1, ADB_BUSRESET);
+       macii_send_request(&req, 1);
  
         /* Don't want any more requests during the Global Reset low time. */
         udelay(3000);
@@ -346,10 +330,6 @@ static void macii_start(void)
  
         req = current_req;
  
-       BUG_ON(req == NULL);
-
-       BUG_ON(macii_state != idle);
-
         /* Now send it. Be careful though, that first byte of the request
          * is actually ADB_PACKET; the real data begins at index 1!
          * And req->nbytes is the number of bytes of real data plus one.
@@ -375,7 +355,7 @@ static void macii_start(void)
   * to be activity on the ADB bus. The chip will poll to achieve this.
   *
   * The basic ADB state machine was left unchanged from the original MacII code
- * by Alan Cox, which was based on the CUDA driver for PowerMac. 
+ * by Alan Cox, which was based on the CUDA driver for PowerMac.
   * The syntax of the ADB status lines is totally different on MacII,
   * though. MacII uses the states Command -> Even -> Odd -> Even ->...-> Idle
   * for sending and Idle -> Even -> Odd -> Even ->...-> Idle for receiving.
@@ -387,164 +367,166 @@ static void macii_start(void)
  static irqreturn_t macii_interrupt(int irq, void *arg)
  {
         int x;
-       static int entered;
         struct adb_request *req;
+       unsigned long flags;
+
+       local_irq_save(flags);
  
         if (!arg) {
                 /* Clear the SR IRQ flag when polling. */
                 if (via[IFR] & SR_INT)
                         via[IFR] = SR_INT;
-               else
+               else {
+                       local_irq_restore(flags);
                         return IRQ_NONE;
+               }
         }
  
-       BUG_ON(entered++);
-
         last_status = status;
-       status = via[B] & (ST_MASK|CTLR_IRQ);
+       status = via[B] & (ST_MASK | CTLR_IRQ);
  
         switch (macii_state) {
-               case idle:
-                       if (reading_reply) {
-                               reply_ptr = current_req->reply;
-                       } else {
-                               BUG_ON(current_req != NULL);
-                               reply_ptr = reply_buf;
-                       }
+       case idle:
+               if (reading_reply) {
+                       reply_ptr = current_req->reply;
+               } else {
+                       WARN_ON(current_req);
+                       reply_ptr = reply_buf;
+               }
  
-                       x = via[SR];
+               x = via[SR];
  
-                       if ((status & CTLR_IRQ) && (x == 0xFF)) {
-                               /* Bus timeout without SRQ sequence:
-                                *     data is "FF" while CTLR_IRQ is "H"
-                                */
-                               reply_len = 0;
-                               srq_asserted = 0;
-                               macii_state = read_done;
-                       } else {
-                               macii_state = reading;
-                               *reply_ptr = x;
-                               reply_len = 1;
-                       }
+               if ((status & CTLR_IRQ) && (x == 0xFF)) {
+                       /* Bus timeout without SRQ sequence:
+                        *     data is "FF" while CTLR_IRQ is "H"
+                        */
+                       reply_len = 0;
+                       srq_asserted = 0;
+                       macii_state = read_done;
+               } else {
+                       macii_state = reading;
+                       *reply_ptr = x;
+                       reply_len = 1;
+               }
  
-                       /* set ADB state = even for first data byte */
-                       via[B] = (via[B] & ~ST_MASK) | ST_EVEN;
-                       break;
+               /* set ADB state = even for first data byte */
+               via[B] = (via[B] & ~ST_MASK) | ST_EVEN;
+               break;
  
-               case sending:
-                       req = current_req;
-                       if (data_index >= req->nbytes) {
-                               req->sent = 1;
-                               macii_state = idle;
-
-                               if (req->reply_expected) {
-                                       reading_reply = 1;
-                               } else {
-                                       req->complete = 1;
-                                       current_req = req->next;
-                                       if (req->done) (*req->done)(req);
-
-                                       if (current_req)
-                                               macii_start();
-                                       else
-                                               if (need_autopoll())
-                                                       macii_autopoll(autopoll_devs);
-                               }
+       case sending:
+               req = current_req;
+               if (data_index >= req->nbytes) {
+                       req->sent = 1;
+                       macii_state = idle;
  
-                               if (macii_state == idle) {
-                                       /* reset to shift in */
-                                       via[ACR] &= ~SR_OUT;
-                                       x = via[SR];
-                                       /* set ADB state idle - might get SRQ */
-                                       via[B] = (via[B] & ~ST_MASK) | ST_IDLE;
-                               }
+                       if (req->reply_expected) {
+                               reading_reply = 1;
                         } else {
-                               via[SR] = req->data[data_index++];
-
-                               if ( (via[B] & ST_MASK) == ST_CMD ) {
-                                       /* just sent the command byte, set to EVEN */
-                                       via[B] = (via[B] & ~ST_MASK) | ST_EVEN;
-                               } else {
-                                       /* invert state bits, toggle ODD/EVEN */
-                                       via[B] ^= ST_MASK;
-                               }
-                       }
-                       break;
-
-               case reading:
-                       x = via[SR];
-                       BUG_ON((status & ST_MASK) == ST_CMD ||
-                              (status & ST_MASK) == ST_IDLE);
-
-                       /* Bus timeout with SRQ sequence:
-                        *     data is "XX FF"      while CTLR_IRQ is "L L"
-                        * End of packet without SRQ sequence:
-                        *     data is "XX...YY 00" while CTLR_IRQ is "L...H L"
-                        * End of packet SRQ sequence:
-                        *     data is "XX...YY 00" while CTLR_IRQ is "L...L L"
-                        * (where XX is the first response byte and
-                        * YY is the last byte of valid response data.)
-                        */
+                               req->complete = 1;
+                               current_req = req->next;
+                               if (req->done)
+                                       (*req->done)(req);
  
-                       srq_asserted = 0;
-                       if (!(status & CTLR_IRQ)) {
-                               if (x == 0xFF) {
-                                       if (!(last_status & CTLR_IRQ)) {
-                                               macii_state = read_done;
-                                               reply_len = 0;
-                                               srq_asserted = 1;
-                                       }
-                               } else if (x == 0x00) {
-                                       macii_state = read_done;
-                                       if (!(last_status & CTLR_IRQ))
-                                               srq_asserted = 1;
-                               }
+                               if (current_req)
+                                       macii_start();
+                               else if (need_autopoll())
+                                       macii_autopoll(autopoll_devs);
                         }
  
-                       if (macii_state == reading) {
-                               BUG_ON(reply_len > 15);
-                               reply_ptr++;
-                               *reply_ptr = x;
-                               reply_len++;
+                       if (macii_state == idle) {
+                               /* reset to shift in */
+                               via[ACR] &= ~SR_OUT;
+                               x = via[SR];
+                               /* set ADB state idle - might get SRQ */
+                               via[B] = (via[B] & ~ST_MASK) | ST_IDLE;
                         }
+               } else {
+                       via[SR] = req->data[data_index++];
  
-                       /* invert state bits, toggle ODD/EVEN */
-                       via[B] ^= ST_MASK;
-                       break;
+                       if ((via[B] & ST_MASK) == ST_CMD) {
+                               /* just sent the command byte, set to EVEN */
+                               via[B] = (via[B] & ~ST_MASK) | ST_EVEN;
+                       } else {
+                               /* invert state bits, toggle ODD/EVEN */
+                               via[B] ^= ST_MASK;
+                       }
+               }
+               break;
  
-               case read_done:
-                       x = via[SR];
+       case reading:
+               x = via[SR];
+               WARN_ON((status & ST_MASK) == ST_CMD ||
+                       (status & ST_MASK) == ST_IDLE);
+
+               /* Bus timeout with SRQ sequence:
+                *     data is "XX FF"      while CTLR_IRQ is "L L"
+                * End of packet without SRQ sequence:
+                *     data is "XX...YY 00" while CTLR_IRQ is "L...H L"
+                * End of packet SRQ sequence:
+                *     data is "XX...YY 00" while CTLR_IRQ is "L...L L"
+                * (where XX is the first response byte and
+                * YY is the last byte of valid response data.)
+                */
  
-                       if (reading_reply) {
-                               reading_reply = 0;
-                               req = current_req;
-                               req->reply_len = reply_len;
-                               req->complete = 1;
-                               current_req = req->next;
-                               if (req->done) (*req->done)(req);
-                       } else if (reply_len && autopoll_devs)
-                               adb_input(reply_buf, reply_len, 0);
+               srq_asserted = 0;
+               if (!(status & CTLR_IRQ)) {
+                       if (x == 0xFF) {
+                               if (!(last_status & CTLR_IRQ)) {
+                                       macii_state = read_done;
+                                       reply_len = 0;
+                                       srq_asserted = 1;
+                               }
+                       } else if (x == 0x00) {
+                               macii_state = read_done;
+                               if (!(last_status & CTLR_IRQ))
+                                       srq_asserted = 1;
+                       }
+               }
  
-                       macii_state = idle;
+               if (macii_state == reading &&
+                   reply_len < ARRAY_SIZE(reply_buf)) {
+                       reply_ptr++;
+                       *reply_ptr = x;
+                       reply_len++;
+               }
  
-                       /* SRQ seen before, initiate poll now */
-                       if (srq_asserted)
-                               macii_queue_poll();
+               /* invert state bits, toggle ODD/EVEN */
+               via[B] ^= ST_MASK;
+               break;
  
-                       if (current_req)
-                               macii_start();
-                       else
-                               if (need_autopoll())
-                                       macii_autopoll(autopoll_devs);
+       case read_done:
+               x = via[SR];
  
-                       if (macii_state == idle)
-                               via[B] = (via[B] & ~ST_MASK) | ST_IDLE;
-                       break;
+               if (reading_reply) {
+                       reading_reply = 0;
+                       req = current_req;
+                       req->reply_len = reply_len;
+                       req->complete = 1;
+                       current_req = req->next;
+                       if (req->done)
+                               (*req->done)(req);
+               } else if (reply_len && autopoll_devs)
+                       adb_input(reply_buf, reply_len, 0);
+
+               macii_state = idle;
+
+               /* SRQ seen before, initiate poll now */
+               if (srq_asserted)
+                       macii_queue_poll();
+
+               if (current_req)
+                       macii_start();
+               else if (need_autopoll())
+                       macii_autopoll(autopoll_devs);
+
+               if (macii_state == idle)
+                       via[B] = (via[B] & ~ST_MASK) | ST_IDLE;
+               break;
  
-               default:
+       default:
                 break;
         }
  
-       entered--;
+       local_irq_restore(flags);
         return IRQ_HANDLED;
  }
diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c

index d72c450..60f57e2 100644 (file)
--- a/drivers/macintosh/via-pmu.c
+++ b/drivers/macintosh/via-pmu.c
@@ -1737,6 +1737,39 @@ pmu_enable_irled(int on)
         pmu_wait_complete(&req);
  }
  
+/* Offset between Unix time (1970-based) and Mac time (1904-based) */
+#define RTC_OFFSET     2082844800
+
+time64_t pmu_get_time(void)
+{
+       struct adb_request req;
+       u32 now;
+
+       if (pmu_request(&req, NULL, 1, PMU_READ_RTC) < 0)
+               return 0;
+       pmu_wait_complete(&req);
+       if (req.reply_len != 4)
+               pr_err("%s: got %d byte reply\n", __func__, req.reply_len);
+       now = (req.reply[0] << 24) + (req.reply[1] << 16) +
+             (req.reply[2] << 8) + req.reply[3];
+       return (time64_t)now - RTC_OFFSET;
+}
+
+int pmu_set_rtc_time(struct rtc_time *tm)
+{
+       u32 now;
+       struct adb_request req;
+
+       now = lower_32_bits(rtc_tm_to_time64(tm) + RTC_OFFSET);
+       if (pmu_request(&req, NULL, 5, PMU_SET_RTC,
+                       now >> 24, now >> 16, now >> 8, now) < 0)
+               return -ENXIO;
+       pmu_wait_complete(&req);
+       if (req.reply_len != 0)
+               pr_err("%s: got %d byte reply\n", __func__, req.reply_len);
+       return 0;
+}
+
  void
  pmu_restart(void)
  {
diff --git a/drivers/macintosh/windfarm_smu_controls.c b/drivers/macintosh/windfarm_smu_controls.c

index d174c74..86d6546 100644 (file)
--- a/drivers/macintosh/windfarm_smu_controls.c
+++ b/drivers/macintosh/windfarm_smu_controls.c
@@ -277,7 +277,7 @@ static int __init smu_controls_init(void)
                 fct = smu_fan_create(fan, 0);
                 if (fct == NULL) {
                         printk(KERN_WARNING "windfarm: Failed to create SMU "
-                              "RPM fan %s\n", fan->name);
+                              "RPM fan %pOFn\n", fan);
                         continue;
                 }
                 list_add(&fct->link, &smu_fans);
@@ -296,7 +296,7 @@ static int __init smu_controls_init(void)
                 fct = smu_fan_create(fan, 1);
                 if (fct == NULL) {
                         printk(KERN_WARNING "windfarm: Failed to create SMU "
-                              "PWM fan %s\n", fan->name);
+                              "PWM fan %pOFn\n", fan);
                         continue;
                 }
                 list_add(&fct->link, &smu_fans);
diff --git a/drivers/macintosh/windfarm_smu_sat.c b/drivers/macintosh/windfarm_smu_sat.c

index da7f4fc..a0f61eb 100644 (file)
--- a/drivers/macintosh/windfarm_smu_sat.c
+++ b/drivers/macintosh/windfarm_smu_sat.c
@@ -22,14 +22,6 @@
  
  #define VERSION "1.0"
  
-#define DEBUG
-
-#ifdef DEBUG
-#define DBG(args...)   printk(args)
-#else
-#define DBG(args...)   do { } while(0)
-#endif
-
  /* If the cache is older than 800ms we'll refetch it */
  #define MAX_AGE                msecs_to_jiffies(800)
  
@@ -106,13 +98,10 @@ struct smu_sdbp_header *smu_sat_get_sdb_partition(unsigned int sat_id, int id,
                 buf[i+2] = data[3];
                 buf[i+3] = data[2];
         }
-#ifdef DEBUG
-       DBG(KERN_DEBUG "sat %d partition %x:", sat_id, id);
-       for (i = 0; i < len; ++i)
-               DBG(" %x", buf[i]);
-       DBG("\n");
-#endif
  
+       printk(KERN_DEBUG "sat %d partition %x:", sat_id, id);
+       print_hex_dump(KERN_DEBUG, "  ", DUMP_PREFIX_OFFSET,
+                      16, 1, buf, len, false);
         if (size)
                 *size = len;
         return (struct smu_sdbp_header *) buf;
@@ -132,13 +121,13 @@ static int wf_sat_read_cache(struct wf_sat *sat)
         if (err < 0)
                 return err;
         sat->last_read = jiffies;
+
  #ifdef LOTSA_DEBUG
         {
                 int i;
-               DBG(KERN_DEBUG "wf_sat_get: data is");
-               for (i = 0; i < 16; ++i)
-                       DBG(" %.2x", sat->cache[i]);
-               DBG("\n");
+               printk(KERN_DEBUG "wf_sat_get: data is");
+               print_hex_dump(KERN_DEBUG, "  ", DUMP_PREFIX_OFFSET,
+                              16, 1, sat->cache, 16, false);
         }
  #endif
         return 0;
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig

index 8b8c123..3db2225 100644 (file)
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -215,17 +215,6 @@ config BLK_DEV_DM
  
           If unsure, say N.
  
-config DM_MQ_DEFAULT
-       bool "request-based DM: use blk-mq I/O path by default"
-       depends on BLK_DEV_DM
-       ---help---
-         This option enables the blk-mq based I/O path for request-based
-         DM devices by default.  With the option the dm_mod.use_blk_mq
-         module/boot option defaults to Y, without it to N, but it can
-         still be overriden either way.
-
-         If unsure say N.
-
  config DM_DEBUG
         bool "Device mapper debugging support"
         depends on BLK_DEV_DM
diff --git a/drivers/md/dm-cache-policy-smq.c b/drivers/md/dm-cache-policy-smq.c

index 1b5b9ad..b61aac0 100644 (file)
--- a/drivers/md/dm-cache-policy-smq.c
+++ b/drivers/md/dm-cache-policy-smq.c
@@ -1200,7 +1200,7 @@ static void queue_demotion(struct smq_policy *mq)
         struct policy_work work;
         struct entry *e;
  
-       if (unlikely(WARN_ON_ONCE(!mq->migrations_allowed)))
+       if (WARN_ON_ONCE(!mq->migrations_allowed))
                 return;
  
         e = q_peek(&mq->clean, mq->clean.nr_levels / 2, true);
diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h

index 7d480c9..224d445 100644 (file)
--- a/drivers/md/dm-core.h
+++ b/drivers/md/dm-core.h
@@ -112,18 +112,8 @@ struct mapped_device {
  
         struct dm_stats stats;
  
-       struct kthread_worker kworker;
-       struct task_struct *kworker_task;
-
-       /* for request-based merge heuristic in dm_request_fn() */
-       unsigned seq_rq_merge_deadline_usecs;
-       int last_rq_rw;
-       sector_t last_rq_pos;
-       ktime_t last_rq_start_time;
-
         /* for blk-mq request-based DM support */
         struct blk_mq_tag_set *tag_set;
-       bool use_blk_mq:1;
         bool init_tio_pdu:1;
  
         struct srcu_struct io_barrier;
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c

index 0481223..b8eec51 100644 (file)
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -2661,6 +2661,7 @@ static int crypt_ctr_optional(struct dm_target *ti, unsigned int argc, char **ar
  static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
  {
         struct crypt_config *cc;
+       const char *devname = dm_table_device_name(ti->table);
         int key_size;
         unsigned int align_mask;
         unsigned long long tmpll;
@@ -2806,18 +2807,22 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
         }
  
         ret = -ENOMEM;
-       cc->io_queue = alloc_workqueue("kcryptd_io", WQ_HIGHPRI | WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 1);
+       cc->io_queue = alloc_workqueue("kcryptd_io/%s",
+                                      WQ_HIGHPRI | WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM,
+                                      1, devname);
         if (!cc->io_queue) {
                 ti->error = "Couldn't create kcryptd io queue";
                 goto bad;
         }
  
         if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags))
-               cc->crypt_queue = alloc_workqueue("kcryptd", WQ_HIGHPRI | WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 1);
+               cc->crypt_queue = alloc_workqueue("kcryptd/%s",
+                                                 WQ_HIGHPRI | WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM,
+                                                 1, devname);
         else
-               cc->crypt_queue = alloc_workqueue("kcryptd",
+               cc->crypt_queue = alloc_workqueue("kcryptd/%s",
                                                   WQ_HIGHPRI | WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND,
-                                                 num_online_cpus());
+                                                 num_online_cpus(), devname);
         if (!cc->crypt_queue) {
                 ti->error = "Couldn't create kcryptd queue";
                 goto bad;
@@ -2826,7 +2831,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
         spin_lock_init(&cc->write_thread_lock);
         cc->write_tree = RB_ROOT;
  
-       cc->write_thread = kthread_create(dmcrypt_write, cc, "dmcrypt_write");
+       cc->write_thread = kthread_create(dmcrypt_write, cc, "dmcrypt_write/%s", devname);
         if (IS_ERR(cc->write_thread)) {
                 ret = PTR_ERR(cc->write_thread);
                 cc->write_thread = NULL;
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c

index 32aabe2..3cb97fa 100644 (file)
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -315,10 +315,6 @@ static int flakey_map(struct dm_target *ti, struct bio *bio)
         if (bio_op(bio) == REQ_OP_ZONE_RESET)
                 goto map_bio;
  
-       /* We need to remap reported zones, so remember the BIO iter */
-       if (bio_op(bio) == REQ_OP_ZONE_REPORT)
-               goto map_bio;
-
         /* Are we alive ? */
         elapsed = (jiffies - fc->start_time) / HZ;
         if (elapsed % (fc->up_interval + fc->down_interval) >= fc->up_interval) {
@@ -380,11 +376,6 @@ static int flakey_end_io(struct dm_target *ti, struct bio *bio,
         if (bio_op(bio) == REQ_OP_ZONE_RESET)
                 return DM_ENDIO_DONE;
  
-       if (bio_op(bio) == REQ_OP_ZONE_REPORT) {
-               dm_remap_zone_report(ti, bio, fc->start);
-               return DM_ENDIO_DONE;
-       }
-
         if (!*error && pb->bio_submitted && (bio_data_dir(bio) == READ)) {
                 if (fc->corrupt_bio_byte && (fc->corrupt_bio_rw == READ) &&
                     all_corrupt_bio_flags_match(bio, fc)) {
@@ -457,6 +448,26 @@ static int flakey_prepare_ioctl(struct dm_target *ti, struct block_device **bdev
         return 0;
  }
  
+#ifdef CONFIG_BLK_DEV_ZONED
+static int flakey_report_zones(struct dm_target *ti, sector_t sector,
+                              struct blk_zone *zones, unsigned int *nr_zones,
+                              gfp_t gfp_mask)
+{
+       struct flakey_c *fc = ti->private;
+       int ret;
+
+       /* Report zones of fc->dev->bdev starting at the mapped sector, then remap them */
+       ret = blkdev_report_zones(fc->dev->bdev, flakey_map_sector(ti, sector),
+                                 zones, nr_zones, gfp_mask);
+       if (ret != 0)
+               return ret; /* non-zero result is handed back to the caller unchanged */
+
+       if (*nr_zones) /* skip the remap step when no zone was reported */
+               dm_remap_zone_report(ti, fc->start, zones, nr_zones);
+       return 0;
+}
+#endif
+
  static int flakey_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, void *data)
  {
         struct flakey_c *fc = ti->private;
@@ -469,6 +480,7 @@ static struct target_type flakey_target = {
         .version = {1, 5, 0},
  #ifdef CONFIG_BLK_DEV_ZONED
         .features = DM_TARGET_ZONED_HM,
+       .report_zones = flakey_report_zones,
  #endif
         .module = THIS_MODULE,
         .ctr    = flakey_ctr,
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c

index b810ea7..f666778 100644 (file)
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -1720,8 +1720,7 @@ static void free_params(struct dm_ioctl *param, size_t param_size, int param_fla
  }
  
  static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl *param_kernel,
-                      int ioctl_flags,
-                      struct dm_ioctl **param, int *param_flags)
+                      int ioctl_flags, struct dm_ioctl **param, int *param_flags)
  {
         struct dm_ioctl *dmi;
         int secure_data;
@@ -1762,18 +1761,13 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl *param_kern
  
         *param_flags |= DM_PARAMS_MALLOC;
  
-       if (copy_from_user(dmi, user, param_kernel->data_size))
-               goto bad;
+       /* Copy from param_kernel (which was already copied from user) */
+       memcpy(dmi, param_kernel, minimum_data_size);
  
-data_copied:
-       /*
-        * Abort if something changed the ioctl data while it was being copied.
-        */
-       if (dmi->data_size != param_kernel->data_size) {
-               DMERR("rejecting ioctl: data size modified while processing parameters");
+       if (copy_from_user(&dmi->data, (char __user *)user + minimum_data_size,
+                          param_kernel->data_size - minimum_data_size))
                 goto bad;
-       }
-
+data_copied:
         /* Wipe the user buffer so we do not return it to userspace */
         if (secure_data && clear_user(user, param_kernel->data_size))
                 goto bad;
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c

index 2f7c44a..8d7ddee 100644 (file)
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -102,19 +102,6 @@ static int linear_map(struct dm_target *ti, struct bio *bio)
         return DM_MAPIO_REMAPPED;
  }
  
-#ifdef CONFIG_BLK_DEV_ZONED
-static int linear_end_io(struct dm_target *ti, struct bio *bio,
-                        blk_status_t *error)
-{
-       struct linear_c *lc = ti->private;
-
-       if (!*error && bio_op(bio) == REQ_OP_ZONE_REPORT)
-               dm_remap_zone_report(ti, bio, lc->start);
-
-       return DM_ENDIO_DONE;
-}
-#endif
-
  static void linear_status(struct dm_target *ti, status_type_t type,
                           unsigned status_flags, char *result, unsigned maxlen)
  {
@@ -148,6 +135,26 @@ static int linear_prepare_ioctl(struct dm_target *ti, struct block_device **bdev
         return 0;
  }
  
+#ifdef CONFIG_BLK_DEV_ZONED
+static int linear_report_zones(struct dm_target *ti, sector_t sector,
+                              struct blk_zone *zones, unsigned int *nr_zones,
+                              gfp_t gfp_mask)
+{
+       struct linear_c *lc = (struct linear_c *) ti->private;
+       int ret;
+
+       /* Report zones of lc->dev->bdev starting at the mapped sector, then remap them */
+       ret = blkdev_report_zones(lc->dev->bdev, linear_map_sector(ti, sector),
+                                 zones, nr_zones, gfp_mask);
+       if (ret != 0)
+               return ret; /* non-zero result is handed back to the caller unchanged */
+
+       if (*nr_zones) /* skip the remap step when no zone was reported */
+               dm_remap_zone_report(ti, lc->start, zones, nr_zones);
+       return 0;
+}
+#endif
+
  static int linear_iterate_devices(struct dm_target *ti,
                                   iterate_devices_callout_fn fn, void *data)
  {
@@ -211,8 +218,8 @@ static struct target_type linear_target = {
         .name   = "linear",
         .version = {1, 4, 0},
  #ifdef CONFIG_BLK_DEV_ZONED
-       .end_io = linear_end_io,
         .features = DM_TARGET_PASSES_INTEGRITY | DM_TARGET_ZONED_HM,
+       .report_zones = linear_report_zones,
  #else
         .features = DM_TARGET_PASSES_INTEGRITY,
  #endif
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c

index 419362c..d6a6692 100644 (file)
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -203,14 +203,7 @@ static struct multipath *alloc_multipath(struct dm_target *ti)
  static int alloc_multipath_stage2(struct dm_target *ti, struct multipath *m)
  {
         if (m->queue_mode == DM_TYPE_NONE) {
-               /*
-                * Default to request-based.
-                */
-               if (dm_use_blk_mq(dm_table_get_md(ti->table)))
-                       m->queue_mode = DM_TYPE_MQ_REQUEST_BASED;
-               else
-                       m->queue_mode = DM_TYPE_REQUEST_BASED;
-
+               m->queue_mode = DM_TYPE_REQUEST_BASED;
         } else if (m->queue_mode == DM_TYPE_BIO_BASED) {
                 INIT_WORK(&m->process_queued_bios, process_queued_bios);
                 /*
@@ -537,10 +530,7 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq,
                  * get the queue busy feedback (via BLK_STS_RESOURCE),
                  * otherwise I/O merging can suffer.
                  */
-               if (q->mq_ops)
-                       return DM_MAPIO_REQUEUE;
-               else
-                       return DM_MAPIO_DELAY_REQUEUE;
+               return DM_MAPIO_REQUEUE;
         }
         clone->bio = clone->biotail = NULL;
         clone->rq_disk = bdev->bd_disk;
@@ -668,7 +658,7 @@ static int multipath_map_bio(struct dm_target *ti, struct bio *bio)
  
  static void process_queued_io_list(struct multipath *m)
  {
-       if (m->queue_mode == DM_TYPE_MQ_REQUEST_BASED)
+       if (m->queue_mode == DM_TYPE_REQUEST_BASED)
                 dm_mq_kick_requeue_list(dm_table_get_md(m->ti->table));
         else if (m->queue_mode == DM_TYPE_BIO_BASED)
                 queue_work(kmultipathd, &m->process_queued_bios);
@@ -1089,10 +1079,9 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m)
  
                         if (!strcasecmp(queue_mode_name, "bio"))
                                 m->queue_mode = DM_TYPE_BIO_BASED;
-                       else if (!strcasecmp(queue_mode_name, "rq"))
+                       else if (!strcasecmp(queue_mode_name, "rq") ||
+                                !strcasecmp(queue_mode_name, "mq"))
                                 m->queue_mode = DM_TYPE_REQUEST_BASED;
-                       else if (!strcasecmp(queue_mode_name, "mq"))
-                               m->queue_mode = DM_TYPE_MQ_REQUEST_BASED;
                         else {
                                 ti->error = "Unknown 'queue_mode' requested";
                                 r = -EINVAL;
@@ -1726,9 +1715,6 @@ static void multipath_status(struct dm_target *ti, status_type_t type,
                         case DM_TYPE_BIO_BASED:
                                 DMEMIT("queue_mode bio ");
                                 break;
-                       case DM_TYPE_MQ_REQUEST_BASED:
-                               DMEMIT("queue_mode mq ");
-                               break;
                         default:
                                 WARN_ON_ONCE(true);
                                 break;
@@ -1972,7 +1958,7 @@ static int multipath_busy(struct dm_target *ti)
  
         /* no paths available, for blk-mq: rely on IO mapping to delay requeue */
         if (!atomic_read(&m->nr_valid_paths) && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
-               return (m->queue_mode != DM_TYPE_MQ_REQUEST_BASED);
+               return (m->queue_mode != DM_TYPE_REQUEST_BASED);
  
         /* Guess which priority_group will be used at next mapping time */
         pg = READ_ONCE(m->current_pg);
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c

index c44925e..e1dd162 100644 (file)
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -2475,7 +2475,7 @@ static int super_validate(struct raid_set *rs, struct md_rdev *rdev)
         }
  
         /* Enable bitmap creation for RAID levels != 0 */
-       mddev->bitmap_info.offset = rt_is_raid0(rs->raid_type) ? 0 : to_sector(4096);
+       mddev->bitmap_info.offset = (rt_is_raid0(rs->raid_type) || rs->journal_dev.dev) ? 0 : to_sector(4096);
         mddev->bitmap_info.default_offset = mddev->bitmap_info.offset;
  
         if (!test_and_clear_bit(FirstUse, &rdev->flags)) {
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c

index 6e547b8..7cd36e4 100644 (file)
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -23,19 +23,6 @@ static unsigned dm_mq_queue_depth = DM_MQ_QUEUE_DEPTH;
  #define RESERVED_REQUEST_BASED_IOS     256
  static unsigned reserved_rq_based_ios = RESERVED_REQUEST_BASED_IOS;
  
-static bool use_blk_mq = IS_ENABLED(CONFIG_DM_MQ_DEFAULT);
-
-bool dm_use_blk_mq_default(void)
-{
-       return use_blk_mq;
-}
-
-bool dm_use_blk_mq(struct mapped_device *md)
-{
-       return md->use_blk_mq;
-}
-EXPORT_SYMBOL_GPL(dm_use_blk_mq);
-
  unsigned dm_get_reserved_rq_based_ios(void)
  {
         return __dm_get_module_param(&reserved_rq_based_ios,
@@ -59,41 +46,13 @@ int dm_request_based(struct mapped_device *md)
         return queue_is_rq_based(md->queue);
  }
  
-static void dm_old_start_queue(struct request_queue *q)
-{
-       unsigned long flags;
-
-       spin_lock_irqsave(q->queue_lock, flags);
-       if (blk_queue_stopped(q))
-               blk_start_queue(q);
-       spin_unlock_irqrestore(q->queue_lock, flags);
-}
-
-static void dm_mq_start_queue(struct request_queue *q)
+void dm_start_queue(struct request_queue *q)
  {
         blk_mq_unquiesce_queue(q);
         blk_mq_kick_requeue_list(q);
  }
  
-void dm_start_queue(struct request_queue *q)
-{
-       if (!q->mq_ops)
-               dm_old_start_queue(q);
-       else
-               dm_mq_start_queue(q);
-}
-
-static void dm_old_stop_queue(struct request_queue *q)
-{
-       unsigned long flags;
-
-       spin_lock_irqsave(q->queue_lock, flags);
-       if (!blk_queue_stopped(q))
-               blk_stop_queue(q);
-       spin_unlock_irqrestore(q->queue_lock, flags);
-}
-
-static void dm_mq_stop_queue(struct request_queue *q)
+void dm_stop_queue(struct request_queue *q)
  {
         if (blk_mq_queue_stopped(q))
                 return;
@@ -101,14 +60,6 @@ static void dm_mq_stop_queue(struct request_queue *q)
         blk_mq_quiesce_queue(q);
  }
  
-void dm_stop_queue(struct request_queue *q)
-{
-       if (!q->mq_ops)
-               dm_old_stop_queue(q);
-       else
-               dm_mq_stop_queue(q);
-}
-
  /*
   * Partial completion handling for request-based dm
   */
@@ -179,27 +130,12 @@ static void rq_end_stats(struct mapped_device *md, struct request *orig)
   */
  static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
  {
-       struct request_queue *q = md->queue;
-       unsigned long flags;
-
         atomic_dec(&md->pending[rw]);
  
         /* nudge anyone waiting on suspend queue */
         if (!md_in_flight(md))
                 wake_up(&md->wait);
  
-       /*
-        * Run this off this callpath, as drivers could invoke end_io while
-        * inside their request_fn (and holding the queue lock). Calling
-        * back into ->request_fn() could deadlock attempting to grab the
-        * queue lock again.
-        */
-       if (!q->mq_ops && run_queue) {
-               spin_lock_irqsave(q->queue_lock, flags);
-               blk_run_queue_async(q);
-               spin_unlock_irqrestore(q->queue_lock, flags);
-       }
-
         /*
          * dm_put() must be at the end of this function. See the comment above
          */
@@ -222,27 +158,10 @@ static void dm_end_request(struct request *clone, blk_status_t error)
         tio->ti->type->release_clone_rq(clone);
  
         rq_end_stats(md, rq);
-       if (!rq->q->mq_ops)
-               blk_end_request_all(rq, error);
-       else
-               blk_mq_end_request(rq, error);
+       blk_mq_end_request(rq, error);
         rq_completed(md, rw, true);
  }
  
-/*
- * Requeue the original request of a clone.
- */
-static void dm_old_requeue_request(struct request *rq, unsigned long delay_ms)
-{
-       struct request_queue *q = rq->q;
-       unsigned long flags;
-
-       spin_lock_irqsave(q->queue_lock, flags);
-       blk_requeue_request(q, rq);
-       blk_delay_queue(q, delay_ms);
-       spin_unlock_irqrestore(q->queue_lock, flags);
-}
-
  static void __dm_mq_kick_requeue_list(struct request_queue *q, unsigned long msecs)
  {
         blk_mq_delay_kick_requeue_list(q, msecs);
@@ -273,11 +192,7 @@ static void dm_requeue_original_request(struct dm_rq_target_io *tio, bool delay_
                 tio->ti->type->release_clone_rq(tio->clone);
         }
  
-       if (!rq->q->mq_ops)
-               dm_old_requeue_request(rq, delay_ms);
-       else
-               dm_mq_delay_requeue_request(rq, delay_ms);
-
+       dm_mq_delay_requeue_request(rq, delay_ms);
         rq_completed(md, rw, false);
  }
  
@@ -340,10 +255,7 @@ static void dm_softirq_done(struct request *rq)
  
                 rq_end_stats(md, rq);
                 rw = rq_data_dir(rq);
-               if (!rq->q->mq_ops)
-                       blk_end_request_all(rq, tio->error);
-               else
-                       blk_mq_end_request(rq, tio->error);
+               blk_mq_end_request(rq, tio->error);
                 rq_completed(md, rw, false);
                 return;
         }
@@ -363,17 +275,14 @@ static void dm_complete_request(struct request *rq, blk_status_t error)
         struct dm_rq_target_io *tio = tio_from_request(rq);
  
         tio->error = error;
-       if (!rq->q->mq_ops)
-               blk_complete_request(rq);
-       else
-               blk_mq_complete_request(rq);
+       blk_mq_complete_request(rq);
  }
  
  /*
   * Complete the not-mapped clone and the original request with the error status
   * through softirq context.
   * Target's rq_end_io() function isn't called.
- * This may be used when the target's map_rq() or clone_and_map_rq() functions fail.
+ * This may be used when the target's clone_and_map_rq() function fails.
   */
  static void dm_kill_unmapped_request(struct request *rq, blk_status_t error)
  {
@@ -381,21 +290,10 @@ static void dm_kill_unmapped_request(struct request *rq, blk_status_t error)
         dm_complete_request(rq, error);
  }
  
-/*
- * Called with the clone's queue lock held (in the case of .request_fn)
- */
  static void end_clone_request(struct request *clone, blk_status_t error)
  {
         struct dm_rq_target_io *tio = clone->end_io_data;
  
-       /*
-        * Actual request completion is done in a softirq context which doesn't
-        * hold the clone's queue lock.  Otherwise, deadlock could occur because:
-        *     - another request may be submitted by the upper level driver
-        *       of the stacking during the completion
-        *     - the submission which requires queue lock may be done
-        *       against this clone's queue
-        */
         dm_complete_request(tio->orig, error);
  }
  
@@ -446,8 +344,6 @@ static int setup_clone(struct request *clone, struct request *rq,
         return 0;
  }
  
-static void map_tio_request(struct kthread_work *work);
-
  static void init_tio(struct dm_rq_target_io *tio, struct request *rq,
                      struct mapped_device *md)
  {
@@ -464,8 +360,6 @@ static void init_tio(struct dm_rq_target_io *tio, struct request *rq,
          */
         if (!md->init_tio_pdu)
                 memset(&tio->info, 0, sizeof(tio->info));
-       if (md->kworker_task)
-               kthread_init_work(&tio->work, map_tio_request);
  }
  
  /*
@@ -504,10 +398,7 @@ check_again:
                         blk_rq_unprep_clone(clone);
                         tio->ti->type->release_clone_rq(clone);
                         tio->clone = NULL;
-                       if (!rq->q->mq_ops)
-                               r = DM_MAPIO_DELAY_REQUEUE;
-                       else
-                               r = DM_MAPIO_REQUEUE;
+                       r = DM_MAPIO_REQUEUE;
                         goto check_again;
                 }
                 break;
@@ -530,20 +421,23 @@ check_again:
         return r;
  }
  
+/* DEPRECATED: was the request-based merge heuristic knob of dm_request_fn(); now always shows 0 */
+ssize_t dm_attr_rq_based_seq_io_merge_deadline_show(struct mapped_device *md, char *buf)
+{
+       return sprintf(buf, "%u\n", 0); /* md is not consulted; the value is constant */
+}
+
+ssize_t dm_attr_rq_based_seq_io_merge_deadline_store(struct mapped_device *md,
+                                                    const char *buf, size_t count)
+{
+       return count; /* deprecated attribute: returns count without reading buf or touching md */
+}
+
  static void dm_start_request(struct mapped_device *md, struct request *orig)
  {
-       if (!orig->q->mq_ops)
-               blk_start_request(orig);
-       else
-               blk_mq_start_request(orig);
+       blk_mq_start_request(orig);
         atomic_inc(&md->pending[rq_data_dir(orig)]);
  
-       if (md->seq_rq_merge_deadline_usecs) {
-               md->last_rq_pos = rq_end_sector(orig);
-               md->last_rq_rw = rq_data_dir(orig);
-               md->last_rq_start_time = ktime_get();
-       }
-
         if (unlikely(dm_stats_used(&md->stats))) {
                 struct dm_rq_target_io *tio = tio_from_request(orig);
                 tio->duration_jiffies = jiffies;
@@ -563,8 +457,10 @@ static void dm_start_request(struct mapped_device *md, struct request *orig)
         dm_get(md);
  }
  
-static int __dm_rq_init_rq(struct mapped_device *md, struct request *rq)
+static int dm_mq_init_request(struct blk_mq_tag_set *set, struct request *rq,
+                             unsigned int hctx_idx, unsigned int numa_node)
  {
+       struct mapped_device *md = set->driver_data;
         struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq);
  
         /*
@@ -581,163 +477,6 @@ static int __dm_rq_init_rq(struct mapped_device *md, struct request *rq)
         return 0;
  }
  
-static int dm_rq_init_rq(struct request_queue *q, struct request *rq, gfp_t gfp)
-{
-       return __dm_rq_init_rq(q->rq_alloc_data, rq);
-}
-
-static void map_tio_request(struct kthread_work *work)
-{
-       struct dm_rq_target_io *tio = container_of(work, struct dm_rq_target_io, work);
-
-       if (map_request(tio) == DM_MAPIO_REQUEUE)
-               dm_requeue_original_request(tio, false);
-}
-
-ssize_t dm_attr_rq_based_seq_io_merge_deadline_show(struct mapped_device *md, char *buf)
-{
-       return sprintf(buf, "%u\n", md->seq_rq_merge_deadline_usecs);
-}
-
-#define MAX_SEQ_RQ_MERGE_DEADLINE_USECS 100000
-
-ssize_t dm_attr_rq_based_seq_io_merge_deadline_store(struct mapped_device *md,
-                                                    const char *buf, size_t count)
-{
-       unsigned deadline;
-
-       if (dm_get_md_type(md) != DM_TYPE_REQUEST_BASED)
-               return count;
-
-       if (kstrtouint(buf, 10, &deadline))
-               return -EINVAL;
-
-       if (deadline > MAX_SEQ_RQ_MERGE_DEADLINE_USECS)
-               deadline = MAX_SEQ_RQ_MERGE_DEADLINE_USECS;
-
-       md->seq_rq_merge_deadline_usecs = deadline;
-
-       return count;
-}
-
-static bool dm_old_request_peeked_before_merge_deadline(struct mapped_device *md)
-{
-       ktime_t kt_deadline;
-
-       if (!md->seq_rq_merge_deadline_usecs)
-               return false;
-
-       kt_deadline = ns_to_ktime((u64)md->seq_rq_merge_deadline_usecs * NSEC_PER_USEC);
-       kt_deadline = ktime_add_safe(md->last_rq_start_time, kt_deadline);
-
-       return !ktime_after(ktime_get(), kt_deadline);
-}
-
-/*
- * q->request_fn for old request-based dm.
- * Called with the queue lock held.
- */
-static void dm_old_request_fn(struct request_queue *q)
-{
-       struct mapped_device *md = q->queuedata;
-       struct dm_target *ti = md->immutable_target;
-       struct request *rq;
-       struct dm_rq_target_io *tio;
-       sector_t pos = 0;
-
-       if (unlikely(!ti)) {
-               int srcu_idx;
-               struct dm_table *map = dm_get_live_table(md, &srcu_idx);
-
-               if (unlikely(!map)) {
-                       dm_put_live_table(md, srcu_idx);
-                       return;
-               }
-               ti = dm_table_find_target(map, pos);
-               dm_put_live_table(md, srcu_idx);
-       }
-
-       /*
-        * For suspend, check blk_queue_stopped() and increment
-        * ->pending within a single queue_lock not to increment the
-        * number of in-flight I/Os after the queue is stopped in
-        * dm_suspend().
-        */
-       while (!blk_queue_stopped(q)) {
-               rq = blk_peek_request(q);
-               if (!rq)
-                       return;
-
-               /* always use block 0 to find the target for flushes for now */
-               pos = 0;
-               if (req_op(rq) != REQ_OP_FLUSH)
-                       pos = blk_rq_pos(rq);
-
-               if ((dm_old_request_peeked_before_merge_deadline(md) &&
-                    md_in_flight(md) && rq->bio && !bio_multiple_segments(rq->bio) &&
-                    md->last_rq_pos == pos && md->last_rq_rw == rq_data_dir(rq)) ||
-                   (ti->type->busy && ti->type->busy(ti))) {
-                       blk_delay_queue(q, 10);
-                       return;
-               }
-
-               dm_start_request(md, rq);
-
-               tio = tio_from_request(rq);
-               init_tio(tio, rq, md);
-               /* Establish tio->ti before queuing work (map_tio_request) */
-               tio->ti = ti;
-               kthread_queue_work(&md->kworker, &tio->work);
-               BUG_ON(!irqs_disabled());
-       }
-}
-
-/*
- * Fully initialize a .request_fn request-based queue.
- */
-int dm_old_init_request_queue(struct mapped_device *md, struct dm_table *t)
-{
-       struct dm_target *immutable_tgt;
-
-       /* Fully initialize the queue */
-       md->queue->cmd_size = sizeof(struct dm_rq_target_io);
-       md->queue->rq_alloc_data = md;
-       md->queue->request_fn = dm_old_request_fn;
-       md->queue->init_rq_fn = dm_rq_init_rq;
-
-       immutable_tgt = dm_table_get_immutable_target(t);
-       if (immutable_tgt && immutable_tgt->per_io_data_size) {
-               /* any target-specific per-io data is immediately after the tio */
-               md->queue->cmd_size += immutable_tgt->per_io_data_size;
-               md->init_tio_pdu = true;
-       }
-       if (blk_init_allocated_queue(md->queue) < 0)
-               return -EINVAL;
-
-       /* disable dm_old_request_fn's merge heuristic by default */
-       md->seq_rq_merge_deadline_usecs = 0;
-
-       blk_queue_softirq_done(md->queue, dm_softirq_done);
-
-       /* Initialize the request-based DM worker thread */
-       kthread_init_worker(&md->kworker);
-       md->kworker_task = kthread_run(kthread_worker_fn, &md->kworker,
-                                      "kdmwork-%s", dm_device_name(md));
-       if (IS_ERR(md->kworker_task)) {
-               int error = PTR_ERR(md->kworker_task);
-               md->kworker_task = NULL;
-               return error;
-       }
-
-       return 0;
-}
-
-static int dm_mq_init_request(struct blk_mq_tag_set *set, struct request *rq,
-               unsigned int hctx_idx, unsigned int numa_node)
-{
-       return __dm_rq_init_rq(set->driver_data, rq);
-}
-
  static blk_status_t dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
                           const struct blk_mq_queue_data *bd)
  {
@@ -790,11 +529,6 @@ int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t)
         struct dm_target *immutable_tgt;
         int err;
  
-       if (!dm_table_all_blk_mq_devices(t)) {
-               DMERR("request-based dm-mq may only be stacked on blk-mq device(s)");
-               return -EINVAL;
-       }
-
         md->tag_set = kzalloc_node(sizeof(struct blk_mq_tag_set), GFP_KERNEL, md->numa_node_id);
         if (!md->tag_set)
                 return -ENOMEM;
@@ -845,6 +579,8 @@ void dm_mq_cleanup_mapped_device(struct mapped_device *md)
  module_param(reserved_rq_based_ios, uint, S_IRUGO | S_IWUSR);
  MODULE_PARM_DESC(reserved_rq_based_ios, "Reserved IOs in request-based mempools");
  
+/* Unused, but preserved for userspace compatibility */
+static bool use_blk_mq = true;
  module_param(use_blk_mq, bool, S_IRUGO | S_IWUSR);
  MODULE_PARM_DESC(use_blk_mq, "Use block multiqueue for request-based DM devices");
  
diff --git a/drivers/md/dm-rq.h b/drivers/md/dm-rq.h

index f43c454..b392455 100644 (file)
--- a/drivers/md/dm-rq.h
+++ b/drivers/md/dm-rq.h
@@ -46,10 +46,6 @@ struct dm_rq_clone_bio_info {
         struct bio clone;
  };
  
-bool dm_use_blk_mq_default(void);
-bool dm_use_blk_mq(struct mapped_device *md);
-
-int dm_old_init_request_queue(struct mapped_device *md, struct dm_table *t);
  int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t);
  void dm_mq_cleanup_mapped_device(struct mapped_device *md);
  
diff --git a/drivers/md/dm-sysfs.c b/drivers/md/dm-sysfs.c

index c209b8a..a05fcd5 100644 (file)
--- a/drivers/md/dm-sysfs.c
+++ b/drivers/md/dm-sysfs.c
@@ -92,7 +92,8 @@ static ssize_t dm_attr_suspended_show(struct mapped_device *md, char *buf)
  
  static ssize_t dm_attr_use_blk_mq_show(struct mapped_device *md, char *buf)
  {
-       sprintf(buf, "%d\n", dm_use_blk_mq(md));
+       /* Always reports 1; the attribute is kept purely for userspace compatibility */
+       sprintf(buf, "%d\n", true);
  
         return strlen(buf);
  }
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c

index 3d0e2c1..9038c30 100644 (file)
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -47,7 +47,6 @@ struct dm_table {
  
         bool integrity_supported:1;
         bool singleton:1;
-       bool all_blk_mq:1;
         unsigned integrity_added:1;
  
         /*
@@ -872,8 +871,7 @@ static bool __table_type_bio_based(enum dm_queue_mode table_type)
  
  static bool __table_type_request_based(enum dm_queue_mode table_type)
  {
-       return (table_type == DM_TYPE_REQUEST_BASED ||
-               table_type == DM_TYPE_MQ_REQUEST_BASED);
+       return table_type == DM_TYPE_REQUEST_BASED;
  }
  
  void dm_table_set_type(struct dm_table *t, enum dm_queue_mode type)
@@ -999,10 +997,6 @@ verify_bio_based:
  
         BUG_ON(!request_based); /* No targets in this table */
  
-       /*
-        * The only way to establish DM_TYPE_MQ_REQUEST_BASED is by
-        * having a compatible target use dm_table_set_type.
-        */
         t->type = DM_TYPE_REQUEST_BASED;
  
  verify_rq_based:
@@ -1022,11 +1016,9 @@ verify_rq_based:
                 int srcu_idx;
                 struct dm_table *live_table = dm_get_live_table(t->md, &srcu_idx);
  
-               /* inherit live table's type and all_blk_mq */
-               if (live_table) {
+               /* inherit live table's type */
+               if (live_table)
                         t->type = live_table->type;
-                       t->all_blk_mq = live_table->all_blk_mq;
-               }
                 dm_put_live_table(t->md, srcu_idx);
                 return 0;
         }
@@ -1046,17 +1038,10 @@ verify_rq_based:
                 DMERR("table load rejected: including non-request-stackable devices");
                 return -EINVAL;
         }
-       if (v.sq_count && v.mq_count) {
+       if (v.sq_count > 0) {
                 DMERR("table load rejected: not all devices are blk-mq request-stackable");
                 return -EINVAL;
         }
-       t->all_blk_mq = v.mq_count > 0;
-
-       if (!t->all_blk_mq &&
-           (t->type == DM_TYPE_MQ_REQUEST_BASED || t->type == DM_TYPE_NVME_BIO_BASED)) {
-               DMERR("table load rejected: all devices are not blk-mq request-stackable");
-               return -EINVAL;
-       }
  
         return 0;
  }
@@ -1105,11 +1090,6 @@ bool dm_table_request_based(struct dm_table *t)
         return __table_type_request_based(dm_table_get_type(t));
  }
  
-bool dm_table_all_blk_mq_devices(struct dm_table *t)
-{
-       return t->all_blk_mq;
-}
-
  static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device *md)
  {
         enum dm_queue_mode type = dm_table_get_type(t);
@@ -1937,6 +1917,16 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
          */
         if (blk_queue_add_random(q) && dm_table_all_devices_attribute(t, device_is_not_random))
                 blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q);
+
+       /*
+        * For a zoned target, the number of zones should be updated for the
+        * correct value to be exposed in sysfs queue/nr_zones. For a BIO based
+        * target, this is all that is needed. For a request based target, the
+        * queue zone bitmaps must also be updated.
+        * Use blk_revalidate_disk_zones() to handle this.
+        */
+       if (blk_queue_is_zoned(q))
+               blk_revalidate_disk_zones(t->md->disk);
  }
  
  unsigned int dm_table_get_num_targets(struct dm_table *t)
@@ -2079,26 +2069,24 @@ struct mapped_device *dm_table_get_md(struct dm_table *t)
  }
  EXPORT_SYMBOL(dm_table_get_md);
  
+const char *dm_table_device_name(struct dm_table *t)
+{
+       return dm_device_name(t->md);
+}
+EXPORT_SYMBOL_GPL(dm_table_device_name);
+
  void dm_table_run_md_queue_async(struct dm_table *t)
  {
         struct mapped_device *md;
         struct request_queue *queue;
-       unsigned long flags;
  
         if (!dm_table_request_based(t))
                 return;
  
         md = dm_table_get_md(t);
         queue = dm_get_md_queue(md);
-       if (queue) {
-               if (queue->mq_ops)
-                       blk_mq_run_hw_queues(queue, true);
-               else {
-                       spin_lock_irqsave(queue->queue_lock, flags);
-                       blk_run_queue_async(queue);
-                       spin_unlock_irqrestore(queue->queue_lock, flags);
-               }
-       }
+       if (queue)
+               blk_mq_run_hw_queues(queue, true);
  }
  EXPORT_SYMBOL(dm_table_run_md_queue_async);
  
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c

index aaf1ad4..0bd8d49 100644 (file)
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -325,7 +325,7 @@ struct thin_c {
          * Ensures the thin is not destroyed until the worker has finished
          * iterating the active_thins list.
          */
-       atomic_t refcount;
+       refcount_t refcount;
         struct completion can_destroy;
  };
  
@@ -4044,12 +4044,12 @@ static struct target_type pool_target = {
   *--------------------------------------------------------------*/
  static void thin_get(struct thin_c *tc)
  {
-       atomic_inc(&tc->refcount);
+       refcount_inc(&tc->refcount);
  }
  
  static void thin_put(struct thin_c *tc)
  {
-       if (atomic_dec_and_test(&tc->refcount))
+       if (refcount_dec_and_test(&tc->refcount))
                 complete(&tc->can_destroy);
  }
  
@@ -4193,7 +4193,7 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
                 r = -EINVAL;
                 goto bad;
         }
-       atomic_set(&tc->refcount, 1);
+       refcount_set(&tc->refcount, 1);
         init_completion(&tc->can_destroy);
         list_add_tail_rcu(&tc->list, &tc->pool->active_thins);
         spin_unlock_irqrestore(&tc->pool->lock, flags);
diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c

index 5f1f80d..2d50eec 100644 (file)
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -350,10 +350,7 @@ static struct wc_memory_superblock *sb(struct dm_writecache *wc)
  
  static struct wc_memory_entry *memory_entry(struct dm_writecache *wc, struct wc_entry *e)
  {
-       if (is_power_of_2(sizeof(struct wc_entry)) && 0)
-               return &sb(wc)->entries[e - wc->entries];
-       else
-               return &sb(wc)->entries[e->index];
+       return &sb(wc)->entries[e->index];
  }
  
  static void *memory_data(struct dm_writecache *wc, struct wc_entry *e)
diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c

index 9699549..fa68336 100644 (file)
--- a/drivers/md/dm-zoned-metadata.c
+++ b/drivers/md/dm-zoned-metadata.c
@@ -99,7 +99,7 @@ struct dmz_mblock {
         struct rb_node          node;
         struct list_head        link;
         sector_t                no;
-       atomic_t                ref;
+       unsigned int            ref;
         unsigned long           state;
         struct page             *page;
         void                    *data;
@@ -296,7 +296,7 @@ static struct dmz_mblock *dmz_alloc_mblock(struct dmz_metadata *zmd,
  
         RB_CLEAR_NODE(&mblk->node);
         INIT_LIST_HEAD(&mblk->link);
-       atomic_set(&mblk->ref, 0);
+       mblk->ref = 0;
         mblk->state = 0;
         mblk->no = mblk_no;
         mblk->data = page_address(mblk->page);
@@ -339,10 +339,11 @@ static void dmz_insert_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk)
  }
  
  /*
- * Lookup a metadata block in the rbtree.
+ * Lookup a metadata block in the rbtree. If the block is found, increment
+ * its reference count.
   */
-static struct dmz_mblock *dmz_lookup_mblock(struct dmz_metadata *zmd,
-                                           sector_t mblk_no)
+static struct dmz_mblock *dmz_get_mblock_fast(struct dmz_metadata *zmd,
+                                             sector_t mblk_no)
  {
         struct rb_root *root = &zmd->mblk_rbtree;
         struct rb_node *node = root->rb_node;
@@ -350,8 +351,17 @@ static struct dmz_mblock *dmz_lookup_mblock(struct dmz_metadata *zmd,
  
         while (node) {
                 mblk = container_of(node, struct dmz_mblock, node);
-               if (mblk->no == mblk_no)
+               if (mblk->no == mblk_no) {
+                       /*
+                        * If this is the first reference to the block,
+                        * remove it from the LRU list.
+                        */
+                       mblk->ref++;
+                       if (mblk->ref == 1 &&
+                           !test_bit(DMZ_META_DIRTY, &mblk->state))
+                               list_del_init(&mblk->link);
                         return mblk;
+               }
                 node = (mblk->no < mblk_no) ? node->rb_left : node->rb_right;
         }
  
@@ -382,32 +392,47 @@ static void dmz_mblock_bio_end_io(struct bio *bio)
  }
  
  /*
- * Read a metadata block from disk.
+ * Read an uncached metadata block from disk and add it to the cache.
   */
-static struct dmz_mblock *dmz_fetch_mblock(struct dmz_metadata *zmd,
-                                          sector_t mblk_no)
+static struct dmz_mblock *dmz_get_mblock_slow(struct dmz_metadata *zmd,
+                                             sector_t mblk_no)
  {
-       struct dmz_mblock *mblk;
+       struct dmz_mblock *mblk, *m;
         sector_t block = zmd->sb[zmd->mblk_primary].block + mblk_no;
         struct bio *bio;
  
-       /* Get block and insert it */
+       /* Get a new block and a BIO to read it */
         mblk = dmz_alloc_mblock(zmd, mblk_no);
         if (!mblk)
                 return NULL;
  
-       spin_lock(&zmd->mblk_lock);
-       atomic_inc(&mblk->ref);
-       set_bit(DMZ_META_READING, &mblk->state);
-       dmz_insert_mblock(zmd, mblk);
-       spin_unlock(&zmd->mblk_lock);
-
         bio = bio_alloc(GFP_NOIO, 1);
         if (!bio) {
                 dmz_free_mblock(zmd, mblk);
                 return NULL;
         }
  
+       spin_lock(&zmd->mblk_lock);
+
+       /*
+        * Make sure that another context did not start reading
+        * the block already.
+        */
+       m = dmz_get_mblock_fast(zmd, mblk_no);
+       if (m) {
+               spin_unlock(&zmd->mblk_lock);
+               dmz_free_mblock(zmd, mblk);
+               bio_put(bio);
+               return m;
+       }
+
+       mblk->ref++;
+       set_bit(DMZ_META_READING, &mblk->state);
+       dmz_insert_mblock(zmd, mblk);
+
+       spin_unlock(&zmd->mblk_lock);
+
+       /* Submit read BIO */
         bio->bi_iter.bi_sector = dmz_blk2sect(block);
         bio_set_dev(bio, zmd->dev->bdev);
         bio->bi_private = mblk;
@@ -484,7 +509,8 @@ static void dmz_release_mblock(struct dmz_metadata *zmd,
  
         spin_lock(&zmd->mblk_lock);
  
-       if (atomic_dec_and_test(&mblk->ref)) {
+       mblk->ref--;
+       if (mblk->ref == 0) {
                 if (test_bit(DMZ_META_ERROR, &mblk->state)) {
                         rb_erase(&mblk->node, &zmd->mblk_rbtree);
                         dmz_free_mblock(zmd, mblk);
@@ -508,18 +534,12 @@ static struct dmz_mblock *dmz_get_mblock(struct dmz_metadata *zmd,
  
         /* Check rbtree */
         spin_lock(&zmd->mblk_lock);
-       mblk = dmz_lookup_mblock(zmd, mblk_no);
-       if (mblk) {
-               /* Cache hit: remove block from LRU list */
-               if (atomic_inc_return(&mblk->ref) == 1 &&
-                   !test_bit(DMZ_META_DIRTY, &mblk->state))
-                       list_del_init(&mblk->link);
-       }
+       mblk = dmz_get_mblock_fast(zmd, mblk_no);
         spin_unlock(&zmd->mblk_lock);
  
         if (!mblk) {
                 /* Cache miss: read the block from disk */
-               mblk = dmz_fetch_mblock(zmd, mblk_no);
+               mblk = dmz_get_mblock_slow(zmd, mblk_no);
                 if (!mblk)
                         return ERR_PTR(-ENOMEM);
         }
@@ -753,7 +773,7 @@ int dmz_flush_metadata(struct dmz_metadata *zmd)
  
                 spin_lock(&zmd->mblk_lock);
                 clear_bit(DMZ_META_DIRTY, &mblk->state);
-               if (atomic_read(&mblk->ref) == 0)
+               if (mblk->ref == 0)
                         list_add_tail(&mblk->link, &zmd->mblk_lru_list);
                 spin_unlock(&zmd->mblk_lock);
         }
@@ -2308,7 +2328,7 @@ static void dmz_cleanup_metadata(struct dmz_metadata *zmd)
                 mblk = list_first_entry(&zmd->mblk_dirty_list,
                                         struct dmz_mblock, link);
                 dmz_dev_warn(zmd->dev, "mblock %llu still in dirty list (ref %u)",
-                            (u64)mblk->no, atomic_read(&mblk->ref));
+                            (u64)mblk->no, mblk->ref);
                 list_del_init(&mblk->link);
                 rb_erase(&mblk->node, &zmd->mblk_rbtree);
                 dmz_free_mblock(zmd, mblk);
@@ -2326,8 +2346,8 @@ static void dmz_cleanup_metadata(struct dmz_metadata *zmd)
         root = &zmd->mblk_rbtree;
         rbtree_postorder_for_each_entry_safe(mblk, next, root, node) {
                 dmz_dev_warn(zmd->dev, "mblock %llu ref %u still in rbtree",
-                            (u64)mblk->no, atomic_read(&mblk->ref));
-               atomic_set(&mblk->ref, 0);
+                            (u64)mblk->no, mblk->ref);
+               mblk->ref = 0;
                 dmz_free_mblock(zmd, mblk);
         }
  
diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c

index a44183f..981154e 100644 (file)
--- a/drivers/md/dm-zoned-target.c
+++ b/drivers/md/dm-zoned-target.c
@@ -19,7 +19,7 @@ struct dmz_bioctx {
         struct dmz_target       *target;
         struct dm_zone          *zone;
         struct bio              *bio;
-       atomic_t                ref;
+       refcount_t              ref;
         blk_status_t            status;
  };
  
@@ -28,7 +28,7 @@ struct dmz_bioctx {
   */
  struct dm_chunk_work {
         struct work_struct      work;
-       atomic_t                refcount;
+       refcount_t              refcount;
         struct dmz_target       *target;
         unsigned int            chunk;
         struct bio_list         bio_list;
@@ -115,7 +115,7 @@ static int dmz_submit_read_bio(struct dmz_target *dmz, struct dm_zone *zone,
         if (nr_blocks == dmz_bio_blocks(bio)) {
                 /* Setup and submit the BIO */
                 bio->bi_iter.bi_sector = sector;
-               atomic_inc(&bioctx->ref);
+               refcount_inc(&bioctx->ref);
                 generic_make_request(bio);
                 return 0;
         }
@@ -134,7 +134,7 @@ static int dmz_submit_read_bio(struct dmz_target *dmz, struct dm_zone *zone,
         bio_advance(bio, clone->bi_iter.bi_size);
  
         /* Submit the clone */
-       atomic_inc(&bioctx->ref);
+       refcount_inc(&bioctx->ref);
         generic_make_request(clone);
  
         return 0;
@@ -240,7 +240,7 @@ static void dmz_submit_write_bio(struct dmz_target *dmz, struct dm_zone *zone,
         /* Setup and submit the BIO */
         bio_set_dev(bio, dmz->dev->bdev);
         bio->bi_iter.bi_sector = dmz_start_sect(dmz->metadata, zone) + dmz_blk2sect(chunk_block);
-       atomic_inc(&bioctx->ref);
+       refcount_inc(&bioctx->ref);
         generic_make_request(bio);
  
         if (dmz_is_seq(zone))
@@ -456,7 +456,7 @@ out:
   */
  static inline void dmz_get_chunk_work(struct dm_chunk_work *cw)
  {
-       atomic_inc(&cw->refcount);
+       refcount_inc(&cw->refcount);
  }
  
  /*
@@ -465,7 +465,7 @@ static inline void dmz_get_chunk_work(struct dm_chunk_work *cw)
   */
  static void dmz_put_chunk_work(struct dm_chunk_work *cw)
  {
-       if (atomic_dec_and_test(&cw->refcount)) {
+       if (refcount_dec_and_test(&cw->refcount)) {
                 WARN_ON(!bio_list_empty(&cw->bio_list));
                 radix_tree_delete(&cw->target->chunk_rxtree, cw->chunk);
                 kfree(cw);
@@ -546,7 +546,7 @@ static void dmz_queue_chunk_work(struct dmz_target *dmz, struct bio *bio)
                         goto out;
  
                 INIT_WORK(&cw->work, dmz_chunk_work);
-               atomic_set(&cw->refcount, 0);
+               refcount_set(&cw->refcount, 0);
                 cw->target = dmz;
                 cw->chunk = chunk;
                 bio_list_init(&cw->bio_list);
@@ -599,7 +599,7 @@ static int dmz_map(struct dm_target *ti, struct bio *bio)
         bioctx->target = dmz;
         bioctx->zone = NULL;
         bioctx->bio = bio;
-       atomic_set(&bioctx->ref, 1);
+       refcount_set(&bioctx->ref, 1);
         bioctx->status = BLK_STS_OK;
  
         /* Set the BIO pending in the flush list */
@@ -633,7 +633,7 @@ static int dmz_end_io(struct dm_target *ti, struct bio *bio, blk_status_t *error
         if (bioctx->status == BLK_STS_OK && *error)
                 bioctx->status = *error;
  
-       if (!atomic_dec_and_test(&bioctx->ref))
+       if (!refcount_dec_and_test(&bioctx->ref))
                 return DM_ENDIO_INCOMPLETE;
  
         /* Done */
@@ -702,8 +702,7 @@ static int dmz_get_zoned_device(struct dm_target *ti, char *path)
         dev->zone_nr_blocks = dmz_sect2blk(dev->zone_nr_sectors);
         dev->zone_nr_blocks_shift = ilog2(dev->zone_nr_blocks);
  
-       dev->nr_zones = (dev->capacity + dev->zone_nr_sectors - 1)
-               >> dev->zone_nr_sectors_shift;
+       dev->nr_zones = blkdev_nr_zones(dev->bdev);
  
         dmz->dev = dev;
  
diff --git a/drivers/md/dm.c b/drivers/md/dm.c

index 45abb54..c510179 100644 (file)
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -458,6 +458,57 @@ static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
         return dm_get_geometry(md, geo);
  }
  
+static int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
+                              struct blk_zone *zones, unsigned int *nr_zones,
+                              gfp_t gfp_mask)
+{
+#ifdef CONFIG_BLK_DEV_ZONED
+       struct mapped_device *md = disk->private_data;
+       struct dm_target *tgt;
+       struct dm_table *map;
+       int srcu_idx, ret;
+
+       if (dm_suspended_md(md))
+               return -EAGAIN;
+
+       map = dm_get_live_table(md, &srcu_idx);
+       if (!map)
+               return -EIO;
+
+       tgt = dm_table_find_target(map, sector);
+       if (!dm_target_is_valid(tgt)) {
+               ret = -EIO;
+               goto out;
+       }
+
+       /*
+        * If we are executing this, we already know that the block device
+        * is a zoned device and so each target should have support for that
+        * type of drive. A missing report_zones method means that the target
+        * driver has a problem.
+        */
+       if (WARN_ON(!tgt->type->report_zones)) {
+               ret = -EIO;
+               goto out;
+       }
+
+       /*
+        * blkdev_report_zones() will loop and call this again to cover all the
+        * zones of the target, eventually moving on to the next target.
+        * So there is no need to loop here trying to fill the entire array
+        * of zones.
+        */
+       ret = tgt->type->report_zones(tgt, sector, zones,
+                                     nr_zones, gfp_mask);
+
+out:
+       dm_put_live_table(md, srcu_idx);
+       return ret;
+#else
+       return -ENOTSUPP;
+#endif
+}
+
  static int dm_prepare_ioctl(struct mapped_device *md, int *srcu_idx,
                             struct block_device **bdev)
         __acquires(md->io_barrier)
@@ -1155,93 +1206,49 @@ void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors)
  EXPORT_SYMBOL_GPL(dm_accept_partial_bio);
  
  /*
- * The zone descriptors obtained with a zone report indicate zone positions
- * within the target backing device, regardless of that device is a partition
- * and regardless of the target mapping start sector on the device or partition.
- * The zone descriptors start sector and write pointer position must be adjusted
- * to match their relative position within the dm device.
- * A target may call dm_remap_zone_report() after completion of a
- * REQ_OP_ZONE_REPORT bio to remap the zone descriptors obtained from the
- * backing device.
+ * The zone descriptors obtained with a zone report indicate
+ * zone positions within the underlying device of the target. The zone
+ * descriptors must be remapped to match their position within the dm device.
+ * The calling target should obtain the zone information using
+ * blkdev_report_zones() to ensure that remapping for partition offset is
+ * already handled.
   */
-void dm_remap_zone_report(struct dm_target *ti, struct bio *bio, sector_t start)
+void dm_remap_zone_report(struct dm_target *ti, sector_t start,
+                         struct blk_zone *zones, unsigned int *nr_zones)
  {
  #ifdef CONFIG_BLK_DEV_ZONED
-       struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
-       struct bio *report_bio = tio->io->orig_bio;
-       struct blk_zone_report_hdr *hdr = NULL;
         struct blk_zone *zone;
-       unsigned int nr_rep = 0;
-       unsigned int ofst;
-       sector_t part_offset;
-       struct bio_vec bvec;
-       struct bvec_iter iter;
-       void *addr;
-
-       if (bio->bi_status)
-               return;
-
-       /*
-        * bio sector was incremented by the request size on completion. Taking
-        * into account the original request sector, the target start offset on
-        * the backing device and the target mapping offset (ti->begin), the
-        * start sector of the backing device. The partition offset is always 0
-        * if the target uses a whole device.
-        */
-       part_offset = bio->bi_iter.bi_sector + ti->begin - (start + bio_end_sector(report_bio));
+       unsigned int nrz = *nr_zones;
+       int i;
  
         /*
-        * Remap the start sector of the reported zones. For sequential zones,
-        * also remap the write pointer position.
+        * Remap the start sector and write pointer position of the zones in
+        * the array. Since we may have obtained from the target underlying
+        * device more zones that the target size, also adjust the number
+        * of zones.
          */
-       bio_for_each_segment(bvec, report_bio, iter) {
-               addr = kmap_atomic(bvec.bv_page);
-
-               /* Remember the report header in the first page */
-               if (!hdr) {
-                       hdr = addr;
-                       ofst = sizeof(struct blk_zone_report_hdr);
-               } else
-                       ofst = 0;
-
-               /* Set zones start sector */
-               while (hdr->nr_zones && ofst < bvec.bv_len) {
-                       zone = addr + ofst;
-                       zone->start -= part_offset;
-                       if (zone->start >= start + ti->len) {
-                               hdr->nr_zones = 0;
-                               break;
-                       }
-                       zone->start = zone->start + ti->begin - start;
-                       if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL) {
-                               if (zone->cond == BLK_ZONE_COND_FULL)
-                                       zone->wp = zone->start + zone->len;
-                               else if (zone->cond == BLK_ZONE_COND_EMPTY)
-                                       zone->wp = zone->start;
-                               else
-                                       zone->wp = zone->wp + ti->begin - start - part_offset;
-                       }
-                       ofst += sizeof(struct blk_zone);
-                       hdr->nr_zones--;
-                       nr_rep++;
+       for (i = 0; i < nrz; i++) {
+               zone = zones + i;
+               if (zone->start >= start + ti->len) {
+                       memset(zone, 0, sizeof(struct blk_zone) * (nrz - i));
+                       break;
                 }
  
-               if (addr != hdr)
-                       kunmap_atomic(addr);
-
-               if (!hdr->nr_zones)
-                       break;
-       }
+               zone->start = zone->start + ti->begin - start;
+               if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
+                       continue;
  
-       if (hdr) {
-               hdr->nr_zones = nr_rep;
-               kunmap_atomic(hdr);
+               if (zone->cond == BLK_ZONE_COND_FULL)
+                       zone->wp = zone->start + zone->len;
+               else if (zone->cond == BLK_ZONE_COND_EMPTY)
+                       zone->wp = zone->start;
+               else
+                       zone->wp = zone->wp + ti->begin - start;
         }
  
-       bio_advance(report_bio, report_bio->bi_iter.bi_size);
-
+       *nr_zones = i;
  #else /* !CONFIG_BLK_DEV_ZONED */
-       bio->bi_status = BLK_STS_NOTSUPP;
+       *nr_zones = 0;
  #endif
  }
  EXPORT_SYMBOL_GPL(dm_remap_zone_report);
@@ -1327,8 +1334,7 @@ static int clone_bio(struct dm_target_io *tio, struct bio *bio,
                         return r;
         }
  
-       if (bio_op(bio) != REQ_OP_ZONE_REPORT)
-               bio_advance(clone, to_bytes(sector - clone->bi_iter.bi_sector));
+       bio_advance(clone, to_bytes(sector - clone->bi_iter.bi_sector));
         clone->bi_iter.bi_size = to_bytes(len);
  
         if (unlikely(bio_integrity(bio) != NULL))
@@ -1541,7 +1547,6 @@ static bool __process_abnormal_io(struct clone_info *ci, struct dm_target *ti,
   */
  static int __split_and_process_non_flush(struct clone_info *ci)
  {
-       struct bio *bio = ci->bio;
         struct dm_target *ti;
         unsigned len;
         int r;
@@ -1553,11 +1558,7 @@ static int __split_and_process_non_flush(struct clone_info *ci)
         if (unlikely(__process_abnormal_io(ci, ti, &r)))
                 return r;
  
-       if (bio_op(bio) == REQ_OP_ZONE_REPORT)
-               len = ci->sector_count;
-       else
-               len = min_t(sector_t, max_io_len(ci->sector, ti),
-                           ci->sector_count);
+       len = min_t(sector_t, max_io_len(ci->sector, ti), ci->sector_count);
  
         r = __clone_and_map_data_bio(ci, ti, ci->sector, &len);
         if (r < 0)
@@ -1616,9 +1617,6 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md,
                                  * We take a clone of the original to store in
                                  * ci.io->orig_bio to be used by end_io_acct() and
                                  * for dec_pending to use for completion handling.
-                                * As this path is not used for REQ_OP_ZONE_REPORT,
-                                * the usage of io->orig_bio in dm_remap_zone_report()
-                                * won't be affected by this reassignment.
                                  */
                                 struct bio *b = bio_split(bio, bio_sectors(bio) - ci.sector_count,
                                                           GFP_NOIO, &md->queue->bio_split);
@@ -1666,7 +1664,7 @@ static blk_qc_t __process_bio(struct mapped_device *md,
                  * Defend against IO still getting in during teardown
                  * - as was seen for a time with nvme-fcloop
                  */
-               if (unlikely(WARN_ON_ONCE(!ti || !dm_target_is_valid(ti)))) {
+               if (WARN_ON_ONCE(!ti || !dm_target_is_valid(ti))) {
                         error = -EIO;
                         goto out;
                 }
@@ -1808,8 +1806,6 @@ static void dm_wq_work(struct work_struct *work);
  
  static void dm_init_normal_md_queue(struct mapped_device *md)
  {
-       md->use_blk_mq = false;
-
         /*
          * Initialize aspects of queue that aren't relevant for blk-mq
          */
@@ -1820,8 +1816,6 @@ static void cleanup_mapped_device(struct mapped_device *md)
  {
         if (md->wq)
                 destroy_workqueue(md->wq);
-       if (md->kworker_task)
-               kthread_stop(md->kworker_task);
         bioset_exit(&md->bs);
         bioset_exit(&md->io_bs);
  
@@ -1888,7 +1882,6 @@ static struct mapped_device *alloc_dev(int minor)
                 goto bad_io_barrier;
  
         md->numa_node_id = numa_node_id;
-       md->use_blk_mq = dm_use_blk_mq_default();
         md->init_tio_pdu = false;
         md->type = DM_TYPE_NONE;
         mutex_init(&md->suspend_lock);
@@ -1919,7 +1912,6 @@ static struct mapped_device *alloc_dev(int minor)
         INIT_WORK(&md->work, dm_wq_work);
         init_waitqueue_head(&md->eventq);
         init_completion(&md->kobj_holder.completion);
-       md->kworker_task = NULL;
  
         md->disk->major = _major;
         md->disk->first_minor = minor;
@@ -2219,14 +2211,6 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t)
  
         switch (type) {
         case DM_TYPE_REQUEST_BASED:
-               dm_init_normal_md_queue(md);
-               r = dm_old_init_request_queue(md, t);
-               if (r) {
-                       DMERR("Cannot initialize queue for request-based mapped device");
-                       return r;
-               }
-               break;
-       case DM_TYPE_MQ_REQUEST_BASED:
                 r = dm_mq_init_request_queue(md, t);
                 if (r) {
                         DMERR("Cannot initialize queue for request-based dm-mq mapped device");
@@ -2331,9 +2315,6 @@ static void __dm_destroy(struct mapped_device *md, bool wait)
  
         blk_set_queue_dying(md->queue);
  
-       if (dm_request_based(md) && md->kworker_task)
-               kthread_flush_worker(&md->kworker);
-
         /*
          * Take suspend_lock so that presuspend and postsuspend methods
          * do not race with internal suspend.
@@ -2586,11 +2567,8 @@ static int __dm_suspend(struct mapped_device *md, struct dm_table *map,
          * Stop md->queue before flushing md->wq in case request-based
          * dm defers requests to md->wq from md->queue.
          */
-       if (dm_request_based(md)) {
+       if (dm_request_based(md))
                 dm_stop_queue(md->queue);
-               if (md->kworker_task)
-                       kthread_flush_worker(&md->kworker);
-       }
  
         flush_workqueue(md->wq);
  
@@ -2965,7 +2943,6 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, enum dm_qu
                         goto out;
                 break;
         case DM_TYPE_REQUEST_BASED:
-       case DM_TYPE_MQ_REQUEST_BASED:
                 pool_size = max(dm_get_reserved_rq_based_ios(), min_pool_size);
                 front_pad = offsetof(struct dm_rq_clone_bio_info, clone);
                 /* per_io_data_size is used for blk-mq pdu at queue allocation */
@@ -3167,6 +3144,7 @@ static const struct block_device_operations dm_blk_dops = {
         .release = dm_blk_close,
         .ioctl = dm_blk_ioctl,
         .getgeo = dm_blk_getgeo,
+       .report_zones = dm_blk_report_zones,
         .pr_ops = &dm_pr_ops,
         .owner = THIS_MODULE
  };
diff --git a/drivers/md/dm.h b/drivers/md/dm.h

index 114a81b..2d539b8 100644 (file)
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -70,7 +70,6 @@ struct dm_target *dm_table_get_immutable_target(struct dm_table *t);
  struct dm_target *dm_table_get_wildcard_target(struct dm_table *t);
  bool dm_table_bio_based(struct dm_table *t);
  bool dm_table_request_based(struct dm_table *t);
-bool dm_table_all_blk_mq_devices(struct dm_table *t);
  void dm_table_free_md_mempools(struct dm_table *t);
  struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t);
  
diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c

index 2fc8c11..1cd4f99 100644 (file)
--- a/drivers/md/md-bitmap.c
+++ b/drivers/md/md-bitmap.c
@@ -2288,9 +2288,9 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
                         goto out;
                 }
                 if (mddev->pers) {
-                       mddev->pers->quiesce(mddev, 1);
+                       mddev_suspend(mddev);
                         md_bitmap_destroy(mddev);
-                       mddev->pers->quiesce(mddev, 0);
+                       mddev_resume(mddev);
                 }
                 mddev->bitmap_info.offset = 0;
                 if (mddev->bitmap_info.file) {
@@ -2327,8 +2327,8 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
                         mddev->bitmap_info.offset = offset;
                         if (mddev->pers) {
                                 struct bitmap *bitmap;
-                               mddev->pers->quiesce(mddev, 1);
                                 bitmap = md_bitmap_create(mddev, -1);
+                               mddev_suspend(mddev);
                                 if (IS_ERR(bitmap))
                                         rv = PTR_ERR(bitmap);
                                 else {
@@ -2337,11 +2337,12 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
                                         if (rv)
                                                 mddev->bitmap_info.offset = 0;
                                 }
-                               mddev->pers->quiesce(mddev, 0);
                                 if (rv) {
                                         md_bitmap_destroy(mddev);
+                                       mddev_resume(mddev);
                                         goto out;
                                 }
+                               mddev_resume(mddev);
                         }
                 }
         }
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c

index 0b2af6e..8dff19d 100644 (file)
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -33,13 +33,6 @@ struct dlm_lock_resource {
         int mode;
  };
  
-struct suspend_info {
-       int slot;
-       sector_t lo;
-       sector_t hi;
-       struct list_head list;
-};
-
  struct resync_info {
         __le64 lo;
         __le64 hi;
@@ -80,7 +73,13 @@ struct md_cluster_info {
         struct dlm_lock_resource **other_bitmap_lockres;
         struct dlm_lock_resource *resync_lockres;
         struct list_head suspend_list;
+
         spinlock_t suspend_lock;
+       /* record the region in which writes should be suspended */
+       sector_t suspend_lo;
+       sector_t suspend_hi;
+       int suspend_from; /* the slot which broadcast suspend_lo/hi */
+
         struct md_thread *recovery_thread;
         unsigned long recovery_map;
         /* communication loc resources */
@@ -105,6 +104,7 @@ enum msg_type {
         RE_ADD,
         BITMAP_NEEDS_SYNC,
         CHANGE_CAPACITY,
+       BITMAP_RESIZE,
  };
  
  struct cluster_msg {
@@ -270,25 +270,22 @@ static void add_resync_info(struct dlm_lock_resource *lockres,
         ri->hi = cpu_to_le64(hi);
  }
  
-static struct suspend_info *read_resync_info(struct mddev *mddev, struct dlm_lock_resource *lockres)
+static int read_resync_info(struct mddev *mddev,
+                           struct dlm_lock_resource *lockres)
  {
         struct resync_info ri;
-       struct suspend_info *s = NULL;
-       sector_t hi = 0;
+       struct md_cluster_info *cinfo = mddev->cluster_info;
+       int ret = 0;
  
         dlm_lock_sync(lockres, DLM_LOCK_CR);
         memcpy(&ri, lockres->lksb.sb_lvbptr, sizeof(struct resync_info));
-       hi = le64_to_cpu(ri.hi);
-       if (hi > 0) {
-               s = kzalloc(sizeof(struct suspend_info), GFP_KERNEL);
-               if (!s)
-                       goto out;
-               s->hi = hi;
-               s->lo = le64_to_cpu(ri.lo);
+       if (le64_to_cpu(ri.hi) > 0) {
+               cinfo->suspend_hi = le64_to_cpu(ri.hi);
+               cinfo->suspend_lo = le64_to_cpu(ri.lo);
+               ret = 1;
         }
         dlm_unlock_sync(lockres);
-out:
-       return s;
+       return ret;
  }
  
  static void recover_bitmaps(struct md_thread *thread)
@@ -298,7 +295,6 @@ static void recover_bitmaps(struct md_thread *thread)
         struct dlm_lock_resource *bm_lockres;
         char str[64];
         int slot, ret;
-       struct suspend_info *s, *tmp;
         sector_t lo, hi;
  
         while (cinfo->recovery_map) {
@@ -325,13 +321,17 @@ static void recover_bitmaps(struct md_thread *thread)
  
                 /* Clear suspend_area associated with the bitmap */
                 spin_lock_irq(&cinfo->suspend_lock);
-               list_for_each_entry_safe(s, tmp, &cinfo->suspend_list, list)
-                       if (slot == s->slot) {
-                               list_del(&s->list);
-                               kfree(s);
-                       }
+               cinfo->suspend_hi = 0;
+               cinfo->suspend_lo = 0;
+               cinfo->suspend_from = -1;
                 spin_unlock_irq(&cinfo->suspend_lock);
  
+               /* Kick off a reshape if needed */
+               if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) &&
+                   test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
+                   mddev->reshape_position != MaxSector)
+                       md_wakeup_thread(mddev->sync_thread);
+
                 if (hi > 0) {
                         if (lo < mddev->recovery_cp)
                                 mddev->recovery_cp = lo;
@@ -434,34 +434,23 @@ static void ack_bast(void *arg, int mode)
         }
  }
  
-static void __remove_suspend_info(struct md_cluster_info *cinfo, int slot)
-{
-       struct suspend_info *s, *tmp;
-
-       list_for_each_entry_safe(s, tmp, &cinfo->suspend_list, list)
-               if (slot == s->slot) {
-                       list_del(&s->list);
-                       kfree(s);
-                       break;
-               }
-}
-
  static void remove_suspend_info(struct mddev *mddev, int slot)
  {
         struct md_cluster_info *cinfo = mddev->cluster_info;
         mddev->pers->quiesce(mddev, 1);
         spin_lock_irq(&cinfo->suspend_lock);
-       __remove_suspend_info(cinfo, slot);
+       cinfo->suspend_hi = 0;
+       cinfo->suspend_lo = 0;
         spin_unlock_irq(&cinfo->suspend_lock);
         mddev->pers->quiesce(mddev, 0);
  }
  
-
  static void process_suspend_info(struct mddev *mddev,
                 int slot, sector_t lo, sector_t hi)
  {
         struct md_cluster_info *cinfo = mddev->cluster_info;
-       struct suspend_info *s;
+       struct mdp_superblock_1 *sb = NULL;
+       struct md_rdev *rdev;
  
         if (!hi) {
                 /*
@@ -475,6 +464,12 @@ static void process_suspend_info(struct mddev *mddev,
                 return;
         }
  
+       rdev_for_each(rdev, mddev)
+               if (rdev->raid_disk > -1 && !test_bit(Faulty, &rdev->flags)) {
+                       sb = page_address(rdev->sb_page);
+                       break;
+               }
+
         /*
          * The bitmaps are not same for different nodes
          * if RESYNCING is happening in one node, then
@@ -487,26 +482,26 @@ static void process_suspend_info(struct mddev *mddev,
          * sync_low/hi is used to record the region which
          * arrived in the previous RESYNCING message,
          *
-        * Call bitmap_sync_with_cluster to clear
-        * NEEDED_MASK and set RESYNC_MASK since
-        * resync thread is running in another node,
-        * so we don't need to do the resync again
-        * with the same section */
-       md_bitmap_sync_with_cluster(mddev, cinfo->sync_low, cinfo->sync_hi, lo, hi);
+        * Call md_bitmap_sync_with_cluster to clear NEEDED_MASK
+        * and set RESYNC_MASK, since the resync thread is running
+        * on another node and we don't need to resync the same
+        * section again.
+        *
+        * Skip md_bitmap_sync_with_cluster while a reshape is
+        * happening, because the reshaping region is small and
+        * we don't want to trigger lots of WARNs.
+        */
+       if (sb && !(le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE))
+               md_bitmap_sync_with_cluster(mddev, cinfo->sync_low,
+                                           cinfo->sync_hi, lo, hi);
         cinfo->sync_low = lo;
         cinfo->sync_hi = hi;
  
-       s = kzalloc(sizeof(struct suspend_info), GFP_KERNEL);
-       if (!s)
-               return;
-       s->slot = slot;
-       s->lo = lo;
-       s->hi = hi;
         mddev->pers->quiesce(mddev, 1);
         spin_lock_irq(&cinfo->suspend_lock);
-       /* Remove existing entry (if exists) before adding */
-       __remove_suspend_info(cinfo, slot);
-       list_add(&s->list, &cinfo->suspend_list);
+       cinfo->suspend_from = slot;
+       cinfo->suspend_lo = lo;
+       cinfo->suspend_hi = hi;
         spin_unlock_irq(&cinfo->suspend_lock);
         mddev->pers->quiesce(mddev, 0);
  }
@@ -612,6 +607,11 @@ static int process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg)
         case BITMAP_NEEDS_SYNC:
                 __recover_slot(mddev, le32_to_cpu(msg->slot));
                 break;
+       case BITMAP_RESIZE:
+               if (le64_to_cpu(msg->high) != mddev->pers->size(mddev, 0, 0))
+                       ret = md_bitmap_resize(mddev->bitmap,
+                                           le64_to_cpu(msg->high), 0, 0);
+               break;
         default:
                 ret = -1;
                 pr_warn("%s:%d Received unknown message from %d\n",
@@ -800,7 +800,6 @@ static int gather_all_resync_info(struct mddev *mddev, int total_slots)
         struct md_cluster_info *cinfo = mddev->cluster_info;
         int i, ret = 0;
         struct dlm_lock_resource *bm_lockres;
-       struct suspend_info *s;
         char str[64];
         sector_t lo, hi;
  
@@ -819,16 +818,13 @@ static int gather_all_resync_info(struct mddev *mddev, int total_slots)
                 bm_lockres->flags |= DLM_LKF_NOQUEUE;
                 ret = dlm_lock_sync(bm_lockres, DLM_LOCK_PW);
                 if (ret == -EAGAIN) {
-                       s = read_resync_info(mddev, bm_lockres);
-                       if (s) {
+                       if (read_resync_info(mddev, bm_lockres)) {
                                 pr_info("%s:%d Resync[%llu..%llu] in progress on %d\n",
                                                 __func__, __LINE__,
-                                               (unsigned long long) s->lo,
-                                               (unsigned long long) s->hi, i);
-                               spin_lock_irq(&cinfo->suspend_lock);
-                               s->slot = i;
-                               list_add(&s->list, &cinfo->suspend_list);
-                               spin_unlock_irq(&cinfo->suspend_lock);
+                                       (unsigned long long) cinfo->suspend_lo,
+                                       (unsigned long long) cinfo->suspend_hi,
+                                       i);
+                               cinfo->suspend_from = i;
                         }
                         ret = 0;
                         lockres_free(bm_lockres);
@@ -1001,10 +997,17 @@ static int leave(struct mddev *mddev)
         if (!cinfo)
                 return 0;
  
-       /* BITMAP_NEEDS_SYNC message should be sent when node
+       /*
+        * BITMAP_NEEDS_SYNC message should be sent when node
          * is leaving the cluster with dirty bitmap, also we
-        * can only deliver it when dlm connection is available */
-       if (cinfo->slot_number > 0 && mddev->recovery_cp != MaxSector)
+        * can only deliver it when dlm connection is available.
+        *
+        * Also, we should send BITMAP_NEEDS_SYNC message in
+        * case reshaping is interrupted.
+        */
+       if ((cinfo->slot_number > 0 && mddev->recovery_cp != MaxSector) ||
+           (mddev->reshape_position != MaxSector &&
+            test_bit(MD_CLOSING, &mddev->flags)))
                 resync_bitmap(mddev);
  
         set_bit(MD_CLUSTER_HOLDING_MUTEX_FOR_RECVD, &cinfo->state);
@@ -1102,6 +1105,80 @@ static void metadata_update_cancel(struct mddev *mddev)
         unlock_comm(cinfo);
  }
  
+/*
+ * Send a BITMAP_RESIZE cluster message whose 'high' field carries @size.
+ * Returns whatever sendmsg() returned; a non-zero result is also logged.
+ */
+static int update_bitmap_size(struct mddev *mddev, sector_t size)
+{
+       struct cluster_msg cmsg = {0};
+       int ret;
+
+       /* message fields are little-endian */
+       cmsg.type = cpu_to_le32(BITMAP_RESIZE);
+       cmsg.high = cpu_to_le64(size);
+
+       ret = sendmsg(mddev->cluster_info, &cmsg, 0);
+       if (!ret)
+               return 0;
+
+       pr_err("%s:%d: failed to send BITMAP_RESIZE message (%d)\n",
+               __func__, __LINE__, ret);
+       return ret;
+}
+
+/*
+ * Move the cluster to a bitmap sized for @newsize.
+ *
+ * A BITMAP_RESIZE message with @newsize is sent first.  Then, for every
+ * slot other than our own, the slot's bitmap is fetched and its page count
+ * is compared with ours.  If that slot's bitmap lock can be taken without
+ * queueing, the page count is set to ours directly.
+ *
+ * Returns 0 on success, or the error from the initial BITMAP_RESIZE send.
+ * On any later failure a BITMAP_RESIZE message with @oldsize is sent to
+ * revert, and -1 is returned.
+ */
+static int resize_bitmaps(struct mddev *mddev, sector_t newsize, sector_t oldsize)
+{
+       struct bitmap_counts *counts;
+       char str[64];
+       struct dlm_lock_resource *bm_lockres;
+       struct bitmap *bitmap = mddev->bitmap;
+       unsigned long my_pages = bitmap->counts.pages;
+       int i, rv;
+
+       /*
+        * We need to ensure all the nodes can grow to a larger
+        * bitmap size before starting the reshape.
+        */
+       rv = update_bitmap_size(mddev, newsize);
+       if (rv)
+               return rv;
+
+       for (i = 0; i < mddev->bitmap_info.nodes; i++) {
+               if (i == md_cluster_ops->slot_number(mddev))
+                       continue;
+
+               bitmap = get_bitmap_from_slot(mddev, i);
+               if (IS_ERR(bitmap)) {
+                       pr_err("can't get bitmap from slot %d\n", i);
+                       /*
+                        * 'bitmap' holds an error code, not an object:
+                        * it must not reach md_bitmap_free().
+                        */
+                       goto revert;
+               }
+               counts = &bitmap->counts;
+
+               /*
+                * If we can hold the bitmap lock of one node then
+                * the slot is not occupied, update the pages.
+                */
+               snprintf(str, sizeof(str), "bitmap%04d", i);
+               bm_lockres = lockres_init(mddev, str, NULL, 1);
+               if (!bm_lockres) {
+                       pr_err("Cannot initialize %s lock\n", str);
+                       goto out;
+               }
+               bm_lockres->flags |= DLM_LKF_NOQUEUE;
+               rv = dlm_lock_sync(bm_lockres, DLM_LOCK_PW);
+               if (!rv)
+                       counts->pages = my_pages;
+               lockres_free(bm_lockres);
+
+               if (my_pages != counts->pages) {
+                       /*
+                        * Let's revert the bitmap size if one node
+                        * can't resize bitmap
+                        */
+                       goto out;
+               }
+
+               /*
+                * Done with the bitmap fetched for this slot; release it
+                * before the next iteration overwrites the pointer.
+                */
+               md_bitmap_free(bitmap);
+       }
+
+       return 0;
+out:
+       md_bitmap_free(bitmap);
+revert:
+       update_bitmap_size(mddev, oldsize);
+       return -1;
+}
+
  /*
   * return 0 if all the bitmaps have the same sync_size
   */
@@ -1243,6 +1320,16 @@ static int resync_start(struct mddev *mddev)
         return dlm_lock_sync_interruptible(cinfo->resync_lockres, DLM_LOCK_EX, mddev);
  }
  
+/*
+ * Report the suspend window currently recorded in the cluster info.
+ * Both bounds are read under suspend_lock and stored to *lo and *hi.
+ */
+static void resync_info_get(struct mddev *mddev, sector_t *lo, sector_t *hi)
+{
+       struct md_cluster_info *ci = mddev->cluster_info;
+
+       spin_lock_irq(&ci->suspend_lock);
+       *hi = ci->suspend_hi;
+       *lo = ci->suspend_lo;
+       spin_unlock_irq(&ci->suspend_lock);
+}
+
  static int resync_info_update(struct mddev *mddev, sector_t lo, sector_t hi)
  {
         struct md_cluster_info *cinfo = mddev->cluster_info;
@@ -1295,21 +1382,14 @@ static int area_resyncing(struct mddev *mddev, int direction,
  {
         struct md_cluster_info *cinfo = mddev->cluster_info;
         int ret = 0;
-       struct suspend_info *s;
  
         if ((direction == READ) &&
                 test_bit(MD_CLUSTER_SUSPEND_READ_BALANCING, &cinfo->state))
                 return 1;
  
         spin_lock_irq(&cinfo->suspend_lock);
-       if (list_empty(&cinfo->suspend_list))
-               goto out;
-       list_for_each_entry(s, &cinfo->suspend_list, list)
-               if (hi > s->lo && lo < s->hi) {
-                       ret = 1;
-                       break;
-               }
-out:
+       if (hi > cinfo->suspend_lo && lo < cinfo->suspend_hi)
+               ret = 1;
         spin_unlock_irq(&cinfo->suspend_lock);
         return ret;
  }
@@ -1482,6 +1562,7 @@ static struct md_cluster_operations cluster_ops = {
         .resync_start = resync_start,
         .resync_finish = resync_finish,
         .resync_info_update = resync_info_update,
+       .resync_info_get = resync_info_get,
         .metadata_update_start = metadata_update_start,
         .metadata_update_finish = metadata_update_finish,
         .metadata_update_cancel = metadata_update_cancel,
@@ -1492,6 +1573,7 @@ static struct md_cluster_operations cluster_ops = {
         .remove_disk = remove_disk,
         .load_bitmaps = load_bitmaps,
         .gather_bitmaps = gather_bitmaps,
+       .resize_bitmaps = resize_bitmaps,
         .lock_all_bitmaps = lock_all_bitmaps,
         .unlock_all_bitmaps = unlock_all_bitmaps,
         .update_size = update_size,
diff --git a/drivers/md/md-cluster.h b/drivers/md/md-cluster.h

index c024070..a78e302 100644 (file)
--- a/drivers/md/md-cluster.h
+++ b/drivers/md/md-cluster.h
@@ -14,6 +14,7 @@ struct md_cluster_operations {
         int (*leave)(struct mddev *mddev);
         int (*slot_number)(struct mddev *mddev);
         int (*resync_info_update)(struct mddev *mddev, sector_t lo, sector_t hi);
+       void (*resync_info_get)(struct mddev *mddev, sector_t *lo, sector_t *hi);
         int (*metadata_update_start)(struct mddev *mddev);
         int (*metadata_update_finish)(struct mddev *mddev);
         void (*metadata_update_cancel)(struct mddev *mddev);
@@ -26,6 +27,7 @@ struct md_cluster_operations {
         int (*remove_disk)(struct mddev *mddev, struct md_rdev *rdev);
         void (*load_bitmaps)(struct mddev *mddev, int total_slots);
         int (*gather_bitmaps)(struct md_rdev *rdev);
+       int (*resize_bitmaps)(struct mddev *mddev, sector_t newsize, sector_t oldsize);
         int (*lock_all_bitmaps)(struct mddev *mddev);
         void (*unlock_all_bitmaps)(struct mddev *mddev);
         void (*update_size)(struct mddev *mddev, sector_t old_dev_sectors);
diff --git a/drivers/md/md.c b/drivers/md/md.c

index 63ceabb..fc488cb 100644 (file)
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -452,10 +452,11 @@ static void md_end_flush(struct bio *fbio)
         rdev_dec_pending(rdev, mddev);
  
         if (atomic_dec_and_test(&fi->flush_pending)) {
-               if (bio->bi_iter.bi_size == 0)
+               if (bio->bi_iter.bi_size == 0) {
                         /* an empty barrier - all done */
                         bio_endio(bio);
-               else {
+                       mempool_free(fi, mddev->flush_pool);
+               } else {
                         INIT_WORK(&fi->flush_work, submit_flushes);
                         queue_work(md_wq, &fi->flush_work);
                 }
@@ -509,10 +510,11 @@ void md_flush_request(struct mddev *mddev, struct bio *bio)
         rcu_read_unlock();
  
         if (atomic_dec_and_test(&fi->flush_pending)) {
-               if (bio->bi_iter.bi_size == 0)
+               if (bio->bi_iter.bi_size == 0) {
                         /* an empty barrier - all done */
                         bio_endio(bio);
-               else {
+                       mempool_free(fi, mddev->flush_pool);
+               } else {
                         INIT_WORK(&fi->flush_work, submit_flushes);
                         queue_work(md_wq, &fi->flush_work);
                 }
@@ -5904,14 +5906,6 @@ static void __md_stop(struct mddev *mddev)
                 mddev->to_remove = &md_redundancy_group;
         module_put(pers->owner);
         clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
-}
-
-void md_stop(struct mddev *mddev)
-{
-       /* stop the array and free an attached data structures.
-        * This is called from dm-raid
-        */
-       __md_stop(mddev);
         if (mddev->flush_bio_pool) {
                 mempool_destroy(mddev->flush_bio_pool);
                 mddev->flush_bio_pool = NULL;
@@ -5920,6 +5914,14 @@ void md_stop(struct mddev *mddev)
                 mempool_destroy(mddev->flush_pool);
                 mddev->flush_pool = NULL;
         }
+}
+
+void md_stop(struct mddev *mddev)
+{
+       /* stop the array and free an attached data structures.
+        * This is called from dm-raid
+        */
+       __md_stop(mddev);
         bioset_exit(&mddev->bio_set);
         bioset_exit(&mddev->sync_set);
  }
@@ -8370,9 +8372,17 @@ void md_do_sync(struct md_thread *thread)
                 else if (!mddev->bitmap)
                         j = mddev->recovery_cp;
  
-       } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
+       } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) {
                 max_sectors = mddev->resync_max_sectors;
-       else {
+               /*
+                * If the original node aborted the reshape then we continue
+                * it here, so set j to reshape_position to avoid restarting
+                * the reshape from the very beginning.
+                */
+               if (mddev_is_clustered(mddev) &&
+                   mddev->reshape_position != MaxSector)
+                       j = mddev->reshape_position;
+       } else {
                 /* recovery follows the physical size of devices */
                 max_sectors = mddev->dev_sectors;
                 j = MaxSector;
@@ -8623,8 +8633,10 @@ void md_do_sync(struct md_thread *thread)
                 mddev_lock_nointr(mddev);
                 md_set_array_sectors(mddev, mddev->pers->size(mddev, 0, 0));
                 mddev_unlock(mddev);
-               set_capacity(mddev->gendisk, mddev->array_sectors);
-               revalidate_disk(mddev->gendisk);
+               if (!mddev_is_clustered(mddev)) {
+                       set_capacity(mddev->gendisk, mddev->array_sectors);
+                       revalidate_disk(mddev->gendisk);
+               }
         }
  
         spin_lock(&mddev->lock);
@@ -8790,6 +8802,18 @@ static void md_start_sync(struct work_struct *ws)
   */
  void md_check_recovery(struct mddev *mddev)
  {
+       if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags) && mddev->sb_flags) {
+               /* Write superblock - thread that called mddev_suspend()
+                * holds reconfig_mutex for us.
+                */
+               set_bit(MD_UPDATING_SB, &mddev->flags);
+               smp_mb__after_atomic();
+               if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags))
+                       md_update_sb(mddev, 0);
+               clear_bit_unlock(MD_UPDATING_SB, &mddev->flags);
+               wake_up(&mddev->sb_wait);
+       }
+
         if (mddev->suspended)
                 return;
  
@@ -8949,16 +8973,6 @@ void md_check_recovery(struct mddev *mddev)
         unlock:
                 wake_up(&mddev->sb_wait);
                 mddev_unlock(mddev);
-       } else if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags) && mddev->sb_flags) {
-               /* Write superblock - thread that called mddev_suspend()
-                * holds reconfig_mutex for us.
-                */
-               set_bit(MD_UPDATING_SB, &mddev->flags);
-               smp_mb__after_atomic();
-               if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags))
-                       md_update_sb(mddev, 0);
-               clear_bit_unlock(MD_UPDATING_SB, &mddev->flags);
-               wake_up(&mddev->sb_wait);
         }
  }
  EXPORT_SYMBOL(md_check_recovery);
@@ -8966,6 +8980,8 @@ EXPORT_SYMBOL(md_check_recovery);
  void md_reap_sync_thread(struct mddev *mddev)
  {
         struct md_rdev *rdev;
+       sector_t old_dev_sectors = mddev->dev_sectors;
+       bool is_reshaped = false;
  
         /* resync has finished, collect result */
         md_unregister_thread(&mddev->sync_thread);
@@ -8980,8 +8996,11 @@ void md_reap_sync_thread(struct mddev *mddev)
                 }
         }
         if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
-           mddev->pers->finish_reshape)
+           mddev->pers->finish_reshape) {
                 mddev->pers->finish_reshape(mddev);
+               if (mddev_is_clustered(mddev))
+                       is_reshaped = true;
+       }
  
         /* If array is no-longer degraded, then any saved_raid_disk
          * information must be scrapped.
@@ -9002,6 +9021,14 @@ void md_reap_sync_thread(struct mddev *mddev)
         clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
         clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
         clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
+       /*
+        * We call md_cluster_ops->update_size here because sync_size could
+        * be changed by md_update_sb, and MD_RECOVERY_RESHAPE is cleared,
+        * so it is time to update size across cluster.
+        */
+       if (mddev_is_clustered(mddev) && is_reshaped
+                                     && !test_bit(MD_CLOSING, &mddev->flags))
+               md_cluster_ops->update_size(mddev, old_dev_sectors);
         wake_up(&resync_wait);
         /* flag recovery needed just to double check */
         set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
@@ -9201,8 +9228,12 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
                 }
  
                 if (role != rdev2->raid_disk) {
-                       /* got activated */
-                       if (rdev2->raid_disk == -1 && role != 0xffff) {
+                       /*
+                        * got activated except reshape is happening.
+                        */
+                       if (rdev2->raid_disk == -1 && role != 0xffff &&
+                           !(le32_to_cpu(sb->feature_map) &
+                             MD_FEATURE_RESHAPE_ACTIVE)) {
                                 rdev2->saved_raid_disk = role;
                                 ret = remove_and_add_spares(mddev, rdev2);
                                 pr_info("Activated spare: %s\n",
@@ -9228,6 +9259,30 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
         if (mddev->raid_disks != le32_to_cpu(sb->raid_disks))
                 update_raid_disks(mddev, le32_to_cpu(sb->raid_disks));
  
+       /*
+        * mddev->delta_disks has already been updated in update_raid_disks,
+        * so it is time to check reshape.
+        */
+       if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) &&
+           (le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
+               /*
+                * Reshape is running on the remote node: adopt its on-disk
+                * (little-endian) reshape_position and call start_reshape.
+                */
+               mddev->reshape_position = le64_to_cpu(sb->reshape_position);
+               if (mddev->pers->update_reshape_pos)
+                       mddev->pers->update_reshape_pos(mddev);
+               if (mddev->pers->start_reshape)
+                       mddev->pers->start_reshape(mddev);
+       } else if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) &&
+                  mddev->reshape_position != MaxSector &&
+                  !(le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
+               /* reshape is just done in another node. */
+               mddev->reshape_position = MaxSector;
+               if (mddev->pers->update_reshape_pos)
+                       mddev->pers->update_reshape_pos(mddev);
+       }
+
         /* Finally set the event to be up to date */
         mddev->events = le64_to_cpu(sb->events);
  }
diff --git a/drivers/md/md.h b/drivers/md/md.h

index 8afd6bf..c52afb5 100644 (file)
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -557,6 +557,7 @@ struct md_personality
         int (*check_reshape) (struct mddev *mddev);
         int (*start_reshape) (struct mddev *mddev);
         void (*finish_reshape) (struct mddev *mddev);
+       void (*update_reshape_pos) (struct mddev *mddev);
         /* quiesce suspends or resumes internal processing.
          * 1 - stop new actions and wait for action io to complete
          * 0 - return to normal behaviour
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c

index 4e99024..1d54109 100644 (file)
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1734,6 +1734,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
          */
         if (rdev->saved_raid_disk >= 0 &&
             rdev->saved_raid_disk >= first &&
+           rdev->saved_raid_disk < conf->raid_disks &&
             conf->mirrors[rdev->saved_raid_disk].rdev == NULL)
                 first = last = rdev->saved_raid_disk;
  
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c

index d6f7978..b98e746 100644 (file)
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -25,6 +25,7 @@
  #include <linux/seq_file.h>
  #include <linux/ratelimit.h>
  #include <linux/kthread.h>
+#include <linux/raid/md_p.h>
  #include <trace/events/block.h>
  #include "md.h"
  #include "raid10.h"
@@ -1808,6 +1809,7 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
                 first = last = rdev->raid_disk;
  
         if (rdev->saved_raid_disk >= first &&
+           rdev->saved_raid_disk < conf->geo.raid_disks &&
             conf->mirrors[rdev->saved_raid_disk].rdev == NULL)
                 mirror = rdev->saved_raid_disk;
         else
@@ -3079,6 +3081,8 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
                         sector_t sect;
                         int must_sync;
                         int any_working;
+                       int need_recover = 0;
+                       int need_replace = 0;
                         struct raid10_info *mirror = &conf->mirrors[i];
                         struct md_rdev *mrdev, *mreplace;
  
@@ -3086,11 +3090,15 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
                         mrdev = rcu_dereference(mirror->rdev);
                         mreplace = rcu_dereference(mirror->replacement);
  
-                       if ((mrdev == NULL ||
-                            test_bit(Faulty, &mrdev->flags) ||
-                            test_bit(In_sync, &mrdev->flags)) &&
-                           (mreplace == NULL ||
-                            test_bit(Faulty, &mreplace->flags))) {
+                       if (mrdev != NULL &&
+                           !test_bit(Faulty, &mrdev->flags) &&
+                           !test_bit(In_sync, &mrdev->flags))
+                               need_recover = 1;
+                       if (mreplace != NULL &&
+                           !test_bit(Faulty, &mreplace->flags))
+                               need_replace = 1;
+
+                       if (!need_recover && !need_replace) {
                                 rcu_read_unlock();
                                 continue;
                         }
@@ -3213,7 +3221,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
                                 r10_bio->devs[1].devnum = i;
                                 r10_bio->devs[1].addr = to_addr;
  
-                               if (!test_bit(In_sync, &mrdev->flags)) {
+                               if (need_recover) {
                                         bio = r10_bio->devs[1].bio;
                                         bio->bi_next = biolist;
                                         biolist = bio;
@@ -3230,16 +3238,11 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
                                 bio = r10_bio->devs[1].repl_bio;
                                 if (bio)
                                         bio->bi_end_io = NULL;
-                               /* Note: if mreplace != NULL, then bio
+                               /* Note: if need_replace, then bio
                                  * cannot be NULL as r10buf_pool_alloc will
                                  * have allocated it.
-                                * So the second test here is pointless.
-                                * But it keeps semantic-checkers happy, and
-                                * this comment keeps human reviewers
-                                * happy.
                                  */
-                               if (mreplace == NULL || bio == NULL ||
-                                   test_bit(Faulty, &mreplace->flags))
+                               if (!need_replace)
                                         break;
                                 bio->bi_next = biolist;
                                 biolist = bio;
@@ -4286,12 +4289,46 @@ static int raid10_start_reshape(struct mddev *mddev)
         spin_unlock_irq(&conf->device_lock);
  
         if (mddev->delta_disks && mddev->bitmap) {
-               ret = md_bitmap_resize(mddev->bitmap,
-                                      raid10_size(mddev, 0, conf->geo.raid_disks),
-                                      0, 0);
+               struct mdp_superblock_1 *sb = NULL;
+               sector_t oldsize, newsize;
+
+               oldsize = raid10_size(mddev, 0, 0);
+               newsize = raid10_size(mddev, 0, conf->geo.raid_disks);
+
+               if (!mddev_is_clustered(mddev)) {
+                       ret = md_bitmap_resize(mddev->bitmap, newsize, 0, 0);
+                       if (ret)
+                               goto abort;
+                       else
+                               goto out;
+               }
+
+               rdev_for_each(rdev, mddev) {
+                       if (rdev->raid_disk > -1 &&
+                           !test_bit(Faulty, &rdev->flags))
+                               sb = page_address(rdev->sb_page);
+               }
+
+               /*
+                * some node is already performing reshape, and no need to
+                * call md_bitmap_resize again since it should be called when
+                * receiving BITMAP_RESIZE msg
+                */
+               if ((sb && (le32_to_cpu(sb->feature_map) &
+                           MD_FEATURE_RESHAPE_ACTIVE)) || (oldsize == newsize))
+                       goto out;
+
+               ret = md_bitmap_resize(mddev->bitmap, newsize, 0, 0);
                 if (ret)
                         goto abort;
+
+               ret = md_cluster_ops->resize_bitmaps(mddev, newsize, oldsize);
+               if (ret) {
+                       md_bitmap_resize(mddev->bitmap, oldsize, 0, 0);
+                       goto abort;
+               }
         }
+out:
         if (mddev->delta_disks > 0) {
                 rdev_for_each(rdev, mddev)
                         if (rdev->raid_disk < 0 &&
@@ -4568,6 +4605,32 @@ read_more:
         r10_bio->master_bio = read_bio;
         r10_bio->read_slot = r10_bio->devs[r10_bio->read_slot].devnum;
  
+       /*
+        * Broadcast a RESYNC message to the other nodes so that none of them
+        * writes to the region, to avoid conflicts.
+        */
+       if (mddev_is_clustered(mddev) && conf->cluster_sync_high <= sector_nr) {
+               struct mdp_superblock_1 *sb = NULL;
+               sector_t sb_reshape_pos = 0;    /* 64-bit, like the sb field */
+
+               conf->cluster_sync_low = sector_nr;
+               conf->cluster_sync_high = sector_nr + CLUSTER_RESYNC_WINDOW_SECTORS;
+               sb = page_address(rdev->sb_page);
+               if (sb) {
+                       sb_reshape_pos = le64_to_cpu(sb->reshape_position);
+                       /*
+                        * Set cluster_sync_low again if the next address for
+                        * array reshape is less than cluster_sync_low, since we
+                        * can't update cluster_sync_low until reshape finishes.
+                        */
+                       if (sb_reshape_pos < conf->cluster_sync_low)
+                               conf->cluster_sync_low = sb_reshape_pos;
+               }
+
+               md_cluster_ops->resync_info_update(mddev, conf->cluster_sync_low,
+                                                         conf->cluster_sync_high);
+       }
+
         /* Now find the locations in the new layout */
         __raid10_find_phys(&conf->geo, r10_bio);
  
@@ -4719,6 +4782,19 @@ static void end_reshape(struct r10conf *conf)
         conf->fullsync = 0;
  }
  
+static void raid10_update_reshape_pos(struct mddev *mddev)
+{
+       struct r10conf *conf = mddev->private;
+       sector_t lo, hi;        /* outputs of resync_info_get() below */
+
+       md_cluster_ops->resync_info_get(mddev, &lo, &hi);
+       if (((mddev->reshape_position <= hi) && (mddev->reshape_position >= lo))
+           || mddev->reshape_position == MaxSector)
+               conf->reshape_progress = mddev->reshape_position;
+       else
+               WARN_ON_ONCE(1);        /* outside [lo, hi] and not MaxSector */
+}
+
  static int handle_reshape_read_error(struct mddev *mddev,
                                      struct r10bio *r10_bio)
  {
@@ -4887,6 +4963,7 @@ static struct md_personality raid10_personality =
         .check_reshape  = raid10_check_reshape,
         .start_reshape  = raid10_start_reshape,
         .finish_reshape = raid10_finish_reshape,
+       .update_reshape_pos = raid10_update_reshape_pos,
         .congested      = raid10_congested,
  };
  
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c

index e6e925a..ec3a5ef 100644 (file)
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -3151,8 +3151,6 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
         set_bit(MD_HAS_JOURNAL, &conf->mddev->flags);
         return 0;
  
-       rcu_assign_pointer(conf->log, NULL);
-       md_unregister_thread(&log->reclaim_thread);
  reclaim_thread:
         mempool_exit(&log->meta_pool);
  out_mempool:
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c

index e4e98f4..4990f03 100644 (file)
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2681,6 +2681,18 @@ static void raid5_error(struct mddev *mddev, struct md_rdev *rdev)
         pr_debug("raid456: error called\n");
  
         spin_lock_irqsave(&conf->device_lock, flags);
+
+       if (test_bit(In_sync, &rdev->flags) &&
+           mddev->degraded == conf->max_degraded) {
+               /*
+                * Don't let the array reach the failed state, and
+                * don't try to recover this device.
+                */
+               conf->recovery_disabled = mddev->recovery_disabled;
+               spin_unlock_irqrestore(&conf->device_lock, flags);
+               return;
+       }
+
         set_bit(Faulty, &rdev->flags);
         clear_bit(In_sync, &rdev->flags);
         mddev->degraded = raid5_calc_degraded(conf);
diff --git a/drivers/misc/ocxl/config.c b/drivers/misc/ocxl/config.c

index 2e30de9..57a6bb1 100644 (file)
--- a/drivers/misc/ocxl/config.c
+++ b/drivers/misc/ocxl/config.c
@@ -280,7 +280,9 @@ int ocxl_config_check_afu_index(struct pci_dev *dev,
         u32 val;
         int rc, templ_major, templ_minor, len;
  
-       pci_write_config_word(dev, fn->dvsec_afu_info_pos, afu_idx);
+       pci_write_config_byte(dev,
+                       fn->dvsec_afu_info_pos + OCXL_DVSEC_AFU_INFO_AFU_IDX,
+                       afu_idx);
         rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_VERSION, &val);
         if (rc)
                 return rc;
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c

index d96a84a..0cc911f 100644 (file)
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -119,7 +119,6 @@
  #include <linux/tcp.h>
  #include <linux/if_vlan.h>
  #include <linux/interrupt.h>
-#include <net/busy_poll.h>
  #include <linux/clk.h>
  #include <linux/if_ether.h>
  #include <linux/net_tstamp.h>
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c

index 5a727d4..686899d 100644 (file)
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -27,7 +27,6 @@
  #include <net/tcp.h>
  #include <net/ipv6.h>
  #include <net/ip6_checksum.h>
-#include <net/busy_poll.h>
  #include <linux/prefetch.h>
  #include "bnx2x_cmn.h"
  #include "bnx2x_init.h"
diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c

index 35564a8..a6cbaca 100644 (file)
--- a/drivers/net/ethernet/broadcom/genet/bcmmii.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c
@@ -341,7 +341,7 @@ static struct device_node *bcmgenet_mii_of_find_mdio(struct bcmgenet_priv *priv)
         if (!compat)
                 return NULL;
  
-       priv->mdio_dn = of_find_compatible_node(dn, NULL, compat);
+       priv->mdio_dn = of_get_compatible_child(dn, compat);
         kfree(compat);
         if (!priv->mdio_dn) {
                 dev_err(kdev, "unable to find MDIO bus node\n");
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c

index 8f5bf91..1d86b4d 100644 (file)
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -1684,7 +1684,7 @@ static int macb_pad_and_fcs(struct sk_buff **skb, struct net_device *ndev)
                         padlen = 0;
                 /* No room for FCS, need to reallocate skb. */
                 else
-                       padlen = ETH_FCS_LEN - tailroom;
+                       padlen = ETH_FCS_LEN;
         } else {
                 /* Add room for FCS. */
                 padlen += ETH_FCS_LEN;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c

index f7e363b..dca6f23 100644 (file)
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
@@ -859,10 +859,12 @@ static int hclge_log_and_clear_ppp_error(struct hclge_dev *hdev, u32 cmd,
                 reset_level = HNAE3_FUNC_RESET;
         }
  
-       err_sts = (le32_to_cpu(desc[0].data[4]) >> 8) & 0x3;
-       if (err_sts) {
-               hclge_log_error(dev, hw_err_lst3, err_sts);
-               reset_level = HNAE3_FUNC_RESET;
+       if (cmd == HCLGE_PPP_CMD0_INT_CMD) {
+               err_sts = (le32_to_cpu(desc[0].data[4]) >> 8) & 0x3;
+               if (err_sts) {
+                       hclge_log_error(dev, hw_err_lst3, err_sts);
+                       reset_level = HNAE3_FUNC_RESET;
+               }
         }
  
         /* clear PPP INT */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c

index 740ea58..aef3c89 100644 (file)
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -2,7 +2,6 @@
  /* Copyright(c) 2013 - 2018 Intel Corporation. */
  
  #include <linux/prefetch.h>
-#include <net/busy_poll.h>
  #include <linux/bpf_trace.h>
  #include <net/xdp.h>
  #include "i40e.h"
diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c

index edc349f..fb9bfad 100644 (file)
--- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
@@ -2,7 +2,6 @@
  /* Copyright(c) 2013 - 2018 Intel Corporation. */
  
  #include <linux/prefetch.h>
-#include <net/busy_poll.h>
  
  #include "iavf.h"
  #include "iavf_trace.h"
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c

index c52f450..8cd6a24 100644 (file)
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -42,6 +42,23 @@ static enum ice_status ice_set_mac_type(struct ice_hw *hw)
         return 0;
  }
  
+/**
+ * ice_dev_onetime_setup - Temporary HW/FW workarounds
+ * @hw: pointer to the HW structure
+ *
+ * Applies temporary register workarounds (Rx non-PXE mode, VFs per PF)
+ * for issues that are expected to be fixed in the HW/FW.
+ */
+void ice_dev_onetime_setup(struct ice_hw *hw)
+{
+       /* configure Rx - set non pxe mode by writing 0x1 to GLLAN_RCTL_0 */
+       wr32(hw, GLLAN_RCTL_0, 0x1);
+
+#define MBX_PF_VT_PFALLOC      0x00231E80
+       /* set VFs per PF: copy PF_VT_PFALLOC_HIF into MBX_PF_VT_PFALLOC */
+       wr32(hw, MBX_PF_VT_PFALLOC, rd32(hw, PF_VT_PFALLOC_HIF));
+}
+
  /**
   * ice_clear_pf_cfg - Clear PF configuration
   * @hw: pointer to the hardware structure
@@ -218,7 +235,7 @@ static enum ice_media_type ice_get_media_type(struct ice_port_info *pi)
   *
   * Get Link Status (0x607). Returns the link status of the adapter.
   */
-enum ice_status
+static enum ice_status
  ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse,
                      struct ice_link_status *link, struct ice_sq_cd *cd)
  {
@@ -740,6 +757,8 @@ enum ice_status ice_init_hw(struct ice_hw *hw)
         if (status)
                 goto err_unroll_sched;
  
+       ice_dev_onetime_setup(hw);
+
         /* Get MAC information */
         /* A single port can report up to two (LAN and WoL) addresses */
         mac_buf = devm_kcalloc(ice_hw_to_dev(hw), 2,
@@ -1531,9 +1550,7 @@ ice_aq_discover_caps(struct ice_hw *hw, void *buf, u16 buf_size, u32 *cap_count,
         if (!status)
                 ice_parse_caps(hw, buf, le32_to_cpu(cmd->count), opc);
         else if (hw->adminq.sq_last_status == ICE_AQ_RC_ENOMEM)
-               *cap_count =
-                       DIV_ROUND_UP(le16_to_cpu(desc.datalen),
-                                    sizeof(struct ice_aqc_list_caps_elem));
+               *cap_count = le32_to_cpu(cmd->count);
         return status;
  }
  
@@ -1987,33 +2004,6 @@ ice_aq_set_link_restart_an(struct ice_port_info *pi, bool ena_link,
         return ice_aq_send_cmd(pi->hw, &desc, NULL, 0, cd);
  }
  
-/**
- * ice_aq_set_event_mask
- * @hw: pointer to the hw struct
- * @port_num: port number of the physical function
- * @mask: event mask to be set
- * @cd: pointer to command details structure or NULL
- *
- * Set event mask (0x0613)
- */
-enum ice_status
-ice_aq_set_event_mask(struct ice_hw *hw, u8 port_num, u16 mask,
-                     struct ice_sq_cd *cd)
-{
-       struct ice_aqc_set_event_mask *cmd;
-       struct ice_aq_desc desc;
-
-       cmd = &desc.params.set_event_mask;
-
-       ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_event_mask);
-
-       cmd->lport_num = port_num;
-
-       cmd->event_mask = cpu_to_le16(mask);
-
-       return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
-}
-
  /**
   * __ice_aq_get_set_rss_lut
   * @hw: pointer to the hardware structure
diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h

index 1900681..cf760c2 100644 (file)
--- a/drivers/net/ethernet/intel/ice/ice_common.h
+++ b/drivers/net/ethernet/intel/ice/ice_common.h
@@ -34,6 +34,9 @@ ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq,
                 struct ice_sq_cd *cd);
  void ice_clear_pxe_mode(struct ice_hw *hw);
  enum ice_status ice_get_caps(struct ice_hw *hw);
+
+void ice_dev_onetime_setup(struct ice_hw *hw);
+
  enum ice_status
  ice_write_rxq_ctx(struct ice_hw *hw, struct ice_rlan_ctx *rlan_ctx,
                   u32 rxq_index);
@@ -83,12 +86,6 @@ enum ice_status
  ice_aq_set_link_restart_an(struct ice_port_info *pi, bool ena_link,
                            struct ice_sq_cd *cd);
  enum ice_status
-ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse,
-                    struct ice_link_status *link, struct ice_sq_cd *cd);
-enum ice_status
-ice_aq_set_event_mask(struct ice_hw *hw, u8 port_num, u16 mask,
-                     struct ice_sq_cd *cd);
-enum ice_status
  ice_dis_vsi_txq(struct ice_port_info *pi, u8 num_queues, u16 *q_ids,
                 u32 *q_teids, enum ice_disq_rst_src rst_src, u16 vmvf_num,
                 struct ice_sq_cd *cmd_details);
diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.h b/drivers/net/ethernet/intel/ice/ice_controlq.h

index 437f832..0038a41 100644 (file)
--- a/drivers/net/ethernet/intel/ice/ice_controlq.h
+++ b/drivers/net/ethernet/intel/ice/ice_controlq.h
@@ -19,11 +19,10 @@
  
  /* Defines that help manage the driver vs FW API checks.
   * Take a look at ice_aq_ver_check in ice_controlq.c for actual usage.
- *
   */
  #define EXP_FW_API_VER_BRANCH          0x00
-#define EXP_FW_API_VER_MAJOR           0x00
-#define EXP_FW_API_VER_MINOR           0x01
+#define EXP_FW_API_VER_MAJOR           0x01
+#define EXP_FW_API_VER_MINOR           0x03
  
  /* Different control queue types: These are mainly for SW consumption. */
  enum ice_ctl_q {
diff --git a/drivers/net/ethernet/intel/ice/ice_devids.h b/drivers/net/ethernet/intel/ice/ice_devids.h

index a6f0a5c..f8d5c66 100644 (file)
--- a/drivers/net/ethernet/intel/ice/ice_devids.h
+++ b/drivers/net/ethernet/intel/ice/ice_devids.h
@@ -6,10 +6,10 @@
  
  /* Device IDs */
  /* Intel(R) Ethernet Controller E810-C for backplane */
-#define ICE_DEV_ID_C810_BACKPLANE      0x1591
+#define ICE_DEV_ID_E810C_BACKPLANE     0x1591
  /* Intel(R) Ethernet Controller E810-C for QSFP */
-#define ICE_DEV_ID_C810_QSFP           0x1592
+#define ICE_DEV_ID_E810C_QSFP          0x1592
  /* Intel(R) Ethernet Controller E810-C for SFP */
-#define ICE_DEV_ID_C810_SFP            0x1593
+#define ICE_DEV_ID_E810C_SFP           0x1593
  
  #endif /* _ICE_DEVIDS_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h

index a6679a9..5fdea6e 100644 (file)
--- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
+++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
@@ -157,6 +157,13 @@
  #define VPINT_ALLOC_LAST_S                     12
  #define VPINT_ALLOC_LAST_M                     ICE_M(0x7FF, 12)
  #define VPINT_ALLOC_VALID_M                    BIT(31)
+#define VPINT_ALLOC_PCI(_VF)                   (0x0009D000 + ((_VF) * 4))
+#define VPINT_ALLOC_PCI_FIRST_S                        0
+#define VPINT_ALLOC_PCI_FIRST_M                        ICE_M(0x7FF, 0)
+#define VPINT_ALLOC_PCI_LAST_S                 12
+#define VPINT_ALLOC_PCI_LAST_M                 ICE_M(0x7FF, 12)
+#define VPINT_ALLOC_PCI_VALID_M                        BIT(31)
+#define GLLAN_RCTL_0                           0x002941F8
  #define QRX_CONTEXT(_i, _QRX)                  (0x00280000 + ((_i) * 8192 + (_QRX) * 4))
  #define QRX_CTRL(_QRX)                         (0x00120000 + ((_QRX) * 4))
  #define QRX_CTRL_MAX_INDEX                     2047
@@ -320,6 +327,7 @@
  #define GLV_UPRCL(_i)                          (0x003B2000 + ((_i) * 8))
  #define GLV_UPTCH(_i)                          (0x0030A004 + ((_i) * 8))
  #define GLV_UPTCL(_i)                          (0x0030A000 + ((_i) * 8))
+#define PF_VT_PFALLOC_HIF                      0x0009DD80
  #define VSIQF_HKEY_MAX_INDEX                   12
  #define VSIQF_HLUT_MAX_INDEX                   15
  #define VFINT_DYN_CTLN(_i)                     (0x00003800 + ((_i) * 4))
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c

index 49f1940..5bacad0 100644 (file)
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -433,7 +433,7 @@ int ice_vsi_clear(struct ice_vsi *vsi)
   * @irq: interrupt number
   * @data: pointer to a q_vector
   */
-irqreturn_t ice_msix_clean_rings(int __always_unused irq, void *data)
+static irqreturn_t ice_msix_clean_rings(int __always_unused irq, void *data)
  {
         struct ice_q_vector *q_vector = (struct ice_q_vector *)data;
  
@@ -2529,6 +2529,7 @@ int ice_vsi_rebuild(struct ice_vsi *vsi)
         vsi->hw_base_vector = 0;
         ice_vsi_clear_rings(vsi);
         ice_vsi_free_arrays(vsi, false);
+       ice_dev_onetime_setup(&vsi->back->hw);
         ice_vsi_set_num_qs(vsi);
  
         /* Initialize VSI struct elements and create VSI in FW */
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h

index 677db40..3831b4f 100644 (file)
--- a/drivers/net/ethernet/intel/ice/ice_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_lib.h
@@ -73,5 +73,4 @@ int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc);
  
  int ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena);
  
-irqreturn_t ice_msix_clean_rings(int __always_unused irq, void *data);
  #endif /* !_ICE_LIB_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c

index 8f61b37..0599345 100644 (file)
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -456,35 +456,6 @@ static void ice_reset_subtask(struct ice_pf *pf)
         }
  }
  
-/**
- * ice_watchdog_subtask - periodic tasks not using event driven scheduling
- * @pf: board private structure
- */
-static void ice_watchdog_subtask(struct ice_pf *pf)
-{
-       int i;
-
-       /* if interface is down do nothing */
-       if (test_bit(__ICE_DOWN, pf->state) ||
-           test_bit(__ICE_CFG_BUSY, pf->state))
-               return;
-
-       /* make sure we don't do these things too often */
-       if (time_before(jiffies,
-                       pf->serv_tmr_prev + pf->serv_tmr_period))
-               return;
-
-       pf->serv_tmr_prev = jiffies;
-
-       /* Update the stats for active netdevs so the network stack
-        * can look at updated numbers whenever it cares to
-        */
-       ice_update_pf_stats(pf);
-       for (i = 0; i < pf->num_alloc_vsi; i++)
-               if (pf->vsi[i] && pf->vsi[i]->netdev)
-                       ice_update_vsi_stats(pf->vsi[i]);
-}
-
  /**
   * ice_print_link_msg - print link up or down message
   * @vsi: the VSI whose link status is being queried
@@ -554,36 +525,6 @@ void ice_print_link_msg(struct ice_vsi *vsi, bool isup)
                     speed, fc);
  }
  
-/**
- * ice_init_link_events - enable/initialize link events
- * @pi: pointer to the port_info instance
- *
- * Returns -EIO on failure, 0 on success
- */
-static int ice_init_link_events(struct ice_port_info *pi)
-{
-       u16 mask;
-
-       mask = ~((u16)(ICE_AQ_LINK_EVENT_UPDOWN | ICE_AQ_LINK_EVENT_MEDIA_NA |
-                      ICE_AQ_LINK_EVENT_MODULE_QUAL_FAIL));
-
-       if (ice_aq_set_event_mask(pi->hw, pi->lport, mask, NULL)) {
-               dev_dbg(ice_hw_to_dev(pi->hw),
-                       "Failed to set link event mask for port %d\n",
-                       pi->lport);
-               return -EIO;
-       }
-
-       if (ice_aq_get_link_info(pi, true, NULL, NULL)) {
-               dev_dbg(ice_hw_to_dev(pi->hw),
-                       "Failed to enable link events for port %d\n",
-                       pi->lport);
-               return -EIO;
-       }
-
-       return 0;
-}
-
  /**
   * ice_vsi_link_event - update the vsi's netdev
   * @vsi: the vsi on which the link event occurred
@@ -671,27 +612,35 @@ ice_link_event(struct ice_pf *pf, struct ice_port_info *pi)
  }
  
  /**
- * ice_handle_link_event - handle link event via ARQ
- * @pf: pf that the link event is associated with
- *
- * Return -EINVAL if port_info is null
- * Return status on succes
+ * ice_watchdog_subtask - periodic tasks not using event driven scheduling
+ * @pf: board private structure
   */
-static int ice_handle_link_event(struct ice_pf *pf)
+static void ice_watchdog_subtask(struct ice_pf *pf)
  {
-       struct ice_port_info *port_info;
-       int status;
+       int i;
  
-       port_info = pf->hw.port_info;
-       if (!port_info)
-               return -EINVAL;
+       /* if interface is down do nothing */
+       if (test_bit(__ICE_DOWN, pf->state) ||
+           test_bit(__ICE_CFG_BUSY, pf->state))
+               return;
  
-       status = ice_link_event(pf, port_info);
-       if (status)
-               dev_dbg(&pf->pdev->dev,
-                       "Could not process link event, error %d\n", status);
+       /* make sure we don't do these things too often */
+       if (time_before(jiffies,
+                       pf->serv_tmr_prev + pf->serv_tmr_period))
+               return;
  
-       return status;
+       pf->serv_tmr_prev = jiffies;
+
+       if (ice_link_event(pf, pf->hw.port_info))
+               dev_dbg(&pf->pdev->dev, "ice_link_event failed\n");
+
+       /* Update the stats for active netdevs so the network stack
+        * can look at updated numbers whenever it cares to
+        */
+       ice_update_pf_stats(pf);
+       for (i = 0; i < pf->num_alloc_vsi; i++)
+               if (pf->vsi[i] && pf->vsi[i]->netdev)
+                       ice_update_vsi_stats(pf->vsi[i]);
  }
  
  /**
@@ -797,11 +746,6 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type)
                 opcode = le16_to_cpu(event.desc.opcode);
  
                 switch (opcode) {
-               case ice_aqc_opc_get_link_status:
-                       if (ice_handle_link_event(pf))
-                               dev_err(&pf->pdev->dev,
-                                       "Could not handle link event\n");
-                       break;
                 case ice_mbx_opc_send_msg_to_pf:
                         ice_vc_process_vf_msg(pf, &event);
                         break;
@@ -2207,12 +2151,6 @@ static int ice_probe(struct pci_dev *pdev,
         /* since everything is good, start the service timer */
         mod_timer(&pf->serv_tmr, round_jiffies(jiffies + pf->serv_tmr_period));
  
-       err = ice_init_link_events(pf->hw.port_info);
-       if (err) {
-               dev_err(&pdev->dev, "ice_init_link_events failed: %d\n", err);
-               goto err_alloc_sw_unroll;
-       }
-
         return 0;
  
  err_alloc_sw_unroll:
@@ -2271,9 +2209,9 @@ static void ice_remove(struct pci_dev *pdev)
   *   Class, Class Mask, private data (not used) }
   */
  static const struct pci_device_id ice_pci_tbl[] = {
-       { PCI_VDEVICE(INTEL, ICE_DEV_ID_C810_BACKPLANE), 0 },
-       { PCI_VDEVICE(INTEL, ICE_DEV_ID_C810_QSFP), 0 },
-       { PCI_VDEVICE(INTEL, ICE_DEV_ID_C810_SFP), 0 },
+       { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_BACKPLANE), 0 },
+       { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_QSFP), 0 },
+       { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_SFP), 0 },
         /* required last entry */
         { 0, }
  };
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c

index c25e486..45f10f8 100644 (file)
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -173,6 +173,7 @@ static void ice_dis_vf_mappings(struct ice_vf *vf)
         vsi = pf->vsi[vf->lan_vsi_idx];
  
         wr32(hw, VPINT_ALLOC(vf->vf_id), 0);
+       wr32(hw, VPINT_ALLOC_PCI(vf->vf_id), 0);
  
         first = vf->first_vector_idx;
         last = first + pf->num_vf_msix - 1;
@@ -519,6 +520,10 @@ static void ice_ena_vf_mappings(struct ice_vf *vf)
                VPINT_ALLOC_VALID_M);
         wr32(hw, VPINT_ALLOC(vf->vf_id), reg);
  
+       reg = (((first << VPINT_ALLOC_PCI_FIRST_S) & VPINT_ALLOC_PCI_FIRST_M) |
+              ((last << VPINT_ALLOC_PCI_LAST_S) & VPINT_ALLOC_PCI_LAST_M) |
+              VPINT_ALLOC_PCI_VALID_M);
+       wr32(hw, VPINT_ALLOC_PCI(vf->vf_id), reg);
         /* map the interrupts to its functions */
         for (v = first; v <= last; v++) {
                 reg = (((abs_vf_id << GLINT_VECT2FUNC_VF_NUM_S) &
@@ -528,10 +533,11 @@ static void ice_ena_vf_mappings(struct ice_vf *vf)
                 wr32(hw, GLINT_VECT2FUNC(v), reg);
         }
  
+       /* set regardless of mapping mode */
+       wr32(hw, VPLAN_TXQ_MAPENA(vf->vf_id), VPLAN_TXQ_MAPENA_TX_ENA_M);
+
         /* VF Tx queues allocation */
         if (vsi->tx_mapping_mode == ICE_VSI_MAP_CONTIG) {
-               wr32(hw, VPLAN_TXQ_MAPENA(vf->vf_id),
-                    VPLAN_TXQ_MAPENA_TX_ENA_M);
                 /* set the VF PF Tx queue range
                  * VFNUMQ value should be set to (number of queues - 1). A value
                  * of 0 means 1 queue and a value of 255 means 256 queues
@@ -546,10 +552,11 @@ static void ice_ena_vf_mappings(struct ice_vf *vf)
                         "Scattered mode for VF Tx queues is not yet implemented\n");
         }
  
+       /* set regardless of mapping mode */
+       wr32(hw, VPLAN_RXQ_MAPENA(vf->vf_id), VPLAN_RXQ_MAPENA_RX_ENA_M);
+
         /* VF Rx queues allocation */
         if (vsi->rx_mapping_mode == ICE_VSI_MAP_CONTIG) {
-               wr32(hw, VPLAN_RXQ_MAPENA(vf->vf_id),
-                    VPLAN_RXQ_MAPENA_RX_ENA_M);
                 /* set the VF PF Rx queue range
                  * VFNUMQ value should be set to (number of queues - 1). A value
                  * of 0 means 1 queue and a value of 255 means 256 queues
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h

index 7a7679e..ec1b87c 100644 (file)
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -30,7 +30,6 @@
  #include "ixgbe_ipsec.h"
  
  #include <net/xdp.h>
-#include <net/busy_poll.h>
  
  /* common prefix used by pr_<> macros */
  #undef pr_fmt
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c

index 8890c95..a5ab7ef 100644 (file)
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
@@ -573,7 +573,7 @@ static int rvu_nix_aq_enq_inst(struct rvu *rvu, struct nix_aq_enq_req *req,
                                        sizeof(struct nix_cq_ctx_s));
                         else if (req->ctype == NIX_AQ_CTYPE_RSS)
                                 memcpy(&rsp->rss, ctx,
-                                      sizeof(struct nix_cq_ctx_s));
+                                      sizeof(struct nix_rsse_s));
                         else if (req->ctype == NIX_AQ_CTYPE_MCE)
                                 memcpy(&rsp->mce, ctx,
                                        sizeof(struct nix_rx_mce_s));
@@ -1294,7 +1294,7 @@ static int nix_update_mce_list(struct nix_mce_list *mce_list,
                 return 0;
  
         /* Add a new one to the list, at the tail */
-       mce = kzalloc(sizeof(*mce), GFP_KERNEL);
+       mce = kzalloc(sizeof(*mce), GFP_ATOMIC);
         if (!mce)
                 return -ENOMEM;
         mce->idx = idx;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c

index fe49384..b744cd4 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -39,7 +39,6 @@
  #include <linux/slab.h>
  #include <linux/hash.h>
  #include <net/ip.h>
-#include <net/busy_poll.h>
  #include <net/vxlan.h>
  #include <net/devlink.h>
  
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c

index a1aeeb8..5a6d091 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -31,7 +31,6 @@
   *
   */
  
-#include <net/busy_poll.h>
  #include <linux/bpf.h>
  #include <linux/bpf_trace.h>
  #include <linux/mlx4/cq.h>
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c

index 2f7fb8d..94224c2 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -34,7 +34,6 @@
  #include <linux/ip.h>
  #include <linux/ipv6.h>
  #include <linux/tcp.h>
-#include <net/busy_poll.h>
  #include <net/ip6_checksum.h>
  #include <net/page_pool.h>
  #include <net/inet_ecn.h>
diff --git a/drivers/net/ethernet/microchip/Kconfig b/drivers/net/ethernet/microchip/Kconfig

index 16bd3f4..cf1d491 100644 (file)
--- a/drivers/net/ethernet/microchip/Kconfig
+++ b/drivers/net/ethernet/microchip/Kconfig
@@ -5,7 +5,6 @@
  config NET_VENDOR_MICROCHIP
         bool "Microchip devices"
         default y
-       depends on SPI
         ---help---
           If you have a network (Ethernet) card belonging to this class, say Y.
  
diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c

index b2d2ec8..5f384f7 100644 (file)
--- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
+++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
@@ -70,7 +70,6 @@
  #include <net/tcp.h>
  #include <asm/byteorder.h>
  #include <asm/processor.h>
-#include <net/busy_poll.h>
  
  #include "myri10ge_mcp.h"
  #include "myri10ge_mcp_gen_header.h"
diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c

index 006b0aa..1fd0168 100644 (file)
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -4161,10 +4161,15 @@ static void rtl_wol_suspend_quirk(struct rtl8169_private *tp)
  
  static bool rtl_wol_pll_power_down(struct rtl8169_private *tp)
  {
-       if (!netif_running(tp->dev) || !__rtl8169_get_wol(tp))
+       struct phy_device *phydev;
+
+       if (!__rtl8169_get_wol(tp))
                 return false;
  
-       phy_speed_down(tp->dev->phydev, false);
+       /* phydev may not be attached to netdevice */
+       phydev = mdiobus_get_phy(tp->mii_bus, 0);
+
+       phy_speed_down(phydev, false);
         rtl_wol_suspend_quirk(tp);
  
         return true;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c

index f9a61f9..0f660af 100644 (file)
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
@@ -714,8 +714,9 @@ static int get_ephy_nodes(struct stmmac_priv *priv)
                 return -ENODEV;
         }
  
-       mdio_internal = of_find_compatible_node(mdio_mux, NULL,
+       mdio_internal = of_get_compatible_child(mdio_mux,
                                                 "allwinner,sun8i-h3-mdio-internal");
+       of_node_put(mdio_mux);
         if (!mdio_internal) {
                 dev_err(priv->device, "Cannot get internal_mdio node\n");
                 return -ENODEV;
@@ -729,13 +730,20 @@ static int get_ephy_nodes(struct stmmac_priv *priv)
                 gmac->rst_ephy = of_reset_control_get_exclusive(iphynode, NULL);
                 if (IS_ERR(gmac->rst_ephy)) {
                         ret = PTR_ERR(gmac->rst_ephy);
-                       if (ret == -EPROBE_DEFER)
+                       if (ret == -EPROBE_DEFER) {
+                               of_node_put(iphynode);
+                               of_node_put(mdio_internal);
                                 return ret;
+                       }
                         continue;
                 }
                 dev_info(priv->device, "Found internal PHY node\n");
+               of_node_put(iphynode);
+               of_node_put(mdio_internal);
                 return 0;
         }
+
+       of_node_put(mdio_internal);
         return -ENODEV;
  }
  
diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c

index e122554..d7636ff 100644 (file)
--- a/drivers/net/phy/phy-c45.c
+++ b/drivers/net/phy/phy-c45.c
@@ -329,7 +329,7 @@ struct phy_driver genphy_10g_driver = {
         .name           = "Generic 10G PHY",
         .soft_reset     = gen10g_no_soft_reset,
         .config_init    = gen10g_config_init,
-       .features       = 0,
+       .features       = PHY_10GBIT_FEATURES,
         .config_aneg    = gen10g_config_aneg,
         .read_status    = gen10g_read_status,
         .suspend        = gen10g_suspend,
diff --git a/drivers/nfc/nfcmrvl/uart.c b/drivers/nfc/nfcmrvl/uart.c

index 91162f8..9a22056 100644 (file)
--- a/drivers/nfc/nfcmrvl/uart.c
+++ b/drivers/nfc/nfcmrvl/uart.c
@@ -73,10 +73,9 @@ static int nfcmrvl_uart_parse_dt(struct device_node *node,
         struct device_node *matched_node;
         int ret;
  
-       matched_node = of_find_compatible_node(node, NULL, "marvell,nfc-uart");
+       matched_node = of_get_compatible_child(node, "marvell,nfc-uart");
         if (!matched_node) {
-               matched_node = of_find_compatible_node(node, NULL,
-                                                      "mrvl,nfc-uart");
+               matched_node = of_get_compatible_child(node, "mrvl,nfc-uart");
                 if (!matched_node)
                         return -ENODEV;
         }
diff --git a/drivers/of/base.c b/drivers/of/base.c

index 74eaedd..13ebb16 100644 (file)
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -67,6 +67,7 @@ bool of_node_name_eq(const struct device_node *np, const char *name)
  
         return (strlen(name) == len) && (strncmp(node_name, name, len) == 0);
  }
+EXPORT_SYMBOL(of_node_name_eq);
  
  bool of_node_name_prefix(const struct device_node *np, const char *prefix)
  {
@@ -75,6 +76,7 @@ bool of_node_name_prefix(const struct device_node *np, const char *prefix)
  
         return strncmp(kbasename(np->full_name), prefix, strlen(prefix)) == 0;
  }
+EXPORT_SYMBOL(of_node_name_prefix);
  
  int of_n_addr_cells(struct device_node *np)
  {
@@ -330,6 +332,8 @@ static bool __of_find_n_match_cpu_property(struct device_node *cpun,
  
         ac = of_n_addr_cells(cpun);
         cell = of_get_property(cpun, prop_name, &prop_len);
+       if (!cell && !ac && arch_match_cpu_phys_id(cpu, 0))
+               return true;
         if (!cell || !ac)
                 return false;
         prop_len /= sizeof(*cell) * ac;
@@ -390,7 +394,7 @@ struct device_node *of_get_cpu_node(int cpu, unsigned int *thread)
  {
         struct device_node *cpun;
  
-       for_each_node_by_type(cpun, "cpu") {
+       for_each_of_cpu_node(cpun) {
                 if (arch_find_n_match_cpu_physical_id(cpun, cpu, thread))
                         return cpun;
         }
@@ -744,6 +748,45 @@ struct device_node *of_get_next_available_child(const struct device_node *node,
  }
  EXPORT_SYMBOL(of_get_next_available_child);
  
+/**
+ *     of_get_next_cpu_node - Iterate on cpu nodes
+ *     @prev:  previous child of the /cpus node, or NULL to get first
+ *
+ *     Returns the next child named "cpu" or of type "cpu" that passes
+ *     __of_device_is_available(), with refcount incremented (of_node_put() it
+ *     when done), or NULL if none is left. Decrements the refcount of prev.
+ */
+struct device_node *of_get_next_cpu_node(struct device_node *prev)
+{
+       struct device_node *next = NULL;
+       unsigned long flags;
+       struct device_node *node; /* only set and only read when prev is NULL */
+
+       if (!prev)
+               node = of_find_node_by_path("/cpus"); /* looked up before taking devtree_lock */
+
+       raw_spin_lock_irqsave(&devtree_lock, flags);
+       if (prev)
+               next = prev->sibling; /* resume after the previous node */
+       else if (node) {
+               next = node->child; /* start at the first child of /cpus */
+               of_node_put(node);
+       }
+       for (; next; next = next->sibling) {
+               if (!(of_node_name_eq(next, "cpu") ||
+                     (next->type && !of_node_cmp(next->type, "cpu"))))
+                       continue; /* neither named "cpu" nor of type "cpu" */
+               if (!__of_device_is_available(next))
+                       continue;
+               if (of_node_get(next))
+                       break; /* of_node_get() succeeded; this is the node to return */
+       }
+       of_node_put(prev);
+       raw_spin_unlock_irqrestore(&devtree_lock, flags);
+       return next;
+}
+EXPORT_SYMBOL(of_get_next_cpu_node);
+
  /**
   * of_get_compatible_child - Find compatible child node
   * @parent:    parent node
@@ -2013,7 +2056,7 @@ struct device_node *of_find_next_cache_node(const struct device_node *np)
         /* OF on pmac has nodes instead of properties named "l2-cache"
          * beneath CPU nodes.
          */
-       if (!strcmp(np->type, "cpu"))
+       if (IS_ENABLED(CONFIG_PPC_PMAC) && !strcmp(np->type, "cpu"))
                 for_each_child_of_node(np, child)
                         if (!strcmp(child->type, "cache"))
                                 return child;
@@ -2045,3 +2088,105 @@ int of_find_last_cache_level(unsigned int cpu)
  
         return cache_level;
  }
+
+/**
+ * of_map_rid - Translate a requester ID through a downstream mapping.
+ * @np: root complex device node.
+ * @rid: device requester ID to map.
+ * @map_name: property name of the map to use.
+ * @map_mask_name: optional property name of the mask to use.
+ * @target: optional pointer to a target device node.
+ * @id_out: optional pointer to receive the translated ID.
+ *
+ * Given a device requester ID, look up the appropriate implementation-defined
+ * platform ID and/or the target device which receives transactions on that
+ * ID, as per the "iommu-map" and "msi-map" bindings. Either of @target or
+ * @id_out may be NULL if only the other is required. If @target points to
+ * a non-NULL device node pointer, only entries targeting that node will be
+ * matched; if it points to a NULL value, it will receive the device node of
+ * the first matching target phandle, with a reference held.
+ *
+ * Return: 0 on success or a standard error code on failure.
+ */
+int of_map_rid(struct device_node *np, u32 rid,
+              const char *map_name, const char *map_mask_name,
+              struct device_node **target, u32 *id_out)
+{
+       u32 map_mask, masked_rid;
+       int map_len;
+       const __be32 *map = NULL;
+
+       if (!np || !map_name || (!target && !id_out))
+               return -EINVAL;
+
+       map = of_get_property(np, map_name, &map_len);
+       if (!map) {
+               if (target)
+                       return -ENODEV; /* a target was requested but there is no map to find it in */
+               /* Otherwise, no map implies no translation */
+               *id_out = rid;
+               return 0;
+       }
+
+       if (!map_len || map_len % (4 * sizeof(*map))) { /* each entry is 4 cells */
+               pr_err("%pOF: Error: Bad %s length: %d\n", np,
+                       map_name, map_len);
+               return -EINVAL;
+       }
+
+       /* The default is to select all bits. */
+       map_mask = 0xffffffff;
+
+       /*
+        * Can be overridden by "{iommu,msi}-map-mask" property.
+        * If of_property_read_u32() fails, the default is used.
+        */
+       if (map_mask_name)
+               of_property_read_u32(np, map_mask_name, &map_mask);
+
+       masked_rid = map_mask & rid;
+       for ( ; map_len > 0; map_len -= 4 * sizeof(*map), map += 4) {
+               struct device_node *phandle_node;
+               u32 rid_base = be32_to_cpup(map + 0);
+               u32 phandle = be32_to_cpup(map + 1);
+               u32 out_base = be32_to_cpup(map + 2);
+               u32 rid_len = be32_to_cpup(map + 3);
+
+               if (rid_base & ~map_mask) { /* rid-base has bits that the mask clears */
+                       pr_err("%pOF: Invalid %s translation - %s-mask (0x%x) ignores rid-base (0x%x)\n",
+                               np, map_name, map_name,
+                               map_mask, rid_base);
+                       return -EFAULT;
+               }
+
+               if (masked_rid < rid_base || masked_rid >= rid_base + rid_len)
+                       continue; /* not in [rid_base, rid_base + rid_len); NOTE(review): u32 sum, confirm it cannot wrap */
+
+               phandle_node = of_find_node_by_phandle(phandle);
+               if (!phandle_node)
+                       return -ENODEV;
+
+               if (target) {
+                       if (*target)
+                               of_node_put(phandle_node); /* caller named a target: drop the lookup reference */
+                       else
+                               *target = phandle_node; /* hand the reference to the caller */
+
+                       if (*target != phandle_node)
+                               continue; /* entry targets a different node than requested */
+               }
+
+               if (id_out)
+                       *id_out = masked_rid - rid_base + out_base; /* offset within the entry, rebased on out-base */
+
+               pr_debug("%pOF: %s, using mask %08x, rid-base: %08x, out-base: %08x, length: %08x, rid: %08x -> %08x\n",
+                       np, map_name, map_mask, rid_base, out_base,
+                       rid_len, rid, masked_rid - rid_base + out_base);
+               return 0;
+       }
+
+       pr_err("%pOF: Invalid %s translation - no match for rid 0x%x on %pOF\n",
+               np, map_name, rid, target && *target ? *target : NULL);
+       return -EFAULT;
+}
+EXPORT_SYMBOL_GPL(of_map_rid);
diff --git a/drivers/of/device.c b/drivers/of/device.c

index c7fa5a9..0f27fad 100644 (file)
--- a/drivers/of/device.c
+++ b/drivers/of/device.c
@@ -207,7 +207,8 @@ static ssize_t of_device_get_modalias(struct device *dev, char *str, ssize_t len
                 return -ENODEV;
  
         /* Name & Type */
-       csize = snprintf(str, len, "of:N%sT%s", dev->of_node->name,
+       /* %p eats all alphanum characters, so %c must be used here */
+       csize = snprintf(str, len, "of:N%pOFn%c%s", dev->of_node, 'T',
                          dev->of_node->type);
         tsize = csize;
         len -= csize;
@@ -286,7 +287,7 @@ void of_device_uevent(struct device *dev, struct kobj_uevent_env *env)
         if ((!dev) || (!dev->of_node))
                 return;
  
-       add_uevent_var(env, "OF_NAME=%s", dev->of_node->name);
+       add_uevent_var(env, "OF_NAME=%pOFn", dev->of_node);
         add_uevent_var(env, "OF_FULLNAME=%pOF", dev->of_node);
         if (dev->of_node->type && strcmp("<NULL>", dev->of_node->type) != 0)
                 add_uevent_var(env, "OF_TYPE=%s", dev->of_node->type);
diff --git a/drivers/of/irq.c b/drivers/of/irq.c

index 02ad93a..e1f6f39 100644 (file)
--- a/drivers/of/irq.c
+++ b/drivers/of/irq.c
@@ -22,7 +22,6 @@
  #include <linux/module.h>
  #include <linux/of.h>
  #include <linux/of_irq.h>
-#include <linux/of_pci.h>
  #include <linux/string.h>
  #include <linux/slab.h>
  
@@ -588,8 +587,8 @@ static u32 __of_msi_map_rid(struct device *dev, struct device_node **np,
          * "msi-map" property.
          */
         for (parent_dev = dev; parent_dev; parent_dev = parent_dev->parent)
-               if (!of_pci_map_rid(parent_dev->of_node, rid_in, "msi-map",
-                                   "msi-map-mask", np, &rid_out))
+               if (!of_map_rid(parent_dev->of_node, rid_in, "msi-map",
+                               "msi-map-mask", np, &rid_out))
                         break;
         return rid_out;
  }
diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c

index e92391d..5ad1342 100644 (file)
--- a/drivers/of/of_mdio.c
+++ b/drivers/of/of_mdio.c
@@ -97,8 +97,8 @@ static int of_mdiobus_register_phy(struct mii_bus *mdio,
                 return rc;
         }
  
-       dev_dbg(&mdio->dev, "registered phy %s at address %i\n",
-               child->name, addr);
+       dev_dbg(&mdio->dev, "registered phy %pOFn at address %i\n",
+               child, addr);
         return 0;
  }
  
@@ -127,8 +127,8 @@ static int of_mdiobus_register_device(struct mii_bus *mdio,
                 return rc;
         }
  
-       dev_dbg(&mdio->dev, "registered mdio device %s at address %i\n",
-               child->name, addr);
+       dev_dbg(&mdio->dev, "registered mdio device %pOFn at address %i\n",
+               child, addr);
         return 0;
  }
  
@@ -263,8 +263,8 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np)
                                 continue;
  
                         /* be noisy to encourage people to set reg property */
-                       dev_info(&mdio->dev, "scan phy %s at address %i\n",
-                                child->name, addr);
+                       dev_info(&mdio->dev, "scan phy %pOFn at address %i\n",
+                                child, addr);
  
                         if (of_mdiobus_child_is_phy(child)) {
                                 rc = of_mdiobus_register_phy(mdio, child, addr);
diff --git a/drivers/of/of_numa.c b/drivers/of/of_numa.c

index 27d9b4b..35c64a4 100644 (file)
--- a/drivers/of/of_numa.c
+++ b/drivers/of/of_numa.c
@@ -24,18 +24,9 @@ static void __init of_numa_parse_cpu_nodes(void)
  {
         u32 nid;
         int r;
-       struct device_node *cpus;
-       struct device_node *np = NULL;
-
-       cpus = of_find_node_by_path("/cpus");
-       if (!cpus)
-               return;
-
-       for_each_child_of_node(cpus, np) {
-               /* Skip things that are not CPUs */
-               if (of_node_cmp(np->type, "cpu") != 0)
-                       continue;
+       struct device_node *np;
  
+       for_each_of_cpu_node(np) {
                 r = of_property_read_u32(np, "numa-node-id", &nid);
                 if (r)
                         continue;
@@ -46,8 +37,6 @@ static void __init of_numa_parse_cpu_nodes(void)
                 else
                         node_set(nid, numa_nodes_parsed);
         }
-
-       of_node_put(cpus);
  }
  
  static int __init of_numa_parse_memory_nodes(void)
@@ -163,8 +152,8 @@ int of_node_to_nid(struct device_node *device)
                 np = of_get_next_parent(np);
         }
         if (np && r)
-               pr_warn("Invalid \"numa-node-id\" property in node %s\n",
-                       np->name);
+               pr_warn("Invalid \"numa-node-id\" property in node %pOFn\n",
+                       np);
         of_node_put(np);
  
         /*
diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h

index 216175d..5d15670 100644 (file)
--- a/drivers/of/of_private.h
+++ b/drivers/of/of_private.h
@@ -27,6 +27,14 @@ struct alias_prop {
         char stem[0];
  };
  
+#if defined(CONFIG_SPARC)
+#define OF_ROOT_NODE_ADDR_CELLS_DEFAULT 2
+#else
+#define OF_ROOT_NODE_ADDR_CELLS_DEFAULT 1
+#endif
+
+#define OF_ROOT_NODE_SIZE_CELLS_DEFAULT 1
+
  extern struct mutex of_mutex;
  extern struct list_head aliases_lookup;
  extern struct kset *of_kset;
diff --git a/drivers/of/overlay.c b/drivers/of/overlay.c

index eda57ef..42b1f73 100644 (file)
--- a/drivers/of/overlay.c
+++ b/drivers/of/overlay.c
@@ -425,8 +425,8 @@ static int build_changeset_next_level(struct overlay_changeset *ovcs,
         for_each_child_of_node(overlay_node, child) {
                 ret = add_changeset_node(ovcs, target_node, child);
                 if (ret) {
-                       pr_debug("Failed to apply node @%pOF/%s, err=%d\n",
-                                target_node, child->name, ret);
+                       pr_debug("Failed to apply node @%pOF/%pOFn, err=%d\n",
+                                target_node, child, ret);
                         of_node_put(child);
                         return ret;
                 }
diff --git a/drivers/of/platform.c b/drivers/of/platform.c

index 6c59673..04ad312 100644 (file)
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -91,8 +91,8 @@ static void of_device_make_bus_id(struct device *dev)
                  */
                 reg = of_get_property(node, "reg", NULL);
                 if (reg && (addr = of_translate_address(node, reg)) != OF_BAD_ADDR) {
-                       dev_set_name(dev, dev_name(dev) ? "%llx.%s:%s" : "%llx.%s",
-                                    (unsigned long long)addr, node->name,
+                       dev_set_name(dev, dev_name(dev) ? "%llx.%pOFn:%s" : "%llx.%pOFn",
+                                    (unsigned long long)addr, node,
                                      dev_name(dev));
                         return;
                 }
@@ -142,8 +142,8 @@ struct platform_device *of_device_alloc(struct device_node *np,
                         WARN_ON(rc);
                 }
                 if (of_irq_to_resource_table(np, res, num_irq) != num_irq)
-                       pr_debug("not all legacy IRQ resources mapped for %s\n",
-                                np->name);
+                       pr_debug("not all legacy IRQ resources mapped for %pOFn\n",
+                                np);
         }
  
         dev->dev.of_node = of_node_get(np);
diff --git a/drivers/of/unittest-data/overlay_15.dts b/drivers/of/unittest-data/overlay_15.dts

index b98f251..5728490 100644 (file)
--- a/drivers/of/unittest-data/overlay_15.dts
+++ b/drivers/of/unittest-data/overlay_15.dts
@@ -20,8 +20,8 @@
                         #size-cells = <0>;
                         reg = <0>;
  
-                       test-mux-dev {
-                               reg = <32>;
+                       test-mux-dev@20 {
+                               reg = <0x20>;
                                 compatible = "unittest-i2c-dev";
                                 status = "okay";
                         };
diff --git a/drivers/of/unittest-data/tests-overlay.dtsi b/drivers/of/unittest-data/tests-overlay.dtsi

index 25cf397..4ea024d 100644 (file)
--- a/drivers/of/unittest-data/tests-overlay.dtsi
+++ b/drivers/of/unittest-data/tests-overlay.dtsi
@@ -103,8 +103,8 @@
                                                         #size-cells = <0>;
                                                         reg = <0>;
  
-                                                       test-mux-dev {
-                                                               reg = <32>;
+                                                       test-mux-dev@20 {
+                                                               reg = <0x20>;
                                                                 compatible = "unittest-i2c-dev";
                                                                 status = "okay";
                                                         };
diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c

index 41b4971..a3a6866 100644 (file)
--- a/drivers/of/unittest.c
+++ b/drivers/of/unittest.c
@@ -212,8 +212,8 @@ static int __init of_unittest_check_node_linkage(struct device_node *np)
  
         for_each_child_of_node(np, child) {
                 if (child->parent != np) {
-                       pr_err("Child node %s links to wrong parent %s\n",
-                                child->name, np->name);
+                       pr_err("Child node %pOFn links to wrong parent %pOFn\n",
+                                child, np);
                         rc = -EINVAL;
                         goto put_child;
                 }
@@ -299,6 +299,10 @@ static void __init of_unittest_printf(void)
  
         of_unittest_printf_one(np, "%pOF",  full_name);
         of_unittest_printf_one(np, "%pOFf", full_name);
+       of_unittest_printf_one(np, "%pOFn", "dev");
+       of_unittest_printf_one(np, "%2pOFn", "dev");
+       of_unittest_printf_one(np, "%5pOFn", "  dev");
+       of_unittest_printf_one(np, "%pOFnc", "dev:test-sub-device");
         of_unittest_printf_one(np, "%pOFp", phandle_str);
         of_unittest_printf_one(np, "%pOFP", "dev@100");
         of_unittest_printf_one(np, "ABC %pOFP ABC", "ABC dev@100 ABC");
@@ -1046,16 +1050,16 @@ static void __init of_unittest_platform_populate(void)
         for_each_child_of_node(np, child) {
                 for_each_child_of_node(child, grandchild)
                         unittest(of_find_device_by_node(grandchild),
-                                "Could not create device for node '%s'\n",
-                                grandchild->name);
+                                "Could not create device for node '%pOFn'\n",
+                                grandchild);
         }
  
         of_platform_depopulate(&test_bus->dev);
         for_each_child_of_node(np, child) {
                 for_each_child_of_node(child, grandchild)
                         unittest(!of_find_device_by_node(grandchild),
-                                "device didn't get destroyed '%s'\n",
-                                grandchild->name);
+                                "device didn't get destroyed '%pOFn'\n",
+                                grandchild);
         }
  
         platform_device_unregister(test_bus);
@@ -2357,11 +2361,14 @@ static __init void of_unittest_overlay_high_level(void)
                 }
         }
  
-       for (np = overlay_base_root->child; np; np = np->sibling) {
-               if (of_get_child_by_name(of_root, np->name)) {
-                       unittest(0, "illegal node name in overlay_base %s",
-                               np->name);
-                       return;
+       for_each_child_of_node(overlay_base_root, np) {
+               struct device_node *base_child;
+               for_each_child_of_node(of_root, base_child) {
+                       if (!strcmp(np->full_name, base_child->full_name)) {
+                               unittest(0, "illegal node name in overlay_base %pOFn",
+                                        np);
+                               return;
+                       }
                 }
         }
  
diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c

index ee54f5b..6758fd7 100644 (file)
--- a/drivers/pci/hotplug/pnv_php.c
+++ b/drivers/pci/hotplug/pnv_php.c
@@ -746,7 +746,7 @@ static irqreturn_t pnv_php_interrupt(int irq, void *data)
                 pe = edev ? edev->pe : NULL;
                 if (pe) {
                         eeh_serialize_lock(&flags);
-                       eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
+                       eeh_pe_mark_isolated(pe);
                         eeh_serialize_unlock(flags);
                         eeh_pe_set_option(pe, EEH_OPT_FREEZE_PE);
                 }
diff --git a/drivers/pci/of.c b/drivers/pci/of.c

index 1836b8d..4c4217d 100644 (file)
--- a/drivers/pci/of.c
+++ b/drivers/pci/of.c
@@ -355,107 +355,6 @@ failed:
  EXPORT_SYMBOL_GPL(devm_of_pci_get_host_bridge_resources);
  #endif /* CONFIG_OF_ADDRESS */
  
-/**
- * of_pci_map_rid - Translate a requester ID through a downstream mapping.
- * @np: root complex device node.
- * @rid: PCI requester ID to map.
- * @map_name: property name of the map to use.
- * @map_mask_name: optional property name of the mask to use.
- * @target: optional pointer to a target device node.
- * @id_out: optional pointer to receive the translated ID.
- *
- * Given a PCI requester ID, look up the appropriate implementation-defined
- * platform ID and/or the target device which receives transactions on that
- * ID, as per the "iommu-map" and "msi-map" bindings. Either of @target or
- * @id_out may be NULL if only the other is required. If @target points to
- * a non-NULL device node pointer, only entries targeting that node will be
- * matched; if it points to a NULL value, it will receive the device node of
- * the first matching target phandle, with a reference held.
- *
- * Return: 0 on success or a standard error code on failure.
- */
-int of_pci_map_rid(struct device_node *np, u32 rid,
-                  const char *map_name, const char *map_mask_name,
-                  struct device_node **target, u32 *id_out)
-{
-       u32 map_mask, masked_rid;
-       int map_len;
-       const __be32 *map = NULL;
-
-       if (!np || !map_name || (!target && !id_out))
-               return -EINVAL;
-
-       map = of_get_property(np, map_name, &map_len);
-       if (!map) {
-               if (target)
-                       return -ENODEV;
-               /* Otherwise, no map implies no translation */
-               *id_out = rid;
-               return 0;
-       }
-
-       if (!map_len || map_len % (4 * sizeof(*map))) {
-               pr_err("%pOF: Error: Bad %s length: %d\n", np,
-                       map_name, map_len);
-               return -EINVAL;
-       }
-
-       /* The default is to select all bits. */
-       map_mask = 0xffffffff;
-
-       /*
-        * Can be overridden by "{iommu,msi}-map-mask" property.
-        * If of_property_read_u32() fails, the default is used.
-        */
-       if (map_mask_name)
-               of_property_read_u32(np, map_mask_name, &map_mask);
-
-       masked_rid = map_mask & rid;
-       for ( ; map_len > 0; map_len -= 4 * sizeof(*map), map += 4) {
-               struct device_node *phandle_node;
-               u32 rid_base = be32_to_cpup(map + 0);
-               u32 phandle = be32_to_cpup(map + 1);
-               u32 out_base = be32_to_cpup(map + 2);
-               u32 rid_len = be32_to_cpup(map + 3);
-
-               if (rid_base & ~map_mask) {
-                       pr_err("%pOF: Invalid %s translation - %s-mask (0x%x) ignores rid-base (0x%x)\n",
-                               np, map_name, map_name,
-                               map_mask, rid_base);
-                       return -EFAULT;
-               }
-
-               if (masked_rid < rid_base || masked_rid >= rid_base + rid_len)
-                       continue;
-
-               phandle_node = of_find_node_by_phandle(phandle);
-               if (!phandle_node)
-                       return -ENODEV;
-
-               if (target) {
-                       if (*target)
-                               of_node_put(phandle_node);
-                       else
-                               *target = phandle_node;
-
-                       if (*target != phandle_node)
-                               continue;
-               }
-
-               if (id_out)
-                       *id_out = masked_rid - rid_base + out_base;
-
-               pr_debug("%pOF: %s, using mask %08x, rid-base: %08x, out-base: %08x, length: %08x, rid: %08x -> %08x\n",
-                       np, map_name, map_mask, rid_base, out_base,
-                       rid_len, rid, masked_rid - rid_base + out_base);
-               return 0;
-       }
-
-       pr_err("%pOF: Invalid %s translation - no match for rid 0x%x on %pOF\n",
-               np, map_name, rid, target && *target ? *target : NULL);
-       return -EFAULT;
-}
-
  #if IS_ENABLED(CONFIG_OF_IRQ)
  /**
   * of_irq_parse_pci - Resolve the interrupt for a PCI device
diff --git a/drivers/pcmcia/electra_cf.c b/drivers/pcmcia/electra_cf.c

index 9671ded..b31abe3 100644 (file)
--- a/drivers/pcmcia/electra_cf.c
+++ b/drivers/pcmcia/electra_cf.c
@@ -230,7 +230,7 @@ static int electra_cf_probe(struct platform_device *ofdev)
  
         if (!cf->mem_base || !cf->io_virt || !cf->gpio_base ||
             (__ioremap_at(io.start, cf->io_virt, cf->io_size,
-                 pgprot_val(pgprot_noncached(__pgprot(0)))) == NULL)) {
+                         pgprot_noncached(PAGE_KERNEL)) == NULL)) {
                 dev_err(device, "can't ioremap ranges\n");
                 status = -ENOMEM;
                 goto fail1;
diff --git a/drivers/power/supply/twl4030_charger.c b/drivers/power/supply/twl4030_charger.c

index 80582c8..0e202d4 100644 (file)
--- a/drivers/power/supply/twl4030_charger.c
+++ b/drivers/power/supply/twl4030_charger.c
@@ -1022,12 +1022,13 @@ static int twl4030_bci_probe(struct platform_device *pdev)
         if (bci->dev->of_node) {
                 struct device_node *phynode;
  
-               phynode = of_find_compatible_node(bci->dev->of_node->parent,
-                                                 NULL, "ti,twl4030-usb");
+               phynode = of_get_compatible_child(bci->dev->of_node->parent,
+                                                 "ti,twl4030-usb");
                 if (phynode) {
                         bci->usb_nb.notifier_call = twl4030_bci_usb_ncb;
                         bci->transceiver = devm_usb_get_phy_by_node(
                                 bci->dev, phynode, &bci->usb_nb);
+                       of_node_put(phynode);
                         if (IS_ERR(bci->transceiver)) {
                                 ret = PTR_ERR(bci->transceiver);
                                 if (ret == -EPROBE_DEFER)
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c

index b762d0f..3bb2b33 100644 (file)
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1272,8 +1272,6 @@ static int sd_init_command(struct scsi_cmnd *cmd)
         case REQ_OP_READ:
         case REQ_OP_WRITE:
                 return sd_setup_read_write_cmnd(cmd);
-       case REQ_OP_ZONE_REPORT:
-               return sd_zbc_setup_report_cmnd(cmd);
         case REQ_OP_ZONE_RESET:
                 return sd_zbc_setup_reset_cmnd(cmd);
         default:
@@ -1802,6 +1800,7 @@ static const struct block_device_operations sd_fops = {
         .check_events           = sd_check_events,
         .revalidate_disk        = sd_revalidate_disk,
         .unlock_native_capacity = sd_unlock_native_capacity,
+       .report_zones           = sd_zbc_report_zones,
         .pr_ops                 = &sd_pr_ops,
  };
  
@@ -1953,16 +1952,6 @@ static int sd_done(struct scsi_cmnd *SCpnt)
                         scsi_set_resid(SCpnt, blk_rq_bytes(req));
                 }
                 break;
-       case REQ_OP_ZONE_REPORT:
-               if (!result) {
-                       good_bytes = scsi_bufflen(SCpnt)
-                               - scsi_get_resid(SCpnt);
-                       scsi_set_resid(SCpnt, 0);
-               } else {
-                       good_bytes = 0;
-                       scsi_set_resid(SCpnt, blk_rq_bytes(req));
-               }
-               break;
         default:
                 /*
                  * In case of bogus fw or device, we could end up having
@@ -3425,8 +3414,6 @@ static int sd_remove(struct device *dev)
         del_gendisk(sdkp->disk);
         sd_shutdown(dev);
  
-       sd_zbc_remove(sdkp);
-
         free_opal_dev(sdkp->opal_dev);
  
         blk_register_region(devt, SD_MINORS, NULL,
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h

index a7d4f50..1d63f3a 100644 (file)
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -76,7 +76,6 @@ struct scsi_disk {
  #ifdef CONFIG_BLK_DEV_ZONED
         u32             nr_zones;
         u32             zone_blocks;
-       u32             zone_shift;
         u32             zones_optimal_open;
         u32             zones_optimal_nonseq;
         u32             zones_max_open;
@@ -271,12 +270,13 @@ static inline int sd_is_zoned(struct scsi_disk *sdkp)
  #ifdef CONFIG_BLK_DEV_ZONED
  
  extern int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buffer);
-extern void sd_zbc_remove(struct scsi_disk *sdkp);
  extern void sd_zbc_print_zones(struct scsi_disk *sdkp);
-extern int sd_zbc_setup_report_cmnd(struct scsi_cmnd *cmd);
  extern int sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd);
  extern void sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes,
                             struct scsi_sense_hdr *sshdr);
+extern int sd_zbc_report_zones(struct gendisk *disk, sector_t sector,
+                              struct blk_zone *zones, unsigned int *nr_zones,
+                              gfp_t gfp_mask);
  
  #else /* CONFIG_BLK_DEV_ZONED */
  
@@ -286,15 +286,8 @@ static inline int sd_zbc_read_zones(struct scsi_disk *sdkp,
         return 0;
  }
  
-static inline void sd_zbc_remove(struct scsi_disk *sdkp) {}
-
  static inline void sd_zbc_print_zones(struct scsi_disk *sdkp) {}
  
-static inline int sd_zbc_setup_report_cmnd(struct scsi_cmnd *cmd)
-{
-       return BLKPREP_INVALID;
-}
-
  static inline int sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd)
  {
         return BLKPREP_INVALID;
@@ -304,6 +297,8 @@ static inline void sd_zbc_complete(struct scsi_cmnd *cmd,
                                    unsigned int good_bytes,
                                    struct scsi_sense_hdr *sshdr) {}
  
+#define sd_zbc_report_zones NULL
+
  #endif /* CONFIG_BLK_DEV_ZONED */
  
  #endif /* _SCSI_DISK_H */
diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c

index 412c178..e06c48c 100644 (file)
--- a/drivers/scsi/sd_zbc.c
+++ b/drivers/scsi/sd_zbc.c
@@ -62,16 +62,22 @@ static void sd_zbc_parse_report(struct scsi_disk *sdkp, u8 *buf,
  }
  
  /**
- * sd_zbc_report_zones - Issue a REPORT ZONES scsi command.
+ * sd_zbc_do_report_zones - Issue a REPORT ZONES scsi command.
   * @sdkp: The target disk
   * @buf: Buffer to use for the reply
   * @buflen: the buffer size
   * @lba: Start LBA of the report
+ * @partial: Do partial report
   *
   * For internal use during device validation.
+ * Using partial=true can significantly speed up execution of a report zones
+ * command because the disk does not have to count all possible report matching
+ * zones and will only report the count of zones fitting in the command reply
+ * buffer.
   */
-static int sd_zbc_report_zones(struct scsi_disk *sdkp, unsigned char *buf,
-                              unsigned int buflen, sector_t lba)
+static int sd_zbc_do_report_zones(struct scsi_disk *sdkp, unsigned char *buf,
+                                 unsigned int buflen, sector_t lba,
+                                 bool partial)
  {
         struct scsi_device *sdp = sdkp->device;
         const int timeout = sdp->request_queue->rq_timeout;
@@ -85,6 +91,8 @@ static int sd_zbc_report_zones(struct scsi_disk *sdkp, unsigned char *buf,
         cmd[1] = ZI_REPORT_ZONES;
         put_unaligned_be64(lba, &cmd[2]);
         put_unaligned_be32(buflen, &cmd[10]);
+       if (partial)
+               cmd[14] = ZBC_REPORT_ZONE_PARTIAL;
         memset(buf, 0, buflen);
  
         result = scsi_execute_req(sdp, cmd, DMA_FROM_DEVICE,
@@ -110,108 +118,56 @@ static int sd_zbc_report_zones(struct scsi_disk *sdkp, unsigned char *buf,
  }
  
  /**
- * sd_zbc_setup_report_cmnd - Prepare a REPORT ZONES scsi command
- * @cmd: The command to setup
+ * sd_zbc_report_zones - Disk report zones operation.
+ * @disk: The target disk
+ * @sector: Start 512B sector of the report
+ * @zones: Array of zone descriptors
+ * @nr_zones: Size of the array on input, number of zones reported on output
+ * @gfp_mask: Memory allocation mask
   *
- * Call in sd_init_command() for a REQ_OP_ZONE_REPORT request.
+ * Execute a report zones command on the target disk.
   */
-int sd_zbc_setup_report_cmnd(struct scsi_cmnd *cmd)
+int sd_zbc_report_zones(struct gendisk *disk, sector_t sector,
+                       struct blk_zone *zones, unsigned int *nr_zones,
+                       gfp_t gfp_mask)
  {
-       struct request *rq = cmd->request;
-       struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
-       sector_t lba, sector = blk_rq_pos(rq);
-       unsigned int nr_bytes = blk_rq_bytes(rq);
-       int ret;
-
-       WARN_ON(nr_bytes == 0);
+       struct scsi_disk *sdkp = scsi_disk(disk);
+       unsigned int i, buflen, nrz = *nr_zones;
+       unsigned char *buf;
+       size_t offset = 0;
+       int ret = 0;
  
         if (!sd_is_zoned(sdkp))
                 /* Not a zoned device */
-               return BLKPREP_KILL;
-
-       ret = scsi_init_io(cmd);
-       if (ret != BLKPREP_OK)
-               return ret;
-
-       cmd->cmd_len = 16;
-       memset(cmd->cmnd, 0, cmd->cmd_len);
-       cmd->cmnd[0] = ZBC_IN;
-       cmd->cmnd[1] = ZI_REPORT_ZONES;
-       lba = sectors_to_logical(sdkp->device, sector);
-       put_unaligned_be64(lba, &cmd->cmnd[2]);
-       put_unaligned_be32(nr_bytes, &cmd->cmnd[10]);
-       /* Do partial report for speeding things up */
-       cmd->cmnd[14] = ZBC_REPORT_ZONE_PARTIAL;
-
-       cmd->sc_data_direction = DMA_FROM_DEVICE;
-       cmd->sdb.length = nr_bytes;
-       cmd->transfersize = sdkp->device->sector_size;
-       cmd->allowed = 0;
-
-       return BLKPREP_OK;
-}
-
-/**
- * sd_zbc_report_zones_complete - Process a REPORT ZONES scsi command reply.
- * @scmd: The completed report zones command
- * @good_bytes: reply size in bytes
- *
- * Convert all reported zone descriptors to struct blk_zone. The conversion
- * is done in-place, directly in the request specified sg buffer.
- */
-static void sd_zbc_report_zones_complete(struct scsi_cmnd *scmd,
-                                        unsigned int good_bytes)
-{
-       struct request *rq = scmd->request;
-       struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
-       struct sg_mapping_iter miter;
-       struct blk_zone_report_hdr hdr;
-       struct blk_zone zone;
-       unsigned int offset, bytes = 0;
-       unsigned long flags;
-       u8 *buf;
-
-       if (good_bytes < 64)
-               return;
+               return -EOPNOTSUPP;
  
-       memset(&hdr, 0, sizeof(struct blk_zone_report_hdr));
-
-       sg_miter_start(&miter, scsi_sglist(scmd), scsi_sg_count(scmd),
-                      SG_MITER_TO_SG | SG_MITER_ATOMIC);
-
-       local_irq_save(flags);
-       while (sg_miter_next(&miter) && bytes < good_bytes) {
+       /*
+        * Get a reply buffer for the number of requested zones plus a header.
+        * For ATA, buffers must be aligned to 512B.
+        */
+       buflen = roundup((nrz + 1) * 64, 512);
+       buf = kmalloc(buflen, gfp_mask);
+       if (!buf)
+               return -ENOMEM;
  
-               buf = miter.addr;
-               offset = 0;
+       ret = sd_zbc_do_report_zones(sdkp, buf, buflen,
+                       sectors_to_logical(sdkp->device, sector), true);
+       if (ret)
+               goto out_free_buf;
  
-               if (bytes == 0) {
-                       /* Set the report header */
-                       hdr.nr_zones = min_t(unsigned int,
-                                        (good_bytes - 64) / 64,
-                                        get_unaligned_be32(&buf[0]) / 64);
-                       memcpy(buf, &hdr, sizeof(struct blk_zone_report_hdr));
-                       offset += 64;
-                       bytes += 64;
-               }
+       nrz = min(nrz, get_unaligned_be32(&buf[0]) / 64);
+       for (i = 0; i < nrz; i++) {
+               offset += 64;
+               sd_zbc_parse_report(sdkp, buf + offset, zones);
+               zones++;
+       }
  
-               /* Parse zone descriptors */
-               while (offset < miter.length && hdr.nr_zones) {
-                       WARN_ON(offset > miter.length);
-                       buf = miter.addr + offset;
-                       sd_zbc_parse_report(sdkp, buf, &zone);
-                       memcpy(buf, &zone, sizeof(struct blk_zone));
-                       offset += 64;
-                       bytes += 64;
-                       hdr.nr_zones--;
-               }
+       *nr_zones = nrz;
  
-               if (!hdr.nr_zones)
-                       break;
+out_free_buf:
+       kfree(buf);
  
-       }
-       sg_miter_stop(&miter);
-       local_irq_restore(flags);
+       return ret;
  }
  
  /**
@@ -294,30 +250,23 @@ void sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes,
         case REQ_OP_WRITE_ZEROES:
         case REQ_OP_WRITE_SAME:
                 break;
-
-       case REQ_OP_ZONE_REPORT:
-
-               if (!result)
-                       sd_zbc_report_zones_complete(cmd, good_bytes);
-               break;
-
         }
  }
  
  /**
- * sd_zbc_read_zoned_characteristics - Read zoned block device characteristics
+ * sd_zbc_check_zoned_characteristics - Check zoned block device characteristics
   * @sdkp: Target disk
   * @buf: Buffer where to store the VPD page data
   *
- * Read VPD page B6.
+ * Read VPD page B6, get information and check that reads are unconstrained.
   */
-static int sd_zbc_read_zoned_characteristics(struct scsi_disk *sdkp,
-                                            unsigned char *buf)
+static int sd_zbc_check_zoned_characteristics(struct scsi_disk *sdkp,
+                                             unsigned char *buf)
  {
  
         if (scsi_get_vpd_page(sdkp->device, 0xb6, buf, 64)) {
                 sd_printk(KERN_NOTICE, sdkp,
-                         "Unconstrained-read check failed\n");
+                         "Read zoned characteristics VPD page failed\n");
                 return -ENODEV;
         }
  
@@ -335,43 +284,17 @@ static int sd_zbc_read_zoned_characteristics(struct scsi_disk *sdkp,
                 sdkp->zones_max_open = get_unaligned_be32(&buf[16]);
         }
  
-       return 0;
-}
-
-/**
- * sd_zbc_check_capacity - Check reported capacity.
- * @sdkp: Target disk
- * @buf: Buffer to use for commands
- *
- * ZBC drive may report only the capacity of the first conventional zones at
- * LBA 0. This is indicated by the RC_BASIS field of the read capacity reply.
- * Check this here. If the disk reported only its conventional zones capacity,
- * get the total capacity by doing a report zones.
- */
-static int sd_zbc_check_capacity(struct scsi_disk *sdkp, unsigned char *buf)
-{
-       sector_t lba;
-       int ret;
-
-       if (sdkp->rc_basis != 0)
-               return 0;
-
-       /* Do a report zone to get the maximum LBA to check capacity */
-       ret = sd_zbc_report_zones(sdkp, buf, SD_BUF_SIZE, 0);
-       if (ret)
-               return ret;
-
-       /* The max_lba field is the capacity of this device */
-       lba = get_unaligned_be64(&buf[8]);
-       if (lba + 1 == sdkp->capacity)
-               return 0;
-
-       if (sdkp->first_scan)
-               sd_printk(KERN_WARNING, sdkp,
-                         "Changing capacity from %llu to max LBA+1 %llu\n",
-                         (unsigned long long)sdkp->capacity,
-                         (unsigned long long)lba + 1);
-       sdkp->capacity = lba + 1;
+       /*
+        * Check for unconstrained reads: host-managed devices with
+        * constrained reads (drives failing read after write pointer)
+        * are not supported.
+        */
+       if (!sdkp->urswrz) {
+               if (sdkp->first_scan)
+                       sd_printk(KERN_NOTICE, sdkp,
+                         "constrained reads devices are not supported\n");
+               return -ENODEV;
+       }
  
         return 0;
  }
@@ -379,24 +302,27 @@ static int sd_zbc_check_capacity(struct scsi_disk *sdkp, unsigned char *buf)
  #define SD_ZBC_BUF_SIZE 131072U
  
  /**
- * sd_zbc_check_zone_size - Check the device zone sizes
+ * sd_zbc_check_zones - Check the device capacity and zone sizes
   * @sdkp: Target disk
   *
- * Check that all zones of the device are equal. The last zone can however
- * be smaller. The zone size must also be a power of two number of LBAs.
+ * Check that the device capacity as reported by READ CAPACITY matches the
+ * max_lba value (plus one) of the report zones command reply. Also check that
+ * all zones of the device have an equal size, only allowing the last zone of
+ * the disk to have a smaller size (runt zone). The zone size must also be a
+ * power of two.
   *
   * Returns 0 and stores the zone size in number of blocks in @zblocks upon
   * success, or an error code upon failure.
   */
-static s64 sd_zbc_check_zone_size(struct scsi_disk *sdkp)
+static int sd_zbc_check_zones(struct scsi_disk *sdkp, u32 *zblocks)
  {
         u64 zone_blocks = 0;
-       sector_t block = 0;
+       sector_t max_lba, block = 0;
         unsigned char *buf;
         unsigned char *rec;
         unsigned int buf_len;
         unsigned int list_length;
-       s64 ret;
+       int ret;
         u8 same;
  
         /* Get a buffer */
@@ -404,11 +330,28 @@ static s64 sd_zbc_check_zone_size(struct scsi_disk *sdkp)
         if (!buf)
                 return -ENOMEM;
  
-       /* Do a report zone to get the same field */
-       ret = sd_zbc_report_zones(sdkp, buf, SD_ZBC_BUF_SIZE, 0);
+       /* Do a report zone to get max_lba and the same field */
+       ret = sd_zbc_do_report_zones(sdkp, buf, SD_ZBC_BUF_SIZE, 0, false);
         if (ret)
                 goto out_free;
  
+       if (sdkp->rc_basis == 0) {
+               /* The max_lba field is the capacity of this device */
+               max_lba = get_unaligned_be64(&buf[8]);
+               if (sdkp->capacity != max_lba + 1) {
+                       if (sdkp->first_scan)
+                               sd_printk(KERN_WARNING, sdkp,
+                                       "Changing capacity from %llu to max LBA+1 %llu\n",
+                                       (unsigned long long)sdkp->capacity,
+                                       (unsigned long long)max_lba + 1);
+                       sdkp->capacity = max_lba + 1;
+               }
+       }
+
+       /*
+        * Check same field: for any value other than 0, we know that all zones
+        * have the same size.
+        */
         same = buf[4] & 0x0f;
         if (same > 0) {
                 rec = &buf[64];
@@ -445,8 +388,8 @@ static s64 sd_zbc_check_zone_size(struct scsi_disk *sdkp)
                 }
  
                 if (block < sdkp->capacity) {
-                       ret = sd_zbc_report_zones(sdkp, buf,
-                                                 SD_ZBC_BUF_SIZE, block);
+                       ret = sd_zbc_do_report_zones(sdkp, buf, SD_ZBC_BUF_SIZE,
+                                                    block, true);
                         if (ret)
                                 goto out_free;
                 }
@@ -470,9 +413,10 @@ out:
                 if (sdkp->first_scan)
                         sd_printk(KERN_NOTICE, sdkp,
                                   "Zone size too large\n");
-               ret = -ENODEV;
+               ret = -EFBIG;
         } else {
-               ret = zone_blocks;
+               *zblocks = zone_blocks;
+               ret = 0;
         }
  
  out_free:
@@ -481,191 +425,11 @@ out_free:
         return ret;
  }
  
-/**
- * sd_zbc_alloc_zone_bitmap - Allocate a zone bitmap (one bit per zone).
- * @nr_zones: Number of zones to allocate space for.
- * @numa_node: NUMA node to allocate the memory from.
- */
-static inline unsigned long *
-sd_zbc_alloc_zone_bitmap(u32 nr_zones, int numa_node)
-{
-       return kcalloc_node(BITS_TO_LONGS(nr_zones), sizeof(unsigned long),
-                           GFP_KERNEL, numa_node);
-}
-
-/**
- * sd_zbc_get_seq_zones - Parse report zones reply to identify sequential zones
- * @sdkp: disk used
- * @buf: report reply buffer
- * @buflen: length of @buf
- * @zone_shift: logarithm base 2 of the number of blocks in a zone
- * @seq_zones_bitmap: bitmap of sequential zones to set
- *
- * Parse reported zone descriptors in @buf to identify sequential zones and
- * set the reported zone bit in @seq_zones_bitmap accordingly.
- * Since read-only and offline zones cannot be written, do not
- * mark them as sequential in the bitmap.
- * Return the LBA after the last zone reported.
- */
-static sector_t sd_zbc_get_seq_zones(struct scsi_disk *sdkp, unsigned char *buf,
-                                    unsigned int buflen, u32 zone_shift,
-                                    unsigned long *seq_zones_bitmap)
-{
-       sector_t lba, next_lba = sdkp->capacity;
-       unsigned int buf_len, list_length;
-       unsigned char *rec;
-       u8 type, cond;
-
-       list_length = get_unaligned_be32(&buf[0]) + 64;
-       buf_len = min(list_length, buflen);
-       rec = buf + 64;
-
-       while (rec < buf + buf_len) {
-               type = rec[0] & 0x0f;
-               cond = (rec[1] >> 4) & 0xf;
-               lba = get_unaligned_be64(&rec[16]);
-               if (type != ZBC_ZONE_TYPE_CONV &&
-                   cond != ZBC_ZONE_COND_READONLY &&
-                   cond != ZBC_ZONE_COND_OFFLINE)
-                       set_bit(lba >> zone_shift, seq_zones_bitmap);
-               next_lba = lba + get_unaligned_be64(&rec[8]);
-               rec += 64;
-       }
-
-       return next_lba;
-}
-
-/**
- * sd_zbc_setup_seq_zones_bitmap - Initialize a seq zone bitmap.
- * @sdkp: target disk
- * @zone_shift: logarithm base 2 of the number of blocks in a zone
- * @nr_zones: number of zones to set up a seq zone bitmap for
- *
- * Allocate a zone bitmap and initialize it by identifying sequential zones.
- */
-static unsigned long *
-sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp, u32 zone_shift,
-                             u32 nr_zones)
-{
-       struct request_queue *q = sdkp->disk->queue;
-       unsigned long *seq_zones_bitmap;
-       sector_t lba = 0;
-       unsigned char *buf;
-       int ret = -ENOMEM;
-
-       seq_zones_bitmap = sd_zbc_alloc_zone_bitmap(nr_zones, q->node);
-       if (!seq_zones_bitmap)
-               return ERR_PTR(-ENOMEM);
-
-       buf = kmalloc(SD_ZBC_BUF_SIZE, GFP_KERNEL);
-       if (!buf)
-               goto out;
-
-       while (lba < sdkp->capacity) {
-               ret = sd_zbc_report_zones(sdkp, buf, SD_ZBC_BUF_SIZE, lba);
-               if (ret)
-                       goto out;
-               lba = sd_zbc_get_seq_zones(sdkp, buf, SD_ZBC_BUF_SIZE,
-                                          zone_shift, seq_zones_bitmap);
-       }
-
-       if (lba != sdkp->capacity) {
-               /* Something went wrong */
-               ret = -EIO;
-       }
-
-out:
-       kfree(buf);
-       if (ret) {
-               kfree(seq_zones_bitmap);
-               return ERR_PTR(ret);
-       }
-       return seq_zones_bitmap;
-}
-
-static void sd_zbc_cleanup(struct scsi_disk *sdkp)
-{
-       struct request_queue *q = sdkp->disk->queue;
-
-       kfree(q->seq_zones_bitmap);
-       q->seq_zones_bitmap = NULL;
-
-       kfree(q->seq_zones_wlock);
-       q->seq_zones_wlock = NULL;
-
-       q->nr_zones = 0;
-}
-
-static int sd_zbc_setup(struct scsi_disk *sdkp, u32 zone_blocks)
-{
-       struct request_queue *q = sdkp->disk->queue;
-       u32 zone_shift = ilog2(zone_blocks);
-       u32 nr_zones;
-       int ret;
-
-       /* chunk_sectors indicates the zone size */
-       blk_queue_chunk_sectors(q,
-                       logical_to_sectors(sdkp->device, zone_blocks));
-       nr_zones = round_up(sdkp->capacity, zone_blocks) >> zone_shift;
-
-       /*
-        * Initialize the device request queue information if the number
-        * of zones changed.
-        */
-       if (nr_zones != sdkp->nr_zones || nr_zones != q->nr_zones) {
-               unsigned long *seq_zones_wlock = NULL, *seq_zones_bitmap = NULL;
-               size_t zone_bitmap_size;
-
-               if (nr_zones) {
-                       seq_zones_wlock = sd_zbc_alloc_zone_bitmap(nr_zones,
-                                                                  q->node);
-                       if (!seq_zones_wlock) {
-                               ret = -ENOMEM;
-                               goto err;
-                       }
-
-                       seq_zones_bitmap = sd_zbc_setup_seq_zones_bitmap(sdkp,
-                                                       zone_shift, nr_zones);
-                       if (IS_ERR(seq_zones_bitmap)) {
-                               ret = PTR_ERR(seq_zones_bitmap);
-                               kfree(seq_zones_wlock);
-                               goto err;
-                       }
-               }
-               zone_bitmap_size = BITS_TO_LONGS(nr_zones) *
-                       sizeof(unsigned long);
-               blk_mq_freeze_queue(q);
-               if (q->nr_zones != nr_zones) {
-                       /* READ16/WRITE16 is mandatory for ZBC disks */
-                       sdkp->device->use_16_for_rw = 1;
-                       sdkp->device->use_10_for_rw = 0;
-
-                       sdkp->zone_blocks = zone_blocks;
-                       sdkp->zone_shift = zone_shift;
-                       sdkp->nr_zones = nr_zones;
-                       q->nr_zones = nr_zones;
-                       swap(q->seq_zones_wlock, seq_zones_wlock);
-                       swap(q->seq_zones_bitmap, seq_zones_bitmap);
-               } else if (memcmp(q->seq_zones_bitmap, seq_zones_bitmap,
-                                 zone_bitmap_size) != 0) {
-                       memcpy(q->seq_zones_bitmap, seq_zones_bitmap,
-                              zone_bitmap_size);
-               }
-               blk_mq_unfreeze_queue(q);
-               kfree(seq_zones_wlock);
-               kfree(seq_zones_bitmap);
-       }
-
-       return 0;
-
-err:
-       sd_zbc_cleanup(sdkp);
-       return ret;
-}
-
  int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
  {
-       int64_t zone_blocks;
+       struct gendisk *disk = sdkp->disk;
+       unsigned int nr_zones;
+       u32 zone_blocks;
         int ret;
  
         if (!sd_is_zoned(sdkp))
@@ -675,26 +439,8 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
                  */
                 return 0;
  
-       /* Get zoned block device characteristics */
-       ret = sd_zbc_read_zoned_characteristics(sdkp, buf);
-       if (ret)
-               goto err;
-
-       /*
-        * Check for unconstrained reads: host-managed devices with
-        * constrained reads (drives failing read after write pointer)
-        * are not supported.
-        */
-       if (!sdkp->urswrz) {
-               if (sdkp->first_scan)
-                       sd_printk(KERN_NOTICE, sdkp,
-                         "constrained reads devices are not supported\n");
-               ret = -ENODEV;
-               goto err;
-       }
-
-       /* Check capacity */
-       ret = sd_zbc_check_capacity(sdkp, buf);
+       /* Check zoned block device characteristics (unconstrained reads) */
+       ret = sd_zbc_check_zoned_characteristics(sdkp, buf);
         if (ret)
                 goto err;
  
@@ -702,33 +448,44 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
          * Check zone size: only devices with a constant zone size (except
          * an eventual last runt zone) that is a power of 2 are supported.
          */
-       zone_blocks = sd_zbc_check_zone_size(sdkp);
-       ret = -EFBIG;
-       if (zone_blocks != (u32)zone_blocks)
-               goto err;
-       ret = zone_blocks;
-       if (ret < 0)
+       ret = sd_zbc_check_zones(sdkp, &zone_blocks);
+       if (ret != 0)
                 goto err;
  
         /* The drive satisfies the kernel restrictions: set it up */
-       ret = sd_zbc_setup(sdkp, zone_blocks);
-       if (ret)
-               goto err;
+       blk_queue_chunk_sectors(sdkp->disk->queue,
+                       logical_to_sectors(sdkp->device, zone_blocks));
+       nr_zones = round_up(sdkp->capacity, zone_blocks) >> ilog2(zone_blocks);
+
+       /* READ16/WRITE16 is mandatory for ZBC disks */
+       sdkp->device->use_16_for_rw = 1;
+       sdkp->device->use_10_for_rw = 0;
+
+       /*
+        * If something changed, revalidate the disk zone bitmaps once we have
+        * the capacity, that is on the second revalidate execution during disk
+        * scan and always during normal revalidate.
+        */
+       if (sdkp->first_scan)
+               return 0;
+       if (sdkp->zone_blocks != zone_blocks ||
+           sdkp->nr_zones != nr_zones ||
+           disk->queue->nr_zones != nr_zones) {
+               ret = blk_revalidate_disk_zones(disk);
+               if (ret != 0)
+                       goto err;
+               sdkp->zone_blocks = zone_blocks;
+               sdkp->nr_zones = nr_zones;
+       }
  
         return 0;
  
  err:
         sdkp->capacity = 0;
-       sd_zbc_cleanup(sdkp);
  
         return ret;
  }
  
-void sd_zbc_remove(struct scsi_disk *sdkp)
-{
-       sd_zbc_cleanup(sdkp);
-}
-
  void sd_zbc_print_zones(struct scsi_disk *sdkp)
  {
         if (!sd_is_zoned(sdkp) || !sdkp->capacity)
diff --git a/drivers/soc/dove/pmu.c b/drivers/soc/dove/pmu.c

index 5abb08f..ffc5311 100644 (file)
--- a/drivers/soc/dove/pmu.c
+++ b/drivers/soc/dove/pmu.c
@@ -383,7 +383,7 @@ int __init dove_init_pmu(void)
  
         domains_node = of_get_child_by_name(np_pmu, "domains");
         if (!domains_node) {
-               pr_err("%s: failed to find domains sub-node\n", np_pmu->name);
+               pr_err("%pOFn: failed to find domains sub-node\n", np_pmu);
                 return 0;
         }
  
@@ -396,7 +396,7 @@ int __init dove_init_pmu(void)
         pmu->pmc_base = of_iomap(pmu->of_node, 0);
         pmu->pmu_base = of_iomap(pmu->of_node, 1);
         if (!pmu->pmc_base || !pmu->pmu_base) {
-               pr_err("%s: failed to map PMU\n", np_pmu->name);
+               pr_err("%pOFn: failed to map PMU\n", np_pmu);
                 iounmap(pmu->pmu_base);
                 iounmap(pmu->pmc_base);
                 kfree(pmu);
@@ -414,7 +414,7 @@ int __init dove_init_pmu(void)
                         break;
  
                 domain->pmu = pmu;
-               domain->base.name = kstrdup(np->name, GFP_KERNEL);
+               domain->base.name = kasprintf(GFP_KERNEL, "%pOFn", np);
                 if (!domain->base.name) {
                         kfree(domain);
                         break;
@@ -444,7 +444,7 @@ int __init dove_init_pmu(void)
         /* Loss of the interrupt controller is not a fatal error. */
         parent_irq = irq_of_parse_and_map(pmu->of_node, 0);
         if (!parent_irq) {
-               pr_err("%s: no interrupt specified\n", np_pmu->name);
+               pr_err("%pOFn: no interrupt specified\n", np_pmu);
         } else {
                 ret = dove_init_pmu_irq(pmu, parent_irq);
                 if (ret)
diff --git a/drivers/soc/fsl/qbman/qman_ccsr.c b/drivers/soc/fsl/qbman/qman_ccsr.c

index 6fd5fef..109b38d 100644 (file)
--- a/drivers/soc/fsl/qbman/qman_ccsr.c
+++ b/drivers/soc/fsl/qbman/qman_ccsr.c
@@ -419,7 +419,7 @@ static size_t fqd_sz, pfdr_sz;
  static int zero_priv_mem(phys_addr_t addr, size_t sz)
  {
         /* map as cacheable, non-guarded */
-       void __iomem *tmpp = ioremap_prot(addr, sz, 0);
+       void __iomem *tmpp = ioremap_cache(addr, sz);
  
         if (!tmpp)
                 return -ENOMEM;
diff --git a/drivers/soc/fsl/qe/qe_tdm.c b/drivers/soc/fsl/qe/qe_tdm.c

index f744c21..f78c346 100644 (file)
--- a/drivers/soc/fsl/qe/qe_tdm.c
+++ b/drivers/soc/fsl/qe/qe_tdm.c
@@ -131,7 +131,7 @@ int ucc_of_parse_tdm(struct device_node *np, struct ucc_tdm *utdm,
  
         pdev = of_find_device_by_node(np2);
         if (!pdev) {
-               pr_err("%s: failed to lookup pdev\n", np2->name);
+               pr_err("%pOFn: failed to lookup pdev\n", np2);
                 of_node_put(np2);
                 return -EINVAL;
         }
@@ -153,7 +153,7 @@ int ucc_of_parse_tdm(struct device_node *np, struct ucc_tdm *utdm,
         pdev = of_find_device_by_node(np2);
         if (!pdev) {
                 ret = -EINVAL;
-               pr_err("%s: failed to lookup pdev\n", np2->name);
+               pr_err("%pOFn: failed to lookup pdev\n", np2);
                 of_node_put(np2);
                 goto err_miss_siram_property;
         }
diff --git a/drivers/soc/qcom/apr.c b/drivers/soc/qcom/apr.c

index 57af8a5..4bda793 100644 (file)
--- a/drivers/soc/qcom/apr.c
+++ b/drivers/soc/qcom/apr.c
@@ -219,7 +219,7 @@ static int apr_add_device(struct device *dev, struct device_node *np,
         adev->domain_id = id->domain_id;
         adev->version = id->svc_version;
         if (np)
-               strncpy(adev->name, np->name, APR_NAME_SIZE);
+               snprintf(adev->name, APR_NAME_SIZE, "%pOFn", np);
         else
                 strncpy(adev->name, id->name, APR_NAME_SIZE);
  
diff --git a/drivers/soc/rockchip/pm_domains.c b/drivers/soc/rockchip/pm_domains.c

index 6dff868..6f86a72 100644 (file)
--- a/drivers/soc/rockchip/pm_domains.c
+++ b/drivers/soc/rockchip/pm_domains.c
@@ -392,21 +392,21 @@ static int rockchip_pm_add_one_domain(struct rockchip_pmu *pmu,
         error = of_property_read_u32(node, "reg", &id);
         if (error) {
                 dev_err(pmu->dev,
-                       "%s: failed to retrieve domain id (reg): %d\n",
-                       node->name, error);
+                       "%pOFn: failed to retrieve domain id (reg): %d\n",
+                       node, error);
                 return -EINVAL;
         }
  
         if (id >= pmu->info->num_domains) {
-               dev_err(pmu->dev, "%s: invalid domain id %d\n",
-                       node->name, id);
+               dev_err(pmu->dev, "%pOFn: invalid domain id %d\n",
+                       node, id);
                 return -EINVAL;
         }
  
         pd_info = &pmu->info->domain_info[id];
         if (!pd_info) {
-               dev_err(pmu->dev, "%s: undefined domain id %d\n",
-                       node->name, id);
+               dev_err(pmu->dev, "%pOFn: undefined domain id %d\n",
+                       node, id);
                 return -EINVAL;
         }
  
@@ -424,8 +424,8 @@ static int rockchip_pm_add_one_domain(struct rockchip_pmu *pmu,
                 if (!pd->clks)
                         return -ENOMEM;
         } else {
-               dev_dbg(pmu->dev, "%s: doesn't have clocks: %d\n",
-                       node->name, pd->num_clks);
+               dev_dbg(pmu->dev, "%pOFn: doesn't have clocks: %d\n",
+                       node, pd->num_clks);
                 pd->num_clks = 0;
         }
  
@@ -434,8 +434,8 @@ static int rockchip_pm_add_one_domain(struct rockchip_pmu *pmu,
                 if (IS_ERR(pd->clks[i].clk)) {
                         error = PTR_ERR(pd->clks[i].clk);
                         dev_err(pmu->dev,
-                               "%s: failed to get clk at index %d: %d\n",
-                               node->name, i, error);
+                               "%pOFn: failed to get clk at index %d: %d\n",
+                               node, i, error);
                         return error;
                 }
         }
@@ -486,8 +486,8 @@ static int rockchip_pm_add_one_domain(struct rockchip_pmu *pmu,
         error = rockchip_pd_power(pd, true);
         if (error) {
                 dev_err(pmu->dev,
-                       "failed to power on domain '%s': %d\n",
-                       node->name, error);
+                       "failed to power on domain '%pOFn': %d\n",
+                       node, error);
                 goto err_unprepare_clocks;
         }
  
@@ -575,24 +575,24 @@ static int rockchip_pm_add_subdomain(struct rockchip_pmu *pmu,
                 error = of_property_read_u32(parent, "reg", &idx);
                 if (error) {
                         dev_err(pmu->dev,
-                               "%s: failed to retrieve domain id (reg): %d\n",
-                               parent->name, error);
+                               "%pOFn: failed to retrieve domain id (reg): %d\n",
+                               parent, error);
                         goto err_out;
                 }
                 parent_domain = pmu->genpd_data.domains[idx];
  
                 error = rockchip_pm_add_one_domain(pmu, np);
                 if (error) {
-                       dev_err(pmu->dev, "failed to handle node %s: %d\n",
-                               np->name, error);
+                       dev_err(pmu->dev, "failed to handle node %pOFn: %d\n",
+                               np, error);
                         goto err_out;
                 }
  
                 error = of_property_read_u32(np, "reg", &idx);
                 if (error) {
                         dev_err(pmu->dev,
-                               "%s: failed to retrieve domain id (reg): %d\n",
-                               np->name, error);
+                               "%pOFn: failed to retrieve domain id (reg): %d\n",
+                               np, error);
                         goto err_out;
                 }
                 child_domain = pmu->genpd_data.domains[idx];
@@ -683,16 +683,16 @@ static int rockchip_pm_domain_probe(struct platform_device *pdev)
         for_each_available_child_of_node(np, node) {
                 error = rockchip_pm_add_one_domain(pmu, node);
                 if (error) {
-                       dev_err(dev, "failed to handle node %s: %d\n",
-                               node->name, error);
+                       dev_err(dev, "failed to handle node %pOFn: %d\n",
+                               node, error);
                         of_node_put(node);
                         goto err_out;
                 }
  
                 error = rockchip_pm_add_subdomain(pmu, node);
                 if (error < 0) {
-                       dev_err(dev, "failed to handle subdomain node %s: %d\n",
-                               node->name, error);
+                       dev_err(dev, "failed to handle subdomain node %pOFn: %d\n",
+                               node, error);
                         of_node_put(node);
                         goto err_out;
                 }
diff --git a/drivers/soc/tegra/pmc.c b/drivers/soc/tegra/pmc.c

index 2d6f3fc..acbe63e 100644 (file)
--- a/drivers/soc/tegra/pmc.c
+++ b/drivers/soc/tegra/pmc.c
@@ -796,7 +796,7 @@ static void tegra_powergate_add(struct tegra_pmc *pmc, struct device_node *np)
  
         id = tegra_powergate_lookup(pmc, np->name);
         if (id < 0) {
-               pr_err("powergate lookup failed for %s: %d\n", np->name, id);
+               pr_err("powergate lookup failed for %pOFn: %d\n", np, id);
                 goto free_mem;
         }
  
@@ -816,13 +816,13 @@ static void tegra_powergate_add(struct tegra_pmc *pmc, struct device_node *np)
  
         err = tegra_powergate_of_get_clks(pg, np);
         if (err < 0) {
-               pr_err("failed to get clocks for %s: %d\n", np->name, err);
+               pr_err("failed to get clocks for %pOFn: %d\n", np, err);
                 goto set_available;
         }
  
         err = tegra_powergate_of_get_resets(pg, np, off);
         if (err < 0) {
-               pr_err("failed to get resets for %s: %d\n", np->name, err);
+               pr_err("failed to get resets for %pOFn: %d\n", np, err);
                 goto remove_clks;
         }
  
@@ -851,15 +851,15 @@ static void tegra_powergate_add(struct tegra_pmc *pmc, struct device_node *np)
  
         err = pm_genpd_init(&pg->genpd, NULL, off);
         if (err < 0) {
-               pr_err("failed to initialise PM domain %s: %d\n", np->name,
+               pr_err("failed to initialise PM domain %pOFn: %d\n", np,
                        err);
                 goto remove_resets;
         }
  
         err = of_genpd_add_provider_simple(np, &pg->genpd);
         if (err < 0) {
-               pr_err("failed to add PM domain provider for %s: %d\n",
-                      np->name, err);
+               pr_err("failed to add PM domain provider for %pOFn: %d\n",
+                      np, err);
                 goto remove_genpd;
         }
  
diff --git a/drivers/soc/ti/knav_dma.c b/drivers/soc/ti/knav_dma.c

index 224d7dd..bbd4e5b 100644 (file)
--- a/drivers/soc/ti/knav_dma.c
+++ b/drivers/soc/ti/knav_dma.c
@@ -544,15 +544,15 @@ static void __iomem *pktdma_get_regs(struct knav_dma_device *dma,
  
         ret = of_address_to_resource(node, index, &res);
         if (ret) {
-               dev_err(dev, "Can't translate of node(%s) address for index(%d)\n",
-                       node->name, index);
+               dev_err(dev, "Can't translate of node(%pOFn) address for index(%d)\n",
+                       node, index);
                 return ERR_PTR(ret);
         }
  
         regs = devm_ioremap_resource(kdev->dev, &res);
         if (IS_ERR(regs))
-               dev_err(dev, "Failed to map register base for index(%d) node(%s)\n",
-                       index, node->name);
+               dev_err(dev, "Failed to map register base for index(%d) node(%pOFn)\n",
+                       index, node);
         if (_size)
                 *_size = resource_size(&res);
  
diff --git a/drivers/soc/ti/knav_qmss_queue.c b/drivers/soc/ti/knav_qmss_queue.c

index 6755f2a..b5d5673 100644 (file)
--- a/drivers/soc/ti/knav_qmss_queue.c
+++ b/drivers/soc/ti/knav_qmss_queue.c
@@ -1382,15 +1382,15 @@ static void __iomem *knav_queue_map_reg(struct knav_device *kdev,
  
         ret = of_address_to_resource(node, index, &res);
         if (ret) {
-               dev_err(kdev->dev, "Can't translate of node(%s) address for index(%d)\n",
-                       node->name, index);
+               dev_err(kdev->dev, "Can't translate of node(%pOFn) address for index(%d)\n",
+                       node, index);
                 return ERR_PTR(ret);
         }
  
         regs = devm_ioremap_resource(kdev->dev, &res);
         if (IS_ERR(regs))
-               dev_err(kdev->dev, "Failed to map register base for index(%d) node(%s)\n",
-                       index, node->name);
+               dev_err(kdev->dev, "Failed to map register base for index(%d) node(%pOFn)\n",
+                       index, node);
         return regs;
  }
  
diff --git a/drivers/tc/tc.c b/drivers/tc/tc.c

index 3be9519..cf3fad2 100644 (file)
--- a/drivers/tc/tc.c
+++ b/drivers/tc/tc.c
@@ -2,7 +2,7 @@
   *     TURBOchannel bus services.
   *
   *     Copyright (c) Harald Koerfgen, 1998
- *     Copyright (c) 2001, 2003, 2005, 2006  Maciej W. Rozycki
+ *     Copyright (c) 2001, 2003, 2005, 2006, 2018  Maciej W. Rozycki
   *     Copyright (c) 2005  James Simmons
   *
   *     This file is subject to the terms and conditions of the GNU
@@ -10,6 +10,7 @@
   *     directory of this archive for more details.
   */
  #include <linux/compiler.h>
+#include <linux/dma-mapping.h>
  #include <linux/errno.h>
  #include <linux/init.h>
  #include <linux/ioport.h>
@@ -92,6 +93,11 @@ static void __init tc_bus_add_devices(struct tc_bus *tbus)
                 tdev->dev.bus = &tc_bus_type;
                 tdev->slot = slot;
  
+               /* TURBOchannel has 34-bit DMA addressing (16GiB space). */
+               tdev->dma_mask = DMA_BIT_MASK(34);
+               tdev->dev.dma_mask = &tdev->dma_mask;
+               tdev->dev.coherent_dma_mask = DMA_BIT_MASK(34);
+
                 for (i = 0; i < 8; i++) {
                         tdev->firmware[i] =
                                 readb(module + offset + TC_FIRM_VER + 4 * i);
diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig

index 0e69edc..5422523 100644 (file)
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -432,7 +432,7 @@ source "drivers/thermal/samsung/Kconfig"
  endmenu
  
  menu "STMicroelectronics thermal drivers"
-depends on ARCH_STI && OF
+depends on (ARCH_STI || ARCH_STM32) && OF
  source "drivers/thermal/st/Kconfig"
  endmenu
  
diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile

index 610344e..82bb50d 100644 (file)
--- a/drivers/thermal/Makefile
+++ b/drivers/thermal/Makefile
@@ -53,7 +53,7 @@ obj-$(CONFIG_TI_SOC_THERMAL)  += ti-soc-thermal/
  obj-$(CONFIG_INT340X_THERMAL)  += int340x_thermal/
  obj-$(CONFIG_INTEL_BXT_PMIC_THERMAL) += intel_bxt_pmic_thermal.o
  obj-$(CONFIG_INTEL_PCH_THERMAL)        += intel_pch_thermal.o
-obj-$(CONFIG_ST_THERMAL)       += st/
+obj-y                          += st/
  obj-$(CONFIG_QCOM_TSENS)       += qcom/
  obj-y                          += tegra/
  obj-$(CONFIG_HISI_THERMAL)     += hisi_thermal.o
diff --git a/drivers/thermal/armada_thermal.c b/drivers/thermal/armada_thermal.c

index 2c2f6d9..92f67d4 100644 (file)
--- a/drivers/thermal/armada_thermal.c
+++ b/drivers/thermal/armada_thermal.c
@@ -526,8 +526,8 @@ static int armada_thermal_probe_legacy(struct platform_device *pdev,
  
         /* First memory region points towards the status register */
         res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (IS_ERR(res))
-               return PTR_ERR(res);
+       if (!res)
+               return -EIO;
  
         /*
          * Edit the resource start address and length to map over all the
diff --git a/drivers/thermal/da9062-thermal.c b/drivers/thermal/da9062-thermal.c

index dd8dd94..01b0cb9 100644 (file)
--- a/drivers/thermal/da9062-thermal.c
+++ b/drivers/thermal/da9062-thermal.c
@@ -106,7 +106,7 @@ static void da9062_thermal_poll_on(struct work_struct *work)
                                            THERMAL_EVENT_UNSPECIFIED);
  
                 delay = msecs_to_jiffies(thermal->zone->passive_delay);
-               schedule_delayed_work(&thermal->work, delay);
+               queue_delayed_work(system_freezable_wq, &thermal->work, delay);
                 return;
         }
  
@@ -125,7 +125,7 @@ static irqreturn_t da9062_thermal_irq_handler(int irq, void *data)
         struct da9062_thermal *thermal = data;
  
         disable_irq_nosync(thermal->irq);
-       schedule_delayed_work(&thermal->work, 0);
+       queue_delayed_work(system_freezable_wq, &thermal->work, 0);
  
         return IRQ_HANDLED;
  }
diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c

index 761d055..c4111a9 100644 (file)
--- a/drivers/thermal/hisi_thermal.c
+++ b/drivers/thermal/hisi_thermal.c
@@ -55,25 +55,39 @@
  #define HI3660_TEMP_STEP               (205)
  #define HI3660_TEMP_LAG                        (4000)
  
-#define HI6220_DEFAULT_SENSOR          2
-#define HI3660_DEFAULT_SENSOR          1
+#define HI6220_CLUSTER0_SENSOR         2
+#define HI6220_CLUSTER1_SENSOR         1
+
+#define HI3660_LITTLE_SENSOR           0
+#define HI3660_BIG_SENSOR              1
+#define HI3660_G3D_SENSOR              2
+#define HI3660_MODEM_SENSOR            3
+
+struct hisi_thermal_data;
  
  struct hisi_thermal_sensor {
+       struct hisi_thermal_data *data;
         struct thermal_zone_device *tzd;
+       const char *irq_name;
         uint32_t id;
         uint32_t thres_temp;
  };
  
+struct hisi_thermal_ops {
+       int (*get_temp)(struct hisi_thermal_sensor *sensor);
+       int (*enable_sensor)(struct hisi_thermal_sensor *sensor);
+       int (*disable_sensor)(struct hisi_thermal_sensor *sensor);
+       int (*irq_handler)(struct hisi_thermal_sensor *sensor);
+       int (*probe)(struct hisi_thermal_data *data);
+};
+
  struct hisi_thermal_data {
-       int (*get_temp)(struct hisi_thermal_data *data);
-       int (*enable_sensor)(struct hisi_thermal_data *data);
-       int (*disable_sensor)(struct hisi_thermal_data *data);
-       int (*irq_handler)(struct hisi_thermal_data *data);
+       const struct hisi_thermal_ops *ops;
+       struct hisi_thermal_sensor *sensor;
         struct platform_device *pdev;
         struct clk *clk;
-       struct hisi_thermal_sensor sensor;
         void __iomem *regs;
-       int irq;
+       int nr_sensors;
  };
  
  /*
@@ -266,30 +280,40 @@ static inline void hi6220_thermal_hdak_set(void __iomem *addr, int value)
                (value << 4), addr + HI6220_TEMP0_CFG);
  }
  
-static int hi6220_thermal_irq_handler(struct hisi_thermal_data *data)
+static int hi6220_thermal_irq_handler(struct hisi_thermal_sensor *sensor)
  {
+       struct hisi_thermal_data *data = sensor->data;
+
         hi6220_thermal_alarm_clear(data->regs, 1);
         return 0;
  }
  
-static int hi3660_thermal_irq_handler(struct hisi_thermal_data *data)
+static int hi3660_thermal_irq_handler(struct hisi_thermal_sensor *sensor)
  {
-       hi3660_thermal_alarm_clear(data->regs, data->sensor.id, 1);
+       struct hisi_thermal_data *data = sensor->data;
+
+       hi3660_thermal_alarm_clear(data->regs, sensor->id, 1);
         return 0;
  }
  
-static int hi6220_thermal_get_temp(struct hisi_thermal_data *data)
+static int hi6220_thermal_get_temp(struct hisi_thermal_sensor *sensor)
  {
+       struct hisi_thermal_data *data = sensor->data;
+
         return hi6220_thermal_get_temperature(data->regs);
  }
  
-static int hi3660_thermal_get_temp(struct hisi_thermal_data *data)
+static int hi3660_thermal_get_temp(struct hisi_thermal_sensor *sensor)
  {
-       return hi3660_thermal_get_temperature(data->regs, data->sensor.id);
+       struct hisi_thermal_data *data = sensor->data;
+
+       return hi3660_thermal_get_temperature(data->regs, sensor->id);
  }
  
-static int hi6220_thermal_disable_sensor(struct hisi_thermal_data *data)
+static int hi6220_thermal_disable_sensor(struct hisi_thermal_sensor *sensor)
  {
+       struct hisi_thermal_data *data = sensor->data;
+
         /* disable sensor module */
         hi6220_thermal_enable(data->regs, 0);
         hi6220_thermal_alarm_enable(data->regs, 0);
@@ -300,16 +324,18 @@ static int hi6220_thermal_disable_sensor(struct hisi_thermal_data *data)
         return 0;
  }
  
-static int hi3660_thermal_disable_sensor(struct hisi_thermal_data *data)
+static int hi3660_thermal_disable_sensor(struct hisi_thermal_sensor *sensor)
  {
+       struct hisi_thermal_data *data = sensor->data;
+
         /* disable sensor module */
-       hi3660_thermal_alarm_enable(data->regs, data->sensor.id, 0);
+       hi3660_thermal_alarm_enable(data->regs, sensor->id, 0);
         return 0;
  }
  
-static int hi6220_thermal_enable_sensor(struct hisi_thermal_data *data)
+static int hi6220_thermal_enable_sensor(struct hisi_thermal_sensor *sensor)
  {
-       struct hisi_thermal_sensor *sensor = &data->sensor;
+       struct hisi_thermal_data *data = sensor->data;
         int ret;
  
         /* enable clock for tsensor */
@@ -345,10 +371,10 @@ static int hi6220_thermal_enable_sensor(struct hisi_thermal_data *data)
         return 0;
  }
  
-static int hi3660_thermal_enable_sensor(struct hisi_thermal_data *data)
+static int hi3660_thermal_enable_sensor(struct hisi_thermal_sensor *sensor)
  {
         unsigned int value;
-       struct hisi_thermal_sensor *sensor = &data->sensor;
+       struct hisi_thermal_data *data = sensor->data;
  
         /* disable interrupt */
         hi3660_thermal_alarm_enable(data->regs, sensor->id, 0);
@@ -371,21 +397,8 @@ static int hi6220_thermal_probe(struct hisi_thermal_data *data)
  {
         struct platform_device *pdev = data->pdev;
         struct device *dev = &pdev->dev;
-       struct resource *res;
         int ret;
  
-       data->get_temp = hi6220_thermal_get_temp;
-       data->enable_sensor = hi6220_thermal_enable_sensor;
-       data->disable_sensor = hi6220_thermal_disable_sensor;
-       data->irq_handler = hi6220_thermal_irq_handler;
-
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       data->regs = devm_ioremap_resource(dev, res);
-       if (IS_ERR(data->regs)) {
-               dev_err(dev, "failed to get io address\n");
-               return PTR_ERR(data->regs);
-       }
-
         data->clk = devm_clk_get(dev, "thermal_clk");
         if (IS_ERR(data->clk)) {
                 ret = PTR_ERR(data->clk);
@@ -394,11 +407,14 @@ static int hi6220_thermal_probe(struct hisi_thermal_data *data)
                 return ret;
         }
  
-       data->irq = platform_get_irq(pdev, 0);
-       if (data->irq < 0)
-               return data->irq;
+       data->sensor = devm_kzalloc(dev, sizeof(*data->sensor), GFP_KERNEL);
+       if (!data->sensor)
+               return -ENOMEM;
  
-       data->sensor.id = HI6220_DEFAULT_SENSOR;
+       data->sensor[0].id = HI6220_CLUSTER0_SENSOR;
+       data->sensor[0].irq_name = "tsensor_intr";
+       data->sensor[0].data = data;
+       data->nr_sensors = 1;
  
         return 0;
  }
@@ -407,38 +423,34 @@ static int hi3660_thermal_probe(struct hisi_thermal_data *data)
  {
         struct platform_device *pdev = data->pdev;
         struct device *dev = &pdev->dev;
-       struct resource *res;
  
-       data->get_temp = hi3660_thermal_get_temp;
-       data->enable_sensor = hi3660_thermal_enable_sensor;
-       data->disable_sensor = hi3660_thermal_disable_sensor;
-       data->irq_handler = hi3660_thermal_irq_handler;
+       data->nr_sensors = 2;
  
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       data->regs = devm_ioremap_resource(dev, res);
-       if (IS_ERR(data->regs)) {
-               dev_err(dev, "failed to get io address\n");
-               return PTR_ERR(data->regs);
-       }
+       data->sensor = devm_kzalloc(dev, sizeof(*data->sensor) *
+                                   data->nr_sensors, GFP_KERNEL);
+       if (!data->sensor)
+               return -ENOMEM;
  
-       data->irq = platform_get_irq(pdev, 0);
-       if (data->irq < 0)
-               return data->irq;
+       data->sensor[0].id = HI3660_BIG_SENSOR;
+       data->sensor[0].irq_name = "tsensor_a73";
+       data->sensor[0].data = data;
  
-       data->sensor.id = HI3660_DEFAULT_SENSOR;
+       data->sensor[1].id = HI3660_LITTLE_SENSOR;
+       data->sensor[1].irq_name = "tsensor_a53";
+       data->sensor[1].data = data;
  
         return 0;
  }
  
  static int hisi_thermal_get_temp(void *__data, int *temp)
  {
-       struct hisi_thermal_data *data = __data;
-       struct hisi_thermal_sensor *sensor = &data->sensor;
+       struct hisi_thermal_sensor *sensor = __data;
+       struct hisi_thermal_data *data = sensor->data;
  
-       *temp = data->get_temp(data);
+       *temp = data->ops->get_temp(sensor);
  
-       dev_dbg(&data->pdev->dev, "id=%d, temp=%d, thres=%d\n",
-               sensor->id, *temp, sensor->thres_temp);
+       dev_dbg(&data->pdev->dev, "tzd=%p, id=%d, temp=%d, thres=%d\n",
+               sensor->tzd, sensor->id, *temp, sensor->thres_temp);
  
         return 0;
  }
@@ -449,38 +461,39 @@ static const struct thermal_zone_of_device_ops hisi_of_thermal_ops = {
  
  static irqreturn_t hisi_thermal_alarm_irq_thread(int irq, void *dev)
  {
-       struct hisi_thermal_data *data = dev;
-       struct hisi_thermal_sensor *sensor = &data->sensor;
+       struct hisi_thermal_sensor *sensor = dev;
+       struct hisi_thermal_data *data = sensor->data;
         int temp = 0;
  
-       data->irq_handler(data);
+       data->ops->irq_handler(sensor);
  
-       hisi_thermal_get_temp(data, &temp);
+       hisi_thermal_get_temp(sensor, &temp);
  
         if (temp >= sensor->thres_temp) {
-               dev_crit(&data->pdev->dev, "THERMAL ALARM: %d > %d\n",
-                        temp, sensor->thres_temp);
+               dev_crit(&data->pdev->dev,
+                        "sensor <%d> THERMAL ALARM: %d > %d\n",
+                        sensor->id, temp, sensor->thres_temp);
  
-               thermal_zone_device_update(data->sensor.tzd,
+               thermal_zone_device_update(sensor->tzd,
                                            THERMAL_EVENT_UNSPECIFIED);
  
         } else {
-               dev_crit(&data->pdev->dev, "THERMAL ALARM stopped: %d < %d\n",
-                        temp, sensor->thres_temp);
+               dev_crit(&data->pdev->dev,
+                        "sensor <%d> THERMAL ALARM stopped: %d < %d\n",
+                        sensor->id, temp, sensor->thres_temp);
         }
  
         return IRQ_HANDLED;
  }
  
  static int hisi_thermal_register_sensor(struct platform_device *pdev,
-                                       struct hisi_thermal_data *data,
                                         struct hisi_thermal_sensor *sensor)
  {
         int ret, i;
         const struct thermal_trip *trip;
  
         sensor->tzd = devm_thermal_zone_of_sensor_register(&pdev->dev,
-                                                          sensor->id, data,
+                                                          sensor->id, sensor,
                                                            &hisi_of_thermal_ops);
         if (IS_ERR(sensor->tzd)) {
                 ret = PTR_ERR(sensor->tzd);
@@ -502,14 +515,30 @@ static int hisi_thermal_register_sensor(struct platform_device *pdev,
         return 0;
  }
  
+static const struct hisi_thermal_ops hi6220_ops = {
+       .get_temp       = hi6220_thermal_get_temp,
+       .enable_sensor  = hi6220_thermal_enable_sensor,
+       .disable_sensor = hi6220_thermal_disable_sensor,
+       .irq_handler    = hi6220_thermal_irq_handler,
+       .probe          = hi6220_thermal_probe,
+};
+
+static const struct hisi_thermal_ops hi3660_ops = {
+       .get_temp       = hi3660_thermal_get_temp,
+       .enable_sensor  = hi3660_thermal_enable_sensor,
+       .disable_sensor = hi3660_thermal_disable_sensor,
+       .irq_handler    = hi3660_thermal_irq_handler,
+       .probe          = hi3660_thermal_probe,
+};
+
  static const struct of_device_id of_hisi_thermal_match[] = {
         {
                 .compatible = "hisilicon,tsensor",
-               .data = hi6220_thermal_probe
+               .data = &hi6220_ops,
         },
         {
                 .compatible = "hisilicon,hi3660-tsensor",
-               .data = hi3660_thermal_probe
+               .data = &hi3660_ops,
         },
         { /* end */ }
  };
@@ -527,9 +556,9 @@ static void hisi_thermal_toggle_sensor(struct hisi_thermal_sensor *sensor,
  static int hisi_thermal_probe(struct platform_device *pdev)
  {
         struct hisi_thermal_data *data;
-       int (*platform_probe)(struct hisi_thermal_data *);
         struct device *dev = &pdev->dev;
-       int ret;
+       struct resource *res;
+       int i, ret;
  
         data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
         if (!data)
@@ -537,41 +566,50 @@ static int hisi_thermal_probe(struct platform_device *pdev)
  
         data->pdev = pdev;
         platform_set_drvdata(pdev, data);
+       data->ops = of_device_get_match_data(dev);
  
-       platform_probe = of_device_get_match_data(dev);
-       if (!platform_probe) {
-               dev_err(dev, "failed to get probe func\n");
-               return -EINVAL;
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       data->regs = devm_ioremap_resource(dev, res);
+       if (IS_ERR(data->regs)) {
+               dev_err(dev, "failed to get io address\n");
+               return PTR_ERR(data->regs);
         }
  
-       ret = platform_probe(data);
+       ret = data->ops->probe(data);
         if (ret)
                 return ret;
  
-       ret = hisi_thermal_register_sensor(pdev, data,
-                                          &data->sensor);
-       if (ret) {
-               dev_err(dev, "failed to register thermal sensor: %d\n", ret);
-               return ret;
-       }
+       for (i = 0; i < data->nr_sensors; i++) {
+               struct hisi_thermal_sensor *sensor = &data->sensor[i];
  
-       ret = data->enable_sensor(data);
-       if (ret) {
-               dev_err(dev, "Failed to setup the sensor: %d\n", ret);
-               return ret;
-       }
+               ret = hisi_thermal_register_sensor(pdev, sensor);
+               if (ret) {
+                       dev_err(dev, "failed to register thermal sensor: %d\n",
+                               ret);
+                       return ret;
+               }
+
+               ret = platform_get_irq_byname(pdev, sensor->irq_name);
+               if (ret < 0)
+                       return ret;
  
-       if (data->irq) {
-               ret = devm_request_threaded_irq(dev, data->irq, NULL,
-                               hisi_thermal_alarm_irq_thread,
-                               IRQF_ONESHOT, "hisi_thermal", data);
+               ret = devm_request_threaded_irq(dev, ret, NULL,
+                                               hisi_thermal_alarm_irq_thread,
+                                               IRQF_ONESHOT, sensor->irq_name,
+                                               sensor);
                 if (ret < 0) {
-                       dev_err(dev, "failed to request alarm irq: %d\n", ret);
+                       dev_err(dev, "Failed to request alarm irq: %d\n", ret);
                         return ret;
                 }
-       }
  
-       hisi_thermal_toggle_sensor(&data->sensor, true);
+               ret = data->ops->enable_sensor(sensor);
+               if (ret) {
+                       dev_err(dev, "Failed to setup the sensor: %d\n", ret);
+                       return ret;
+               }
+
+               hisi_thermal_toggle_sensor(sensor, true);
+       }
  
         return 0;
  }
@@ -579,11 +617,14 @@ static int hisi_thermal_probe(struct platform_device *pdev)
  static int hisi_thermal_remove(struct platform_device *pdev)
  {
         struct hisi_thermal_data *data = platform_get_drvdata(pdev);
-       struct hisi_thermal_sensor *sensor = &data->sensor;
+       int i;
  
-       hisi_thermal_toggle_sensor(sensor, false);
+       for (i = 0; i < data->nr_sensors; i++) {
+               struct hisi_thermal_sensor *sensor = &data->sensor[i];
  
-       data->disable_sensor(data);
+               hisi_thermal_toggle_sensor(sensor, false);
+               data->ops->disable_sensor(sensor);
+       }
  
         return 0;
  }
@@ -592,8 +633,10 @@ static int hisi_thermal_remove(struct platform_device *pdev)
  static int hisi_thermal_suspend(struct device *dev)
  {
         struct hisi_thermal_data *data = dev_get_drvdata(dev);
+       int i;
  
-       data->disable_sensor(data);
+       for (i = 0; i < data->nr_sensors; i++)
+               data->ops->disable_sensor(&data->sensor[i]);
  
         return 0;
  }
@@ -601,8 +644,12 @@ static int hisi_thermal_suspend(struct device *dev)
  static int hisi_thermal_resume(struct device *dev)
  {
         struct hisi_thermal_data *data = dev_get_drvdata(dev);
+       int i, ret = 0;
+
+       for (i = 0; i < data->nr_sensors; i++)
+               ret |= data->ops->enable_sensor(&data->sensor[i]);
  
-       return data->enable_sensor(data);
+       return ret;
  }
  #endif
  
diff --git a/drivers/thermal/imx_thermal.c b/drivers/thermal/imx_thermal.c

index aa452ac..1566154 100644 (file)
--- a/drivers/thermal/imx_thermal.c
+++ b/drivers/thermal/imx_thermal.c
@@ -725,7 +725,7 @@ static int imx_thermal_probe(struct platform_device *pdev)
         } else {
                 ret = imx_init_from_tempmon_data(pdev);
                 if (ret) {
-                       dev_err(&pdev->dev, "failed to init from from fsl,tempmon-data\n");
+                       dev_err(&pdev->dev, "failed to init from fsl,tempmon-data\n");
                         return ret;
                 }
         }
@@ -762,9 +762,7 @@ static int imx_thermal_probe(struct platform_device *pdev)
                 if (ret != -EPROBE_DEFER)
                         dev_err(&pdev->dev,
                                 "failed to get thermal clk: %d\n", ret);
-               cpufreq_cooling_unregister(data->cdev);
-               cpufreq_cpu_put(data->policy);
-               return ret;
+               goto cpufreq_put;
         }
  
         /*
@@ -777,9 +775,7 @@ static int imx_thermal_probe(struct platform_device *pdev)
         ret = clk_prepare_enable(data->thermal_clk);
         if (ret) {
                 dev_err(&pdev->dev, "failed to enable thermal clk: %d\n", ret);
-               cpufreq_cooling_unregister(data->cdev);
-               cpufreq_cpu_put(data->policy);
-               return ret;
+               goto cpufreq_put;
         }
  
         data->tz = thermal_zone_device_register("imx_thermal_zone",
@@ -792,10 +788,7 @@ static int imx_thermal_probe(struct platform_device *pdev)
                 ret = PTR_ERR(data->tz);
                 dev_err(&pdev->dev,
                         "failed to register thermal zone device %d\n", ret);
-               clk_disable_unprepare(data->thermal_clk);
-               cpufreq_cooling_unregister(data->cdev);
-               cpufreq_cpu_put(data->policy);
-               return ret;
+               goto clk_disable;
         }
  
         dev_info(&pdev->dev, "%s CPU temperature grade - max:%dC"
@@ -827,14 +820,20 @@ static int imx_thermal_probe(struct platform_device *pdev)
                         0, "imx_thermal", data);
         if (ret < 0) {
                 dev_err(&pdev->dev, "failed to request alarm irq: %d\n", ret);
-               clk_disable_unprepare(data->thermal_clk);
-               thermal_zone_device_unregister(data->tz);
-               cpufreq_cooling_unregister(data->cdev);
-               cpufreq_cpu_put(data->policy);
-               return ret;
+               goto thermal_zone_unregister;
         }
  
         return 0;
+
+thermal_zone_unregister:
+       thermal_zone_device_unregister(data->tz);
+clk_disable:
+       clk_disable_unprepare(data->thermal_clk);
+cpufreq_put:
+       cpufreq_cooling_unregister(data->cdev);
+       cpufreq_cpu_put(data->policy);
+
+       return ret;
  }
  
  static int imx_thermal_remove(struct platform_device *pdev)
diff --git a/drivers/thermal/of-thermal.c b/drivers/thermal/of-thermal.c

index 4f28165..4bfdb4a 100644 (file)
--- a/drivers/thermal/of-thermal.c
+++ b/drivers/thermal/of-thermal.c
@@ -19,22 +19,33 @@
  /***   Private data structures to represent thermal device tree data ***/
  
  /**
- * struct __thermal_bind_param - a match between trip and cooling device
+ * struct __thermal_cooling_bind_param - a cooling device for a trip point
   * @cooling_device: a pointer to identify the referred cooling device
- * @trip_id: the trip point index
- * @usage: the percentage (from 0 to 100) of cooling contribution
   * @min: minimum cooling state used at this trip point
   * @max: maximum cooling state used at this trip point
   */
  
-struct __thermal_bind_params {
+struct __thermal_cooling_bind_param {
         struct device_node *cooling_device;
-       unsigned int trip_id;
-       unsigned int usage;
         unsigned long min;
         unsigned long max;
  };
  
+/**
+ * struct __thermal_bind_param - a match between trip and cooling device
+ * @tcbp: a pointer to an array of cooling devices
+ * @count: number of elements in array
+ * @trip_id: the trip point index
+ * @usage: the percentage (from 0 to 100) of cooling contribution
+ */
+
+struct __thermal_bind_params {
+       struct __thermal_cooling_bind_param *tcbp;
+       unsigned int count;
+       unsigned int trip_id;
+       unsigned int usage;
+};
+
  /**
   * struct __thermal_zone - internal representation of a thermal zone
   * @mode: current thermal zone device mode (enabled/disabled)
@@ -192,25 +203,31 @@ static int of_thermal_bind(struct thermal_zone_device *thermal,
                            struct thermal_cooling_device *cdev)
  {
         struct __thermal_zone *data = thermal->devdata;
-       int i;
+       struct __thermal_bind_params *tbp;
+       struct __thermal_cooling_bind_param *tcbp;
+       int i, j;
  
         if (!data || IS_ERR(data))
                 return -ENODEV;
  
         /* find where to bind */
         for (i = 0; i < data->num_tbps; i++) {
-               struct __thermal_bind_params *tbp = data->tbps + i;
+               tbp = data->tbps + i;
  
-               if (tbp->cooling_device == cdev->np) {
-                       int ret;
+               for (j = 0; j < tbp->count; j++) {
+                       tcbp = tbp->tcbp + j;
  
-                       ret = thermal_zone_bind_cooling_device(thermal,
+                       if (tcbp->cooling_device == cdev->np) {
+                               int ret;
+
+                               ret = thermal_zone_bind_cooling_device(thermal,
                                                 tbp->trip_id, cdev,
-                                               tbp->max,
-                                               tbp->min,
+                                               tcbp->max,
+                                               tcbp->min,
                                                 tbp->usage);
-                       if (ret)
-                               return ret;
+                               if (ret)
+                                       return ret;
+                       }
                 }
         }
  
@@ -221,22 +238,28 @@ static int of_thermal_unbind(struct thermal_zone_device *thermal,
                              struct thermal_cooling_device *cdev)
  {
         struct __thermal_zone *data = thermal->devdata;
-       int i;
+       struct __thermal_bind_params *tbp;
+       struct __thermal_cooling_bind_param *tcbp;
+       int i, j;
  
         if (!data || IS_ERR(data))
                 return -ENODEV;
  
         /* find where to unbind */
         for (i = 0; i < data->num_tbps; i++) {
-               struct __thermal_bind_params *tbp = data->tbps + i;
+               tbp = data->tbps + i;
+
+               for (j = 0; j < tbp->count; j++) {
+                       tcbp = tbp->tcbp + j;
  
-               if (tbp->cooling_device == cdev->np) {
-                       int ret;
+                       if (tcbp->cooling_device == cdev->np) {
+                               int ret;
  
-                       ret = thermal_zone_unbind_cooling_device(thermal,
-                                               tbp->trip_id, cdev);
-                       if (ret)
-                               return ret;
+                               ret = thermal_zone_unbind_cooling_device(thermal,
+                                                       tbp->trip_id, cdev);
+                               if (ret)
+                                       return ret;
+                       }
                 }
         }
  
@@ -486,8 +509,8 @@ thermal_zone_of_sensor_register(struct device *dev, int sensor_id, void *data,
                 if (sensor_specs.args_count >= 1) {
                         id = sensor_specs.args[0];
                         WARN(sensor_specs.args_count > 1,
-                            "%s: too many cells in sensor specifier %d\n",
-                            sensor_specs.np->name, sensor_specs.args_count);
+                            "%pOFn: too many cells in sensor specifier %d\n",
+                            sensor_specs.np, sensor_specs.args_count);
                 } else {
                         id = 0;
                 }
@@ -655,8 +678,9 @@ static int thermal_of_populate_bind_params(struct device_node *np,
                                            int ntrips)
  {
         struct of_phandle_args cooling_spec;
+       struct __thermal_cooling_bind_param *__tcbp;
         struct device_node *trip;
-       int ret, i;
+       int ret, i, count;
         u32 prop;
  
         /* Default weight. Usage is optional */
@@ -683,20 +707,44 @@ static int thermal_of_populate_bind_params(struct device_node *np,
                 goto end;
         }
  
-       ret = of_parse_phandle_with_args(np, "cooling-device", "#cooling-cells",
-                                        0, &cooling_spec);
-       if (ret < 0) {
-               pr_err("missing cooling_device property\n");
+       count = of_count_phandle_with_args(np, "cooling-device",
+                                          "#cooling-cells");
+       if (!count) {
+               pr_err("Add a cooling_device property with at least one device\n");
                 goto end;
         }
-       __tbp->cooling_device = cooling_spec.np;
-       if (cooling_spec.args_count >= 2) { /* at least min and max */
-               __tbp->min = cooling_spec.args[0];
-               __tbp->max = cooling_spec.args[1];
-       } else {
-               pr_err("wrong reference to cooling device, missing limits\n");
+
+       __tcbp = kcalloc(count, sizeof(*__tcbp), GFP_KERNEL);
+       if (!__tcbp)
+               goto end;
+
+       for (i = 0; i < count; i++) {
+               ret = of_parse_phandle_with_args(np, "cooling-device",
+                               "#cooling-cells", i, &cooling_spec);
+               if (ret < 0) {
+                       pr_err("Invalid cooling-device entry\n");
+                       goto free_tcbp;
+               }
+
+               __tcbp[i].cooling_device = cooling_spec.np;
+
+               if (cooling_spec.args_count >= 2) { /* at least min and max */
+                       __tcbp[i].min = cooling_spec.args[0];
+                       __tcbp[i].max = cooling_spec.args[1];
+               } else {
+                       pr_err("wrong reference to cooling device, missing limits\n");
+               }
         }
  
+       __tbp->tcbp = __tcbp;
+       __tbp->count = count;
+
+       goto end;
+
+free_tcbp:
+       for (i = i - 1; i >= 0; i--)
+               of_node_put(__tcbp[i].cooling_device);
+       kfree(__tcbp);
  end:
         of_node_put(trip);
  
@@ -903,8 +951,16 @@ finish:
         return tz;
  
  free_tbps:
-       for (i = i - 1; i >= 0; i--)
-               of_node_put(tz->tbps[i].cooling_device);
+       for (i = i - 1; i >= 0; i--) {
+               struct __thermal_bind_params *tbp = tz->tbps + i;
+               int j;
+
+               for (j = 0; j < tbp->count; j++)
+                       of_node_put(tbp->tcbp[j].cooling_device);
+
+               kfree(tbp->tcbp);
+       }
+
         kfree(tz->tbps);
  free_trips:
         for (i = 0; i < tz->ntrips; i++)
@@ -920,10 +976,18 @@ free_tz:
  
  static inline void of_thermal_free_zone(struct __thermal_zone *tz)
  {
-       int i;
+       struct __thermal_bind_params *tbp;
+       int i, j;
+
+       for (i = 0; i < tz->num_tbps; i++) {
+               tbp = tz->tbps + i;
+
+               for (j = 0; j < tbp->count; j++)
+                       of_node_put(tbp->tcbp[j].cooling_device);
+
+               kfree(tbp->tcbp);
+       }
  
-       for (i = 0; i < tz->num_tbps; i++)
-               of_node_put(tz->tbps[i].cooling_device);
         kfree(tz->tbps);
         for (i = 0; i < tz->ntrips; i++)
                 of_node_put(tz->trips[i].np);
@@ -963,8 +1027,8 @@ int __init of_parse_thermal_zones(void)
  
                 tz = thermal_of_build_thermal_zone(child);
                 if (IS_ERR(tz)) {
-                       pr_err("failed to build thermal zone %s: %ld\n",
-                              child->name,
+                       pr_err("failed to build thermal zone %pOFn: %ld\n",
+                              child,
                                PTR_ERR(tz));
                         continue;
                 }
@@ -998,7 +1062,7 @@ int __init of_parse_thermal_zones(void)
                                                     tz->passive_delay,
                                                     tz->polling_delay);
                 if (IS_ERR(zone)) {
-                       pr_err("Failed to build %s zone %ld\n", child->name,
+                       pr_err("Failed to build %pOFn zone %ld\n", child,
                                PTR_ERR(zone));
                         kfree(tzp);
                         kfree(ops);
diff --git a/drivers/thermal/qcom-spmi-temp-alarm.c b/drivers/thermal/qcom-spmi-temp-alarm.c

index ad4f3a8..b2d5d5b 100644 (file)
--- a/drivers/thermal/qcom-spmi-temp-alarm.c
+++ b/drivers/thermal/qcom-spmi-temp-alarm.c
@@ -23,6 +23,8 @@
  #include <linux/regmap.h>
  #include <linux/thermal.h>
  
+#include "thermal_core.h"
+
  #define QPNP_TM_REG_TYPE               0x04
  #define QPNP_TM_REG_SUBTYPE            0x05
  #define QPNP_TM_REG_STATUS             0x08
@@ -37,9 +39,11 @@
  #define STATUS_GEN2_STATE_MASK         GENMASK(6, 4)
  #define STATUS_GEN2_STATE_SHIFT                4
  
-#define SHUTDOWN_CTRL1_OVERRIDE_MASK   GENMASK(7, 6)
+#define SHUTDOWN_CTRL1_OVERRIDE_S2     BIT(6)
  #define SHUTDOWN_CTRL1_THRESHOLD_MASK  GENMASK(1, 0)
  
+#define SHUTDOWN_CTRL1_RATE_25HZ       BIT(3)
+
  #define ALARM_CTRL_FORCE_ENABLE                BIT(7)
  
  /*
@@ -56,12 +60,19 @@
  #define TEMP_THRESH_STEP               5000    /* Threshold step: 5 C */
  
  #define THRESH_MIN                     0
+#define THRESH_MAX                     3
+
+/* Stage 2 Threshold Min: 125 C */
+#define STAGE2_THRESHOLD_MIN           125000
+/* Stage 2 Threshold Max: 140 C */
+#define STAGE2_THRESHOLD_MAX           140000
  
  /* Temperature in Milli Celsius reported during stage 0 if no ADC is present */
  #define DEFAULT_TEMP                   37000
  
  struct qpnp_tm_chip {
         struct regmap                   *map;
+       struct device                   *dev;
         struct thermal_zone_device      *tz_dev;
         unsigned int                    subtype;
         long                            temp;
@@ -69,6 +80,10 @@ struct qpnp_tm_chip {
         unsigned int                    stage;
         unsigned int                    prev_stage;
         unsigned int                    base;
+       /* protects .thresh, .stage and chip registers */
+       struct mutex                    lock;
+       bool                            initialized;
+
         struct iio_channel              *adc;
  };
  
@@ -125,6 +140,8 @@ static int qpnp_tm_update_temp_no_adc(struct qpnp_tm_chip *chip)
         unsigned int stage, stage_new, stage_old;
         int ret;
  
+       WARN_ON(!mutex_is_locked(&chip->lock));
+
         ret = qpnp_tm_get_temp_stage(chip);
         if (ret < 0)
                 return ret;
@@ -163,8 +180,15 @@ static int qpnp_tm_get_temp(void *data, int *temp)
         if (!temp)
                 return -EINVAL;
  
+       if (!chip->initialized) {
+               *temp = DEFAULT_TEMP;
+               return 0;
+       }
+
         if (!chip->adc) {
+               mutex_lock(&chip->lock);
                 ret = qpnp_tm_update_temp_no_adc(chip);
+               mutex_unlock(&chip->lock);
                 if (ret < 0)
                         return ret;
         } else {
@@ -180,8 +204,72 @@ static int qpnp_tm_get_temp(void *data, int *temp)
         return 0;
  }
  
+static int qpnp_tm_update_critical_trip_temp(struct qpnp_tm_chip *chip,
+                                            int temp)
+{
+       u8 reg;
+       bool disable_s2_shutdown = false;
+
+       WARN_ON(!mutex_is_locked(&chip->lock));
+
+       /*
+        * Default: S2 and S3 shutdown enabled, thresholds at
+        * 105C/125C/145C, monitoring at 25Hz
+        */
+       reg = SHUTDOWN_CTRL1_RATE_25HZ;
+
+       if (temp == THERMAL_TEMP_INVALID ||
+           temp < STAGE2_THRESHOLD_MIN) {
+               chip->thresh = THRESH_MIN;
+               goto skip;
+       }
+
+       if (temp <= STAGE2_THRESHOLD_MAX) {
+               chip->thresh = THRESH_MAX -
+                       ((STAGE2_THRESHOLD_MAX - temp) /
+                        TEMP_THRESH_STEP);
+               disable_s2_shutdown = true;
+       } else {
+               chip->thresh = THRESH_MAX;
+
+               if (chip->adc)
+                       disable_s2_shutdown = true;
+               else
+                       dev_warn(chip->dev,
+                                "No ADC is configured and critical temperature is above the maximum stage 2 threshold of 140 C! Configuring stage 2 shutdown at 140 C.\n");
+       }
+
+skip:
+       reg |= chip->thresh;
+       if (disable_s2_shutdown)
+               reg |= SHUTDOWN_CTRL1_OVERRIDE_S2;
+
+       return qpnp_tm_write(chip, QPNP_TM_REG_SHUTDOWN_CTRL1, reg);
+}
+
+static int qpnp_tm_set_trip_temp(void *data, int trip, int temp)
+{
+       struct qpnp_tm_chip *chip = data;
+       const struct thermal_trip *trip_points;
+       int ret;
+
+       trip_points = of_thermal_get_trip_points(chip->tz_dev);
+       if (!trip_points)
+               return -EINVAL;
+
+       if (trip_points[trip].type != THERMAL_TRIP_CRITICAL)
+               return 0;
+
+       mutex_lock(&chip->lock);
+       ret = qpnp_tm_update_critical_trip_temp(chip, temp);
+       mutex_unlock(&chip->lock);
+
+       return ret;
+}
+
  static const struct thermal_zone_of_device_ops qpnp_tm_sensor_ops = {
         .get_temp = qpnp_tm_get_temp,
+       .set_trip_temp = qpnp_tm_set_trip_temp,
  };
  
  static irqreturn_t qpnp_tm_isr(int irq, void *data)
@@ -193,6 +281,29 @@ static irqreturn_t qpnp_tm_isr(int irq, void *data)
         return IRQ_HANDLED;
  }
  
+static int qpnp_tm_get_critical_trip_temp(struct qpnp_tm_chip *chip)
+{
+       int ntrips;
+       const struct thermal_trip *trips;
+       int i;
+
+       ntrips = of_thermal_get_ntrips(chip->tz_dev);
+       if (ntrips <= 0)
+               return THERMAL_TEMP_INVALID;
+
+       trips = of_thermal_get_trip_points(chip->tz_dev);
+       if (!trips)
+               return THERMAL_TEMP_INVALID;
+
+       for (i = 0; i < ntrips; i++) {
+               if (of_thermal_is_trip_valid(chip->tz_dev, i) &&
+                   trips[i].type == THERMAL_TRIP_CRITICAL)
+                       return trips[i].temperature;
+       }
+
+       return THERMAL_TEMP_INVALID;
+}
+
  /*
   * This function initializes the internal temp value based on only the
   * current thermal stage and threshold. Setup threshold control and
@@ -203,17 +314,20 @@ static int qpnp_tm_init(struct qpnp_tm_chip *chip)
         unsigned int stage;
         int ret;
         u8 reg = 0;
+       int crit_temp;
+
+       mutex_lock(&chip->lock);
  
         ret = qpnp_tm_read(chip, QPNP_TM_REG_SHUTDOWN_CTRL1, &reg);
         if (ret < 0)
-               return ret;
+               goto out;
  
         chip->thresh = reg & SHUTDOWN_CTRL1_THRESHOLD_MASK;
         chip->temp = DEFAULT_TEMP;
  
         ret = qpnp_tm_get_temp_stage(chip);
         if (ret < 0)
-               return ret;
+               goto out;
         chip->stage = ret;
  
         stage = chip->subtype == QPNP_TM_SUBTYPE_GEN1
@@ -224,21 +338,19 @@ static int qpnp_tm_init(struct qpnp_tm_chip *chip)
                              (stage - 1) * TEMP_STAGE_STEP +
                              TEMP_THRESH_MIN;
  
-       /*
-        * Set threshold and disable software override of stage 2 and 3
-        * shutdowns.
-        */
-       chip->thresh = THRESH_MIN;
-       reg &= ~(SHUTDOWN_CTRL1_OVERRIDE_MASK | SHUTDOWN_CTRL1_THRESHOLD_MASK);
-       reg |= chip->thresh & SHUTDOWN_CTRL1_THRESHOLD_MASK;
-       ret = qpnp_tm_write(chip, QPNP_TM_REG_SHUTDOWN_CTRL1, reg);
+       crit_temp = qpnp_tm_get_critical_trip_temp(chip);
+       ret = qpnp_tm_update_critical_trip_temp(chip, crit_temp);
         if (ret < 0)
-               return ret;
+               goto out;
  
         /* Enable the thermal alarm PMIC module in always-on mode. */
         reg = ALARM_CTRL_FORCE_ENABLE;
         ret = qpnp_tm_write(chip, QPNP_TM_REG_ALARM_CTRL, reg);
  
+       chip->initialized = true;
+
+out:
+       mutex_unlock(&chip->lock);
         return ret;
  }
  
@@ -257,6 +369,9 @@ static int qpnp_tm_probe(struct platform_device *pdev)
                 return -ENOMEM;
  
         dev_set_drvdata(&pdev->dev, chip);
+       chip->dev = &pdev->dev;
+
+       mutex_init(&chip->lock);
  
         chip->map = dev_get_regmap(pdev->dev.parent, NULL);
         if (!chip->map)
@@ -302,6 +417,18 @@ static int qpnp_tm_probe(struct platform_device *pdev)
  
         chip->subtype = subtype;
  
+       /*
+        * Register the sensor before initializing the hardware to be able to
+        * read the trip points. get_temp() returns the default temperature
+        * before the hardware initialization is completed.
+        */
+       chip->tz_dev = devm_thermal_zone_of_sensor_register(
+               &pdev->dev, 0, chip, &qpnp_tm_sensor_ops);
+       if (IS_ERR(chip->tz_dev)) {
+               dev_err(&pdev->dev, "failed to register sensor\n");
+               return PTR_ERR(chip->tz_dev);
+       }
+
         ret = qpnp_tm_init(chip);
         if (ret < 0) {
                 dev_err(&pdev->dev, "init failed\n");
@@ -313,12 +440,7 @@ static int qpnp_tm_probe(struct platform_device *pdev)
         if (ret < 0)
                 return ret;
  
-       chip->tz_dev = devm_thermal_zone_of_sensor_register(&pdev->dev, 0, chip,
-                                                       &qpnp_tm_sensor_ops);
-       if (IS_ERR(chip->tz_dev)) {
-               dev_err(&pdev->dev, "failed to register sensor\n");
-               return PTR_ERR(chip->tz_dev);
-       }
+       thermal_zone_device_update(chip->tz_dev, THERMAL_EVENT_UNSPECIFIED);
  
         return 0;
  }
diff --git a/drivers/thermal/qcom/tsens-8916.c b/drivers/thermal/qcom/tsens-8916.c

index fdf561b..c6dd620 100644 (file)
--- a/drivers/thermal/qcom/tsens-8916.c
+++ b/drivers/thermal/qcom/tsens-8916.c
@@ -1,15 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
  /*
   * Copyright (c) 2015, The Linux Foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
   */
  
  #include <linux/platform_device.h>
@@ -109,5 +100,6 @@ static const struct tsens_ops ops_8916 = {
  const struct tsens_data data_8916 = {
         .num_sensors    = 5,
         .ops            = &ops_8916,
+       .reg_offsets    = { [SROT_CTRL_OFFSET] = 0x0 },
         .hw_ids         = (unsigned int []){0, 1, 2, 4, 5 },
  };
diff --git a/drivers/thermal/qcom/tsens-8960.c b/drivers/thermal/qcom/tsens-8960.c

index 0451277..0f0adb3 100644 (file)
--- a/drivers/thermal/qcom/tsens-8960.c
+++ b/drivers/thermal/qcom/tsens-8960.c
@@ -1,15 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
  /*
   * Copyright (c) 2015, The Linux Foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
   */
  
  #include <linux/platform_device.h>
@@ -69,7 +60,7 @@ static int suspend_8960(struct tsens_device *tmdev)
  {
         int ret;
         unsigned int mask;
-       struct regmap *map = tmdev->map;
+       struct regmap *map = tmdev->tm_map;
  
         ret = regmap_read(map, THRESHOLD_ADDR, &tmdev->ctx.threshold);
         if (ret)
@@ -94,7 +85,7 @@ static int suspend_8960(struct tsens_device *tmdev)
  static int resume_8960(struct tsens_device *tmdev)
  {
         int ret;
-       struct regmap *map = tmdev->map;
+       struct regmap *map = tmdev->tm_map;
  
         ret = regmap_update_bits(map, CNTL_ADDR, SW_RST, SW_RST);
         if (ret)
@@ -126,12 +117,12 @@ static int enable_8960(struct tsens_device *tmdev, int id)
         int ret;
         u32 reg, mask;
  
-       ret = regmap_read(tmdev->map, CNTL_ADDR, &reg);
+       ret = regmap_read(tmdev->tm_map, CNTL_ADDR, &reg);
         if (ret)
                 return ret;
  
         mask = BIT(id + SENSOR0_SHIFT);
-       ret = regmap_write(tmdev->map, CNTL_ADDR, reg | SW_RST);
+       ret = regmap_write(tmdev->tm_map, CNTL_ADDR, reg | SW_RST);
         if (ret)
                 return ret;
  
@@ -140,7 +131,7 @@ static int enable_8960(struct tsens_device *tmdev, int id)
         else
                 reg |= mask | SLP_CLK_ENA_8660 | EN;
  
-       ret = regmap_write(tmdev->map, CNTL_ADDR, reg);
+       ret = regmap_write(tmdev->tm_map, CNTL_ADDR, reg);
         if (ret)
                 return ret;
  
@@ -157,7 +148,7 @@ static void disable_8960(struct tsens_device *tmdev)
         mask <<= SENSOR0_SHIFT;
         mask |= EN;
  
-       ret = regmap_read(tmdev->map, CNTL_ADDR, &reg_cntl);
+       ret = regmap_read(tmdev->tm_map, CNTL_ADDR, &reg_cntl);
         if (ret)
                 return;
  
@@ -168,7 +159,7 @@ static void disable_8960(struct tsens_device *tmdev)
         else
                 reg_cntl &= ~SLP_CLK_ENA_8660;
  
-       regmap_write(tmdev->map, CNTL_ADDR, reg_cntl);
+       regmap_write(tmdev->tm_map, CNTL_ADDR, reg_cntl);
  }
  
  static int init_8960(struct tsens_device *tmdev)
@@ -176,8 +167,8 @@ static int init_8960(struct tsens_device *tmdev)
         int ret, i;
         u32 reg_cntl;
  
-       tmdev->map = dev_get_regmap(tmdev->dev, NULL);
-       if (!tmdev->map)
+       tmdev->tm_map = dev_get_regmap(tmdev->dev, NULL);
+       if (!tmdev->tm_map)
                 return -ENODEV;
  
         /*
@@ -193,14 +184,14 @@ static int init_8960(struct tsens_device *tmdev)
         }
  
         reg_cntl = SW_RST;
-       ret = regmap_update_bits(tmdev->map, CNTL_ADDR, SW_RST, reg_cntl);
+       ret = regmap_update_bits(tmdev->tm_map, CNTL_ADDR, SW_RST, reg_cntl);
         if (ret)
                 return ret;
  
         if (tmdev->num_sensors > 1) {
                 reg_cntl |= SLP_CLK_ENA | (MEASURE_PERIOD << 18);
                 reg_cntl &= ~SW_RST;
-               ret = regmap_update_bits(tmdev->map, CONFIG_ADDR,
+               ret = regmap_update_bits(tmdev->tm_map, CONFIG_ADDR,
                                          CONFIG_MASK, CONFIG);
         } else {
                 reg_cntl |= SLP_CLK_ENA_8660 | (MEASURE_PERIOD << 16);
@@ -209,12 +200,12 @@ static int init_8960(struct tsens_device *tmdev)
         }
  
         reg_cntl |= GENMASK(tmdev->num_sensors - 1, 0) << SENSOR0_SHIFT;
-       ret = regmap_write(tmdev->map, CNTL_ADDR, reg_cntl);
+       ret = regmap_write(tmdev->tm_map, CNTL_ADDR, reg_cntl);
         if (ret)
                 return ret;
  
         reg_cntl |= EN;
-       ret = regmap_write(tmdev->map, CNTL_ADDR, reg_cntl);
+       ret = regmap_write(tmdev->tm_map, CNTL_ADDR, reg_cntl);
         if (ret)
                 return ret;
  
@@ -261,12 +252,12 @@ static int get_temp_8960(struct tsens_device *tmdev, int id, int *temp)
  
         timeout = jiffies + usecs_to_jiffies(TIMEOUT_US);
         do {
-               ret = regmap_read(tmdev->map, INT_STATUS_ADDR, &trdy);
+               ret = regmap_read(tmdev->tm_map, INT_STATUS_ADDR, &trdy);
                 if (ret)
                         return ret;
                 if (!(trdy & TRDY_MASK))
                         continue;
-               ret = regmap_read(tmdev->map, s->status, &code);
+               ret = regmap_read(tmdev->tm_map, s->status, &code);
                 if (ret)
                         return ret;
                 *temp = code_to_mdegC(code, s);
diff --git a/drivers/thermal/qcom/tsens-8974.c b/drivers/thermal/qcom/tsens-8974.c

index 9baf77e..3d3fda3 100644 (file)
--- a/drivers/thermal/qcom/tsens-8974.c
+++ b/drivers/thermal/qcom/tsens-8974.c
@@ -1,15 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
  /*
   * Copyright (c) 2015, The Linux Foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
   */
  
  #include <linux/platform_device.h>
@@ -241,4 +232,5 @@ static const struct tsens_ops ops_8974 = {
  const struct tsens_data data_8974 = {
         .num_sensors    = 11,
         .ops            = &ops_8974,
+       .reg_offsets    = { [SROT_CTRL_OFFSET] = 0x0 },
  };
diff --git a/drivers/thermal/qcom/tsens-common.c b/drivers/thermal/qcom/tsens-common.c

index 6207d8d..3be4be2 100644 (file)
--- a/drivers/thermal/qcom/tsens-common.c
+++ b/drivers/thermal/qcom/tsens-common.c
@@ -1,15 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
  /*
   * Copyright (c) 2015, The Linux Foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
   */
  
  #include <linux/err.h>
@@ -21,7 +12,11 @@
  #include <linux/regmap.h>
  #include "tsens.h"
  
-#define S0_ST_ADDR             0x1030
+/* SROT */
+#define TSENS_EN               BIT(0)
+
+/* TM */
+#define STATUS_OFFSET          0x30
  #define SN_ADDR_OFFSET         0x4
  #define SN_ST_TEMP_MASK                0x3ff
  #define CAL_DEGC_PT1           30
@@ -107,8 +102,8 @@ int get_temp_common(struct tsens_device *tmdev, int id, int *temp)
         unsigned int status_reg;
         int last_temp = 0, ret;
  
-       status_reg = S0_ST_ADDR + s->hw_id * SN_ADDR_OFFSET;
-       ret = regmap_read(tmdev->map, status_reg, &code);
+       status_reg = tmdev->tm_offset + STATUS_OFFSET + s->hw_id * SN_ADDR_OFFSET;
+       ret = regmap_read(tmdev->tm_map, status_reg, &code);
         if (ret)
                 return ret;
         last_temp = code & SN_ST_TEMP_MASK;
@@ -126,29 +121,52 @@ static const struct regmap_config tsens_config = {
  
  int __init init_common(struct tsens_device *tmdev)
  {
-       void __iomem *base;
+       void __iomem *tm_base, *srot_base;
         struct resource *res;
+       u32 code;
+       int ret;
         struct platform_device *op = of_find_device_by_node(tmdev->dev->of_node);
+       u16 ctrl_offset = tmdev->reg_offsets[SROT_CTRL_OFFSET];
  
         if (!op)
                 return -EINVAL;
  
-       /* The driver only uses the TM register address space for now */
         if (op->num_resources > 1) {
+               /* DT with separate SROT and TM address space */
                 tmdev->tm_offset = 0;
+               res = platform_get_resource(op, IORESOURCE_MEM, 1);
+               srot_base = devm_ioremap_resource(&op->dev, res);
+               if (IS_ERR(srot_base))
+                       return PTR_ERR(srot_base);
+
+               tmdev->srot_map = devm_regmap_init_mmio(tmdev->dev,
+                                                       srot_base, &tsens_config);
+               if (IS_ERR(tmdev->srot_map))
+                       return PTR_ERR(tmdev->srot_map);
+
         } else {
                 /* old DTs where SROT and TM were in a contiguous 2K block */
                 tmdev->tm_offset = 0x1000;
         }
  
         res = platform_get_resource(op, IORESOURCE_MEM, 0);
-       base = devm_ioremap_resource(&op->dev, res);
-       if (IS_ERR(base))
-               return PTR_ERR(base);
-
-       tmdev->map = devm_regmap_init_mmio(tmdev->dev, base, &tsens_config);
-       if (IS_ERR(tmdev->map))
-               return PTR_ERR(tmdev->map);
+       tm_base = devm_ioremap_resource(&op->dev, res);
+       if (IS_ERR(tm_base))
+               return PTR_ERR(tm_base);
+
+       tmdev->tm_map = devm_regmap_init_mmio(tmdev->dev, tm_base, &tsens_config);
+       if (IS_ERR(tmdev->tm_map))
+               return PTR_ERR(tmdev->tm_map);
+
+       if (tmdev->srot_map) {
+               ret = regmap_read(tmdev->srot_map, ctrl_offset, &code);
+               if (ret)
+                       return ret;
+               if (!(code & TSENS_EN)) {
+                       dev_err(tmdev->dev, "tsens device is not enabled\n");
+                       return -ENODEV;
+               }
+       }
  
         return 0;
  }
diff --git a/drivers/thermal/qcom/tsens-v2.c b/drivers/thermal/qcom/tsens-v2.c

index 44da02f..381a212 100644 (file)
--- a/drivers/thermal/qcom/tsens-v2.c
+++ b/drivers/thermal/qcom/tsens-v2.c
@@ -21,7 +21,7 @@ static int get_temp_tsens_v2(struct tsens_device *tmdev, int id, int *temp)
         int ret;
  
         status_reg = tmdev->tm_offset + STATUS_OFFSET + s->hw_id * 4;
-       ret = regmap_read(tmdev->map, status_reg, &code);
+       ret = regmap_read(tmdev->tm_map, status_reg, &code);
         if (ret)
                 return ret;
         last_temp = code & LAST_TEMP_MASK;
@@ -29,7 +29,7 @@ static int get_temp_tsens_v2(struct tsens_device *tmdev, int id, int *temp)
                 goto done;
  
         /* Try a second time */
-       ret = regmap_read(tmdev->map, status_reg, &code);
+       ret = regmap_read(tmdev->tm_map, status_reg, &code);
         if (ret)
                 return ret;
         if (code & STATUS_VALID_BIT) {
@@ -40,7 +40,7 @@ static int get_temp_tsens_v2(struct tsens_device *tmdev, int id, int *temp)
         }
  
         /* Try a third/last time */
-       ret = regmap_read(tmdev->map, status_reg, &code);
+       ret = regmap_read(tmdev->tm_map, status_reg, &code);
         if (ret)
                 return ret;
         if (code & STATUS_VALID_BIT) {
@@ -68,10 +68,12 @@ static const struct tsens_ops ops_generic_v2 = {
  
  const struct tsens_data data_tsens_v2 = {
         .ops            = &ops_generic_v2,
+       .reg_offsets    = { [SROT_CTRL_OFFSET] = 0x4 },
  };
  
  /* Kept around for backward compatibility with old msm8996.dtsi */
  const struct tsens_data data_8996 = {
         .num_sensors    = 13,
         .ops            = &ops_generic_v2,
+       .reg_offsets    = { [SROT_CTRL_OFFSET] = 0x4 },
  };
diff --git a/drivers/thermal/qcom/tsens.c b/drivers/thermal/qcom/tsens.c

index a2c9bfa..f1ec9bb 100644 (file)
--- a/drivers/thermal/qcom/tsens.c
+++ b/drivers/thermal/qcom/tsens.c
@@ -1,15 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
  /*
   * Copyright (c) 2015, The Linux Foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
   */
  
  #include <linux/err.h>
@@ -89,11 +80,6 @@ static int tsens_register(struct tsens_device *tmdev)
  {
         int i;
         struct thermal_zone_device *tzd;
-       u32 *hw_id, n = tmdev->num_sensors;
-
-       hw_id = devm_kcalloc(tmdev->dev, n, sizeof(u32), GFP_KERNEL);
-       if (!hw_id)
-               return -ENOMEM;
  
         for (i = 0;  i < tmdev->num_sensors; i++) {
                 tmdev->sensor[i].tmdev = tmdev;
@@ -158,6 +144,9 @@ static int tsens_probe(struct platform_device *pdev)
                 else
                         tmdev->sensor[i].hw_id = i;
         }
+       for (i = 0; i < REG_ARRAY_SIZE; i++) {
+               tmdev->reg_offsets[i] = data->reg_offsets[i];
+       }
  
         if (!tmdev->ops || !tmdev->ops->init || !tmdev->ops->get_temp)
                 return -EINVAL;
diff --git a/drivers/thermal/qcom/tsens.h b/drivers/thermal/qcom/tsens.h

index 14331eb..7b7feee 100644 (file)
--- a/drivers/thermal/qcom/tsens.h
+++ b/drivers/thermal/qcom/tsens.h
@@ -1,15 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
  /*
   * Copyright (c) 2015, The Linux Foundation. All rights reserved.
- *
- * This software is licensed under the terms of the GNU General Public
- * License version 2, as published by the Free Software Foundation, and
- * may be copied, distributed, and modified under those terms.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
   */
+
  #ifndef __QCOM_TSENS_H__
  #define __QCOM_TSENS_H__
  
@@ -55,15 +48,23 @@ struct tsens_ops {
         int (*get_trend)(struct tsens_device *, int, enum thermal_trend *);
  };
  
+enum reg_list {
+       SROT_CTRL_OFFSET,
+
+       REG_ARRAY_SIZE,
+};
+
  /**
   * struct tsens_data - tsens instance specific data
   * @num_sensors: Max number of sensors supported by platform
   * @ops: operations the tsens instance supports
   * @hw_ids: Subset of sensors ids supported by platform, if not the first n
+ * @reg_offsets: Register offsets for commonly used registers
   */
  struct tsens_data {
         const u32               num_sensors;
         const struct tsens_ops  *ops;
+       const u16               reg_offsets[REG_ARRAY_SIZE];
         unsigned int            *hw_ids;
  };
  
@@ -76,8 +77,10 @@ struct tsens_context {
  struct tsens_device {
         struct device                   *dev;
         u32                             num_sensors;
-       struct regmap                   *map;
+       struct regmap                   *tm_map;
+       struct regmap                   *srot_map;
         u32                             tm_offset;
+       u16                             reg_offsets[REG_ARRAY_SIZE];
         struct tsens_context            ctx;
         const struct tsens_ops          *ops;
         struct tsens_sensor             sensor[0];
diff --git a/drivers/thermal/qoriq_thermal.c b/drivers/thermal/qoriq_thermal.c

index 450ed66..18c711b 100644 (file)
--- a/drivers/thermal/qoriq_thermal.c
+++ b/drivers/thermal/qoriq_thermal.c
@@ -119,8 +119,8 @@ static int qoriq_tmu_get_sensor_id(void)
         if (sensor_specs.args_count >= 1) {
                 id = sensor_specs.args[0];
                 WARN(sensor_specs.args_count > 1,
-                               "%s: too many cells in sensor specifier %d\n",
-                               sensor_specs.np->name, sensor_specs.args_count);
+                               "%pOFn: too many cells in sensor specifier %d\n",
+                               sensor_specs.np, sensor_specs.args_count);
         } else {
                 id = 0;
         }
@@ -294,6 +294,7 @@ static SIMPLE_DEV_PM_OPS(qoriq_tmu_pm_ops,
  
  static const struct of_device_id qoriq_tmu_match[] = {
         { .compatible = "fsl,qoriq-tmu", },
+       { .compatible = "fsl,imx8mq-tmu", },
         {},
  };
  MODULE_DEVICE_TABLE(of, qoriq_tmu_match);
diff --git a/drivers/thermal/rcar_gen3_thermal.c b/drivers/thermal/rcar_gen3_thermal.c

index 7aed533..75786cc 100644 (file)
--- a/drivers/thermal/rcar_gen3_thermal.c
+++ b/drivers/thermal/rcar_gen3_thermal.c
@@ -318,9 +318,11 @@ static void rcar_gen3_thermal_init(struct rcar_gen3_thermal_tsc *tsc)
  }
  
  static const struct of_device_id rcar_gen3_thermal_dt_ids[] = {
+       { .compatible = "renesas,r8a774a1-thermal", },
         { .compatible = "renesas,r8a7795-thermal", },
         { .compatible = "renesas,r8a7796-thermal", },
         { .compatible = "renesas,r8a77965-thermal", },
+       { .compatible = "renesas,r8a77980-thermal", },
         {},
  };
  MODULE_DEVICE_TABLE(of, rcar_gen3_thermal_dt_ids);
diff --git a/drivers/thermal/rcar_thermal.c b/drivers/thermal/rcar_thermal.c

index 78f9328..8014a20 100644 (file)
--- a/drivers/thermal/rcar_thermal.c
+++ b/drivers/thermal/rcar_thermal.c
@@ -112,6 +112,10 @@ static const struct of_device_id rcar_thermal_dt_ids[] = {
                 .compatible = "renesas,rcar-gen2-thermal",
                  .data = &rcar_gen2_thermal,
         },
+       {
+               .compatible = "renesas,thermal-r8a77970",
+               .data = &rcar_gen3_thermal,
+       },
         {
                 .compatible = "renesas,thermal-r8a77995",
                 .data = &rcar_gen3_thermal,
@@ -434,8 +438,8 @@ static irqreturn_t rcar_thermal_irq(int irq, void *data)
         rcar_thermal_for_each_priv(priv, common) {
                 if (rcar_thermal_had_changed(priv, status)) {
                         rcar_thermal_irq_disable(priv);
-                       schedule_delayed_work(&priv->work,
-                                             msecs_to_jiffies(300));
+                       queue_delayed_work(system_freezable_wq, &priv->work,
+                                          msecs_to_jiffies(300));
                 }
         }
  
@@ -453,6 +457,7 @@ static int rcar_thermal_remove(struct platform_device *pdev)
  
         rcar_thermal_for_each_priv(priv, common) {
                 rcar_thermal_irq_disable(priv);
+               cancel_delayed_work_sync(&priv->work);
                 if (priv->chip->use_of_thermal)
                         thermal_remove_hwmon_sysfs(priv->zone);
                 else
@@ -492,7 +497,7 @@ static int rcar_thermal_probe(struct platform_device *pdev)
         pm_runtime_get_sync(dev);
  
         for (i = 0; i < chip->nirqs; i++) {
-               irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+               irq = platform_get_resource(pdev, IORESOURCE_IRQ, i);
                 if (!irq)
                         continue;
                 if (!common->base) {
diff --git a/drivers/thermal/st/Kconfig b/drivers/thermal/st/Kconfig

index 490fdbe..b80f9a9 100644 (file)
--- a/drivers/thermal/st/Kconfig
+++ b/drivers/thermal/st/Kconfig
@@ -1,3 +1,7 @@
+#
+# STMicroelectronics thermal drivers configuration
+#
+
  config ST_THERMAL
         tristate "Thermal sensors on STMicroelectronics STi series of SoCs"
         help
@@ -10,3 +14,13 @@ config ST_THERMAL_SYSCFG
  config ST_THERMAL_MEMMAP
         select ST_THERMAL
         tristate "STi series memory mapped access based thermal sensors"
+
+config STM32_THERMAL
+       tristate "Thermal framework support on STMicroelectronics STM32 series of SoCs"
+       depends on MACH_STM32MP157
+       default y
+       help
+       Support for thermal framework on STMicroelectronics STM32 series of
+       SoCs. This thermal driver gives access to the general thermal
+       framework functionality and to the SoC sensor functionality. This
+       configuration depends on MACH_STM32MP157.
diff --git a/drivers/thermal/st/Makefile b/drivers/thermal/st/Makefile

index b388789..b2b9e9b 100644 (file)
--- a/drivers/thermal/st/Makefile
+++ b/drivers/thermal/st/Makefile
@@ -1,3 +1,4 @@
  obj-$(CONFIG_ST_THERMAL)               := st_thermal.o
  obj-$(CONFIG_ST_THERMAL_SYSCFG)                += st_thermal_syscfg.o
  obj-$(CONFIG_ST_THERMAL_MEMMAP)                += st_thermal_memmap.o
+# Append (+=): a second ':=' would discard the objects added by the lines above
+obj-$(CONFIG_STM32_THERMAL)            += stm_thermal.o
+\ No newline at end of file
diff --git a/drivers/thermal/st/stm_thermal.c b/drivers/thermal/st/stm_thermal.c

new file mode 100644 (file)

index 0000000..47623da
--- /dev/null
+++ b/drivers/thermal/st/stm_thermal.c
@@ -0,0 +1,760 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) STMicroelectronics 2018 - All Rights Reserved
+ * Author: David Hernandez Sanchez <david.hernandezsanchez@st.com> for
+ * STMicroelectronics.
+ */
+
+#include <linux/clk.h>
+#include <linux/clk-provider.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/thermal.h>
+
+#include "../thermal_core.h"
+#include "../thermal_hwmon.h"
+
+/* DTS register offsets, relative to the ioremapped sensor base */
+#define DTS_CFGR1_OFFSET       0x0
+#define DTS_T0VALR1_OFFSET     0x8
+#define DTS_RAMPVALR_OFFSET    0X10
+#define DTS_ITR1_OFFSET                0x14
+#define DTS_DR_OFFSET          0x1C
+#define DTS_SR_OFFSET          0x20
+#define DTS_ITENR_OFFSET       0x24
+#define DTS_CIFR_OFFSET                0x28
+
+/* DTS_CFGR1 register mask definitions */
+#define HSREF_CLK_DIV_MASK     GENMASK(30, 24)
+#define TS1_SMP_TIME_MASK      GENMASK(19, 16)
+#define TS1_INTRIG_SEL_MASK    GENMASK(11, 8)
+
+/* DTS_T0VALR1 register mask definitions */
+#define TS1_T0_MASK            GENMASK(17, 16)
+#define TS1_FMT0_MASK          GENMASK(15, 0)
+
+/* DTS_RAMPVALR register mask definitions */
+#define TS1_RAMP_COEFF_MASK    GENMASK(15, 0)
+
+/* DTS_ITR1 register mask definitions */
+#define TS1_HITTHD_MASK                GENMASK(31, 16)
+#define TS1_LITTHD_MASK                GENMASK(15, 0)
+
+/* DTS_DR register mask definitions */
+#define TS1_MFREQ_MASK         GENMASK(15, 0)
+
+/* Least significant bit positions of the multi-bit fields defined above */
+#define TS1_T0_POS             16
+#define TS1_SMP_TIME_POS       16
+#define TS1_HITTHD_POS         16
+#define HSREF_CLK_DIV_POS      24
+
+/* DTS_CFGR1 bit definitions */
+#define TS1_EN                 BIT(0)
+#define TS1_START              BIT(4)
+#define REFCLK_SEL             BIT(20)
+#define REFCLK_LSE             REFCLK_SEL
+#define Q_MEAS_OPT             BIT(21)
+#define CALIBRATION_CONTROL    Q_MEAS_OPT
+
+/* DTS_SR bit definitions */
+#define TS_RDY                 BIT(15)
+/* Bit definitions below are common for DTS_SR, DTS_ITENR and DTS_CIFR */
+#define HIGH_THRESHOLD         BIT(2)
+#define LOW_THRESHOLD          BIT(1)
+
+/* Constants */
+/* Multiplier applied to the raw FMT0 field when reading factory settings */
+#define ADJUST                 100
+/* Divisor used to express the reference clock rate in MHz */
+#define ONE_MHZ                        1000000
+/* Timeout handed to readl_poll_timeout(); the driver treats it as 5 ms */
+#define POLL_TIMEOUT           5000
+/* Sensor startup time (40 us); also used as the polling sleep interval */
+#define STARTUP_TIME           40
+/* Calibration temperature (oC) used when the T0 field reads as zero */
+#define TS1_T0_VAL0            30
+/* Calibration temperature (oC) used when the T0 field is non-zero */
+#define TS1_T0_VAL1            130
+/* Value OR-ed into the (cleared) INTRIG_SEL field: no hardware trigger */
+#define NO_HW_TRIG             0
+
+/* The Thermal Framework expects millidegrees */
+#define mcelsius(temp)         ((temp) * 1000)
+
+/* The Sensor expects oC degrees */
+#define celsius(temp)          ((temp) / 1000)
+
+/**
+ * struct stm_thermal_sensor - per-device state of the STM32 DTS sensor
+ * @dev: device of the platform device that was probed
+ * @th_dev: thermal zone returned by devm_thermal_zone_of_sensor_register()
+ * @mode: stm_thermal_get_temp() returns -EAGAIN unless this is
+ *        THERMAL_DEVICE_ENABLED
+ * @clk: "pclk" clock; its rate feeds the prescaler and frequency maths
+ * @high_temp: temperature (oC) the upper alarm threshold is derived from
+ * @low_temp: temperature (oC) the lower alarm threshold is derived from;
+ *            only programmed when @low_temp_enabled is set
+ * @temp_critical: critical trip temperature, converted to oC in probe
+ * @temp_passive: passive trip temperature in oC, when such a trip exists
+ * @low_temp_enabled: non-zero when the lower threshold must be armed
+ * @num_trips: trip count reported by of_thermal_get_ntrips()
+ * @irq: Linux IRQ number obtained with platform_get_irq()
+ * @irq_enabled: cleared by the hard IRQ handler after it disables the line,
+ *               set again when stm_thermal_get_temp() re-enables it
+ * @base: ioremapped DTS register block
+ * @t0: factory calibration temperature (oC)
+ * @fmt0: factory calibration frequency read from DTS_T0VALR1
+ * @ramp_coeff: factory ramp coefficient read from DTS_RAMPVALR
+ */
+struct stm_thermal_sensor {
+       struct device *dev;
+       struct thermal_zone_device *th_dev;
+       enum thermal_device_mode mode;
+       struct clk *clk;
+       int high_temp;
+       int low_temp;
+       int temp_critical;
+       int temp_passive;
+       unsigned int low_temp_enabled;
+       int num_trips;
+       int irq;
+       unsigned int irq_enabled;
+       void __iomem *base;
+       int t0, fmt0, ramp_coeff;
+};
+
+/*
+ * Hard IRQ half of the alarm interrupt.
+ *
+ * Disables the interrupt line, records that fact in the sensor state and
+ * defers the rest of the handling to the threaded handler.
+ */
+static irqreturn_t stm_thermal_alarm_irq(int irq, void *sdata)
+{
+       struct stm_thermal_sensor *ts = sdata;
+
+       /* The line is re-enabled later from stm_thermal_get_temp() */
+       disable_irq_nosync(irq);
+       ts->irq_enabled = false;
+
+       return IRQ_WAKE_THREAD;
+}
+
+/*
+ * Threaded half of the alarm interrupt.
+ *
+ * Acknowledges whichever threshold flags are pending in DTS_SR and asks the
+ * thermal core to re-evaluate the zone.
+ */
+static irqreturn_t stm_thermal_alarm_irq_thread(int irq, void *sdata)
+{
+       struct stm_thermal_sensor *ts = sdata;
+       void __iomem *clear_reg = ts->base + DTS_CIFR_OFFSET;
+       u32 status;
+
+       /* Find out which threshold fired, then clear the matching flag(s) */
+       status = readl_relaxed(ts->base + DTS_SR_OFFSET);
+
+       if (status & LOW_THRESHOLD)
+               writel_relaxed(LOW_THRESHOLD, clear_reg);
+
+       if (status & HIGH_THRESHOLD)
+               writel_relaxed(HIGH_THRESHOLD, clear_reg);
+
+       thermal_zone_device_update(ts->th_dev, THERMAL_EVENT_UNSPECIFIED);
+
+       return IRQ_HANDLED;
+}
+
+/*
+ * Enable the sensor, wait for it to report ready, then start measuring.
+ *
+ * Returns 0 on success or the readl_poll_timeout() error if TS_RDY never
+ * shows up in DTS_SR.
+ */
+static int stm_sensor_power_on(struct stm_thermal_sensor *sensor)
+{
+       void __iomem *cfgr1 = sensor->base + DTS_CFGR1_OFFSET;
+       u32 reg;
+       int err;
+
+       /* Enable sensor */
+       reg = readl_relaxed(cfgr1);
+       writel_relaxed(reg | TS1_EN, cfgr1);
+
+       /*
+        * Once TSx_EN is set the block needs a startup time of 40us before
+        * it is ready; poll at that interval with an arbitrary 5 ms timeout.
+        */
+       err = readl_poll_timeout(sensor->base + DTS_SR_OFFSET,
+                                reg, (reg & TS_RDY),
+                                STARTUP_TIME, POLL_TIMEOUT);
+       if (err)
+               return err;
+
+       /* Start continuous measuring */
+       reg = readl_relaxed(cfgr1);
+       writel_relaxed(reg | TS1_START, cfgr1);
+
+       return 0;
+}
+
+/*
+ * Stop measuring and disable the sensor.
+ *
+ * Returns 0 once TS_RDY has dropped in DTS_SR, or the readl_poll_timeout()
+ * error if it does not drop in time.
+ */
+static int stm_sensor_power_off(struct stm_thermal_sensor *sensor)
+{
+       void __iomem *cfgr1 = sensor->base + DTS_CFGR1_OFFSET;
+       u32 reg;
+
+       /* Stop measuring */
+       reg = readl_relaxed(cfgr1);
+       writel_relaxed(reg & ~TS1_START, cfgr1);
+
+       /* Give the hardware time to take the stop into account */
+       usleep_range(STARTUP_TIME, POLL_TIMEOUT);
+
+       /* Disable sensor */
+       reg = readl_relaxed(cfgr1);
+       writel_relaxed(reg & ~TS1_EN, cfgr1);
+
+       /* Wait until the ready flag confirms the sensor is disabled */
+       return readl_poll_timeout(sensor->base + DTS_SR_OFFSET, reg,
+                                 !(reg & TS_RDY),
+                                 STARTUP_TIME, POLL_TIMEOUT);
+}
+
+/*
+ * Program DTS_CFGR1 for calibrated measurement using PCLK as reference.
+ *
+ * The prescaler is 0 when PCLK is below 1 MHz and (rate in MHz) + 1
+ * otherwise, so that pclk_freq / prescaler stays below 1 MHz.
+ *
+ * Returns 0 on success, -EINVAL if the clock rate is unknown or the
+ * prescaler does not fit in the HSREF_CLK_DIV field.
+ */
+static int stm_thermal_calibration(struct stm_thermal_sensor *sensor)
+{
+       u32 value, clk_freq;
+       u32 prescaler;
+
+       /* Figure out prescaler value for PCLK during calibration */
+       clk_freq = clk_get_rate(sensor->clk);
+       if (!clk_freq)
+               return -EINVAL;
+
+       clk_freq /= ONE_MHZ;
+       prescaler = clk_freq ? clk_freq + 1 : 0;
+
+       /* A wider value would spill into the bits above the divider field */
+       if (prescaler > (HSREF_CLK_DIV_MASK >> HSREF_CLK_DIV_POS))
+               return -EINVAL;
+
+       value = readl_relaxed(sensor->base + DTS_CFGR1_OFFSET);
+
+       /* Clear prescaler */
+       value &= ~HSREF_CLK_DIV_MASK;
+
+       /* Set prescaler. pclk_freq/prescaler < 1MHz */
+       value |= (prescaler << HSREF_CLK_DIV_POS);
+
+       /* Select PCLK as reference clock */
+       value &= ~REFCLK_SEL;
+
+       /* Set maximal sampling time for better precision */
+       value |= TS1_SMP_TIME_MASK;
+
+       /* Measure with calibration */
+       value &= ~CALIBRATION_CONTROL;
+
+       /* select trigger */
+       value &= ~TS1_INTRIG_SEL_MASK;
+       value |= NO_HW_TRIG;
+
+       writel_relaxed(value, sensor->base + DTS_CFGR1_OFFSET);
+
+       return 0;
+}
+
+/*
+ * Fill in the sensor structure with the factory calibration values.
+ *
+ * Reads T0 and FMT0 from DTS_T0VALR1 and the ramp coefficient from
+ * DTS_RAMPVALR.
+ *
+ * Returns 0 on success, -EINVAL if FMT0 or the ramp coefficient reads as
+ * zero.
+ */
+static int stm_thermal_read_factory_settings(struct stm_thermal_sensor *sensor)
+{
+       u32 t0valr1 = readl_relaxed(sensor->base + DTS_T0VALR1_OFFSET);
+
+       /* Retrieve engineering calibration temperature */
+       if (t0valr1 & TS1_T0_MASK)
+               sensor->t0 = TS1_T0_VAL1;
+       else
+               sensor->t0 = TS1_T0_VAL0;
+
+       /*
+        * Retrieve fmt0 and put it on Hz. The field must be masked before
+        * scaling: '*' binds tighter than '&', so without the parentheses
+        * the scaled register value would be masked instead.
+        */
+       sensor->fmt0 = ADJUST * (t0valr1 & TS1_FMT0_MASK);
+
+       /* Retrieve ramp coefficient */
+       sensor->ramp_coeff = readl_relaxed(sensor->base + DTS_RAMPVALR_OFFSET) &
+                                          TS1_RAMP_COEFF_MASK;
+
+       if (!sensor->fmt0 || !sensor->ramp_coeff) {
+               dev_err(sensor->dev, "%s: wrong setting\n", __func__);
+               return -EINVAL;
+       }
+
+       dev_dbg(sensor->dev, "%s: T0 = %doC, FMT0 = %dHz, RAMP_COEFF = %dHz/oC",
+               __func__, sensor->t0, sensor->fmt0, sensor->ramp_coeff);
+
+       return 0;
+}
+
+/*
+ * Convert a temperature into the sample count used as alarm threshold.
+ *
+ * @sensor: sensor whose calibration data and clock are used
+ * @temp: temperature in oC
+ * @th: receives the threshold sample count on success
+ *
+ * Returns 0 on success, -EINVAL if the expected frequency is not positive
+ * or the clock rate is unknown.
+ */
+static int stm_thermal_calculate_threshold(struct stm_thermal_sensor *sensor,
+                                          int temp, u32 *th)
+{
+       int freqM;
+       u32 sampling_time;
+       u32 samples;
+
+       /* Retrieve the number of periods to sample */
+       sampling_time = (readl_relaxed(sensor->base + DTS_CFGR1_OFFSET) &
+                       TS1_SMP_TIME_MASK) >> TS1_SMP_TIME_POS;
+
+       /* Figure out the CLK_PTAT frequency for a given temperature */
+       freqM = ((temp - sensor->t0) * sensor->ramp_coeff)
+                + sensor->fmt0;
+
+       dev_dbg(sensor->dev, "%s: freqM for threshold = %d Hz",
+               __func__, freqM);
+
+       /* freqM is the divisor below: zero would fault, negative is bogus */
+       if (freqM <= 0)
+               return -EINVAL;
+
+       /* Figure out the threshold sample number */
+       samples = clk_get_rate(sensor->clk);
+       if (!samples)
+               return -EINVAL;
+
+       samples = samples / freqM;
+
+       samples *= sampling_time;
+
+       *th = samples;
+
+       return 0;
+}
+
+/*
+ * Program DTS_ITR1 with the alarm threshold(s).
+ *
+ * The sample count derived from high_temp goes into the LITTHD field; when
+ * low_temp_enabled is set, the count derived from low_temp goes into the
+ * HITTHD field.
+ *
+ * Returns 0 on success or the error from the threshold computation.
+ */
+static int stm_thermal_set_threshold(struct stm_thermal_sensor *sensor)
+{
+       void __iomem *itr1 = sensor->base + DTS_ITR1_OFFSET;
+       u32 reg, samples;
+       int err;
+
+       /* Start from the current content with both threshold fields erased */
+       reg = readl_relaxed(itr1);
+       reg &= ~(TS1_LITTHD_MASK | TS1_HITTHD_MASK);
+
+       /* Sample count matching the high temperature */
+       err = stm_thermal_calculate_threshold(sensor, sensor->high_temp,
+                                             &samples);
+       if (err)
+               return err;
+
+       reg |= samples & TS1_LITTHD_MASK;
+
+       if (sensor->low_temp_enabled) {
+               /* Sample count matching the low temperature */
+               err = stm_thermal_calculate_threshold(sensor,
+                                                     sensor->low_temp,
+                                                     &samples);
+               if (err)
+                       return err;
+
+               reg |= (samples << TS1_HITTHD_POS) & TS1_HITTHD_MASK;
+       }
+
+       /* Commit the new threshold(s) */
+       writel_relaxed(reg, itr1);
+
+       return 0;
+}
+
+/*
+ * Disable temperature interrupt generation on the sensor side.
+ * Always returns 0.
+ */
+static int stm_disable_irq(struct stm_thermal_sensor *sensor)
+{
+       void __iomem *itenr = sensor->base + DTS_ITENR_OFFSET;
+       u32 enabled;
+
+       /* Drop both the low and the high threshold enable bits */
+       enabled = readl_relaxed(itenr);
+       enabled &= ~(LOW_THRESHOLD | HIGH_THRESHOLD);
+       writel_relaxed(enabled, itenr);
+
+       dev_dbg(sensor->dev, "%s: IT disabled on sensor side", __func__);
+
+       return 0;
+}
+
+/*
+ * Enable temperature interrupt generation on the sensor side.
+ *
+ * The high temperature alarm is armed through the low threshold sampling
+ * value, so LOW_THRESHOLD is always enabled; HIGH_THRESHOLD is enabled
+ * only when low_temp_enabled is set. Always returns 0.
+ */
+static int stm_enable_irq(struct stm_thermal_sensor *sensor)
+{
+       void __iomem *clear_reg = sensor->base + DTS_CIFR_OFFSET;
+       void __iomem *itenr = sensor->base + DTS_ITENR_OFFSET;
+       u32 enabled;
+
+       /* Make sure LOW_THRESHOLD IT is clear before enabling */
+       writel_relaxed(LOW_THRESHOLD, clear_reg);
+
+       enabled = readl_relaxed(itenr) | LOW_THRESHOLD;
+
+       if (sensor->low_temp_enabled) {
+               /* Make sure HIGH_THRESHOLD IT is clear before enabling */
+               writel_relaxed(HIGH_THRESHOLD, clear_reg);
+
+               enabled |= HIGH_THRESHOLD;
+       }
+
+       /* Commit the enabled threshold interrupt(s) */
+       writel_relaxed(enabled, itenr);
+
+       dev_dbg(sensor->dev, "%s: IT enabled on sensor side", __func__);
+
+       return 0;
+}
+
+/*
+ * Reprogram the alarm thresholds from the values stored in the sensor.
+ *
+ * The sensor is marked disabled, powered off, reconfigured and powered on
+ * again; the mode only returns to enabled when every step succeeded.
+ *
+ * Returns 0 on success or the error of the first failing step.
+ */
+static int stm_thermal_update_threshold(struct stm_thermal_sensor *sensor)
+{
+       int err;
+
+       sensor->mode = THERMAL_DEVICE_DISABLED;
+
+       /* Each step only runs if all previous ones succeeded */
+       err = stm_sensor_power_off(sensor);
+       if (!err)
+               err = stm_disable_irq(sensor);
+       if (!err)
+               err = stm_thermal_set_threshold(sensor);
+       if (!err)
+               err = stm_enable_irq(sensor);
+       if (!err)
+               err = stm_sensor_power_on(sensor);
+       if (err)
+               return err;
+
+       sensor->mode = THERMAL_DEVICE_ENABLED;
+
+       return 0;
+}
+
+/*
+ * Callback to get temperature from HW.
+ *
+ * @data: the struct stm_thermal_sensor registered with the thermal zone
+ * @temp: receives the temperature in millidegrees Celsius
+ *
+ * Besides reading the temperature, this moves the alarm thresholds between
+ * the passive and critical trip points and re-enables the alarm IRQ once
+ * the temperature is back below the critical one.
+ *
+ * Returns 0 on success, -EAGAIN while the sensor is not enabled, -ENODATA
+ * if no usable sample count is available, -EINVAL on an unusable sampling
+ * configuration or frequency, or the error of a failed poll or threshold
+ * update.
+ */
+static int stm_thermal_get_temp(void *data, int *temp)
+{
+       struct stm_thermal_sensor *sensor = data;
+       u32 sampling_time;
+       int freqM, ret;
+
+       if (sensor->mode != THERMAL_DEVICE_ENABLED)
+               return -EAGAIN;
+
+       /* Retrieve the number of samples */
+       ret = readl_poll_timeout(sensor->base + DTS_DR_OFFSET, freqM,
+                                (freqM & TS1_MFREQ_MASK), STARTUP_TIME,
+                                POLL_TIMEOUT);
+
+       if (ret)
+               return ret;
+
+       /* Only the MFREQ field carries the sample count */
+       freqM &= TS1_MFREQ_MASK;
+
+       /* Retrieve the number of periods sampled */
+       sampling_time = (readl_relaxed(sensor->base + DTS_CFGR1_OFFSET) &
+                       TS1_SMP_TIME_MASK) >> TS1_SMP_TIME_POS;
+       if (!sampling_time)
+               return -EINVAL;
+
+       /* Figure out the number of samples per period */
+       freqM /= sampling_time;
+       if (!freqM)
+               return -ENODATA;
+
+       /* Figure out the CLK_PTAT frequency */
+       freqM = clk_get_rate(sensor->clk) / freqM;
+       if (!freqM)
+               return -EINVAL;
+
+       dev_dbg(sensor->dev, "%s: freqM=%d\n", __func__, freqM);
+
+       /* Figure out the temperature in millidegrees Celsius */
+       *temp = mcelsius(sensor->t0 + ((freqM - sensor->fmt0) /
+                        sensor->ramp_coeff));
+
+       dev_dbg(sensor->dev, "%s: temperature = %d millicelsius",
+               __func__, *temp);
+
+       /* Update thresholds */
+       if (sensor->num_trips > 1) {
+               /* Update alarm threshold value to next higher trip point */
+               if (sensor->high_temp == sensor->temp_passive &&
+                   celsius(*temp) >= sensor->temp_passive) {
+                       sensor->high_temp = sensor->temp_critical;
+                       sensor->low_temp = sensor->temp_passive;
+                       sensor->low_temp_enabled = true;
+                       ret = stm_thermal_update_threshold(sensor);
+                       if (ret)
+                               return ret;
+               }
+
+               /* Fall back to the passive trip once we are below it again */
+               if (sensor->high_temp == sensor->temp_critical &&
+                   celsius(*temp) < sensor->temp_passive) {
+                       sensor->high_temp = sensor->temp_passive;
+                       sensor->low_temp_enabled = false;
+                       ret = stm_thermal_update_threshold(sensor);
+                       if (ret)
+                               return ret;
+               }
+
+               /*
+                * Re-enable alarm IRQ if temperature below critical
+                * temperature
+                */
+               if (!sensor->irq_enabled &&
+                   (celsius(*temp) < sensor->temp_critical)) {
+                       sensor->irq_enabled = true;
+                       enable_irq(sensor->irq);
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Look up the DTS interrupt of the platform device and install the
+ * hard/threaded alarm handlers for it.
+ *
+ * Returns 0 on success, otherwise the error from platform_get_irq() or
+ * devm_request_threaded_irq().
+ */
+static int stm_register_irq(struct stm_thermal_sensor *sensor)
+{
+       struct device *dev = sensor->dev;
+       int irq, err;
+
+       irq = platform_get_irq(to_platform_device(dev), 0);
+       sensor->irq = irq;
+       if (irq < 0) {
+               dev_err(dev, "%s: Unable to find IRQ\n", __func__);
+               return irq;
+       }
+
+       err = devm_request_threaded_irq(dev, irq,
+                                       stm_thermal_alarm_irq,
+                                       stm_thermal_alarm_irq_thread,
+                                       IRQF_ONESHOT,
+                                       dev->driver->name, sensor);
+       if (err) {
+               dev_err(dev, "%s: Failed to register IRQ %d\n", __func__,
+                       irq);
+               return err;
+       }
+
+       /* The line starts out enabled once the handler is installed */
+       sensor->irq_enabled = true;
+
+       dev_dbg(dev, "%s: thermal IRQ registered", __func__);
+
+       return 0;
+}
+
+/*
+ * Power the sensor off and release its clock.
+ *
+ * The clock is left running if the power-off sequence fails, in which case
+ * that error is returned; otherwise returns 0.
+ */
+static int stm_thermal_sensor_off(struct stm_thermal_sensor *sensor)
+{
+       int err = stm_sensor_power_off(sensor);
+
+       if (!err)
+               clk_disable_unprepare(sensor->clk);
+
+       return err;
+}
+
+/*
+ * Bring the sensor up: enable the clock, calibrate, program and enable
+ * the alarm threshold(s), then power the sensor on.
+ *
+ * On failure everything done so far is undone and the error is returned;
+ * returns 0 on success.
+ */
+static int stm_thermal_prepare(struct stm_thermal_sensor *sensor)
+{
+       int err;
+
+       err = clk_prepare_enable(sensor->clk);
+       if (err)
+               return err;
+
+       /* Configuration steps: each one runs only if the previous passed */
+       err = stm_thermal_calibration(sensor);
+       if (!err)
+               err = stm_thermal_set_threshold(sensor);
+       if (!err)
+               err = stm_enable_irq(sensor);
+       if (err)
+               goto release_clk;
+
+       err = stm_sensor_power_on(sensor);
+       if (!err)
+               return 0;
+
+       dev_err(sensor->dev, "%s: failed to power on sensor\n", __func__);
+
+       /* Sensor side interrupts were enabled above: switch them off again */
+       stm_disable_irq(sensor);
+
+release_clk:
+       clk_disable_unprepare(sensor->clk);
+
+       return err;
+}
+
+#ifdef CONFIG_PM_SLEEP
+/*
+ * System suspend: power the sensor off, release its clock and mark it
+ * disabled. Returns 0 on success or the power-off error.
+ */
+static int stm_thermal_suspend(struct device *dev)
+{
+       int ret;
+       struct platform_device *pdev = to_platform_device(dev);
+       struct stm_thermal_sensor *sensor = platform_get_drvdata(pdev);
+
+       ret = stm_thermal_sensor_off(sensor);
+       if (ret)
+               return ret;
+
+       sensor->mode = THERMAL_DEVICE_DISABLED;
+
+       return 0;
+}
+
+/*
+ * System resume: reconfigure and power on the sensor, then mark it
+ * enabled. Returns 0 on success or the error from stm_thermal_prepare().
+ */
+static int stm_thermal_resume(struct device *dev)
+{
+       int ret;
+       struct platform_device *pdev = to_platform_device(dev);
+       struct stm_thermal_sensor *sensor = platform_get_drvdata(pdev);
+
+       ret = stm_thermal_prepare(sensor);
+       if (ret)
+               return ret;
+
+       sensor->mode = THERMAL_DEVICE_ENABLED;
+
+       return 0;
+}
+#endif /* CONFIG_PM_SLEEP */
+
+/* Only referenced by the platform driver below, so keep it file-local */
+static SIMPLE_DEV_PM_OPS(stm_thermal_pm_ops, stm_thermal_suspend,
+                        stm_thermal_resume);
+
+/* Sensor callbacks handed to the OF thermal zone; only get_temp is provided */
+static const struct thermal_zone_of_device_ops stm_tz_ops = {
+       .get_temp       = stm_thermal_get_temp,
+};
+
+/* Device tree compatible strings this driver binds to */
+static const struct of_device_id stm_thermal_of_match[] = {
+               { .compatible = "st,stm32-thermal"},
+       { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, stm_thermal_of_match);
+
+/*
+ * Probe the DTS sensor: map the registers, read the factory calibration,
+ * get the clock and IRQ, register the thermal zone sensor, derive the
+ * alarm thresholds from the trip points, power the sensor on and expose
+ * it through hwmon.
+ *
+ * The memory, mapping, clock, IRQ and thermal zone sensor are all
+ * device-managed, so the error paths only undo what is not: the sensor
+ * power/clock state set up by stm_thermal_prepare().
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int stm_thermal_probe(struct platform_device *pdev)
+{
+       struct stm_thermal_sensor *sensor;
+       struct resource *res;
+       const struct thermal_trip *trip;
+       void __iomem *base;
+       int ret, i;
+
+       if (!pdev->dev.of_node) {
+               dev_err(&pdev->dev, "%s: device tree node not found\n",
+                       __func__);
+               return -EINVAL;
+       }
+
+       sensor = devm_kzalloc(&pdev->dev, sizeof(*sensor), GFP_KERNEL);
+       if (!sensor)
+               return -ENOMEM;
+
+       platform_set_drvdata(pdev, sensor);
+
+       sensor->dev = &pdev->dev;
+
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       base = devm_ioremap_resource(&pdev->dev, res);
+       if (IS_ERR(base))
+               return PTR_ERR(base);
+
+       /* Populate sensor */
+       sensor->base = base;
+
+       /*
+        * NOTE(review): the registers are read here before the clock has
+        * been fetched and enabled; confirm the block is readable then.
+        */
+       ret = stm_thermal_read_factory_settings(sensor);
+       if (ret)
+               return ret;
+
+       sensor->clk = devm_clk_get(&pdev->dev, "pclk");
+       if (IS_ERR(sensor->clk)) {
+               dev_err(&pdev->dev, "%s: failed to fetch PCLK clock\n",
+                       __func__);
+               return PTR_ERR(sensor->clk);
+       }
+
+       /* Register IRQ into GIC */
+       ret = stm_register_irq(sensor);
+       if (ret)
+               return ret;
+
+       sensor->th_dev = devm_thermal_zone_of_sensor_register(&pdev->dev, 0,
+                                                             sensor,
+                                                             &stm_tz_ops);
+
+       if (IS_ERR(sensor->th_dev)) {
+               dev_err(&pdev->dev, "%s: thermal zone sensor registering KO\n",
+                       __func__);
+               ret = PTR_ERR(sensor->th_dev);
+               return ret;
+       }
+
+       /* Critical point must be provided */
+       if (!sensor->th_dev->ops->get_crit_temp)
+               return -EINVAL;
+
+       ret = sensor->th_dev->ops->get_crit_temp(sensor->th_dev,
+                       &sensor->temp_critical);
+       if (ret) {
+               dev_err(&pdev->dev,
+                       "Not able to read critical_temp: %d\n", ret);
+               return ret;
+       }
+
+       sensor->temp_critical = celsius(sensor->temp_critical);
+
+       /* Set thresholds for IRQ */
+       sensor->high_temp = sensor->temp_critical;
+
+       trip = of_thermal_get_trip_points(sensor->th_dev);
+       sensor->num_trips = of_thermal_get_ntrips(sensor->th_dev);
+
+       /* Find out passive temperature if it exists */
+       for (i = (sensor->num_trips - 1); i >= 0;  i--) {
+               if (trip[i].type == THERMAL_TRIP_PASSIVE) {
+                       sensor->temp_passive = celsius(trip[i].temperature);
+                       /* Update high temperature threshold */
+                       sensor->high_temp = sensor->temp_passive;
+               }
+       }
+
+       /*
+        * Ensure low_temp_enabled flag is disabled.
+        * With low_temp_enabled cleared, the low threshold IT is neither
+        * configured nor enabled; it is not needed because the high
+        * threshold is set on the lowest temperature trip point after
+        * probe.
+        */
+       sensor->low_temp_enabled = false;
+
+       /* Configure and enable HW sensor */
+       ret = stm_thermal_prepare(sensor);
+       if (ret) {
+               dev_err(&pdev->dev,
+                       "Not able to enable sensor: %d\n", ret);
+               return ret;
+       }
+
+       /*
+        * Thermal_zone doesn't enable hwmon as default,
+        * enable it here
+        */
+       sensor->th_dev->tzp->no_hwmon = false;
+       ret = thermal_add_hwmon_sysfs(sensor->th_dev);
+       if (ret)
+               goto err_sensor_off;
+
+       sensor->mode = THERMAL_DEVICE_ENABLED;
+
+       dev_info(&pdev->dev, "%s: Driver initialized successfully\n",
+                __func__);
+
+       return 0;
+
+err_sensor_off:
+       /* Undo stm_thermal_prepare(): power the sensor off, drop the clock */
+       stm_thermal_sensor_off(sensor);
+       return ret;
+}
+
+/*
+ * Tear the driver down: power the sensor off, remove the hwmon interface
+ * and unregister the thermal zone sensor. Always returns 0.
+ */
+static int stm_thermal_remove(struct platform_device *pdev)
+{
+       struct stm_thermal_sensor *sensor = platform_get_drvdata(pdev);
+
+       /*
+        * Mark the sensor disabled before touching the hardware so that
+        * stm_thermal_get_temp() bails out with -EAGAIN instead of polling
+        * a sensor that is being powered down.
+        */
+       sensor->mode = THERMAL_DEVICE_DISABLED;
+
+       stm_thermal_sensor_off(sensor);
+       thermal_remove_hwmon_sysfs(sensor->th_dev);
+       /*
+        * NOTE(review): the zone sensor was registered with the devm_
+        * helper, so it will also be released automatically afterwards.
+        */
+       thermal_zone_of_sensor_unregister(&pdev->dev, sensor->th_dev);
+
+       return 0;
+}
+
+/*
+ * Platform driver glue: binds through stm_thermal_of_match, uses
+ * stm_thermal_pm_ops for system sleep, and registers/unregisters itself
+ * at module init/exit through module_platform_driver().
+ */
+static struct platform_driver stm_thermal_driver = {
+       .driver = {
+               .name   = "stm_thermal",
+               .pm     = &stm_thermal_pm_ops,
+               .of_match_table = stm_thermal_of_match,
+       },
+       .probe          = stm_thermal_probe,
+       .remove         = stm_thermal_remove,
+};
+module_platform_driver(stm_thermal_driver);
+
+MODULE_DESCRIPTION("STMicroelectronics STM32 Thermal Sensor Driver");
+MODULE_AUTHOR("David Hernandez Sanchez <david.hernandezsanchez@st.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:stm_thermal");
diff --git a/drivers/video/fbdev/chipsfb.c b/drivers/video/fbdev/chipsfb.c

index f103665..40182ed 100644 (file)
--- a/drivers/video/fbdev/chipsfb.c
+++ b/drivers/video/fbdev/chipsfb.c
@@ -27,7 +27,6 @@
  #include <linux/init.h>
  #include <linux/pci.h>
  #include <linux/console.h>
-#include <asm/io.h>
  
  #ifdef CONFIG_PMAC_BACKLIGHT
  #include <asm/backlight.h>
@@ -401,7 +400,7 @@ static int chipsfb_pci_init(struct pci_dev *dp, const struct pci_device_id *ent)
  #endif /* CONFIG_PMAC_BACKLIGHT */
  
  #ifdef CONFIG_PPC
-       p->screen_base = __ioremap(addr, 0x200000, _PAGE_NO_CACHE);
+       p->screen_base = ioremap_wc(addr, 0x200000);
  #else
         p->screen_base = ioremap(addr, 0x200000);
  #endif
diff --git a/drivers/video/fbdev/controlfb.c b/drivers/video/fbdev/controlfb.c

index 8d14b29..9cb0ef7 100644 (file)
--- a/drivers/video/fbdev/controlfb.c
+++ b/drivers/video/fbdev/controlfb.c
@@ -48,9 +48,7 @@
  #include <linux/nvram.h>
  #include <linux/adb.h>
  #include <linux/cuda.h>
-#include <asm/io.h>
  #include <asm/prom.h>
-#include <asm/pgtable.h>
  #include <asm/btext.h>
  
  #include "macmodes.h"
@@ -715,8 +713,7 @@ static int __init control_of_init(struct device_node *dp)
                 goto error_out;
         }
         /* map at most 8MB for the frame buffer */
-       p->frame_buffer = __ioremap(p->frame_buffer_phys, 0x800000,
-                                   _PAGE_WRITETHRU);
+       p->frame_buffer = ioremap_wt(p->frame_buffer_phys, 0x800000);
  
         if (!p->control_regs_phys ||
             !request_mem_region(p->control_regs_phys, p->control_regs_size,
diff --git a/drivers/video/fbdev/fsl-diu-fb.c b/drivers/video/fbdev/fsl-diu-fb.c

index bc9eb8a..332a56b 100644 (file)
--- a/drivers/video/fbdev/fsl-diu-fb.c
+++ b/drivers/video/fbdev/fsl-diu-fb.c
@@ -1925,7 +1925,7 @@ static int __init fsl_diu_init(void)
         pr_info("Freescale Display Interface Unit (DIU) framebuffer driver\n");
  
  #ifdef CONFIG_NOT_COHERENT_CACHE
-       np = of_find_node_by_type(NULL, "cpu");
+       np = of_get_cpu_node(0, NULL);
         if (!np) {
                 pr_err("fsl-diu-fb: can't find 'cpu' device node\n");
                 return -ENODEV;
diff --git a/drivers/video/fbdev/platinumfb.c b/drivers/video/fbdev/platinumfb.c

index 377d339..bf6b7fb 100644 (file)
--- a/drivers/video/fbdev/platinumfb.c
+++ b/drivers/video/fbdev/platinumfb.c
@@ -32,9 +32,7 @@
  #include <linux/nvram.h>
  #include <linux/of_device.h>
  #include <linux/of_platform.h>
-#include <asm/io.h>
  #include <asm/prom.h>
-#include <asm/pgtable.h>
  
  #include "macmodes.h"
  #include "platinumfb.h"
@@ -577,8 +575,7 @@ static int platinumfb_probe(struct platform_device* odev)
  
         /* frame buffer - map only 4MB */
         pinfo->frame_buffer_phys = pinfo->rsrc_fb.start;
-       pinfo->frame_buffer = __ioremap(pinfo->rsrc_fb.start, 0x400000,
-                                       _PAGE_WRITETHRU);
+       pinfo->frame_buffer = ioremap_wt(pinfo->rsrc_fb.start, 0x400000);
         pinfo->base_frame_buffer = pinfo->frame_buffer;
  
         /* registers */
diff --git a/drivers/video/fbdev/valkyriefb.c b/drivers/video/fbdev/valkyriefb.c

index 275fb98..d51c3a8 100644 (file)
--- a/drivers/video/fbdev/valkyriefb.c
+++ b/drivers/video/fbdev/valkyriefb.c
@@ -54,13 +54,11 @@
  #include <linux/nvram.h>
  #include <linux/adb.h>
  #include <linux/cuda.h>
-#include <asm/io.h>
  #ifdef CONFIG_MAC
  #include <asm/macintosh.h>
  #else
  #include <asm/prom.h>
  #endif
-#include <asm/pgtable.h>
  
  #include "macmodes.h"
  #include "valkyriefb.h"
@@ -318,7 +316,7 @@ static void __init valkyrie_choose_mode(struct fb_info_valkyrie *p)
  int __init valkyriefb_init(void)
  {
         struct fb_info_valkyrie *p;
-       unsigned long frame_buffer_phys, cmap_regs_phys, flags;
+       unsigned long frame_buffer_phys, cmap_regs_phys;
         int err;
         char *option = NULL;
  
@@ -337,7 +335,6 @@ int __init valkyriefb_init(void)
         /* Hardcoded addresses... welcome to 68k Macintosh country :-) */
         frame_buffer_phys = 0xf9000000;
         cmap_regs_phys = 0x50f24000;
-       flags = IOMAP_NOCACHE_SER; /* IOMAP_WRITETHROUGH?? */
  #else /* ppc (!CONFIG_MAC) */
         {
                 struct device_node *dp;
@@ -354,7 +351,6 @@ int __init valkyriefb_init(void)
  
                 frame_buffer_phys = r.start;
                 cmap_regs_phys = r.start + 0x304000;
-               flags = _PAGE_WRITETHRU;
         }
  #endif /* ppc (!CONFIG_MAC) */
  
@@ -369,7 +365,11 @@ int __init valkyriefb_init(void)
         }
         p->total_vram = 0x100000;
         p->frame_buffer_phys = frame_buffer_phys;
-       p->frame_buffer = __ioremap(frame_buffer_phys, p->total_vram, flags);
+#ifdef CONFIG_MAC
+       p->frame_buffer = ioremap_nocache(frame_buffer_phys, p->total_vram);
+#else
+       p->frame_buffer = ioremap_wt(frame_buffer_phys, p->total_vram);
+#endif
         p->cmap_regs_phys = cmap_regs_phys;
         p->cmap_regs = ioremap(p->cmap_regs_phys, 0x1000);
         p->valkyrie_regs_phys = cmap_regs_phys+0x6000;
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c

index f1fbea9..3e81242 100644 (file)
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -132,7 +132,7 @@ cifs_dump_iface(struct seq_file *m, struct cifs_server_iface *iface)
         struct sockaddr_in *ipv4 = (struct sockaddr_in *)&iface->sockaddr;
         struct sockaddr_in6 *ipv6 = (struct sockaddr_in6 *)&iface->sockaddr;
  
-       seq_printf(m, "\t\tSpeed: %zu bps\n", iface->speed);
+       seq_printf(m, "\tSpeed: %zu bps\n", iface->speed);
         seq_puts(m, "\t\tCapabilities: ");
         if (iface->rdma_capable)
                 seq_puts(m, "rdma ");
@@ -285,7 +285,7 @@ skip_rdma:
                         if ((ses->serverDomain == NULL) ||
                                 (ses->serverOS == NULL) ||
                                 (ses->serverNOS == NULL)) {
-                               seq_printf(m, "\n%d) Name: %s Uses: %d Capability: 0x%x\tSession Status: %d\t",
+                               seq_printf(m, "\n%d) Name: %s Uses: %d Capability: 0x%x\tSession Status: %d ",
                                         i, ses->serverName, ses->ses_count,
                                         ses->capabilities, ses->status);
                                 if (ses->session_flags & SMB2_SESSION_FLAG_IS_GUEST)
@@ -296,16 +296,18 @@ skip_rdma:
                                 seq_printf(m,
                                     "\n%d) Name: %s  Domain: %s Uses: %d OS:"
                                     " %s\n\tNOS: %s\tCapability: 0x%x\n\tSMB"
-                                   " session status: %d\t",
+                                   " session status: %d ",
                                 i, ses->serverName, ses->serverDomain,
                                 ses->ses_count, ses->serverOS, ses->serverNOS,
                                 ses->capabilities, ses->status);
                         }
                         if (server->rdma)
                                 seq_printf(m, "RDMA\n\t");
-                       seq_printf(m, "TCP status: %d\n\tLocal Users To "
+                       seq_printf(m, "TCP status: %d Instance: %d\n\tLocal Users To "
                                    "Server: %d SecMode: 0x%x Req On Wire: %d",
-                                  server->tcpStatus, server->srv_count,
+                                  server->tcpStatus,
+                                  server->reconnect_instance,
+                                  server->srv_count,
                                    server->sec_mode, in_flight(server));
  
  #ifdef CONFIG_CIFS_STATS2
@@ -352,7 +354,7 @@ skip_rdma:
                                 seq_printf(m, "\n\tServer interfaces: %zu\n",
                                            ses->iface_count);
                         for (j = 0; j < ses->iface_count; j++) {
-                               seq_printf(m, "\t%d)\n", j);
+                               seq_printf(m, "\t%d)", j);
                                 cifs_dump_iface(m, &ses->iface_list[j]);
                         }
                         spin_unlock(&ses->iface_lock);
@@ -383,6 +385,9 @@ static ssize_t cifs_stats_proc_write(struct file *file,
                 atomic_set(&totBufAllocCount, 0);
                 atomic_set(&totSmBufAllocCount, 0);
  #endif /* CONFIG_CIFS_STATS2 */
+               atomic_set(&tcpSesReconnectCount, 0);
+               atomic_set(&tconInfoReconnectCount, 0);
+
                 spin_lock(&GlobalMid_Lock);
                 GlobalMaxActiveXid = 0;
                 GlobalCurrentXid = 0;
diff --git a/fs/cifs/cifs_debug.h b/fs/cifs/cifs_debug.h

index f4f3f08..631dc1b 100644 (file)
--- a/fs/cifs/cifs_debug.h
+++ b/fs/cifs/cifs_debug.h
@@ -47,6 +47,29 @@ extern int cifsFYI;
   */
  #ifdef CONFIG_CIFS_DEBUG
  
+
+/*
+ * When adding tracepoints and debug messages we have various choices.
+ * Some considerations:
+ *
+ * Use cifs_dbg(VFS, ...) for things we always want logged, and the user to see
+ *     cifs_info(...) slightly less important, admin can filter via loglevel > 6
+ *     cifs_dbg(FYI, ...) minor debugging messages, off by default
+ *     trace_smb3_*  ftrace functions are preferred for complex debug messages
+ *                 intended for developers or experienced admins, off by default
+ */
+
+/* Information level messages, minor events */
+#define cifs_info_func(ratefunc, fmt, ...)                     \
+do {                                                           \
+       pr_info_ ## ratefunc("CIFS: " fmt, ##__VA_ARGS__);      \
+} while (0)
+
+#define cifs_info(fmt, ...)                                    \
+do {                                                           \
+       cifs_info_func(ratelimited, fmt, ##__VA_ARGS__);        \
+} while (0)
+
  /* information message: e.g., configuration, major event */
  #define cifs_dbg_func(ratefunc, type, fmt, ...)                        \
  do {                                                           \
@@ -81,6 +104,11 @@ do {                                                                        \
         if (0)                                                          \
                 pr_debug(fmt, ##__VA_ARGS__);                           \
  } while (0)
+
+#define cifs_info(fmt, ...)                                            \
+do {                                                                   \
+       pr_info("CIFS: "fmt, ##__VA_ARGS__);                            \
+} while (0)
  #endif
  
  #endif                         /* _H_CIFS_DEBUG */
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c

index 6b61df1..b97c74e 100644 (file)
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -304,12 +304,17 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt)
          */
         mnt = ERR_PTR(-ENOMEM);
  
+       cifs_sb = CIFS_SB(mntpt->d_sb);
+       if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS) {
+               mnt = ERR_PTR(-EREMOTE);
+               goto cdda_exit;
+       }
+
         /* always use tree name prefix */
         full_path = build_path_from_dentry_optional_prefix(mntpt, true);
         if (full_path == NULL)
                 goto cdda_exit;
  
-       cifs_sb = CIFS_SB(mntpt->d_sb);
         tlink = cifs_sb_tlink(cifs_sb);
         if (IS_ERR(tlink)) {
                 mnt = ERR_CAST(tlink);
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h

index 9731d0d..63d7530 100644 (file)
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -51,6 +51,7 @@
                                               */
  #define CIFS_MOUNT_UID_FROM_ACL 0x2000000 /* try to get UID via special SID */
  #define CIFS_MOUNT_NO_HANDLE_CACHE 0x4000000 /* disable caching dir handles */
+#define CIFS_MOUNT_NO_DFS 0x8000000 /* disable DFS resolving */
  
  struct cifs_sb_info {
         struct rb_root tlink_tree;
diff --git a/fs/cifs/cifs_ioctl.h b/fs/cifs/cifs_ioctl.h

index 57ff075..d8bce2f 100644 (file)
--- a/fs/cifs/cifs_ioctl.h
+++ b/fs/cifs/cifs_ioctl.h
@@ -43,8 +43,19 @@ struct smb_snapshot_array {
         /*      snapshots[]; */
  } __packed;
  
+struct smb_query_info {
+       __u32   info_type;
+       __u32   file_info_class;
+       __u32   additional_information;
+       __u32   flags;
+       __u32   input_buffer_length;
+       __u32   output_buffer_length;
+       /* char buffer[]; */
+} __packed;
+
  #define CIFS_IOCTL_MAGIC       0xCF
  #define CIFS_IOC_COPYCHUNK_FILE        _IOW(CIFS_IOCTL_MAGIC, 3, int)
  #define CIFS_IOC_SET_INTEGRITY  _IO(CIFS_IOCTL_MAGIC, 4)
  #define CIFS_IOC_GET_MNT_INFO _IOR(CIFS_IOCTL_MAGIC, 5, struct smb_mnt_fs_info)
  #define CIFS_ENUMERATE_SNAPSHOTS _IOR(CIFS_IOCTL_MAGIC, 6, struct smb_snapshot_array)
+#define CIFS_QUERY_INFO _IOWR(CIFS_IOCTL_MAGIC, 7, struct smb_query_info)
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c

index 7065426..7de9603 100644 (file)
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -81,6 +81,14 @@ module_param(cifs_max_pending, uint, 0444);
  MODULE_PARM_DESC(cifs_max_pending, "Simultaneous requests to server for "
                                    "CIFS/SMB1 dialect (N/A for SMB3) "
                                    "Default: 32767 Range: 2 to 32767.");
+#ifdef CONFIG_CIFS_STATS2
+unsigned int slow_rsp_threshold = 1;
+module_param(slow_rsp_threshold, uint, 0644);
+MODULE_PARM_DESC(slow_rsp_threshold, "Amount of time (in seconds) to wait "
+                                  "before logging that a response is delayed. "
+                                  "Default: 1 (if set to 0 disables msg).");
+#endif /* STATS2 */
+
  module_param(enable_oplocks, bool, 0644);
  MODULE_PARM_DESC(enable_oplocks, "Enable or disable oplocks. Default: y/Y/1");
  
@@ -492,6 +500,8 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
                 seq_puts(s, ",unix");
         else
                 seq_puts(s, ",nounix");
+       if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS)
+               seq_puts(s, ",nodfs");
         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)
                 seq_puts(s, ",posixpaths");
         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)
@@ -707,7 +717,14 @@ cifs_smb3_do_mount(struct file_system_type *fs_type,
         struct cifs_mnt_data mnt_data;
         struct dentry *root;
  
-       cifs_dbg(FYI, "Devname: %s flags: %d\n", dev_name, flags);
+       /*
+        * Log the attempted mount: as an FYI debug message when cifsFYI is
+        * set, otherwise as an informational message via cifs_info().
+        */
+       if (cifsFYI)
+               cifs_dbg(FYI, "Devname: %s flags: %d\n", dev_name, flags);
+       else
+               cifs_info("Attempting to mount %s\n", dev_name);
  
         volume_info = cifs_get_volume_info((char *)data, dev_name, is_smb3);
         if (IS_ERR(volume_info))
@@ -1418,6 +1435,11 @@ init_cifs(void)
  #ifdef CONFIG_CIFS_STATS2
         atomic_set(&totBufAllocCount, 0);
         atomic_set(&totSmBufAllocCount, 0);
+       if (slow_rsp_threshold < 1)
+               cifs_dbg(FYI, "slow_response_threshold msgs disabled\n");
+       else if (slow_rsp_threshold > 32767)
+               cifs_dbg(VFS,
+                      "slow response threshold set higher than recommended (0 to 32767)\n");
  #endif /* CONFIG_CIFS_STATS2 */
  
         atomic_set(&midCount, 0);
@@ -1538,11 +1560,11 @@ exit_cifs(void)
         cifs_proc_clean();
  }
  
-MODULE_AUTHOR("Steve French <sfrench@us.ibm.com>");
+MODULE_AUTHOR("Steve French");
  MODULE_LICENSE("GPL"); /* combination of LGPL + GPL source behaves as GPL */
  MODULE_DESCRIPTION
-    ("VFS to access servers complying with the SNIA CIFS Specification "
-     "e.g. Samba and Windows");
+       ("VFS to access SMB3 servers e.g. Samba, Macs, Azure and Windows (and "
+       "also older servers complying with the SNIA CIFS Specification)");
  MODULE_VERSION(CIFS_VERSION);
  MODULE_SOFTDEP("pre: arc4");
  MODULE_SOFTDEP("pre: des");
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h

index f047e87..24e265a 100644 (file)
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -148,5 +148,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
  extern const struct export_operations cifs_export_ops;
  #endif /* CONFIG_CIFS_NFSD_EXPORT */
  
-#define CIFS_VERSION   "2.13"
+#define CIFS_VERSION   "2.14"
  #endif                         /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h

index 9dcaed0..ed1e0fc 100644 (file)
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -33,6 +33,7 @@
  
  #define CIFS_MAGIC_NUMBER 0xFF534D42      /* the first four bytes of SMB PDUs */
  
+#define SMB_PATH_MAX 260
  #define CIFS_PORT 445
  #define RFC1001_PORT 139
  
@@ -465,6 +466,11 @@ struct smb_version_operations {
         enum securityEnum (*select_sectype)(struct TCP_Server_Info *,
                             enum securityEnum);
         int (*next_header)(char *);
+       /* ioctl passthrough for query_info */
+       int (*ioctl_query_info)(const unsigned int xid,
+                               struct cifs_tcon *tcon,
+                               __le16 *path, int is_dir,
+                               unsigned long p);
  };
  
  struct smb_version_values {
@@ -654,6 +660,7 @@ struct TCP_Server_Info {
         /* 16th byte of RFC1001 workstation name is always null */
         char workstation_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL];
         __u32 sequence_number; /* for signing, protected by srv_mutex */
+       __u32 reconnect_instance; /* incremented on each reconnect */
         struct session_key session_key;
         unsigned long lstrp; /* when we got last response from this server */
         struct cifs_secmech secmech; /* crypto sec mech functs, descriptors */
@@ -798,6 +805,7 @@ compare_mid(__u16 mid, const struct smb_hdr *smb)
   * a single wsize request with a single call.
   */
  #define CIFS_DEFAULT_IOSIZE (1024 * 1024)
+#define SMB3_DEFAULT_IOSIZE (4 * 1024 * 1024)
  
  /*
   * Windows only supports a max of 60kb reads and 65535 byte writes. Default to
@@ -924,6 +932,8 @@ struct cifs_tcon {
         struct list_head tcon_list;
         int tc_count;
         struct list_head rlist; /* reconnect list */
+       atomic_t num_local_opens;  /* num of all opens including disconnected */
+       atomic_t num_remote_opens; /* num of all network opens on server */
         struct list_head openFileList;
         spinlock_t open_file_lock; /* protects list above */
         struct cifs_ses *ses;   /* pointer to session associated with */
@@ -1072,7 +1082,8 @@ struct cifsLockInfo {
         __u64 offset;
         __u64 length;
         __u32 pid;
-       __u32 type;
+       __u16 type;
+       __u16 flags;
  };
  
  /*
@@ -1715,6 +1726,7 @@ GLOBAL_EXTERN atomic_t bufAllocCount;    /* current number allocated  */
  #ifdef CONFIG_CIFS_STATS2
  GLOBAL_EXTERN atomic_t totBufAllocCount; /* total allocated over all time */
  GLOBAL_EXTERN atomic_t totSmBufAllocCount;
+extern unsigned int slow_rsp_threshold; /* number of secs before logging */
  #endif
  GLOBAL_EXTERN atomic_t smBufAllocCount;
  GLOBAL_EXTERN atomic_t midCount;
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h

index 20adda4..fa361bc 100644 (file)
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -219,7 +219,7 @@ extern void cifs_mark_open_files_invalid(struct cifs_tcon *tcon);
  extern void cifs_reopen_persistent_handles(struct cifs_tcon *tcon);
  
  extern bool cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset,
-                                   __u64 length, __u8 type,
+                                   __u64 length, __u8 type, __u16 flags,
                                     struct cifsLockInfo **conf_lock,
                                     int rw_check);
  extern void cifs_add_pending_open(struct cifs_fid *fid,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c

index 5657b79..f82fd34 100644 (file)
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1607,6 +1607,7 @@ cifs_readv_callback(struct mid_q_entry *mid)
         struct smb_rqst rqst = { .rq_iov = rdata->iov,
                                  .rq_nvec = 2,
                                  .rq_pages = rdata->pages,
+                                .rq_offset = rdata->page_offset,
                                  .rq_npages = rdata->nr_pages,
                                  .rq_pagesz = rdata->pagesz,
                                  .rq_tailsz = rdata->tailsz };
@@ -2210,6 +2211,7 @@ cifs_async_writev(struct cifs_writedata *wdata,
         rqst.rq_iov = iov;
         rqst.rq_nvec = 2;
         rqst.rq_pages = wdata->pages;
+       rqst.rq_offset = wdata->page_offset;
         rqst.rq_npages = wdata->nr_pages;
         rqst.rq_pagesz = wdata->pagesz;
         rqst.rq_tailsz = wdata->tailsz;
@@ -5027,6 +5029,13 @@ oldQFSInfoRetry:
                                 le16_to_cpu(response_data->BytesPerSector) *
                                 le32_to_cpu(response_data->
                                         SectorsPerAllocationUnit);
+                       /*
+                        * much prefer larger but if server doesn't report
+                        * a valid size then 4K is a reasonable minimum
+                        */
+                       if (FSData->f_bsize < 512)
+                               FSData->f_bsize = 4096;
+
                         FSData->f_blocks =
                                le32_to_cpu(response_data->TotalAllocationUnits);
                         FSData->f_bfree = FSData->f_bavail =
@@ -5107,6 +5116,13 @@ QFSInfoRetry:
                             le32_to_cpu(response_data->BytesPerSector) *
                             le32_to_cpu(response_data->
                                         SectorsPerAllocationUnit);
+                       /*
+                        * much prefer larger but if server doesn't report
+                        * a valid size then 4K is a reasonable minimum
+                        */
+                       if (FSData->f_bsize < 512)
+                               FSData->f_bsize = 4096;
+
                         FSData->f_blocks =
                             le64_to_cpu(response_data->TotalAllocationUnits);
                         FSData->f_bfree = FSData->f_bavail =
@@ -5470,6 +5486,13 @@ QFSPosixRetry:
                                  data_offset);
                         FSData->f_bsize =
                                         le32_to_cpu(response_data->BlockSize);
+                       /*
+                        * much prefer larger but if server doesn't report
+                        * a valid size then 4K is a reasonable minimum
+                        */
+                       if (FSData->f_bsize < 512)
+                               FSData->f_bsize = 4096;
+
                         FSData->f_blocks =
                                         le64_to_cpu(response_data->TotalBlocks);
                         FSData->f_bfree =
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c

index 52d71b6..d82f0cc 100644 (file)
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -250,6 +250,7 @@ static const match_table_t cifs_mount_option_tokens = {
         { Opt_ignore, "dev" },
         { Opt_ignore, "mand" },
         { Opt_ignore, "nomand" },
+       { Opt_ignore, "relatime" },
         { Opt_ignore, "_netdev" },
  
         { Opt_err, NULL }
@@ -347,7 +348,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
         server->maxBuf = 0;
         server->max_read = 0;
  
-       cifs_dbg(FYI, "Reconnecting tcp session\n");
+       cifs_dbg(FYI, "Mark tcp session as need reconnect\n");
         trace_smb3_reconnect(server->CurrentMid, server->hostname);
  
         /* before reconnecting the tcp session, mark the smb session (uid)
@@ -2396,6 +2397,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
                 volume_info->target_rfc1001_name, RFC1001_NAME_LEN_WITH_NULL);
         tcp_ses->session_estab = false;
         tcp_ses->sequence_number = 0;
+       tcp_ses->reconnect_instance = 0;
         tcp_ses->lstrp = jiffies;
         spin_lock_init(&tcp_ses->req_lock);
         INIT_LIST_HEAD(&tcp_ses->tcp_ses_list);
@@ -3085,10 +3087,6 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
         if (rc)
                 goto out_fail;
  
-       if (volume_info->nodfs) {
-               tcon->Flags &= ~SMB_SHARE_IS_IN_DFS;
-               cifs_dbg(FYI, "DFS disabled (%d)\n", tcon->Flags);
-       }
         tcon->use_persistent = false;
         /* check if SMB2 or later, CIFS does not support persistent handles */
         if (volume_info->persistent) {
@@ -3663,6 +3661,8 @@ int cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
         cifs_sb->actimeo = pvolume_info->actimeo;
         cifs_sb->local_nls = pvolume_info->local_nls;
  
+       if (pvolume_info->nodfs)
+               cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_DFS;
         if (pvolume_info->noperm)
                 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_PERM;
         if (pvolume_info->setuids)
@@ -3819,6 +3819,9 @@ expand_dfs_referral(const unsigned int xid, struct cifs_ses *ses,
         struct dfs_info3_param *referrals = NULL;
         char *full_path = NULL, *ref_path = NULL, *mdata = NULL;
  
+       if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS)
+               return -EREMOTE;
+
         full_path = build_unc_path_to_root(volume_info, cifs_sb);
         if (IS_ERR(full_path))
                 return PTR_ERR(full_path);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c

index 8d41ca7..c620d4b 100644 (file)
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -334,6 +334,7 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
         server->ops->set_fid(cfile, fid, oplock);
  
         list_add(&cfile->tlist, &tcon->openFileList);
+       atomic_inc(&tcon->num_local_opens);
  
         /* if readable file instance put first in list*/
         if (file->f_mode & FMODE_READ)
@@ -395,6 +396,7 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
         /* remove it from the lists */
         list_del(&cifs_file->flist);
         list_del(&cifs_file->tlist);
+       atomic_dec(&tcon->num_local_opens);
  
         if (list_empty(&cifsi->openFileList)) {
                 cifs_dbg(FYI, "closing last open instance for inode %p\n",
@@ -864,7 +866,7 @@ int cifs_closedir(struct inode *inode, struct file *file)
  }
  
  static struct cifsLockInfo *
-cifs_lock_init(__u64 offset, __u64 length, __u8 type)
+cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
  {
         struct cifsLockInfo *lock =
                 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
@@ -874,6 +876,7 @@ cifs_lock_init(__u64 offset, __u64 length, __u8 type)
         lock->length = length;
         lock->type = type;
         lock->pid = current->tgid;
+       lock->flags = flags;
         INIT_LIST_HEAD(&lock->blist);
         init_waitqueue_head(&lock->block_q);
         return lock;
@@ -896,7 +899,8 @@ cifs_del_lock_waiters(struct cifsLockInfo *lock)
  /* @rw_check : 0 - no op, 1 - read, 2 - write */
  static bool
  cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
-                           __u64 length, __u8 type, struct cifsFileInfo *cfile,
+                           __u64 length, __u8 type, __u16 flags,
+                           struct cifsFileInfo *cfile,
                             struct cifsLockInfo **conf_lock, int rw_check)
  {
         struct cifsLockInfo *li;
@@ -918,6 +922,10 @@ cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
                     ((server->ops->compare_fids(cfile, cur_cfile) &&
                      current->tgid == li->pid) || type == li->type))
                         continue;
+               if (rw_check == CIFS_LOCK_OP &&
+                   (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
+                   server->ops->compare_fids(cfile, cur_cfile))
+                       continue;
                 if (conf_lock)
                         *conf_lock = li;
                 return true;
@@ -927,8 +935,8 @@ cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
  
  bool
  cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
-                       __u8 type, struct cifsLockInfo **conf_lock,
-                       int rw_check)
+                       __u8 type, __u16 flags,
+                       struct cifsLockInfo **conf_lock, int rw_check)
  {
         bool rc = false;
         struct cifs_fid_locks *cur;
@@ -936,7 +944,8 @@ cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
  
         list_for_each_entry(cur, &cinode->llist, llist) {
                 rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
-                                                cfile, conf_lock, rw_check);
+                                                flags, cfile, conf_lock,
+                                                rw_check);
                 if (rc)
                         break;
         }
@@ -964,7 +973,8 @@ cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
         down_read(&cinode->lock_sem);
  
         exist = cifs_find_lock_conflict(cfile, offset, length, type,
-                                       &conf_lock, CIFS_LOCK_OP);
+                                       flock->fl_flags, &conf_lock,
+                                       CIFS_LOCK_OP);
         if (exist) {
                 flock->fl_start = conf_lock->offset;
                 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
@@ -1011,7 +1021,8 @@ try_again:
         down_write(&cinode->lock_sem);
  
         exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
-                                       lock->type, &conf_lock, CIFS_LOCK_OP);
+                                       lock->type, lock->flags, &conf_lock,
+                                       CIFS_LOCK_OP);
         if (!exist && cinode->can_cache_brlcks) {
                 list_add_tail(&lock->llist, &cfile->llist->locks);
                 up_write(&cinode->lock_sem);
@@ -1321,7 +1332,7 @@ cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
                 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
         if (flock->fl_flags &
             (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
-              FL_ACCESS | FL_LEASE | FL_CLOSE)))
+              FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
                 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
  
         *type = server->vals->large_lock_type;
@@ -1584,7 +1595,8 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
         if (lock) {
                 struct cifsLockInfo *lock;
  
-               lock = cifs_lock_init(flock->fl_start, length, type);
+               lock = cifs_lock_init(flock->fl_start, length, type,
+                                     flock->fl_flags);
                 if (!lock)
                         return -ENOMEM;
  
@@ -1653,7 +1665,6 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
  
         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
                         tcon->ses->server);
-
         cifs_sb = CIFS_FILE_SB(file);
         netfid = cfile->fid.netfid;
         cinode = CIFS_I(file_inode(file));
@@ -2098,6 +2109,7 @@ static int cifs_writepages(struct address_space *mapping,
         pgoff_t end, index;
         struct cifs_writedata *wdata;
         int rc = 0;
+       unsigned int xid;
  
         /*
          * If wsize is smaller than the page cache size, default to writing
@@ -2106,6 +2118,7 @@ static int cifs_writepages(struct address_space *mapping,
         if (cifs_sb->wsize < PAGE_SIZE)
                 return generic_writepages(mapping, wbc);
  
+       xid = get_xid();
         if (wbc->range_cyclic) {
                 index = mapping->writeback_index; /* Start from prev offset */
                 end = -1;
@@ -2199,6 +2212,7 @@ retry:
         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
                 mapping->writeback_index = index;
  
+       free_xid(xid);
         return rc;
  }
  
@@ -2817,8 +2831,8 @@ cifs_writev(struct kiocb *iocb, struct iov_iter *from)
                 goto out;
  
         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
-                                    server->vals->exclusive_lock_type, NULL,
-                                    CIFS_WRITE_OP))
+                                    server->vals->exclusive_lock_type, 0,
+                                    NULL, CIFS_WRITE_OP))
                 rc = __generic_file_write_iter(iocb, from);
         else
                 rc = -EACCES;
@@ -3388,7 +3402,7 @@ cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
         down_read(&cinode->lock_sem);
         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
                                      tcon->ses->server->vals->shared_lock_type,
-                                    NULL, CIFS_READ_OP))
+                                    0, NULL, CIFS_READ_OP))
                 rc = generic_file_read_iter(iocb, to);
         up_read(&cinode->lock_sem);
         return rc;
@@ -3743,7 +3757,9 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
         struct TCP_Server_Info *server;
         pid_t pid;
+       unsigned int xid;
  
+       xid = get_xid();
         /*
          * Reads as many pages as possible from fscache. Returns -ENOBUFS
          * immediately if the cookie is negative
@@ -3753,8 +3769,10 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
          */
         rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
                                          &num_pages);
-       if (rc == 0)
+       if (rc == 0) {
+               free_xid(xid);
                 return rc;
+       }
  
         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
                 pid = open_file->pid;
@@ -3798,6 +3816,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
                  */
                 if (unlikely(rsize < PAGE_SIZE)) {
                         add_credits_and_wake_if(server, credits, 0);
+                       free_xid(xid);
                         return 0;
                 }
  
@@ -3862,6 +3881,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
          * allocator.
          */
         cifs_fscache_readpages_cancel(mapping->host, page_list);
+       free_xid(xid);
         return rc;
  }
  
@@ -3889,8 +3909,12 @@ static int cifs_readpage_worker(struct file *file, struct page *page,
         else
                 cifs_dbg(FYI, "Bytes read %d\n", rc);
  
-       file_inode(file)->i_atime =
-               current_time(file_inode(file));
+       /* we do not want atime to be less than mtime, it broke some apps */
+       /* (timespec64_compare() < 0 means atime is strictly older than mtime) */
+       file_inode(file)->i_atime = current_time(file_inode(file));
+       if (timespec64_compare(&(file_inode(file)->i_atime),
+                              &(file_inode(file)->i_mtime)) < 0)
+               file_inode(file)->i_atime = file_inode(file)->i_mtime;
  
         if (PAGE_SIZE > rc)
                 memset(read_data + rc, 0, PAGE_SIZE - rc);
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c

index 6e8765f..1023d78 100644 (file)
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -162,7 +162,11 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr)
         cifs_revalidate_cache(inode, fattr);
  
         spin_lock(&inode->i_lock);
-       inode->i_atime = fattr->cf_atime;
+       /* atime must not be older than mtime (it broke some apps): clamp it */
+       if (timespec64_compare(&fattr->cf_atime, &fattr->cf_mtime) < 0)
+               inode->i_atime = fattr->cf_mtime;
+       else
+               inode->i_atime = fattr->cf_atime;
         inode->i_mtime = fattr->cf_mtime;
         inode->i_ctime = fattr->cf_ctime;
         inode->i_rdev = fattr->cf_rdev;
@@ -777,38 +781,53 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
         } else if (rc == -EREMOTE) {
                 cifs_create_dfs_fattr(&fattr, sb);
                 rc = 0;
-       } else if (rc == -EACCES && backup_cred(cifs_sb)) {
-                       srchinf = kzalloc(sizeof(struct cifs_search_info),
-                                               GFP_KERNEL);
-                       if (srchinf == NULL) {
-                               rc = -ENOMEM;
-                               goto cgii_exit;
-                       }
+       } else if ((rc == -EACCES) && backup_cred(cifs_sb) &&
+                  (strcmp(server->vals->version_string, SMB1_VERSION_STRING)
+                     == 0)) {
+               /*
+                * For SMB2 and later the backup intent flag is already
+                * sent if needed on open and there is no path based
+                * FindFirst operation to use to retry with
+                */
  
-                       srchinf->endOfSearch = false;
+               srchinf = kzalloc(sizeof(struct cifs_search_info),
+                                       GFP_KERNEL);
+               if (srchinf == NULL) {
+                       rc = -ENOMEM;
+                       goto cgii_exit;
+               }
+
+               srchinf->endOfSearch = false;
+               if (tcon->unix_ext)
+                       srchinf->info_level = SMB_FIND_FILE_UNIX;
+               else if ((tcon->ses->capabilities &
+                        tcon->ses->server->vals->cap_nt_find) == 0)
+                       srchinf->info_level = SMB_FIND_FILE_INFO_STANDARD;
+               else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)
                         srchinf->info_level = SMB_FIND_FILE_ID_FULL_DIR_INFO;
+               else /* no srvino useful for fallback to some netapp */
+                       srchinf->info_level = SMB_FIND_FILE_DIRECTORY_INFO;
  
-                       srchflgs = CIFS_SEARCH_CLOSE_ALWAYS |
-                                       CIFS_SEARCH_CLOSE_AT_END |
-                                       CIFS_SEARCH_BACKUP_SEARCH;
+               srchflgs = CIFS_SEARCH_CLOSE_ALWAYS |
+                               CIFS_SEARCH_CLOSE_AT_END |
+                               CIFS_SEARCH_BACKUP_SEARCH;
  
-                       rc = CIFSFindFirst(xid, tcon, full_path,
-                               cifs_sb, NULL, srchflgs, srchinf, false);
-                       if (!rc) {
-                               data =
-                               (FILE_ALL_INFO *)srchinf->srch_entries_start;
+               rc = CIFSFindFirst(xid, tcon, full_path,
+                       cifs_sb, NULL, srchflgs, srchinf, false);
+               if (!rc) {
+                       data = (FILE_ALL_INFO *)srchinf->srch_entries_start;
  
-                               cifs_dir_info_to_fattr(&fattr,
-                               (FILE_DIRECTORY_INFO *)data, cifs_sb);
-                               fattr.cf_uniqueid = le64_to_cpu(
-                               ((SEARCH_ID_FULL_DIR_INFO *)data)->UniqueId);
-                               validinum = true;
+                       cifs_dir_info_to_fattr(&fattr,
+                       (FILE_DIRECTORY_INFO *)data, cifs_sb);
+                       fattr.cf_uniqueid = le64_to_cpu(
+                       ((SEARCH_ID_FULL_DIR_INFO *)data)->UniqueId);
+                       validinum = true;
  
-                               cifs_buf_release(srchinf->ntwrk_buf_start);
-                       }
-                       kfree(srchinf);
-                       if (rc)
-                               goto cgii_exit;
+                       cifs_buf_release(srchinf->ntwrk_buf_start);
+               }
+               kfree(srchinf);
+               if (rc)
+                       goto cgii_exit;
         } else
                 goto cgii_exit;
  
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c

index 54f32f9..76ddd98 100644 (file)
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -32,8 +32,51 @@
  #include "cifs_debug.h"
  #include "cifsfs.h"
  #include "cifs_ioctl.h"
+#include "smb2proto.h"
  #include <linux/btrfs.h>
  
+static long cifs_ioctl_query_info(unsigned int xid, struct file *filep,
+                                 unsigned long p)
+{
+       struct inode *inode = file_inode(filep);
+       struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+       struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
+       struct dentry *dentry = filep->f_path.dentry;
+       unsigned char *path;
+       __le16 *utf16_path = NULL, root_path; /* root_path: on-stack empty string */
+       int rc = 0;
+       /* CIFS_QUERY_INFO ioctl: build filep's UTF-16 path, pass it and p to the server op */
+       path = build_path_from_dentry(dentry);
+       if (path == NULL)
+               return -ENOMEM; /* nothing else allocated yet, plain return */
+
+       cifs_dbg(FYI, "%s %s\n", __func__, path);
+
+       if (!path[0]) { /* empty path: point at the on-stack terminator instead */
+               root_path = 0;
+               utf16_path = &root_path;
+       } else { /* path + 1 skips the first char (presumably a leading separator - confirm) */
+               utf16_path = cifs_convert_path_to_utf16(path + 1, cifs_sb);
+               if (!utf16_path) {
+                       rc = -ENOMEM;
+                       goto ici_exit;
+               }
+       }
+
+       if (tcon->ses->server->ops->ioctl_query_info) /* op may be unset */
+               rc = tcon->ses->server->ops->ioctl_query_info(
+                               xid, tcon, utf16_path,
+                               filep->private_data ? 0 : 1, p); /* 4th arg: is_dir in smb2_ioctl_query_info() */
+       else
+               rc = -EOPNOTSUPP;
+
+ ici_exit:
+       if (utf16_path != &root_path) /* root_path lives on the stack, never kfree it */
+               kfree(utf16_path);
+       kfree(path);
+       return rc;
+}
+
  static long cifs_ioctl_copychunk(unsigned int xid, struct file *dst_file,
                         unsigned long srcfd)
  {
@@ -123,7 +166,6 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
         struct inode *inode = file_inode(filep);
         int rc = -ENOTTY; /* strange error - but the precedent */
         unsigned int xid;
-       struct cifs_sb_info *cifs_sb;
         struct cifsFileInfo *pSMBFile = filep->private_data;
         struct cifs_tcon *tcon;
         __u64   ExtAttrBits = 0;
@@ -131,7 +173,6 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
  
         xid = get_xid();
  
-       cifs_sb = CIFS_SB(inode->i_sb);
         cifs_dbg(FYI, "cifs ioctl 0x%x\n", command);
         switch (command) {
                 case FS_IOC_GETFLAGS:
@@ -196,6 +237,9 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
                 case CIFS_IOC_COPYCHUNK_FILE:
                         rc = cifs_ioctl_copychunk(xid, filep, arg);
                         break;
+               case CIFS_QUERY_INFO:
+                       rc = cifs_ioctl_query_info(xid, filep, arg);
+                       break;
                 case CIFS_IOC_SET_INTEGRITY:
                         if (pSMBFile == NULL)
                                 break;
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c

index 6926685..fc43d5d 100644 (file)
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -123,6 +123,8 @@ tconInfoAlloc(void)
                 ret_buf->crfid.fid = kzalloc(sizeof(struct cifs_fid),
                                              GFP_KERNEL);
                 spin_lock_init(&ret_buf->stat_lock);
+               atomic_set(&ret_buf->num_local_opens, 0);
+               atomic_set(&ret_buf->num_remote_opens, 0);
         }
         return ret_buf;
  }
diff --git a/fs/cifs/smb2glob.h b/fs/cifs/smb2glob.h

index 0ffa180..dd10f0c 100644 (file)
--- a/fs/cifs/smb2glob.h
+++ b/fs/cifs/smb2glob.h
@@ -33,7 +33,7 @@
  
  /*
   * Identifiers for functions that use the open, operation, close pattern
- * in smb2inode.c:smb2_open_op_close()
+ * in smb2inode.c:smb2_compound_op()
   */
  #define SMB2_OP_SET_DELETE 1
  #define SMB2_OP_SET_INFO 2
diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c

index 1eef179..9e7ef7e 100644 (file)
--- a/fs/cifs/smb2inode.c
+++ b/fs/cifs/smb2inode.c
@@ -38,54 +38,83 @@
  #include "smb2proto.h"
  
  static int
-smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon,
-                  struct cifs_sb_info *cifs_sb, const char *full_path,
-                  __u32 desired_access, __u32 create_disposition,
-                  __u32 create_options, void *data, int command)
+smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon,
+                struct cifs_sb_info *cifs_sb, const char *full_path,
+                __u32 desired_access, __u32 create_disposition,
+                __u32 create_options, void *ptr, int command)
  {
-       int rc, tmprc = 0;
+       int rc;
         __le16 *utf16_path = NULL;
         __u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
         struct cifs_open_parms oparms;
         struct cifs_fid fid;
-       bool use_cached_root_handle = false;
-
-       if ((strcmp(full_path, "") == 0) && (create_options == 0) &&
-           (desired_access == FILE_READ_ATTRIBUTES) &&
-           (create_disposition == FILE_OPEN) &&
-           (tcon->nohandlecache == false)) {
-               rc = open_shroot(xid, tcon, &fid);
-               if (rc == 0)
-                       use_cached_root_handle = true;
-       }
+       struct cifs_ses *ses = tcon->ses;
+       struct TCP_Server_Info *server = ses->server;
+       int num_rqst = 0;
+       struct smb_rqst rqst[3];
+       int resp_buftype[3];
+       struct kvec rsp_iov[3];
+       struct kvec open_iov[SMB2_CREATE_IOV_SIZE];
+       struct kvec qi_iov[1];
+       struct kvec si_iov[SMB2_SET_INFO_IOV_SIZE];
+       struct kvec close_iov[1];
+       struct smb2_query_info_rsp *qi_rsp = NULL;
+       int flags = 0;
+       __u8 delete_pending[8] = {1, 0, 0, 0, 0, 0, 0, 0};
+       unsigned int size[2];
+       void *data[2];
+       struct smb2_file_rename_info rename_info;
+       struct smb2_file_link_info link_info;
+       int len;
  
-       if (use_cached_root_handle == false) {
-               utf16_path = cifs_convert_path_to_utf16(full_path, cifs_sb);
-               if (!utf16_path)
-                       return -ENOMEM;
-
-               oparms.tcon = tcon;
-               oparms.desired_access = desired_access;
-               oparms.disposition = create_disposition;
-               oparms.create_options = create_options;
-               oparms.fid = &fid;
-               oparms.reconnect = false;
-
-               rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL,
-                              NULL);
-               if (rc) {
-                       kfree(utf16_path);
-                       return rc;
-               }
-       }
+       if (smb3_encryption_required(tcon))
+               flags |= CIFS_TRANSFORM_REQ;
+
+       memset(rqst, 0, sizeof(rqst));
+       resp_buftype[0] = resp_buftype[1] = resp_buftype[2] = CIFS_NO_BUFFER;
+       memset(rsp_iov, 0, sizeof(rsp_iov));
+
+       /* Open */
+       utf16_path = cifs_convert_path_to_utf16(full_path, cifs_sb);
+       if (!utf16_path)
+               return -ENOMEM;
+
+       oparms.tcon = tcon;
+       oparms.desired_access = desired_access;
+       oparms.disposition = create_disposition;
+       oparms.create_options = create_options;
+       if (backup_cred(cifs_sb))
+               oparms.create_options |= CREATE_OPEN_BACKUP_INTENT;
+       oparms.fid = &fid;
+       oparms.reconnect = false;
  
+       memset(&open_iov, 0, sizeof(open_iov));
+       rqst[num_rqst].rq_iov = open_iov;
+       rqst[num_rqst].rq_nvec = SMB2_CREATE_IOV_SIZE;
+       rc = SMB2_open_init(tcon, &rqst[num_rqst], &oplock, &oparms,
+                           utf16_path);
+       kfree(utf16_path);
+       if (rc)
+               goto finished;
+
+       smb2_set_next_command(server, &rqst[num_rqst++]);
+
+       /* Operation */
         switch (command) {
-       case SMB2_OP_DELETE:
-               break;
         case SMB2_OP_QUERY_INFO:
-               tmprc = SMB2_query_info(xid, tcon, fid.persistent_fid,
-                                       fid.volatile_fid,
-                                       (struct smb2_file_all_info *)data);
+               memset(&qi_iov, 0, sizeof(qi_iov));
+               rqst[num_rqst].rq_iov = qi_iov;
+               rqst[num_rqst].rq_nvec = 1;
+
+               rc = SMB2_query_info_init(tcon, &rqst[num_rqst], COMPOUND_FID,
+                               COMPOUND_FID, FILE_ALL_INFORMATION,
+                               SMB2_O_INFO_FILE, 0,
+                               sizeof(struct smb2_file_all_info) +
+                                         PATH_MAX * 2, 0, NULL);
+               smb2_set_next_command(server, &rqst[num_rqst]);
+               smb2_set_related(&rqst[num_rqst++]);
+               break;
+       case SMB2_OP_DELETE:
                 break;
         case SMB2_OP_MKDIR:
                 /*
@@ -94,39 +123,156 @@ smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon,
                  */
                 break;
         case SMB2_OP_RMDIR:
-               tmprc = SMB2_rmdir(xid, tcon, fid.persistent_fid,
-                                  fid.volatile_fid);
-               break;
-       case SMB2_OP_RENAME:
-               tmprc = SMB2_rename(xid, tcon, fid.persistent_fid,
-                                   fid.volatile_fid, (__le16 *)data);
-               break;
-       case SMB2_OP_HARDLINK:
-               tmprc = SMB2_set_hardlink(xid, tcon, fid.persistent_fid,
-                                         fid.volatile_fid, (__le16 *)data);
+               memset(&si_iov, 0, sizeof(si_iov));
+               rqst[num_rqst].rq_iov = si_iov;
+               rqst[num_rqst].rq_nvec = 1;
+
+               size[0] = 8;
+               data[0] = &delete_pending[0];
+
+               rc = SMB2_set_info_init(tcon, &rqst[num_rqst], COMPOUND_FID,
+                                       COMPOUND_FID, current->tgid,
+                                       FILE_DISPOSITION_INFORMATION,
+                                       SMB2_O_INFO_FILE, 0, data, size);
+               smb2_set_next_command(server, &rqst[num_rqst]);
+               smb2_set_related(&rqst[num_rqst++]);
                 break;
         case SMB2_OP_SET_EOF:
-               tmprc = SMB2_set_eof(xid, tcon, fid.persistent_fid,
-                                    fid.volatile_fid, current->tgid,
-                                    (__le64 *)data, false);
+               memset(&si_iov, 0, sizeof(si_iov));
+               rqst[num_rqst].rq_iov = si_iov;
+               rqst[num_rqst].rq_nvec = 1;
+
+               size[0] = 8; /* sizeof __le64 */
+               data[0] = ptr;
+
+               rc = SMB2_set_info_init(tcon, &rqst[num_rqst], COMPOUND_FID,
+                                       COMPOUND_FID, current->tgid,
+                                       FILE_END_OF_FILE_INFORMATION,
+                                       SMB2_O_INFO_FILE, 0, data, size);
+               smb2_set_next_command(server, &rqst[num_rqst]);
+               smb2_set_related(&rqst[num_rqst++]);
                 break;
         case SMB2_OP_SET_INFO:
-               tmprc = SMB2_set_info(xid, tcon, fid.persistent_fid,
-                                     fid.volatile_fid,
-                                     (FILE_BASIC_INFO *)data);
+               memset(&si_iov, 0, sizeof(si_iov));
+               rqst[num_rqst].rq_iov = si_iov;
+               rqst[num_rqst].rq_nvec = 1;
+
+
+               size[0] = sizeof(FILE_BASIC_INFO);
+               data[0] = ptr;
+
+               rc = SMB2_set_info_init(tcon, &rqst[num_rqst], COMPOUND_FID,
+                                       COMPOUND_FID, current->tgid,
+                                       FILE_BASIC_INFORMATION,
+                                       SMB2_O_INFO_FILE, 0, data, size);
+               smb2_set_next_command(server, &rqst[num_rqst]);
+               smb2_set_related(&rqst[num_rqst++]);
+               break;
+       case SMB2_OP_RENAME:
+               memset(&si_iov, 0, sizeof(si_iov));
+               rqst[num_rqst].rq_iov = si_iov;
+               rqst[num_rqst].rq_nvec = 2;
+
+               len = (2 * UniStrnlen((wchar_t *)ptr, PATH_MAX));
+
+               rename_info.ReplaceIfExists = 1;
+               rename_info.RootDirectory = 0;
+               rename_info.FileNameLength = cpu_to_le32(len);
+
+               size[0] = sizeof(struct smb2_file_rename_info);
+               data[0] = &rename_info;
+
+               size[1] = len + 2 /* null */;
+               data[1] = (__le16 *)ptr;
+
+               rc = SMB2_set_info_init(tcon, &rqst[num_rqst], COMPOUND_FID,
+                                       COMPOUND_FID, current->tgid,
+                                       FILE_RENAME_INFORMATION,
+                                       SMB2_O_INFO_FILE, 0, data, size);
+               smb2_set_next_command(server, &rqst[num_rqst]);
+               smb2_set_related(&rqst[num_rqst++]);
+               break;
+       case SMB2_OP_HARDLINK:
+               memset(&si_iov, 0, sizeof(si_iov));
+               rqst[num_rqst].rq_iov = si_iov;
+               rqst[num_rqst].rq_nvec = 2;
+
+               len = (2 * UniStrnlen((wchar_t *)ptr, PATH_MAX));
+
+               link_info.ReplaceIfExists = 0;
+               link_info.RootDirectory = 0;
+               link_info.FileNameLength = cpu_to_le32(len);
+
+               size[0] = sizeof(struct smb2_file_link_info);
+               data[0] = &link_info;
+
+               size[1] = len + 2 /* null */;
+               data[1] = (__le16 *)ptr;
+
+               rc = SMB2_set_info_init(tcon, &rqst[num_rqst], COMPOUND_FID,
+                                       COMPOUND_FID, current->tgid,
+                                       FILE_LINK_INFORMATION,
+                                       SMB2_O_INFO_FILE, 0, data, size);
+               smb2_set_next_command(server, &rqst[num_rqst]);
+               smb2_set_related(&rqst[num_rqst++]);
                 break;
         default:
                 cifs_dbg(VFS, "Invalid command\n");
-               break;
+               rc = -EINVAL;
         }
+       if (rc)
+               goto finished;
  
-       if (use_cached_root_handle)
-               close_shroot(&tcon->crfid);
-       else
-               rc = SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
-       if (tmprc)
-               rc = tmprc;
-       kfree(utf16_path);
+       /* Close */
+       memset(&close_iov, 0, sizeof(close_iov));
+       rqst[num_rqst].rq_iov = close_iov;
+       rqst[num_rqst].rq_nvec = 1;
+       rc = SMB2_close_init(tcon, &rqst[num_rqst], COMPOUND_FID,
+                            COMPOUND_FID);
+       smb2_set_related(&rqst[num_rqst++]);
+       if (rc)
+               goto finished;
+
+       rc = compound_send_recv(xid, ses, flags, num_rqst, rqst,
+                               resp_buftype, rsp_iov);
+
+ finished:
+       SMB2_open_free(&rqst[0]);
+       switch (command) {
+       case SMB2_OP_QUERY_INFO:
+               if (rc == 0) {
+                       qi_rsp = (struct smb2_query_info_rsp *)
+                               rsp_iov[1].iov_base;
+                       rc = smb2_validate_and_copy_iov(
+                               le16_to_cpu(qi_rsp->OutputBufferOffset),
+                               le32_to_cpu(qi_rsp->OutputBufferLength),
+                               &rsp_iov[1], sizeof(struct smb2_file_all_info),
+                               ptr);
+               }
+               if (rqst[1].rq_iov)
+                       SMB2_query_info_free(&rqst[1]);
+               if (rqst[2].rq_iov)
+                       SMB2_close_free(&rqst[2]);
+               break;
+       case SMB2_OP_DELETE:
+       case SMB2_OP_MKDIR:
+               if (rqst[1].rq_iov)
+                       SMB2_close_free(&rqst[1]);
+               break;
+       case SMB2_OP_HARDLINK:
+       case SMB2_OP_RENAME:
+       case SMB2_OP_RMDIR:
+       case SMB2_OP_SET_EOF:
+       case SMB2_OP_SET_INFO:
+               if (rqst[1].rq_iov)
+                       SMB2_set_info_free(&rqst[1]);
+               if (rqst[2].rq_iov)
+                       SMB2_close_free(&rqst[2]);
+               break;
+       }
+       free_rsp_buf(resp_buftype[0], rsp_iov[0].iov_base);
+       free_rsp_buf(resp_buftype[1], rsp_iov[1].iov_base);
+       free_rsp_buf(resp_buftype[2], rsp_iov[2].iov_base);
         return rc;
  }
  
@@ -147,6 +293,7 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
  {
         int rc;
         struct smb2_file_all_info *smb2_data;
+       __u32 create_options = 0;
  
         *adjust_tz = false;
         *symlink = false;
@@ -155,17 +302,21 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
                             GFP_KERNEL);
         if (smb2_data == NULL)
                 return -ENOMEM;
+       if (backup_cred(cifs_sb))
+               create_options |= CREATE_OPEN_BACKUP_INTENT;
  
-       rc = smb2_open_op_close(xid, tcon, cifs_sb, full_path,
-                               FILE_READ_ATTRIBUTES, FILE_OPEN, 0,
-                               smb2_data, SMB2_OP_QUERY_INFO);
+       rc = smb2_compound_op(xid, tcon, cifs_sb, full_path,
+                             FILE_READ_ATTRIBUTES, FILE_OPEN, create_options,
+                             smb2_data, SMB2_OP_QUERY_INFO);
         if (rc == -EOPNOTSUPP) {
                 *symlink = true;
+               create_options |= OPEN_REPARSE_POINT;
+
                 /* Failed on a symbolic link - query a reparse point info */
-               rc = smb2_open_op_close(xid, tcon, cifs_sb, full_path,
-                                       FILE_READ_ATTRIBUTES, FILE_OPEN,
-                                       OPEN_REPARSE_POINT, smb2_data,
-                                       SMB2_OP_QUERY_INFO);
+               rc = smb2_compound_op(xid, tcon, cifs_sb, full_path,
+                                     FILE_READ_ATTRIBUTES, FILE_OPEN,
+                                     create_options, smb2_data,
+                                     SMB2_OP_QUERY_INFO);
         }
         if (rc)
                 goto out;
@@ -180,9 +331,9 @@ int
  smb2_mkdir(const unsigned int xid, struct cifs_tcon *tcon, const char *name,
            struct cifs_sb_info *cifs_sb)
  {
-       return smb2_open_op_close(xid, tcon, cifs_sb, name,
-                                 FILE_WRITE_ATTRIBUTES, FILE_CREATE,
-                                 CREATE_NOT_FILE, NULL, SMB2_OP_MKDIR);
+       return smb2_compound_op(xid, tcon, cifs_sb, name,
+                               FILE_WRITE_ATTRIBUTES, FILE_CREATE,
+                               CREATE_NOT_FILE, NULL, SMB2_OP_MKDIR);
  }
  
  void
@@ -199,9 +350,9 @@ smb2_mkdir_setinfo(struct inode *inode, const char *name,
         cifs_i = CIFS_I(inode);
         dosattrs = cifs_i->cifsAttrs | ATTR_READONLY;
         data.Attributes = cpu_to_le32(dosattrs);
-       tmprc = smb2_open_op_close(xid, tcon, cifs_sb, name,
-                                  FILE_WRITE_ATTRIBUTES, FILE_CREATE,
-                                  CREATE_NOT_FILE, &data, SMB2_OP_SET_INFO);
+       tmprc = smb2_compound_op(xid, tcon, cifs_sb, name,
+                                FILE_WRITE_ATTRIBUTES, FILE_CREATE,
+                                CREATE_NOT_FILE, &data, SMB2_OP_SET_INFO);
         if (tmprc == 0)
                 cifs_i->cifsAttrs = dosattrs;
  }
@@ -210,18 +361,18 @@ int
  smb2_rmdir(const unsigned int xid, struct cifs_tcon *tcon, const char *name,
            struct cifs_sb_info *cifs_sb)
  {
-       return smb2_open_op_close(xid, tcon, cifs_sb, name, DELETE, FILE_OPEN,
-                                 CREATE_NOT_FILE,
-                                 NULL, SMB2_OP_RMDIR);
+       return smb2_compound_op(xid, tcon, cifs_sb, name, DELETE, FILE_OPEN,
+                               CREATE_NOT_FILE,
+                               NULL, SMB2_OP_RMDIR);
  }
  
  int
  smb2_unlink(const unsigned int xid, struct cifs_tcon *tcon, const char *name,
             struct cifs_sb_info *cifs_sb)
  {
-       return smb2_open_op_close(xid, tcon, cifs_sb, name, DELETE, FILE_OPEN,
-                                 CREATE_DELETE_ON_CLOSE | OPEN_REPARSE_POINT,
-                                 NULL, SMB2_OP_DELETE);
+       return smb2_compound_op(xid, tcon, cifs_sb, name, DELETE, FILE_OPEN,
+                               CREATE_DELETE_ON_CLOSE | OPEN_REPARSE_POINT,
+                               NULL, SMB2_OP_DELETE);
  }
  
  static int
@@ -238,8 +389,8 @@ smb2_set_path_attr(const unsigned int xid, struct cifs_tcon *tcon,
                 goto smb2_rename_path;
         }
  
-       rc = smb2_open_op_close(xid, tcon, cifs_sb, from_name, access,
-                               FILE_OPEN, 0, smb2_to_name, command);
+       rc = smb2_compound_op(xid, tcon, cifs_sb, from_name, access,
+                             FILE_OPEN, 0, smb2_to_name, command);
  smb2_rename_path:
         kfree(smb2_to_name);
         return rc;
@@ -269,9 +420,10 @@ smb2_set_path_size(const unsigned int xid, struct cifs_tcon *tcon,
                    struct cifs_sb_info *cifs_sb, bool set_alloc)
  {
         __le64 eof = cpu_to_le64(size);
-       return smb2_open_op_close(xid, tcon, cifs_sb, full_path,
-                                 FILE_WRITE_DATA, FILE_OPEN, 0, &eof,
-                                 SMB2_OP_SET_EOF);
+
+       return smb2_compound_op(xid, tcon, cifs_sb, full_path,
+                               FILE_WRITE_DATA, FILE_OPEN, 0, &eof,
+                               SMB2_OP_SET_EOF);
  }
  
  int
@@ -291,9 +443,9 @@ smb2_set_file_info(struct inode *inode, const char *full_path,
         if (IS_ERR(tlink))
                 return PTR_ERR(tlink);
  
-       rc = smb2_open_op_close(xid, tlink_tcon(tlink), cifs_sb, full_path,
-                               FILE_WRITE_ATTRIBUTES, FILE_OPEN, 0, buf,
-                               SMB2_OP_SET_INFO);
+       rc = smb2_compound_op(xid, tlink_tcon(tlink), cifs_sb, full_path,
+                             FILE_WRITE_ATTRIBUTES, FILE_OPEN, 0, buf,
+                             SMB2_OP_SET_INFO);
         cifs_put_tlink(tlink);
         return rc;
  }
diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c

index 20a2d30..d47b7f5 100644 (file)
--- a/fs/cifs/smb2maperror.c
+++ b/fs/cifs/smb2maperror.c
@@ -288,7 +288,7 @@ static const struct status_to_posix_error smb2_error_map_table[] = {
         {STATUS_FLT_BUFFER_TOO_SMALL, -ENOBUFS, "STATUS_FLT_BUFFER_TOO_SMALL"},
         {STATUS_FVE_PARTIAL_METADATA, -EIO, "STATUS_FVE_PARTIAL_METADATA"},
         {STATUS_UNSUCCESSFUL, -EIO, "STATUS_UNSUCCESSFUL"},
-       {STATUS_NOT_IMPLEMENTED, -ENOSYS, "STATUS_NOT_IMPLEMENTED"},
+       {STATUS_NOT_IMPLEMENTED, -EOPNOTSUPP, "STATUS_NOT_IMPLEMENTED"},
         {STATUS_INVALID_INFO_CLASS, -EIO, "STATUS_INVALID_INFO_CLASS"},
         {STATUS_INFO_LENGTH_MISMATCH, -EIO, "STATUS_INFO_LENGTH_MISMATCH"},
         {STATUS_ACCESS_VIOLATION, -EACCES, "STATUS_ACCESS_VIOLATION"},
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c

index 89985a0..f85fc5a 100644 (file)
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -74,6 +74,12 @@ smb2_add_credits(struct TCP_Server_Info *server, const unsigned int add,
         int *val, rc = 0;
         spin_lock(&server->req_lock);
         val = server->ops->get_credits_field(server, optype);
+
+       /* eg found case where write overlapping reconnect messed up credits */
+       if (((optype & CIFS_OP_MASK) == CIFS_NEG_OP) && (*val != 0))
+               trace_smb3_reconnect_with_invalid_credits(server->CurrentMid,
+                       server->hostname, *val);
+
         *val += add;
         if (*val > 65000) {
                 *val = 65000; /* Don't get near 64K credits, avoid srv bugs */
@@ -104,7 +110,12 @@ smb2_set_credits(struct TCP_Server_Info *server, const int val)
  {
         spin_lock(&server->req_lock);
         server->credits = val;
+       if (val == 1)
+               server->reconnect_instance++;
         spin_unlock(&server->req_lock);
+       /* don't log while holding the lock */
+       if (val == 1)
+               cifs_dbg(FYI, "set credits to 1 due to smb2 reconnect\n");
  }
  
  static int *
@@ -269,6 +280,31 @@ smb2_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
         return wsize;
  }
  
+static unsigned int
+smb3_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
+{
+       struct TCP_Server_Info *server = tcon->ses->server;
+       unsigned int wsize;
+
+       /* start with specified wsize, or default; each step below can only lower it */
+       wsize = volume_info->wsize ? volume_info->wsize : SMB3_DEFAULT_IOSIZE;
+       wsize = min_t(unsigned int, wsize, server->max_write);
+#ifdef CONFIG_CIFS_SMB_DIRECT /* extra limits taken from server->smbd_conn */
+       if (server->rdma) { /* only applied when server->rdma is set */
+               if (server->sign) /* signing: bound by max_fragmented_send_size */
+                       wsize = min_t(unsigned int,
+                               wsize, server->smbd_conn->max_fragmented_send_size);
+               else /* no signing: bound by max_readwrite_size */
+                       wsize = min_t(unsigned int,
+                               wsize, server->smbd_conn->max_readwrite_size);
+       }
+#endif /* CONFIG_CIFS_SMB_DIRECT */
+       if (!(server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU)) /* no LARGE_MTU capability */
+               wsize = min_t(unsigned int, wsize, SMB2_MAX_BUFFER_SIZE);
+
+       return wsize; /* smallest of all limits that applied */
+}
+
  static unsigned int
  smb2_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
  {
@@ -295,6 +331,31 @@ smb2_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
         return rsize;
  }
  
+static unsigned int
+smb3_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
+{
+       struct TCP_Server_Info *server = tcon->ses->server;
+       unsigned int rsize;
+
+       /* start with specified rsize, or default; each step below can only lower it */
+       rsize = volume_info->rsize ? volume_info->rsize : SMB3_DEFAULT_IOSIZE;
+       rsize = min_t(unsigned int, rsize, server->max_read);
+#ifdef CONFIG_CIFS_SMB_DIRECT /* extra limits taken from server->smbd_conn */
+       if (server->rdma) { /* only applied when server->rdma is set */
+               if (server->sign) /* signing: bound by max_fragmented_recv_size */
+                       rsize = min_t(unsigned int,
+                               rsize, server->smbd_conn->max_fragmented_recv_size);
+               else /* no signing: bound by max_readwrite_size */
+                       rsize = min_t(unsigned int,
+                               rsize, server->smbd_conn->max_readwrite_size);
+       }
+#endif /* CONFIG_CIFS_SMB_DIRECT */
+
+       if (!(server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU)) /* no LARGE_MTU capability */
+               rsize = min_t(unsigned int, rsize, SMB2_MAX_BUFFER_SIZE);
+
+       return rsize; /* smallest of all limits that applied */
+}
  
  static int
  parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf,
@@ -962,6 +1023,9 @@ smb2_print_stats(struct seq_file *m, struct cifs_tcon *tcon)
         seq_printf(m, "\nBytes read: %llu  Bytes written: %llu",
                    (long long)(tcon->bytes_read),
                    (long long)(tcon->bytes_written));
+       seq_printf(m, "\nOpen files: %d total (local), %d open on server",
+                  atomic_read(&tcon->num_local_opens),
+                  atomic_read(&tcon->num_remote_opens));
         seq_printf(m, "\nTreeConnects: %d total %d failed",
                    atomic_read(&sent[SMB2_TREE_CONNECT_HE]),
                    atomic_read(&failed[SMB2_TREE_CONNECT_HE]));
@@ -1057,6 +1121,131 @@ req_res_key_exit:
         return rc;
  }
  
+static int
+smb2_ioctl_query_info(const unsigned int xid,
+                     struct cifs_tcon *tcon,
+                     __le16 *path, int is_dir,
+                     unsigned long p)
+{
+       struct cifs_ses *ses = tcon->ses;
+       char __user *arg = (char __user *)p;
+       struct smb_query_info qi;
+       struct smb_query_info __user *pqi;
+       int rc = 0;
+       int flags = 0;
+       struct smb2_query_info_rsp *rsp = NULL;
+       void *buffer = NULL;
+       struct smb_rqst rqst[3];
+       int resp_buftype[3];
+       struct kvec rsp_iov[3];
+       struct kvec open_iov[SMB2_CREATE_IOV_SIZE];
+       struct cifs_open_parms oparms;
+       u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
+       struct cifs_fid fid;
+       struct kvec qi_iov[1];
+       struct kvec close_iov[1];
+
+       memset(rqst, 0, sizeof(rqst));
+       resp_buftype[0] = resp_buftype[1] = resp_buftype[2] = CIFS_NO_BUFFER;
+       memset(rsp_iov, 0, sizeof(rsp_iov));
+
+       if (copy_from_user(&qi, arg, sizeof(struct smb_query_info)))
+               return -EFAULT;
+
+       if (qi.output_buffer_length > 1024)
+               return -EINVAL;
+
+       if (!ses || !(ses->server))
+               return -EIO;
+
+       if (smb3_encryption_required(tcon))
+               flags |= CIFS_TRANSFORM_REQ;
+
+       buffer = kmalloc(qi.output_buffer_length, GFP_KERNEL);
+       if (buffer == NULL)
+               return -ENOMEM;
+
+       if (copy_from_user(buffer, arg + sizeof(struct smb_query_info),
+                          qi.output_buffer_length)) {
+               rc = -EFAULT;
+               goto iqinf_exit;
+       }
+
+       /* Open */
+       memset(&open_iov, 0, sizeof(open_iov));
+       rqst[0].rq_iov = open_iov;
+       rqst[0].rq_nvec = SMB2_CREATE_IOV_SIZE;
+
+       memset(&oparms, 0, sizeof(oparms));
+       oparms.tcon = tcon;
+       oparms.desired_access = FILE_READ_ATTRIBUTES | READ_CONTROL;
+       oparms.disposition = FILE_OPEN;
+       if (is_dir)
+               oparms.create_options = CREATE_NOT_FILE;
+       else
+               oparms.create_options = CREATE_NOT_DIR;
+       oparms.fid = &fid;
+       oparms.reconnect = false;
+
+       rc = SMB2_open_init(tcon, &rqst[0], &oplock, &oparms, path);
+       if (rc)
+               goto iqinf_exit;
+       smb2_set_next_command(ses->server, &rqst[0]);
+
+       /* Query */
+       memset(&qi_iov, 0, sizeof(qi_iov));
+       rqst[1].rq_iov = qi_iov;
+       rqst[1].rq_nvec = 1;
+
+       rc = SMB2_query_info_init(tcon, &rqst[1], COMPOUND_FID, COMPOUND_FID,
+                                 qi.file_info_class, qi.info_type,
+                                 qi.additional_information,
+                                 qi.input_buffer_length,
+                                 qi.output_buffer_length, buffer);
+       if (rc)
+               goto iqinf_exit;
+       smb2_set_next_command(ses->server, &rqst[1]);
+       smb2_set_related(&rqst[1]);
+
+       /* Close */
+       memset(&close_iov, 0, sizeof(close_iov));
+       rqst[2].rq_iov = close_iov;
+       rqst[2].rq_nvec = 1;
+
+       rc = SMB2_close_init(tcon, &rqst[2], COMPOUND_FID, COMPOUND_FID);
+       if (rc)
+               goto iqinf_exit;
+       smb2_set_related(&rqst[2]);
+
+       rc = compound_send_recv(xid, ses, flags, 3, rqst,
+                               resp_buftype, rsp_iov);
+       if (rc)
+               goto iqinf_exit;
+       pqi = (struct smb_query_info __user *)arg;
+       rsp = (struct smb2_query_info_rsp *)rsp_iov[1].iov_base;
+       if (le32_to_cpu(rsp->OutputBufferLength) < qi.input_buffer_length)
+               qi.input_buffer_length = le32_to_cpu(rsp->OutputBufferLength);
+       if (copy_to_user(&pqi->input_buffer_length, &qi.input_buffer_length,
+                        sizeof(qi.input_buffer_length))) {
+               rc = -EFAULT;
+               goto iqinf_exit;
+       }
+       if (copy_to_user(pqi + 1, rsp->Buffer, qi.input_buffer_length)) {
+               rc = -EFAULT;
+               goto iqinf_exit;
+       }
+
+ iqinf_exit:
+       kfree(buffer);
+       SMB2_open_free(&rqst[0]);
+       SMB2_query_info_free(&rqst[1]);
+       SMB2_close_free(&rqst[2]);
+       free_rsp_buf(resp_buftype[0], rsp_iov[0].iov_base);
+       free_rsp_buf(resp_buftype[1], rsp_iov[1].iov_base);
+       free_rsp_buf(resp_buftype[2], rsp_iov[2].iov_base);
+       return rc;
+}
+
  static ssize_t
  smb2_copychunk_range(const unsigned int xid,
                         struct cifsFileInfo *srcfile,
@@ -1301,7 +1490,7 @@ smb2_set_file_size(const unsigned int xid, struct cifs_tcon *tcon,
         }
  
         return SMB2_set_eof(xid, tcon, cfile->fid.persistent_fid,
-                           cfile->fid.volatile_fid, cfile->pid, &eof, false);
+                           cfile->fid.volatile_fid, cfile->pid, &eof);
  }
  
  static int
@@ -1556,7 +1745,7 @@ smb2_oplock_response(struct cifs_tcon *tcon, struct cifs_fid *fid,
                                  CIFS_CACHE_READ(cinode) ? 1 : 0);
  }
  
-static void
+void
  smb2_set_related(struct smb_rqst *rqst)
  {
         struct smb2_sync_hdr *shdr;
@@ -1567,7 +1756,7 @@ smb2_set_related(struct smb_rqst *rqst)
  
  char smb2_padding[7] = {0, 0, 0, 0, 0, 0, 0};
  
-static void
+void
  smb2_set_next_command(struct TCP_Server_Info *server, struct smb_rqst *rqst)
  {
         struct smb2_sync_hdr *shdr;
@@ -1610,7 +1799,7 @@ smb2_queryfs(const unsigned int xid, struct cifs_tcon *tcon,
                 flags |= CIFS_TRANSFORM_REQ;
  
         memset(rqst, 0, sizeof(rqst));
-       memset(resp_buftype, 0, sizeof(resp_buftype));
+       resp_buftype[0] = resp_buftype[1] = resp_buftype[2] = CIFS_NO_BUFFER;
         memset(rsp_iov, 0, sizeof(rsp_iov));
  
         memset(&open_iov, 0, sizeof(open_iov));
@@ -1636,7 +1825,8 @@ smb2_queryfs(const unsigned int xid, struct cifs_tcon *tcon,
         rc = SMB2_query_info_init(tcon, &rqst[1], COMPOUND_FID, COMPOUND_FID,
                                   FS_FULL_SIZE_INFORMATION,
                                   SMB2_O_INFO_FILESYSTEM, 0,
-                                 sizeof(struct smb2_fs_full_size_info));
+                                 sizeof(struct smb2_fs_full_size_info), 0,
+                                 NULL);
         if (rc)
                 goto qfs_exit;
         smb2_set_next_command(server, &rqst[1]);
@@ -3303,6 +3493,7 @@ struct smb_version_operations smb20_operations = {
         .set_acl = set_smb2_acl,
  #endif /* CIFS_ACL */
         .next_header = smb2_next_header,
+       .ioctl_query_info = smb2_ioctl_query_info,
  };
  
  struct smb_version_operations smb21_operations = {
@@ -3398,6 +3589,7 @@ struct smb_version_operations smb21_operations = {
         .set_acl = set_smb2_acl,
  #endif /* CIFS_ACL */
         .next_header = smb2_next_header,
+       .ioctl_query_info = smb2_ioctl_query_info,
  };
  
  struct smb_version_operations smb30_operations = {
@@ -3425,8 +3617,8 @@ struct smb_version_operations smb30_operations = {
         .downgrade_oplock = smb2_downgrade_oplock,
         .need_neg = smb2_need_neg,
         .negotiate = smb2_negotiate,
-       .negotiate_wsize = smb2_negotiate_wsize,
-       .negotiate_rsize = smb2_negotiate_rsize,
+       .negotiate_wsize = smb3_negotiate_wsize,
+       .negotiate_rsize = smb3_negotiate_rsize,
         .sess_setup = SMB2_sess_setup,
         .logoff = SMB2_logoff,
         .tree_connect = SMB2_tcon,
@@ -3502,6 +3694,7 @@ struct smb_version_operations smb30_operations = {
         .set_acl = set_smb2_acl,
  #endif /* CIFS_ACL */
         .next_header = smb2_next_header,
+       .ioctl_query_info = smb2_ioctl_query_info,
  };
  
  struct smb_version_operations smb311_operations = {
@@ -3529,8 +3722,8 @@ struct smb_version_operations smb311_operations = {
         .downgrade_oplock = smb2_downgrade_oplock,
         .need_neg = smb2_need_neg,
         .negotiate = smb2_negotiate,
-       .negotiate_wsize = smb2_negotiate_wsize,
-       .negotiate_rsize = smb2_negotiate_rsize,
+       .negotiate_wsize = smb3_negotiate_wsize,
+       .negotiate_rsize = smb3_negotiate_rsize,
         .sess_setup = SMB2_sess_setup,
         .logoff = SMB2_logoff,
         .tree_connect = SMB2_tcon,
@@ -3607,6 +3800,7 @@ struct smb_version_operations smb311_operations = {
         .set_acl = set_smb2_acl,
  #endif /* CIFS_ACL */
         .next_header = smb2_next_header,
+       .ioctl_query_info = smb2_ioctl_query_info,
  };
  
  struct smb_version_values smb20_values = {
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c

index f54d07b..7d7b016 100644 (file)
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1478,7 +1478,7 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
  
         /* SMB2 TREE_CONNECT request must be called with TreeId == 0 */
         tcon->tid = 0;
-
+       atomic_set(&tcon->num_remote_opens, 0);
         rc = smb2_plain_req_init(SMB2_TREE_CONNECT, tcon, (void **) &req,
                              &total_len);
         if (rc) {
@@ -2243,10 +2243,12 @@ SMB2_open_free(struct smb_rqst *rqst)
  {
         int i;
  
-       cifs_small_buf_release(rqst->rq_iov[0].iov_base);
-       for (i = 1; i < rqst->rq_nvec; i++)
-               if (rqst->rq_iov[i].iov_base != smb2_padding)
-                       kfree(rqst->rq_iov[i].iov_base);
+       if (rqst && rqst->rq_iov) {
+               cifs_small_buf_release(rqst->rq_iov[0].iov_base);
+               for (i = 1; i < rqst->rq_nvec; i++)
+                       if (rqst->rq_iov[i].iov_base != smb2_padding)
+                               kfree(rqst->rq_iov[i].iov_base);
+       }
  }
  
  int
@@ -2261,7 +2263,7 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
         struct cifs_ses *ses = tcon->ses;
         struct kvec iov[SMB2_CREATE_IOV_SIZE];
         struct kvec rsp_iov = {NULL, 0};
-       int resp_buftype;
+       int resp_buftype = CIFS_NO_BUFFER;
         int rc = 0;
         int flags = 0;
  
@@ -2303,6 +2305,7 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
                                      ses->Suid, oparms->create_options,
                                      oparms->desired_access);
  
+       atomic_inc(&tcon->num_remote_opens);
         oparms->fid->persistent_fid = rsp->PersistentFileId;
         oparms->fid->volatile_fid = rsp->VolatileFileId;
  
@@ -2474,13 +2477,13 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
                 goto ioctl_exit;
         }
  
-       *out_data = kmalloc(*plen, GFP_KERNEL);
+       *out_data = kmemdup((char *)rsp + le32_to_cpu(rsp->OutputOffset),
+                           *plen, GFP_KERNEL);
         if (*out_data == NULL) {
                 rc = -ENOMEM;
                 goto ioctl_exit;
         }
  
-       memcpy(*out_data, (char *)rsp + le32_to_cpu(rsp->OutputOffset), *plen);
  ioctl_exit:
         free_rsp_buf(resp_buftype, rsp);
         return rc;
@@ -2535,7 +2538,8 @@ SMB2_close_init(struct cifs_tcon *tcon, struct smb_rqst *rqst,
  void
  SMB2_close_free(struct smb_rqst *rqst)
  {
-       cifs_small_buf_release(rqst->rq_iov[0].iov_base); /* request */
+       if (rqst && rqst->rq_iov)
+               cifs_small_buf_release(rqst->rq_iov[0].iov_base); /* request */
  }
  
  int
@@ -2547,7 +2551,7 @@ SMB2_close_flags(const unsigned int xid, struct cifs_tcon *tcon,
         struct cifs_ses *ses = tcon->ses;
         struct kvec iov[1];
         struct kvec rsp_iov;
-       int resp_buftype;
+       int resp_buftype = CIFS_NO_BUFFER;
         int rc = 0;
  
         cifs_dbg(FYI, "Close\n");
@@ -2577,6 +2581,8 @@ SMB2_close_flags(const unsigned int xid, struct cifs_tcon *tcon,
                 goto close_exit;
         }
  
+       atomic_dec(&tcon->num_remote_opens);
+
         /* BB FIXME - decode close response, update inode for caching */
  
  close_exit:
@@ -2627,10 +2633,10 @@ smb2_validate_iov(unsigned int offset, unsigned int buffer_length,
   * If SMB buffer fields are valid, copy into temporary buffer to hold result.
   * Caller must free buffer.
   */
-static int
-validate_and_copy_iov(unsigned int offset, unsigned int buffer_length,
-                     struct kvec *iov, unsigned int minbufsize,
-                     char *data)
+int
+smb2_validate_and_copy_iov(unsigned int offset, unsigned int buffer_length,
+                          struct kvec *iov, unsigned int minbufsize,
+                          char *data)
  {
         char *begin_of_buf = offset + (char *)iov->iov_base;
         int rc;
@@ -2651,7 +2657,7 @@ int
  SMB2_query_info_init(struct cifs_tcon *tcon, struct smb_rqst *rqst,
                      u64 persistent_fid, u64 volatile_fid,
                      u8 info_class, u8 info_type, u32 additional_info,
-                    size_t output_len)
+                    size_t output_len, size_t input_len, void *input)
  {
         struct smb2_query_info_req *req;
         struct kvec *iov = rqst->rq_iov;
@@ -2669,23 +2675,25 @@ SMB2_query_info_init(struct cifs_tcon *tcon, struct smb_rqst *rqst,
         req->VolatileFileId = volatile_fid;
         req->AdditionalInformation = cpu_to_le32(additional_info);
  
-       /*
-        * We do not use the input buffer (do not send extra byte)
-        */
-       req->InputBufferOffset = 0;
-
         req->OutputBufferLength = cpu_to_le32(output_len);
+       if (input_len) {
+               req->InputBufferLength = cpu_to_le32(input_len);
+               /* total_len for smb query request never close to le16 max */
+               req->InputBufferOffset = cpu_to_le16(total_len - 1);
+               memcpy(req->Buffer, input, input_len);
+       }
  
         iov[0].iov_base = (char *)req;
         /* 1 for Buffer */
-       iov[0].iov_len = total_len - 1;
+       iov[0].iov_len = total_len - 1 + input_len;
         return 0;
  }
  
  void
  SMB2_query_info_free(struct smb_rqst *rqst)
  {
-       cifs_small_buf_release(rqst->rq_iov[0].iov_base); /* request */
+       if (rqst && rqst->rq_iov)
+               cifs_small_buf_release(rqst->rq_iov[0].iov_base); /* request */
  }
  
  static int
@@ -2699,7 +2707,7 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon,
         struct kvec iov[1];
         struct kvec rsp_iov;
         int rc = 0;
-       int resp_buftype;
+       int resp_buftype = CIFS_NO_BUFFER;
         struct cifs_ses *ses = tcon->ses;
         int flags = 0;
  
@@ -2718,7 +2726,7 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon,
  
         rc = SMB2_query_info_init(tcon, &rqst, persistent_fid, volatile_fid,
                                   info_class, info_type, additional_info,
-                                 output_len);
+                                 output_len, 0, NULL);
         if (rc)
                 goto qinf_exit;
  
@@ -2746,9 +2754,9 @@ query_info(const unsigned int xid, struct cifs_tcon *tcon,
                 }
         }
  
-       rc = validate_and_copy_iov(le16_to_cpu(rsp->OutputBufferOffset),
-                                  le32_to_cpu(rsp->OutputBufferLength),
-                                  &rsp_iov, min_len, *data);
+       rc = smb2_validate_and_copy_iov(le16_to_cpu(rsp->OutputBufferOffset),
+                                       le32_to_cpu(rsp->OutputBufferLength),
+                                       &rsp_iov, min_len, *data);
  
  qinf_exit:
         SMB2_query_info_free(&rqst);
@@ -3754,45 +3762,22 @@ qdir_exit:
         return rc;
  }
  
-static int
-send_set_info(const unsigned int xid, struct cifs_tcon *tcon,
+int
+SMB2_set_info_init(struct cifs_tcon *tcon, struct smb_rqst *rqst,
                u64 persistent_fid, u64 volatile_fid, u32 pid, u8 info_class,
-              u8 info_type, u32 additional_info, unsigned int num,
+              u8 info_type, u32 additional_info,
                 void **data, unsigned int *size)
  {
-       struct smb_rqst rqst;
         struct smb2_set_info_req *req;
-       struct smb2_set_info_rsp *rsp = NULL;
-       struct kvec *iov;
-       struct kvec rsp_iov;
-       int rc = 0;
-       int resp_buftype;
-       unsigned int i;
-       struct cifs_ses *ses = tcon->ses;
-       int flags = 0;
-       unsigned int total_len;
-
-       if (!ses || !(ses->server))
-               return -EIO;
-
-       if (!num)
-               return -EINVAL;
-
-       iov = kmalloc_array(num, sizeof(struct kvec), GFP_KERNEL);
-       if (!iov)
-               return -ENOMEM;
+       struct kvec *iov = rqst->rq_iov;
+       unsigned int i, total_len;
+       int rc;
  
         rc = smb2_plain_req_init(SMB2_SET_INFO, tcon, (void **) &req, &total_len);
-       if (rc) {
-               kfree(iov);
+       if (rc)
                 return rc;
-       }
-
-       if (smb3_encryption_required(tcon))
-               flags |= CIFS_TRANSFORM_REQ;
  
         req->sync_hdr.ProcessId = cpu_to_le32(pid);
-
         req->InfoType = info_type;
         req->FileInfoClass = info_class;
         req->PersistentFileId = persistent_fid;
@@ -3810,19 +3795,66 @@ send_set_info(const unsigned int xid, struct cifs_tcon *tcon,
         /* 1 for Buffer */
         iov[0].iov_len = total_len - 1;
  
-       for (i = 1; i < num; i++) {
+       for (i = 1; i < rqst->rq_nvec; i++) {
                 le32_add_cpu(&req->BufferLength, size[i]);
                 iov[i].iov_base = (char *)data[i];
                 iov[i].iov_len = size[i];
         }
  
+       return 0;
+}
+
+void
+SMB2_set_info_free(struct smb_rqst *rqst)
+{
+       if (rqst && rqst->rq_iov)
+               cifs_buf_release(rqst->rq_iov[0].iov_base); /* request */
+}
+
+static int
+send_set_info(const unsigned int xid, struct cifs_tcon *tcon,
+              u64 persistent_fid, u64 volatile_fid, u32 pid, u8 info_class,
+              u8 info_type, u32 additional_info, unsigned int num,
+               void **data, unsigned int *size)
+{
+       struct smb_rqst rqst;
+       struct smb2_set_info_rsp *rsp = NULL;
+       struct kvec *iov;
+       struct kvec rsp_iov;
+       int rc = 0;
+       int resp_buftype;
+       struct cifs_ses *ses = tcon->ses;
+       int flags = 0;
+
+       if (!ses || !(ses->server))
+               return -EIO;
+
+       if (!num)
+               return -EINVAL;
+
+       if (smb3_encryption_required(tcon))
+               flags |= CIFS_TRANSFORM_REQ;
+
+       iov = kmalloc_array(num, sizeof(struct kvec), GFP_KERNEL);
+       if (!iov)
+               return -ENOMEM;
+
         memset(&rqst, 0, sizeof(struct smb_rqst));
         rqst.rq_iov = iov;
         rqst.rq_nvec = num;
  
+       rc = SMB2_set_info_init(tcon, &rqst, persistent_fid, volatile_fid, pid,
+                               info_class, info_type, additional_info,
+                               data, size);
+       if (rc) {
+               kfree(iov);
+               return rc;
+       }
+
+
         rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags,
                             &rsp_iov);
-       cifs_buf_release(req);
+       SMB2_set_info_free(&rqst);
         rsp = (struct smb2_set_info_rsp *)rsp_iov.iov_base;
  
         if (rc != 0) {
@@ -3836,89 +3868,9 @@ send_set_info(const unsigned int xid, struct cifs_tcon *tcon,
         return rc;
  }
  
-int
-SMB2_rename(const unsigned int xid, struct cifs_tcon *tcon,
-           u64 persistent_fid, u64 volatile_fid, __le16 *target_file)
-{
-       struct smb2_file_rename_info info;
-       void **data;
-       unsigned int size[2];
-       int rc;
-       int len = (2 * UniStrnlen((wchar_t *)target_file, PATH_MAX));
-
-       data = kmalloc_array(2, sizeof(void *), GFP_KERNEL);
-       if (!data)
-               return -ENOMEM;
-
-       info.ReplaceIfExists = 1; /* 1 = replace existing target with new */
-                             /* 0 = fail if target already exists */
-       info.RootDirectory = 0;  /* MBZ for network ops (why does spec say?) */
-       info.FileNameLength = cpu_to_le32(len);
-
-       data[0] = &info;
-       size[0] = sizeof(struct smb2_file_rename_info);
-
-       data[1] = target_file;
-       size[1] = len + 2 /* null */;
-
-       rc = send_set_info(xid, tcon, persistent_fid, volatile_fid,
-               current->tgid, FILE_RENAME_INFORMATION, SMB2_O_INFO_FILE,
-               0, 2, data, size);
-       kfree(data);
-       return rc;
-}
-
-int
-SMB2_rmdir(const unsigned int xid, struct cifs_tcon *tcon,
-                 u64 persistent_fid, u64 volatile_fid)
-{
-       __u8 delete_pending = 1;
-       void *data;
-       unsigned int size;
-
-       data = &delete_pending;
-       size = 1; /* sizeof __u8 */
-
-       return send_set_info(xid, tcon, persistent_fid, volatile_fid,
-               current->tgid, FILE_DISPOSITION_INFORMATION, SMB2_O_INFO_FILE,
-               0, 1, &data, &size);
-}
-
-int
-SMB2_set_hardlink(const unsigned int xid, struct cifs_tcon *tcon,
-                 u64 persistent_fid, u64 volatile_fid, __le16 *target_file)
-{
-       struct smb2_file_link_info info;
-       void **data;
-       unsigned int size[2];
-       int rc;
-       int len = (2 * UniStrnlen((wchar_t *)target_file, PATH_MAX));
-
-       data = kmalloc_array(2, sizeof(void *), GFP_KERNEL);
-       if (!data)
-               return -ENOMEM;
-
-       info.ReplaceIfExists = 0; /* 1 = replace existing link with new */
-                             /* 0 = fail if link already exists */
-       info.RootDirectory = 0;  /* MBZ for network ops (why does spec say?) */
-       info.FileNameLength = cpu_to_le32(len);
-
-       data[0] = &info;
-       size[0] = sizeof(struct smb2_file_link_info);
-
-       data[1] = target_file;
-       size[1] = len + 2 /* null */;
-
-       rc = send_set_info(xid, tcon, persistent_fid, volatile_fid,
-                       current->tgid, FILE_LINK_INFORMATION, SMB2_O_INFO_FILE,
-                       0, 2, data, size);
-       kfree(data);
-       return rc;
-}
-
  int
  SMB2_set_eof(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
-            u64 volatile_fid, u32 pid, __le64 *eof, bool is_falloc)
+            u64 volatile_fid, u32 pid, __le64 *eof)
  {
         struct smb2_file_eof_info info;
         void *data;
@@ -3929,27 +3881,11 @@ SMB2_set_eof(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
         data = &info;
         size = sizeof(struct smb2_file_eof_info);
  
-       if (is_falloc)
-               return send_set_info(xid, tcon, persistent_fid, volatile_fid,
-                       pid, FILE_ALLOCATION_INFORMATION, SMB2_O_INFO_FILE,
-                       0, 1, &data, &size);
-       else
-               return send_set_info(xid, tcon, persistent_fid, volatile_fid,
+       return send_set_info(xid, tcon, persistent_fid, volatile_fid,
                         pid, FILE_END_OF_FILE_INFORMATION, SMB2_O_INFO_FILE,
                         0, 1, &data, &size);
  }
  
-int
-SMB2_set_info(const unsigned int xid, struct cifs_tcon *tcon,
-             u64 persistent_fid, u64 volatile_fid, FILE_BASIC_INFO *buf)
-{
-       unsigned int size;
-       size = sizeof(FILE_BASIC_INFO);
-       return send_set_info(xid, tcon, persistent_fid, volatile_fid,
-               current->tgid, FILE_BASIC_INFORMATION, SMB2_O_INFO_FILE,
-               0, 1, (void **)&buf, &size);
-}
-
  int
  SMB2_set_acl(const unsigned int xid, struct cifs_tcon *tcon,
                 u64 persistent_fid, u64 volatile_fid,
@@ -4350,6 +4286,8 @@ SMB2_lease_break(const unsigned int xid, struct cifs_tcon *tcon,
         struct kvec iov[1];
         struct kvec rsp_iov;
         int resp_buf_type;
+       __u64 *please_key_high;
+       __u64 *please_key_low;
  
         cifs_dbg(FYI, "SMB2_lease_break\n");
         rc = smb2_plain_req_init(SMB2_OPLOCK_BREAK, tcon, (void **) &req,
@@ -4379,10 +4317,16 @@ SMB2_lease_break(const unsigned int xid, struct cifs_tcon *tcon,
         rc = cifs_send_recv(xid, ses, &rqst, &resp_buf_type, flags, &rsp_iov);
         cifs_small_buf_release(req);
  
+       please_key_low = (__u64 *)req->LeaseKey;
+       please_key_high = (__u64 *)(req->LeaseKey+8);
         if (rc) {
                 cifs_stats_fail_inc(tcon, SMB2_OPLOCK_BREAK_HE);
+               trace_smb3_lease_err(le32_to_cpu(lease_state), tcon->tid,
+                       ses->Suid, *please_key_low, *please_key_high, rc);
                 cifs_dbg(FYI, "Send error in Lease Break = %d\n", rc);
-       }
+       } else
+               trace_smb3_lease_done(le32_to_cpu(lease_state), tcon->tid,
+                       ses->Suid, *please_key_low, *please_key_high);
  
         return rc;
  }
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h

index 8fb7887..f753f42 100644 (file)
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -613,6 +613,8 @@ struct smb2_tree_disconnect_rsp {
  #define SVHDX_OPEN_DEVICE_CONTEX       0x9CCBCF9E04C1E643980E158DA1F6EC83
  #define SMB2_CREATE_TAG_POSIX          0x93AD25509CB411E7B42383DE968BCD7C
  
+/* Flag (SMB3 open response) values */
+#define SMB2_CREATE_FLAG_REPARSEPOINT 0x01
  
  /*
   * Maximum number of iovs we need for an open/create request.
@@ -650,7 +652,7 @@ struct smb2_create_rsp {
         struct smb2_sync_hdr sync_hdr;
         __le16 StructureSize;   /* Must be 89 */
         __u8   OplockLevel;
-       __u8   Reserved;
+       __u8   Flag;  /* 0x01 if reparse point */
         __le32 CreateAction;
         __le64 CreationTime;
         __le64 LastAccessTime;
@@ -1174,6 +1176,15 @@ struct smb2_query_info_rsp {
         __u8   Buffer[1];
  } __packed;
  
+/*
+ * Maximum number of iovs we need for a set-info request.
+ * The largest one is rename/hardlink
+ * [0] : struct smb2_set_info_req + smb2_file_[rename|link]_info
+ * [1] : path
+ * [2] : compound padding
+ */
+#define SMB2_SET_INFO_IOV_SIZE 3
+
  struct smb2_set_info_req {
         struct smb2_sync_hdr sync_hdr;
         __le16 StructureSize; /* Must be 33 */
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h

index b407657..9f4e9ed 100644 (file)
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -116,6 +116,9 @@ extern void smb2_reconnect_server(struct work_struct *work);
  extern int smb3_crypto_aead_allocate(struct TCP_Server_Info *server);
  extern unsigned long smb_rqst_len(struct TCP_Server_Info *server,
                                   struct smb_rqst *rqst);
+extern void smb2_set_next_command(struct TCP_Server_Info *server,
+                                 struct smb_rqst *rqst);
+extern void smb2_set_related(struct smb_rqst *rqst);
  
  /*
   * SMB2 Worker functions - most of protocol specific implementation details
@@ -160,7 +163,8 @@ extern int SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon,
  extern int SMB2_query_info_init(struct cifs_tcon *tcon, struct smb_rqst *rqst,
                                 u64 persistent_fid, u64 volatile_fid,
                                 u8 info_class, u8 info_type,
-                               u32 additional_info, size_t output_len);
+                               u32 additional_info, size_t output_len,
+                               size_t input_len, void *input);
  extern void SMB2_query_info_free(struct smb_rqst *rqst);
  extern int SMB2_query_acl(const unsigned int xid, struct cifs_tcon *tcon,
                            u64 persistent_file_id, u64 volatile_file_id,
@@ -179,20 +183,14 @@ extern int SMB2_echo(struct TCP_Server_Info *server);
  extern int SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
                                 u64 persistent_fid, u64 volatile_fid, int index,
                                 struct cifs_search_info *srch_inf);
-extern int SMB2_rename(const unsigned int xid, struct cifs_tcon *tcon,
-                      u64 persistent_fid, u64 volatile_fid,
-                      __le16 *target_file);
-extern int SMB2_rmdir(const unsigned int xid, struct cifs_tcon *tcon,
-                     u64 persistent_fid, u64 volatile_fid);
-extern int SMB2_set_hardlink(const unsigned int xid, struct cifs_tcon *tcon,
-                            u64 persistent_fid, u64 volatile_fid,
-                            __le16 *target_file);
  extern int SMB2_set_eof(const unsigned int xid, struct cifs_tcon *tcon,
                         u64 persistent_fid, u64 volatile_fid, u32 pid,
-                       __le64 *eof, bool is_fallocate);
-extern int SMB2_set_info(const unsigned int xid, struct cifs_tcon *tcon,
-                        u64 persistent_fid, u64 volatile_fid,
-                        FILE_BASIC_INFO *buf);
+                       __le64 *eof);
+extern int SMB2_set_info_init(struct cifs_tcon *tcon, struct smb_rqst *rqst,
+                             u64 persistent_fid, u64 volatile_fid, u32 pid,
+                             u8 info_class, u8 info_type, u32 additional_info,
+                             void **data, unsigned int *size);
+extern void SMB2_set_info_free(struct smb_rqst *rqst);
  extern int SMB2_set_acl(const unsigned int xid, struct cifs_tcon *tcon,
                         u64 persistent_fid, u64 volatile_fid,
                         struct cifs_ntsd *pnntsd, int pacllen, int aclflag);
@@ -232,6 +230,10 @@ extern enum securityEnum smb2_select_sectype(struct TCP_Server_Info *,
  extern int smb3_encryption_required(const struct cifs_tcon *tcon);
  extern int smb2_validate_iov(unsigned int offset, unsigned int buffer_length,
                              struct kvec *iov, unsigned int min_buf_size);
+extern int smb2_validate_and_copy_iov(unsigned int offset,
+                                     unsigned int buffer_length,
+                                     struct kvec *iov,
+                                     unsigned int minbufsize, char *data);
  extern void smb2_copy_fs_info_to_kstatfs(
          struct smb2_fs_full_size_info *pfs_inf,
          struct kstatfs *kst);
diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c

index 5fdb9a5..5e28236 100644 (file)
--- a/fs/cifs/smbdirect.c
+++ b/fs/cifs/smbdirect.c
@@ -2295,8 +2295,12 @@ static void smbd_mr_recovery_work(struct work_struct *work)
         int rc;
  
         list_for_each_entry(smbdirect_mr, &info->mr_list, list) {
-               if (smbdirect_mr->state == MR_INVALIDATED ||
-                       smbdirect_mr->state == MR_ERROR) {
+               if (smbdirect_mr->state == MR_INVALIDATED)
+                       ib_dma_unmap_sg(
+                               info->id->device, smbdirect_mr->sgl,
+                               smbdirect_mr->sgl_count,
+                               smbdirect_mr->dir);
+               else if (smbdirect_mr->state == MR_ERROR) {
  
                         /* recover this MR entry */
                         rc = ib_dereg_mr(smbdirect_mr->mr);
@@ -2320,25 +2324,21 @@ static void smbd_mr_recovery_work(struct work_struct *work)
                                 smbd_disconnect_rdma_connection(info);
                                 continue;
                         }
+               } else
+                       /* This MR is being used, don't recover it */
+                       continue;
  
-                       if (smbdirect_mr->state == MR_INVALIDATED)
-                               ib_dma_unmap_sg(
-                                       info->id->device, smbdirect_mr->sgl,
-                                       smbdirect_mr->sgl_count,
-                                       smbdirect_mr->dir);
-
-                       smbdirect_mr->state = MR_READY;
+               smbdirect_mr->state = MR_READY;
  
-                       /* smbdirect_mr->state is updated by this function
-                        * and is read and updated by I/O issuing CPUs trying
-                        * to get a MR, the call to atomic_inc_return
-                        * implicates a memory barrier and guarantees this
-                        * value is updated before waking up any calls to
-                        * get_mr() from the I/O issuing CPUs
-                        */
-                       if (atomic_inc_return(&info->mr_ready_count) == 1)
-                               wake_up_interruptible(&info->wait_mr);
-               }
+               /* smbdirect_mr->state is updated by this function
+                * and is read and updated by I/O issuing CPUs trying
+                * to get a MR, the call to atomic_inc_return
+                * implies a memory barrier and guarantees this
+                * value is updated before waking up any calls to
+                * get_mr() from the I/O issuing CPUs
+                */
+               if (atomic_inc_return(&info->mr_ready_count) == 1)
+                       wake_up_interruptible(&info->wait_mr);
         }
  }
  
diff --git a/fs/cifs/trace.h b/fs/cifs/trace.h

index d4aed52..cce8414 100644 (file)
--- a/fs/cifs/trace.h
+++ b/fs/cifs/trace.h
@@ -460,6 +460,85 @@ DEFINE_EVENT(smb3_open_done_class, smb3_##name,  \
  DEFINE_SMB3_OPEN_DONE_EVENT(open_done);
  DEFINE_SMB3_OPEN_DONE_EVENT(posix_mkdir_done);
  
+
+DECLARE_EVENT_CLASS(smb3_lease_done_class,
+       TP_PROTO(__u32  lease_state,
+               __u32   tid,
+               __u64   sesid,
+               __u64   lease_key_low,
+               __u64   lease_key_high),
+       TP_ARGS(lease_state, tid, sesid, lease_key_low, lease_key_high),
+       TP_STRUCT__entry(
+               __field(__u32, lease_state)
+               __field(__u32, tid)
+               __field(__u64, sesid)
+               __field(__u64, lease_key_low)
+               __field(__u64, lease_key_high)
+       ),
+       TP_fast_assign(
+               __entry->lease_state = lease_state;
+               __entry->tid = tid;
+               __entry->sesid = sesid;
+               __entry->lease_key_low = lease_key_low;
+               __entry->lease_key_high = lease_key_high;
+       ),
+       TP_printk("sid=0x%llx tid=0x%x lease_key=0x%llx%016llx lease_state=0x%x",
+               __entry->sesid, __entry->tid, __entry->lease_key_high,
+               __entry->lease_key_low, __entry->lease_state)
+)
+
+#define DEFINE_SMB3_LEASE_DONE_EVENT(name)        \
+DEFINE_EVENT(smb3_lease_done_class, smb3_##name,  \
+       TP_PROTO(__u32  lease_state,            \
+               __u32   tid,                    \
+               __u64   sesid,                  \
+               __u64   lease_key_low,          \
+               __u64   lease_key_high),        \
+       TP_ARGS(lease_state, tid, sesid, lease_key_low, lease_key_high))
+
+DEFINE_SMB3_LEASE_DONE_EVENT(lease_done);
+
+DECLARE_EVENT_CLASS(smb3_lease_err_class,
+       TP_PROTO(__u32  lease_state,
+               __u32   tid,
+               __u64   sesid,
+               __u64   lease_key_low,
+               __u64   lease_key_high,
+               int     rc),
+       TP_ARGS(lease_state, tid, sesid, lease_key_low, lease_key_high, rc),
+       TP_STRUCT__entry(
+               __field(__u32, lease_state)
+               __field(__u32, tid)
+               __field(__u64, sesid)
+               __field(__u64, lease_key_low)
+               __field(__u64, lease_key_high)
+               __field(int, rc)
+       ),
+       TP_fast_assign(
+               __entry->lease_state = lease_state;
+               __entry->tid = tid;
+               __entry->sesid = sesid;
+               __entry->lease_key_low = lease_key_low;
+               __entry->lease_key_high = lease_key_high;
+               __entry->rc = rc;
+       ),
+       TP_printk("sid=0x%llx tid=0x%x lease_key=0x%llx%016llx lease_state=0x%x rc=%d",
+               __entry->sesid, __entry->tid, __entry->lease_key_high,
+               __entry->lease_key_low, __entry->lease_state, __entry->rc)
+)
+
+#define DEFINE_SMB3_LEASE_ERR_EVENT(name)        \
+DEFINE_EVENT(smb3_lease_err_class, smb3_##name,  \
+       TP_PROTO(__u32  lease_state,            \
+               __u32   tid,                    \
+               __u64   sesid,                  \
+               __u64   lease_key_low,          \
+               __u64   lease_key_high,         \
+               int     rc),                    \
+       TP_ARGS(lease_state, tid, sesid, lease_key_low, lease_key_high, rc))
+
+DEFINE_SMB3_LEASE_ERR_EVENT(lease_err);
+
  DECLARE_EVENT_CLASS(smb3_reconnect_class,
         TP_PROTO(__u64  currmid,
                 char *hostname),
@@ -486,6 +565,36 @@ DEFINE_EVENT(smb3_reconnect_class, smb3_##name,  \
  DEFINE_SMB3_RECONNECT_EVENT(reconnect);
  DEFINE_SMB3_RECONNECT_EVENT(partial_send_reconnect);
  
+DECLARE_EVENT_CLASS(smb3_credit_class,
+       TP_PROTO(__u64  currmid,
+               char *hostname,
+               int credits),
+       TP_ARGS(currmid, hostname, credits),
+       TP_STRUCT__entry(
+               __field(__u64, currmid)
+               __string(hostname, hostname)
+               __field(int, credits)
+       ),
+       TP_fast_assign(
+               __entry->currmid = currmid;
+               __assign_str(hostname, hostname);
+               __entry->credits = credits;
+       ),
+       TP_printk("server=%s current_mid=0x%llx credits=%d",
+               __get_str(hostname),
+               __entry->currmid,
+               __entry->credits)
+)
+
+#define DEFINE_SMB3_CREDIT_EVENT(name)        \
+DEFINE_EVENT(smb3_credit_class, smb3_##name,  \
+       TP_PROTO(__u64  currmid,                \
+               char *hostname,                 \
+               int  credits),                  \
+       TP_ARGS(currmid, hostname, credits))
+
+DEFINE_SMB3_CREDIT_EVENT(reconnect_with_invalid_credits);
+
  #endif /* _CIFS_TRACE_H */
  
  #undef TRACE_INCLUDE_PATH
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c

index b48f439..f811243 100644 (file)
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -113,9 +113,18 @@ DeleteMidQEntry(struct mid_q_entry *midEntry)
                 cifs_small_buf_release(midEntry->resp_buf);
  #ifdef CONFIG_CIFS_STATS2
         now = jiffies;
-       /* commands taking longer than one second are indications that
-          something is wrong, unless it is quite a slow link or server */
-       if (time_after(now, midEntry->when_alloc + HZ) &&
+       /*
+        * Commands taking longer than one second (the default) can indicate
+        * that something is wrong, unless it is quite a slow link or a very
+        * busy server. Note that this calculation is unlikely or impossible
+        * to wrap as long as slow_rsp_threshold is not set far above the
+        * recommended maximum (32767, i.e. 9 hours), and it is generally
+        * harmless even if wrong since it only affects debug counters - so
+        * the calculation is left as a simple comparison rather than doing
+        * multiple conversions and overflow checks.
+        */
+       if ((slow_rsp_threshold != 0) &&
+           time_after(now, midEntry->when_alloc + (slow_rsp_threshold * HZ)) &&
             (midEntry->command != command)) {
                 /* smb2slowcmd[NUMBER_OF_SMB2_COMMANDS] counts by command */
                 if ((le16_to_cpu(midEntry->command) < NUMBER_OF_SMB2_COMMANDS) &&
@@ -128,7 +137,7 @@ DeleteMidQEntry(struct mid_q_entry *midEntry)
                 if (cifsFYI & CIFS_TIMER) {
                         pr_debug(" CIFS slow rsp: cmd %d mid %llu",
                                midEntry->command, midEntry->mid);
-                       pr_info(" A: 0x%lx S: 0x%lx R: 0x%lx\n",
+                       cifs_info(" A: 0x%lx S: 0x%lx R: 0x%lx\n",
                                now - midEntry->when_alloc,
                                now - midEntry->when_sent,
                                now - midEntry->when_received);
@@ -786,7 +795,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
         int i, j, rc = 0;
         int timeout, optype;
         struct mid_q_entry *midQ[MAX_COMPOUND];
-       unsigned int credits = 1;
+       unsigned int credits = 0;
         char *buf;
  
         timeout = flags & CIFS_TIMEOUT_MASK;
@@ -851,21 +860,24 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
  
         mutex_unlock(&ses->server->srv_mutex);
  
-       for (i = 0; i < num_rqst; i++) {
-               if (rc < 0)
-                       goto out;
+       if (rc < 0)
+               goto out;
  
-               if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP))
-                       smb311_update_preauth_hash(ses, rqst[i].rq_iov,
-                                                  rqst[i].rq_nvec);
+       /*
+        * Compounding is never used during session establish.
+        */
+       if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP))
+               smb311_update_preauth_hash(ses, rqst[0].rq_iov,
+                                          rqst[0].rq_nvec);
  
-               if (timeout == CIFS_ASYNC_OP)
-                       goto out;
+       if (timeout == CIFS_ASYNC_OP)
+               goto out;
  
+       for (i = 0; i < num_rqst; i++) {
                 rc = wait_for_response(ses->server, midQ[i]);
                 if (rc != 0) {
-                       cifs_dbg(FYI, "Cancelling wait for mid %llu\n",
-                                midQ[i]->mid);
+                       cifs_dbg(VFS, "Cancelling wait for mid %llu cmd: %d\n",
+                                midQ[i]->mid, le16_to_cpu(midQ[i]->command));
                         send_cancel(ses->server, &rqst[i], midQ[i]);
                         spin_lock(&GlobalMid_Lock);
                         if (midQ[i]->mid_state == MID_REQUEST_SUBMITTED) {
@@ -877,10 +889,21 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
                         }
                         spin_unlock(&GlobalMid_Lock);
                 }
+       }
+
+       for (i = 0; i < num_rqst; i++)
+               if (midQ[i]->resp_buf)
+                       credits += ses->server->ops->get_credits(midQ[i]);
+       if (!credits)
+               credits = 1;
+
+       for (i = 0; i < num_rqst; i++) {
+               if (rc < 0)
+                       goto out;
  
                 rc = cifs_sync_mid_result(midQ[i], ses->server);
                 if (rc != 0) {
-                       add_credits(ses->server, 1, optype);
+                       add_credits(ses->server, credits, optype);
                         return rc;
                 }
  
@@ -901,23 +924,26 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
                 else
                         resp_buf_type[i] = CIFS_SMALL_BUFFER;
  
-               if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP)) {
-                       struct kvec iov = {
-                               .iov_base = resp_iov[i].iov_base,
-                               .iov_len = resp_iov[i].iov_len
-                       };
-                       smb311_update_preauth_hash(ses, &iov, 1);
-               }
-
-               credits = ses->server->ops->get_credits(midQ[i]);
-
                 rc = ses->server->ops->check_receive(midQ[i], ses->server,
                                                      flags & CIFS_LOG_ERROR);
  
                 /* mark it so buf will not be freed by cifs_delete_mid */
                 if ((flags & CIFS_NO_RESP) == 0)
                         midQ[i]->resp_buf = NULL;
+
+       }
+
+       /*
+        * Compounding is never used during session establish.
+        */
+       if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP)) {
+               struct kvec iov = {
+                       .iov_base = resp_iov[0].iov_base,
+                       .iov_len = resp_iov[0].iov_len
+               };
+               smb311_update_preauth_hash(ses, &iov, 1);
         }
+
  out:
         /*
          * This will dequeue all mids. After this it is important that the
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c

index f033f3a..07b8395 100644 (file)
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -93,7 +93,7 @@ int nfs4_check_delegation(struct inode *inode, fmode_t flags)
         return nfs4_do_check_delegation(inode, flags, false);
  }
  
-static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid)
+static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_stateid *stateid)
  {
         struct inode *inode = state->inode;
         struct file_lock *fl;
@@ -108,7 +108,7 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_
         spin_lock(&flctx->flc_lock);
  restart:
         list_for_each_entry(fl, list, fl_list) {
-               if (nfs_file_open_context(fl->fl_file) != ctx)
+               if (nfs_file_open_context(fl->fl_file)->state != state)
                         continue;
                 spin_unlock(&flctx->flc_lock);
                 status = nfs4_lock_delegation_recall(fl, state, stateid);
@@ -136,8 +136,8 @@ static int nfs_delegation_claim_opens(struct inode *inode,
         int err;
  
  again:
-       spin_lock(&inode->i_lock);
-       list_for_each_entry(ctx, &nfsi->open_files, list) {
+       rcu_read_lock();
+       list_for_each_entry_rcu(ctx, &nfsi->open_files, list) {
                 state = ctx->state;
                 if (state == NULL)
                         continue;
@@ -147,15 +147,16 @@ again:
                         continue;
                 if (!nfs4_stateid_match(&state->stateid, stateid))
                         continue;
-               get_nfs_open_context(ctx);
-               spin_unlock(&inode->i_lock);
+               if (!get_nfs_open_context(ctx))
+                       continue;
+               rcu_read_unlock();
                 sp = state->owner;
                 /* Block nfs4_proc_unlck */
                 mutex_lock(&sp->so_delegreturn_mutex);
                 seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
                 err = nfs4_open_delegation_recall(ctx, state, stateid, type);
                 if (!err)
-                       err = nfs_delegation_claim_locks(ctx, state, stateid);
+                       err = nfs_delegation_claim_locks(state, stateid);
                 if (!err && read_seqcount_retry(&sp->so_reclaim_seqcount, seq))
                         err = -EAGAIN;
                 mutex_unlock(&sp->so_delegreturn_mutex);
@@ -164,7 +165,7 @@ again:
                         return err;
                 goto again;
         }
-       spin_unlock(&inode->i_lock);
+       rcu_read_unlock();
         return 0;
  }
  
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c

index 8bfaa65..71b2e39 100644 (file)
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1072,6 +1072,100 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
         return !nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU);
  }
  
+static int
+nfs_lookup_revalidate_done(struct inode *dir, struct dentry *dentry,
+                          struct inode *inode, int error)
+{
+       switch (error) {
+       case 1:
+               dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is valid\n",
+                       __func__, dentry);
+               return 1;
+       case 0:
+               nfs_mark_for_revalidate(dir);
+               if (inode && S_ISDIR(inode->i_mode)) {
+                       /* Purge readdir caches. */
+                       nfs_zap_caches(inode);
+                       /*
+                        * We can't d_drop the root of a disconnected tree:
+                        * its d_hash is on the s_anon list and d_drop() would hide
+                        * it from shrink_dcache_for_unmount(), leading to busy
+                        * inodes on unmount and further oopses.
+                        */
+                       if (IS_ROOT(dentry))
+                               return 1;
+               }
+               dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is invalid\n",
+                               __func__, dentry);
+               return 0;
+       }
+       dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) lookup returned error %d\n",
+                               __func__, dentry, error);
+       return error;
+}
+
+static int
+nfs_lookup_revalidate_negative(struct inode *dir, struct dentry *dentry,
+                              unsigned int flags)
+{
+       int ret = 1;
+       if (nfs_neg_need_reval(dir, dentry, flags)) {
+               if (flags & LOOKUP_RCU)
+                       return -ECHILD;
+               ret = 0;
+       }
+       return nfs_lookup_revalidate_done(dir, dentry, NULL, ret);
+}
+
+static int
+nfs_lookup_revalidate_delegated(struct inode *dir, struct dentry *dentry,
+                               struct inode *inode)
+{
+       nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+       return nfs_lookup_revalidate_done(dir, dentry, inode, 1);
+}
+
+static int
+nfs_lookup_revalidate_dentry(struct inode *dir, struct dentry *dentry,
+                            struct inode *inode)
+{
+       struct nfs_fh *fhandle;
+       struct nfs_fattr *fattr;
+       struct nfs4_label *label;
+       int ret;
+
+       ret = -ENOMEM;
+       fhandle = nfs_alloc_fhandle();
+       fattr = nfs_alloc_fattr();
+       label = nfs4_label_alloc(NFS_SERVER(inode), GFP_KERNEL);
+       if (fhandle == NULL || fattr == NULL || IS_ERR(label))
+               goto out;
+
+       ret = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label);
+       if (ret < 0) {
+               if (ret == -ESTALE || ret == -ENOENT)
+                       ret = 0;
+               goto out;
+       }
+       ret = 0;
+       if (nfs_compare_fh(NFS_FH(inode), fhandle))
+               goto out;
+       if (nfs_refresh_inode(inode, fattr) < 0)
+               goto out;
+
+       nfs_setsecurity(inode, fattr, label);
+       nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+
+       /* set a readdirplus hint that we had a cache miss */
+       nfs_force_use_readdirplus(dir);
+       ret = 1;
+out:
+       nfs_free_fattr(fattr);
+       nfs_free_fhandle(fhandle);
+       nfs4_label_free(label);
+       return nfs_lookup_revalidate_done(dir, dentry, inode, ret);
+}
+
  /*
   * This is called every time the dcache has a lookup hit,
   * and we should check whether we can really trust that
@@ -1083,58 +1177,36 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
   * If the parent directory is seen to have changed, we throw out the
   * cached dentry and do a new lookup.
   */
-static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
+static int
+nfs_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
+                        unsigned int flags)
  {
-       struct inode *dir;
         struct inode *inode;
-       struct dentry *parent;
-       struct nfs_fh *fhandle = NULL;
-       struct nfs_fattr *fattr = NULL;
-       struct nfs4_label *label = NULL;
         int error;
  
-       if (flags & LOOKUP_RCU) {
-               parent = READ_ONCE(dentry->d_parent);
-               dir = d_inode_rcu(parent);
-               if (!dir)
-                       return -ECHILD;
-       } else {
-               parent = dget_parent(dentry);
-               dir = d_inode(parent);
-       }
         nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE);
         inode = d_inode(dentry);
  
-       if (!inode) {
-               if (nfs_neg_need_reval(dir, dentry, flags)) {
-                       if (flags & LOOKUP_RCU)
-                               return -ECHILD;
-                       goto out_bad;
-               }
-               goto out_valid;
-       }
+       if (!inode)
+               return nfs_lookup_revalidate_negative(dir, dentry, flags);
  
         if (is_bad_inode(inode)) {
-               if (flags & LOOKUP_RCU)
-                       return -ECHILD;
                 dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n",
                                 __func__, dentry);
                 goto out_bad;
         }
  
         if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ))
-               goto out_set_verifier;
+               return nfs_lookup_revalidate_delegated(dir, dentry, inode);
  
         /* Force a full look up iff the parent directory has changed */
         if (!(flags & (LOOKUP_EXCL | LOOKUP_REVAL)) &&
             nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU)) {
                 error = nfs_lookup_verify_inode(inode, flags);
                 if (error) {
-                       if (flags & LOOKUP_RCU)
-                               return -ECHILD;
                         if (error == -ESTALE)
-                               goto out_zap_parent;
-                       goto out_error;
+                               nfs_zap_caches(dir);
+                       goto out_bad;
                 }
                 nfs_advise_use_readdirplus(dir);
                 goto out_valid;
@@ -1146,81 +1218,45 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
         if (NFS_STALE(inode))
                 goto out_bad;
  
-       error = -ENOMEM;
-       fhandle = nfs_alloc_fhandle();
-       fattr = nfs_alloc_fattr();
-       if (fhandle == NULL || fattr == NULL)
-               goto out_error;
-
-       label = nfs4_label_alloc(NFS_SERVER(inode), GFP_NOWAIT);
-       if (IS_ERR(label))
-               goto out_error;
-
         trace_nfs_lookup_revalidate_enter(dir, dentry, flags);
-       error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label);
+       error = nfs_lookup_revalidate_dentry(dir, dentry, inode);
         trace_nfs_lookup_revalidate_exit(dir, dentry, flags, error);
-       if (error == -ESTALE || error == -ENOENT)
-               goto out_bad;
-       if (error)
-               goto out_error;
-       if (nfs_compare_fh(NFS_FH(inode), fhandle))
-               goto out_bad;
-       if ((error = nfs_refresh_inode(inode, fattr)) != 0)
-               goto out_bad;
-
-       nfs_setsecurity(inode, fattr, label);
-
-       nfs_free_fattr(fattr);
-       nfs_free_fhandle(fhandle);
-       nfs4_label_free(label);
+       return error;
+out_valid:
+       return nfs_lookup_revalidate_done(dir, dentry, inode, 1);
+out_bad:
+       if (flags & LOOKUP_RCU)
+               return -ECHILD;
+       return nfs_lookup_revalidate_done(dir, dentry, inode, 0);
+}
  
-       /* set a readdirplus hint that we had a cache miss */
-       nfs_force_use_readdirplus(dir);
+static int
+__nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags,
+                       int (*reval)(struct inode *, struct dentry *, unsigned int))
+{
+       struct dentry *parent;
+       struct inode *dir;
+       int ret;
  
-out_set_verifier:
-       nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
- out_valid:
         if (flags & LOOKUP_RCU) {
+               parent = READ_ONCE(dentry->d_parent);
+               dir = d_inode_rcu(parent);
+               if (!dir)
+                       return -ECHILD;
+               ret = reval(dir, dentry, flags);
                 if (parent != READ_ONCE(dentry->d_parent))
                         return -ECHILD;
-       } else
+       } else {
+               parent = dget_parent(dentry);
+               ret = reval(d_inode(parent), dentry, flags);
                 dput(parent);
-       dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is valid\n",
-                       __func__, dentry);
-       return 1;
-out_zap_parent:
-       nfs_zap_caches(dir);
- out_bad:
-       WARN_ON(flags & LOOKUP_RCU);
-       nfs_free_fattr(fattr);
-       nfs_free_fhandle(fhandle);
-       nfs4_label_free(label);
-       nfs_mark_for_revalidate(dir);
-       if (inode && S_ISDIR(inode->i_mode)) {
-               /* Purge readdir caches. */
-               nfs_zap_caches(inode);
-               /*
-                * We can't d_drop the root of a disconnected tree:
-                * its d_hash is on the s_anon list and d_drop() would hide
-                * it from shrink_dcache_for_unmount(), leading to busy
-                * inodes on unmount and further oopses.
-                */
-               if (IS_ROOT(dentry))
-                       goto out_valid;
         }
-       dput(parent);
-       dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is invalid\n",
-                       __func__, dentry);
-       return 0;
-out_error:
-       WARN_ON(flags & LOOKUP_RCU);
-       nfs_free_fattr(fattr);
-       nfs_free_fhandle(fhandle);
-       nfs4_label_free(label);
-       dput(parent);
-       dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) lookup returned error %d\n",
-                       __func__, dentry, error);
-       return error;
+       return ret;
+}
+
+static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
+{
+       return __nfs_lookup_revalidate(dentry, flags, nfs_do_lookup_revalidate);
  }
  
  /*
@@ -1579,62 +1615,55 @@ no_open:
  }
  EXPORT_SYMBOL_GPL(nfs_atomic_open);
  
-static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
+static int
+nfs4_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
+                         unsigned int flags)
  {
         struct inode *inode;
-       int ret = 0;
  
         if (!(flags & LOOKUP_OPEN) || (flags & LOOKUP_DIRECTORY))
-               goto no_open;
+               goto full_reval;
         if (d_mountpoint(dentry))
-               goto no_open;
-       if (NFS_SB(dentry->d_sb)->caps & NFS_CAP_ATOMIC_OPEN_V1)
-               goto no_open;
+               goto full_reval;
  
         inode = d_inode(dentry);
  
         /* We can't create new files in nfs_open_revalidate(), so we
          * optimize away revalidation of negative dentries.
          */
-       if (inode == NULL) {
-               struct dentry *parent;
-               struct inode *dir;
-
-               if (flags & LOOKUP_RCU) {
-                       parent = READ_ONCE(dentry->d_parent);
-                       dir = d_inode_rcu(parent);
-                       if (!dir)
-                               return -ECHILD;
-               } else {
-                       parent = dget_parent(dentry);
-                       dir = d_inode(parent);
-               }
-               if (!nfs_neg_need_reval(dir, dentry, flags))
-                       ret = 1;
-               else if (flags & LOOKUP_RCU)
-                       ret = -ECHILD;
-               if (!(flags & LOOKUP_RCU))
-                       dput(parent);
-               else if (parent != READ_ONCE(dentry->d_parent))
-                       return -ECHILD;
-               goto out;
-       }
+       if (inode == NULL)
+               goto full_reval;
+
+       if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ))
+               return nfs_lookup_revalidate_delegated(dir, dentry, inode);
  
         /* NFS only supports OPEN on regular files */
         if (!S_ISREG(inode->i_mode))
-               goto no_open;
+               goto full_reval;
+
         /* We cannot do exclusive creation on a positive dentry */
-       if (flags & LOOKUP_EXCL)
-               goto no_open;
+       if (flags & (LOOKUP_EXCL | LOOKUP_REVAL))
+               goto reval_dentry;
+
+       /* Check if the directory changed */
+       if (!nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU))
+               goto reval_dentry;
  
         /* Let f_op->open() actually open (and revalidate) the file */
-       ret = 1;
+       return 1;
+reval_dentry:
+       if (flags & LOOKUP_RCU)
+               return -ECHILD;
+       return nfs_lookup_revalidate_dentry(dir, dentry, inode);
  
-out:
-       return ret;
+full_reval:
+       return nfs_do_lookup_revalidate(dir, dentry, flags);
+}
  
-no_open:
-       return nfs_lookup_revalidate(dentry, flags);
+static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
+{
+       return __nfs_lookup_revalidate(dentry, flags,
+                       nfs4_do_lookup_revalidate);
  }
  
  #endif /* CONFIG_NFSV4 */
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c

index d175724..61f46fa 100644 (file)
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -1164,6 +1164,7 @@ static struct pnfs_layoutdriver_type filelayout_type = {
         .id                     = LAYOUT_NFSV4_1_FILES,
         .name                   = "LAYOUT_NFSV4_1_FILES",
         .owner                  = THIS_MODULE,
+       .max_layoutget_response = 4096, /* 1 page or so... */
         .alloc_layout_hdr       = filelayout_alloc_layout_hdr,
         .free_layout_hdr        = filelayout_free_layout_hdr,
         .alloc_lseg             = filelayout_alloc_lseg,
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c

index cae4333..86bcba4 100644 (file)
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -2356,6 +2356,7 @@ static struct pnfs_layoutdriver_type flexfilelayout_type = {
         .name                   = "LAYOUT_FLEX_FILES",
         .owner                  = THIS_MODULE,
         .flags                  = PNFS_LAYOUTGET_ON_OPEN,
+       .max_layoutget_response = 4096, /* 1 page or so... */
         .set_layoutdriver       = ff_layout_set_layoutdriver,
         .alloc_layout_hdr       = ff_layout_alloc_layout_hdr,
         .free_layout_hdr        = ff_layout_free_layout_hdr,
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c

index 59aa049..74d8d53 100644 (file)
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -453,7 +453,7 @@ ff_layout_get_ds_cred(struct pnfs_layout_segment *lseg, u32 ds_idx,
         struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx);
         struct rpc_cred *cred;
  
-       if (mirror) {
+       if (mirror && !mirror->mirror_ds->ds_versions[0].tightly_coupled) {
                 cred = ff_layout_get_mirror_cred(mirror, lseg->pls_range.iomode);
                 if (!cred)
                         cred = get_rpccred(mdscred);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c

index b65aee4..5b1eee4 100644 (file)
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -857,15 +857,14 @@ static void nfs_init_lock_context(struct nfs_lock_context *l_ctx)
  
  static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context *ctx)
  {
-       struct nfs_lock_context *head = &ctx->lock_context;
-       struct nfs_lock_context *pos = head;
+       struct nfs_lock_context *pos;
  
-       do {
+       list_for_each_entry_rcu(pos, &ctx->lock_context.list, list) {
                 if (pos->lockowner != current->files)
                         continue;
-               refcount_inc(&pos->count);
-               return pos;
-       } while ((pos = list_entry(pos->list.next, typeof(*pos), list)) != head);
+               if (refcount_inc_not_zero(&pos->count))
+                       return pos;
+       }
         return NULL;
  }
  
@@ -874,10 +873,10 @@ struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx)
         struct nfs_lock_context *res, *new = NULL;
         struct inode *inode = d_inode(ctx->dentry);
  
-       spin_lock(&inode->i_lock);
+       rcu_read_lock();
         res = __nfs_find_lock_context(ctx);
+       rcu_read_unlock();
         if (res == NULL) {
-               spin_unlock(&inode->i_lock);
                 new = kmalloc(sizeof(*new), GFP_KERNEL);
                 if (new == NULL)
                         return ERR_PTR(-ENOMEM);
@@ -885,14 +884,14 @@ struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx)
                 spin_lock(&inode->i_lock);
                 res = __nfs_find_lock_context(ctx);
                 if (res == NULL) {
-                       list_add_tail(&new->list, &ctx->lock_context.list);
+                       list_add_tail_rcu(&new->list, &ctx->lock_context.list);
                         new->open_context = ctx;
                         res = new;
                         new = NULL;
                 }
+               spin_unlock(&inode->i_lock);
+               kfree(new);
         }
-       spin_unlock(&inode->i_lock);
-       kfree(new);
         return res;
  }
  EXPORT_SYMBOL_GPL(nfs_get_lock_context);
@@ -904,9 +903,9 @@ void nfs_put_lock_context(struct nfs_lock_context *l_ctx)
  
         if (!refcount_dec_and_lock(&l_ctx->count, &inode->i_lock))
                 return;
-       list_del(&l_ctx->list);
+       list_del_rcu(&l_ctx->list);
         spin_unlock(&inode->i_lock);
-       kfree(l_ctx);
+       kfree_rcu(l_ctx, rcu_head);
  }
  EXPORT_SYMBOL_GPL(nfs_put_lock_context);
  
@@ -978,9 +977,9 @@ EXPORT_SYMBOL_GPL(alloc_nfs_open_context);
  
  struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx)
  {
-       if (ctx != NULL)
-               refcount_inc(&ctx->lock_context.count);
-       return ctx;
+       if (ctx != NULL && refcount_inc_not_zero(&ctx->lock_context.count))
+               return ctx;
+       return NULL;
  }
  EXPORT_SYMBOL_GPL(get_nfs_open_context);
  
@@ -989,13 +988,13 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
         struct inode *inode = d_inode(ctx->dentry);
         struct super_block *sb = ctx->dentry->d_sb;
  
+       if (!refcount_dec_and_test(&ctx->lock_context.count))
+               return;
         if (!list_empty(&ctx->list)) {
-               if (!refcount_dec_and_lock(&ctx->lock_context.count, &inode->i_lock))
-                       return;
-               list_del(&ctx->list);
+               spin_lock(&inode->i_lock);
+               list_del_rcu(&ctx->list);
                 spin_unlock(&inode->i_lock);
-       } else if (!refcount_dec_and_test(&ctx->lock_context.count))
-               return;
+       }
         if (inode != NULL)
                 NFS_PROTO(inode)->close_context(ctx, is_sync);
         if (ctx->cred != NULL)
@@ -1003,7 +1002,7 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
         dput(ctx->dentry);
         nfs_sb_deactive(sb);
         kfree(ctx->mdsthreshold);
-       kfree(ctx);
+       kfree_rcu(ctx, rcu_head);
  }
  
  void put_nfs_open_context(struct nfs_open_context *ctx)
@@ -1027,10 +1026,7 @@ void nfs_inode_attach_open_context(struct nfs_open_context *ctx)
         struct nfs_inode *nfsi = NFS_I(inode);
  
         spin_lock(&inode->i_lock);
-       if (ctx->mode & FMODE_WRITE)
-               list_add(&ctx->list, &nfsi->open_files);
-       else
-               list_add_tail(&ctx->list, &nfsi->open_files);
+       list_add_tail_rcu(&ctx->list, &nfsi->open_files);
         spin_unlock(&inode->i_lock);
  }
  EXPORT_SYMBOL_GPL(nfs_inode_attach_open_context);
@@ -1051,16 +1047,17 @@ struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_c
         struct nfs_inode *nfsi = NFS_I(inode);
         struct nfs_open_context *pos, *ctx = NULL;
  
-       spin_lock(&inode->i_lock);
-       list_for_each_entry(pos, &nfsi->open_files, list) {
+       rcu_read_lock();
+       list_for_each_entry_rcu(pos, &nfsi->open_files, list) {
                 if (cred != NULL && pos->cred != cred)
                         continue;
                 if ((pos->mode & (FMODE_READ|FMODE_WRITE)) != mode)
                         continue;
                 ctx = get_nfs_open_context(pos);
-               break;
+               if (ctx)
+                       break;
         }
-       spin_unlock(&inode->i_lock);
+       rcu_read_unlock();
         return ctx;
  }
  
@@ -1078,9 +1075,6 @@ void nfs_file_clear_open_context(struct file *filp)
                 if (ctx->error < 0)
                         invalidate_inode_pages2(inode->i_mapping);
                 filp->private_data = NULL;
-               spin_lock(&inode->i_lock);
-               list_move_tail(&ctx->list, &NFS_I(inode)->open_files);
-               spin_unlock(&inode->i_lock);
                 put_nfs_open_context_sync(ctx);
         }
  }
@@ -1329,19 +1323,11 @@ static bool nfs_file_has_writers(struct nfs_inode *nfsi)
  {
         struct inode *inode = &nfsi->vfs_inode;
  
-       assert_spin_locked(&inode->i_lock);
-
         if (!S_ISREG(inode->i_mode))
                 return false;
         if (list_empty(&nfsi->open_files))
                 return false;
-       /* Note: This relies on nfsi->open_files being ordered with writers
-        *       being placed at the head of the list.
-        *       See nfs_inode_attach_open_context()
-        */
-       return (list_first_entry(&nfsi->open_files,
-                       struct nfs_open_context,
-                       list)->mode & FMODE_WRITE) == FMODE_WRITE;
+       return inode_is_open_for_write(inode);
  }
  
  static bool nfs_file_has_buffered_writers(struct nfs_inode *nfsi)
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c

index ec8a9ef..71bc162 100644 (file)
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -786,6 +786,7 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
  static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
  {
         struct inode *inode = hdr->inode;
+       struct nfs_server *server = NFS_SERVER(inode);
  
         if (hdr->pgio_done_cb != NULL)
                 return hdr->pgio_done_cb(task, hdr);
@@ -793,6 +794,9 @@ static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
         if (nfs3_async_handle_jukebox(task, inode))
                 return -EAGAIN;
  
+       if (task->tk_status >= 0 && !server->read_hdrsize)
+               cmpxchg(&server->read_hdrsize, 0, hdr->res.replen);
+
         nfs_invalidate_atime(inode);
         nfs_refresh_inode(inode, &hdr->fattr);
         return 0;
@@ -802,6 +806,7 @@ static void nfs3_proc_read_setup(struct nfs_pgio_header *hdr,
                                  struct rpc_message *msg)
  {
         msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ];
+       hdr->args.replen = NFS_SERVER(hdr->inode)->read_hdrsize;
  }
  
  static int nfs3_proc_pgio_rpc_prepare(struct rpc_task *task,
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c

index 64e4fa3..78df4eb 100644 (file)
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -983,10 +983,11 @@ static void nfs3_xdr_enc_read3args(struct rpc_rqst *req,
                                    const void *data)
  {
         const struct nfs_pgio_args *args = data;
+       unsigned int replen = args->replen ? args->replen : NFS3_readres_sz;
  
         encode_read3args(xdr, args);
         prepare_reply_buffer(req, args->pages, args->pgbase,
-                                       args->count, NFS3_readres_sz);
+                                       args->count, replen);
         req->rq_rcv_buf.flags |= XDRBUF_READ;
  }
  
@@ -1364,10 +1365,12 @@ static void nfs3_xdr_enc_getacl3args(struct rpc_rqst *req,
  
         encode_nfs_fh3(xdr, args->fh);
         encode_uint32(xdr, args->mask);
-       if (args->mask & (NFS_ACL | NFS_DFACL))
+       if (args->mask & (NFS_ACL | NFS_DFACL)) {
                 prepare_reply_buffer(req, args->pages, 0,
                                         NFSACL_MAXPAGES << PAGE_SHIFT,
                                         ACL3_getaclres_sz);
+               req->rq_rcv_buf.flags |= XDRBUF_SPARSE_PAGES;
+       }
  }
  
  static void nfs3_xdr_enc_setacl3args(struct rpc_rqst *req,
@@ -1673,9 +1676,11 @@ static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
                                  void *data)
  {
         struct nfs_pgio_res *result = data;
+       unsigned int pos;
         enum nfs_stat status;
         int error;
  
+       pos = xdr_stream_pos(xdr);
         error = decode_nfsstat3(xdr, &status);
         if (unlikely(error))
                 goto out;
@@ -1685,6 +1690,7 @@ static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
         result->op_status = status;
         if (status != NFS3_OK)
                 goto out_status;
+       result->replen = 3 + ((xdr_stream_pos(xdr) - pos) >> 2);
         error = decode_read3resok(xdr, result);
  out:
         return error;
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h

index 3a69041..8d59c96 100644 (file)
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -188,9 +188,10 @@ struct nfs4_state {
         unsigned int n_wronly;          /* Number of write-only references */
         unsigned int n_rdwr;            /* Number of read/write references */
         fmode_t state;                  /* State on the server (R,W, or RW) */
-       atomic_t count;
+       refcount_t count;
  
         wait_queue_head_t waitq;
+       struct rcu_head rcu_head;
  };
  
  
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c

index 146e308..8f53455 100644 (file)
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -950,10 +950,10 @@ EXPORT_SYMBOL_GPL(nfs4_set_ds_client);
  
  /*
   * Session has been established, and the client marked ready.
- * Set the mount rsize and wsize with negotiated fore channel
- * attributes which will be bound checked in nfs_server_set_fsinfo.
+ * Limit the mount rsize, wsize and dtsize using negotiated fore
+ * channel attributes.
   */
-static void nfs4_session_set_rwsize(struct nfs_server *server)
+static void nfs4_session_limit_rwsize(struct nfs_server *server)
  {
  #ifdef CONFIG_NFS_V4_1
         struct nfs4_session *sess;
@@ -966,9 +966,11 @@ static void nfs4_session_set_rwsize(struct nfs_server *server)
         server_resp_sz = sess->fc_attrs.max_resp_sz - nfs41_maxread_overhead;
         server_rqst_sz = sess->fc_attrs.max_rqst_sz - nfs41_maxwrite_overhead;
  
-       if (!server->rsize || server->rsize > server_resp_sz)
+       if (server->dtsize > server_resp_sz)
+               server->dtsize = server_resp_sz;
+       if (server->rsize > server_resp_sz)
                 server->rsize = server_resp_sz;
-       if (!server->wsize || server->wsize > server_rqst_sz)
+       if (server->wsize > server_rqst_sz)
                 server->wsize = server_rqst_sz;
  #endif /* CONFIG_NFS_V4_1 */
  }
@@ -1015,12 +1017,12 @@ static int nfs4_server_common_setup(struct nfs_server *server,
                         (unsigned long long) server->fsid.minor);
         nfs_display_fhandle(mntfh, "Pseudo-fs root FH");
  
-       nfs4_session_set_rwsize(server);
-
         error = nfs_probe_fsinfo(server, mntfh, fattr);
         if (error < 0)
                 goto out;
  
+       nfs4_session_limit_rwsize(server);
+
         if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN)
                 server->namelen = NFS4_MAXNAMLEN;
  
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c

index 8220a16..db84b4a 100644 (file)
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1349,12 +1349,20 @@ static bool nfs4_mode_match_open_stateid(struct nfs4_state *state,
         return false;
  }
  
-static int can_open_cached(struct nfs4_state *state, fmode_t mode, int open_mode)
+static int can_open_cached(struct nfs4_state *state, fmode_t mode,
+               int open_mode, enum open_claim_type4 claim)
  {
         int ret = 0;
  
         if (open_mode & (O_EXCL|O_TRUNC))
                 goto out;
+       switch (claim) {
+       case NFS4_OPEN_CLAIM_NULL:
+       case NFS4_OPEN_CLAIM_FH:
+               goto out;
+       default:
+               break;
+       }
         switch (mode & (FMODE_READ|FMODE_WRITE)) {
                 case FMODE_READ:
                         ret |= test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0
@@ -1747,7 +1755,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
  
         for (;;) {
                 spin_lock(&state->owner->so_lock);
-               if (can_open_cached(state, fmode, open_mode)) {
+               if (can_open_cached(state, fmode, open_mode, claim)) {
                         update_open_stateflags(state, fmode);
                         spin_unlock(&state->owner->so_lock);
                         goto out_return_state;
@@ -1777,7 +1785,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
  out:
         return ERR_PTR(ret);
  out_return_state:
-       atomic_inc(&state->count);
+       refcount_inc(&state->count);
         return state;
  }
  
@@ -1849,7 +1857,7 @@ _nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data)
  update:
         update_open_stateid(state, &data->o_res.stateid, NULL,
                             data->o_arg.fmode);
-       atomic_inc(&state->count);
+       refcount_inc(&state->count);
  
         return state;
  }
@@ -1887,7 +1895,7 @@ nfs4_opendata_find_nfs4_state(struct nfs4_opendata *data)
                 return ERR_CAST(inode);
         if (data->state != NULL && data->state->inode == inode) {
                 state = data->state;
-               atomic_inc(&state->count);
+               refcount_inc(&state->count);
         } else
                 state = nfs4_get_open_state(inode, data->owner);
         iput(inode);
@@ -1933,23 +1941,41 @@ nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data)
         return ret;
  }
  
-static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state *state)
+static struct nfs_open_context *
+nfs4_state_find_open_context_mode(struct nfs4_state *state, fmode_t mode)
  {
         struct nfs_inode *nfsi = NFS_I(state->inode);
         struct nfs_open_context *ctx;
  
-       spin_lock(&state->inode->i_lock);
-       list_for_each_entry(ctx, &nfsi->open_files, list) {
+       rcu_read_lock();
+       list_for_each_entry_rcu(ctx, &nfsi->open_files, list) {
                 if (ctx->state != state)
                         continue;
-               get_nfs_open_context(ctx);
-               spin_unlock(&state->inode->i_lock);
+               if ((ctx->mode & mode) != mode)
+                       continue;
+               if (!get_nfs_open_context(ctx))
+                       continue;
+               rcu_read_unlock();
                 return ctx;
         }
-       spin_unlock(&state->inode->i_lock);
+       rcu_read_unlock();
         return ERR_PTR(-ENOENT);
  }
  
+static struct nfs_open_context *
+nfs4_state_find_open_context(struct nfs4_state *state)
+{
+       struct nfs_open_context *ctx;
+
+       ctx = nfs4_state_find_open_context_mode(state, FMODE_READ|FMODE_WRITE);
+       if (!IS_ERR(ctx))
+               return ctx;
+       ctx = nfs4_state_find_open_context_mode(state, FMODE_WRITE);
+       if (!IS_ERR(ctx))
+               return ctx;
+       return nfs4_state_find_open_context_mode(state, FMODE_READ);
+}
+
  static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context *ctx,
                 struct nfs4_state *state, enum open_claim_type4 claim)
  {
@@ -1960,7 +1986,7 @@ static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context
         if (opendata == NULL)
                 return ERR_PTR(-ENOMEM);
         opendata->state = state;
-       atomic_inc(&state->count);
+       refcount_inc(&state->count);
         return opendata;
  }
  
@@ -2276,7 +2302,8 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
         if (data->state != NULL) {
                 struct nfs_delegation *delegation;
  
-               if (can_open_cached(data->state, data->o_arg.fmode, data->o_arg.open_flags))
+               if (can_open_cached(data->state, data->o_arg.fmode,
+                                       data->o_arg.open_flags, claim))
                         goto out_no_action;
                 rcu_read_lock();
                 delegation = rcu_dereference(NFS_I(data->state->inode)->delegation);
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c

index 40a08cd..62ae0fd 100644 (file)
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -655,7 +655,7 @@ nfs4_alloc_open_state(void)
         state = kzalloc(sizeof(*state), GFP_NOFS);
         if (!state)
                 return NULL;
-       atomic_set(&state->count, 1);
+       refcount_set(&state->count, 1);
         INIT_LIST_HEAD(&state->lock_states);
         spin_lock_init(&state->state_lock);
         seqlock_init(&state->seqlock);
@@ -684,12 +684,12 @@ __nfs4_find_state_byowner(struct inode *inode, struct nfs4_state_owner *owner)
         struct nfs_inode *nfsi = NFS_I(inode);
         struct nfs4_state *state;
  
-       list_for_each_entry(state, &nfsi->open_states, inode_states) {
+       list_for_each_entry_rcu(state, &nfsi->open_states, inode_states) {
                 if (state->owner != owner)
                         continue;
                 if (!nfs4_valid_open_stateid(state))
                         continue;
-               if (atomic_inc_not_zero(&state->count))
+               if (refcount_inc_not_zero(&state->count))
                         return state;
         }
         return NULL;
@@ -698,7 +698,7 @@ __nfs4_find_state_byowner(struct inode *inode, struct nfs4_state_owner *owner)
  static void
  nfs4_free_open_state(struct nfs4_state *state)
  {
-       kfree(state);
+       kfree_rcu(state, rcu_head);
  }
  
  struct nfs4_state *
@@ -707,9 +707,9 @@ nfs4_get_open_state(struct inode *inode, struct nfs4_state_owner *owner)
         struct nfs4_state *state, *new;
         struct nfs_inode *nfsi = NFS_I(inode);
  
-       spin_lock(&inode->i_lock);
+       rcu_read_lock();
         state = __nfs4_find_state_byowner(inode, owner);
-       spin_unlock(&inode->i_lock);
+       rcu_read_unlock();
         if (state)
                 goto out;
         new = nfs4_alloc_open_state();
@@ -720,7 +720,7 @@ nfs4_get_open_state(struct inode *inode, struct nfs4_state_owner *owner)
                 state = new;
                 state->owner = owner;
                 atomic_inc(&owner->so_count);
-               list_add(&state->inode_states, &nfsi->open_states);
+               list_add_rcu(&state->inode_states, &nfsi->open_states);
                 ihold(inode);
                 state->inode = inode;
                 spin_unlock(&inode->i_lock);
@@ -743,10 +743,10 @@ void nfs4_put_open_state(struct nfs4_state *state)
         struct inode *inode = state->inode;
         struct nfs4_state_owner *owner = state->owner;
  
-       if (!atomic_dec_and_lock(&state->count, &owner->so_lock))
+       if (!refcount_dec_and_lock(&state->count, &owner->so_lock))
                 return;
         spin_lock(&inode->i_lock);
-       list_del(&state->inode_states);
+       list_del_rcu(&state->inode_states);
         list_del(&state->open_states);
         spin_unlock(&inode->i_lock);
         spin_unlock(&owner->so_lock);
@@ -1437,8 +1437,8 @@ void nfs_inode_find_state_and_recover(struct inode *inode,
         struct nfs4_state *state;
         bool found = false;
  
-       spin_lock(&inode->i_lock);
-       list_for_each_entry(ctx, &nfsi->open_files, list) {
+       rcu_read_lock();
+       list_for_each_entry_rcu(ctx, &nfsi->open_files, list) {
                 state = ctx->state;
                 if (state == NULL)
                         continue;
@@ -1456,7 +1456,7 @@ void nfs_inode_find_state_and_recover(struct inode *inode,
                     nfs4_state_mark_reclaim_nograce(clp, state))
                         found = true;
         }
-       spin_unlock(&inode->i_lock);
+       rcu_read_unlock();
  
         nfs_inode_find_delegation_state_and_recover(inode, stateid);
         if (found)
@@ -1469,13 +1469,13 @@ static void nfs4_state_mark_open_context_bad(struct nfs4_state *state)
         struct nfs_inode *nfsi = NFS_I(inode);
         struct nfs_open_context *ctx;
  
-       spin_lock(&inode->i_lock);
-       list_for_each_entry(ctx, &nfsi->open_files, list) {
+       rcu_read_lock();
+       list_for_each_entry_rcu(ctx, &nfsi->open_files, list) {
                 if (ctx->state != state)
                         continue;
                 set_bit(NFS_CONTEXT_BAD, &ctx->flags);
         }
-       spin_unlock(&inode->i_lock);
+       rcu_read_unlock();
  }
  
  static void nfs4_state_mark_recovery_failed(struct nfs4_state *state, int error)
@@ -1549,10 +1549,62 @@ out:
         return status;
  }
  
+#ifdef CONFIG_NFS_V4_2
+static void nfs42_complete_copies(struct nfs4_state_owner *sp, struct nfs4_state *state)
+{
+       struct nfs4_copy_state *copy;
+
+       if (!test_bit(NFS_CLNT_DST_SSC_COPY_STATE, &state->flags))
+               return;
+       /* Complete the first pending copy whose parent stateid matches @state */
+       spin_lock(&sp->so_server->nfs_client->cl_lock);
+       list_for_each_entry(copy, &sp->so_server->ss_copies, copies) {
+               if (!nfs4_stateid_match_other(&state->stateid, &copy->parent_state->stateid))
+                       continue;
+               copy->flags = 1;
+               complete(&copy->completion);
+               break;
+       }
+       spin_unlock(&sp->so_server->nfs_client->cl_lock);
+}
+#else /* !CONFIG_NFS_V4_2 */
+static inline void nfs42_complete_copies(struct nfs4_state_owner *sp,
+                                        struct nfs4_state *state)
+{
+}
+#endif /* CONFIG_NFS_V4_2 */
+
+static int __nfs4_reclaim_open_state(struct nfs4_state_owner *sp, struct nfs4_state *state,
+                                    const struct nfs4_state_recovery_ops *ops)
+{
+       struct nfs4_lock_state *lock;
+       int status;
+
+       status = ops->recover_open(sp, state);
+       if (status < 0)
+               return status;
+
+       status = nfs4_reclaim_locks(state, ops);
+       if (status < 0)
+               return status;
+
+       if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) {
+               spin_lock(&state->state_lock);
+               list_for_each_entry(lock, &state->lock_states, ls_locks) {
+                       if (!test_bit(NFS_LOCK_INITIALIZED, &lock->ls_flags))
+                               pr_warn_ratelimited("NFS: %s: Lock reclaim failed!\n", __func__);
+               }
+               spin_unlock(&state->state_lock);
+       }
+
+       nfs42_complete_copies(sp, state);
+       clear_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags);
+       return status;
+}
+
  static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs4_state_recovery_ops *ops)
  {
         struct nfs4_state *state;
-       struct nfs4_lock_state *lock;
         int status = 0;
  
         /* Note: we rely on the sp->so_states list being ordered 
@@ -1573,79 +1625,45 @@ restart:
                         continue;
                 if (state->state == 0)
                         continue;
-               atomic_inc(&state->count);
+               refcount_inc(&state->count);
                 spin_unlock(&sp->so_lock);
-               status = ops->recover_open(sp, state);
-               if (status >= 0) {
-                       status = nfs4_reclaim_locks(state, ops);
-                       if (status >= 0) {
-                               if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) {
-                                       spin_lock(&state->state_lock);
-                                       list_for_each_entry(lock, &state->lock_states, ls_locks) {
-                                               if (!test_bit(NFS_LOCK_INITIALIZED, &lock->ls_flags))
-                                                       pr_warn_ratelimited("NFS: "
-                                                                           "%s: Lock reclaim "
-                                                                           "failed!\n", __func__);
-                                       }
-                                       spin_unlock(&state->state_lock);
-                               }
-                               clear_bit(NFS_STATE_RECLAIM_NOGRACE,
-                                       &state->flags);
-#ifdef CONFIG_NFS_V4_2
-                               if (test_bit(NFS_CLNT_DST_SSC_COPY_STATE, &state->flags)) {
-                                       struct nfs4_copy_state *copy;
-
-                                       spin_lock(&sp->so_server->nfs_client->cl_lock);
-                                       list_for_each_entry(copy, &sp->so_server->ss_copies, copies) {
-                                               if (memcmp(&state->stateid.other, &copy->parent_state->stateid.other, NFS4_STATEID_SIZE))
-                                                       continue;
-                                               copy->flags = 1;
-                                               complete(&copy->completion);
-                                               printk("AGLO: server rebooted waking up the copy\n");
-                                               break;
-                                       }
-                                       spin_unlock(&sp->so_server->nfs_client->cl_lock);
-                               }
-#endif /* CONFIG_NFS_V4_2 */
-                               nfs4_put_open_state(state);
-                               spin_lock(&sp->so_lock);
-                               goto restart;
-                       }
-               }
+               status = __nfs4_reclaim_open_state(sp, state, ops);
+
                 switch (status) {
-                       default:
-                               printk(KERN_ERR "NFS: %s: unhandled error %d\n",
-                                       __func__, status);
-                               /* Fall through */
-                       case -ENOENT:
-                       case -ENOMEM:
-                       case -EACCES:
-                       case -EROFS:
-                       case -EIO:
-                       case -ESTALE:
-                               /* Open state on this file cannot be recovered */
-                               nfs4_state_mark_recovery_failed(state, status);
-                               break;
-                       case -EAGAIN:
-                               ssleep(1);
-                               /* Fall through */
-                       case -NFS4ERR_ADMIN_REVOKED:
-                       case -NFS4ERR_STALE_STATEID:
-                       case -NFS4ERR_OLD_STATEID:
-                       case -NFS4ERR_BAD_STATEID:
-                       case -NFS4ERR_RECLAIM_BAD:
-                       case -NFS4ERR_RECLAIM_CONFLICT:
-                               nfs4_state_mark_reclaim_nograce(sp->so_server->nfs_client, state);
+               default:
+                       if (status >= 0)
                                 break;
-                       case -NFS4ERR_EXPIRED:
-                       case -NFS4ERR_NO_GRACE:
-                               nfs4_state_mark_reclaim_nograce(sp->so_server->nfs_client, state);
-                       case -NFS4ERR_STALE_CLIENTID:
-                       case -NFS4ERR_BADSESSION:
-                       case -NFS4ERR_BADSLOT:
-                       case -NFS4ERR_BAD_HIGH_SLOT:
-                       case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
-                               goto out_err;
+                       printk(KERN_ERR "NFS: %s: unhandled error %d\n", __func__, status);
+                       /* Fall through */
+               case -ENOENT:
+               case -ENOMEM:
+               case -EACCES:
+               case -EROFS:
+               case -EIO:
+               case -ESTALE:
+                       /* Open state on this file cannot be recovered */
+                       nfs4_state_mark_recovery_failed(state, status);
+                       break;
+               case -EAGAIN:
+                       ssleep(1);
+                       /* Fall through */
+               case -NFS4ERR_ADMIN_REVOKED:
+               case -NFS4ERR_STALE_STATEID:
+               case -NFS4ERR_OLD_STATEID:
+               case -NFS4ERR_BAD_STATEID:
+               case -NFS4ERR_RECLAIM_BAD:
+               case -NFS4ERR_RECLAIM_CONFLICT:
+                       nfs4_state_mark_reclaim_nograce(sp->so_server->nfs_client, state);
+                       break;
+               case -NFS4ERR_EXPIRED:
+               case -NFS4ERR_NO_GRACE:
+                       nfs4_state_mark_reclaim_nograce(sp->so_server->nfs_client, state);
+               case -NFS4ERR_STALE_CLIENTID:
+               case -NFS4ERR_BADSESSION:
+               case -NFS4ERR_BADSLOT:
+               case -NFS4ERR_BAD_HIGH_SLOT:
+               case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
+                       goto out_err;
                 }
                 nfs4_put_open_state(state);
                 spin_lock(&sp->so_lock);
@@ -1795,38 +1813,38 @@ static void nfs4_state_start_reclaim_nograce(struct nfs_client *clp)
  static int nfs4_recovery_handle_error(struct nfs_client *clp, int error)
  {
         switch (error) {
-               case 0:
-                       break;
-               case -NFS4ERR_CB_PATH_DOWN:
-                       nfs40_handle_cb_pathdown(clp);
-                       break;
-               case -NFS4ERR_NO_GRACE:
-                       nfs4_state_end_reclaim_reboot(clp);
-                       break;
-               case -NFS4ERR_STALE_CLIENTID:
-                       set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
-                       nfs4_state_start_reclaim_reboot(clp);
-                       break;
-               case -NFS4ERR_EXPIRED:
-                       set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
-                       nfs4_state_start_reclaim_nograce(clp);
-                       break;
-               case -NFS4ERR_BADSESSION:
-               case -NFS4ERR_BADSLOT:
-               case -NFS4ERR_BAD_HIGH_SLOT:
-               case -NFS4ERR_DEADSESSION:
-               case -NFS4ERR_SEQ_FALSE_RETRY:
-               case -NFS4ERR_SEQ_MISORDERED:
-                       set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
-                       /* Zero session reset errors */
-                       break;
-               case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
-                       set_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
-                       break;
-               default:
-                       dprintk("%s: failed to handle error %d for server %s\n",
-                                       __func__, error, clp->cl_hostname);
-                       return error;
+       case 0:
+               break;
+       case -NFS4ERR_CB_PATH_DOWN:
+               nfs40_handle_cb_pathdown(clp);
+               break;
+       case -NFS4ERR_NO_GRACE:
+               nfs4_state_end_reclaim_reboot(clp);
+               break;
+       case -NFS4ERR_STALE_CLIENTID:
+               set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
+               nfs4_state_start_reclaim_reboot(clp);
+               break;
+       case -NFS4ERR_EXPIRED:
+               set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
+               nfs4_state_start_reclaim_nograce(clp);
+               break;
+       case -NFS4ERR_BADSESSION:
+       case -NFS4ERR_BADSLOT:
+       case -NFS4ERR_BAD_HIGH_SLOT:
+       case -NFS4ERR_DEADSESSION:
+       case -NFS4ERR_SEQ_FALSE_RETRY:
+       case -NFS4ERR_SEQ_MISORDERED:
+               set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
+               /* Zero session reset errors */
+               break;
+       case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
+               set_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
+               break;
+       default:
+               dprintk("%s: failed to handle error %d for server %s\n",
+                               __func__, error, clp->cl_hostname);
+               return error;
         }
         dprintk("%s: handled error %d for server %s\n", __func__, error,
                         clp->cl_hostname);
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c

index b7bde12..2fc8f6f 100644 (file)
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -3516,7 +3516,7 @@ static int decode_attr_exclcreat_supported(struct xdr_stream *xdr,
  static int decode_attr_filehandle(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_fh *fh)
  {
         __be32 *p;
-       int len;
+       u32 len;
  
         if (fh != NULL)
                 memset(fh, 0, sizeof(*fh));
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c

index bb5476a..5c4568a 100644 (file)
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -63,14 +63,14 @@ EXPORT_SYMBOL_GPL(nfs_pgheader_init);
  
  void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos)
  {
-       spin_lock(&hdr->lock);
-       if (!test_and_set_bit(NFS_IOHDR_ERROR, &hdr->flags)
-           || pos < hdr->io_start + hdr->good_bytes) {
+       unsigned int new = pos - hdr->io_start;
+
+       if (hdr->good_bytes > new) {
+               hdr->good_bytes = new;
                 clear_bit(NFS_IOHDR_EOF, &hdr->flags);
-               hdr->good_bytes = pos - hdr->io_start;
-               hdr->error = error;
+               if (!test_and_set_bit(NFS_IOHDR_ERROR, &hdr->flags))
+                       hdr->error = error;
         }
-       spin_unlock(&hdr->lock);
  }
  
  static inline struct nfs_page *
@@ -494,7 +494,6 @@ struct nfs_pgio_header *nfs_pgio_header_alloc(const struct nfs_rw_ops *ops)
  
         if (hdr) {
                 INIT_LIST_HEAD(&hdr->pages);
-               spin_lock_init(&hdr->lock);
                 hdr->rw_ops = ops;
         }
         return hdr;
@@ -1111,6 +1110,20 @@ static int nfs_pageio_add_request_mirror(struct nfs_pageio_descriptor *desc,
         return ret;
  }
  
+static void nfs_pageio_error_cleanup(struct nfs_pageio_descriptor *desc)
+{
+       u32 midx;
+       struct nfs_pgio_mirror *mirror;
+
+       if (!desc->pg_error)
+               return;
+
+       for (midx = 0; midx < desc->pg_mirror_count; midx++) {
+               mirror = &desc->pg_mirrors[midx];
+               desc->pg_completion_ops->error_cleanup(&mirror->pg_list);
+       }
+}
+
  int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
                            struct nfs_page *req)
  {
@@ -1161,25 +1174,7 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
         return 1;
  
  out_failed:
-       /*
-        * We might have failed before sending any reqs over wire.
-        * Clean up rest of the reqs in mirror pg_list.
-        */
-       if (desc->pg_error) {
-               struct nfs_pgio_mirror *mirror;
-               void (*func)(struct list_head *);
-
-               /* remember fatal errors */
-               if (nfs_error_is_fatal(desc->pg_error))
-                       nfs_context_set_write_error(req->wb_context,
-                                                   desc->pg_error);
-
-               func = desc->pg_completion_ops->error_cleanup;
-               for (midx = 0; midx < desc->pg_mirror_count; midx++) {
-                       mirror = &desc->pg_mirrors[midx];
-                       func(&mirror->pg_list);
-               }
-       }
+       nfs_pageio_error_cleanup(desc);
         return 0;
  }
  
@@ -1251,6 +1246,8 @@ void nfs_pageio_complete(struct nfs_pageio_descriptor *desc)
         for (midx = 0; midx < desc->pg_mirror_count; midx++)
                 nfs_pageio_complete_mirror(desc, midx);
  
+       if (desc->pg_error < 0)
+               nfs_pageio_error_cleanup(desc);
         if (desc->pg_ops->pg_cleanup)
                 desc->pg_ops->pg_cleanup(desc);
         nfs_pageio_cleanup_mirroring(desc);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c

index 7d9a51e..06cb90e 100644 (file)
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -965,7 +965,7 @@ static struct page **nfs4_alloc_pages(size_t size, gfp_t gfp_flags)
         struct page **pages;
         int i;
  
-       pages = kcalloc(size, sizeof(struct page *), gfp_flags);
+       pages = kmalloc_array(size, sizeof(struct page *), gfp_flags);
         if (!pages) {
                 dprintk("%s: can't alloc array of %zu pages\n", __func__, size);
                 return NULL;
@@ -975,7 +975,7 @@ static struct page **nfs4_alloc_pages(size_t size, gfp_t gfp_flags)
                 pages[i] = alloc_page(gfp_flags);
                 if (!pages[i]) {
                         dprintk("%s: failed to allocate page\n", __func__);
-                       nfs4_free_pages(pages, size);
+                       nfs4_free_pages(pages, i);
                         return NULL;
                 }
         }
@@ -991,6 +991,7 @@ pnfs_alloc_init_layoutget_args(struct inode *ino,
            gfp_t gfp_flags)
  {
         struct nfs_server *server = pnfs_find_server(ino, ctx);
+       size_t max_reply_sz = server->pnfs_curr_ld->max_layoutget_response;
         size_t max_pages = max_response_pages(server);
         struct nfs4_layoutget *lgp;
  
@@ -1000,6 +1001,12 @@ pnfs_alloc_init_layoutget_args(struct inode *ino,
         if (lgp == NULL)
                 return NULL;
  
+       if (max_reply_sz) {
+               size_t npages = (max_reply_sz + PAGE_SIZE - 1) >> PAGE_SHIFT;
+               if (npages < max_pages)
+                       max_pages = npages;
+       }
+
         lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags);
         if (!lgp->args.layout.pages) {
                 kfree(lgp);
@@ -1332,6 +1339,7 @@ bool pnfs_roc(struct inode *ino,
         if (!nfs_have_layout(ino))
                 return false;
  retry:
+       rcu_read_lock();
         spin_lock(&ino->i_lock);
         lo = nfsi->layout;
         if (!lo || !pnfs_layout_is_valid(lo) ||
@@ -1342,6 +1350,7 @@ retry:
         pnfs_get_layout_hdr(lo);
         if (test_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) {
                 spin_unlock(&ino->i_lock);
+               rcu_read_unlock();
                 wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN,
                                 TASK_UNINTERRUPTIBLE);
                 pnfs_put_layout_hdr(lo);
@@ -1355,7 +1364,7 @@ retry:
                 skip_read = true;
         }
  
-       list_for_each_entry(ctx, &nfsi->open_files, list) {
+       list_for_each_entry_rcu(ctx, &nfsi->open_files, list) {
                 state = ctx->state;
                 if (state == NULL)
                         continue;
@@ -1403,6 +1412,7 @@ retry:
  
  out_noroc:
         spin_unlock(&ino->i_lock);
+       rcu_read_unlock();
         pnfs_layoutcommit_inode(ino, true);
         if (roc) {
                 struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld;
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h

index ece367e..e2e9fcd 100644 (file)
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -125,6 +125,7 @@ struct pnfs_layoutdriver_type {
         struct module *owner;
         unsigned flags;
         unsigned max_deviceinfo_size;
+       unsigned max_layoutget_response;
  
         int (*set_layoutdriver) (struct nfs_server *, const struct nfs_fh *);
         int (*clear_layoutdriver) (struct nfs_server *);
diff --git a/fs/nfs/read.c b/fs/nfs/read.c

index 48d7277..f9f1978 100644 (file)
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -276,16 +276,14 @@ static void nfs_readpage_result(struct rpc_task *task,
                                 struct nfs_pgio_header *hdr)
  {
         if (hdr->res.eof) {
-               loff_t bound;
+               loff_t pos = hdr->args.offset + hdr->res.count;
+               unsigned int new = pos - hdr->io_start;
  
-               bound = hdr->args.offset + hdr->res.count;
-               spin_lock(&hdr->lock);
-               if (bound < hdr->io_start + hdr->good_bytes) {
+               if (hdr->good_bytes > new) {
+                       hdr->good_bytes = new;
                         set_bit(NFS_IOHDR_EOF, &hdr->flags);
                         clear_bit(NFS_IOHDR_ERROR, &hdr->flags);
-                       hdr->good_bytes = bound - hdr->io_start;
                 }
-               spin_unlock(&hdr->lock);
         } else if (hdr->res.count < hdr->args.count)
                 nfs_readpage_retry(task, hdr);
  }
diff --git a/include/dt-bindings/clock/exynos3250.h b/include/dt-bindings/clock/exynos3250.h

index c796ff0..fe82140 100644 (file)
--- a/include/dt-bindings/clock/exynos3250.h
+++ b/include/dt-bindings/clock/exynos3250.h
@@ -1,11 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
  /*
   * Copyright (c) 2014 Samsung Electronics Co., Ltd.
   *     Author: Tomasz Figa <t.figa@samsung.com>
   *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
   * Device Tree binding constants for Samsung Exynos3250 clock controllers.
   */
  
diff --git a/include/dt-bindings/clock/exynos4.h b/include/dt-bindings/clock/exynos4.h

index e9f9d40..5b1d685 100644 (file)
--- a/include/dt-bindings/clock/exynos4.h
+++ b/include/dt-bindings/clock/exynos4.h
@@ -1,13 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
  /*
   * Copyright (c) 2013 Samsung Electronics Co., Ltd.
   * Author: Andrzej Hajda <a.hajda@samsung.com>
   *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
   * Device Tree binding constants for Exynos4 clock controller.
-*/
+ */
  
  #ifndef _DT_BINDINGS_CLOCK_EXYNOS_4_H
  #define _DT_BINDINGS_CLOCK_EXYNOS_4_H
diff --git a/include/dt-bindings/clock/exynos5250.h b/include/dt-bindings/clock/exynos5250.h

index 15508ad..bc8a3c5 100644 (file)
--- a/include/dt-bindings/clock/exynos5250.h
+++ b/include/dt-bindings/clock/exynos5250.h
@@ -1,13 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
  /*
   * Copyright (c) 2013 Samsung Electronics Co., Ltd.
   * Author: Andrzej Hajda <a.hajda@samsung.com>
   *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
   * Device Tree binding constants for Exynos5250 clock controller.
-*/
+ */
  
  #ifndef _DT_BINDINGS_CLOCK_EXYNOS_5250_H
  #define _DT_BINDINGS_CLOCK_EXYNOS_5250_H
diff --git a/include/dt-bindings/clock/exynos5260-clk.h b/include/dt-bindings/clock/exynos5260-clk.h

index a4bac9a..98a58cb 100644 (file)
--- a/include/dt-bindings/clock/exynos5260-clk.h
+++ b/include/dt-bindings/clock/exynos5260-clk.h
@@ -1,13 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
  /*
   * Copyright (c) 2014 Samsung Electronics Co., Ltd.
   * Author: Rahul Sharma <rahul.sharma@samsung.com>
   *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
   * Provides Constants for Exynos5260 clocks.
-*/
+ */
  
  #ifndef _DT_BINDINGS_CLK_EXYNOS5260_H
  #define _DT_BINDINGS_CLK_EXYNOS5260_H
diff --git a/include/dt-bindings/clock/exynos5410.h b/include/dt-bindings/clock/exynos5410.h

index 6cb4e90..f179eab 100644 (file)
--- a/include/dt-bindings/clock/exynos5410.h
+++ b/include/dt-bindings/clock/exynos5410.h
@@ -1,13 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
  /*
   * Copyright (c) 2014 Samsung Electronics Co., Ltd.
   * Copyright (c) 2016 Krzysztof Kozlowski
   *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
   * Device Tree binding constants for Exynos5410 clock controller.
-*/
+ */
  
  #ifndef _DT_BINDINGS_CLOCK_EXYNOS_5410_H
  #define _DT_BINDINGS_CLOCK_EXYNOS_5410_H
diff --git a/include/dt-bindings/clock/exynos5420.h b/include/dt-bindings/clock/exynos5420.h

index 2740ae0..355f469 100644 (file)
--- a/include/dt-bindings/clock/exynos5420.h
+++ b/include/dt-bindings/clock/exynos5420.h
@@ -1,13 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
  /*
   * Copyright (c) 2013 Samsung Electronics Co., Ltd.
   * Author: Andrzej Hajda <a.hajda@samsung.com>
   *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
   * Device Tree binding constants for Exynos5420 clock controller.
-*/
+ */
  
  #ifndef _DT_BINDINGS_CLOCK_EXYNOS_5420_H
  #define _DT_BINDINGS_CLOCK_EXYNOS_5420_H
diff --git a/include/dt-bindings/clock/exynos5433.h b/include/dt-bindings/clock/exynos5433.h

index be39d23..98bd85c 100644 (file)
--- a/include/dt-bindings/clock/exynos5433.h
+++ b/include/dt-bindings/clock/exynos5433.h
@@ -1,10 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
  /*
   * Copyright (c) 2014 Samsung Electronics Co., Ltd.
   * Author: Chanwoo Choi <cw00.choi@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
   */
  
  #ifndef _DT_BINDINGS_CLOCK_EXYNOS5433_H
diff --git a/include/dt-bindings/clock/exynos7-clk.h b/include/dt-bindings/clock/exynos7-clk.h

index 10c5586..fce33c7 100644 (file)
--- a/include/dt-bindings/clock/exynos7-clk.h
+++ b/include/dt-bindings/clock/exynos7-clk.h
@@ -1,11 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
  /*
   * Copyright (c) 2014 Samsung Electronics Co., Ltd.
   * Author: Naveen Krishna Ch <naveenkrishna.ch@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
-*/
+ */
  
  #ifndef _DT_BINDINGS_CLOCK_EXYNOS7_H
  #define _DT_BINDINGS_CLOCK_EXYNOS7_H
diff --git a/include/dt-bindings/clock/s3c2410.h b/include/dt-bindings/clock/s3c2410.h

index 352a767..0fb65c3 100644 (file)
--- a/include/dt-bindings/clock/s3c2410.h
+++ b/include/dt-bindings/clock/s3c2410.h
@@ -1,10 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
  /*
   * Copyright (c) 2013 Heiko Stuebner <heiko@sntech.de>
   *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
   * Device Tree binding constants clock controllers of Samsung S3C2410 and later.
   */
  
diff --git a/include/dt-bindings/clock/s3c2412.h b/include/dt-bindings/clock/s3c2412.h

index aac1dcf..b465615 100644 (file)
--- a/include/dt-bindings/clock/s3c2412.h
+++ b/include/dt-bindings/clock/s3c2412.h
@@ -1,10 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
  /*
   * Copyright (c) 2013 Heiko Stuebner <heiko@sntech.de>
   *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
   * Device Tree binding constants clock controllers of Samsung S3C2412.
   */
  
diff --git a/include/dt-bindings/clock/s3c2443.h b/include/dt-bindings/clock/s3c2443.h

index f3ba68a..a9d2f10 100644 (file)
--- a/include/dt-bindings/clock/s3c2443.h
+++ b/include/dt-bindings/clock/s3c2443.h
@@ -1,10 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
  /*
   * Copyright (c) 2013 Heiko Stuebner <heiko@sntech.de>
   *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
   * Device Tree binding constants clock controllers of Samsung S3C2443 and later.
   */
  
diff --git a/include/dt-bindings/interrupt-controller/arm-gic.h b/include/dt-bindings/interrupt-controller/arm-gic.h

index 0c85f65..35b6f69 100644 (file)
--- a/include/dt-bindings/interrupt-controller/arm-gic.h
+++ b/include/dt-bindings/interrupt-controller/arm-gic.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
  /*
   * This header provides constants for the ARM GIC.
   */
diff --git a/include/dt-bindings/interrupt-controller/irq.h b/include/dt-bindings/interrupt-controller/irq.h

index a8b3105..9e3d183 100644 (file)
--- a/include/dt-bindings/interrupt-controller/irq.h
+++ b/include/dt-bindings/interrupt-controller/irq.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
  /*
   * This header provides constants for most IRQ bindings.
   *
diff --git a/include/dt-bindings/thermal/thermal_exynos.h b/include/dt-bindings/thermal/thermal_exynos.h

index 0646500..642e4e7 100644 (file)
--- a/include/dt-bindings/thermal/thermal_exynos.h
+++ b/include/dt-bindings/thermal/thermal_exynos.h
@@ -1,19 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
  /*
   * thermal_exynos.h - Samsung EXYNOS TMU device tree definitions
   *
   *  Copyright (C) 2014 Samsung Electronics
   *  Lukasz Majewski <l.majewski@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
   */
  
  #ifndef _EXYNOS_THERMAL_TMU_DT_H
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h

index 9578c7a..093a818 100644 (file)
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -283,8 +283,6 @@ enum req_opf {
         REQ_OP_FLUSH            = 2,
         /* discard sectors */
         REQ_OP_DISCARD          = 3,
-       /* get zone information */
-       REQ_OP_ZONE_REPORT      = 4,
         /* securely erase sectors */
         REQ_OP_SECURE_ERASE     = 5,
         /* reset a zone write pointer */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h

index 7d42372..4293dc1 100644 (file)
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -396,16 +396,13 @@ struct queue_limits {
  
  #ifdef CONFIG_BLK_DEV_ZONED
  
-struct blk_zone_report_hdr {
-       unsigned int    nr_zones;
-       u8              padding[60];
-};
-
+extern unsigned int blkdev_nr_zones(struct block_device *bdev);
  extern int blkdev_report_zones(struct block_device *bdev,
                                sector_t sector, struct blk_zone *zones,
                                unsigned int *nr_zones, gfp_t gfp_mask);
  extern int blkdev_reset_zones(struct block_device *bdev, sector_t sectors,
                               sector_t nr_sectors, gfp_t gfp_mask);
+extern int blk_revalidate_disk_zones(struct gendisk *disk);
  
  extern int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
                                      unsigned int cmd, unsigned long arg);
@@ -414,6 +411,16 @@ extern int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
  
  #else /* CONFIG_BLK_DEV_ZONED */
  
+static inline unsigned int blkdev_nr_zones(struct block_device *bdev)
+{
+       return 0;
+}
+
+static inline int blk_revalidate_disk_zones(struct gendisk *disk)
+{
+       return 0;
+}
+
  static inline int blkdev_report_zones_ioctl(struct block_device *bdev,
                                             fmode_t mode, unsigned int cmd,
                                             unsigned long arg)
@@ -806,6 +813,11 @@ static inline unsigned int blk_queue_zone_sectors(struct request_queue *q)
  }
  
  #ifdef CONFIG_BLK_DEV_ZONED
+static inline unsigned int blk_queue_nr_zones(struct request_queue *q)
+{
+       return blk_queue_is_zoned(q) ? q->nr_zones : 0;
+}
+
  static inline unsigned int blk_queue_zone_no(struct request_queue *q,
                                              sector_t sector)
  {
@@ -821,6 +833,11 @@ static inline bool blk_queue_zone_is_seq(struct request_queue *q,
                 return false;
         return test_bit(blk_queue_zone_no(q, sector), q->seq_zones_bitmap);
  }
+#else /* CONFIG_BLK_DEV_ZONED */
+static inline unsigned int blk_queue_nr_zones(struct request_queue *q)
+{
+       return 0;
+}
  #endif /* CONFIG_BLK_DEV_ZONED */
  
  static inline bool rq_is_sync(struct request *rq)
@@ -1852,6 +1869,9 @@ struct block_device_operations {
         int (*getgeo)(struct block_device *, struct hd_geometry *);
         /* this callback is with swap_lock and sometimes page table lock held */
         void (*swap_slot_free_notify) (struct block_device *, unsigned long);
+       int (*report_zones)(struct gendisk *, sector_t sector,
+                           struct blk_zone *zones, unsigned int *nr_zones,
+                           gfp_t gfp_mask);
         struct module *owner;
         const struct pr_ops *pr_ops;
  };
diff --git a/include/linux/cuda.h b/include/linux/cuda.h

index 056867f..45bfe9d 100644 (file)
--- a/include/linux/cuda.h
+++ b/include/linux/cuda.h
@@ -8,6 +8,7 @@
  #ifndef _LINUX_CUDA_H
  #define _LINUX_CUDA_H
  
+#include <linux/rtc.h>
  #include <uapi/linux/cuda.h>
  
  
@@ -16,4 +17,7 @@ extern int cuda_request(struct adb_request *req,
                         void (*done)(struct adb_request *), int nbytes, ...);
  extern void cuda_poll(void);
  
+extern time64_t cuda_get_time(void);
+extern int cuda_set_rtc_time(struct rtc_time *tm);
+
  #endif /* _LINUX_CUDA_H */
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h

index 6fb0808..e528bae 100644 (file)
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -26,9 +26,8 @@ enum dm_queue_mode {
         DM_TYPE_NONE             = 0,
         DM_TYPE_BIO_BASED        = 1,
         DM_TYPE_REQUEST_BASED    = 2,
-       DM_TYPE_MQ_REQUEST_BASED = 3,
-       DM_TYPE_DAX_BIO_BASED    = 4,
-       DM_TYPE_NVME_BIO_BASED   = 5,
+       DM_TYPE_DAX_BIO_BASED    = 3,
+       DM_TYPE_NVME_BIO_BASED   = 4,
  };
  
  typedef enum { STATUSTYPE_INFO, STATUSTYPE_TABLE } status_type_t;
@@ -92,6 +91,11 @@ typedef int (*dm_message_fn) (struct dm_target *ti, unsigned argc, char **argv,
  
  typedef int (*dm_prepare_ioctl_fn) (struct dm_target *ti, struct block_device **bdev);
  
+typedef int (*dm_report_zones_fn) (struct dm_target *ti, sector_t sector,
+                                  struct blk_zone *zones,
+                                  unsigned int *nr_zones,
+                                  gfp_t gfp_mask);
+
  /*
   * These iteration functions are typically used to check (and combine)
   * properties of underlying devices.
@@ -180,6 +184,9 @@ struct target_type {
         dm_status_fn status;
         dm_message_fn message;
         dm_prepare_ioctl_fn prepare_ioctl;
+#ifdef CONFIG_BLK_DEV_ZONED
+       dm_report_zones_fn report_zones;
+#endif
         dm_busy_fn busy;
         dm_iterate_devices_fn iterate_devices;
         dm_io_hints_fn io_hints;
@@ -420,8 +427,8 @@ struct gendisk *dm_disk(struct mapped_device *md);
  int dm_suspended(struct dm_target *ti);
  int dm_noflush_suspending(struct dm_target *ti);
  void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors);
-void dm_remap_zone_report(struct dm_target *ti, struct bio *bio,
-                         sector_t start);
+void dm_remap_zone_report(struct dm_target *ti, sector_t start,
+                         struct blk_zone *zones, unsigned int *nr_zones);
  union map_info *dm_get_rq_mapinfo(struct request *rq);
  
  struct queue_limits *dm_get_queue_limits(struct mapped_device *md);
@@ -490,6 +497,7 @@ sector_t dm_table_get_size(struct dm_table *t);
  unsigned int dm_table_get_num_targets(struct dm_table *t);
  fmode_t dm_table_get_mode(struct dm_table *t);
  struct mapped_device *dm_table_get_md(struct dm_table *t);
+const char *dm_table_device_name(struct dm_table *t);
  
  /*
   * Trigger an event.
diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h

index fbca184..bd73e7a 100644 (file)
--- a/include/linux/dma-direct.h
+++ b/include/linux/dma-direct.h
@@ -5,6 +5,8 @@
  #include <linux/dma-mapping.h>
  #include <linux/mem_encrypt.h>
  
+#define DIRECT_MAPPING_ERROR           0
+
  #ifdef CONFIG_ARCH_HAS_PHYS_TO_DMA
  #include <asm/dma-direct.h>
  #else
diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h

index 5160f06..9d3f668 100644 (file)
--- a/include/linux/fsl/mc.h
+++ b/include/linux/fsl/mc.h
@@ -351,6 +351,14 @@ int mc_send_command(struct fsl_mc_io *mc_io, struct fsl_mc_command *cmd);
  #define dev_is_fsl_mc(_dev) (0)
  #endif
  
+/* Macro to check if a device is a container device */
+#define fsl_mc_is_cont_dev(_dev) (to_fsl_mc_device(_dev)->flags & \
+       FSL_MC_IS_DPRC)
+
+/* Macro to get the container device of a MC device */
+#define fsl_mc_cont_dev(_dev) (fsl_mc_is_cont_dev(_dev) ? \
+       (_dev) : (_dev)->parent)
+
  /*
   * module_fsl_mc_driver() - Helper macro for drivers that don't do
   * anything special in module init/exit.  This eliminates a lot of
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h

index 28004d7..b0ae258 100644 (file)
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -72,6 +72,42 @@
  #define        DMAR_PEDATA_REG 0xe4    /* Page request event interrupt data register */
  #define        DMAR_PEADDR_REG 0xe8    /* Page request event interrupt addr register */
  #define        DMAR_PEUADDR_REG 0xec   /* Page request event Upper address register */
+#define DMAR_MTRRCAP_REG 0x100 /* MTRR capability register */
+#define DMAR_MTRRDEF_REG 0x108 /* MTRR default type register */
+#define DMAR_MTRR_FIX64K_00000_REG 0x120 /* MTRR Fixed range registers */
+#define DMAR_MTRR_FIX16K_80000_REG 0x128
+#define DMAR_MTRR_FIX16K_A0000_REG 0x130
+#define DMAR_MTRR_FIX4K_C0000_REG 0x138
+#define DMAR_MTRR_FIX4K_C8000_REG 0x140
+#define DMAR_MTRR_FIX4K_D0000_REG 0x148
+#define DMAR_MTRR_FIX4K_D8000_REG 0x150
+#define DMAR_MTRR_FIX4K_E0000_REG 0x158
+#define DMAR_MTRR_FIX4K_E8000_REG 0x160
+#define DMAR_MTRR_FIX4K_F0000_REG 0x168
+#define DMAR_MTRR_FIX4K_F8000_REG 0x170
+#define DMAR_MTRR_PHYSBASE0_REG 0x180 /* MTRR Variable range registers */
+#define DMAR_MTRR_PHYSMASK0_REG 0x188
+#define DMAR_MTRR_PHYSBASE1_REG 0x190
+#define DMAR_MTRR_PHYSMASK1_REG 0x198
+#define DMAR_MTRR_PHYSBASE2_REG 0x1a0
+#define DMAR_MTRR_PHYSMASK2_REG 0x1a8
+#define DMAR_MTRR_PHYSBASE3_REG 0x1b0
+#define DMAR_MTRR_PHYSMASK3_REG 0x1b8
+#define DMAR_MTRR_PHYSBASE4_REG 0x1c0
+#define DMAR_MTRR_PHYSMASK4_REG 0x1c8
+#define DMAR_MTRR_PHYSBASE5_REG 0x1d0
+#define DMAR_MTRR_PHYSMASK5_REG 0x1d8
+#define DMAR_MTRR_PHYSBASE6_REG 0x1e0
+#define DMAR_MTRR_PHYSMASK6_REG 0x1e8
+#define DMAR_MTRR_PHYSBASE7_REG 0x1f0
+#define DMAR_MTRR_PHYSMASK7_REG 0x1f8
+#define DMAR_MTRR_PHYSBASE8_REG 0x200
+#define DMAR_MTRR_PHYSMASK8_REG 0x208
+#define DMAR_MTRR_PHYSBASE9_REG 0x210
+#define DMAR_MTRR_PHYSMASK9_REG 0x218
+#define DMAR_VCCAP_REG         0xe00 /* Virtual command capability register */
+#define DMAR_VCMD_REG          0xe10 /* Virtual command register */
+#define DMAR_VCRSP_REG         0xe20 /* Virtual command response register */
  
  #define OFFSET_STRIDE          (9)
  
@@ -389,6 +425,33 @@ struct pasid_entry;
  struct pasid_state_entry;
  struct page_req_dsc;
  
+/*
+ * 0: Present
+ * 1-11: Reserved
+ * 12-63: Context Ptr (12 - (haw-1))
+ * 64-127: Reserved
+ */
+struct root_entry {
+       u64     lo;
+       u64     hi;
+};
+
+/*
+ * low 64 bits:
+ * 0: present
+ * 1: fault processing disable
+ * 2-3: translation type
+ * 12-63: address space root
+ * high 64 bits:
+ * 0-2: address width
+ * 3-6: aval
+ * 8-23: domain id
+ */
+struct context_entry {
+       u64 lo;
+       u64 hi;
+};
+
  struct dmar_domain {
         int     nid;                    /* node id */
  
@@ -558,6 +621,15 @@ extern int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_
  extern struct intel_iommu *intel_svm_device_to_iommu(struct device *dev);
  #endif
  
+#ifdef CONFIG_INTEL_IOMMU_DEBUGFS
+void intel_iommu_debugfs_init(void);
+#else
+static inline void intel_iommu_debugfs_init(void) {}
+#endif /* CONFIG_INTEL_IOMMU_DEBUGFS */
+
  extern const struct attribute_group *intel_iommu_groups[];
+bool context_present(struct context_entry *context);
+struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
+                                        u8 devfn, int alloc);
  
  #endif
diff --git a/include/linux/iommu.h b/include/linux/iommu.h

index 87994c2..a1d28f4 100644 (file)
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -124,6 +124,7 @@ enum iommu_attr {
         DOMAIN_ATTR_FSL_PAMU_ENABLE,
         DOMAIN_ATTR_FSL_PAMUV1,
         DOMAIN_ATTR_NESTING,    /* two stages of translation */
+       DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE,
         DOMAIN_ATTR_MAX,
  };
  
@@ -181,8 +182,6 @@ struct iommu_resv_region {
   * @apply_resv_region: Temporary helper call-back for iova reserved ranges
   * @domain_window_enable: Configure and enable a particular window for a domain
   * @domain_window_disable: Disable a particular window for a domain
- * @domain_set_windows: Set the number of windows for a domain
- * @domain_get_windows: Return the number of windows for a domain
   * @of_xlate: add OF master IDs to iommu grouping
   * @pgsize_bitmap: bitmap of all possible supported page sizes
   */
@@ -223,10 +222,6 @@ struct iommu_ops {
         int (*domain_window_enable)(struct iommu_domain *domain, u32 wnd_nr,
                                     phys_addr_t paddr, u64 size, int prot);
         void (*domain_window_disable)(struct iommu_domain *domain, u32 wnd_nr);
-       /* Set the number of windows per domain */
-       int (*domain_set_windows)(struct iommu_domain *domain, u32 w_count);
-       /* Get the number of windows per domain */
-       u32 (*domain_get_windows)(struct iommu_domain *domain);
  
         int (*of_xlate)(struct device *dev, struct of_phandle_args *args);
         bool (*is_attach_deferred)(struct iommu_domain *domain, struct device *dev);
@@ -293,6 +288,7 @@ extern int iommu_attach_device(struct iommu_domain *domain,
  extern void iommu_detach_device(struct iommu_domain *domain,
                                 struct device *dev);
  extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev);
+extern struct iommu_domain *iommu_get_dma_domain(struct device *dev);
  extern int iommu_map(struct iommu_domain *domain, unsigned long iova,
                      phys_addr_t paddr, size_t size, int prot);
  extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova,
@@ -377,6 +373,8 @@ static inline void iommu_tlb_sync(struct iommu_domain *domain)
  extern struct iommu_group *pci_device_group(struct device *dev);
  /* Generic device grouping function */
  extern struct iommu_group *generic_device_group(struct device *dev);
+/* FSL-MC device grouping function */
+struct iommu_group *fsl_mc_device_group(struct device *dev);
  
  /**
   * struct iommu_fwspec - per-device IOMMU instance data
diff --git a/include/linux/iova.h b/include/linux/iova.h

index 928442d..0b93bf9 100644 (file)
--- a/include/linux/iova.h
+++ b/include/linux/iova.h
@@ -75,6 +75,7 @@ struct iova_domain {
         unsigned long   granule;        /* pfn granularity for this domain */
         unsigned long   start_pfn;      /* Lower limit for this domain */
         unsigned long   dma_32bit_pfn;
+       unsigned long   max32_alloc_size; /* Size of last failed allocation */
         struct iova     anchor;         /* rbtree lookup anchor */
         struct iova_rcache rcaches[IOVA_RANGE_CACHE_MAX_SIZE];  /* IOVA range caches */
  
diff --git a/include/linux/libfdt_env.h b/include/linux/libfdt_env.h

index c6ac1fe..edb0f0c 100644 (file)
--- a/include/linux/libfdt_env.h
+++ b/include/linux/libfdt_env.h
@@ -2,6 +2,7 @@
  #ifndef LIBFDT_ENV_H
  #define LIBFDT_ENV_H
  
+#include <linux/kernel.h>      /* For INT_MAX */
  #include <linux/string.h>
  
  #include <asm/byteorder.h>
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h

index a0831e9..6e0417c 100644 (file)
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -62,6 +62,7 @@ struct nfs_lock_context {
         struct nfs_open_context *open_context;
         fl_owner_t lockowner;
         atomic_t io_count;
+       struct rcu_head rcu_head;
  };
  
  struct nfs4_state;
@@ -82,6 +83,7 @@ struct nfs_open_context {
  
         struct list_head list;
         struct nfs4_threshold   *mdsthreshold;
+       struct rcu_head rcu_head;
  };
  
  struct nfs_open_dir_context {
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h

index bf39d9c..0fc0b91 100644 (file)
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -228,6 +228,9 @@ struct nfs_server {
         unsigned short          mountd_port;
         unsigned short          mountd_protocol;
         struct rpc_wait_queue   uoc_rpcwaitq;
+
+       /* XDR related information */
+       unsigned int            read_hdrsize;
  };
  
  /* Server capabilities */
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h

index bd1c889..0e01625 100644 (file)
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -608,8 +608,13 @@ struct nfs_pgio_args {
         __u32                   count;
         unsigned int            pgbase;
         struct page **          pages;
-       const u32 *             bitmask;        /* used by write */
-       enum nfs3_stable_how    stable;         /* used by write */
+       union {
+               unsigned int            replen;                 /* used by read */
+               struct {
+                       const u32 *             bitmask;        /* used by write */
+                       enum nfs3_stable_how    stable;         /* used by write */
+               };
+       };
  };
  
  struct nfs_pgio_res {
@@ -617,10 +622,16 @@ struct nfs_pgio_res {
         struct nfs_fattr *      fattr;
         __u32                   count;
         __u32                   op_status;
-       int                     eof;            /* used by read */
-       struct nfs_writeverf *  verf;           /* used by write */
-       const struct nfs_server *server;        /* used by write */
-
+       union {
+               struct {
+                       unsigned int            replen;         /* used by read */
+                       int                     eof;            /* used by read */
+               };
+               struct {
+                       struct nfs_writeverf *  verf;           /* used by write */
+                       const struct nfs_server *server;        /* used by write */
+               };
+       };
  };
  
  /*
@@ -1471,11 +1482,10 @@ struct nfs_pgio_header {
         const struct nfs_rw_ops *rw_ops;
         struct nfs_io_completion *io_completion;
         struct nfs_direct_req   *dreq;
-       spinlock_t              lock;
-       /* fields protected by lock */
+
         int                     pnfs_error;
         int                     error;          /* merge with pnfs_error */
-       unsigned long           good_bytes;     /* boundary of good data */
+       unsigned int            good_bytes;     /* boundary of good data */
         unsigned long           flags;
  
         /*
diff --git a/include/linux/of.h b/include/linux/of.h

index 99b0ebf..ab96025 100644 (file)
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -247,12 +247,6 @@ static inline unsigned long of_read_ulong(const __be32 *cell, int size)
  #include <asm/prom.h>
  #endif
  
-/* Default #address and #size cells.  Allow arch asm/prom.h to override */
-#if !defined(OF_ROOT_NODE_ADDR_CELLS_DEFAULT)
-#define OF_ROOT_NODE_ADDR_CELLS_DEFAULT 1
-#define OF_ROOT_NODE_SIZE_CELLS_DEFAULT 1
-#endif
-
  #define OF_IS_DYNAMIC(x) test_bit(OF_DYNAMIC, &x->_flags)
  #define OF_MARK_DYNAMIC(x) set_bit(OF_DYNAMIC, &x->_flags)
  
@@ -353,6 +347,8 @@ extern const void *of_get_property(const struct device_node *node,
                                 const char *name,
                                 int *lenp);
  extern struct device_node *of_get_cpu_node(int cpu, unsigned int *thread);
+extern struct device_node *of_get_next_cpu_node(struct device_node *prev);
+
  #define for_each_property_of_node(dn, pp) \
         for (pp = dn->properties; pp != NULL; pp = pp->next)
  
@@ -550,6 +546,10 @@ bool of_console_check(struct device_node *dn, char *name, int index);
  
  extern int of_cpu_node_to_id(struct device_node *np);
  
+int of_map_rid(struct device_node *np, u32 rid,
+              const char *map_name, const char *map_mask_name,
+              struct device_node **target, u32 *id_out);
+
  #else /* CONFIG_OF */
  
  static inline void of_core_init(void)
@@ -754,6 +754,11 @@ static inline struct device_node *of_get_cpu_node(int cpu,
         return NULL;
  }
  
+static inline struct device_node *of_get_next_cpu_node(struct device_node *prev)
+{
+       return NULL;
+}
+
  static inline int of_n_addr_cells(struct device_node *np)
  {
         return 0;
@@ -952,6 +957,13 @@ static inline int of_cpu_node_to_id(struct device_node *np)
         return -ENODEV;
  }
  
+static inline int of_map_rid(struct device_node *np, u32 rid,
+                            const char *map_name, const char *map_mask_name,
+                            struct device_node **target, u32 *id_out)
+{
+       return -EINVAL;
+}
+
  #define of_match_ptr(_ptr)     NULL
  #define of_match_node(_matches, _node) NULL
  #endif /* CONFIG_OF */
@@ -990,7 +1002,7 @@ static inline struct device_node *of_find_matching_node(
  
  static inline const char *of_node_get_device_type(const struct device_node *np)
  {
-       return of_get_property(np, "type", NULL);
+       return of_get_property(np, "device_type", NULL);
  }
  
  static inline bool of_node_is_type(const struct device_node *np, const char *type)
@@ -1217,6 +1229,10 @@ static inline int of_property_read_s32(const struct device_node *np,
         for (child = of_get_next_available_child(parent, NULL); child != NULL; \
              child = of_get_next_available_child(parent, child))
  
+#define for_each_of_cpu_node(cpu) \
+       for (cpu = of_get_next_cpu_node(NULL); cpu != NULL; \
+            cpu = of_get_next_cpu_node(cpu))
+
  #define for_each_node_with_property(dn, prop_name) \
         for (dn = of_find_node_with_property(NULL, prop_name); dn; \
              dn = of_find_node_with_property(dn, prop_name))
diff --git a/include/linux/of_pci.h b/include/linux/of_pci.h

index e83d87f..21a89c4 100644 (file)
--- a/include/linux/of_pci.h
+++ b/include/linux/of_pci.h
@@ -14,9 +14,6 @@ struct device_node *of_pci_find_child_device(struct device_node *parent,
                                              unsigned int devfn);
  int of_pci_get_devfn(struct device_node *np);
  void of_pci_check_probe_only(void);
-int of_pci_map_rid(struct device_node *np, u32 rid,
-                  const char *map_name, const char *map_mask_name,
-                  struct device_node **target, u32 *id_out);
  #else
  static inline struct device_node *of_pci_find_child_device(struct device_node *parent,
                                              unsigned int devfn)
@@ -29,13 +26,6 @@ static inline int of_pci_get_devfn(struct device_node *np)
         return -EINVAL;
  }
  
-static inline int of_pci_map_rid(struct device_node *np, u32 rid,
-                       const char *map_name, const char *map_mask_name,
-                       struct device_node **target, u32 *id_out)
-{
-       return -EINVAL;
-}
-
  static inline void of_pci_check_probe_only(void) { }
  #endif
  
diff --git a/include/linux/pmu.h b/include/linux/pmu.h

index 9ac8fc6..52453a2 100644 (file)
--- a/include/linux/pmu.h
+++ b/include/linux/pmu.h
@@ -9,6 +9,7 @@
  #ifndef _LINUX_PMU_H
  #define _LINUX_PMU_H
  
+#include <linux/rtc.h>
  #include <uapi/linux/pmu.h>
  
  
@@ -36,6 +37,9 @@ static inline void pmu_resume(void)
  
  extern void pmu_enable_irled(int on);
  
+extern time64_t pmu_get_time(void);
+extern int pmu_set_rtc_time(struct rtc_time *tm);
+
  extern void pmu_restart(void);
  extern void pmu_shutdown(void);
  extern void pmu_unlock(void);
diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h

index 58a6765..c4db942 100644 (file)
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -67,7 +67,7 @@ struct rpc_cred {
         const struct rpc_credops *cr_ops;
         unsigned long           cr_expire;      /* when to gc */
         unsigned long           cr_flags;       /* various flags */
-       atomic_t                cr_count;       /* ref count */
+       refcount_t              cr_count;       /* ref count */
  
         kuid_t                  cr_uid;
  
@@ -100,7 +100,7 @@ struct rpc_auth {
                                                  * differ from the flavor in
                                                  * au_ops->au_flavor in gss
                                                  * case) */
-       atomic_t                au_count;       /* Reference counter */
+       refcount_t              au_count;       /* Reference counter */
  
         struct rpc_cred_cache * au_credcache;
         /* per-flavor data */
@@ -157,6 +157,7 @@ struct rpc_credops {
         int                     (*crkey_timeout)(struct rpc_cred *);
         bool                    (*crkey_to_expire)(struct rpc_cred *);
         char *                  (*crstringify_acceptor)(struct rpc_cred *);
+       bool                    (*crneed_reencode)(struct rpc_task *);
  };
  
  extern const struct rpc_authops        authunix_ops;
@@ -192,6 +193,7 @@ __be32 *            rpcauth_marshcred(struct rpc_task *, __be32 *);
  __be32 *               rpcauth_checkverf(struct rpc_task *, __be32 *);
  int                    rpcauth_wrap_req(struct rpc_task *task, kxdreproc_t encode, void *rqstp, __be32 *data, void *obj);
  int                    rpcauth_unwrap_resp(struct rpc_task *task, kxdrdproc_t decode, void *rqstp, __be32 *data, void *obj);
+bool                   rpcauth_xmit_need_reencode(struct rpc_task *task);
  int                    rpcauth_refreshcred(struct rpc_task *);
  void                   rpcauth_invalcred(struct rpc_task *);
  int                    rpcauth_uptodatecred(struct rpc_task *);
@@ -204,11 +206,11 @@ bool                      rpcauth_cred_key_to_expire(struct rpc_auth *, struct rpc_cred *);
  char *                 rpcauth_stringify_acceptor(struct rpc_cred *);
  
  static inline
-struct rpc_cred *      get_rpccred(struct rpc_cred *cred)
+struct rpc_cred *get_rpccred(struct rpc_cred *cred)
  {
-       if (cred != NULL)
-               atomic_inc(&cred->cr_count);
-       return cred;
+       if (cred != NULL && refcount_inc_not_zero(&cred->cr_count))
+               return cred;
+       return NULL;
  }
  
  /**
@@ -224,9 +226,7 @@ struct rpc_cred *   get_rpccred(struct rpc_cred *cred)
  static inline struct rpc_cred *
  get_rpccred_rcu(struct rpc_cred *cred)
  {
-       if (atomic_inc_not_zero(&cred->cr_count))
-               return cred;
-       return NULL;
+       return get_rpccred(cred);
  }
  
  #endif /* __KERNEL__ */
diff --git a/include/linux/sunrpc/auth_gss.h b/include/linux/sunrpc/auth_gss.h

index 0c9eac3..30427b7 100644 (file)
--- a/include/linux/sunrpc/auth_gss.h
+++ b/include/linux/sunrpc/auth_gss.h
@@ -70,6 +70,7 @@ struct gss_cl_ctx {
         refcount_t              count;
         enum rpc_gss_proc       gc_proc;
         u32                     gc_seq;
+       u32                     gc_seq_xmit;
         spinlock_t              gc_seq_lock;
         struct gss_ctx          *gc_gss_ctx;
         struct xdr_netobj       gc_wire_ctx;
diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h

index 4397a48..28721cf 100644 (file)
--- a/include/linux/sunrpc/bc_xprt.h
+++ b/include/linux/sunrpc/bc_xprt.h
@@ -34,6 +34,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  #ifdef CONFIG_SUNRPC_BACKCHANNEL
  struct rpc_rqst *xprt_lookup_bc_request(struct rpc_xprt *xprt, __be32 xid);
  void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied);
+void xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task);
  void xprt_free_bc_request(struct rpc_rqst *req);
  int xprt_setup_backchannel(struct rpc_xprt *, unsigned int min_reqs);
  void xprt_destroy_backchannel(struct rpc_xprt *, unsigned int max_reqs);
diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h

index f6e8cea..131424c 100644 (file)
--- a/include/linux/sunrpc/gss_krb5.h
+++ b/include/linux/sunrpc/gss_krb5.h
@@ -118,7 +118,8 @@ struct krb5_ctx {
         u8                      acceptor_integ[GSS_KRB5_MAX_KEYLEN];
  };
  
-extern spinlock_t krb5_seq_lock;
+extern u32 gss_seq_send_fetch_and_inc(struct krb5_ctx *ctx);
+extern u64 gss_seq_send64_fetch_and_inc(struct krb5_ctx *ctx);
  
  /* The length of the Kerberos GSS token header */
  #define GSS_KRB5_TOK_HDR_LEN   (16)
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h

index 592653b..7b540c0 100644 (file)
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -140,8 +140,9 @@ struct rpc_task_setup {
  #define RPC_TASK_RUNNING       0
  #define RPC_TASK_QUEUED                1
  #define RPC_TASK_ACTIVE                2
-#define RPC_TASK_MSG_RECV      3
-#define RPC_TASK_MSG_RECV_WAIT 4
+#define RPC_TASK_NEED_XMIT     3
+#define RPC_TASK_NEED_RECV     4
+#define RPC_TASK_MSG_PIN_WAIT  5
  
  #define RPC_IS_RUNNING(t)      test_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)
  #define rpc_set_running(t)     set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)
@@ -188,7 +189,6 @@ struct rpc_timer {
  struct rpc_wait_queue {
         spinlock_t              lock;
         struct list_head        tasks[RPC_NR_PRIORITY]; /* task queue for each priority level */
-       pid_t                   owner;                  /* process id of last task serviced */
         unsigned char           maxpriority;            /* maximum priority (0 if queue is not a priority queue) */
         unsigned char           priority;               /* current priority */
         unsigned char           nr;                     /* # tasks remaining for cookie */
@@ -204,7 +204,6 @@ struct rpc_wait_queue {
   * from a single cookie.  The aim is to improve
   * performance of NFS operations such as read/write.
   */
-#define RPC_BATCH_COUNT                        16
  #define RPC_IS_PRIORITY(q)             ((q)->maxpriority > 0)
  
  /*
@@ -234,6 +233,9 @@ void rpc_wake_up_queued_task_on_wq(struct workqueue_struct *wq,
                 struct rpc_task *task);
  void           rpc_wake_up_queued_task(struct rpc_wait_queue *,
                                         struct rpc_task *);
+void           rpc_wake_up_queued_task_set_status(struct rpc_wait_queue *,
+                                                  struct rpc_task *,
+                                                  int);
  void           rpc_wake_up(struct rpc_wait_queue *);
  struct rpc_task *rpc_wake_up_next(struct rpc_wait_queue *);
  struct rpc_task *rpc_wake_up_first_on_wq(struct workqueue_struct *wq,
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h

index c3d7206..6b7a86c 100644 (file)
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -84,7 +84,6 @@ struct svc_xprt {
         struct sockaddr_storage xpt_remote;     /* remote peer's address */
         size_t                  xpt_remotelen;  /* length of address */
         char                    xpt_remotebuf[INET6_ADDRSTRLEN + 10];
-       struct rpc_wait_queue   xpt_bc_pending; /* backchannel wait queue */
         struct list_head        xpt_users;      /* callbacks on free */
  
         struct net              *xpt_net;
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h

index 2bd6817..43106ff 100644 (file)
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -18,6 +18,7 @@
  #include <asm/unaligned.h>
  #include <linux/scatterlist.h>
  
+struct bio_vec;
  struct rpc_rqst;
  
  /*
@@ -52,12 +53,14 @@ struct xdr_buf {
         struct kvec     head[1],        /* RPC header + non-page data */
                         tail[1];        /* Appended after page data */
  
+       struct bio_vec  *bvec;
         struct page **  pages;          /* Array of pages */
         unsigned int    page_base,      /* Start of page data */
                         page_len,       /* Length of page data */
                         flags;          /* Flags for data disposition */
  #define XDRBUF_READ            0x01            /* target of file read */
  #define XDRBUF_WRITE           0x02            /* source of file write */
+#define XDRBUF_SPARSE_PAGES    0x04            /* Page array is sparse */
  
         unsigned int    buflen,         /* Total length of storage buffer */
                         len;            /* Length of XDR encoded message */
@@ -69,6 +72,8 @@ xdr_buf_init(struct xdr_buf *buf, void *start, size_t len)
         buf->head[0].iov_base = start;
         buf->head[0].iov_len = len;
         buf->tail[0].iov_len = 0;
+       buf->bvec = NULL;
+       buf->pages = NULL;
         buf->page_len = 0;
         buf->flags = 0;
         buf->len = 0;
@@ -115,6 +120,9 @@ __be32 *xdr_decode_netobj(__be32 *p, struct xdr_netobj *);
  void   xdr_inline_pages(struct xdr_buf *, unsigned int,
                          struct page **, unsigned int, unsigned int);
  void   xdr_terminate_string(struct xdr_buf *, const u32);
+size_t xdr_buf_pagecount(struct xdr_buf *buf);
+int    xdr_alloc_bvec(struct xdr_buf *buf, gfp_t gfp);
+void   xdr_free_bvec(struct xdr_buf *buf);
  
  static inline __be32 *xdr_encode_array(__be32 *p, const void *s, unsigned int len)
  {
@@ -177,10 +185,7 @@ struct xdr_skb_reader {
  
  typedef size_t (*xdr_skb_read_actor)(struct xdr_skb_reader *desc, void *to, size_t len);
  
-size_t xdr_skb_read_bits(struct xdr_skb_reader *desc, void *to, size_t len);
  extern int csum_partial_copy_to_xdr(struct xdr_buf *, struct sk_buff *);
-extern ssize_t xdr_partial_copy_from_skb(struct xdr_buf *, unsigned int,
-               struct xdr_skb_reader *, xdr_skb_read_actor);
  
  extern int xdr_encode_word(struct xdr_buf *, unsigned int, u32);
  extern int xdr_decode_word(struct xdr_buf *, unsigned int, u32 *);
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h

index 336fd1a..a4ab4f8 100644 (file)
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -82,7 +82,14 @@ struct rpc_rqst {
         struct page             **rq_enc_pages; /* scratch pages for use by
                                                    gss privacy code */
         void (*rq_release_snd_buf)(struct rpc_rqst *); /* release rq_enc_pages */
-       struct list_head        rq_list;
+
+       union {
+               struct list_head        rq_list;        /* Slot allocation list */
+               struct rb_node          rq_recv;        /* Receive queue */
+       };
+
+       struct list_head        rq_xmit;        /* Send queue */
+       struct list_head        rq_xmit2;       /* Send queue */
  
         void                    *rq_buffer;     /* Call XDR encode buffer */
         size_t                  rq_callsize;
@@ -103,6 +110,7 @@ struct rpc_rqst {
                                                 /* A cookie used to track the
                                                    state of the transport
                                                    connection */
+       atomic_t                rq_pin;
         
         /*
          * Partial send handling
@@ -133,7 +141,8 @@ struct rpc_xprt_ops {
         void            (*connect)(struct rpc_xprt *xprt, struct rpc_task *task);
         int             (*buf_alloc)(struct rpc_task *task);
         void            (*buf_free)(struct rpc_task *task);
-       int             (*send_request)(struct rpc_task *task);
+       void            (*prepare_request)(struct rpc_rqst *req);
+       int             (*send_request)(struct rpc_rqst *req);
         void            (*set_retrans_timeout)(struct rpc_task *task);
         void            (*timer)(struct rpc_xprt *xprt, struct rpc_task *task);
         void            (*release_request)(struct rpc_task *task);
@@ -234,9 +243,12 @@ struct rpc_xprt {
          */
         spinlock_t              transport_lock; /* lock transport info */
         spinlock_t              reserve_lock;   /* lock slot table */
-       spinlock_t              recv_lock;      /* lock receive list */
+       spinlock_t              queue_lock;     /* send/receive queue lock */
         u32                     xid;            /* Next XID value to use */
         struct rpc_task *       snd_task;       /* Task blocked in send */
+
+       struct list_head        xmit_queue;     /* Send queue */
+
         struct svc_xprt         *bc_xprt;       /* NFSv4.1 backchannel */
  #if defined(CONFIG_SUNRPC_BACKCHANNEL)
         struct svc_serv         *bc_serv;       /* The RPC service which will */
@@ -248,7 +260,8 @@ struct rpc_xprt {
         struct list_head        bc_pa_list;     /* List of preallocated
                                                  * backchannel rpc_rqst's */
  #endif /* CONFIG_SUNRPC_BACKCHANNEL */
-       struct list_head        recv;
+
+       struct rb_root          recv_queue;     /* Receive queue */
  
         struct {
                 unsigned long           bind_count,     /* total number of binds */
@@ -325,15 +338,18 @@ struct xprt_class {
  struct rpc_xprt                *xprt_create_transport(struct xprt_create *args);
  void                   xprt_connect(struct rpc_task *task);
  void                   xprt_reserve(struct rpc_task *task);
-void                   xprt_request_init(struct rpc_task *task);
  void                   xprt_retry_reserve(struct rpc_task *task);
  int                    xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task);
  int                    xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task);
  void                   xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task);
  void                   xprt_free_slot(struct rpc_xprt *xprt,
                                        struct rpc_rqst *req);
-void                   xprt_lock_and_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task);
+void                   xprt_request_prepare(struct rpc_rqst *req);
  bool                   xprt_prepare_transmit(struct rpc_task *task);
+void                   xprt_request_enqueue_transmit(struct rpc_task *task);
+void                   xprt_request_enqueue_receive(struct rpc_task *task);
+void                   xprt_request_wait_receive(struct rpc_task *task);
+bool                   xprt_request_need_retransmit(struct rpc_task *task);
  void                   xprt_transmit(struct rpc_task *task);
  void                   xprt_end_transmit(struct rpc_task *task);
  int                    xprt_adjust_timeout(struct rpc_rqst *req);
@@ -373,8 +389,8 @@ int                 xprt_load_transport(const char *);
  void                   xprt_set_retrans_timeout_def(struct rpc_task *task);
  void                   xprt_set_retrans_timeout_rtt(struct rpc_task *task);
  void                   xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status);
-void                   xprt_wait_for_buffer_space(struct rpc_task *task, rpc_action action);
-void                   xprt_write_space(struct rpc_xprt *xprt);
+void                   xprt_wait_for_buffer_space(struct rpc_xprt *xprt);
+bool                   xprt_write_space(struct rpc_xprt *xprt);
  void                   xprt_adjust_cwnd(struct rpc_xprt *xprt, struct rpc_task *task, int result);
  struct rpc_rqst *      xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid);
  void                   xprt_update_rtt(struct rpc_task *task);
@@ -382,6 +398,7 @@ void                        xprt_complete_rqst(struct rpc_task *task, int copied);
  void                   xprt_pin_rqst(struct rpc_rqst *req);
  void                   xprt_unpin_rqst(struct rpc_rqst *req);
  void                   xprt_release_rqst_cong(struct rpc_task *task);
+bool                   xprt_request_get_cong(struct rpc_xprt *xprt, struct rpc_rqst *req);
  void                   xprt_disconnect_done(struct rpc_xprt *xprt);
  void                   xprt_force_disconnect(struct rpc_xprt *xprt);
  void                   xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie);
@@ -400,6 +417,8 @@ void                        xprt_unlock_connect(struct rpc_xprt *, void *);
  #define XPRT_BINDING           (5)
  #define XPRT_CLOSING           (6)
  #define XPRT_CONGESTED         (9)
+#define XPRT_CWND_WAIT         (10)
+#define XPRT_WRITE_SPACE       (11)
  
  static inline void xprt_set_connected(struct rpc_xprt *xprt)
  {
diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h

index ae0f99b..458bfe0 100644 (file)
--- a/include/linux/sunrpc/xprtsock.h
+++ b/include/linux/sunrpc/xprtsock.h
@@ -30,15 +30,25 @@ struct sock_xprt {
         /*
          * State of TCP reply receive
          */
-       __be32                  tcp_fraghdr,
-                               tcp_xid,
-                               tcp_calldir;
+       struct {
+               struct {
+                       __be32  fraghdr,
+                               xid,
+                               calldir;
+               } __attribute__((packed));
  
-       u32                     tcp_offset,
-                               tcp_reclen;
+               u32             offset,
+                               len;
  
-       unsigned long           tcp_copied,
-                               tcp_flags;
+               unsigned long   copied;
+       } recv;
+
+       /*
+        * State of TCP transmit queue
+        */
+       struct {
+               u32             offset;
+       } xmit;
  
         /*
          * Connection of transports
@@ -67,21 +77,9 @@ struct sock_xprt {
         void                    (*old_error_report)(struct sock *);
  };
  
-/*
- * TCP receive state flags
- */
-#define TCP_RCV_LAST_FRAG      (1UL << 0)
-#define TCP_RCV_COPY_FRAGHDR   (1UL << 1)
-#define TCP_RCV_COPY_XID       (1UL << 2)
-#define TCP_RCV_COPY_DATA      (1UL << 3)
-#define TCP_RCV_READ_CALLDIR   (1UL << 4)
-#define TCP_RCV_COPY_CALLDIR   (1UL << 5)
-
  /*
   * TCP RPC flags
   */
-#define TCP_RPC_REPLY          (1UL << 6)
-
  #define XPRT_SOCK_CONNECTING   1U
  #define XPRT_SOCK_DATA_READY   (2)
  #define XPRT_SOCK_UPD_TIMEOUT  (3)
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h

index 965be92..a387b59 100644 (file)
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -67,11 +67,6 @@ extern void swiotlb_tbl_sync_single(struct device *hwdev,
  
  /* Accessory functions. */
  
-void *swiotlb_alloc(struct device *hwdev, size_t size, dma_addr_t *dma_handle,
-               gfp_t flags, unsigned long attrs);
-void swiotlb_free(struct device *dev, size_t size, void *vaddr,
-               dma_addr_t dma_addr, unsigned long attrs);
-
  extern dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
                                    unsigned long offset, size_t size,
                                    enum dma_data_direction dir,
@@ -106,9 +101,6 @@ extern void
  swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
                            int nelems, enum dma_data_direction dir);
  
-extern int
-swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr);
-
  extern int
  swiotlb_dma_supported(struct device *hwdev, u64 mask);
  
@@ -121,7 +113,6 @@ static inline unsigned int swiotlb_max_segment(void) { return 0; }
  #endif
  
  extern void swiotlb_print_info(void);
-extern int is_swiotlb_buffer(phys_addr_t paddr);
  extern void swiotlb_set_max_segment(unsigned int);
  
  extern const struct dma_map_ops swiotlb_dma_ops;
diff --git a/include/linux/tc.h b/include/linux/tc.h

index f92511e..a60639f 100644 (file)
--- a/include/linux/tc.h
+++ b/include/linux/tc.h
@@ -84,6 +84,7 @@ struct tc_dev {
                                            device. */
         struct device   dev;            /* Generic device interface. */
         struct resource resource;       /* Address space of this device. */
+       u64             dma_mask;       /* DMA addressable range. */
         char            vendor[9];
         char            name[9];
         char            firmware[9];
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h

index e8d9456..c596976 100644 (file)
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -226,6 +226,7 @@ struct fib_dump_filter {
         u32                     table_id;
         /* filter_set is an optimization that an entry is set */
         bool                    filter_set;
+       bool                    dump_all_families;
         unsigned char           protocol;
         unsigned char           rt_type;
         unsigned int            flags;
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h

index 7956989..3ec73f1 100644 (file)
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -82,7 +82,6 @@ TRACE_DEFINE_ENUM(CP_TRIMMED);
                 { REQ_OP_WRITE,                 "WRITE" },              \
                 { REQ_OP_FLUSH,                 "FLUSH" },              \
                 { REQ_OP_DISCARD,               "DISCARD" },            \
-               { REQ_OP_ZONE_REPORT,           "ZONE_REPORT" },        \
                 { REQ_OP_SECURE_ERASE,          "SECURE_ERASE" },       \
                 { REQ_OP_ZONE_RESET,            "ZONE_RESET" },         \
                 { REQ_OP_WRITE_SAME,            "WRITE_SAME" },         \
diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h

index 53df203..b093058 100644 (file)
--- a/include/trace/events/rpcrdma.h
+++ b/include/trace/events/rpcrdma.h
@@ -263,7 +263,7 @@ DECLARE_EVENT_CLASS(xprtrdma_mr,
  );
  
  #define DEFINE_MR_EVENT(name) \
-               DEFINE_EVENT(xprtrdma_mr, name, \
+               DEFINE_EVENT(xprtrdma_mr, xprtrdma_mr_##name, \
                                 TP_PROTO( \
                                         const struct rpcrdma_mr *mr \
                                 ), \
@@ -306,7 +306,7 @@ DECLARE_EVENT_CLASS(xprtrdma_cb_event,
   ** Connection events
   **/
  
-TRACE_EVENT(xprtrdma_conn_upcall,
+TRACE_EVENT(xprtrdma_cm_event,
         TP_PROTO(
                 const struct rpcrdma_xprt *r_xprt,
                 struct rdma_cm_event *event
@@ -377,7 +377,7 @@ DEFINE_RXPRT_EVENT(xprtrdma_reinsert);
  DEFINE_RXPRT_EVENT(xprtrdma_reconnect);
  DEFINE_RXPRT_EVENT(xprtrdma_inject_dsc);
  
-TRACE_EVENT(xprtrdma_qp_error,
+TRACE_EVENT(xprtrdma_qp_event,
         TP_PROTO(
                 const struct rpcrdma_xprt *r_xprt,
                 const struct ib_event *event
@@ -509,7 +509,7 @@ TRACE_EVENT(xprtrdma_post_send,
         TP_STRUCT__entry(
                 __field(const void *, req)
                 __field(int, num_sge)
-               __field(bool, signaled)
+               __field(int, signaled)
                 __field(int, status)
         ),
  
@@ -651,11 +651,11 @@ DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_fastreg);
  DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_li);
  DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_li_wake);
  
-DEFINE_MR_EVENT(xprtrdma_localinv);
-DEFINE_MR_EVENT(xprtrdma_dma_map);
-DEFINE_MR_EVENT(xprtrdma_dma_unmap);
-DEFINE_MR_EVENT(xprtrdma_remoteinv);
-DEFINE_MR_EVENT(xprtrdma_recover_mr);
+DEFINE_MR_EVENT(localinv);
+DEFINE_MR_EVENT(map);
+DEFINE_MR_EVENT(unmap);
+DEFINE_MR_EVENT(remoteinv);
+DEFINE_MR_EVENT(recycle);
  
  /**
   ** Reply events
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h

index bbb08a3..28e3841 100644 (file)
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -470,14 +470,14 @@ TRACE_EVENT(xprt_ping,
                         __get_str(addr), __get_str(port), __entry->status)
  );
  
-TRACE_EVENT(xs_tcp_data_ready,
-       TP_PROTO(struct rpc_xprt *xprt, int err, unsigned int total),
+TRACE_EVENT(xs_stream_read_data,
+       TP_PROTO(struct rpc_xprt *xprt, ssize_t err, size_t total),
  
         TP_ARGS(xprt, err, total),
  
         TP_STRUCT__entry(
-               __field(int, err)
-               __field(unsigned int, total)
+               __field(ssize_t, err)
+               __field(size_t, total)
                 __string(addr, xprt ? xprt->address_strings[RPC_DISPLAY_ADDR] :
                                 "(null)")
                 __string(port, xprt ? xprt->address_strings[RPC_DISPLAY_PORT] :
@@ -493,21 +493,11 @@ TRACE_EVENT(xs_tcp_data_ready,
                         xprt->address_strings[RPC_DISPLAY_PORT] : "(null)");
         ),
  
-       TP_printk("peer=[%s]:%s err=%d total=%u", __get_str(addr),
+       TP_printk("peer=[%s]:%s err=%zd total=%zu", __get_str(addr),
                         __get_str(port), __entry->err, __entry->total)
  );
  
-#define rpc_show_sock_xprt_flags(flags) \
-       __print_flags(flags, "|", \
-               { TCP_RCV_LAST_FRAG, "TCP_RCV_LAST_FRAG" }, \
-               { TCP_RCV_COPY_FRAGHDR, "TCP_RCV_COPY_FRAGHDR" }, \
-               { TCP_RCV_COPY_XID, "TCP_RCV_COPY_XID" }, \
-               { TCP_RCV_COPY_DATA, "TCP_RCV_COPY_DATA" }, \
-               { TCP_RCV_READ_CALLDIR, "TCP_RCV_READ_CALLDIR" }, \
-               { TCP_RCV_COPY_CALLDIR, "TCP_RCV_COPY_CALLDIR" }, \
-               { TCP_RPC_REPLY, "TCP_RPC_REPLY" })
-
-TRACE_EVENT(xs_tcp_data_recv,
+TRACE_EVENT(xs_stream_read_request,
         TP_PROTO(struct sock_xprt *xs),
  
         TP_ARGS(xs),
@@ -516,25 +506,22 @@ TRACE_EVENT(xs_tcp_data_recv,
                 __string(addr, xs->xprt.address_strings[RPC_DISPLAY_ADDR])
                 __string(port, xs->xprt.address_strings[RPC_DISPLAY_PORT])
                 __field(u32, xid)
-               __field(unsigned long, flags)
                 __field(unsigned long, copied)
                 __field(unsigned int, reclen)
-               __field(unsigned long, offset)
+               __field(unsigned int, offset)
         ),
  
         TP_fast_assign(
                 __assign_str(addr, xs->xprt.address_strings[RPC_DISPLAY_ADDR]);
                 __assign_str(port, xs->xprt.address_strings[RPC_DISPLAY_PORT]);
-               __entry->xid = be32_to_cpu(xs->tcp_xid);
-               __entry->flags = xs->tcp_flags;
-               __entry->copied = xs->tcp_copied;
-               __entry->reclen = xs->tcp_reclen;
-               __entry->offset = xs->tcp_offset;
+               __entry->xid = be32_to_cpu(xs->recv.xid);
+               __entry->copied = xs->recv.copied;
+               __entry->reclen = xs->recv.len;
+               __entry->offset = xs->recv.offset;
         ),
  
-       TP_printk("peer=[%s]:%s xid=0x%08x flags=%s copied=%lu reclen=%u offset=%lu",
+       TP_printk("peer=[%s]:%s xid=0x%08x copied=%lu reclen=%u offset=%u",
                         __get_str(addr), __get_str(port), __entry->xid,
-                       rpc_show_sock_xprt_flags(__entry->flags),
                         __entry->copied, __entry->reclen, __entry->offset)
  );
  
diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h

index ff5a5db..8f08ff9 100644 (file)
--- a/include/uapi/linux/blkzoned.h
+++ b/include/uapi/linux/blkzoned.h
@@ -137,8 +137,11 @@ struct blk_zone_range {
   *                 sector specified in the report request structure.
   * @BLKRESETZONE: Reset the write pointer of the zones in the specified
   *                sector range. The sector range must be zone aligned.
+ * @BLKGETZONESZ: Get the device zone size in number of 512 B sectors.
   */
  #define BLKREPORTZONE  _IOWR(0x12, 130, struct blk_zone_report)
  #define BLKRESETZONE   _IOW(0x12, 131, struct blk_zone_range)
+#define BLKGETZONESZ   _IOW(0x12, 132, __u32)
+#define BLKGETNRZONES  _IOW(0x12, 133, __u32)
  
  #endif /* _UAPI_BLKZONED_H */
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c

index 87a6bc2..f14c376 100644 (file)
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -14,8 +14,6 @@
  #include <linux/pfn.h>
  #include <linux/set_memory.h>
  
-#define DIRECT_MAPPING_ERROR           0
-
  /*
   * Most architectures use ZONE_DMA for the first 16 Megabytes, but
   * some use it for entirely different regions:
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c

index 4f8a6db..ebecaf2 100644 (file)
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -21,6 +21,7 @@
  
  #include <linux/cache.h>
  #include <linux/dma-direct.h>
+#include <linux/dma-noncoherent.h>
  #include <linux/mm.h>
  #include <linux/export.h>
  #include <linux/spinlock.h>
@@ -72,13 +73,6 @@ static phys_addr_t io_tlb_start, io_tlb_end;
   */
  static unsigned long io_tlb_nslabs;
  
-/*
- * When the IOMMU overflows we return a fallback buffer. This sets the size.
- */
-static unsigned long io_tlb_overflow = 32*1024;
-
-static phys_addr_t io_tlb_overflow_buffer;
-
  /*
   * This is a free list describing the number of free entries available from
   * each index
@@ -126,7 +120,6 @@ setup_io_tlb_npages(char *str)
         return 0;
  }
  early_param("swiotlb", setup_io_tlb_npages);
-/* make io_tlb_overflow tunable too? */
  
  unsigned long swiotlb_nr_tbl(void)
  {
@@ -194,16 +187,10 @@ void __init swiotlb_update_mem_attributes(void)
         bytes = PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT);
         set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
         memset(vaddr, 0, bytes);
-
-       vaddr = phys_to_virt(io_tlb_overflow_buffer);
-       bytes = PAGE_ALIGN(io_tlb_overflow);
-       set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
-       memset(vaddr, 0, bytes);
  }
  
  int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
  {
-       void *v_overflow_buffer;
         unsigned long i, bytes;
  
         bytes = nslabs << IO_TLB_SHIFT;
@@ -212,17 +199,6 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
         io_tlb_start = __pa(tlb);
         io_tlb_end = io_tlb_start + bytes;
  
-       /*
-        * Get the overflow emergency buffer
-        */
-       v_overflow_buffer = memblock_virt_alloc_low_nopanic(
-                                               PAGE_ALIGN(io_tlb_overflow),
-                                               PAGE_SIZE);
-       if (!v_overflow_buffer)
-               return -ENOMEM;
-
-       io_tlb_overflow_buffer = __pa(v_overflow_buffer);
-
         /*
          * Allocate and initialize the free list array.  This array is used
          * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
@@ -330,7 +306,6 @@ int
  swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
  {
         unsigned long i, bytes;
-       unsigned char *v_overflow_buffer;
  
         bytes = nslabs << IO_TLB_SHIFT;
  
@@ -341,19 +316,6 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
         set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT);
         memset(tlb, 0, bytes);
  
-       /*
-        * Get the overflow emergency buffer
-        */
-       v_overflow_buffer = (void *)__get_free_pages(GFP_DMA,
-                                                    get_order(io_tlb_overflow));
-       if (!v_overflow_buffer)
-               goto cleanup2;
-
-       set_memory_decrypted((unsigned long)v_overflow_buffer,
-                       io_tlb_overflow >> PAGE_SHIFT);
-       memset(v_overflow_buffer, 0, io_tlb_overflow);
-       io_tlb_overflow_buffer = virt_to_phys(v_overflow_buffer);
-
         /*
          * Allocate and initialize the free list array.  This array is used
          * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
@@ -390,10 +352,6 @@ cleanup4:
                                                          sizeof(int)));
         io_tlb_list = NULL;
  cleanup3:
-       free_pages((unsigned long)v_overflow_buffer,
-                  get_order(io_tlb_overflow));
-       io_tlb_overflow_buffer = 0;
-cleanup2:
         io_tlb_end = 0;
         io_tlb_start = 0;
         io_tlb_nslabs = 0;
@@ -407,8 +365,6 @@ void __init swiotlb_exit(void)
                 return;
  
         if (late_alloc) {
-               free_pages((unsigned long)phys_to_virt(io_tlb_overflow_buffer),
-                          get_order(io_tlb_overflow));
                 free_pages((unsigned long)io_tlb_orig_addr,
                            get_order(io_tlb_nslabs * sizeof(phys_addr_t)));
                 free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
@@ -416,8 +372,6 @@ void __init swiotlb_exit(void)
                 free_pages((unsigned long)phys_to_virt(io_tlb_start),
                            get_order(io_tlb_nslabs << IO_TLB_SHIFT));
         } else {
-               memblock_free_late(io_tlb_overflow_buffer,
-                                  PAGE_ALIGN(io_tlb_overflow));
                 memblock_free_late(__pa(io_tlb_orig_addr),
                                    PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)));
                 memblock_free_late(__pa(io_tlb_list),
@@ -429,7 +383,7 @@ void __init swiotlb_exit(void)
         max_segment = 0;
  }
  
-int is_swiotlb_buffer(phys_addr_t paddr)
+static int is_swiotlb_buffer(phys_addr_t paddr)
  {
         return paddr >= io_tlb_start && paddr < io_tlb_end;
  }
@@ -590,26 +544,6 @@ found:
         return tlb_addr;
  }
  
-/*
- * Allocates bounce buffer and returns its physical address.
- */
-static phys_addr_t
-map_single(struct device *hwdev, phys_addr_t phys, size_t size,
-          enum dma_data_direction dir, unsigned long attrs)
-{
-       dma_addr_t start_dma_addr;
-
-       if (swiotlb_force == SWIOTLB_NO_FORCE) {
-               dev_warn_ratelimited(hwdev, "Cannot do DMA to address %pa\n",
-                                    &phys);
-               return SWIOTLB_MAP_ERROR;
-       }
-
-       start_dma_addr = __phys_to_dma(hwdev, io_tlb_start);
-       return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size,
-                                     dir, attrs);
-}
-
  /*
   * tlb_addr is the physical address of the bounce buffer to unmap.
   */
@@ -689,104 +623,32 @@ void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr,
         }
  }
  
-static inline bool dma_coherent_ok(struct device *dev, dma_addr_t addr,
-               size_t size)
-{
-       u64 mask = DMA_BIT_MASK(32);
-
-       if (dev && dev->coherent_dma_mask)
-               mask = dev->coherent_dma_mask;
-       return addr + size - 1 <= mask;
-}
-
-static void *
-swiotlb_alloc_buffer(struct device *dev, size_t size, dma_addr_t *dma_handle,
-               unsigned long attrs)
+static dma_addr_t swiotlb_bounce_page(struct device *dev, phys_addr_t *phys,
+               size_t size, enum dma_data_direction dir, unsigned long attrs)
  {
-       phys_addr_t phys_addr;
-
-       if (swiotlb_force == SWIOTLB_NO_FORCE)
-               goto out_warn;
-
-       phys_addr = swiotlb_tbl_map_single(dev,
-                       __phys_to_dma(dev, io_tlb_start),
-                       0, size, DMA_FROM_DEVICE, attrs);
-       if (phys_addr == SWIOTLB_MAP_ERROR)
-               goto out_warn;
-
-       *dma_handle = __phys_to_dma(dev, phys_addr);
-       if (!dma_coherent_ok(dev, *dma_handle, size))
-               goto out_unmap;
-
-       memset(phys_to_virt(phys_addr), 0, size);
-       return phys_to_virt(phys_addr);
+       dma_addr_t dma_addr;
  
-out_unmap:
-       dev_warn(dev, "hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n",
-               (unsigned long long)dev->coherent_dma_mask,
-               (unsigned long long)*dma_handle);
-
-       /*
-        * DMA_TO_DEVICE to avoid memcpy in unmap_single.
-        * DMA_ATTR_SKIP_CPU_SYNC is optional.
-        */
-       swiotlb_tbl_unmap_single(dev, phys_addr, size, DMA_TO_DEVICE,
-                       DMA_ATTR_SKIP_CPU_SYNC);
-out_warn:
-       if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit()) {
-               dev_warn(dev,
-                       "swiotlb: coherent allocation failed, size=%zu\n",
-                       size);
-               dump_stack();
+       if (unlikely(swiotlb_force == SWIOTLB_NO_FORCE)) {
+               dev_warn_ratelimited(dev,
+                       "Cannot do DMA to address %pa\n", phys);
+               return DIRECT_MAPPING_ERROR;
         }
-       return NULL;
-}
-
-static bool swiotlb_free_buffer(struct device *dev, size_t size,
-               dma_addr_t dma_addr)
-{
-       phys_addr_t phys_addr = dma_to_phys(dev, dma_addr);
  
-       WARN_ON_ONCE(irqs_disabled());
-
-       if (!is_swiotlb_buffer(phys_addr))
-               return false;
-
-       /*
-        * DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single.
-        * DMA_ATTR_SKIP_CPU_SYNC is optional.
-        */
-       swiotlb_tbl_unmap_single(dev, phys_addr, size, DMA_TO_DEVICE,
-                                DMA_ATTR_SKIP_CPU_SYNC);
-       return true;
-}
-
-static void
-swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir,
-            int do_panic)
-{
-       if (swiotlb_force == SWIOTLB_NO_FORCE)
-               return;
-
-       /*
-        * Ran out of IOMMU space for this operation. This is very bad.
-        * Unfortunately the drivers cannot handle this operation properly.
-        * unless they check for dma_mapping_error (most don't)
-        * When the mapping is small enough return a static buffer to limit
-        * the damage, or panic when the transfer is too big.
-        */
-       dev_err_ratelimited(dev, "DMA: Out of SW-IOMMU space for %zu bytes\n",
-                           size);
+       /* Oh well, have to allocate and map a bounce buffer. */
+       *phys = swiotlb_tbl_map_single(dev, __phys_to_dma(dev, io_tlb_start),
+                       *phys, size, dir, attrs);
+       if (*phys == SWIOTLB_MAP_ERROR)
+               return DIRECT_MAPPING_ERROR;
  
-       if (size <= io_tlb_overflow || !do_panic)
-               return;
+       /* Ensure that the address returned is DMA'ble */
+       dma_addr = __phys_to_dma(dev, *phys);
+       if (unlikely(!dma_capable(dev, dma_addr, size))) {
+               swiotlb_tbl_unmap_single(dev, *phys, size, dir,
+                       attrs | DMA_ATTR_SKIP_CPU_SYNC);
+               return DIRECT_MAPPING_ERROR;
+       }
  
-       if (dir == DMA_BIDIRECTIONAL)
-               panic("DMA: Random memory could be DMA accessed\n");
-       if (dir == DMA_FROM_DEVICE)
-               panic("DMA: Random memory could be DMA written\n");
-       if (dir == DMA_TO_DEVICE)
-               panic("DMA: Random memory could be DMA read\n");
+       return dma_addr;
  }
  
  /*
@@ -801,7 +663,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
                             enum dma_data_direction dir,
                             unsigned long attrs)
  {
-       phys_addr_t map, phys = page_to_phys(page) + offset;
+       phys_addr_t phys = page_to_phys(page) + offset;
         dma_addr_t dev_addr = phys_to_dma(dev, phys);
  
         BUG_ON(dir == DMA_NONE);
@@ -810,28 +672,17 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
          * we can safely return the device addr and not worry about bounce
          * buffering it.
          */
-       if (dma_capable(dev, dev_addr, size) && swiotlb_force != SWIOTLB_FORCE)
-               return dev_addr;
-
-       trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force);
-
-       /* Oh well, have to allocate and map a bounce buffer. */
-       map = map_single(dev, phys, size, dir, attrs);
-       if (map == SWIOTLB_MAP_ERROR) {
-               swiotlb_full(dev, size, dir, 1);
-               return __phys_to_dma(dev, io_tlb_overflow_buffer);
+       if (!dma_capable(dev, dev_addr, size) ||
+           swiotlb_force == SWIOTLB_FORCE) {
+               trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force);
+               dev_addr = swiotlb_bounce_page(dev, &phys, size, dir, attrs);
         }
  
-       dev_addr = __phys_to_dma(dev, map);
+       if (!dev_is_dma_coherent(dev) &&
+           (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
+               arch_sync_dma_for_device(dev, phys, size, dir);
  
-       /* Ensure that the address returned is DMA'ble */
-       if (dma_capable(dev, dev_addr, size))
-               return dev_addr;
-
-       attrs |= DMA_ATTR_SKIP_CPU_SYNC;
-       swiotlb_tbl_unmap_single(dev, map, size, dir, attrs);
-
-       return __phys_to_dma(dev, io_tlb_overflow_buffer);
+       return dev_addr;
  }
  
  /*
@@ -842,14 +693,18 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
   * After this call, reads by the cpu to the buffer are guaranteed to see
   * whatever the device wrote there.
   */
-static void unmap_single(struct device *hwdev, dma_addr_t dev_addr,
-                        size_t size, enum dma_data_direction dir,
-                        unsigned long attrs)
+void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
+                       size_t size, enum dma_data_direction dir,
+                       unsigned long attrs)
  {
         phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
  
         BUG_ON(dir == DMA_NONE);
  
+       if (!dev_is_dma_coherent(hwdev) &&
+           (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
+               arch_sync_dma_for_cpu(hwdev, paddr, size, dir);
+
         if (is_swiotlb_buffer(paddr)) {
                 swiotlb_tbl_unmap_single(hwdev, paddr, size, dir, attrs);
                 return;
@@ -867,13 +722,6 @@ static void unmap_single(struct device *hwdev, dma_addr_t dev_addr,
         dma_mark_clean(phys_to_virt(paddr), size);
  }
  
-void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
-                       size_t size, enum dma_data_direction dir,
-                       unsigned long attrs)
-{
-       unmap_single(hwdev, dev_addr, size, dir, attrs);
-}
-
  /*
   * Make physical memory consistent for a single streaming mode DMA translation
   * after a transfer.
@@ -893,15 +741,17 @@ swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
  
         BUG_ON(dir == DMA_NONE);
  
-       if (is_swiotlb_buffer(paddr)) {
+       if (!dev_is_dma_coherent(hwdev) && target == SYNC_FOR_CPU)
+               arch_sync_dma_for_cpu(hwdev, paddr, size, dir);
+
+       if (is_swiotlb_buffer(paddr))
                 swiotlb_tbl_sync_single(hwdev, paddr, size, dir, target);
-               return;
-       }
  
-       if (dir != DMA_FROM_DEVICE)
-               return;
+       if (!dev_is_dma_coherent(hwdev) && target == SYNC_FOR_DEVICE)
+               arch_sync_dma_for_device(hwdev, paddr, size, dir);
  
-       dma_mark_clean(phys_to_virt(paddr), size);
+       if (!is_swiotlb_buffer(paddr) && dir == DMA_FROM_DEVICE)
+               dma_mark_clean(phys_to_virt(paddr), size);
  }
  
  void
@@ -925,48 +775,31 @@ swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
   * appropriate dma address and length.  They are obtained via
   * sg_dma_{address,length}(SG).
   *
- * NOTE: An implementation may be able to use a smaller number of
- *       DMA address/length pairs than there are SG table elements.
- *       (for example via virtual mapping capabilities)
- *       The routine returns the number of addr/length pairs actually
- *       used, at most nents.
- *
   * Device ownership issues as mentioned above for swiotlb_map_page are the
   * same here.
   */
  int
-swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
+swiotlb_map_sg_attrs(struct device *dev, struct scatterlist *sgl, int nelems,
                      enum dma_data_direction dir, unsigned long attrs)
  {
         struct scatterlist *sg;
         int i;
  
-       BUG_ON(dir == DMA_NONE);
-
         for_each_sg(sgl, sg, nelems, i) {
-               phys_addr_t paddr = sg_phys(sg);
-               dma_addr_t dev_addr = phys_to_dma(hwdev, paddr);
-
-               if (swiotlb_force == SWIOTLB_FORCE ||
-                   !dma_capable(hwdev, dev_addr, sg->length)) {
-                       phys_addr_t map = map_single(hwdev, sg_phys(sg),
-                                                    sg->length, dir, attrs);
-                       if (map == SWIOTLB_MAP_ERROR) {
-                               /* Don't panic here, we expect map_sg users
-                                  to do proper error handling. */
-                               swiotlb_full(hwdev, sg->length, dir, 0);
-                               attrs |= DMA_ATTR_SKIP_CPU_SYNC;
-                               swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir,
-                                                      attrs);
-                               sg_dma_len(sgl) = 0;
-                               return 0;
-                       }
-                       sg->dma_address = __phys_to_dma(hwdev, map);
-               } else
-                       sg->dma_address = dev_addr;
+               sg->dma_address = swiotlb_map_page(dev, sg_page(sg), sg->offset,
+                               sg->length, dir, attrs);
+               if (sg->dma_address == DIRECT_MAPPING_ERROR)
+                       goto out_error;
                 sg_dma_len(sg) = sg->length;
         }
+
         return nelems;
+
+out_error:
+       swiotlb_unmap_sg_attrs(dev, sgl, i, dir,
+                       attrs | DMA_ATTR_SKIP_CPU_SYNC);
+       sg_dma_len(sgl) = 0;
+       return 0;
  }
  
  /*
@@ -984,7 +817,7 @@ swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
         BUG_ON(dir == DMA_NONE);
  
         for_each_sg(sgl, sg, nelems, i)
-               unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir,
+               swiotlb_unmap_page(hwdev, sg->dma_address, sg_dma_len(sg), dir,
                              attrs);
  }
  
@@ -1022,12 +855,6 @@ swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
         swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE);
  }
  
-int
-swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
-{
-       return (dma_addr == __phys_to_dma(hwdev, io_tlb_overflow_buffer));
-}
-
  /*
   * Return whether the given device DMA address mask can be supported
   * properly.  For example, if your device can only drive the low 24-bits
@@ -1040,39 +867,10 @@ swiotlb_dma_supported(struct device *hwdev, u64 mask)
         return __phys_to_dma(hwdev, io_tlb_end - 1) <= mask;
  }
  
-void *swiotlb_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
-               gfp_t gfp, unsigned long attrs)
-{
-       void *vaddr;
-
-       /* temporary workaround: */
-       if (gfp & __GFP_NOWARN)
-               attrs |= DMA_ATTR_NO_WARN;
-
-       /*
-        * Don't print a warning when the first allocation attempt fails.
-        * swiotlb_alloc_coherent() will print a warning when the DMA memory
-        * allocation ultimately failed.
-        */
-       gfp |= __GFP_NOWARN;
-
-       vaddr = dma_direct_alloc(dev, size, dma_handle, gfp, attrs);
-       if (!vaddr)
-               vaddr = swiotlb_alloc_buffer(dev, size, dma_handle, attrs);
-       return vaddr;
-}
-
-void swiotlb_free(struct device *dev, size_t size, void *vaddr,
-               dma_addr_t dma_addr, unsigned long attrs)
-{
-       if (!swiotlb_free_buffer(dev, size, dma_addr))
-               dma_direct_free(dev, size, vaddr, dma_addr, attrs);
-}
-
  const struct dma_map_ops swiotlb_dma_ops = {
-       .mapping_error          = swiotlb_dma_mapping_error,
-       .alloc                  = swiotlb_alloc,
-       .free                   = swiotlb_free,
+       .mapping_error          = dma_direct_mapping_error,
+       .alloc                  = dma_direct_alloc,
+       .free                   = dma_direct_free,
         .sync_single_for_cpu    = swiotlb_sync_single_for_cpu,
         .sync_single_for_device = swiotlb_sync_single_for_device,
         .sync_sg_for_cpu        = swiotlb_sync_sg_for_cpu,
diff --git a/lib/vsprintf.c b/lib/vsprintf.c

index ad4fbe5..37a54a6 100644 (file)
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -1684,6 +1684,7 @@ char *device_node_string(char *buf, char *end, struct device_node *dn,
                 fmt = "f";
  
         for (pass = false; strspn(fmt,"fnpPFcC"); fmt++, pass = true) {
+               int precision;
                 if (pass) {
                         if (buf < end)
                                 *buf = ':';
@@ -1695,7 +1696,11 @@ char *device_node_string(char *buf, char *end, struct device_node *dn,
                         buf = device_node_gen_full_name(dn, buf, end);
                         break;
                 case 'n':       /* name */
-                       buf = string(buf, end, dn->name, str_spec);
+                       p = kbasename(of_node_full_name(dn));
+                       precision = str_spec.precision;
+                       str_spec.precision = strchrnul(p, '@') - p;
+                       buf = string(buf, end, p, str_spec);
+                       str_spec.precision = precision;
                         break;
                 case 'p':       /* phandle */
                         buf = number(buf, end, (unsigned int)dn->phandle, num_spec);
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c

index 024139b..41cdafb 100644 (file)
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1422,7 +1422,15 @@ static void br_multicast_query_received(struct net_bridge *br,
                 return;
  
         br_multicast_update_query_timer(br, query, max_delay);
-       br_multicast_mark_router(br, port);
+
+       /* Based on RFC4541, section 2.1.1 IGMP Forwarding Rules,
+        * the arrival port for IGMP Queries where the source address
+        * is 0.0.0.0 should not be added to router port list.
+        */
+       if ((saddr->proto == htons(ETH_P_IP) && saddr->u.ip4) ||
+           (saddr->proto == htons(ETH_P_IPV6) &&
+            !ipv6_addr_any(&saddr->u.ip6)))
+               br_multicast_mark_router(br, port);
  }
  
  static void br_ip4_multicast_query(struct net_bridge *br,
diff --git a/net/core/datagram.c b/net/core/datagram.c

index 6a034eb..57f3a6f 100644 (file)
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -808,8 +808,9 @@ int skb_copy_and_csum_datagram_msg(struct sk_buff *skb,
                         return -EINVAL;
                 }
  
-               if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
-                       netdev_rx_csum_fault(skb->dev);
+               if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
+                   !skb->csum_complete_sw)
+                       netdev_rx_csum_fault(NULL);
         }
         return 0;
  fault:
diff --git a/net/core/neighbour.c b/net/core/neighbour.c

index ee605d9..41954e4 100644 (file)
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -2364,7 +2364,7 @@ static bool neigh_master_filtered(struct net_device *dev, int master_idx)
         if (!master_idx)
                 return false;
  
-       master = netdev_master_upper_dev_get(dev);
+       master = dev ? netdev_master_upper_dev_get(dev) : NULL;
         if (!master || master->ifindex != master_idx)
                 return true;
  
@@ -2373,7 +2373,7 @@ static bool neigh_master_filtered(struct net_device *dev, int master_idx)
  
  static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx)
  {
-       if (filter_idx && dev->ifindex != filter_idx)
+       if (filter_idx && (!dev || dev->ifindex != filter_idx))
                 return true;
  
         return false;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c

index 0958c7b..f679c7a 100644 (file)
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -3333,6 +3333,7 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
         int idx;
         int s_idx = cb->family;
         int type = cb->nlh->nlmsg_type - RTM_BASE;
+       int ret = 0;
  
         if (s_idx == 0)
                 s_idx = 1;
@@ -3365,12 +3366,13 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
                         cb->prev_seq = 0;
                         cb->seq = 0;
                 }
-               if (dumpit(skb, cb))
+               ret = dumpit(skb, cb);
+               if (ret < 0)
                         break;
         }
         cb->family = idx;
  
-       return skb->len;
+       return skb->len ? : ret;
  }
  
  struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev,
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c

index 63d5b58..a34602a 100644 (file)
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1704,6 +1704,7 @@ static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
  
                         net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
                         if (IS_ERR(net)) {
+                               fillargs->netnsid = -1;
                                 NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id");
                                 return PTR_ERR(net);
                         }
@@ -1761,7 +1762,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
         struct net_device *dev;
         struct in_device *in_dev;
         struct hlist_head *head;
-       int err;
+       int err = 0;
  
         s_h = cb->args[0];
         s_idx = idx = cb->args[1];
@@ -1771,12 +1772,15 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
                 err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
                                                  skb->sk, cb);
                 if (err < 0)
-                       return err;
+                       goto put_tgt_net;
  
+               err = 0;
                 if (fillargs.ifindex) {
                         dev = __dev_get_by_index(tgt_net, fillargs.ifindex);
-                       if (!dev)
-                               return -ENODEV;
+                       if (!dev) {
+                               err = -ENODEV;
+                               goto put_tgt_net;
+                       }
  
                         in_dev = __in_dev_get_rtnl(dev);
                         if (in_dev) {
@@ -1821,7 +1825,7 @@ put_tgt_net:
         if (fillargs.netnsid >= 0)
                 put_net(tgt_net);
  
-       return skb->len;
+       return err < 0 ? err : skb->len;
  }
  
  static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c

index 5bf653f..6df95be 100644 (file)
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -829,6 +829,7 @@ int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh,
                 return -EINVAL;
         }
  
+       filter->dump_all_families = (rtm->rtm_family == AF_UNSPEC);
         filter->flags    = rtm->rtm_flags;
         filter->protocol = rtm->rtm_protocol;
         filter->rt_type  = rtm->rtm_type;
@@ -899,6 +900,9 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
         if (filter.table_id) {
                 tb = fib_get_table(net, filter.table_id);
                 if (!tb) {
+                       if (filter.dump_all_families)
+                               return skb->len;
+
                         NL_SET_ERR_MSG(cb->extack, "ipv4: FIB table does not exist");
                         return -ENOENT;
                 }
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c

index 7a3e2ac..a6defbe 100644 (file)
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -2542,6 +2542,9 @@ static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
  
                 mrt = ipmr_get_table(sock_net(skb->sk), filter.table_id);
                 if (!mrt) {
+                       if (filter.dump_all_families)
+                               return skb->len;
+
                         NL_SET_ERR_MSG(cb->extack, "ipv4: MR table does not exist");
                         return -ENOENT;
                 }
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c

index cf8252d..ca3ed93 100644 (file)
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -609,8 +609,8 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
         struct net *net = dev_net(skb->dev);
  
         sk = __udp4_lib_lookup(net, iph->daddr, uh->dest,
-                              iph->saddr, uh->source, skb->dev->ifindex, 0,
-                              udptable, NULL);
+                              iph->saddr, uh->source, skb->dev->ifindex,
+                              inet_sdif(skb), udptable, NULL);
         if (!sk) {
                 __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
                 return; /* No socket for error */
@@ -2120,8 +2120,24 @@ static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
         /* Note, we are only interested in != 0 or == 0, thus the
          * force to int.
          */
-       return (__force int)skb_checksum_init_zero_check(skb, proto, uh->check,
-                                                        inet_compute_pseudo);
+       err = (__force int)skb_checksum_init_zero_check(skb, proto, uh->check,
+                                                       inet_compute_pseudo);
+       if (err)
+               return err;
+
+       if (skb->ip_summed == CHECKSUM_COMPLETE && !skb->csum_valid) {
+               /* If SW calculated the value, we know it's bad */
+               if (skb->csum_complete_sw)
+                       return 1;
+
+               /* HW says the value is bad. Let's validate that.
+                * skb->csum is no longer the full packet checksum,
+                * so don't treat it as such.
+                */
+               skb_checksum_complete_unset(skb);
+       }
+
+       return 0;
  }
  
  /* wrapper for udp_queue_rcv_skb tacking care of csum conversion and
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c

index 45b84dd..63a808d 100644 (file)
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -5058,6 +5058,7 @@ static int inet6_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
                         fillargs->netnsid = nla_get_s32(tb[i]);
                         net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
                         if (IS_ERR(net)) {
+                               fillargs->netnsid = -1;
                                 NL_SET_ERR_MSG_MOD(extack, "Invalid target network namespace id");
                                 return PTR_ERR(net);
                         }
@@ -5089,23 +5090,25 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
         struct net_device *dev;
         struct inet6_dev *idev;
         struct hlist_head *head;
+       int err = 0;
  
         s_h = cb->args[0];
         s_idx = idx = cb->args[1];
         s_ip_idx = cb->args[2];
  
         if (cb->strict_check) {
-               int err;
-
                 err = inet6_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
                                                   skb->sk, cb);
                 if (err < 0)
-                       return err;
+                       goto put_tgt_net;
  
+               err = 0;
                 if (fillargs.ifindex) {
                         dev = __dev_get_by_index(tgt_net, fillargs.ifindex);
-                       if (!dev)
-                               return -ENODEV;
+                       if (!dev) {
+                               err = -ENODEV;
+                               goto put_tgt_net;
+                       }
                         idev = __in6_dev_get(dev);
                         if (idev) {
                                 err = in6_dump_addrs(idev, skb, cb, s_ip_idx,
@@ -5144,7 +5147,7 @@ put_tgt_net:
         if (fillargs.netnsid >= 0)
                 put_net(tgt_net);
  
-       return skb->len;
+       return err < 0 ? err : skb->len;
  }
  
  static int inet6_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c

index 547515e..3777170 100644 (file)
--- a/net/ipv6/ip6_checksum.c
+++ b/net/ipv6/ip6_checksum.c
@@ -88,8 +88,24 @@ int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto)
          * Note, we are only interested in != 0 or == 0, thus the
          * force to int.
          */
-       return (__force int)skb_checksum_init_zero_check(skb, proto, uh->check,
-                                                        ip6_compute_pseudo);
+       err = (__force int)skb_checksum_init_zero_check(skb, proto, uh->check,
+                                                       ip6_compute_pseudo);
+       if (err)
+               return err;
+
+       if (skb->ip_summed == CHECKSUM_COMPLETE && !skb->csum_valid) {
+               /* If SW calculated the value, we know it's bad */
+               if (skb->csum_complete_sw)
+                       return 1;
+
+               /* HW says the value is bad. Let's validate that.
+                * skb->csum is no longer the full packet checksum,
+                * so don't treat it as such.
+                */
+               skb_checksum_complete_unset(skb);
+       }
+
+       return 0;
  }
  EXPORT_SYMBOL(udp6_csum_init);
  
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c

index 2a058b4..1b8bc00 100644 (file)
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -620,6 +620,9 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
         if (arg.filter.table_id) {
                 tb = fib6_get_table(net, arg.filter.table_id);
                 if (!tb) {
+                       if (arg.filter.dump_all_families)
+                               return skb->len;
+
                         NL_SET_ERR_MSG_MOD(cb->extack, "FIB table does not exist");
                         return -ENOENT;
                 }
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c

index c3317ff..e2ea691 100644 (file)
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -2473,6 +2473,9 @@ static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
  
                 mrt = ip6mr_get_table(sock_net(skb->sk), filter.table_id);
                 if (!mrt) {
+                       if (filter.dump_all_families)
+                               return skb->len;
+
                         NL_SET_ERR_MSG_MOD(cb->extack, "MR table does not exist");
                         return -ENOENT;
                 }
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c

index a25cfdd..659ecf4 100644 (file)
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1732,10 +1732,9 @@ int ndisc_rcv(struct sk_buff *skb)
                 return 0;
         }
  
-       memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb));
-
         switch (msg->icmph.icmp6_type) {
         case NDISC_NEIGHBOUR_SOLICITATION:
+               memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb));
                 ndisc_recv_ns(skb);
                 break;
  
diff --git a/net/ipv6/route.c b/net/ipv6/route.c

index e322628..2a7423c 100644 (file)
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2745,6 +2745,8 @@ static int ip6_route_check_nh_onlink(struct net *net,
         grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
         if (grt) {
                 if (!grt->dst.error &&
+                   /* ignore match if it is the default route */
+                   grt->from && !ipv6_addr_any(&grt->from->fib6_dst.addr) &&
                     (grt->rt6i_flags & flags || dev != grt->dst.dev)) {
                         NL_SET_ERR_MSG(extack,
                                        "Nexthop has invalid gateway or device mismatch");
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c

index 06d17ff..d2d97d0 100644 (file)
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -478,7 +478,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
         struct net *net = dev_net(skb->dev);
  
         sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source,
-                              inet6_iif(skb), 0, udptable, skb);
+                              inet6_iif(skb), inet6_sdif(skb), udptable, skb);
         if (!sk) {
                 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
                                   ICMP6_MIB_INERRORS);
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c

index 022bca9..ca3b0f4 100644 (file)
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1320,7 +1320,6 @@ check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
  
  const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = {
         [TCA_KIND]              = { .type = NLA_STRING },
-       [TCA_OPTIONS]           = { .type = NLA_NESTED },
         [TCA_RATE]              = { .type = NLA_BINARY,
                                     .len = sizeof(struct tc_estimator) },
         [TCA_STAB]              = { .type = NLA_NESTED },
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c

index e871368..18daebc 100644 (file)
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -122,22 +122,17 @@ static void __smc_lgr_unregister_conn(struct smc_connection *conn)
         sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */
  }
  
-/* Unregister connection and trigger lgr freeing if applicable
+/* Unregister connection from lgr
   */
  static void smc_lgr_unregister_conn(struct smc_connection *conn)
  {
         struct smc_link_group *lgr = conn->lgr;
-       int reduced = 0;
  
         write_lock_bh(&lgr->conns_lock);
         if (conn->alert_token_local) {
-               reduced = 1;
                 __smc_lgr_unregister_conn(conn);
         }
         write_unlock_bh(&lgr->conns_lock);
-       if (!reduced || lgr->conns_num)
-               return;
-       smc_lgr_schedule_free_work(lgr);
  }
  
  /* Send delete link, either as client to request the initiation
@@ -291,7 +286,8 @@ out:
         return rc;
  }
  
-static void smc_buf_unuse(struct smc_connection *conn)
+static void smc_buf_unuse(struct smc_connection *conn,
+                         struct smc_link_group *lgr)
  {
         if (conn->sndbuf_desc)
                 conn->sndbuf_desc->used = 0;
@@ -301,8 +297,6 @@ static void smc_buf_unuse(struct smc_connection *conn)
                         conn->rmb_desc->used = 0;
                 } else {
                         /* buf registration failed, reuse not possible */
-                       struct smc_link_group *lgr = conn->lgr;
-
                         write_lock_bh(&lgr->rmbs_lock);
                         list_del(&conn->rmb_desc->list);
                         write_unlock_bh(&lgr->rmbs_lock);
@@ -315,16 +309,21 @@ static void smc_buf_unuse(struct smc_connection *conn)
  /* remove a finished connection from its link group */
  void smc_conn_free(struct smc_connection *conn)
  {
-       if (!conn->lgr)
+       struct smc_link_group *lgr = conn->lgr;
+
+       if (!lgr)
                 return;
-       if (conn->lgr->is_smcd) {
+       if (lgr->is_smcd) {
                 smc_ism_unset_conn(conn);
                 tasklet_kill(&conn->rx_tsklet);
         } else {
                 smc_cdc_tx_dismiss_slots(conn);
         }
-       smc_lgr_unregister_conn(conn);
-       smc_buf_unuse(conn);
+       smc_lgr_unregister_conn(conn);          /* unsets conn->lgr */
+       smc_buf_unuse(conn, lgr);               /* allow buffer reuse */
+
+       if (!lgr->conns_num)
+               smc_lgr_schedule_free_work(lgr);
  }
  
  static void smc_link_clear(struct smc_link *lnk)
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c

index 305ecea..ad8ead7 100644 (file)
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -30,10 +30,9 @@ struct rpc_cred_cache {
  
  static unsigned int auth_hashbits = RPC_CREDCACHE_DEFAULT_HASHBITS;
  
-static DEFINE_SPINLOCK(rpc_authflavor_lock);
-static const struct rpc_authops *auth_flavors[RPC_AUTH_MAXFLAVOR] = {
-       &authnull_ops,          /* AUTH_NULL */
-       &authunix_ops,          /* AUTH_UNIX */
+static const struct rpc_authops __rcu *auth_flavors[RPC_AUTH_MAXFLAVOR] = {
+       [RPC_AUTH_NULL] = (const struct rpc_authops __force __rcu *)&authnull_ops,
+       [RPC_AUTH_UNIX] = (const struct rpc_authops __force __rcu *)&authunix_ops,
         NULL,                   /* others can be loadable modules */
  };
  
@@ -93,39 +92,65 @@ pseudoflavor_to_flavor(u32 flavor) {
  int
  rpcauth_register(const struct rpc_authops *ops)
  {
+       const struct rpc_authops *old;
         rpc_authflavor_t flavor;
-       int ret = -EPERM;
  
         if ((flavor = ops->au_flavor) >= RPC_AUTH_MAXFLAVOR)
                 return -EINVAL;
-       spin_lock(&rpc_authflavor_lock);
-       if (auth_flavors[flavor] == NULL) {
-               auth_flavors[flavor] = ops;
-               ret = 0;
-       }
-       spin_unlock(&rpc_authflavor_lock);
-       return ret;
+       old = cmpxchg((const struct rpc_authops ** __force)&auth_flavors[flavor], NULL, ops);
+       if (old == NULL || old == ops)
+               return 0;
+       return -EPERM;
  }
  EXPORT_SYMBOL_GPL(rpcauth_register);
  
  int
  rpcauth_unregister(const struct rpc_authops *ops)
  {
+       const struct rpc_authops *old;
         rpc_authflavor_t flavor;
-       int ret = -EPERM;
  
         if ((flavor = ops->au_flavor) >= RPC_AUTH_MAXFLAVOR)
                 return -EINVAL;
-       spin_lock(&rpc_authflavor_lock);
-       if (auth_flavors[flavor] == ops) {
-               auth_flavors[flavor] = NULL;
-               ret = 0;
-       }
-       spin_unlock(&rpc_authflavor_lock);
-       return ret;
+
+       old = cmpxchg((const struct rpc_authops ** __force)&auth_flavors[flavor], ops, NULL);
+       if (old == ops || old == NULL)
+               return 0;
+       return -EPERM;
  }
  EXPORT_SYMBOL_GPL(rpcauth_unregister);
  
+static const struct rpc_authops *
+rpcauth_get_authops(rpc_authflavor_t flavor)
+{
+       const struct rpc_authops *ops;
+
+       if (flavor >= RPC_AUTH_MAXFLAVOR)
+               return NULL;
+
+       rcu_read_lock();
+       ops = rcu_dereference(auth_flavors[flavor]);
+       if (ops == NULL) {
+               rcu_read_unlock();
+               request_module("rpc-auth-%u", flavor);
+               rcu_read_lock();
+               ops = rcu_dereference(auth_flavors[flavor]);
+               if (ops == NULL)
+                       goto out;
+       }
+       if (!try_module_get(ops->owner))
+               ops = NULL;
+out:
+       rcu_read_unlock();
+       return ops;
+}
+
+static void
+rpcauth_put_authops(const struct rpc_authops *ops)
+{
+       module_put(ops->owner);
+}
+
  /**
   * rpcauth_get_pseudoflavor - check if security flavor is supported
   * @flavor: a security flavor
@@ -138,25 +163,16 @@ EXPORT_SYMBOL_GPL(rpcauth_unregister);
  rpc_authflavor_t
  rpcauth_get_pseudoflavor(rpc_authflavor_t flavor, struct rpcsec_gss_info *info)
  {
-       const struct rpc_authops *ops;
+       const struct rpc_authops *ops = rpcauth_get_authops(flavor);
         rpc_authflavor_t pseudoflavor;
  
-       ops = auth_flavors[flavor];
-       if (ops == NULL)
-               request_module("rpc-auth-%u", flavor);
-       spin_lock(&rpc_authflavor_lock);
-       ops = auth_flavors[flavor];
-       if (ops == NULL || !try_module_get(ops->owner)) {
-               spin_unlock(&rpc_authflavor_lock);
+       if (!ops)
                 return RPC_AUTH_MAXFLAVOR;
-       }
-       spin_unlock(&rpc_authflavor_lock);
-
         pseudoflavor = flavor;
         if (ops->info2flavor != NULL)
                 pseudoflavor = ops->info2flavor(info);
  
-       module_put(ops->owner);
+       rpcauth_put_authops(ops);
         return pseudoflavor;
  }
  EXPORT_SYMBOL_GPL(rpcauth_get_pseudoflavor);
@@ -176,25 +192,15 @@ rpcauth_get_gssinfo(rpc_authflavor_t pseudoflavor, struct rpcsec_gss_info *info)
         const struct rpc_authops *ops;
         int result;
  
-       if (flavor >= RPC_AUTH_MAXFLAVOR)
-               return -EINVAL;
-
-       ops = auth_flavors[flavor];
+       ops = rpcauth_get_authops(flavor);
         if (ops == NULL)
-               request_module("rpc-auth-%u", flavor);
-       spin_lock(&rpc_authflavor_lock);
-       ops = auth_flavors[flavor];
-       if (ops == NULL || !try_module_get(ops->owner)) {
-               spin_unlock(&rpc_authflavor_lock);
                 return -ENOENT;
-       }
-       spin_unlock(&rpc_authflavor_lock);
  
         result = -ENOENT;
         if (ops->flavor2info != NULL)
                 result = ops->flavor2info(pseudoflavor, info);
  
-       module_put(ops->owner);
+       rpcauth_put_authops(ops);
         return result;
  }
  EXPORT_SYMBOL_GPL(rpcauth_get_gssinfo);
@@ -212,15 +218,13 @@ EXPORT_SYMBOL_GPL(rpcauth_get_gssinfo);
  int
  rpcauth_list_flavors(rpc_authflavor_t *array, int size)
  {
-       rpc_authflavor_t flavor;
-       int result = 0;
+       const struct rpc_authops *ops;
+       rpc_authflavor_t flavor, pseudos[4];
+       int i, len, result = 0;
  
-       spin_lock(&rpc_authflavor_lock);
+       rcu_read_lock();
         for (flavor = 0; flavor < RPC_AUTH_MAXFLAVOR; flavor++) {
-               const struct rpc_authops *ops = auth_flavors[flavor];
-               rpc_authflavor_t pseudos[4];
-               int i, len;
-
+               ops = rcu_dereference(auth_flavors[flavor]);
                 if (result >= size) {
                         result = -ENOMEM;
                         break;
@@ -245,7 +249,7 @@ rpcauth_list_flavors(rpc_authflavor_t *array, int size)
                         array[result++] = pseudos[i];
                 }
         }
-       spin_unlock(&rpc_authflavor_lock);
+       rcu_read_unlock();
  
         dprintk("RPC:       %s returns %d\n", __func__, result);
         return result;
@@ -255,25 +259,17 @@ EXPORT_SYMBOL_GPL(rpcauth_list_flavors);
  struct rpc_auth *
  rpcauth_create(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
  {
-       struct rpc_auth         *auth;
+       struct rpc_auth *auth = ERR_PTR(-EINVAL);
         const struct rpc_authops *ops;
-       u32                     flavor = pseudoflavor_to_flavor(args->pseudoflavor);
+       u32 flavor = pseudoflavor_to_flavor(args->pseudoflavor);
  
-       auth = ERR_PTR(-EINVAL);
-       if (flavor >= RPC_AUTH_MAXFLAVOR)
+       ops = rpcauth_get_authops(flavor);
+       if (ops == NULL)
                 goto out;
  
-       if ((ops = auth_flavors[flavor]) == NULL)
-               request_module("rpc-auth-%u", flavor);
-       spin_lock(&rpc_authflavor_lock);
-       ops = auth_flavors[flavor];
-       if (ops == NULL || !try_module_get(ops->owner)) {
-               spin_unlock(&rpc_authflavor_lock);
-               goto out;
-       }
-       spin_unlock(&rpc_authflavor_lock);
         auth = ops->create(args, clnt);
-       module_put(ops->owner);
+
+       rpcauth_put_authops(ops);
         if (IS_ERR(auth))
                 return auth;
         if (clnt->cl_auth)
@@ -288,32 +284,37 @@ EXPORT_SYMBOL_GPL(rpcauth_create);
  void
  rpcauth_release(struct rpc_auth *auth)
  {
-       if (!atomic_dec_and_test(&auth->au_count))
+       if (!refcount_dec_and_test(&auth->au_count))
                 return;
         auth->au_ops->destroy(auth);
  }
  
  static DEFINE_SPINLOCK(rpc_credcache_lock);
  
-static void
+/*
+ * On success, the caller is responsible for freeing the reference
+ * held by the hashtable
+ */
+static bool
  rpcauth_unhash_cred_locked(struct rpc_cred *cred)
  {
+       if (!test_and_clear_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags))
+               return false;
         hlist_del_rcu(&cred->cr_hash);
-       smp_mb__before_atomic();
-       clear_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags);
+       return true;
  }
  
-static int
+static bool
  rpcauth_unhash_cred(struct rpc_cred *cred)
  {
         spinlock_t *cache_lock;
-       int ret;
+       bool ret;
  
+       if (!test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags))
+               return false;
         cache_lock = &cred->cr_auth->au_credcache->lock;
         spin_lock(cache_lock);
-       ret = atomic_read(&cred->cr_count) == 0;
-       if (ret)
-               rpcauth_unhash_cred_locked(cred);
+       ret = rpcauth_unhash_cred_locked(cred);
         spin_unlock(cache_lock);
         return ret;
  }
@@ -392,6 +393,44 @@ void rpcauth_destroy_credlist(struct list_head *head)
         }
  }
  
+static void
+rpcauth_lru_add_locked(struct rpc_cred *cred)
+{
+       if (!list_empty(&cred->cr_lru))
+               return;
+       number_cred_unused++;
+       list_add_tail(&cred->cr_lru, &cred_unused);
+}
+
+static void
+rpcauth_lru_add(struct rpc_cred *cred)
+{
+       if (!list_empty(&cred->cr_lru))
+               return;
+       spin_lock(&rpc_credcache_lock);
+       rpcauth_lru_add_locked(cred);
+       spin_unlock(&rpc_credcache_lock);
+}
+
+static void
+rpcauth_lru_remove_locked(struct rpc_cred *cred)
+{
+       if (list_empty(&cred->cr_lru))
+               return;
+       number_cred_unused--;
+       list_del_init(&cred->cr_lru);
+}
+
+static void
+rpcauth_lru_remove(struct rpc_cred *cred)
+{
+       if (list_empty(&cred->cr_lru))
+               return;
+       spin_lock(&rpc_credcache_lock);
+       rpcauth_lru_remove_locked(cred);
+       spin_unlock(&rpc_credcache_lock);
+}
+
  /*
   * Clear the RPC credential cache, and delete those credentials
   * that are not referenced.
@@ -411,13 +450,10 @@ rpcauth_clear_credcache(struct rpc_cred_cache *cache)
                 head = &cache->hashtable[i];
                 while (!hlist_empty(head)) {
                         cred = hlist_entry(head->first, struct rpc_cred, cr_hash);
-                       get_rpccred(cred);
-                       if (!list_empty(&cred->cr_lru)) {
-                               list_del(&cred->cr_lru);
-                               number_cred_unused--;
-                       }
-                       list_add_tail(&cred->cr_lru, &free);
                         rpcauth_unhash_cred_locked(cred);
+                       /* Note: We now hold a reference to cred */
+                       rpcauth_lru_remove_locked(cred);
+                       list_add_tail(&cred->cr_lru, &free);
                 }
         }
         spin_unlock(&cache->lock);
@@ -451,7 +487,6 @@ EXPORT_SYMBOL_GPL(rpcauth_destroy_credcache);
  static long
  rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
  {
-       spinlock_t *cache_lock;
         struct rpc_cred *cred, *next;
         unsigned long expired = jiffies - RPC_AUTH_EXPIRY_MORATORIUM;
         long freed = 0;
@@ -460,32 +495,24 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
  
                 if (nr_to_scan-- == 0)
                         break;
+               if (refcount_read(&cred->cr_count) > 1) {
+                       rpcauth_lru_remove_locked(cred);
+                       continue;
+               }
                 /*
                  * Enforce a 60 second garbage collection moratorium
                  * Note that the cred_unused list must be time-ordered.
                  */
-               if (time_in_range(cred->cr_expire, expired, jiffies) &&
-                   test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) {
-                       freed = SHRINK_STOP;
-                       break;
-               }
-
-               list_del_init(&cred->cr_lru);
-               number_cred_unused--;
-               freed++;
-               if (atomic_read(&cred->cr_count) != 0)
+               if (!time_in_range(cred->cr_expire, expired, jiffies))
+                       continue;
+               if (!rpcauth_unhash_cred(cred))
                         continue;
  
-               cache_lock = &cred->cr_auth->au_credcache->lock;
-               spin_lock(cache_lock);
-               if (atomic_read(&cred->cr_count) == 0) {
-                       get_rpccred(cred);
-                       list_add_tail(&cred->cr_lru, free);
-                       rpcauth_unhash_cred_locked(cred);
-               }
-               spin_unlock(cache_lock);
+               rpcauth_lru_remove_locked(cred);
+               freed++;
+               list_add_tail(&cred->cr_lru, free);
         }
-       return freed;
+       return freed ? freed : SHRINK_STOP;
  }
  
  static unsigned long
@@ -561,19 +588,15 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
                 if (!entry->cr_ops->crmatch(acred, entry, flags))
                         continue;
                 if (flags & RPCAUTH_LOOKUP_RCU) {
-                       if (test_bit(RPCAUTH_CRED_HASHED, &entry->cr_flags) &&
-                           !test_bit(RPCAUTH_CRED_NEW, &entry->cr_flags))
-                               cred = entry;
+                       if (test_bit(RPCAUTH_CRED_NEW, &entry->cr_flags) ||
+                           refcount_read(&entry->cr_count) == 0)
+                               continue;
+                       cred = entry;
                         break;
                 }
-               spin_lock(&cache->lock);
-               if (test_bit(RPCAUTH_CRED_HASHED, &entry->cr_flags) == 0) {
-                       spin_unlock(&cache->lock);
-                       continue;
-               }
                 cred = get_rpccred(entry);
-               spin_unlock(&cache->lock);
-               break;
+               if (cred)
+                       break;
         }
         rcu_read_unlock();
  
@@ -594,11 +617,13 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
                 if (!entry->cr_ops->crmatch(acred, entry, flags))
                         continue;
                 cred = get_rpccred(entry);
-               break;
+               if (cred)
+                       break;
         }
         if (cred == NULL) {
                 cred = new;
                 set_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags);
+               refcount_inc(&cred->cr_count);
                 hlist_add_head_rcu(&cred->cr_hash, &cache->hashtable[nr]);
         } else
                 list_add_tail(&new->cr_lru, &free);
@@ -645,7 +670,7 @@ rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred,
  {
         INIT_HLIST_NODE(&cred->cr_hash);
         INIT_LIST_HEAD(&cred->cr_lru);
-       atomic_set(&cred->cr_count, 1);
+       refcount_set(&cred->cr_count, 1);
         cred->cr_auth = auth;
         cred->cr_ops = ops;
         cred->cr_expire = jiffies;
@@ -713,36 +738,29 @@ put_rpccred(struct rpc_cred *cred)
  {
         if (cred == NULL)
                 return;
-       /* Fast path for unhashed credentials */
-       if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) == 0) {
-               if (atomic_dec_and_test(&cred->cr_count))
-                       cred->cr_ops->crdestroy(cred);
-               return;
-       }
-
-       if (!atomic_dec_and_lock(&cred->cr_count, &rpc_credcache_lock))
-               return;
-       if (!list_empty(&cred->cr_lru)) {
-               number_cred_unused--;
-               list_del_init(&cred->cr_lru);
-       }
-       if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) {
-               if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0) {
-                       cred->cr_expire = jiffies;
-                       list_add_tail(&cred->cr_lru, &cred_unused);
-                       number_cred_unused++;
-                       goto out_nodestroy;
-               }
-               if (!rpcauth_unhash_cred(cred)) {
-                       /* We were hashed and someone looked us up... */
-                       goto out_nodestroy;
-               }
+       rcu_read_lock();
+       if (refcount_dec_and_test(&cred->cr_count))
+               goto destroy;
+       if (refcount_read(&cred->cr_count) != 1 ||
+           !test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags))
+               goto out;
+       if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0) {
+               cred->cr_expire = jiffies;
+               rpcauth_lru_add(cred);
+               /* Race breaker */
+               if (unlikely(!test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags)))
+                       rpcauth_lru_remove(cred);
+       } else if (rpcauth_unhash_cred(cred)) {
+               rpcauth_lru_remove(cred);
+               if (refcount_dec_and_test(&cred->cr_count))
+                       goto destroy;
         }
-       spin_unlock(&rpc_credcache_lock);
-       cred->cr_ops->crdestroy(cred);
+out:
+       rcu_read_unlock();
         return;
-out_nodestroy:
-       spin_unlock(&rpc_credcache_lock);
+destroy:
+       rcu_read_unlock();
+       cred->cr_ops->crdestroy(cred);
  }
  EXPORT_SYMBOL_GPL(put_rpccred);
  
@@ -817,6 +835,16 @@ rpcauth_unwrap_resp(struct rpc_task *task, kxdrdproc_t decode, void *rqstp,
         return rpcauth_unwrap_req_decode(decode, rqstp, data, obj);
  }
  
+bool
+rpcauth_xmit_need_reencode(struct rpc_task *task)
+{
+       struct rpc_cred *cred = task->tk_rqstp->rq_cred;
+
+       if (!cred || !cred->cr_ops->crneed_reencode)
+               return false;
+       return cred->cr_ops->crneed_reencode(task);
+}
+
  int
  rpcauth_refreshcred(struct rpc_task *task)
  {
diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c

index f1df983..d8831b9 100644 (file)
--- a/net/sunrpc/auth_generic.c
+++ b/net/sunrpc/auth_generic.c
@@ -274,7 +274,7 @@ static const struct rpc_authops generic_auth_ops = {
  
  static struct rpc_auth generic_auth = {
         .au_ops = &generic_auth_ops,
-       .au_count = ATOMIC_INIT(0),
+       .au_count = REFCOUNT_INIT(1),
  };
  
  static bool generic_key_to_expire(struct rpc_cred *cred)
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c

index 21c0aa0..30f970c 100644 (file)
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -1058,7 +1058,7 @@ gss_create_new(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
         auth->au_flavor = flavor;
         if (gss_pseudoflavor_to_datatouch(gss_auth->mech, flavor))
                 auth->au_flags |= RPCAUTH_AUTH_DATATOUCH;
-       atomic_set(&auth->au_count, 1);
+       refcount_set(&auth->au_count, 1);
         kref_init(&gss_auth->kref);
  
         err = rpcauth_init_credcache(auth);
@@ -1187,7 +1187,7 @@ gss_auth_find_or_add_hashed(const struct rpc_auth_create_args *args,
                         if (strcmp(gss_auth->target_name, args->target_name))
                                 continue;
                 }
-               if (!atomic_inc_not_zero(&gss_auth->rpc_auth.au_count))
+               if (!refcount_inc_not_zero(&gss_auth->rpc_auth.au_count))
                         continue;
                 goto out;
         }
@@ -1984,6 +1984,46 @@ gss_unwrap_req_decode(kxdrdproc_t decode, struct rpc_rqst *rqstp,
         return decode(rqstp, &xdr, obj);
  }
  
+static bool
+gss_seq_is_newer(u32 new, u32 old)
+{
+       return (s32)(new - old) > 0;
+}
+
+static bool
+gss_xmit_need_reencode(struct rpc_task *task)
+{
+       struct rpc_rqst *req = task->tk_rqstp;
+       struct rpc_cred *cred = req->rq_cred;
+       struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
+       u32 win, seq_xmit;
+       bool ret = true;
+
+       if (!ctx)
+               return true;
+
+       if (gss_seq_is_newer(req->rq_seqno, READ_ONCE(ctx->gc_seq)))
+               goto out;
+
+       seq_xmit = READ_ONCE(ctx->gc_seq_xmit);
+       while (gss_seq_is_newer(req->rq_seqno, seq_xmit)) {
+               u32 tmp = seq_xmit;
+
+               seq_xmit = cmpxchg(&ctx->gc_seq_xmit, tmp, req->rq_seqno);
+               if (seq_xmit == tmp) {
+                       ret = false;
+                       goto out;
+               }
+       }
+
+       win = ctx->gc_win;
+       if (win > 0)
+               ret = !gss_seq_is_newer(req->rq_seqno, seq_xmit - win);
+out:
+       gss_put_ctx(ctx);
+       return ret;
+}
+
  static int
  gss_unwrap_resp(struct rpc_task *task,
                 kxdrdproc_t decode, void *rqstp, __be32 *p, void *obj)
@@ -2052,6 +2092,7 @@ static const struct rpc_credops gss_credops = {
         .crunwrap_resp          = gss_unwrap_resp,
         .crkey_timeout          = gss_key_timeout,
         .crstringify_acceptor   = gss_stringify_acceptor,
+       .crneed_reencode        = gss_xmit_need_reencode,
  };
  
  static const struct rpc_credops gss_nullops = {
diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c

index eaad9bc..b4adeb0 100644 (file)
--- a/net/sunrpc/auth_gss/gss_krb5_seal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seal.c
@@ -63,13 +63,12 @@
  #include <linux/sunrpc/gss_krb5.h>
  #include <linux/random.h>
  #include <linux/crypto.h>
+#include <linux/atomic.h>
  
  #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
  # define RPCDBG_FACILITY        RPCDBG_AUTH
  #endif
  
-DEFINE_SPINLOCK(krb5_seq_lock);
-
  static void *
  setup_token(struct krb5_ctx *ctx, struct xdr_netobj *token)
  {
@@ -124,6 +123,30 @@ setup_token_v2(struct krb5_ctx *ctx, struct xdr_netobj *token)
         return krb5_hdr;
  }
  
+u32
+gss_seq_send_fetch_and_inc(struct krb5_ctx *ctx)
+{
+       u32 old, seq_send = READ_ONCE(ctx->seq_send);
+
+       do {
+               old = seq_send;
+               seq_send = cmpxchg(&ctx->seq_send, old, old + 1);
+       } while (old != seq_send);
+       return seq_send;
+}
+
+u64
+gss_seq_send64_fetch_and_inc(struct krb5_ctx *ctx)
+{
+       u64 old, seq_send = READ_ONCE(ctx->seq_send64);
+
+       do {
+               old = seq_send;
+               seq_send = cmpxchg64(&ctx->seq_send64, old, old + 1);
+       } while (old != seq_send);
+       return seq_send;
+}
+
  static u32
  gss_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text,
                 struct xdr_netobj *token)
@@ -154,9 +177,7 @@ gss_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text,
  
         memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data, md5cksum.len);
  
-       spin_lock(&krb5_seq_lock);
-       seq_send = ctx->seq_send++;
-       spin_unlock(&krb5_seq_lock);
+       seq_send = gss_seq_send_fetch_and_inc(ctx);
  
         if (krb5_make_seq_num(ctx, ctx->seq, ctx->initiate ? 0 : 0xff,
                               seq_send, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8))
@@ -174,7 +195,6 @@ gss_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text,
                                        .data = cksumdata};
         void *krb5_hdr;
         s32 now;
-       u64 seq_send;
         u8 *cksumkey;
         unsigned int cksum_usage;
         __be64 seq_send_be64;
@@ -185,11 +205,7 @@ gss_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text,
  
         /* Set up the sequence number. Now 64-bits in clear
          * text and w/o direction indicator */
-       spin_lock(&krb5_seq_lock);
-       seq_send = ctx->seq_send64++;
-       spin_unlock(&krb5_seq_lock);
-
-       seq_send_be64 = cpu_to_be64(seq_send);
+       seq_send_be64 = cpu_to_be64(gss_seq_send64_fetch_and_inc(ctx));
         memcpy(krb5_hdr + 8, (char *) &seq_send_be64, 8);
  
         if (ctx->initiate) {
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c

index 3d975a4..962fa84 100644 (file)
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -228,9 +228,7 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
  
         memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data, md5cksum.len);
  
-       spin_lock(&krb5_seq_lock);
-       seq_send = kctx->seq_send++;
-       spin_unlock(&krb5_seq_lock);
+       seq_send = gss_seq_send_fetch_and_inc(kctx);
  
         /* XXX would probably be more efficient to compute checksum
          * and encrypt at the same time: */
@@ -477,9 +475,7 @@ gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset,
         *be16ptr++ = 0;
  
         be64ptr = (__be64 *)be16ptr;
-       spin_lock(&krb5_seq_lock);
-       *be64ptr = cpu_to_be64(kctx->seq_send64++);
-       spin_unlock(&krb5_seq_lock);
+       *be64ptr = cpu_to_be64(gss_seq_send64_fetch_and_inc(kctx));
  
         err = (*kctx->gk5e->encrypt_v2)(kctx, offset, buf, pages);
         if (err)
diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c

index 5fec3ab..16ac0f4 100644 (file)
--- a/net/sunrpc/auth_gss/gss_mech_switch.c
+++ b/net/sunrpc/auth_gss/gss_mech_switch.c
@@ -117,7 +117,7 @@ int gss_mech_register(struct gss_api_mech *gm)
         if (status)
                 return status;
         spin_lock(&registered_mechs_lock);
-       list_add(&gm->gm_list, &registered_mechs);
+       list_add_rcu(&gm->gm_list, &registered_mechs);
         spin_unlock(&registered_mechs_lock);
         dprintk("RPC:       registered gss mechanism %s\n", gm->gm_name);
         return 0;
@@ -132,7 +132,7 @@ EXPORT_SYMBOL_GPL(gss_mech_register);
  void gss_mech_unregister(struct gss_api_mech *gm)
  {
         spin_lock(&registered_mechs_lock);
-       list_del(&gm->gm_list);
+       list_del_rcu(&gm->gm_list);
         spin_unlock(&registered_mechs_lock);
         dprintk("RPC:       unregistered gss mechanism %s\n", gm->gm_name);
         gss_mech_free(gm);
@@ -151,15 +151,15 @@ _gss_mech_get_by_name(const char *name)
  {
         struct gss_api_mech     *pos, *gm = NULL;
  
-       spin_lock(&registered_mechs_lock);
-       list_for_each_entry(pos, &registered_mechs, gm_list) {
+       rcu_read_lock();
+       list_for_each_entry_rcu(pos, &registered_mechs, gm_list) {
                 if (0 == strcmp(name, pos->gm_name)) {
                         if (try_module_get(pos->gm_owner))
                                 gm = pos;
                         break;
                 }
         }
-       spin_unlock(&registered_mechs_lock);
+       rcu_read_unlock();
         return gm;
  
  }
@@ -186,8 +186,8 @@ struct gss_api_mech *gss_mech_get_by_OID(struct rpcsec_gss_oid *obj)
         dprintk("RPC:       %s(%s)\n", __func__, buf);
         request_module("rpc-auth-gss-%s", buf);
  
-       spin_lock(&registered_mechs_lock);
-       list_for_each_entry(pos, &registered_mechs, gm_list) {
+       rcu_read_lock();
+       list_for_each_entry_rcu(pos, &registered_mechs, gm_list) {
                 if (obj->len == pos->gm_oid.len) {
                         if (0 == memcmp(obj->data, pos->gm_oid.data, obj->len)) {
                                 if (try_module_get(pos->gm_owner))
@@ -196,7 +196,7 @@ struct gss_api_mech *gss_mech_get_by_OID(struct rpcsec_gss_oid *obj)
                         }
                 }
         }
-       spin_unlock(&registered_mechs_lock);
+       rcu_read_unlock();
         return gm;
  }
  
@@ -216,15 +216,15 @@ static struct gss_api_mech *_gss_mech_get_by_pseudoflavor(u32 pseudoflavor)
  {
         struct gss_api_mech *gm = NULL, *pos;
  
-       spin_lock(&registered_mechs_lock);
-       list_for_each_entry(pos, &registered_mechs, gm_list) {
+       rcu_read_lock();
+       list_for_each_entry_rcu(pos, &registered_mechs, gm_list) {
                 if (!mech_supports_pseudoflavor(pos, pseudoflavor))
                         continue;
                 if (try_module_get(pos->gm_owner))
                         gm = pos;
                 break;
         }
-       spin_unlock(&registered_mechs_lock);
+       rcu_read_unlock();
         return gm;
  }
  
@@ -257,8 +257,8 @@ int gss_mech_list_pseudoflavors(rpc_authflavor_t *array_ptr, int size)
         struct gss_api_mech *pos = NULL;
         int j, i = 0;
  
-       spin_lock(&registered_mechs_lock);
-       list_for_each_entry(pos, &registered_mechs, gm_list) {
+       rcu_read_lock();
+       list_for_each_entry_rcu(pos, &registered_mechs, gm_list) {
                 for (j = 0; j < pos->gm_pf_num; j++) {
                         if (i >= size) {
-                               spin_unlock(&registered_mechs_lock);
+                               rcu_read_unlock();
@@ -267,7 +267,7 @@ int gss_mech_list_pseudoflavors(rpc_authflavor_t *array_ptr, int size)
                         array_ptr[i++] = pos->gm_pfs[j].pseudoflavor;
                 }
         }
-       spin_unlock(&registered_mechs_lock);
+       rcu_read_unlock();
         return i;
  }
  
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c

index 444380f..006062a 100644 (file)
--- a/net/sunrpc/auth_gss/gss_rpc_xdr.c
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c
@@ -784,6 +784,7 @@ void gssx_enc_accept_sec_context(struct rpc_rqst *req,
         xdr_inline_pages(&req->rq_rcv_buf,
                 PAGE_SIZE/2 /* pretty arbitrary */,
                 arg->pages, 0 /* page base */, arg->npages * PAGE_SIZE);
+       req->rq_rcv_buf.flags |= XDRBUF_SPARSE_PAGES;
  done:
         if (err)
                 dprintk("RPC:       gssx_enc_accept_sec_context: %d\n", err);
diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c

index 4b48228..2694a1b 100644 (file)
--- a/net/sunrpc/auth_null.c
+++ b/net/sunrpc/auth_null.c
@@ -21,7 +21,7 @@ static struct rpc_cred null_cred;
  static struct rpc_auth *
  nul_create(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
  {
-       atomic_inc(&null_auth.au_count);
+       refcount_inc(&null_auth.au_count);
         return &null_auth;
  }
  
@@ -119,7 +119,7 @@ struct rpc_auth null_auth = {
         .au_flags       = RPCAUTH_AUTH_NO_CRKEY_TIMEOUT,
         .au_ops         = &authnull_ops,
         .au_flavor      = RPC_AUTH_NULL,
-       .au_count       = ATOMIC_INIT(0),
+       .au_count       = REFCOUNT_INIT(1),
  };
  
  static
@@ -138,6 +138,6 @@ struct rpc_cred null_cred = {
         .cr_lru         = LIST_HEAD_INIT(null_cred.cr_lru),
         .cr_auth        = &null_auth,
         .cr_ops         = &null_credops,
-       .cr_count       = ATOMIC_INIT(1),
+       .cr_count       = REFCOUNT_INIT(2),
         .cr_flags       = 1UL << RPCAUTH_CRED_UPTODATE,
  };
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c

index 185e56d..4c1c7e5 100644 (file)
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -34,7 +34,7 @@ unx_create(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
  {
         dprintk("RPC:       creating UNIX authenticator for client %p\n",
                         clnt);
-       atomic_inc(&unix_auth.au_count);
+       refcount_inc(&unix_auth.au_count);
         return &unix_auth;
  }
  
@@ -239,7 +239,7 @@ struct rpc_auth             unix_auth = {
         .au_flags       = RPCAUTH_AUTH_NO_CRKEY_TIMEOUT,
         .au_ops         = &authunix_ops,
         .au_flavor      = RPC_AUTH_UNIX,
-       .au_count       = ATOMIC_INIT(0),
+       .au_count       = REFCOUNT_INIT(1),
  };
  
  static
diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c

index 3c15a99..fa5ba6e 100644 (file)
--- a/net/sunrpc/backchannel_rqst.c
+++ b/net/sunrpc/backchannel_rqst.c
@@ -91,7 +91,6 @@ struct rpc_rqst *xprt_alloc_bc_req(struct rpc_xprt *xprt, gfp_t gfp_flags)
                 return NULL;
  
         req->rq_xprt = xprt;
-       INIT_LIST_HEAD(&req->rq_list);
         INIT_LIST_HEAD(&req->rq_bc_list);
  
         /* Preallocate one XDR receive buffer */
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c

index 8ea2f5f..ae3b814 100644 (file)
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -61,6 +61,7 @@ static void   call_start(struct rpc_task *task);
  static void    call_reserve(struct rpc_task *task);
  static void    call_reserveresult(struct rpc_task *task);
  static void    call_allocate(struct rpc_task *task);
+static void    call_encode(struct rpc_task *task);
  static void    call_decode(struct rpc_task *task);
  static void    call_bind(struct rpc_task *task);
  static void    call_bind_status(struct rpc_task *task);
@@ -1137,10 +1138,10 @@ EXPORT_SYMBOL_GPL(rpc_call_async);
  struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req)
  {
         struct rpc_task *task;
-       struct xdr_buf *xbufp = &req->rq_snd_buf;
         struct rpc_task_setup task_setup_data = {
                 .callback_ops = &rpc_default_ops,
-               .flags = RPC_TASK_SOFTCONN,
+               .flags = RPC_TASK_SOFTCONN |
+                       RPC_TASK_NO_RETRANS_TIMEOUT,
         };
  
         dprintk("RPC: rpc_run_bc_task req= %p\n", req);
@@ -1148,14 +1149,7 @@ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req)
          * Create an rpc_task to send the data
          */
         task = rpc_new_task(&task_setup_data);
-       task->tk_rqstp = req;
-
-       /*
-        * Set up the xdr_buf length.
-        * This also indicates that the buffer is XDR encoded already.
-        */
-       xbufp->len = xbufp->head[0].iov_len + xbufp->page_len +
-                       xbufp->tail[0].iov_len;
+       xprt_init_bc_request(req, task);
  
         task->tk_action = call_bc_transmit;
         atomic_inc(&task->tk_count);
@@ -1558,7 +1552,6 @@ call_reserveresult(struct rpc_task *task)
         task->tk_status = 0;
         if (status >= 0) {
                 if (task->tk_rqstp) {
-                       xprt_request_init(task);
                         task->tk_action = call_refresh;
                         return;
                 }
@@ -1680,7 +1673,7 @@ call_allocate(struct rpc_task *task)
         dprint_status(task);
  
         task->tk_status = 0;
-       task->tk_action = call_bind;
+       task->tk_action = call_encode;
  
         if (req->rq_buffer)
                 return;
@@ -1721,22 +1714,15 @@ call_allocate(struct rpc_task *task)
         rpc_exit(task, -ERESTARTSYS);
  }
  
-static inline int
+static int
  rpc_task_need_encode(struct rpc_task *task)
  {
-       return task->tk_rqstp->rq_snd_buf.len == 0;
+       return test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate) == 0 &&
+               (!(task->tk_flags & RPC_TASK_SENT) ||
+                !(task->tk_flags & RPC_TASK_NO_RETRANS_TIMEOUT) ||
+                xprt_request_need_retransmit(task));
  }
  
-static inline void
-rpc_task_force_reencode(struct rpc_task *task)
-{
-       task->tk_rqstp->rq_snd_buf.len = 0;
-       task->tk_rqstp->rq_bytes_sent = 0;
-}
-
-/*
- * 3.  Encode arguments of an RPC call
- */
  static void
  rpc_xdr_encode(struct rpc_task *task)
  {
@@ -1752,6 +1738,7 @@ rpc_xdr_encode(struct rpc_task *task)
         xdr_buf_init(&req->rq_rcv_buf,
                      req->rq_rbuffer,
                      req->rq_rcvsize);
+       req->rq_bytes_sent = 0;
  
         p = rpc_encode_header(task);
         if (p == NULL) {
@@ -1766,6 +1753,36 @@ rpc_xdr_encode(struct rpc_task *task)
  
         task->tk_status = rpcauth_wrap_req(task, encode, req, p,
                         task->tk_msg.rpc_argp);
+       if (task->tk_status == 0)
+               xprt_request_prepare(req);
+}
+
+/*
+ * 3.  Encode arguments of an RPC call
+ */
+static void
+call_encode(struct rpc_task *task)
+{
+       if (!rpc_task_need_encode(task))
+               goto out;
+       /* Encode here so that rpcsec_gss can use correct sequence number. */
+       rpc_xdr_encode(task);
+       /* Did the encode result in an error condition? */
+       if (task->tk_status != 0) {
+               /* Was the error nonfatal? */
+               if (task->tk_status == -EAGAIN || task->tk_status == -ENOMEM)
+                       rpc_delay(task, HZ >> 4);
+               else
+                       rpc_exit(task, task->tk_status);
+               return;
+       }
+
+       /* Add task to reply queue before transmission to avoid races */
+       if (rpc_reply_expected(task))
+               xprt_request_enqueue_receive(task);
+       xprt_request_enqueue_transmit(task);
+out:
+       task->tk_action = call_bind;
  }
  
  /*
@@ -1947,43 +1964,16 @@ call_connect_status(struct rpc_task *task)
  static void
  call_transmit(struct rpc_task *task)
  {
-       int is_retrans = RPC_WAS_SENT(task);
-
         dprint_status(task);
  
-       task->tk_action = call_status;
-       if (task->tk_status < 0)
-               return;
-       if (!xprt_prepare_transmit(task))
-               return;
-       task->tk_action = call_transmit_status;
-       /* Encode here so that rpcsec_gss can use correct sequence number. */
-       if (rpc_task_need_encode(task)) {
-               rpc_xdr_encode(task);
-               /* Did the encode result in an error condition? */
-               if (task->tk_status != 0) {
-                       /* Was the error nonfatal? */
-                       if (task->tk_status == -EAGAIN)
-                               rpc_delay(task, HZ >> 4);
-                       else
-                               rpc_exit(task, task->tk_status);
+       task->tk_status = 0;
+       if (test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) {
+               if (!xprt_prepare_transmit(task))
                         return;
-               }
+               xprt_transmit(task);
         }
-       xprt_transmit(task);
-       if (task->tk_status < 0)
-               return;
-       if (is_retrans)
-               task->tk_client->cl_stats->rpcretrans++;
-       /*
-        * On success, ensure that we call xprt_end_transmit() before sleeping
-        * in order to allow access to the socket to other RPC requests.
-        */
-       call_transmit_status(task);
-       if (rpc_reply_expected(task))
-               return;
-       task->tk_action = rpc_exit_task;
-       rpc_wake_up_queued_task(&task->tk_rqstp->rq_xprt->pending, task);
+       task->tk_action = call_transmit_status;
+       xprt_end_transmit(task);
  }
  
  /*
@@ -1999,19 +1989,17 @@ call_transmit_status(struct rpc_task *task)
          * test first.
          */
         if (task->tk_status == 0) {
-               xprt_end_transmit(task);
-               rpc_task_force_reencode(task);
+               xprt_request_wait_receive(task);
                 return;
         }
  
         switch (task->tk_status) {
-       case -EAGAIN:
-       case -ENOBUFS:
-               break;
         default:
                 dprint_status(task);
-               xprt_end_transmit(task);
-               rpc_task_force_reencode(task);
+               break;
+       case -EBADMSG:
+               task->tk_status = 0;
+               task->tk_action = call_encode;
                 break;
                 /*
                  * Special cases: if we've been waiting on the
@@ -2019,6 +2007,14 @@ call_transmit_status(struct rpc_task *task)
                  * socket just returned a connection error,
                  * then hold onto the transport lock.
                  */
+       case -ENOBUFS:
+               rpc_delay(task, HZ>>2);
+               /* fall through */
+       case -EBADSLT:
+       case -EAGAIN:
+               task->tk_action = call_transmit;
+               task->tk_status = 0;
+               break;
         case -ECONNREFUSED:
         case -EHOSTDOWN:
         case -ENETDOWN:
@@ -2026,7 +2022,6 @@ call_transmit_status(struct rpc_task *task)
         case -ENETUNREACH:
         case -EPERM:
                 if (RPC_IS_SOFTCONN(task)) {
-                       xprt_end_transmit(task);
                         if (!task->tk_msg.rpc_proc->p_proc)
                                 trace_xprt_ping(task->tk_xprt,
                                                 task->tk_status);
@@ -2039,7 +2034,7 @@ call_transmit_status(struct rpc_task *task)
         case -EADDRINUSE:
         case -ENOTCONN:
         case -EPIPE:
-               rpc_task_force_reencode(task);
+               break;
         }
  }
  
@@ -2053,6 +2048,11 @@ call_bc_transmit(struct rpc_task *task)
  {
         struct rpc_rqst *req = task->tk_rqstp;
  
+       if (rpc_task_need_encode(task))
+               xprt_request_enqueue_transmit(task);
+       if (!test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate))
+               goto out_wakeup;
+
         if (!xprt_prepare_transmit(task))
                 goto out_retry;
  
@@ -2061,14 +2061,9 @@ call_bc_transmit(struct rpc_task *task)
                         "error: %d\n", task->tk_status);
                 goto out_done;
         }
-       if (req->rq_connect_cookie != req->rq_xprt->connect_cookie)
-               req->rq_bytes_sent = 0;
  
         xprt_transmit(task);
  
-       if (task->tk_status == -EAGAIN)
-               goto out_nospace;
-
         xprt_end_transmit(task);
         dprint_status(task);
         switch (task->tk_status) {
@@ -2084,6 +2079,8 @@ call_bc_transmit(struct rpc_task *task)
         case -ENOTCONN:
         case -EPIPE:
                 break;
+       case -EAGAIN:
+               goto out_retry;
         case -ETIMEDOUT:
                 /*
                  * Problem reaching the server.  Disconnect and let the
@@ -2107,12 +2104,11 @@ call_bc_transmit(struct rpc_task *task)
                         "error: %d\n", task->tk_status);
                 break;
         }
+out_wakeup:
         rpc_wake_up_queued_task(&req->rq_xprt->pending, task);
  out_done:
         task->tk_action = rpc_exit_task;
         return;
-out_nospace:
-       req->rq_connect_cookie = req->rq_xprt->connect_cookie;
  out_retry:
         task->tk_status = 0;
  }
@@ -2125,15 +2121,11 @@ static void
  call_status(struct rpc_task *task)
  {
         struct rpc_clnt *clnt = task->tk_client;
-       struct rpc_rqst *req = task->tk_rqstp;
         int             status;
  
         if (!task->tk_msg.rpc_proc->p_proc)
                 trace_xprt_ping(task->tk_xprt, task->tk_status);
  
-       if (req->rq_reply_bytes_recvd > 0 && !req->rq_bytes_sent)
-               task->tk_status = req->rq_reply_bytes_recvd;
-
         dprint_status(task);
  
         status = task->tk_status;
@@ -2173,13 +2165,8 @@ call_status(struct rpc_task *task)
                 /* fall through */
         case -EPIPE:
         case -ENOTCONN:
-               task->tk_action = call_bind;
-               break;
-       case -ENOBUFS:
-               rpc_delay(task, HZ>>2);
-               /* fall through */
         case -EAGAIN:
-               task->tk_action = call_transmit;
+               task->tk_action = call_encode;
                 break;
         case -EIO:
                 /* shutdown or soft timeout */
@@ -2244,7 +2231,7 @@ call_timeout(struct rpc_task *task)
         rpcauth_invalcred(task);
  
  retry:
-       task->tk_action = call_bind;
+       task->tk_action = call_encode;
         task->tk_status = 0;
  }
  
@@ -2261,6 +2248,11 @@ call_decode(struct rpc_task *task)
  
         dprint_status(task);
  
+       if (!decode) {
+               task->tk_action = rpc_exit_task;
+               return;
+       }
+
         if (task->tk_flags & RPC_CALL_MAJORSEEN) {
                 if (clnt->cl_chatty) {
                         printk(KERN_NOTICE "%s: server %s OK\n",
@@ -2283,7 +2275,7 @@ call_decode(struct rpc_task *task)
  
         if (req->rq_rcv_buf.len < 12) {
                 if (!RPC_IS_SOFT(task)) {
-                       task->tk_action = call_bind;
+                       task->tk_action = call_encode;
                         goto out_retry;
                 }
                 dprintk("RPC:       %s: too small RPC reply size (%d bytes)\n",
@@ -2298,13 +2290,11 @@ call_decode(struct rpc_task *task)
                         goto out_retry;
                 return;
         }
-
         task->tk_action = rpc_exit_task;
  
-       if (decode) {
-               task->tk_status = rpcauth_unwrap_resp(task, decode, req, p,
-                                                     task->tk_msg.rpc_resp);
-       }
+       task->tk_status = rpcauth_unwrap_resp(task, decode, req, p,
+                                             task->tk_msg.rpc_resp);
+
         dprintk("RPC: %5u call_decode result %d\n", task->tk_pid,
                         task->tk_status);
         return;
@@ -2416,7 +2406,7 @@ rpc_verify_header(struct rpc_task *task)
                         task->tk_garb_retry--;
                         dprintk("RPC: %5u %s: retry garbled creds\n",
                                         task->tk_pid, __func__);
-                       task->tk_action = call_bind;
+                       task->tk_action = call_encode;
                         goto out_retry;
                 case RPC_AUTH_TOOWEAK:
                         printk(KERN_NOTICE "RPC: server %s requires stronger "
@@ -2485,7 +2475,7 @@ out_garbage:
                 task->tk_garb_retry--;
                 dprintk("RPC: %5u %s: retrying\n",
                                 task->tk_pid, __func__);
-               task->tk_action = call_bind;
+               task->tk_action = call_encode;
  out_retry:
                 return ERR_PTR(-EAGAIN);
         }
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c

index 3fe5d60..57ca5be 100644 (file)
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -99,64 +99,78 @@ __rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task)
         list_add(&task->u.tk_wait.timer_list, &queue->timer_list.list);
  }
  
-static void rpc_rotate_queue_owner(struct rpc_wait_queue *queue)
-{
-       struct list_head *q = &queue->tasks[queue->priority];
-       struct rpc_task *task;
-
-       if (!list_empty(q)) {
-               task = list_first_entry(q, struct rpc_task, u.tk_wait.list);
-               if (task->tk_owner == queue->owner)
-                       list_move_tail(&task->u.tk_wait.list, q);
-       }
-}
-
  static void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int priority)
  {
         if (queue->priority != priority) {
-               /* Fairness: rotate the list when changing priority */
-               rpc_rotate_queue_owner(queue);
                 queue->priority = priority;
+               queue->nr = 1U << priority;
         }
  }
  
-static void rpc_set_waitqueue_owner(struct rpc_wait_queue *queue, pid_t pid)
-{
-       queue->owner = pid;
-       queue->nr = RPC_BATCH_COUNT;
-}
-
  static void rpc_reset_waitqueue_priority(struct rpc_wait_queue *queue)
  {
         rpc_set_waitqueue_priority(queue, queue->maxpriority);
-       rpc_set_waitqueue_owner(queue, 0);
  }
  
  /*
- * Add new request to a priority queue.
+ * Add a request to a queue list
   */
-static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue,
-               struct rpc_task *task,
-               unsigned char queue_priority)
+static void
+__rpc_list_enqueue_task(struct list_head *q, struct rpc_task *task)
  {
-       struct list_head *q;
         struct rpc_task *t;
  
-       INIT_LIST_HEAD(&task->u.tk_wait.links);
-       if (unlikely(queue_priority > queue->maxpriority))
-               queue_priority = queue->maxpriority;
-       if (queue_priority > queue->priority)
-               rpc_set_waitqueue_priority(queue, queue_priority);
-       q = &queue->tasks[queue_priority];
         list_for_each_entry(t, q, u.tk_wait.list) {
                 if (t->tk_owner == task->tk_owner) {
-                       list_add_tail(&task->u.tk_wait.list, &t->u.tk_wait.links);
+                       list_add_tail(&task->u.tk_wait.links,
+                                       &t->u.tk_wait.links);
+                       /* Cache the queue head in task->u.tk_wait.list */
+                       task->u.tk_wait.list.next = q;
+                       task->u.tk_wait.list.prev = NULL;
                         return;
                 }
         }
+       INIT_LIST_HEAD(&task->u.tk_wait.links);
         list_add_tail(&task->u.tk_wait.list, q);
  }
  
+/*
+ * Remove request from a queue list
+ */
+static void
+__rpc_list_dequeue_task(struct rpc_task *task)
+{
+       struct list_head *q;
+       struct rpc_task *t;
+
+       if (task->u.tk_wait.list.prev == NULL) {
+               list_del(&task->u.tk_wait.links);
+               return;
+       }
+       if (!list_empty(&task->u.tk_wait.links)) {
+               t = list_first_entry(&task->u.tk_wait.links,
+                               struct rpc_task,
+                               u.tk_wait.links);
+               /* Assume __rpc_list_enqueue_task() cached the queue head */
+               q = t->u.tk_wait.list.next;
+               list_add_tail(&t->u.tk_wait.list, q);
+               list_del(&task->u.tk_wait.links);
+       }
+       list_del(&task->u.tk_wait.list);
+}
+
+/*
+ * Add new request to a priority queue.
+ */
+static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue,
+               struct rpc_task *task,
+               unsigned char queue_priority)
+{
+       if (unlikely(queue_priority > queue->maxpriority))
+               queue_priority = queue->maxpriority;
+       __rpc_list_enqueue_task(&queue->tasks[queue_priority], task);
+}
+
  /*
   * Add new request to wait queue.
   *
@@ -194,13 +208,7 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue,
   */
  static void __rpc_remove_wait_queue_priority(struct rpc_task *task)
  {
-       struct rpc_task *t;
-
-       if (!list_empty(&task->u.tk_wait.links)) {
-               t = list_entry(task->u.tk_wait.links.next, struct rpc_task, u.tk_wait.list);
-               list_move(&t->u.tk_wait.list, &task->u.tk_wait.list);
-               list_splice_init(&task->u.tk_wait.links, &t->u.tk_wait.links);
-       }
+       __rpc_list_dequeue_task(task);
  }
  
  /*
@@ -212,7 +220,8 @@ static void __rpc_remove_wait_queue(struct rpc_wait_queue *queue, struct rpc_tas
         __rpc_disable_timer(queue, task);
         if (RPC_IS_PRIORITY(queue))
                 __rpc_remove_wait_queue_priority(task);
-       list_del(&task->u.tk_wait.list);
+       else
+               list_del(&task->u.tk_wait.list);
         queue->qlen--;
         dprintk("RPC: %5u removed from queue %p \"%s\"\n",
                         task->tk_pid, queue, rpc_qname(queue));
@@ -440,14 +449,28 @@ static void __rpc_do_wake_up_task_on_wq(struct workqueue_struct *wq,
  /*
   * Wake up a queued task while the queue lock is being held
   */
-static void rpc_wake_up_task_on_wq_queue_locked(struct workqueue_struct *wq,
-               struct rpc_wait_queue *queue, struct rpc_task *task)
+static struct rpc_task *
+rpc_wake_up_task_on_wq_queue_action_locked(struct workqueue_struct *wq,
+               struct rpc_wait_queue *queue, struct rpc_task *task,
+               bool (*action)(struct rpc_task *, void *), void *data)
  {
         if (RPC_IS_QUEUED(task)) {
                 smp_rmb();
-               if (task->tk_waitqueue == queue)
-                       __rpc_do_wake_up_task_on_wq(wq, queue, task);
+               if (task->tk_waitqueue == queue) {
+                       if (action == NULL || action(task, data)) {
+                               __rpc_do_wake_up_task_on_wq(wq, queue, task);
+                               return task;
+                       }
+               }
         }
+       return NULL;
+}
+
+static void
+rpc_wake_up_task_on_wq_queue_locked(struct workqueue_struct *wq,
+               struct rpc_wait_queue *queue, struct rpc_task *task)
+{
+       rpc_wake_up_task_on_wq_queue_action_locked(wq, queue, task, NULL, NULL);
  }
  
  /*
@@ -465,6 +488,8 @@ void rpc_wake_up_queued_task_on_wq(struct workqueue_struct *wq,
                 struct rpc_wait_queue *queue,
                 struct rpc_task *task)
  {
+       if (!RPC_IS_QUEUED(task))
+               return;
         spin_lock_bh(&queue->lock);
         rpc_wake_up_task_on_wq_queue_locked(wq, queue, task);
         spin_unlock_bh(&queue->lock);
@@ -475,12 +500,48 @@ void rpc_wake_up_queued_task_on_wq(struct workqueue_struct *wq,
   */
  void rpc_wake_up_queued_task(struct rpc_wait_queue *queue, struct rpc_task *task)
  {
+       if (!RPC_IS_QUEUED(task))
+               return;
         spin_lock_bh(&queue->lock);
         rpc_wake_up_task_queue_locked(queue, task);
         spin_unlock_bh(&queue->lock);
  }
  EXPORT_SYMBOL_GPL(rpc_wake_up_queued_task);
  
+static bool rpc_task_action_set_status(struct rpc_task *task, void *status)
+{
+       task->tk_status = *(int *)status;
+       return true;
+}
+
+static void
+rpc_wake_up_task_queue_set_status_locked(struct rpc_wait_queue *queue,
+               struct rpc_task *task, int status)
+{
+       rpc_wake_up_task_on_wq_queue_action_locked(rpciod_workqueue, queue,
+                       task, rpc_task_action_set_status, &status);
+}
+
+/**
+ * rpc_wake_up_queued_task_set_status - wake up a task and set task->tk_status
+ * @queue: pointer to rpc_wait_queue
+ * @task: pointer to rpc_task
+ * @status: integer error value
+ *
+ * If @task is queued on @queue, then it is woken up, and @task->tk_status is
+ * set to the value of @status.
+ */
+void
+rpc_wake_up_queued_task_set_status(struct rpc_wait_queue *queue,
+               struct rpc_task *task, int status)
+{
+       if (!RPC_IS_QUEUED(task))
+               return;
+       spin_lock_bh(&queue->lock);
+       rpc_wake_up_task_queue_set_status_locked(queue, task, status);
+       spin_unlock_bh(&queue->lock);
+}
+
  /*
   * Wake up the next task on a priority queue.
   */
@@ -493,17 +554,9 @@ static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *q
          * Service a batch of tasks from a single owner.
          */
         q = &queue->tasks[queue->priority];
-       if (!list_empty(q)) {
-               task = list_entry(q->next, struct rpc_task, u.tk_wait.list);
-               if (queue->owner == task->tk_owner) {
-                       if (--queue->nr)
-                               goto out;
-                       list_move_tail(&task->u.tk_wait.list, q);
-               }
-               /*
-                * Check if we need to switch queues.
-                */
-               goto new_owner;
+       if (!list_empty(q) && --queue->nr) {
+               task = list_first_entry(q, struct rpc_task, u.tk_wait.list);
+               goto out;
         }
  
         /*
@@ -515,7 +568,7 @@ static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *q
                 else
                         q = q - 1;
                 if (!list_empty(q)) {
-                       task = list_entry(q->next, struct rpc_task, u.tk_wait.list);
+                       task = list_first_entry(q, struct rpc_task, u.tk_wait.list);
                         goto new_queue;
                 }
         } while (q != &queue->tasks[queue->priority]);
@@ -525,8 +578,6 @@ static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *q
  
  new_queue:
         rpc_set_waitqueue_priority(queue, (unsigned int)(q - &queue->tasks[0]));
-new_owner:
-       rpc_set_waitqueue_owner(queue, task->tk_owner);
  out:
         return task;
  }
@@ -553,12 +604,9 @@ struct rpc_task *rpc_wake_up_first_on_wq(struct workqueue_struct *wq,
                         queue, rpc_qname(queue));
         spin_lock_bh(&queue->lock);
         task = __rpc_find_next_queued(queue);
-       if (task != NULL) {
-               if (func(task, data))
-                       rpc_wake_up_task_on_wq_queue_locked(wq, queue, task);
-               else
-                       task = NULL;
-       }
+       if (task != NULL)
+               task = rpc_wake_up_task_on_wq_queue_action_locked(wq, queue,
+                               task, func, data);
         spin_unlock_bh(&queue->lock);
  
         return task;
diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c

index f217c34..9062967 100644 (file)
--- a/net/sunrpc/socklib.c
+++ b/net/sunrpc/socklib.c
@@ -26,7 +26,8 @@
   * Possibly called several times to iterate over an sk_buff and copy
   * data out of it.
   */
-size_t xdr_skb_read_bits(struct xdr_skb_reader *desc, void *to, size_t len)
+static size_t
+xdr_skb_read_bits(struct xdr_skb_reader *desc, void *to, size_t len)
  {
         if (len > desc->count)
                 len = desc->count;
@@ -36,7 +37,6 @@ size_t xdr_skb_read_bits(struct xdr_skb_reader *desc, void *to, size_t len)
         desc->offset += len;
         return len;
  }
-EXPORT_SYMBOL_GPL(xdr_skb_read_bits);
  
  /**
   * xdr_skb_read_and_csum_bits - copy and checksum from skb to buffer
@@ -69,7 +69,8 @@ static size_t xdr_skb_read_and_csum_bits(struct xdr_skb_reader *desc, void *to,
   * @copy_actor: virtual method for copying data
   *
   */
-ssize_t xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, struct xdr_skb_reader *desc, xdr_skb_read_actor copy_actor)
+static ssize_t
+xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, struct xdr_skb_reader *desc, xdr_skb_read_actor copy_actor)
  {
         struct page     **ppage = xdr->pages;
         unsigned int    len, pglen = xdr->page_len;
@@ -104,7 +105,7 @@ ssize_t xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, struct
  
                 /* ACL likes to be lazy in allocating pages - ACLs
                  * are small by default but can get huge. */
-               if (unlikely(*ppage == NULL)) {
+               if ((xdr->flags & XDRBUF_SPARSE_PAGES) && *ppage == NULL) {
                         *ppage = alloc_page(GFP_ATOMIC);
                         if (unlikely(*ppage == NULL)) {
                                 if (copied == 0)
@@ -140,7 +141,6 @@ copy_tail:
  out:
         return copied;
  }
-EXPORT_SYMBOL_GPL(xdr_partial_copy_from_skb);
  
  /**
   * csum_partial_copy_to_xdr - checksum and copy data
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c

index 5185efb..87533fb 100644 (file)
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -171,7 +171,6 @@ void svc_xprt_init(struct net *net, struct svc_xprt_class *xcl,
         mutex_init(&xprt->xpt_mutex);
         spin_lock_init(&xprt->xpt_lock);
         set_bit(XPT_BUSY, &xprt->xpt_flags);
-       rpc_init_wait_queue(&xprt->xpt_bc_pending, "xpt_bc_pending");
         xprt->xpt_net = get_net(net);
         strcpy(xprt->xpt_remotebuf, "uninitialized");
  }
@@ -895,7 +894,6 @@ int svc_send(struct svc_rqst *rqstp)
         else
                 len = xprt->xpt_ops->xpo_sendto(rqstp);
         mutex_unlock(&xprt->xpt_mutex);
-       rpc_wake_up(&xprt->xpt_bc_pending);
         trace_svc_send(rqstp, len);
         svc_xprt_release(rqstp);
  
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c

index 5445145..db8bb6b 100644 (file)
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1004,7 +1004,7 @@ static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp)
  
         if (!bc_xprt)
                 return -EAGAIN;
-       spin_lock(&bc_xprt->recv_lock);
+       spin_lock(&bc_xprt->queue_lock);
         req = xprt_lookup_rqst(bc_xprt, xid);
         if (!req)
                 goto unlock_notfound;
@@ -1022,7 +1022,7 @@ static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp)
         memcpy(dst->iov_base, src->iov_base, src->iov_len);
         xprt_complete_rqst(req->rq_task, rqstp->rq_arg.len);
         rqstp->rq_arg.len = 0;
-       spin_unlock(&bc_xprt->recv_lock);
+       spin_unlock(&bc_xprt->queue_lock);
         return 0;
  unlock_notfound:
         printk(KERN_NOTICE
@@ -1031,7 +1031,7 @@ unlock_notfound:
                 __func__, ntohl(calldir),
                 bc_xprt, ntohl(xid));
  unlock_eagain:
-       spin_unlock(&bc_xprt->recv_lock);
+       spin_unlock(&bc_xprt->queue_lock);
         return -EAGAIN;
  }
  
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c

index 30afbd2..2bbb8d3 100644 (file)
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -15,6 +15,7 @@
  #include <linux/errno.h>
  #include <linux/sunrpc/xdr.h>
  #include <linux/sunrpc/msg_prot.h>
+#include <linux/bvec.h>
  
  /*
   * XDR functions for basic NFS types
@@ -128,6 +129,39 @@ xdr_terminate_string(struct xdr_buf *buf, const u32 len)
  }
  EXPORT_SYMBOL_GPL(xdr_terminate_string);
  
+size_t
+xdr_buf_pagecount(struct xdr_buf *buf)
+{
+       if (!buf->page_len)
+               return 0;
+       return (buf->page_base + buf->page_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+}
+
+int
+xdr_alloc_bvec(struct xdr_buf *buf, gfp_t gfp)
+{
+       size_t i, n = xdr_buf_pagecount(buf);
+
+       if (n != 0 && buf->bvec == NULL) {
+               buf->bvec = kmalloc_array(n, sizeof(buf->bvec[0]), gfp);
+               if (!buf->bvec)
+                       return -ENOMEM;
+               for (i = 0; i < n; i++) {
+                       buf->bvec[i].bv_page = buf->pages[i];
+                       buf->bvec[i].bv_len = PAGE_SIZE;
+                       buf->bvec[i].bv_offset = 0;
+               }
+       }
+       return 0;
+}
+
+void
+xdr_free_bvec(struct xdr_buf *buf)
+{
+       kfree(buf->bvec);
+       buf->bvec = NULL;
+}
+
  void
  xdr_inline_pages(struct xdr_buf *xdr, unsigned int offset,
                  struct page **pages, unsigned int base, unsigned int len)
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c

index a8db2e3..86bea45 100644 (file)
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -68,8 +68,6 @@
  static void     xprt_init(struct rpc_xprt *xprt, struct net *net);
  static __be32  xprt_alloc_xid(struct rpc_xprt *xprt);
  static void    xprt_connect_status(struct rpc_task *task);
-static int      __xprt_get_cong(struct rpc_xprt *, struct rpc_task *);
-static void     __xprt_put_cong(struct rpc_xprt *, struct rpc_rqst *);
  static void     xprt_destroy(struct rpc_xprt *xprt);
  
  static DEFINE_SPINLOCK(xprt_list_lock);
@@ -171,6 +169,17 @@ out:
  }
  EXPORT_SYMBOL_GPL(xprt_load_transport);
  
+static void xprt_clear_locked(struct rpc_xprt *xprt)
+{
+       xprt->snd_task = NULL;
+       if (!test_bit(XPRT_CLOSE_WAIT, &xprt->state)) {
+               smp_mb__before_atomic();
+               clear_bit(XPRT_LOCKED, &xprt->state);
+               smp_mb__after_atomic();
+       } else
+               queue_work(xprtiod_workqueue, &xprt->task_cleanup);
+}
+
  /**
   * xprt_reserve_xprt - serialize write access to transports
   * @task: task that is requesting access to the transport
@@ -183,44 +192,53 @@ EXPORT_SYMBOL_GPL(xprt_load_transport);
  int xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task)
  {
         struct rpc_rqst *req = task->tk_rqstp;
-       int priority;
  
         if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) {
                 if (task == xprt->snd_task)
                         return 1;
                 goto out_sleep;
         }
+       if (test_bit(XPRT_WRITE_SPACE, &xprt->state))
+               goto out_unlock;
         xprt->snd_task = task;
-       if (req != NULL)
-               req->rq_ntrans++;
  
         return 1;
  
+out_unlock:
+       xprt_clear_locked(xprt);
  out_sleep:
         dprintk("RPC: %5u failed to lock transport %p\n",
                         task->tk_pid, xprt);
-       task->tk_timeout = 0;
+       task->tk_timeout = RPC_IS_SOFT(task) ? req->rq_timeout : 0;
         task->tk_status = -EAGAIN;
-       if (req == NULL)
-               priority = RPC_PRIORITY_LOW;
-       else if (!req->rq_ntrans)
-               priority = RPC_PRIORITY_NORMAL;
-       else
-               priority = RPC_PRIORITY_HIGH;
-       rpc_sleep_on_priority(&xprt->sending, task, NULL, priority);
+       rpc_sleep_on(&xprt->sending, task, NULL);
         return 0;
  }
  EXPORT_SYMBOL_GPL(xprt_reserve_xprt);
  
-static void xprt_clear_locked(struct rpc_xprt *xprt)
+static bool
+xprt_need_congestion_window_wait(struct rpc_xprt *xprt)
  {
-       xprt->snd_task = NULL;
-       if (!test_bit(XPRT_CLOSE_WAIT, &xprt->state)) {
-               smp_mb__before_atomic();
-               clear_bit(XPRT_LOCKED, &xprt->state);
-               smp_mb__after_atomic();
-       } else
-               queue_work(xprtiod_workqueue, &xprt->task_cleanup);
+       return test_bit(XPRT_CWND_WAIT, &xprt->state);
+}
+
+static void
+xprt_set_congestion_window_wait(struct rpc_xprt *xprt)
+{
+       if (!list_empty(&xprt->xmit_queue)) {
+               /* Peek at head of queue to see if it can make progress */
+               if (list_first_entry(&xprt->xmit_queue, struct rpc_rqst,
+                                       rq_xmit)->rq_cong)
+                       return;
+       }
+       set_bit(XPRT_CWND_WAIT, &xprt->state);
+}
+
+static void
+xprt_test_and_clear_congestion_window_wait(struct rpc_xprt *xprt)
+{
+       if (!RPCXPRT_CONGESTED(xprt))
+               clear_bit(XPRT_CWND_WAIT, &xprt->state);
  }
  
  /*
@@ -230,11 +248,11 @@ static void xprt_clear_locked(struct rpc_xprt *xprt)
   * Same as xprt_reserve_xprt, but Van Jacobson congestion control is
   * integrated into the decision of whether a request is allowed to be
   * woken up and given access to the transport.
+ * Note that the lock is only granted if we know there are free slots.
   */
  int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task)
  {
         struct rpc_rqst *req = task->tk_rqstp;
-       int priority;
  
         if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) {
                 if (task == xprt->snd_task)
@@ -245,25 +263,19 @@ int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task)
                 xprt->snd_task = task;
                 return 1;
         }
-       if (__xprt_get_cong(xprt, task)) {
+       if (test_bit(XPRT_WRITE_SPACE, &xprt->state))
+               goto out_unlock;
+       if (!xprt_need_congestion_window_wait(xprt)) {
                 xprt->snd_task = task;
-               req->rq_ntrans++;
                 return 1;
         }
+out_unlock:
         xprt_clear_locked(xprt);
  out_sleep:
-       if (req)
-               __xprt_put_cong(xprt, req);
         dprintk("RPC: %5u failed to lock transport %p\n", task->tk_pid, xprt);
-       task->tk_timeout = 0;
+       task->tk_timeout = RPC_IS_SOFT(task) ? req->rq_timeout : 0;
         task->tk_status = -EAGAIN;
-       if (req == NULL)
-               priority = RPC_PRIORITY_LOW;
-       else if (!req->rq_ntrans)
-               priority = RPC_PRIORITY_NORMAL;
-       else
-               priority = RPC_PRIORITY_HIGH;
-       rpc_sleep_on_priority(&xprt->sending, task, NULL, priority);
+       rpc_sleep_on(&xprt->sending, task, NULL);
         return 0;
  }
  EXPORT_SYMBOL_GPL(xprt_reserve_xprt_cong);
@@ -272,6 +284,8 @@ static inline int xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task)
  {
         int retval;
  
+       if (test_bit(XPRT_LOCKED, &xprt->state) && xprt->snd_task == task)
+               return 1;
         spin_lock_bh(&xprt->transport_lock);
         retval = xprt->ops->reserve_xprt(xprt, task);
         spin_unlock_bh(&xprt->transport_lock);
@@ -281,12 +295,8 @@ static inline int xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task)
  static bool __xprt_lock_write_func(struct rpc_task *task, void *data)
  {
         struct rpc_xprt *xprt = data;
-       struct rpc_rqst *req;
  
-       req = task->tk_rqstp;
         xprt->snd_task = task;
-       if (req)
-               req->rq_ntrans++;
         return true;
  }
  
@@ -294,53 +304,30 @@ static void __xprt_lock_write_next(struct rpc_xprt *xprt)
  {
         if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
                 return;
-
+       if (test_bit(XPRT_WRITE_SPACE, &xprt->state))
+               goto out_unlock;
         if (rpc_wake_up_first_on_wq(xprtiod_workqueue, &xprt->sending,
                                 __xprt_lock_write_func, xprt))
                 return;
+out_unlock:
         xprt_clear_locked(xprt);
  }
  
-static bool __xprt_lock_write_cong_func(struct rpc_task *task, void *data)
-{
-       struct rpc_xprt *xprt = data;
-       struct rpc_rqst *req;
-
-       req = task->tk_rqstp;
-       if (req == NULL) {
-               xprt->snd_task = task;
-               return true;
-       }
-       if (__xprt_get_cong(xprt, task)) {
-               xprt->snd_task = task;
-               req->rq_ntrans++;
-               return true;
-       }
-       return false;
-}
-
  static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt)
  {
         if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
                 return;
-       if (RPCXPRT_CONGESTED(xprt))
+       if (test_bit(XPRT_WRITE_SPACE, &xprt->state))
+               goto out_unlock;
+       if (xprt_need_congestion_window_wait(xprt))
                 goto out_unlock;
         if (rpc_wake_up_first_on_wq(xprtiod_workqueue, &xprt->sending,
-                               __xprt_lock_write_cong_func, xprt))
+                               __xprt_lock_write_func, xprt))
                 return;
  out_unlock:
         xprt_clear_locked(xprt);
  }
  
-static void xprt_task_clear_bytes_sent(struct rpc_task *task)
-{
-       if (task != NULL) {
-               struct rpc_rqst *req = task->tk_rqstp;
-               if (req != NULL)
-                       req->rq_bytes_sent = 0;
-       }
-}
-
  /**
   * xprt_release_xprt - allow other requests to use a transport
   * @xprt: transport with other tasks potentially waiting
@@ -351,7 +338,6 @@ static void xprt_task_clear_bytes_sent(struct rpc_task *task)
  void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task)
  {
         if (xprt->snd_task == task) {
-               xprt_task_clear_bytes_sent(task);
                 xprt_clear_locked(xprt);
                 __xprt_lock_write_next(xprt);
         }
@@ -369,7 +355,6 @@ EXPORT_SYMBOL_GPL(xprt_release_xprt);
  void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task)
  {
         if (xprt->snd_task == task) {
-               xprt_task_clear_bytes_sent(task);
                 xprt_clear_locked(xprt);
                 __xprt_lock_write_next_cong(xprt);
         }
@@ -378,6 +363,8 @@ EXPORT_SYMBOL_GPL(xprt_release_xprt_cong);
  
  static inline void xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task)
  {
+       if (xprt->snd_task != task)
+               return;
         spin_lock_bh(&xprt->transport_lock);
         xprt->ops->release_xprt(xprt, task);
         spin_unlock_bh(&xprt->transport_lock);
@@ -388,16 +375,16 @@ static inline void xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *ta
   * overflowed. Put the task to sleep if this is the case.
   */
  static int
-__xprt_get_cong(struct rpc_xprt *xprt, struct rpc_task *task)
+__xprt_get_cong(struct rpc_xprt *xprt, struct rpc_rqst *req)
  {
-       struct rpc_rqst *req = task->tk_rqstp;
-
         if (req->rq_cong)
                 return 1;
         dprintk("RPC: %5u xprt_cwnd_limited cong = %lu cwnd = %lu\n",
-                       task->tk_pid, xprt->cong, xprt->cwnd);
-       if (RPCXPRT_CONGESTED(xprt))
+                       req->rq_task->tk_pid, xprt->cong, xprt->cwnd);
+       if (RPCXPRT_CONGESTED(xprt)) {
+               xprt_set_congestion_window_wait(xprt);
                 return 0;
+       }
         req->rq_cong = 1;
         xprt->cong += RPC_CWNDSCALE;
         return 1;
@@ -414,9 +401,31 @@ __xprt_put_cong(struct rpc_xprt *xprt, struct rpc_rqst *req)
                 return;
         req->rq_cong = 0;
         xprt->cong -= RPC_CWNDSCALE;
+       xprt_test_and_clear_congestion_window_wait(xprt);
         __xprt_lock_write_next_cong(xprt);
  }
  
+/**
+ * xprt_request_get_cong - Request congestion control credits
+ * @xprt: pointer to transport
+ * @req: pointer to RPC request
+ *
+ * Useful for transports that require congestion control.
+ */
+bool
+xprt_request_get_cong(struct rpc_xprt *xprt, struct rpc_rqst *req)
+{
+       bool ret = false;
+
+       if (req->rq_cong)
+               return true;
+       spin_lock_bh(&xprt->transport_lock);
+       ret = __xprt_get_cong(xprt, req) != 0;
+       spin_unlock_bh(&xprt->transport_lock);
+       return ret;
+}
+EXPORT_SYMBOL_GPL(xprt_request_get_cong);
+
  /**
   * xprt_release_rqst_cong - housekeeping when request is complete
   * @task: RPC request that recently completed
@@ -431,6 +440,20 @@ void xprt_release_rqst_cong(struct rpc_task *task)
  }
  EXPORT_SYMBOL_GPL(xprt_release_rqst_cong);
  
+/*
+ * Clear the congestion window wait flag and wake up the next
+ * entry on xprt->sending
+ */
+static void
+xprt_clear_congestion_window_wait(struct rpc_xprt *xprt)
+{
+       if (test_and_clear_bit(XPRT_CWND_WAIT, &xprt->state)) {
+               spin_lock_bh(&xprt->transport_lock);
+               __xprt_lock_write_next_cong(xprt);
+               spin_unlock_bh(&xprt->transport_lock);
+       }
+}
+
  /**
   * xprt_adjust_cwnd - adjust transport congestion window
   * @xprt: pointer to xprt
@@ -488,39 +511,46 @@ EXPORT_SYMBOL_GPL(xprt_wake_pending_tasks);
  
  /**
   * xprt_wait_for_buffer_space - wait for transport output buffer to clear
- * @task: task to be put to sleep
- * @action: function pointer to be executed after wait
+ * @xprt: transport
   *
   * Note that we only set the timer for the case of RPC_IS_SOFT(), since
   * we don't in general want to force a socket disconnection due to
   * an incomplete RPC call transmission.
   */
-void xprt_wait_for_buffer_space(struct rpc_task *task, rpc_action action)
+void xprt_wait_for_buffer_space(struct rpc_xprt *xprt)
  {
-       struct rpc_rqst *req = task->tk_rqstp;
-       struct rpc_xprt *xprt = req->rq_xprt;
-
-       task->tk_timeout = RPC_IS_SOFT(task) ? req->rq_timeout : 0;
-       rpc_sleep_on(&xprt->pending, task, action);
+       set_bit(XPRT_WRITE_SPACE, &xprt->state);
  }
  EXPORT_SYMBOL_GPL(xprt_wait_for_buffer_space);
  
+static bool
+xprt_clear_write_space_locked(struct rpc_xprt *xprt)
+{
+       if (test_and_clear_bit(XPRT_WRITE_SPACE, &xprt->state)) {
+               __xprt_lock_write_next(xprt);
+               dprintk("RPC:       write space: waking waiting task on "
+                               "xprt %p\n", xprt);
+               return true;
+       }
+       return false;
+}
+
  /**
   * xprt_write_space - wake the task waiting for transport output buffer space
   * @xprt: transport with waiting tasks
   *
   * Can be called in a soft IRQ context, so xprt_write_space never sleeps.
   */
-void xprt_write_space(struct rpc_xprt *xprt)
+bool xprt_write_space(struct rpc_xprt *xprt)
  {
+       bool ret;
+
+       if (!test_bit(XPRT_WRITE_SPACE, &xprt->state))
+               return false;
         spin_lock_bh(&xprt->transport_lock);
-       if (xprt->snd_task) {
-               dprintk("RPC:       write space: waking waiting task on "
-                               "xprt %p\n", xprt);
-               rpc_wake_up_queued_task_on_wq(xprtiod_workqueue,
-                               &xprt->pending, xprt->snd_task);
-       }
+       ret = xprt_clear_write_space_locked(xprt);
         spin_unlock_bh(&xprt->transport_lock);
+       return ret;
  }
  EXPORT_SYMBOL_GPL(xprt_write_space);
  
@@ -631,6 +661,7 @@ void xprt_disconnect_done(struct rpc_xprt *xprt)
         dprintk("RPC:       disconnected transport %p\n", xprt);
         spin_lock_bh(&xprt->transport_lock);
         xprt_clear_connected(xprt);
+       xprt_clear_write_space_locked(xprt);
         xprt_wake_pending_tasks(xprt, -EAGAIN);
         spin_unlock_bh(&xprt->transport_lock);
  }
@@ -654,6 +685,22 @@ void xprt_force_disconnect(struct rpc_xprt *xprt)
  }
  EXPORT_SYMBOL_GPL(xprt_force_disconnect);
  
+static unsigned int
+xprt_connect_cookie(struct rpc_xprt *xprt)
+{
+       return READ_ONCE(xprt->connect_cookie);
+}
+
+static bool
+xprt_request_retransmit_after_disconnect(struct rpc_task *task)
+{
+       struct rpc_rqst *req = task->tk_rqstp;
+       struct rpc_xprt *xprt = req->rq_xprt;
+
+       return req->rq_connect_cookie != xprt_connect_cookie(xprt) ||
+               !xprt_connected(xprt);
+}
+
  /**
   * xprt_conditional_disconnect - force a transport to disconnect
   * @xprt: transport to disconnect
@@ -692,7 +739,7 @@ static void
  xprt_schedule_autodisconnect(struct rpc_xprt *xprt)
         __must_hold(&xprt->transport_lock)
  {
-       if (list_empty(&xprt->recv) && xprt_has_timer(xprt))
+       if (RB_EMPTY_ROOT(&xprt->recv_queue) && xprt_has_timer(xprt))
                 mod_timer(&xprt->timer, xprt->last_used + xprt->idle_timeout);
  }
  
@@ -702,7 +749,7 @@ xprt_init_autodisconnect(struct timer_list *t)
         struct rpc_xprt *xprt = from_timer(xprt, t, timer);
  
         spin_lock(&xprt->transport_lock);
-       if (!list_empty(&xprt->recv))
+       if (!RB_EMPTY_ROOT(&xprt->recv_queue))
                 goto out_abort;
         /* Reset xprt->last_used to avoid connect/autodisconnect cycling */
         xprt->last_used = jiffies;
@@ -726,7 +773,6 @@ bool xprt_lock_connect(struct rpc_xprt *xprt,
                 goto out;
         if (xprt->snd_task != task)
                 goto out;
-       xprt_task_clear_bytes_sent(task);
         xprt->snd_task = cookie;
         ret = true;
  out:
@@ -772,7 +818,6 @@ void xprt_connect(struct rpc_task *task)
                 xprt->ops->close(xprt);
  
         if (!xprt_connected(xprt)) {
-               task->tk_rqstp->rq_bytes_sent = 0;
                 task->tk_timeout = task->tk_rqstp->rq_timeout;
                 task->tk_rqstp->rq_connect_cookie = xprt->connect_cookie;
                 rpc_sleep_on(&xprt->pending, task, xprt_connect_status);
@@ -789,17 +834,11 @@ void xprt_connect(struct rpc_task *task)
  
  static void xprt_connect_status(struct rpc_task *task)
  {
-       struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt;
-
-       if (task->tk_status == 0) {
-               xprt->stat.connect_count++;
-               xprt->stat.connect_time += (long)jiffies - xprt->stat.connect_start;
+       switch (task->tk_status) {
+       case 0:
                 dprintk("RPC: %5u xprt_connect_status: connection established\n",
                                 task->tk_pid);
-               return;
-       }
-
-       switch (task->tk_status) {
+               break;
         case -ECONNREFUSED:
         case -ECONNRESET:
         case -ECONNABORTED:
@@ -816,28 +855,97 @@ static void xprt_connect_status(struct rpc_task *task)
         default:
                 dprintk("RPC: %5u xprt_connect_status: error %d connecting to "
                                 "server %s\n", task->tk_pid, -task->tk_status,
-                               xprt->servername);
+                               task->tk_rqstp->rq_xprt->servername);
                 task->tk_status = -EIO;
         }
  }
  
+enum xprt_xid_rb_cmp {
+       XID_RB_EQUAL,
+       XID_RB_LEFT,
+       XID_RB_RIGHT,
+};
+static enum xprt_xid_rb_cmp
+xprt_xid_cmp(__be32 xid1, __be32 xid2)
+{
+       if (xid1 == xid2)
+               return XID_RB_EQUAL;
+       if ((__force u32)xid1 < (__force u32)xid2)
+               return XID_RB_LEFT;
+       return XID_RB_RIGHT;
+}
+
+static struct rpc_rqst *
+xprt_request_rb_find(struct rpc_xprt *xprt, __be32 xid)
+{
+       struct rb_node *n = xprt->recv_queue.rb_node;
+       struct rpc_rqst *req;
+
+       while (n != NULL) {
+               req = rb_entry(n, struct rpc_rqst, rq_recv);
+               switch (xprt_xid_cmp(xid, req->rq_xid)) {
+               case XID_RB_LEFT:
+                       n = n->rb_left;
+                       break;
+               case XID_RB_RIGHT:
+                       n = n->rb_right;
+                       break;
+               case XID_RB_EQUAL:
+                       return req;
+               }
+       }
+       return NULL;
+}
+
+static void
+xprt_request_rb_insert(struct rpc_xprt *xprt, struct rpc_rqst *new)
+{
+       struct rb_node **p = &xprt->recv_queue.rb_node;
+       struct rb_node *n = NULL;
+       struct rpc_rqst *req;
+
+       while (*p != NULL) {
+               n = *p;
+               req = rb_entry(n, struct rpc_rqst, rq_recv);
+               switch(xprt_xid_cmp(new->rq_xid, req->rq_xid)) {
+               case XID_RB_LEFT:
+                       p = &n->rb_left;
+                       break;
+               case XID_RB_RIGHT:
+                       p = &n->rb_right;
+                       break;
+               case XID_RB_EQUAL:
+                       WARN_ON_ONCE(new != req);
+                       return;
+               }
+       }
+       rb_link_node(&new->rq_recv, n, p);
+       rb_insert_color(&new->rq_recv, &xprt->recv_queue);
+}
+
+static void
+xprt_request_rb_remove(struct rpc_xprt *xprt, struct rpc_rqst *req)
+{
+       rb_erase(&req->rq_recv, &xprt->recv_queue);
+}
+
  /**
   * xprt_lookup_rqst - find an RPC request corresponding to an XID
   * @xprt: transport on which the original request was transmitted
   * @xid: RPC XID of incoming reply
   *
- * Caller holds xprt->recv_lock.
+ * Caller holds xprt->queue_lock.
   */
  struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid)
  {
         struct rpc_rqst *entry;
  
-       list_for_each_entry(entry, &xprt->recv, rq_list)
-               if (entry->rq_xid == xid) {
-                       trace_xprt_lookup_rqst(xprt, xid, 0);
-                       entry->rq_rtt = ktime_sub(ktime_get(), entry->rq_xtime);
-                       return entry;
-               }
+       entry = xprt_request_rb_find(xprt, xid);
+       if (entry != NULL) {
+               trace_xprt_lookup_rqst(xprt, xid, 0);
+               entry->rq_rtt = ktime_sub(ktime_get(), entry->rq_xtime);
+               return entry;
+       }
  
         dprintk("RPC:       xprt_lookup_rqst did not find xid %08x\n",
                         ntohl(xid));
@@ -847,16 +955,22 @@ struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid)
  }
  EXPORT_SYMBOL_GPL(xprt_lookup_rqst);
  
+static bool
+xprt_is_pinned_rqst(struct rpc_rqst *req)
+{
+       return atomic_read(&req->rq_pin) != 0;
+}
+
  /**
   * xprt_pin_rqst - Pin a request on the transport receive list
   * @req: Request to pin
   *
   * Caller must ensure this is atomic with the call to xprt_lookup_rqst()
- * so should be holding the xprt transport lock.
+ * so should be holding the xprt receive lock.
   */
  void xprt_pin_rqst(struct rpc_rqst *req)
  {
-       set_bit(RPC_TASK_MSG_RECV, &req->rq_task->tk_runstate);
+       atomic_inc(&req->rq_pin);
  }
  EXPORT_SYMBOL_GPL(xprt_pin_rqst);
  
@@ -864,38 +978,87 @@ EXPORT_SYMBOL_GPL(xprt_pin_rqst);
   * xprt_unpin_rqst - Unpin a request on the transport receive list
   * @req: Request to pin
   *
- * Caller should be holding the xprt transport lock.
+ * Caller should be holding xprt->queue_lock.
   */
  void xprt_unpin_rqst(struct rpc_rqst *req)
  {
-       struct rpc_task *task = req->rq_task;
-
-       clear_bit(RPC_TASK_MSG_RECV, &task->tk_runstate);
-       if (test_bit(RPC_TASK_MSG_RECV_WAIT, &task->tk_runstate))
-               wake_up_bit(&task->tk_runstate, RPC_TASK_MSG_RECV);
+       if (!test_bit(RPC_TASK_MSG_PIN_WAIT, &req->rq_task->tk_runstate)) {
+               atomic_dec(&req->rq_pin);
+               return;
+       }
+       if (atomic_dec_and_test(&req->rq_pin))
+               wake_up_var(&req->rq_pin);
  }
  EXPORT_SYMBOL_GPL(xprt_unpin_rqst);
  
  static void xprt_wait_on_pinned_rqst(struct rpc_rqst *req)
-__must_hold(&req->rq_xprt->recv_lock)
  {
-       struct rpc_task *task = req->rq_task;
+       wait_var_event(&req->rq_pin, !xprt_is_pinned_rqst(req));
+}
  
-       if (task && test_bit(RPC_TASK_MSG_RECV, &task->tk_runstate)) {
-               spin_unlock(&req->rq_xprt->recv_lock);
-               set_bit(RPC_TASK_MSG_RECV_WAIT, &task->tk_runstate);
-               wait_on_bit(&task->tk_runstate, RPC_TASK_MSG_RECV,
-                               TASK_UNINTERRUPTIBLE);
-               clear_bit(RPC_TASK_MSG_RECV_WAIT, &task->tk_runstate);
-               spin_lock(&req->rq_xprt->recv_lock);
-       }
+static bool
+xprt_request_data_received(struct rpc_task *task)
+{
+       return !test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate) &&
+               READ_ONCE(task->tk_rqstp->rq_reply_bytes_recvd) != 0;
+}
+
+static bool
+xprt_request_need_enqueue_receive(struct rpc_task *task, struct rpc_rqst *req)
+{
+       return !test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate) &&
+               READ_ONCE(task->tk_rqstp->rq_reply_bytes_recvd) == 0;
+}
+
+/**
+ * xprt_request_enqueue_receive - Add a request to the receive queue
+ * @task: RPC task
+ *
+ */
+void
+xprt_request_enqueue_receive(struct rpc_task *task)
+{
+       struct rpc_rqst *req = task->tk_rqstp;
+       struct rpc_xprt *xprt = req->rq_xprt;
+
+       if (!xprt_request_need_enqueue_receive(task, req))
+               return;
+       spin_lock(&xprt->queue_lock);
+
+       /* Update the softirq receive buffer */
+       memcpy(&req->rq_private_buf, &req->rq_rcv_buf,
+                       sizeof(req->rq_private_buf));
+
+       /* Add request to the receive list */
+       xprt_request_rb_insert(xprt, req);
+       set_bit(RPC_TASK_NEED_RECV, &task->tk_runstate);
+       spin_unlock(&xprt->queue_lock);
+
+       xprt_reset_majortimeo(req);
+       /* Turn off autodisconnect */
+       del_singleshot_timer_sync(&xprt->timer);
+}
+
+/**
+ * xprt_request_dequeue_receive_locked - Remove a request from the receive queue
+ * @task: RPC task
+ *
+ * Caller must hold xprt->queue_lock.
+ */
+static void
+xprt_request_dequeue_receive_locked(struct rpc_task *task)
+{
+       struct rpc_rqst *req = task->tk_rqstp;
+
+       if (test_and_clear_bit(RPC_TASK_NEED_RECV, &task->tk_runstate))
+               xprt_request_rb_remove(req->rq_xprt, req);
  }
  
  /**
   * xprt_update_rtt - Update RPC RTT statistics
   * @task: RPC request that recently completed
   *
- * Caller holds xprt->recv_lock.
+ * Caller holds xprt->queue_lock.
   */
  void xprt_update_rtt(struct rpc_task *task)
  {
@@ -917,7 +1080,7 @@ EXPORT_SYMBOL_GPL(xprt_update_rtt);
   * @task: RPC request that recently completed
   * @copied: actual number of bytes received from the transport
   *
- * Caller holds xprt->recv_lock.
+ * Caller holds xprt->queue_lock.
   */
  void xprt_complete_rqst(struct rpc_task *task, int copied)
  {
@@ -930,12 +1093,12 @@ void xprt_complete_rqst(struct rpc_task *task, int copied)
  
         xprt->stat.recvs++;
  
-       list_del_init(&req->rq_list);
         req->rq_private_buf.len = copied;
         /* Ensure all writes are done before we update */
         /* req->rq_reply_bytes_recvd */
         smp_wmb();
         req->rq_reply_bytes_recvd = copied;
+       xprt_request_dequeue_receive_locked(task);
         rpc_wake_up_queued_task(&xprt->pending, task);
  }
  EXPORT_SYMBOL_GPL(xprt_complete_rqst);
@@ -956,6 +1119,172 @@ static void xprt_timer(struct rpc_task *task)
                 task->tk_status = 0;
  }
  
+/**
+ * xprt_request_wait_receive - wait for the reply to an RPC request
+ * @task: RPC task about to send a request
+ *
+ */
+void xprt_request_wait_receive(struct rpc_task *task)
+{
+       struct rpc_rqst *req = task->tk_rqstp;
+       struct rpc_xprt *xprt = req->rq_xprt;
+
+       if (!test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate))
+               return;
+       /*
+        * Sleep on the pending queue if we're expecting a reply.
+        * The spinlock ensures atomicity between the test of
+        * RPC_TASK_NEED_RECV, and the call to rpc_sleep_on().
+        */
+       spin_lock(&xprt->queue_lock);
+       if (test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate)) {
+               xprt->ops->set_retrans_timeout(task);
+               rpc_sleep_on(&xprt->pending, task, xprt_timer);
+               /*
+                * Send an extra queue wakeup call if the
+                * connection was dropped in case the call to
+                * rpc_sleep_on() raced.
+                */
+               if (xprt_request_retransmit_after_disconnect(task))
+                       rpc_wake_up_queued_task_set_status(&xprt->pending,
+                                       task, -ENOTCONN);
+       }
+       spin_unlock(&xprt->queue_lock);
+}
+
+static bool
+xprt_request_need_enqueue_transmit(struct rpc_task *task, struct rpc_rqst *req)
+{
+       return !test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate);
+}
+
+/**
+ * xprt_request_enqueue_transmit - queue a task for transmission
+ * @task: pointer to rpc_task
+ *
+ * Add a task to the transmission queue.
+ */
+void
+xprt_request_enqueue_transmit(struct rpc_task *task)
+{
+       struct rpc_rqst *pos, *req = task->tk_rqstp;
+       struct rpc_xprt *xprt = req->rq_xprt;
+
+       if (xprt_request_need_enqueue_transmit(task, req)) {
+               spin_lock(&xprt->queue_lock);
+               /*
+                * Requests that carry congestion control credits are added
+                * to the head of the list to avoid starvation issues.
+                */
+               if (req->rq_cong) {
+                       xprt_clear_congestion_window_wait(xprt);
+                       list_for_each_entry(pos, &xprt->xmit_queue, rq_xmit) {
+                               if (pos->rq_cong)
+                                       continue;
+                               /* Note: req is added _before_ pos */
+                               list_add_tail(&req->rq_xmit, &pos->rq_xmit);
+                               INIT_LIST_HEAD(&req->rq_xmit2);
+                               goto out;
+                       }
+               } else if (RPC_IS_SWAPPER(task)) {
+                       list_for_each_entry(pos, &xprt->xmit_queue, rq_xmit) {
+                               if (pos->rq_cong || pos->rq_bytes_sent)
+                                       continue;
+                               if (RPC_IS_SWAPPER(pos->rq_task))
+                                       continue;
+                               /* Note: req is added _before_ pos */
+                               list_add_tail(&req->rq_xmit, &pos->rq_xmit);
+                               INIT_LIST_HEAD(&req->rq_xmit2);
+                               goto out;
+                       }
+               } else {
+                       list_for_each_entry(pos, &xprt->xmit_queue, rq_xmit) {
+                               if (pos->rq_task->tk_owner != task->tk_owner)
+                                       continue;
+                               list_add_tail(&req->rq_xmit2, &pos->rq_xmit2);
+                               INIT_LIST_HEAD(&req->rq_xmit);
+                               goto out;
+                       }
+               }
+               list_add_tail(&req->rq_xmit, &xprt->xmit_queue);
+               INIT_LIST_HEAD(&req->rq_xmit2);
+out:
+               set_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate);
+               spin_unlock(&xprt->queue_lock);
+       }
+}
+
+/**
+ * xprt_request_dequeue_transmit_locked - remove a task from the transmission queue
+ * @task: pointer to rpc_task
+ *
+ * Remove a task from the transmission queue
+ * Caller must hold xprt->queue_lock
+ */
+static void
+xprt_request_dequeue_transmit_locked(struct rpc_task *task)
+{
+       struct rpc_rqst *req = task->tk_rqstp;
+
+       if (!test_and_clear_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate))
+               return;
+       if (!list_empty(&req->rq_xmit)) {
+               list_del(&req->rq_xmit);
+               if (!list_empty(&req->rq_xmit2)) {
+                       struct rpc_rqst *next = list_first_entry(&req->rq_xmit2,
+                                       struct rpc_rqst, rq_xmit2);
+                       list_del(&req->rq_xmit2);
+                       list_add_tail(&next->rq_xmit, &next->rq_xprt->xmit_queue);
+               }
+       } else
+               list_del(&req->rq_xmit2);
+}
+
+/**
+ * xprt_request_dequeue_transmit - remove a task from the transmission queue
+ * @task: pointer to rpc_task
+ *
+ * Remove a task from the transmission queue
+ */
+static void
+xprt_request_dequeue_transmit(struct rpc_task *task)
+{
+       struct rpc_rqst *req = task->tk_rqstp;
+       struct rpc_xprt *xprt = req->rq_xprt;
+
+       spin_lock(&xprt->queue_lock);
+       xprt_request_dequeue_transmit_locked(task);
+       spin_unlock(&xprt->queue_lock);
+}
+
+/**
+ * xprt_request_prepare - prepare an encoded request for transport
+ * @req: pointer to rpc_rqst
+ *
+ * Calls into the transport layer to do whatever is needed to prepare
+ * the request for transmission or receive.
+ */
+void
+xprt_request_prepare(struct rpc_rqst *req)
+{
+       struct rpc_xprt *xprt = req->rq_xprt;
+
+       if (xprt->ops->prepare_request)
+               xprt->ops->prepare_request(req);
+}
+
+/**
+ * xprt_request_need_retransmit - Test if a task needs retransmission
+ * @task: pointer to rpc_task
+ *
+ * Test for whether a connection breakage requires the task to retransmit
+ */
+bool
+xprt_request_need_retransmit(struct rpc_task *task)
+{
+       return xprt_request_retransmit_after_disconnect(task);
+}
+
  /**
   * xprt_prepare_transmit - reserve the transport before sending a request
   * @task: RPC task about to send a request
@@ -965,32 +1294,18 @@ bool xprt_prepare_transmit(struct rpc_task *task)
  {
         struct rpc_rqst *req = task->tk_rqstp;
         struct rpc_xprt *xprt = req->rq_xprt;
-       bool ret = false;
  
         dprintk("RPC: %5u xprt_prepare_transmit\n", task->tk_pid);
  
-       spin_lock_bh(&xprt->transport_lock);
-       if (!req->rq_bytes_sent) {
-               if (req->rq_reply_bytes_recvd) {
-                       task->tk_status = req->rq_reply_bytes_recvd;
-                       goto out_unlock;
-               }
-               if ((task->tk_flags & RPC_TASK_NO_RETRANS_TIMEOUT)
-                   && xprt_connected(xprt)
-                   && req->rq_connect_cookie == xprt->connect_cookie) {
-                       xprt->ops->set_retrans_timeout(task);
-                       rpc_sleep_on(&xprt->pending, task, xprt_timer);
-                       goto out_unlock;
-               }
-       }
-       if (!xprt->ops->reserve_xprt(xprt, task)) {
-               task->tk_status = -EAGAIN;
-               goto out_unlock;
+       if (!xprt_lock_write(xprt, task)) {
+               /* Race breaker: someone may have transmitted us */
+               if (!test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate))
+                       rpc_wake_up_queued_task_set_status(&xprt->sending,
+                                       task, 0);
+               return false;
+
         }
-       ret = true;
-out_unlock:
-       spin_unlock_bh(&xprt->transport_lock);
-       return ret;
+       return true;
  }
  
  void xprt_end_transmit(struct rpc_task *task)
@@ -999,54 +1314,62 @@ void xprt_end_transmit(struct rpc_task *task)
  }
  
  /**
- * xprt_transmit - send an RPC request on a transport
- * @task: controlling RPC task
+ * xprt_request_transmit - send an RPC request on a transport
+ * @req: pointer to request to transmit
+ * @snd_task: RPC task that owns the transport lock
   *
- * We have to copy the iovec because sendmsg fiddles with its contents.
+ * This performs the transmission of a single request.
+ * Note that if the request is not the same as snd_task, then it
+ * does need to be pinned.
+ * Returns '0' on success.
   */
-void xprt_transmit(struct rpc_task *task)
+static int
+xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task)
  {
-       struct rpc_rqst *req = task->tk_rqstp;
-       struct rpc_xprt *xprt = req->rq_xprt;
+       struct rpc_xprt *xprt = req->rq_xprt;
+       struct rpc_task *task = req->rq_task;
         unsigned int connect_cookie;
+       int is_retrans = RPC_WAS_SENT(task);
         int status;
  
         dprintk("RPC: %5u xprt_transmit(%u)\n", task->tk_pid, req->rq_slen);
  
-       if (!req->rq_reply_bytes_recvd) {
-               if (list_empty(&req->rq_list) && rpc_reply_expected(task)) {
-                       /*
-                        * Add to the list only if we're expecting a reply
-                        */
-                       /* Update the softirq receive buffer */
-                       memcpy(&req->rq_private_buf, &req->rq_rcv_buf,
-                                       sizeof(req->rq_private_buf));
-                       /* Add request to the receive list */
-                       spin_lock(&xprt->recv_lock);
-                       list_add_tail(&req->rq_list, &xprt->recv);
-                       spin_unlock(&xprt->recv_lock);
-                       xprt_reset_majortimeo(req);
-                       /* Turn off autodisconnect */
-                       del_singleshot_timer_sync(&xprt->timer);
+       if (!req->rq_bytes_sent) {
+               if (xprt_request_data_received(task)) {
+                       status = 0;
+                       goto out_dequeue;
                 }
-       } else if (!req->rq_bytes_sent)
-               return;
+               /* Verify that our message lies in the RPCSEC_GSS window */
+               if (rpcauth_xmit_need_reencode(task)) {
+                       status = -EBADMSG;
+                       goto out_dequeue;
+               }
+       }
+
+       /*
+        * Update req->rq_ntrans before transmitting to avoid races with
+        * xprt_update_rtt(), which needs to know that it is recording a
+        * reply to the first transmission.
+        */
+       req->rq_ntrans++;
  
         connect_cookie = xprt->connect_cookie;
-       status = xprt->ops->send_request(task);
+       status = xprt->ops->send_request(req);
         trace_xprt_transmit(xprt, req->rq_xid, status);
         if (status != 0) {
-               task->tk_status = status;
-               return;
+               req->rq_ntrans--;
+               return status;
         }
+
+       if (is_retrans)
+               task->tk_client->cl_stats->rpcretrans++;
+
         xprt_inject_disconnect(xprt);
  
         dprintk("RPC: %5u xmit complete\n", task->tk_pid);
         task->tk_flags |= RPC_TASK_SENT;
         spin_lock_bh(&xprt->transport_lock);
  
-       xprt->ops->set_retrans_timeout(task);
-
         xprt->stat.sends++;
         xprt->stat.req_u += xprt->stat.sends - xprt->stat.recvs;
         xprt->stat.bklog_u += xprt->backlog.qlen;
@@ -1055,25 +1378,49 @@ void xprt_transmit(struct rpc_task *task)
         spin_unlock_bh(&xprt->transport_lock);
  
         req->rq_connect_cookie = connect_cookie;
-       if (rpc_reply_expected(task) && !READ_ONCE(req->rq_reply_bytes_recvd)) {
-               /*
-                * Sleep on the pending queue if we're expecting a reply.
-                * The spinlock ensures atomicity between the test of
-                * req->rq_reply_bytes_recvd, and the call to rpc_sleep_on().
-                */
-               spin_lock(&xprt->recv_lock);
-               if (!req->rq_reply_bytes_recvd) {
-                       rpc_sleep_on(&xprt->pending, task, xprt_timer);
-                       /*
-                        * Send an extra queue wakeup call if the
-                        * connection was dropped in case the call to
-                        * rpc_sleep_on() raced.
-                        */
-                       if (!xprt_connected(xprt))
-                               xprt_wake_pending_tasks(xprt, -ENOTCONN);
-               }
-               spin_unlock(&xprt->recv_lock);
+out_dequeue:
+       xprt_request_dequeue_transmit(task);
+       rpc_wake_up_queued_task_set_status(&xprt->sending, task, status);
+       return status;
+}
+
+/**
+ * xprt_transmit - send an RPC request on a transport
+ * @task: controlling RPC task
+ *
+ * Attempts to drain the transmit queue. On exit, either the transport
+ * signalled an error that needs to be handled before transmission can
+ * resume, or @task finished transmitting, and detected that it already
+ * received a reply.
+ */
+void
+xprt_transmit(struct rpc_task *task)
+{
+       struct rpc_rqst *next, *req = task->tk_rqstp;
+       struct rpc_xprt *xprt = req->rq_xprt;
+       int status;
+
+       spin_lock(&xprt->queue_lock);
+       while (!list_empty(&xprt->xmit_queue)) {
+               next = list_first_entry(&xprt->xmit_queue,
+                               struct rpc_rqst, rq_xmit);
+               xprt_pin_rqst(next);
+               spin_unlock(&xprt->queue_lock);
+               status = xprt_request_transmit(next, task);
+               if (status == -EBADMSG && next != req)
+                       status = 0;
+               cond_resched();
+               spin_lock(&xprt->queue_lock);
+               xprt_unpin_rqst(next);
+               if (status == 0) {
+                       if (!xprt_request_data_received(task) ||
+                           test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate))
+                               continue;
+               } else if (test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate))
+                       task->tk_status = status;
+               break;
         }
+       spin_unlock(&xprt->queue_lock);
  }
  
  static void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task)
@@ -1170,20 +1517,6 @@ out_init_req:
  }
  EXPORT_SYMBOL_GPL(xprt_alloc_slot);
  
-void xprt_lock_and_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
-{
-       /* Note: grabbing the xprt_lock_write() ensures that we throttle
-        * new slot allocation if the transport is congested (i.e. when
-        * reconnecting a stream transport or when out of socket write
-        * buffer space).
-        */
-       if (xprt_lock_write(xprt, task)) {
-               xprt_alloc_slot(xprt, task);
-               xprt_release_write(xprt, task);
-       }
-}
-EXPORT_SYMBOL_GPL(xprt_lock_and_alloc_slot);
-
  void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
  {
         spin_lock(&xprt->reserve_lock);
@@ -1250,6 +1583,60 @@ void xprt_free(struct rpc_xprt *xprt)
  }
  EXPORT_SYMBOL_GPL(xprt_free);
  
+static void
+xprt_init_connect_cookie(struct rpc_rqst *req, struct rpc_xprt *xprt)
+{
+       req->rq_connect_cookie = xprt_connect_cookie(xprt) - 1;
+}
+
+static __be32
+xprt_alloc_xid(struct rpc_xprt *xprt)
+{
+       __be32 xid;
+
+       spin_lock(&xprt->reserve_lock);
+       xid = (__force __be32)xprt->xid++;
+       spin_unlock(&xprt->reserve_lock);
+       return xid;
+}
+
+static void
+xprt_init_xid(struct rpc_xprt *xprt)
+{
+       xprt->xid = prandom_u32();
+}
+
+static void
+xprt_request_init(struct rpc_task *task)
+{
+       struct rpc_xprt *xprt = task->tk_xprt;
+       struct rpc_rqst *req = task->tk_rqstp;
+
+       req->rq_timeout = task->tk_client->cl_timeout->to_initval;
+       req->rq_task    = task;
+       req->rq_xprt    = xprt;
+       req->rq_buffer  = NULL;
+       req->rq_xid     = xprt_alloc_xid(xprt);
+       xprt_init_connect_cookie(req, xprt);
+       req->rq_bytes_sent = 0;
+       req->rq_snd_buf.len = 0;
+       req->rq_snd_buf.buflen = 0;
+       req->rq_rcv_buf.len = 0;
+       req->rq_rcv_buf.buflen = 0;
+       req->rq_release_snd_buf = NULL;
+       xprt_reset_majortimeo(req);
+       dprintk("RPC: %5u reserved req %p xid %08x\n", task->tk_pid,
+                       req, ntohl(req->rq_xid));
+}
+
+static void
+xprt_do_reserve(struct rpc_xprt *xprt, struct rpc_task *task)
+{
+       xprt->ops->alloc_slot(xprt, task);
+       if (task->tk_rqstp != NULL)
+               xprt_request_init(task);
+}
+
  /**
   * xprt_reserve - allocate an RPC request slot
   * @task: RPC task requesting a slot allocation
@@ -1269,7 +1656,7 @@ void xprt_reserve(struct rpc_task *task)
         task->tk_timeout = 0;
         task->tk_status = -EAGAIN;
         if (!xprt_throttle_congested(xprt, task))
-               xprt->ops->alloc_slot(xprt, task);
+               xprt_do_reserve(xprt, task);
  }
  
  /**
@@ -1291,45 +1678,29 @@ void xprt_retry_reserve(struct rpc_task *task)
  
         task->tk_timeout = 0;
         task->tk_status = -EAGAIN;
-       xprt->ops->alloc_slot(xprt, task);
-}
-
-static inline __be32 xprt_alloc_xid(struct rpc_xprt *xprt)
-{
-       __be32 xid;
-
-       spin_lock(&xprt->reserve_lock);
-       xid = (__force __be32)xprt->xid++;
-       spin_unlock(&xprt->reserve_lock);
-       return xid;
+       xprt_do_reserve(xprt, task);
  }
  
-static inline void xprt_init_xid(struct rpc_xprt *xprt)
-{
-       xprt->xid = prandom_u32();
-}
-
-void xprt_request_init(struct rpc_task *task)
+static void
+xprt_request_dequeue_all(struct rpc_task *task, struct rpc_rqst *req)
  {
-       struct rpc_xprt *xprt = task->tk_xprt;
-       struct rpc_rqst *req = task->tk_rqstp;
+       struct rpc_xprt *xprt = req->rq_xprt;
  
-       INIT_LIST_HEAD(&req->rq_list);
-       req->rq_timeout = task->tk_client->cl_timeout->to_initval;
-       req->rq_task    = task;
-       req->rq_xprt    = xprt;
-       req->rq_buffer  = NULL;
-       req->rq_xid     = xprt_alloc_xid(xprt);
-       req->rq_connect_cookie = xprt->connect_cookie - 1;
-       req->rq_bytes_sent = 0;
-       req->rq_snd_buf.len = 0;
-       req->rq_snd_buf.buflen = 0;
-       req->rq_rcv_buf.len = 0;
-       req->rq_rcv_buf.buflen = 0;
-       req->rq_release_snd_buf = NULL;
-       xprt_reset_majortimeo(req);
-       dprintk("RPC: %5u reserved req %p xid %08x\n", task->tk_pid,
-                       req, ntohl(req->rq_xid));
+       if (test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate) ||
+           test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate) ||
+           xprt_is_pinned_rqst(req)) {
+               spin_lock(&xprt->queue_lock);
+               xprt_request_dequeue_transmit_locked(task);
+               xprt_request_dequeue_receive_locked(task);
+               while (xprt_is_pinned_rqst(req)) {
+                       set_bit(RPC_TASK_MSG_PIN_WAIT, &task->tk_runstate);
+                       spin_unlock(&xprt->queue_lock);
+                       xprt_wait_on_pinned_rqst(req);
+                       spin_lock(&xprt->queue_lock);
+                       clear_bit(RPC_TASK_MSG_PIN_WAIT, &task->tk_runstate);
+               }
+               spin_unlock(&xprt->queue_lock);
+       }
  }
  
  /**
@@ -1345,8 +1716,7 @@ void xprt_release(struct rpc_task *task)
         if (req == NULL) {
                 if (task->tk_client) {
                         xprt = task->tk_xprt;
-                       if (xprt->snd_task == task)
-                               xprt_release_write(xprt, task);
+                       xprt_release_write(xprt, task);
                 }
                 return;
         }
@@ -1356,12 +1726,7 @@ void xprt_release(struct rpc_task *task)
                 task->tk_ops->rpc_count_stats(task, task->tk_calldata);
         else if (task->tk_client)
                 rpc_count_iostats(task, task->tk_client->cl_metrics);
-       spin_lock(&xprt->recv_lock);
-       if (!list_empty(&req->rq_list)) {
-               list_del_init(&req->rq_list);
-               xprt_wait_on_pinned_rqst(req);
-       }
-       spin_unlock(&xprt->recv_lock);
+       xprt_request_dequeue_all(task, req);
         spin_lock_bh(&xprt->transport_lock);
         xprt->ops->release_xprt(xprt, task);
         if (xprt->ops->release_request)
@@ -1372,6 +1737,7 @@ void xprt_release(struct rpc_task *task)
         if (req->rq_buffer)
                 xprt->ops->buf_free(task);
         xprt_inject_disconnect(xprt);
+       xdr_free_bvec(&req->rq_rcv_buf);
         if (req->rq_cred != NULL)
                 put_rpccred(req->rq_cred);
         task->tk_rqstp = NULL;
@@ -1385,16 +1751,36 @@ void xprt_release(struct rpc_task *task)
                 xprt_free_bc_request(req);
  }
  
+#ifdef CONFIG_SUNRPC_BACKCHANNEL
+void
+xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task)
+{
+       struct xdr_buf *xbufp = &req->rq_snd_buf;
+
+       task->tk_rqstp = req;
+       req->rq_task = task;
+       xprt_init_connect_cookie(req, req->rq_xprt);
+       /*
+        * Set up the xdr_buf length.
+        * This also indicates that the buffer is XDR encoded already.
+        */
+       xbufp->len = xbufp->head[0].iov_len + xbufp->page_len +
+               xbufp->tail[0].iov_len;
+       req->rq_bytes_sent = 0;
+}
+#endif
+
  static void xprt_init(struct rpc_xprt *xprt, struct net *net)
  {
         kref_init(&xprt->kref);
  
         spin_lock_init(&xprt->transport_lock);
         spin_lock_init(&xprt->reserve_lock);
-       spin_lock_init(&xprt->recv_lock);
+       spin_lock_init(&xprt->queue_lock);
  
         INIT_LIST_HEAD(&xprt->free);
-       INIT_LIST_HEAD(&xprt->recv);
+       xprt->recv_queue = RB_ROOT;
+       INIT_LIST_HEAD(&xprt->xmit_queue);
  #if defined(CONFIG_SUNRPC_BACKCHANNEL)
         spin_lock_init(&xprt->bc_pa_lock);
         INIT_LIST_HEAD(&xprt->bc_pa_list);
@@ -1407,7 +1793,7 @@ static void xprt_init(struct rpc_xprt *xprt, struct net *net)
  
         rpc_init_wait_queue(&xprt->binding, "xprt_binding");
         rpc_init_wait_queue(&xprt->pending, "xprt_pending");
-       rpc_init_priority_wait_queue(&xprt->sending, "xprt_sending");
+       rpc_init_wait_queue(&xprt->sending, "xprt_sending");
         rpc_init_priority_wait_queue(&xprt->backlog, "xprt_backlog");
  
         xprt_init_xid(xprt);
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c

index 90adeff..e5b367a 100644 (file)
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -51,12 +51,11 @@ static int rpcrdma_bc_setup_reqs(struct rpcrdma_xprt *r_xprt,
                 rqst = &req->rl_slot;
  
                 rqst->rq_xprt = xprt;
-               INIT_LIST_HEAD(&rqst->rq_list);
                 INIT_LIST_HEAD(&rqst->rq_bc_list);
                 __set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
-               spin_lock_bh(&xprt->bc_pa_lock);
+               spin_lock(&xprt->bc_pa_lock);
                 list_add(&rqst->rq_bc_pa_list, &xprt->bc_pa_list);
-               spin_unlock_bh(&xprt->bc_pa_lock);
+               spin_unlock(&xprt->bc_pa_lock);
  
                 size = r_xprt->rx_data.inline_rsize;
                 rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, GFP_KERNEL);
@@ -201,6 +200,9 @@ int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst)
         if (!xprt_connected(rqst->rq_xprt))
                 goto drop_connection;
  
+       if (!xprt_request_get_cong(rqst->rq_xprt, rqst))
+               return -EBADSLT;
+
         rc = rpcrdma_bc_marshal_reply(rqst);
         if (rc < 0)
                 goto failed_marshal;
@@ -228,16 +230,16 @@ void xprt_rdma_bc_destroy(struct rpc_xprt *xprt, unsigned int reqs)
         struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
         struct rpc_rqst *rqst, *tmp;
  
-       spin_lock_bh(&xprt->bc_pa_lock);
+       spin_lock(&xprt->bc_pa_lock);
         list_for_each_entry_safe(rqst, tmp, &xprt->bc_pa_list, rq_bc_pa_list) {
                 list_del(&rqst->rq_bc_pa_list);
-               spin_unlock_bh(&xprt->bc_pa_lock);
+               spin_unlock(&xprt->bc_pa_lock);
  
                 rpcrdma_bc_free_rqst(r_xprt, rqst);
  
-               spin_lock_bh(&xprt->bc_pa_lock);
+               spin_lock(&xprt->bc_pa_lock);
         }
-       spin_unlock_bh(&xprt->bc_pa_lock);
+       spin_unlock(&xprt->bc_pa_lock);
  }
  
  /**
@@ -255,9 +257,9 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst)
         rpcrdma_recv_buffer_put(req->rl_reply);
         req->rl_reply = NULL;
  
-       spin_lock_bh(&xprt->bc_pa_lock);
+       spin_lock(&xprt->bc_pa_lock);
         list_add_tail(&rqst->rq_bc_pa_list, &xprt->bc_pa_list);
-       spin_unlock_bh(&xprt->bc_pa_lock);
+       spin_unlock(&xprt->bc_pa_lock);
  }
  
  /**
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c

index 0f7c465..7f5632c 100644 (file)
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -49,46 +49,7 @@ fmr_is_supported(struct rpcrdma_ia *ia)
         return true;
  }
  
-static int
-fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
-{
-       static struct ib_fmr_attr fmr_attr = {
-               .max_pages      = RPCRDMA_MAX_FMR_SGES,
-               .max_maps       = 1,
-               .page_shift     = PAGE_SHIFT
-       };
-
-       mr->fmr.fm_physaddrs = kcalloc(RPCRDMA_MAX_FMR_SGES,
-                                      sizeof(u64), GFP_KERNEL);
-       if (!mr->fmr.fm_physaddrs)
-               goto out_free;
-
-       mr->mr_sg = kcalloc(RPCRDMA_MAX_FMR_SGES,
-                           sizeof(*mr->mr_sg), GFP_KERNEL);
-       if (!mr->mr_sg)
-               goto out_free;
-
-       sg_init_table(mr->mr_sg, RPCRDMA_MAX_FMR_SGES);
-
-       mr->fmr.fm_mr = ib_alloc_fmr(ia->ri_pd, RPCRDMA_FMR_ACCESS_FLAGS,
-                                    &fmr_attr);
-       if (IS_ERR(mr->fmr.fm_mr))
-               goto out_fmr_err;
-
-       INIT_LIST_HEAD(&mr->mr_list);
-       return 0;
-
-out_fmr_err:
-       dprintk("RPC:       %s: ib_alloc_fmr returned %ld\n", __func__,
-               PTR_ERR(mr->fmr.fm_mr));
-
-out_free:
-       kfree(mr->mr_sg);
-       kfree(mr->fmr.fm_physaddrs);
-       return -ENOMEM;
-}
-
-static int
+static void
  __fmr_unmap(struct rpcrdma_mr *mr)
  {
         LIST_HEAD(l);
@@ -97,13 +58,16 @@ __fmr_unmap(struct rpcrdma_mr *mr)
         list_add(&mr->fmr.fm_mr->list, &l);
         rc = ib_unmap_fmr(&l);
         list_del(&mr->fmr.fm_mr->list);
-       return rc;
+       if (rc)
+               pr_err("rpcrdma: final ib_unmap_fmr for %p failed %i\n",
+                      mr, rc);
  }
  
+/* Release an MR.
+ */
  static void
  fmr_op_release_mr(struct rpcrdma_mr *mr)
  {
-       LIST_HEAD(unmap_list);
         int rc;
  
         kfree(mr->fmr.fm_physaddrs);
@@ -112,10 +76,7 @@ fmr_op_release_mr(struct rpcrdma_mr *mr)
         /* In case this one was left mapped, try to unmap it
          * to prevent dealloc_fmr from failing with EBUSY
          */
-       rc = __fmr_unmap(mr);
-       if (rc)
-               pr_err("rpcrdma: final ib_unmap_fmr for %p failed %i\n",
-                      mr, rc);
+       __fmr_unmap(mr);
  
         rc = ib_dealloc_fmr(mr->fmr.fm_mr);
         if (rc)
@@ -125,40 +86,68 @@ fmr_op_release_mr(struct rpcrdma_mr *mr)
         kfree(mr);
  }
  
-/* Reset of a single FMR.
+/* MRs are dynamically allocated, so simply clean up and release the MR.
+ * A replacement MR will subsequently be allocated on demand.
   */
  static void
-fmr_op_recover_mr(struct rpcrdma_mr *mr)
+fmr_mr_recycle_worker(struct work_struct *work)
  {
+       struct rpcrdma_mr *mr = container_of(work, struct rpcrdma_mr, mr_recycle);
         struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
-       int rc;
  
-       /* ORDER: invalidate first */
-       rc = __fmr_unmap(mr);
-       if (rc)
-               goto out_release;
-
-       /* ORDER: then DMA unmap */
-       rpcrdma_mr_unmap_and_put(mr);
+       trace_xprtrdma_mr_recycle(mr);
  
-       r_xprt->rx_stats.mrs_recovered++;
-       return;
-
-out_release:
-       pr_err("rpcrdma: FMR reset failed (%d), %p released\n", rc, mr);
-       r_xprt->rx_stats.mrs_orphaned++;
-
-       trace_xprtrdma_dma_unmap(mr);
+       trace_xprtrdma_mr_unmap(mr);
         ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
                         mr->mr_sg, mr->mr_nents, mr->mr_dir);
  
         spin_lock(&r_xprt->rx_buf.rb_mrlock);
         list_del(&mr->mr_all);
+       r_xprt->rx_stats.mrs_recycled++;
         spin_unlock(&r_xprt->rx_buf.rb_mrlock);
-
         fmr_op_release_mr(mr);
  }
  
+static int
+fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
+{
+       static struct ib_fmr_attr fmr_attr = {
+               .max_pages      = RPCRDMA_MAX_FMR_SGES,
+               .max_maps       = 1,
+               .page_shift     = PAGE_SHIFT
+       };
+
+       mr->fmr.fm_physaddrs = kcalloc(RPCRDMA_MAX_FMR_SGES,
+                                      sizeof(u64), GFP_KERNEL);
+       if (!mr->fmr.fm_physaddrs)
+               goto out_free;
+
+       mr->mr_sg = kcalloc(RPCRDMA_MAX_FMR_SGES,
+                           sizeof(*mr->mr_sg), GFP_KERNEL);
+       if (!mr->mr_sg)
+               goto out_free;
+
+       sg_init_table(mr->mr_sg, RPCRDMA_MAX_FMR_SGES);
+
+       mr->fmr.fm_mr = ib_alloc_fmr(ia->ri_pd, RPCRDMA_FMR_ACCESS_FLAGS,
+                                    &fmr_attr);
+       if (IS_ERR(mr->fmr.fm_mr))
+               goto out_fmr_err;
+
+       INIT_LIST_HEAD(&mr->mr_list);
+       INIT_WORK(&mr->mr_recycle, fmr_mr_recycle_worker);
+       return 0;
+
+out_fmr_err:
+       dprintk("RPC:       %s: ib_alloc_fmr returned %ld\n", __func__,
+               PTR_ERR(mr->fmr.fm_mr));
+
+out_free:
+       kfree(mr->mr_sg);
+       kfree(mr->fmr.fm_physaddrs);
+       return -ENOMEM;
+}
+
  /* On success, sets:
   *     ep->rep_attr.cap.max_send_wr
   *     ep->rep_attr.cap.max_recv_wr
@@ -187,6 +176,7 @@ fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
  
         ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
                                 RPCRDMA_MAX_FMR_SGES);
+       ia->ri_max_segs += 2;   /* segments for head and tail buffers */
         return 0;
  }
  
@@ -244,7 +234,7 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
                                      mr->mr_sg, i, mr->mr_dir);
         if (!mr->mr_nents)
                 goto out_dmamap_err;
-       trace_xprtrdma_dma_map(mr);
+       trace_xprtrdma_mr_map(mr);
  
         for (i = 0, dma_pages = mr->fmr.fm_physaddrs; i < mr->mr_nents; i++)
                 dma_pages[i] = sg_dma_address(&mr->mr_sg[i]);
@@ -305,13 +295,13 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
         list_for_each_entry(mr, mrs, mr_list) {
                 dprintk("RPC:       %s: unmapping fmr %p\n",
                         __func__, &mr->fmr);
-               trace_xprtrdma_localinv(mr);
+               trace_xprtrdma_mr_localinv(mr);
                 list_add_tail(&mr->fmr.fm_mr->list, &unmap_list);
         }
         r_xprt->rx_stats.local_inv_needed++;
         rc = ib_unmap_fmr(&unmap_list);
         if (rc)
-               goto out_reset;
+               goto out_release;
  
         /* ORDER: Now DMA unmap all of the req's MRs, and return
          * them to the free MW list.
@@ -324,13 +314,13 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
  
         return;
  
-out_reset:
+out_release:
         pr_err("rpcrdma: ib_unmap_fmr failed (%i)\n", rc);
  
         while (!list_empty(mrs)) {
                 mr = rpcrdma_mr_pop(mrs);
                 list_del(&mr->fmr.fm_mr->list);
-               fmr_op_recover_mr(mr);
+               rpcrdma_mr_recycle(mr);
         }
  }
  
@@ -338,7 +328,6 @@ const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
         .ro_map                         = fmr_op_map,
         .ro_send                        = fmr_op_send,
         .ro_unmap_sync                  = fmr_op_unmap_sync,
-       .ro_recover_mr                  = fmr_op_recover_mr,
         .ro_open                        = fmr_op_open,
         .ro_maxpages                    = fmr_op_maxpages,
         .ro_init_mr                     = fmr_op_init_mr,
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c

index 1bb00dd..fc6378c 100644 (file)
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -97,6 +97,44 @@ out_not_supported:
         return false;
  }
  
+static void
+frwr_op_release_mr(struct rpcrdma_mr *mr)
+{
+       int rc;
+
+       rc = ib_dereg_mr(mr->frwr.fr_mr);
+       if (rc)
+               pr_err("rpcrdma: final ib_dereg_mr for %p returned %i\n",
+                      mr, rc);
+       kfree(mr->mr_sg);
+       kfree(mr);
+}
+
+/* MRs are dynamically allocated, so simply clean up and release the MR.
+ * A replacement MR will subsequently be allocated on demand.
+ */
+static void
+frwr_mr_recycle_worker(struct work_struct *work)
+{
+       struct rpcrdma_mr *mr = container_of(work, struct rpcrdma_mr, mr_recycle);
+       enum rpcrdma_frwr_state state = mr->frwr.fr_state;
+       struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
+
+       trace_xprtrdma_mr_recycle(mr);
+
+       if (state != FRWR_FLUSHED_LI) {
+               trace_xprtrdma_mr_unmap(mr);
+               ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
+                               mr->mr_sg, mr->mr_nents, mr->mr_dir);
+       }
+
+       spin_lock(&r_xprt->rx_buf.rb_mrlock);
+       list_del(&mr->mr_all);
+       r_xprt->rx_stats.mrs_recycled++;
+       spin_unlock(&r_xprt->rx_buf.rb_mrlock);
+       frwr_op_release_mr(mr);
+}
+
  static int
  frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
  {
@@ -113,6 +151,7 @@ frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
                 goto out_list_err;
  
         INIT_LIST_HEAD(&mr->mr_list);
+       INIT_WORK(&mr->mr_recycle, frwr_mr_recycle_worker);
         sg_init_table(mr->mr_sg, depth);
         init_completion(&frwr->fr_linv_done);
         return 0;
@@ -131,79 +170,6 @@ out_list_err:
         return rc;
  }
  
-static void
-frwr_op_release_mr(struct rpcrdma_mr *mr)
-{
-       int rc;
-
-       rc = ib_dereg_mr(mr->frwr.fr_mr);
-       if (rc)
-               pr_err("rpcrdma: final ib_dereg_mr for %p returned %i\n",
-                      mr, rc);
-       kfree(mr->mr_sg);
-       kfree(mr);
-}
-
-static int
-__frwr_mr_reset(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
-{
-       struct rpcrdma_frwr *frwr = &mr->frwr;
-       int rc;
-
-       rc = ib_dereg_mr(frwr->fr_mr);
-       if (rc) {
-               pr_warn("rpcrdma: ib_dereg_mr status %d, frwr %p orphaned\n",
-                       rc, mr);
-               return rc;
-       }
-
-       frwr->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype,
-                                 ia->ri_max_frwr_depth);
-       if (IS_ERR(frwr->fr_mr)) {
-               pr_warn("rpcrdma: ib_alloc_mr status %ld, frwr %p orphaned\n",
-                       PTR_ERR(frwr->fr_mr), mr);
-               return PTR_ERR(frwr->fr_mr);
-       }
-
-       dprintk("RPC:       %s: recovered FRWR %p\n", __func__, frwr);
-       frwr->fr_state = FRWR_IS_INVALID;
-       return 0;
-}
-
-/* Reset of a single FRWR. Generate a fresh rkey by replacing the MR.
- */
-static void
-frwr_op_recover_mr(struct rpcrdma_mr *mr)
-{
-       enum rpcrdma_frwr_state state = mr->frwr.fr_state;
-       struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
-       struct rpcrdma_ia *ia = &r_xprt->rx_ia;
-       int rc;
-
-       rc = __frwr_mr_reset(ia, mr);
-       if (state != FRWR_FLUSHED_LI) {
-               trace_xprtrdma_dma_unmap(mr);
-               ib_dma_unmap_sg(ia->ri_device,
-                               mr->mr_sg, mr->mr_nents, mr->mr_dir);
-       }
-       if (rc)
-               goto out_release;
-
-       rpcrdma_mr_put(mr);
-       r_xprt->rx_stats.mrs_recovered++;
-       return;
-
-out_release:
-       pr_err("rpcrdma: FRWR reset failed %d, %p released\n", rc, mr);
-       r_xprt->rx_stats.mrs_orphaned++;
-
-       spin_lock(&r_xprt->rx_buf.rb_mrlock);
-       list_del(&mr->mr_all);
-       spin_unlock(&r_xprt->rx_buf.rb_mrlock);
-
-       frwr_op_release_mr(mr);
-}
-
  /* On success, sets:
   *     ep->rep_attr.cap.max_send_wr
   *     ep->rep_attr.cap.max_recv_wr
@@ -276,6 +242,7 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
  
         ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
                                 ia->ri_max_frwr_depth);
+       ia->ri_max_segs += 2;   /* segments for head and tail buffers */
         return 0;
  }
  
@@ -384,7 +351,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
         mr = NULL;
         do {
                 if (mr)
-                       rpcrdma_mr_defer_recovery(mr);
+                       rpcrdma_mr_recycle(mr);
                 mr = rpcrdma_mr_get(r_xprt);
                 if (!mr)
                         return ERR_PTR(-EAGAIN);
@@ -417,7 +384,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
         mr->mr_nents = ib_dma_map_sg(ia->ri_device, mr->mr_sg, i, mr->mr_dir);
         if (!mr->mr_nents)
                 goto out_dmamap_err;
-       trace_xprtrdma_dma_map(mr);
+       trace_xprtrdma_mr_map(mr);
  
         ibmr = frwr->fr_mr;
         n = ib_map_mr_sg(ibmr, mr->mr_sg, mr->mr_nents, NULL, PAGE_SIZE);
@@ -451,7 +418,7 @@ out_dmamap_err:
  out_mapmr_err:
         pr_err("rpcrdma: failed to map mr %p (%d/%d)\n",
                frwr->fr_mr, n, mr->mr_nents);
-       rpcrdma_mr_defer_recovery(mr);
+       rpcrdma_mr_recycle(mr);
         return ERR_PTR(-EIO);
  }
  
@@ -499,7 +466,7 @@ frwr_op_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)
         list_for_each_entry(mr, mrs, mr_list)
                 if (mr->mr_handle == rep->rr_inv_rkey) {
                         list_del_init(&mr->mr_list);
-                       trace_xprtrdma_remoteinv(mr);
+                       trace_xprtrdma_mr_remoteinv(mr);
                         mr->frwr.fr_state = FRWR_IS_INVALID;
                         rpcrdma_mr_unmap_and_put(mr);
                         break;  /* only one invalidated MR per RPC */
@@ -536,7 +503,7 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
                 mr->frwr.fr_state = FRWR_IS_INVALID;
  
                 frwr = &mr->frwr;
-               trace_xprtrdma_localinv(mr);
+               trace_xprtrdma_mr_localinv(mr);
  
                 frwr->fr_cqe.done = frwr_wc_localinv;
                 last = &frwr->fr_invwr;
@@ -570,7 +537,7 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
         if (bad_wr != first)
                 wait_for_completion(&frwr->fr_linv_done);
         if (rc)
-               goto reset_mrs;
+               goto out_release;
  
         /* ORDER: Now DMA unmap all of the MRs, and return
          * them to the free MR list.
@@ -582,22 +549,21 @@ unmap:
         }
         return;
  
-reset_mrs:
+out_release:
         pr_err("rpcrdma: FRWR invalidate ib_post_send returned %i\n", rc);
  
-       /* Find and reset the MRs in the LOCAL_INV WRs that did not
+       /* Unmap and release the MRs in the LOCAL_INV WRs that did not
          * get posted.
          */
         while (bad_wr) {
                 frwr = container_of(bad_wr, struct rpcrdma_frwr,
                                     fr_invwr);
                 mr = container_of(frwr, struct rpcrdma_mr, frwr);
-
-               __frwr_mr_reset(ia, mr);
-
                 bad_wr = bad_wr->next;
+
+               list_del(&mr->mr_list);
+               frwr_op_release_mr(mr);
         }
-       goto unmap;
  }
  
  const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
@@ -605,7 +571,6 @@ const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
         .ro_send                        = frwr_op_send,
         .ro_reminv                      = frwr_op_reminv,
         .ro_unmap_sync                  = frwr_op_unmap_sync,
-       .ro_recover_mr                  = frwr_op_recover_mr,
         .ro_open                        = frwr_op_open,
         .ro_maxpages                    = frwr_op_maxpages,
         .ro_init_mr                     = frwr_op_init_mr,
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c

index c8ae983..9f53e02 100644 (file)
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -71,7 +71,6 @@ static unsigned int rpcrdma_max_call_header_size(unsigned int maxsegs)
         size = RPCRDMA_HDRLEN_MIN;
  
         /* Maximum Read list size */
-       maxsegs += 2;   /* segment for head and tail buffers */
         size = maxsegs * rpcrdma_readchunk_maxsz * sizeof(__be32);
  
         /* Minimal Read chunk size */
@@ -97,7 +96,6 @@ static unsigned int rpcrdma_max_reply_header_size(unsigned int maxsegs)
         size = RPCRDMA_HDRLEN_MIN;
  
         /* Maximum Write list size */
-       maxsegs += 2;   /* segment for head and tail buffers */
         size = sizeof(__be32);          /* segment count */
         size += maxsegs * rpcrdma_segment_maxsz * sizeof(__be32);
         size += sizeof(__be32); /* list discriminator */
@@ -805,7 +803,7 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
                 struct rpcrdma_mr *mr;
  
                 mr = rpcrdma_mr_pop(&req->rl_registered);
-               rpcrdma_mr_defer_recovery(mr);
+               rpcrdma_mr_recycle(mr);
         }
  
         /* This implementation supports the following combinations
@@ -866,7 +864,7 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
  out_err:
         switch (ret) {
         case -EAGAIN:
-               xprt_wait_for_buffer_space(rqst->rq_task, NULL);
+               xprt_wait_for_buffer_space(rqst->rq_xprt);
                 break;
         case -ENOBUFS:
                 break;
@@ -1216,7 +1214,6 @@ void rpcrdma_complete_rqst(struct rpcrdma_rep *rep)
         struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
         struct rpc_xprt *xprt = &r_xprt->rx_xprt;
         struct rpc_rqst *rqst = rep->rr_rqst;
-       unsigned long cwnd;
         int status;
  
         xprt->reestablish_timeout = 0;
@@ -1238,15 +1235,10 @@ void rpcrdma_complete_rqst(struct rpcrdma_rep *rep)
                 goto out_badheader;
  
  out:
-       spin_lock(&xprt->recv_lock);
-       cwnd = xprt->cwnd;
-       xprt->cwnd = r_xprt->rx_buf.rb_credits << RPC_CWNDSHIFT;
-       if (xprt->cwnd > cwnd)
-               xprt_release_rqst_cong(rqst->rq_task);
-
+       spin_lock(&xprt->queue_lock);
         xprt_complete_rqst(rqst->rq_task, status);
         xprt_unpin_rqst(rqst);
-       spin_unlock(&xprt->recv_lock);
+       spin_unlock(&xprt->queue_lock);
         return;
  
  /* If the incoming reply terminated a pending RPC, the next
@@ -1345,19 +1337,23 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
         /* Match incoming rpcrdma_rep to an rpcrdma_req to
          * get context for handling any incoming chunks.
          */
-       spin_lock(&xprt->recv_lock);
+       spin_lock(&xprt->queue_lock);
         rqst = xprt_lookup_rqst(xprt, rep->rr_xid);
         if (!rqst)
                 goto out_norqst;
         xprt_pin_rqst(rqst);
+       spin_unlock(&xprt->queue_lock);
  
         if (credits == 0)
                 credits = 1;    /* don't deadlock */
         else if (credits > buf->rb_max_requests)
                 credits = buf->rb_max_requests;
-       buf->rb_credits = credits;
-
-       spin_unlock(&xprt->recv_lock);
+       if (buf->rb_credits != credits) {
+               spin_lock_bh(&xprt->transport_lock);
+               buf->rb_credits = credits;
+               xprt->cwnd = credits << RPC_CWNDSHIFT;
+               spin_unlock_bh(&xprt->transport_lock);
+       }
  
         req = rpcr_to_rdmar(rqst);
         req->rl_reply = rep;
@@ -1378,7 +1374,7 @@ out_badversion:
   * is corrupt.
   */
  out_norqst:
-       spin_unlock(&xprt->recv_lock);
+       spin_unlock(&xprt->queue_lock);
         trace_xprtrdma_reply_rqst(rep);
         goto repost;
  
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c

index a681800..d3a1a23 100644 (file)
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -56,7 +56,7 @@ int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, __be32 *rdma_resp,
         if (src->iov_len < 24)
                 goto out_shortreply;
  
-       spin_lock(&xprt->recv_lock);
+       spin_lock(&xprt->queue_lock);
         req = xprt_lookup_rqst(xprt, xid);
         if (!req)
                 goto out_notfound;
@@ -86,7 +86,7 @@ int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, __be32 *rdma_resp,
         rcvbuf->len = 0;
  
  out_unlock:
-       spin_unlock(&xprt->recv_lock);
+       spin_unlock(&xprt->queue_lock);
  out:
         return ret;
  
@@ -215,9 +215,8 @@ drop_connection:
   * connection.
   */
  static int
-xprt_rdma_bc_send_request(struct rpc_task *task)
+xprt_rdma_bc_send_request(struct rpc_rqst *rqst)
  {
-       struct rpc_rqst *rqst = task->tk_rqstp;
         struct svc_xprt *sxprt = rqst->rq_xprt->bc_xprt;
         struct svcxprt_rdma *rdma;
         int ret;
@@ -225,12 +224,7 @@ xprt_rdma_bc_send_request(struct rpc_task *task)
         dprintk("svcrdma: sending bc call with xid: %08x\n",
                 be32_to_cpu(rqst->rq_xid));
  
-       if (!mutex_trylock(&sxprt->xpt_mutex)) {
-               rpc_sleep_on(&sxprt->xpt_bc_pending, task, NULL);
-               if (!mutex_trylock(&sxprt->xpt_mutex))
-                       return -EAGAIN;
-               rpc_wake_up_queued_task(&sxprt->xpt_bc_pending, task);
-       }
+       mutex_lock(&sxprt->xpt_mutex);
  
         ret = -ENOTCONN;
         rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt);
@@ -248,6 +242,7 @@ static void
  xprt_rdma_bc_close(struct rpc_xprt *xprt)
  {
         dprintk("svcrdma: %s: xprt %p\n", __func__, xprt);
+       xprt->cwnd = RPC_CWNDSHIFT;
  }
  
  static void
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c

index 143ce25..ae2a838 100644 (file)
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -225,69 +225,59 @@ xprt_rdma_free_addresses(struct rpc_xprt *xprt)
                 }
  }
  
-void
-rpcrdma_conn_func(struct rpcrdma_ep *ep)
-{
-       schedule_delayed_work(&ep->rep_connect_worker, 0);
-}
-
-void
-rpcrdma_connect_worker(struct work_struct *work)
-{
-       struct rpcrdma_ep *ep =
-               container_of(work, struct rpcrdma_ep, rep_connect_worker.work);
-       struct rpcrdma_xprt *r_xprt =
-               container_of(ep, struct rpcrdma_xprt, rx_ep);
-       struct rpc_xprt *xprt = &r_xprt->rx_xprt;
-
-       spin_lock_bh(&xprt->transport_lock);
-       if (ep->rep_connected > 0) {
-               if (!xprt_test_and_set_connected(xprt))
-                       xprt_wake_pending_tasks(xprt, 0);
-       } else {
-               if (xprt_test_and_clear_connected(xprt))
-                       xprt_wake_pending_tasks(xprt, -ENOTCONN);
-       }
-       spin_unlock_bh(&xprt->transport_lock);
-}
-
+/**
+ * xprt_rdma_connect_worker - establish connection in the background
+ * @work: worker thread context
+ *
+ * Requester holds the xprt's send lock to prevent activity on this
+ * transport while a fresh connection is being established. RPC tasks
+ * sleep on the xprt's pending queue waiting for connect to complete.
+ */
  static void
  xprt_rdma_connect_worker(struct work_struct *work)
  {
         struct rpcrdma_xprt *r_xprt = container_of(work, struct rpcrdma_xprt,
                                                    rx_connect_worker.work);
         struct rpc_xprt *xprt = &r_xprt->rx_xprt;
-       int rc = 0;
-
-       xprt_clear_connected(xprt);
+       int rc;
  
         rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia);
-       if (rc)
-               xprt_wake_pending_tasks(xprt, rc);
-
         xprt_clear_connecting(xprt);
+       if (r_xprt->rx_ep.rep_connected > 0) {
+               if (!xprt_test_and_set_connected(xprt)) {
+                       xprt->stat.connect_count++;
+                       xprt->stat.connect_time += (long)jiffies -
+                                                  xprt->stat.connect_start;
+                       xprt_wake_pending_tasks(xprt, -EAGAIN);
+               }
+       } else {
+               if (xprt_test_and_clear_connected(xprt))
+                       xprt_wake_pending_tasks(xprt, rc);
+       }
  }
  
+/**
+ * xprt_rdma_inject_disconnect - inject a connection fault
+ * @xprt: transport context
+ *
+ * If @xprt is connected, disconnect it to simulate spurious connection
+ * loss.
+ */
  static void
  xprt_rdma_inject_disconnect(struct rpc_xprt *xprt)
  {
-       struct rpcrdma_xprt *r_xprt = container_of(xprt, struct rpcrdma_xprt,
-                                                  rx_xprt);
+       struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
  
         trace_xprtrdma_inject_dsc(r_xprt);
         rdma_disconnect(r_xprt->rx_ia.ri_id);
  }
  
-/*
- * xprt_rdma_destroy
+/**
+ * xprt_rdma_destroy - Full tear down of transport
+ * @xprt: doomed transport context
   *
- * Destroy the xprt.
- * Free all memory associated with the object, including its own.
- * NOTE: none of the *destroy methods free memory for their top-level
- * objects, even though they may have allocated it (they do free
- * private memory). It's up to the caller to handle it. In this
- * case (RDMA transport), all structure memory is inlined with the
- * struct rpcrdma_xprt.
+ * Caller guarantees there will be no more calls to us with
+ * this @xprt.
   */
  static void
  xprt_rdma_destroy(struct rpc_xprt *xprt)
@@ -298,8 +288,6 @@ xprt_rdma_destroy(struct rpc_xprt *xprt)
  
         cancel_delayed_work_sync(&r_xprt->rx_connect_worker);
  
-       xprt_clear_connected(xprt);
-
         rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia);
         rpcrdma_buffer_destroy(&r_xprt->rx_buf);
         rpcrdma_ia_close(&r_xprt->rx_ia);
@@ -442,11 +430,12 @@ out1:
  }
  
  /**
- * xprt_rdma_close - Close down RDMA connection
- * @xprt: generic transport to be closed
+ * xprt_rdma_close - close a transport connection
+ * @xprt: transport context
   *
- * Called during transport shutdown reconnect, or device
- * removal. Caller holds the transport's write lock.
+ * Called during transport shutdown, reconnect, or device removal.
+ * Caller holds @xprt's send lock to prevent activity on this
+ * transport while the connection is torn down.
   */
  static void
  xprt_rdma_close(struct rpc_xprt *xprt)
@@ -468,6 +457,12 @@ xprt_rdma_close(struct rpc_xprt *xprt)
                 xprt->reestablish_timeout = 0;
         xprt_disconnect_done(xprt);
         rpcrdma_ep_disconnect(ep, ia);
+
+       /* Prepare @xprt for the next connection by reinitializing
+        * its credit grant to one (see RFC 8166, Section 3.3.3).
+        */
+       r_xprt->rx_buf.rb_credits = 1;
+       xprt->cwnd = RPC_CWNDSHIFT;
  }
  
  /**
@@ -519,6 +514,12 @@ xprt_rdma_timer(struct rpc_xprt *xprt, struct rpc_task *task)
         xprt_force_disconnect(xprt);
  }
  
+/**
+ * xprt_rdma_connect - try to establish a transport connection
+ * @xprt: transport state
+ * @task: RPC scheduler context
+ *
+ */
  static void
  xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
  {
@@ -638,13 +639,6 @@ rpcrdma_get_recvbuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
   *        0:   Success; rq_buffer points to RPC buffer to use
   *   ENOMEM:   Out of memory, call again later
   *      EIO:   A permanent error occurred, do not retry
- *
- * The RDMA allocate/free functions need the task structure as a place
- * to hide the struct rpcrdma_req, which is necessary for the actual
- * send/recv sequence.
- *
- * xprt_rdma_allocate provides buffers that are already mapped for
- * DMA, and a local DMA lkey is provided for each.
   */
  static int
  xprt_rdma_allocate(struct rpc_task *task)
@@ -693,7 +687,7 @@ xprt_rdma_free(struct rpc_task *task)
  
  /**
   * xprt_rdma_send_request - marshal and send an RPC request
- * @task: RPC task with an RPC message in rq_snd_buf
+ * @rqst: RPC message in rq_snd_buf
   *
   * Caller holds the transport's write lock.
   *
@@ -706,9 +700,8 @@ xprt_rdma_free(struct rpc_task *task)
   *             sent. Do not try to send this message again.
   */
  static int
-xprt_rdma_send_request(struct rpc_task *task)
+xprt_rdma_send_request(struct rpc_rqst *rqst)
  {
-       struct rpc_rqst *rqst = task->tk_rqstp;
         struct rpc_xprt *xprt = rqst->rq_xprt;
         struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
         struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
@@ -722,6 +715,9 @@ xprt_rdma_send_request(struct rpc_task *task)
         if (!xprt_connected(xprt))
                 goto drop_connection;
  
+       if (!xprt_request_get_cong(xprt, rqst))
+               return -EBADSLT;
+
         rc = rpcrdma_marshal_req(r_xprt, rqst);
         if (rc < 0)
                 goto failed_marshal;
@@ -741,7 +737,7 @@ xprt_rdma_send_request(struct rpc_task *task)
         /* An RPC with no reply will throw off credit accounting,
          * so drop the connection to reset the credit grant.
          */
-       if (!rpc_reply_expected(task))
+       if (!rpc_reply_expected(rqst->rq_task))
                 goto drop_connection;
         return 0;
  
@@ -766,7 +762,7 @@ void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
                    0,   /* need a local port? */
                    xprt->stat.bind_count,
                    xprt->stat.connect_count,
-                  xprt->stat.connect_time,
+                  xprt->stat.connect_time / HZ,
                    idle_time,
                    xprt->stat.sends,
                    xprt->stat.recvs,
@@ -786,7 +782,7 @@ void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
                    r_xprt->rx_stats.bad_reply_count,
                    r_xprt->rx_stats.nomsg_call_count);
         seq_printf(seq, "%lu %lu %lu %lu %lu %lu\n",
-                  r_xprt->rx_stats.mrs_recovered,
+                  r_xprt->rx_stats.mrs_recycled,
                    r_xprt->rx_stats.mrs_orphaned,
                    r_xprt->rx_stats.mrs_allocated,
                    r_xprt->rx_stats.local_inv_needed,
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c

index 956a5ea..3ddba94 100644 (file)
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -108,20 +108,48 @@ rpcrdma_destroy_wq(void)
         }
  }
  
+/**
+ * rpcrdma_disconnect_worker - Force a disconnect
+ * @work: endpoint to be disconnected
+ *
+ * Provider callbacks can possibly run in an IRQ context. This function
+ * is invoked in a worker thread to guarantee that disconnect wake-up
+ * calls are always done in process context.
+ */
+static void
+rpcrdma_disconnect_worker(struct work_struct *work)
+{
+       struct rpcrdma_ep *ep = container_of(work, struct rpcrdma_ep,
+                                            rep_disconnect_worker.work);
+       struct rpcrdma_xprt *r_xprt =
+               container_of(ep, struct rpcrdma_xprt, rx_ep);
+
+       xprt_force_disconnect(&r_xprt->rx_xprt);
+}
+
+/**
+ * rpcrdma_qp_event_handler - Handle one QP event (error notification)
+ * @event: details of the event
+ * @context: ep that owns QP where event occurred
+ *
+ * Called from the RDMA provider (device driver) possibly in an interrupt
+ * context.
+ */
  static void
-rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
+rpcrdma_qp_event_handler(struct ib_event *event, void *context)
  {
         struct rpcrdma_ep *ep = context;
         struct rpcrdma_xprt *r_xprt = container_of(ep, struct rpcrdma_xprt,
                                                    rx_ep);
  
-       trace_xprtrdma_qp_error(r_xprt, event);
-       pr_err("rpcrdma: %s on device %s ep %p\n",
-              ib_event_msg(event->event), event->device->name, context);
+       trace_xprtrdma_qp_event(r_xprt, event);
+       pr_err("rpcrdma: %s on device %s connected to %s:%s\n",
+              ib_event_msg(event->event), event->device->name,
+              rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt));
  
         if (ep->rep_connected == 1) {
                 ep->rep_connected = -EIO;
-               rpcrdma_conn_func(ep);
+               schedule_delayed_work(&ep->rep_disconnect_worker, 0);
                 wake_up_all(&ep->rep_connect_wait);
         }
  }
@@ -219,38 +247,48 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt,
         rpcrdma_set_max_header_sizes(r_xprt);
  }
  
+/**
+ * rpcrdma_cm_event_handler - Handle RDMA CM events
+ * @id: rdma_cm_id on which an event has occurred
+ * @event: details of the event
+ *
+ * Called with @id's mutex held. Returns 1 if caller should
+ * destroy @id, otherwise 0.
+ */
  static int
-rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
+rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
  {
-       struct rpcrdma_xprt *xprt = id->context;
-       struct rpcrdma_ia *ia = &xprt->rx_ia;
-       struct rpcrdma_ep *ep = &xprt->rx_ep;
-       int connstate = 0;
+       struct rpcrdma_xprt *r_xprt = id->context;
+       struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+       struct rpcrdma_ep *ep = &r_xprt->rx_ep;
+       struct rpc_xprt *xprt = &r_xprt->rx_xprt;
+
+       might_sleep();
  
-       trace_xprtrdma_conn_upcall(xprt, event);
+       trace_xprtrdma_cm_event(r_xprt, event);
         switch (event->event) {
         case RDMA_CM_EVENT_ADDR_RESOLVED:
         case RDMA_CM_EVENT_ROUTE_RESOLVED:
                 ia->ri_async_rc = 0;
                 complete(&ia->ri_done);
-               break;
+               return 0;
         case RDMA_CM_EVENT_ADDR_ERROR:
                 ia->ri_async_rc = -EPROTO;
                 complete(&ia->ri_done);
-               break;
+               return 0;
         case RDMA_CM_EVENT_ROUTE_ERROR:
                 ia->ri_async_rc = -ENETUNREACH;
                 complete(&ia->ri_done);
-               break;
+               return 0;
         case RDMA_CM_EVENT_DEVICE_REMOVAL:
  #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
                 pr_info("rpcrdma: removing device %s for %s:%s\n",
                         ia->ri_device->name,
-                       rpcrdma_addrstr(xprt), rpcrdma_portstr(xprt));
+                       rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt));
  #endif
                 set_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags);
                 ep->rep_connected = -ENODEV;
-               xprt_force_disconnect(&xprt->rx_xprt);
+               xprt_force_disconnect(xprt);
                 wait_for_completion(&ia->ri_remove_done);
  
                 ia->ri_id = NULL;
@@ -258,41 +296,40 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
                 /* Return 1 to ensure the core destroys the id. */
                 return 1;
         case RDMA_CM_EVENT_ESTABLISHED:
-               ++xprt->rx_xprt.connect_cookie;
-               connstate = 1;
-               rpcrdma_update_connect_private(xprt, &event->param.conn);
-               goto connected;
+               ++xprt->connect_cookie;
+               ep->rep_connected = 1;
+               rpcrdma_update_connect_private(r_xprt, &event->param.conn);
+               wake_up_all(&ep->rep_connect_wait);
+               break;
         case RDMA_CM_EVENT_CONNECT_ERROR:
-               connstate = -ENOTCONN;
-               goto connected;
+               ep->rep_connected = -ENOTCONN;
+               goto disconnected;
         case RDMA_CM_EVENT_UNREACHABLE:
-               connstate = -ENETUNREACH;
-               goto connected;
+               ep->rep_connected = -ENETUNREACH;
+               goto disconnected;
         case RDMA_CM_EVENT_REJECTED:
                 dprintk("rpcrdma: connection to %s:%s rejected: %s\n",
-                       rpcrdma_addrstr(xprt), rpcrdma_portstr(xprt),
+                       rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt),
                         rdma_reject_msg(id, event->status));
-               connstate = -ECONNREFUSED;
+               ep->rep_connected = -ECONNREFUSED;
                 if (event->status == IB_CM_REJ_STALE_CONN)
-                       connstate = -EAGAIN;
-               goto connected;
+                       ep->rep_connected = -EAGAIN;
+               goto disconnected;
         case RDMA_CM_EVENT_DISCONNECTED:
-               ++xprt->rx_xprt.connect_cookie;
-               connstate = -ECONNABORTED;
-connected:
-               ep->rep_connected = connstate;
-               rpcrdma_conn_func(ep);
+               ++xprt->connect_cookie;
+               ep->rep_connected = -ECONNABORTED;
+disconnected:
+               xprt_force_disconnect(xprt);
                 wake_up_all(&ep->rep_connect_wait);
-               /*FALLTHROUGH*/
+               break;
         default:
-               dprintk("RPC:       %s: %s:%s on %s/%s (ep 0x%p): %s\n",
-                       __func__,
-                       rpcrdma_addrstr(xprt), rpcrdma_portstr(xprt),
-                       ia->ri_device->name, ia->ri_ops->ro_displayname,
-                       ep, rdma_event_msg(event->event));
                 break;
         }
  
+       dprintk("RPC:       %s: %s:%s on %s/%s: %s\n", __func__,
+               rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt),
+               ia->ri_device->name, ia->ri_ops->ro_displayname,
+               rdma_event_msg(event->event));
         return 0;
  }
  
@@ -308,7 +345,7 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, struct rpcrdma_ia *ia)
         init_completion(&ia->ri_done);
         init_completion(&ia->ri_remove_done);
  
-       id = rdma_create_id(xprt->rx_xprt.xprt_net, rpcrdma_conn_upcall,
+       id = rdma_create_id(xprt->rx_xprt.xprt_net, rpcrdma_cm_event_handler,
                             xprt, RDMA_PS_TCP, IB_QPT_RC);
         if (IS_ERR(id)) {
                 rc = PTR_ERR(id);
@@ -519,7 +556,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
         if (rc)
                 return rc;
  
-       ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
+       ep->rep_attr.event_handler = rpcrdma_qp_event_handler;
         ep->rep_attr.qp_context = ep;
         ep->rep_attr.srq = NULL;
         ep->rep_attr.cap.max_send_sge = max_sge;
@@ -542,7 +579,8 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
                                    cdata->max_requests >> 2);
         ep->rep_send_count = ep->rep_send_batch;
         init_waitqueue_head(&ep->rep_connect_wait);
-       INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);
+       INIT_DELAYED_WORK(&ep->rep_disconnect_worker,
+                         rpcrdma_disconnect_worker);
  
         sendcq = ib_alloc_cq(ia->ri_device, NULL,
                              ep->rep_attr.cap.max_send_wr + 1,
@@ -615,7 +653,7 @@ out1:
  void
  rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
  {
-       cancel_delayed_work_sync(&ep->rep_connect_worker);
+       cancel_delayed_work_sync(&ep->rep_disconnect_worker);
  
         if (ia->ri_id && ia->ri_id->qp) {
                 rpcrdma_ep_disconnect(ep, ia);
@@ -728,6 +766,7 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
  {
         struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt,
                                                    rx_ia);
+       struct rpc_xprt *xprt = &r_xprt->rx_xprt;
         int rc;
  
  retry:
@@ -754,6 +793,8 @@ retry:
         }
  
         ep->rep_connected = 0;
+       xprt_clear_connected(xprt);
+
         rpcrdma_post_recvs(r_xprt, true);
  
         rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
@@ -877,7 +918,6 @@ static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt)
                 sc->sc_xprt = r_xprt;
                 buf->rb_sc_ctxs[i] = sc;
         }
-       buf->rb_flags = 0;
  
         return 0;
  
@@ -977,39 +1017,6 @@ rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc)
         }
  }
  
-static void
-rpcrdma_mr_recovery_worker(struct work_struct *work)
-{
-       struct rpcrdma_buffer *buf = container_of(work, struct rpcrdma_buffer,
-                                                 rb_recovery_worker.work);
-       struct rpcrdma_mr *mr;
-
-       spin_lock(&buf->rb_recovery_lock);
-       while (!list_empty(&buf->rb_stale_mrs)) {
-               mr = rpcrdma_mr_pop(&buf->rb_stale_mrs);
-               spin_unlock(&buf->rb_recovery_lock);
-
-               trace_xprtrdma_recover_mr(mr);
-               mr->mr_xprt->rx_ia.ri_ops->ro_recover_mr(mr);
-
-               spin_lock(&buf->rb_recovery_lock);
-       }
-       spin_unlock(&buf->rb_recovery_lock);
-}
-
-void
-rpcrdma_mr_defer_recovery(struct rpcrdma_mr *mr)
-{
-       struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
-       struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
-
-       spin_lock(&buf->rb_recovery_lock);
-       rpcrdma_mr_push(mr, &buf->rb_stale_mrs);
-       spin_unlock(&buf->rb_recovery_lock);
-
-       schedule_delayed_work(&buf->rb_recovery_worker, 0);
-}
-
  static void
  rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt)
  {
@@ -1019,7 +1026,7 @@ rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt)
         LIST_HEAD(free);
         LIST_HEAD(all);
  
-       for (count = 0; count < 3; count++) {
+       for (count = 0; count < ia->ri_max_segs; count++) {
                 struct rpcrdma_mr *mr;
                 int rc;
  
@@ -1138,18 +1145,15 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
         struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
         int i, rc;
  
+       buf->rb_flags = 0;
         buf->rb_max_requests = r_xprt->rx_data.max_requests;
         buf->rb_bc_srv_max_requests = 0;
         spin_lock_init(&buf->rb_mrlock);
         spin_lock_init(&buf->rb_lock);
-       spin_lock_init(&buf->rb_recovery_lock);
         INIT_LIST_HEAD(&buf->rb_mrs);
         INIT_LIST_HEAD(&buf->rb_all);
-       INIT_LIST_HEAD(&buf->rb_stale_mrs);
         INIT_DELAYED_WORK(&buf->rb_refresh_worker,
                           rpcrdma_mr_refresh_worker);
-       INIT_DELAYED_WORK(&buf->rb_recovery_worker,
-                         rpcrdma_mr_recovery_worker);
  
         rpcrdma_mrs_create(r_xprt);
  
@@ -1233,7 +1237,6 @@ rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf)
  void
  rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
  {
-       cancel_delayed_work_sync(&buf->rb_recovery_worker);
         cancel_delayed_work_sync(&buf->rb_refresh_worker);
  
         rpcrdma_sendctxs_destroy(buf);
@@ -1326,7 +1329,7 @@ rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr)
  {
         struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
  
-       trace_xprtrdma_dma_unmap(mr);
+       trace_xprtrdma_mr_unmap(mr);
         ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
                         mr->mr_sg, mr->mr_nents, mr->mr_dir);
         __rpcrdma_mr_put(&r_xprt->rx_buf, mr);
@@ -1518,9 +1521,11 @@ rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
         struct ib_recv_wr *wr, *bad_wr;
         int needed, count, rc;
  
+       rc = 0;
+       count = 0;
         needed = buf->rb_credits + (buf->rb_bc_srv_max_requests << 1);
         if (buf->rb_posted_receives > needed)
-               return;
+               goto out;
         needed -= buf->rb_posted_receives;
  
         count = 0;
@@ -1556,7 +1561,7 @@ rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
                 --needed;
         }
         if (!count)
-               return;
+               goto out;
  
         rc = ib_post_recv(r_xprt->rx_ia.ri_id->qp, wr,
                           (const struct ib_recv_wr **)&bad_wr);
@@ -1570,5 +1575,6 @@ rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
                 }
         }
         buf->rb_posted_receives += count;
+out:
         trace_xprtrdma_post_recvs(r_xprt, count, rc);
  }
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h

index 2ca14f7..a13ccb6 100644 (file)
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -101,7 +101,7 @@ struct rpcrdma_ep {
         wait_queue_head_t       rep_connect_wait;
         struct rpcrdma_connect_private  rep_cm_private;
         struct rdma_conn_param  rep_remote_cma;
-       struct delayed_work     rep_connect_worker;
+       struct delayed_work     rep_disconnect_worker;
  };
  
  /* Pre-allocate extra Work Requests for handling backward receives
@@ -280,6 +280,7 @@ struct rpcrdma_mr {
         u32                     mr_handle;
         u32                     mr_length;
         u64                     mr_offset;
+       struct work_struct      mr_recycle;
         struct list_head        mr_all;
  };
  
@@ -411,9 +412,6 @@ struct rpcrdma_buffer {
  
         u32                     rb_bc_max_requests;
  
-       spinlock_t              rb_recovery_lock; /* protect rb_stale_mrs */
-       struct list_head        rb_stale_mrs;
-       struct delayed_work     rb_recovery_worker;
         struct delayed_work     rb_refresh_worker;
  };
  #define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia)
@@ -452,7 +450,7 @@ struct rpcrdma_stats {
         unsigned long           hardway_register_count;
         unsigned long           failed_marshal_count;
         unsigned long           bad_reply_count;
-       unsigned long           mrs_recovered;
+       unsigned long           mrs_recycled;
         unsigned long           mrs_orphaned;
         unsigned long           mrs_allocated;
         unsigned long           empty_sendctx_q;
@@ -481,7 +479,6 @@ struct rpcrdma_memreg_ops {
                                      struct list_head *mrs);
         void            (*ro_unmap_sync)(struct rpcrdma_xprt *,
                                          struct list_head *);
-       void            (*ro_recover_mr)(struct rpcrdma_mr *mr);
         int             (*ro_open)(struct rpcrdma_ia *,
                                    struct rpcrdma_ep *,
                                    struct rpcrdma_create_data_internal *);
@@ -559,7 +556,6 @@ int rpcrdma_ep_create(struct rpcrdma_ep *, struct rpcrdma_ia *,
                                 struct rpcrdma_create_data_internal *);
  void rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *);
  int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *);
-void rpcrdma_conn_func(struct rpcrdma_ep *ep);
  void rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *);
  
  int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *,
@@ -578,7 +574,12 @@ struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf);
  struct rpcrdma_mr *rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt);
  void rpcrdma_mr_put(struct rpcrdma_mr *mr);
  void rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr);
-void rpcrdma_mr_defer_recovery(struct rpcrdma_mr *mr);
+
+/*
+ * Queue @mr for recycling: schedules the work item embedded in the MR
+ * (mr->mr_recycle) with schedule_work(). The work function itself is
+ * attached elsewhere and is not visible from this header.
+ */
+static inline void
+rpcrdma_mr_recycle(struct rpcrdma_mr *mr)
+{
+       schedule_work(&mr->mr_recycle);
+}
  
  struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
  void rpcrdma_buffer_put(struct rpcrdma_req *);
@@ -652,7 +653,6 @@ static inline void rpcrdma_set_xdrlen(struct xdr_buf *xdr, size_t len)
  extern unsigned int xprt_rdma_max_inline_read;
  void xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap);
  void xprt_rdma_free_addresses(struct rpc_xprt *xprt);
-void rpcrdma_connect_worker(struct work_struct *work);
  void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq);
  int xprt_rdma_init(void);
  void xprt_rdma_cleanup(void);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c

index 6b7539c..1b51e04 100644 (file)
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -47,13 +47,13 @@
  #include <net/checksum.h>
  #include <net/udp.h>
  #include <net/tcp.h>
+#include <linux/bvec.h>
+#include <linux/uio.h>
  
  #include <trace/events/sunrpc.h>
  
  #include "sunrpc.h"
  
-#define RPC_TCP_READ_CHUNK_SZ  (3*512*1024)
-
  static void xs_close(struct rpc_xprt *xprt);
  static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
                 struct socket *sock);
@@ -129,7 +129,7 @@ static struct ctl_table xs_tunables_table[] = {
                 .mode           = 0644,
                 .proc_handler   = proc_dointvec_minmax,
                 .extra1         = &xprt_min_resvport_limit,
-               .extra2         = &xprt_max_resvport
+               .extra2         = &xprt_max_resvport_limit
         },
         {
                 .procname       = "max_resvport",
@@ -137,7 +137,7 @@ static struct ctl_table xs_tunables_table[] = {
                 .maxlen         = sizeof(unsigned int),
                 .mode           = 0644,
                 .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &xprt_min_resvport,
+               .extra1         = &xprt_min_resvport_limit,
                 .extra2         = &xprt_max_resvport_limit
         },
         {
@@ -325,6 +325,362 @@ static void xs_free_peer_addresses(struct rpc_xprt *xprt)
                 }
  }
  
+/*
+ * Fill in the missing pages of a sparse receive buffer.
+ *
+ * @buf:  xdr_buf whose page array may contain NULL entries
+ * @want: number of page-data bytes the caller would like to receive
+ * @gfp:  allocation flags passed to alloc_page()
+ *
+ * Buffers without XDRBUF_SPARSE_PAGES are left alone and @want is returned
+ * unchanged. Otherwise @want is capped at buf->page_len and every page
+ * needed to hold page_base + @want bytes is allocated if not yet present;
+ * each new page is also stored in the matching buf->bvec[] entry.
+ *
+ * Returns the number of page-data bytes that can be received. If an
+ * allocation fails, buf->page_len is shrunk to the bytes backed by the
+ * pages before the failing one, and that value is returned.
+ */
+static size_t
+xs_alloc_sparse_pages(struct xdr_buf *buf, size_t want, gfp_t gfp)
+{
+       size_t i, n, avail;
+
+       if (!(buf->flags & XDRBUF_SPARSE_PAGES))
+               return want;
+       if (want > buf->page_len)
+               want = buf->page_len;
+       n = (buf->page_base + want + PAGE_SIZE - 1) >> PAGE_SHIFT;
+       for (i = 0; i < n; i++) {
+               if (buf->pages[i])
+                       continue;
+               buf->bvec[i].bv_page = buf->pages[i] = alloc_page(gfp);
+               if (!buf->pages[i]) {
+                       avail = i * PAGE_SIZE;
+                       /*
+                        * The arithmetic is unsigned: when the populated
+                        * pages do not even cover page_base (e.g. the very
+                        * first allocation failed), report zero usable
+                        * bytes instead of wrapping to a huge length.
+                        */
+                       avail = avail > buf->page_base ?
+                               avail - buf->page_base : 0;
+                       buf->page_len = avail;
+                       return avail;
+               }
+       }
+       return want;
+}
+
+/*
+ * Skip the first @seek bytes of the iterator already set up in @msg, then
+ * receive from @sock. A positive result has @seek added to it; zero and
+ * negative results from sock_recvmsg() are handed back unchanged.
+ */
+static ssize_t
+xs_sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags, size_t seek)
+{
+       ssize_t nread;
+
+       if (seek)
+               iov_iter_advance(&msg->msg_iter, seek);
+
+       nread = sock_recvmsg(sock, msg, flags);
+       if (nread <= 0)
+               return nread;
+       return nread + seek;
+}
+
+/*
+ * Receive into a single kvec: aim @msg's iterator at @kvec, capped at
+ * @count bytes, then let xs_sock_recvmsg() skip @seek bytes and read
+ * from @sock. Returns whatever xs_sock_recvmsg() returns.
+ */
+static ssize_t
+xs_read_kvec(struct socket *sock, struct msghdr *msg, int flags,
+               struct kvec *kvec, size_t count, size_t seek)
+{
+       struct iov_iter *iter = &msg->msg_iter;
+
+       iov_iter_kvec(iter, READ | ITER_KVEC, kvec, 1, count);
+       return xs_sock_recvmsg(sock, msg, flags, seek);
+}
+
+/*
+ * Receive into an array of @nr bio_vecs: aim @msg's iterator at @bvec,
+ * capped at @count bytes, then let xs_sock_recvmsg() skip @seek bytes
+ * and read from @sock. Returns whatever xs_sock_recvmsg() returns.
+ */
+static ssize_t
+xs_read_bvec(struct socket *sock, struct msghdr *msg, int flags,
+               struct bio_vec *bvec, unsigned long nr, size_t count,
+               size_t seek)
+{
+       struct iov_iter *iter = &msg->msg_iter;
+
+       iov_iter_bvec(iter, READ | ITER_BVEC, bvec, nr, count);
+       return xs_sock_recvmsg(sock, msg, flags, seek);
+}
+
+/*
+ * Receive up to @count bytes from @sock with MSG_TRUNC added to @flags
+ * and an all-zero kvec as the destination (presumably so the socket
+ * layer drops the data instead of copying it - confirm against the
+ * protocol's recvmsg implementation).
+ */
+static ssize_t
+xs_read_discard(struct socket *sock, struct msghdr *msg, int flags,
+               size_t count)
+{
+       struct kvec sink = { .iov_base = NULL, .iov_len = 0 };
+
+       flags |= MSG_TRUNC;
+       return xs_read_kvec(sock, msg, flags, &sink, count, 0);
+}
+
+/*
+ * Receive message data from @sock into the head, pages and tail of @buf.
+ *
+ * @count: total number of message bytes that should end up in @buf
+ * @seek:  number of message bytes already placed in @buf by earlier calls
+ * @read:  set to the number of bytes accounted for by this call
+ *
+ * The three sections are visited in order. A section lying entirely below
+ * @seek is skipped; otherwise reading resumes inside it.
+ *
+ * Returns the positive result of the last receive when @count has been
+ * reached or the socket layer flagged MSG_EOR/MSG_TRUNC, -EAGAIN after a
+ * short read, -EMSGSIZE (with MSG_TRUNC set in @msg) when @buf is used up
+ * before @count is reached, or the zero/negative result of a failed
+ * receive.
+ */
+static ssize_t
+xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
+               struct xdr_buf *buf, size_t count, size_t seek, size_t *read)
+{
+       size_t want, seek_init = seek, offset = 0;
+       ssize_t ret;
+
+       if (seek < buf->head[0].iov_len) {
+               want = min_t(size_t, count, buf->head[0].iov_len);
+               ret = xs_read_kvec(sock, msg, flags, &buf->head[0], want, seek);
+               if (ret <= 0)
+                       goto sock_err;
+               offset += ret;
+               if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC))
+                       goto out;
+               if (ret != want)
+                       goto eagain;
+               seek = 0;
+       } else {
+               seek -= buf->head[0].iov_len;
+               offset += buf->head[0].iov_len;
+       }
+       if (seek < buf->page_len) {
+               want = xs_alloc_sparse_pages(buf,
+                               min_t(size_t, count - offset, buf->page_len),
+                               GFP_NOWAIT);
+               ret = xs_read_bvec(sock, msg, flags, buf->bvec,
+                               xdr_buf_pagecount(buf),
+                               want + buf->page_base,
+                               seek + buf->page_base);
+               if (ret <= 0)
+                       goto sock_err;
+               /*
+                * The iterator was advanced past page_base as well as
+                * @seek, so the result counts page_base too. Strip it
+                * before accounting and before comparing with @want,
+                * which is expressed in page-data bytes only.
+                */
+               ret -= buf->page_base;
+               offset += ret;
+               if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC))
+                       goto out;
+               if (ret != want)
+                       goto eagain;
+               seek = 0;
+       } else {
+               seek -= buf->page_len;
+               offset += buf->page_len;
+       }
+       if (seek < buf->tail[0].iov_len) {
+               want = min_t(size_t, count - offset, buf->tail[0].iov_len);
+               ret = xs_read_kvec(sock, msg, flags, &buf->tail[0], want, seek);
+               if (ret <= 0)
+                       goto sock_err;
+               offset += ret;
+               if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC))
+                       goto out;
+               if (ret != want)
+                       goto eagain;
+       } else
+               offset += buf->tail[0].iov_len;
+       /* @buf is full but @count has not been reached */
+       ret = -EMSGSIZE;
+       msg->msg_flags |= MSG_TRUNC;
+out:
+       *read = offset - seek_init;
+       return ret;
+eagain:
+       ret = -EAGAIN;
+       goto out;
+sock_err:
+       /* Nothing new arrived: re-add the skipped bytes so *read is right */
+       offset += seek;
+       goto out;
+}
+
+/*
+ * On the first pass for a message (recv.copied == 0), copy the
+ * recv.offset bytes already received, starting at recv.xid, into the
+ * head of @buf when they fit, and account for them in recv.copied.
+ * Does nothing once recv.copied is non-zero.
+ */
+static void
+xs_read_header(struct sock_xprt *transport, struct xdr_buf *buf)
+{
+       if (transport->recv.copied)
+               return;
+
+       if (transport->recv.offset <= buf->head[0].iov_len)
+               memcpy(buf->head[0].iov_base, &transport->recv.xid,
+                      transport->recv.offset);
+       transport->recv.copied = transport->recv.offset;
+}
+
+/*
+ * True when the fragment header most recently stored in recv.fraghdr
+ * has the RPC_LAST_STREAM_FRAGMENT bit set.
+ */
+static bool
+xs_read_stream_request_done(struct sock_xprt *transport)
+{
+       __be32 last = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT);
+
+       return (transport->recv.fraghdr & last) != 0;
+}
+
+/*
+ * Receive the remainder of the current fragment into @req's private
+ * receive buffer and advance recv.offset / recv.copied by what arrived.
+ *
+ * Returns recv.copied once the fragment is complete (setting MSG_EOR in
+ * @msg if it was the last fragment) or when the buffer overflowed
+ * (-EMSGSIZE from xs_read_xdr_buf()). A zero result from the lower layer
+ * becomes -ESHUTDOWN, negative errors are passed through, and any other
+ * incomplete read yields -EAGAIN.
+ */
+static ssize_t
+xs_read_stream_request(struct sock_xprt *transport, struct msghdr *msg,
+               int flags, struct rpc_rqst *req)
+{
+       struct xdr_buf *rcvbuf = &req->rq_private_buf;
+       size_t remaining, nread;
+       ssize_t status;
+
+       xs_read_header(transport, rcvbuf);
+
+       remaining = transport->recv.len - transport->recv.offset;
+       status = xs_read_xdr_buf(transport->sock, msg, flags, rcvbuf,
+                       transport->recv.copied + remaining,
+                       transport->recv.copied, &nread);
+       transport->recv.offset += nread;
+       transport->recv.copied += nread;
+
+       if (transport->recv.offset == transport->recv.len) {
+               if (xs_read_stream_request_done(transport))
+                       msg->msg_flags |= MSG_EOR;
+               return transport->recv.copied;
+       }
+
+       if (status == -EMSGSIZE)
+               return transport->recv.copied;
+       if (status == 0)
+               return -ESHUTDOWN;
+       if (status < 0)
+               return status;
+       return -EAGAIN;
+}
+
+/*
+ * Number of header bytes to read before the payload: one 32-bit word
+ * when @isfrag is true, three 32-bit words otherwise.
+ */
+static size_t
+xs_read_stream_headersize(bool isfrag)
+{
+       return isfrag ? sizeof(__be32) : 3 * sizeof(__be32);
+}
+
+/*
+ * Read up to @want header bytes into transport->recv, starting at the
+ * fraghdr field and resuming @seek bytes in. When @want exceeds one
+ * word this presumably relies on the following recv fields being laid
+ * out directly after fraghdr - confirm against the struct definition.
+ */
+static ssize_t
+xs_read_stream_header(struct sock_xprt *transport, struct msghdr *msg,
+               int flags, size_t want, size_t seek)
+{
+       struct kvec hdr;
+
+       hdr.iov_base = &transport->recv.fraghdr;
+       hdr.iov_len = want;
+       return xs_read_kvec(transport->sock, msg, flags, &hdr, want, seek);
+}
+
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
+/*
+ * Receive data for an incoming call on the backchannel. Fails with
+ * -ESHUTDOWN when no backchannel request matches the received XID.
+ * The request is completed once @msg carries MSG_EOR or MSG_TRUNC.
+ */
+static ssize_t
+xs_read_stream_call(struct sock_xprt *transport, struct msghdr *msg, int flags)
+{
+       struct rpc_rqst *bc_req;
+       ssize_t nread;
+
+       /* Look up the backchannel request for the received XID */
+       bc_req = xprt_lookup_bc_request(&transport->xprt, transport->recv.xid);
+       if (bc_req == NULL) {
+               printk(KERN_WARNING "Callback slot table overflowed\n");
+               return -ESHUTDOWN;
+       }
+
+       nread = xs_read_stream_request(transport, msg, flags, bc_req);
+       if ((msg->msg_flags & (MSG_EOR|MSG_TRUNC)) != 0)
+               xprt_complete_bc_request(bc_req, nread);
+       return nread;
+}
+#else /* CONFIG_SUNRPC_BACKCHANNEL */
+/* Without backchannel support every incoming call is refused. */
+static ssize_t
+xs_read_stream_call(struct sock_xprt *transport, struct msghdr *msg, int flags)
+{
+       return -ESHUTDOWN;
+}
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
+
+/*
+ * Receive data for a reply. The request matching the received XID is
+ * looked up under xprt->queue_lock and pinned (presumably to keep it
+ * alive while the lock is dropped for the socket read). If no request
+ * matches, MSG_TRUNC is set in @msg and 0 is returned. The request is
+ * completed once @msg carries MSG_EOR or MSG_TRUNC.
+ */
+static ssize_t
+xs_read_stream_reply(struct sock_xprt *transport, struct msghdr *msg, int flags)
+{
+       struct rpc_xprt *xprt = &transport->xprt;
+       struct rpc_rqst *rqst;
+       ssize_t nread;
+
+       spin_lock(&xprt->queue_lock);
+       rqst = xprt_lookup_rqst(xprt, transport->recv.xid);
+       if (rqst == NULL) {
+               msg->msg_flags |= MSG_TRUNC;
+               spin_unlock(&xprt->queue_lock);
+               return 0;
+       }
+       xprt_pin_rqst(rqst);
+       spin_unlock(&xprt->queue_lock);
+
+       nread = xs_read_stream_request(transport, msg, flags, rqst);
+
+       spin_lock(&xprt->queue_lock);
+       if (msg->msg_flags & (MSG_EOR|MSG_TRUNC))
+               xprt_complete_rqst(rqst->rq_task, nread);
+       xprt_unpin_rqst(rqst);
+       spin_unlock(&xprt->queue_lock);
+       return nread;
+}
+
+/*
+ * Make one pass over the receive state machine kept in transport->recv:
+ * read the fragment header if none is pending, dispatch the payload to
+ * the call or reply handler, discard whatever part of the fragment was
+ * not consumed, and reset the per-fragment state when it is complete.
+ *
+ * Returns the number of bytes consumed by this pass, or a negative
+ * errno. A result of 0 or -ESHUTDOWN from the lower layers forces a
+ * disconnect of the transport and is reported as -ESHUTDOWN.
+ */
+static ssize_t
+xs_read_stream(struct sock_xprt *transport, int flags)
+{
+       struct msghdr msg = { 0 };
+       size_t want, read = 0;
+       ssize_t ret = 0;
+
+       /* recv.len == 0: no fragment header has been fully read yet */
+       if (transport->recv.len == 0) {
+               /* Short header for follow-on fragments (recv.copied != 0) */
+               want = xs_read_stream_headersize(transport->recv.copied != 0);
+               ret = xs_read_stream_header(transport, &msg, flags, want,
+                               transport->recv.offset);
+               if (ret <= 0)
+                       goto out_err;
+               transport->recv.offset = ret;
+               if (ret != want) {
+                       ret = -EAGAIN;
+                       goto out_err;
+               }
+               transport->recv.len = be32_to_cpu(transport->recv.fraghdr) &
+                       RPC_FRAGMENT_SIZE_MASK;
+               /* From here on recv.offset excludes the fragment marker */
+               transport->recv.offset -= sizeof(transport->recv.fraghdr);
+               read = ret;
+       }
+
+       switch (be32_to_cpu(transport->recv.calldir)) {
+       case RPC_CALL:
+               ret = xs_read_stream_call(transport, &msg, flags);
+               break;
+       case RPC_REPLY:
+               ret = xs_read_stream_reply(transport, &msg, flags);
+       }
+       /*
+        * After a truncation, overwrite calldir and copied with -1;
+        * presumably this keeps later passes for the same message out of
+        * both handlers above - confirm RPC_CALL/RPC_REPLY values.
+        */
+       if (msg.msg_flags & MSG_TRUNC) {
+               transport->recv.calldir = cpu_to_be32(-1);
+               transport->recv.copied = -1;
+       }
+       if (ret < 0)
+               goto out_err;
+       read += ret;
+       /* Drain the part of the fragment that was not consumed above */
+       if (transport->recv.offset < transport->recv.len) {
+               ret = xs_read_discard(transport->sock, &msg, flags,
+                               transport->recv.len - transport->recv.offset);
+               if (ret <= 0)
+                       goto out_err;
+               transport->recv.offset += ret;
+               read += ret;
+               if (transport->recv.offset != transport->recv.len)
+                       return -EAGAIN;
+       }
+       /* Last fragment seen: the next header read starts a new message */
+       if (xs_read_stream_request_done(transport)) {
+               trace_xs_stream_read_request(transport);
+               transport->recv.copied = 0;
+       }
+       transport->recv.offset = 0;
+       transport->recv.len = 0;
+       return read;
+out_err:
+       switch (ret) {
+       case 0:
+       case -ESHUTDOWN:
+               xprt_force_disconnect(&transport->xprt);
+               return -ESHUTDOWN;
+       }
+       return ret;
+}
+
+/*
+ * Drain the stream socket under transport->recv_mutex: clear the
+ * XPRT_SOCK_DATA_READY bit, then call xs_read_stream() without blocking
+ * until it stops returning a positive byte count. Nothing is read when
+ * the transport has no socket. The last result and the byte total are
+ * passed to the trace point after the mutex is released.
+ */
+static void xs_stream_data_receive(struct sock_xprt *transport)
+{
+       size_t total = 0;
+       ssize_t ret = 0;
+
+       mutex_lock(&transport->recv_mutex);
+       if (transport->sock != NULL) {
+               clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
+               while ((ret = xs_read_stream(transport, MSG_DONTWAIT)) > 0) {
+                       total += ret;
+                       cond_resched();
+               }
+       }
+       mutex_unlock(&transport->recv_mutex);
+       trace_xs_stream_read_data(&transport->xprt, ret, total);
+}
+
+/*
+ * Work item entry point: recover the sock_xprt that embeds @work as its
+ * recv_worker member and run the stream receive loop on it.
+ */
+static void xs_stream_data_receive_workfn(struct work_struct *work)
+{
+       xs_stream_data_receive(container_of(work, struct sock_xprt,
+                                           recv_worker));
+}
+
+/*
+ * Reset the stream receive and transmit bookkeeping and update the
+ * connect statistics (attempt counter and start time in jiffies).
+ */
+static void
+xs_stream_reset_connect(struct sock_xprt *transport)
+{
+       struct rpc_xprt *xprt = &transport->xprt;
+
+       /* Forget any partially received record... */
+       transport->recv.copied = 0;
+       transport->recv.len = 0;
+       transport->recv.offset = 0;
+       /* ...and any partially transmitted one */
+       transport->xmit.offset = 0;
+
+       xprt->stat.connect_start = jiffies;
+       xprt->stat.connect_count++;
+}
+
  #define XS_SENDMSG_FLAGS       (MSG_DONTWAIT | MSG_NOSIGNAL)
  
  static int xs_send_kvec(struct socket *sock, struct sockaddr *addr, int addrlen, struct kvec *vec, unsigned int base, int more)
@@ -440,28 +796,21 @@ out:
         return err;
  }
  
-static void xs_nospace_callback(struct rpc_task *task)
-{
-       struct sock_xprt *transport = container_of(task->tk_rqstp->rq_xprt, struct sock_xprt, xprt);
-
-       transport->inet->sk_write_pending--;
-}
-
  /**
- * xs_nospace - place task on wait queue if transmit was incomplete
- * @task: task to put to sleep
+ * xs_nospace - handle an incomplete transmit
+ * @req: pointer to RPC request
   *
   */
-static int xs_nospace(struct rpc_task *task)
+static int xs_nospace(struct rpc_rqst *req)
  {
-       struct rpc_rqst *req = task->tk_rqstp;
         struct rpc_xprt *xprt = req->rq_xprt;
         struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
         struct sock *sk = transport->inet;
         int ret = -EAGAIN;
  
         dprintk("RPC: %5u xmit incomplete (%u left of %u)\n",
-                       task->tk_pid, req->rq_slen - req->rq_bytes_sent,
+                       req->rq_task->tk_pid,
+                       req->rq_slen - transport->xmit.offset,
                         req->rq_slen);
  
         /* Protect against races with write_space */
@@ -471,7 +820,7 @@ static int xs_nospace(struct rpc_task *task)
         if (xprt_connected(xprt)) {
                 /* wait for more buffer space */
                 sk->sk_write_pending++;
-               xprt_wait_for_buffer_space(task, xs_nospace_callback);
+               xprt_wait_for_buffer_space(xprt);
         } else
                 ret = -ENOTCONN;
  
@@ -491,6 +840,22 @@ static int xs_nospace(struct rpc_task *task)
         return ret;
  }
  
+/*
+ * Call xdr_alloc_bvec() with GFP_NOIO on @req's receive buffer and store
+ * its return value in the tk_status of the task that owns @req.
+ */
+static void
+xs_stream_prepare_request(struct rpc_rqst *req)
+{
+       struct rpc_task *task = req->rq_task;
+
+       task->tk_status = xdr_alloc_bvec(&req->rq_rcv_buf, GFP_NOIO);
+}
+
+/*
+ * A non-zero stream transmit offset combined with a request that has not
+ * sent anything yet means the bytes already written belong to an earlier
+ * message that never finished transmitting.
+ */
+static bool
+xs_send_request_was_aborted(struct sock_xprt *transport, struct rpc_rqst *req)
+{
+       if (req->rq_bytes_sent != 0)
+               return false;
+       return transport->xmit.offset != 0;
+}
+
  /*
   * Construct a stream transport record marker in @buf.
   */
@@ -503,7 +868,7 @@ static inline void xs_encode_stream_record_marker(struct xdr_buf *buf)
  
  /**
   * xs_local_send_request - write an RPC request to an AF_LOCAL socket
- * @task: RPC task that manages the state of an RPC request
+ * @req: pointer to RPC request
   *
   * Return values:
   *        0:   The request has been sent
@@ -512,9 +877,8 @@ static inline void xs_encode_stream_record_marker(struct xdr_buf *buf)
   * ENOTCONN:   Caller needs to invoke connect logic then call again
   *    other:   Some other error occurred, the request was not sent
   */
-static int xs_local_send_request(struct rpc_task *task)
+static int xs_local_send_request(struct rpc_rqst *req)
  {
-       struct rpc_rqst *req = task->tk_rqstp;
         struct rpc_xprt *xprt = req->rq_xprt;
         struct sock_xprt *transport =
                                 container_of(xprt, struct sock_xprt, xprt);
@@ -522,25 +886,34 @@ static int xs_local_send_request(struct rpc_task *task)
         int status;
         int sent = 0;
  
+       /* Close the stream if the previous transmission was incomplete */
+       if (xs_send_request_was_aborted(transport, req)) {
+               xs_close(xprt);
+               return -ENOTCONN;
+       }
+
         xs_encode_stream_record_marker(&req->rq_snd_buf);
  
         xs_pktdump("packet data:",
                         req->rq_svec->iov_base, req->rq_svec->iov_len);
  
         req->rq_xtime = ktime_get();
-       status = xs_sendpages(transport->sock, NULL, 0, xdr, req->rq_bytes_sent,
+       status = xs_sendpages(transport->sock, NULL, 0, xdr,
+                             transport->xmit.offset,
                               true, &sent);
         dprintk("RPC:       %s(%u) = %d\n",
-                       __func__, xdr->len - req->rq_bytes_sent, status);
+                       __func__, xdr->len - transport->xmit.offset, status);
  
         if (status == -EAGAIN && sock_writeable(transport->inet))
                 status = -ENOBUFS;
  
         if (likely(sent > 0) || status == 0) {
-               req->rq_bytes_sent += sent;
-               req->rq_xmit_bytes_sent += sent;
+               transport->xmit.offset += sent;
+               req->rq_bytes_sent = transport->xmit.offset;
                 if (likely(req->rq_bytes_sent >= req->rq_slen)) {
+                       req->rq_xmit_bytes_sent += transport->xmit.offset;
                         req->rq_bytes_sent = 0;
+                       transport->xmit.offset = 0;
                         return 0;
                 }
                 status = -EAGAIN;
@@ -550,7 +923,7 @@ static int xs_local_send_request(struct rpc_task *task)
         case -ENOBUFS:
                 break;
         case -EAGAIN:
-               status = xs_nospace(task);
+               status = xs_nospace(req);
                 break;
         default:
                 dprintk("RPC:       sendmsg returned unrecognized error %d\n",
@@ -566,7 +939,7 @@ static int xs_local_send_request(struct rpc_task *task)
  
  /**
   * xs_udp_send_request - write an RPC request to a UDP socket
- * @task: address of RPC task that manages the state of an RPC request
+ * @req: pointer to RPC request
   *
   * Return values:
   *        0:   The request has been sent
@@ -575,9 +948,8 @@ static int xs_local_send_request(struct rpc_task *task)
   * ENOTCONN:   Caller needs to invoke connect logic then call again
   *    other:   Some other error occurred, the request was not sent
   */
-static int xs_udp_send_request(struct rpc_task *task)
+static int xs_udp_send_request(struct rpc_rqst *req)
  {
-       struct rpc_rqst *req = task->tk_rqstp;
         struct rpc_xprt *xprt = req->rq_xprt;
         struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
         struct xdr_buf *xdr = &req->rq_snd_buf;
@@ -590,12 +962,16 @@ static int xs_udp_send_request(struct rpc_task *task)
  
         if (!xprt_bound(xprt))
                 return -ENOTCONN;
+
+       if (!xprt_request_get_cong(xprt, req))
+               return -EBADSLT;
+
         req->rq_xtime = ktime_get();
         status = xs_sendpages(transport->sock, xs_addr(xprt), xprt->addrlen,
-                             xdr, req->rq_bytes_sent, true, &sent);
+                             xdr, 0, true, &sent);
  
         dprintk("RPC:       xs_udp_send_request(%u) = %d\n",
-                       xdr->len - req->rq_bytes_sent, status);
+                       xdr->len, status);
  
         /* firewall is blocking us, don't return -EAGAIN or we end up looping */
         if (status == -EPERM)
@@ -619,7 +995,7 @@ process_status:
                 /* Should we call xs_close() here? */
                 break;
         case -EAGAIN:
-               status = xs_nospace(task);
+               status = xs_nospace(req);
                 break;
         case -ENETUNREACH:
         case -ENOBUFS:
@@ -639,7 +1015,7 @@ process_status:
  
  /**
   * xs_tcp_send_request - write an RPC request to a TCP socket
- * @task: address of RPC task that manages the state of an RPC request
+ * @req: pointer to RPC request
   *
   * Return values:
   *        0:   The request has been sent
@@ -651,9 +1027,8 @@ process_status:
   * XXX: In the case of soft timeouts, should we eventually give up
   *     if sendmsg is not able to make progress?
   */
-static int xs_tcp_send_request(struct rpc_task *task)
+static int xs_tcp_send_request(struct rpc_rqst *req)
  {
-       struct rpc_rqst *req = task->tk_rqstp;
         struct rpc_xprt *xprt = req->rq_xprt;
         struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
         struct xdr_buf *xdr = &req->rq_snd_buf;
@@ -662,6 +1037,13 @@ static int xs_tcp_send_request(struct rpc_task *task)
         int status;
         int sent;
  
+       /* Close the stream if the previous transmission was incomplete */
+       if (xs_send_request_was_aborted(transport, req)) {
+               if (transport->sock != NULL)
+                       kernel_sock_shutdown(transport->sock, SHUT_RDWR);
+               return -ENOTCONN;
+       }
+
         xs_encode_stream_record_marker(&req->rq_snd_buf);
  
         xs_pktdump("packet data:",
@@ -671,7 +1053,7 @@ static int xs_tcp_send_request(struct rpc_task *task)
          * completes while the socket holds a reference to the pages,
          * then we may end up resending corrupted data.
          */
-       if (task->tk_flags & RPC_TASK_SENT)
+       if (req->rq_task->tk_flags & RPC_TASK_SENT)
                 zerocopy = false;
  
         if (test_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state))
@@ -684,17 +1066,20 @@ static int xs_tcp_send_request(struct rpc_task *task)
         while (1) {
                 sent = 0;
                 status = xs_sendpages(transport->sock, NULL, 0, xdr,
-                                     req->rq_bytes_sent, zerocopy, &sent);
+                                     transport->xmit.offset,
+                                     zerocopy, &sent);
  
                 dprintk("RPC:       xs_tcp_send_request(%u) = %d\n",
-                               xdr->len - req->rq_bytes_sent, status);
+                               xdr->len - transport->xmit.offset, status);
  
                 /* If we've sent the entire packet, immediately
                  * reset the count of bytes sent. */
-               req->rq_bytes_sent += sent;
-               req->rq_xmit_bytes_sent += sent;
+               transport->xmit.offset += sent;
+               req->rq_bytes_sent = transport->xmit.offset;
                 if (likely(req->rq_bytes_sent >= req->rq_slen)) {
+                       req->rq_xmit_bytes_sent += transport->xmit.offset;
                         req->rq_bytes_sent = 0;
+                       transport->xmit.offset = 0;
                         return 0;
                 }
  
@@ -732,7 +1117,7 @@ static int xs_tcp_send_request(struct rpc_task *task)
                 /* Should we call xs_close() here? */
                 break;
         case -EAGAIN:
-               status = xs_nospace(task);
+               status = xs_nospace(req);
                 break;
         case -ECONNRESET:
         case -ECONNREFUSED:
@@ -749,35 +1134,6 @@ static int xs_tcp_send_request(struct rpc_task *task)
         return status;
  }
  
-/**
- * xs_tcp_release_xprt - clean up after a tcp transmission
- * @xprt: transport
- * @task: rpc task
- *
- * This cleans up if an error causes us to abort the transmission of a request.
- * In this case, the socket may need to be reset in order to avoid confusing
- * the server.
- */
-static void xs_tcp_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task)
-{
-       struct rpc_rqst *req;
-
-       if (task != xprt->snd_task)
-               return;
-       if (task == NULL)
-               goto out_release;
-       req = task->tk_rqstp;
-       if (req == NULL)
-               goto out_release;
-       if (req->rq_bytes_sent == 0)
-               goto out_release;
-       if (req->rq_bytes_sent == req->rq_snd_buf.len)
-               goto out_release;
-       set_bit(XPRT_CLOSE_WAIT, &xprt->state);
-out_release:
-       xprt_release_xprt(xprt, task);
-}
-
  static void xs_save_old_callbacks(struct sock_xprt *transport, struct sock *sk)
  {
         transport->old_data_ready = sk->sk_data_ready;
@@ -921,114 +1277,6 @@ static void xs_destroy(struct rpc_xprt *xprt)
         module_put(THIS_MODULE);
  }
  
-static int xs_local_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
-{
-       struct xdr_skb_reader desc = {
-               .skb            = skb,
-               .offset         = sizeof(rpc_fraghdr),
-               .count          = skb->len - sizeof(rpc_fraghdr),
-       };
-
-       if (xdr_partial_copy_from_skb(xdr, 0, &desc, xdr_skb_read_bits) < 0)
-               return -1;
-       if (desc.count)
-               return -1;
-       return 0;
-}
-
-/**
- * xs_local_data_read_skb
- * @xprt: transport
- * @sk: socket
- * @skb: skbuff
- *
- * Currently this assumes we can read the whole reply in a single gulp.
- */
-static void xs_local_data_read_skb(struct rpc_xprt *xprt,
-               struct sock *sk,
-               struct sk_buff *skb)
-{
-       struct rpc_task *task;
-       struct rpc_rqst *rovr;
-       int repsize, copied;
-       u32 _xid;
-       __be32 *xp;
-
-       repsize = skb->len - sizeof(rpc_fraghdr);
-       if (repsize < 4) {
-               dprintk("RPC:       impossible RPC reply size %d\n", repsize);
-               return;
-       }
-
-       /* Copy the XID from the skb... */
-       xp = skb_header_pointer(skb, sizeof(rpc_fraghdr), sizeof(_xid), &_xid);
-       if (xp == NULL)
-               return;
-
-       /* Look up and lock the request corresponding to the given XID */
-       spin_lock(&xprt->recv_lock);
-       rovr = xprt_lookup_rqst(xprt, *xp);
-       if (!rovr)
-               goto out_unlock;
-       xprt_pin_rqst(rovr);
-       spin_unlock(&xprt->recv_lock);
-       task = rovr->rq_task;
-
-       copied = rovr->rq_private_buf.buflen;
-       if (copied > repsize)
-               copied = repsize;
-
-       if (xs_local_copy_to_xdr(&rovr->rq_private_buf, skb)) {
-               dprintk("RPC:       sk_buff copy failed\n");
-               spin_lock(&xprt->recv_lock);
-               goto out_unpin;
-       }
-
-       spin_lock(&xprt->recv_lock);
-       xprt_complete_rqst(task, copied);
-out_unpin:
-       xprt_unpin_rqst(rovr);
- out_unlock:
-       spin_unlock(&xprt->recv_lock);
-}
-
-static void xs_local_data_receive(struct sock_xprt *transport)
-{
-       struct sk_buff *skb;
-       struct sock *sk;
-       int err;
-
-restart:
-       mutex_lock(&transport->recv_mutex);
-       sk = transport->inet;
-       if (sk == NULL)
-               goto out;
-       for (;;) {
-               skb = skb_recv_datagram(sk, 0, 1, &err);
-               if (skb != NULL) {
-                       xs_local_data_read_skb(&transport->xprt, sk, skb);
-                       skb_free_datagram(sk, skb);
-                       continue;
-               }
-               if (!test_and_clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
-                       break;
-               if (need_resched()) {
-                       mutex_unlock(&transport->recv_mutex);
-                       cond_resched();
-                       goto restart;
-               }
-       }
-out:
-       mutex_unlock(&transport->recv_mutex);
-}
-
-static void xs_local_data_receive_workfn(struct work_struct *work)
-{
-       struct sock_xprt *transport =
-               container_of(work, struct sock_xprt, recv_worker);
-       xs_local_data_receive(transport);
-}
-
  /**
   * xs_udp_data_read_skb - receive callback for UDP sockets
   * @xprt: transport
@@ -1058,13 +1306,13 @@ static void xs_udp_data_read_skb(struct rpc_xprt *xprt,
                 return;
  
         /* Look up and lock the request corresponding to the given XID */
-       spin_lock(&xprt->recv_lock);
+       spin_lock(&xprt->queue_lock);
         rovr = xprt_lookup_rqst(xprt, *xp);
         if (!rovr)
                 goto out_unlock;
         xprt_pin_rqst(rovr);
         xprt_update_rtt(rovr->rq_task);
-       spin_unlock(&xprt->recv_lock);
+       spin_unlock(&xprt->queue_lock);
         task = rovr->rq_task;
  
         if ((copied = rovr->rq_private_buf.buflen) > repsize)
@@ -1072,7 +1320,7 @@ static void xs_udp_data_read_skb(struct rpc_xprt *xprt,
  
         /* Suck it into the iovec, verify checksum if not done by hw. */
         if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb)) {
-               spin_lock(&xprt->recv_lock);
+               spin_lock(&xprt->queue_lock);
                 __UDPX_INC_STATS(sk, UDP_MIB_INERRORS);
                 goto out_unpin;
         }
@@ -1081,13 +1329,13 @@ static void xs_udp_data_read_skb(struct rpc_xprt *xprt,
         spin_lock_bh(&xprt->transport_lock);
         xprt_adjust_cwnd(xprt, task, copied);
         spin_unlock_bh(&xprt->transport_lock);
-       spin_lock(&xprt->recv_lock);
+       spin_lock(&xprt->queue_lock);
         xprt_complete_rqst(task, copied);
         __UDPX_INC_STATS(sk, UDP_MIB_INDATAGRAMS);
  out_unpin:
         xprt_unpin_rqst(rovr);
   out_unlock:
-       spin_unlock(&xprt->recv_lock);
+       spin_unlock(&xprt->queue_lock);
  }
  
  static void xs_udp_data_receive(struct sock_xprt *transport)
@@ -1096,25 +1344,18 @@ static void xs_udp_data_receive(struct sock_xprt *transport)
         struct sock *sk;
         int err;
  
-restart:
         mutex_lock(&transport->recv_mutex);
         sk = transport->inet;
         if (sk == NULL)
                 goto out;
+       clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
         for (;;) {
                 skb = skb_recv_udp(sk, 0, 1, &err);
-               if (skb != NULL) {
-                       xs_udp_data_read_skb(&transport->xprt, sk, skb);
-                       consume_skb(skb);
-                       continue;
-               }
-               if (!test_and_clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
+               if (skb == NULL)
                         break;
-               if (need_resched()) {
-                       mutex_unlock(&transport->recv_mutex);
-                       cond_resched();
-                       goto restart;
-               }
+               xs_udp_data_read_skb(&transport->xprt, sk, skb);
+               consume_skb(skb);
+               cond_resched();
         }
  out:
         mutex_unlock(&transport->recv_mutex);
@@ -1163,263 +1404,7 @@ static void xs_tcp_force_close(struct rpc_xprt *xprt)
         xprt_force_disconnect(xprt);
  }
  
-static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, struct xdr_skb_reader *desc)
-{
-       struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
-       size_t len, used;
-       char *p;
-
-       p = ((char *) &transport->tcp_fraghdr) + transport->tcp_offset;
-       len = sizeof(transport->tcp_fraghdr) - transport->tcp_offset;
-       used = xdr_skb_read_bits(desc, p, len);
-       transport->tcp_offset += used;
-       if (used != len)
-               return;
-
-       transport->tcp_reclen = ntohl(transport->tcp_fraghdr);
-       if (transport->tcp_reclen & RPC_LAST_STREAM_FRAGMENT)
-               transport->tcp_flags |= TCP_RCV_LAST_FRAG;
-       else
-               transport->tcp_flags &= ~TCP_RCV_LAST_FRAG;
-       transport->tcp_reclen &= RPC_FRAGMENT_SIZE_MASK;
-
-       transport->tcp_flags &= ~TCP_RCV_COPY_FRAGHDR;
-       transport->tcp_offset = 0;
-
-       /* Sanity check of the record length */
-       if (unlikely(transport->tcp_reclen < 8)) {
-               dprintk("RPC:       invalid TCP record fragment length\n");
-               xs_tcp_force_close(xprt);
-               return;
-       }
-       dprintk("RPC:       reading TCP record fragment of length %d\n",
-                       transport->tcp_reclen);
-}
-
-static void xs_tcp_check_fraghdr(struct sock_xprt *transport)
-{
-       if (transport->tcp_offset == transport->tcp_reclen) {
-               transport->tcp_flags |= TCP_RCV_COPY_FRAGHDR;
-               transport->tcp_offset = 0;
-               if (transport->tcp_flags & TCP_RCV_LAST_FRAG) {
-                       transport->tcp_flags &= ~TCP_RCV_COPY_DATA;
-                       transport->tcp_flags |= TCP_RCV_COPY_XID;
-                       transport->tcp_copied = 0;
-               }
-       }
-}
-
-static inline void xs_tcp_read_xid(struct sock_xprt *transport, struct xdr_skb_reader *desc)
-{
-       size_t len, used;
-       char *p;
-
-       len = sizeof(transport->tcp_xid) - transport->tcp_offset;
-       dprintk("RPC:       reading XID (%zu bytes)\n", len);
-       p = ((char *) &transport->tcp_xid) + transport->tcp_offset;
-       used = xdr_skb_read_bits(desc, p, len);
-       transport->tcp_offset += used;
-       if (used != len)
-               return;
-       transport->tcp_flags &= ~TCP_RCV_COPY_XID;
-       transport->tcp_flags |= TCP_RCV_READ_CALLDIR;
-       transport->tcp_copied = 4;
-       dprintk("RPC:       reading %s XID %08x\n",
-                       (transport->tcp_flags & TCP_RPC_REPLY) ? "reply for"
-                                                             : "request with",
-                       ntohl(transport->tcp_xid));
-       xs_tcp_check_fraghdr(transport);
-}
-
-static inline void xs_tcp_read_calldir(struct sock_xprt *transport,
-                                      struct xdr_skb_reader *desc)
-{
-       size_t len, used;
-       u32 offset;
-       char *p;
-
-       /*
-        * We want transport->tcp_offset to be 8 at the end of this routine
-        * (4 bytes for the xid and 4 bytes for the call/reply flag).
-        * When this function is called for the first time,
-        * transport->tcp_offset is 4 (after having already read the xid).
-        */
-       offset = transport->tcp_offset - sizeof(transport->tcp_xid);
-       len = sizeof(transport->tcp_calldir) - offset;
-       dprintk("RPC:       reading CALL/REPLY flag (%zu bytes)\n", len);
-       p = ((char *) &transport->tcp_calldir) + offset;
-       used = xdr_skb_read_bits(desc, p, len);
-       transport->tcp_offset += used;
-       if (used != len)
-               return;
-       transport->tcp_flags &= ~TCP_RCV_READ_CALLDIR;
-       /*
-        * We don't yet have the XDR buffer, so we will write the calldir
-        * out after we get the buffer from the 'struct rpc_rqst'
-        */
-       switch (ntohl(transport->tcp_calldir)) {
-       case RPC_REPLY:
-               transport->tcp_flags |= TCP_RCV_COPY_CALLDIR;
-               transport->tcp_flags |= TCP_RCV_COPY_DATA;
-               transport->tcp_flags |= TCP_RPC_REPLY;
-               break;
-       case RPC_CALL:
-               transport->tcp_flags |= TCP_RCV_COPY_CALLDIR;
-               transport->tcp_flags |= TCP_RCV_COPY_DATA;
-               transport->tcp_flags &= ~TCP_RPC_REPLY;
-               break;
-       default:
-               dprintk("RPC:       invalid request message type\n");
-               xs_tcp_force_close(&transport->xprt);
-       }
-       xs_tcp_check_fraghdr(transport);
-}
-
-static inline void xs_tcp_read_common(struct rpc_xprt *xprt,
-                                    struct xdr_skb_reader *desc,
-                                    struct rpc_rqst *req)
-{
-       struct sock_xprt *transport =
-                               container_of(xprt, struct sock_xprt, xprt);
-       struct xdr_buf *rcvbuf;
-       size_t len;
-       ssize_t r;
-
-       rcvbuf = &req->rq_private_buf;
-
-       if (transport->tcp_flags & TCP_RCV_COPY_CALLDIR) {
-               /*
-                * Save the RPC direction in the XDR buffer
-                */
-               memcpy(rcvbuf->head[0].iov_base + transport->tcp_copied,
-                       &transport->tcp_calldir,
-                       sizeof(transport->tcp_calldir));
-               transport->tcp_copied += sizeof(transport->tcp_calldir);
-               transport->tcp_flags &= ~TCP_RCV_COPY_CALLDIR;
-       }
-
-       len = desc->count;
-       if (len > transport->tcp_reclen - transport->tcp_offset)
-               desc->count = transport->tcp_reclen - transport->tcp_offset;
-       r = xdr_partial_copy_from_skb(rcvbuf, transport->tcp_copied,
-                                         desc, xdr_skb_read_bits);
-
-       if (desc->count) {
-               /* Error when copying to the receive buffer,
-                * usually because we weren't able to allocate
-                * additional buffer pages. All we can do now
-                * is turn off TCP_RCV_COPY_DATA, so the request
-                * will not receive any additional updates,
-                * and time out.
-                * Any remaining data from this record will
-                * be discarded.
-                */
-               transport->tcp_flags &= ~TCP_RCV_COPY_DATA;
-               dprintk("RPC:       XID %08x truncated request\n",
-                               ntohl(transport->tcp_xid));
-               dprintk("RPC:       xprt = %p, tcp_copied = %lu, "
-                               "tcp_offset = %u, tcp_reclen = %u\n",
-                               xprt, transport->tcp_copied,
-                               transport->tcp_offset, transport->tcp_reclen);
-               return;
-       }
-
-       transport->tcp_copied += r;
-       transport->tcp_offset += r;
-       desc->count = len - r;
-
-       dprintk("RPC:       XID %08x read %zd bytes\n",
-                       ntohl(transport->tcp_xid), r);
-       dprintk("RPC:       xprt = %p, tcp_copied = %lu, tcp_offset = %u, "
-                       "tcp_reclen = %u\n", xprt, transport->tcp_copied,
-                       transport->tcp_offset, transport->tcp_reclen);
-
-       if (transport->tcp_copied == req->rq_private_buf.buflen)
-               transport->tcp_flags &= ~TCP_RCV_COPY_DATA;
-       else if (transport->tcp_offset == transport->tcp_reclen) {
-               if (transport->tcp_flags & TCP_RCV_LAST_FRAG)
-                       transport->tcp_flags &= ~TCP_RCV_COPY_DATA;
-       }
-}
-
-/*
- * Finds the request corresponding to the RPC xid and invokes the common
- * tcp read code to read the data.
- */
-static inline int xs_tcp_read_reply(struct rpc_xprt *xprt,
-                                   struct xdr_skb_reader *desc)
-{
-       struct sock_xprt *transport =
-                               container_of(xprt, struct sock_xprt, xprt);
-       struct rpc_rqst *req;
-
-       dprintk("RPC:       read reply XID %08x\n", ntohl(transport->tcp_xid));
-
-       /* Find and lock the request corresponding to this xid */
-       spin_lock(&xprt->recv_lock);
-       req = xprt_lookup_rqst(xprt, transport->tcp_xid);
-       if (!req) {
-               dprintk("RPC:       XID %08x request not found!\n",
-                               ntohl(transport->tcp_xid));
-               spin_unlock(&xprt->recv_lock);
-               return -1;
-       }
-       xprt_pin_rqst(req);
-       spin_unlock(&xprt->recv_lock);
-
-       xs_tcp_read_common(xprt, desc, req);
-
-       spin_lock(&xprt->recv_lock);
-       if (!(transport->tcp_flags & TCP_RCV_COPY_DATA))
-               xprt_complete_rqst(req->rq_task, transport->tcp_copied);
-       xprt_unpin_rqst(req);
-       spin_unlock(&xprt->recv_lock);
-       return 0;
-}
-
  #if defined(CONFIG_SUNRPC_BACKCHANNEL)
-/*
- * Obtains an rpc_rqst previously allocated and invokes the common
- * tcp read code to read the data.  The result is placed in the callback
- * queue.
- * If we're unable to obtain the rpc_rqst we schedule the closing of the
- * connection and return -1.
- */
-static int xs_tcp_read_callback(struct rpc_xprt *xprt,
-                                      struct xdr_skb_reader *desc)
-{
-       struct sock_xprt *transport =
-                               container_of(xprt, struct sock_xprt, xprt);
-       struct rpc_rqst *req;
-
-       /* Look up the request corresponding to the given XID */
-       req = xprt_lookup_bc_request(xprt, transport->tcp_xid);
-       if (req == NULL) {
-               printk(KERN_WARNING "Callback slot table overflowed\n");
-               xprt_force_disconnect(xprt);
-               return -1;
-       }
-
-       dprintk("RPC:       read callback  XID %08x\n", ntohl(req->rq_xid));
-       xs_tcp_read_common(xprt, desc, req);
-
-       if (!(transport->tcp_flags & TCP_RCV_COPY_DATA))
-               xprt_complete_bc_request(req, transport->tcp_copied);
-
-       return 0;
-}
-
-static inline int _xs_tcp_read_data(struct rpc_xprt *xprt,
-                                       struct xdr_skb_reader *desc)
-{
-       struct sock_xprt *transport =
-                               container_of(xprt, struct sock_xprt, xprt);
-
-       return (transport->tcp_flags & TCP_RPC_REPLY) ?
-               xs_tcp_read_reply(xprt, desc) :
-               xs_tcp_read_callback(xprt, desc);
-}
-
  static int xs_tcp_bc_up(struct svc_serv *serv, struct net *net)
  {
         int ret;
@@ -1435,145 +1420,8 @@ static size_t xs_tcp_bc_maxpayload(struct rpc_xprt *xprt)
  {
         return PAGE_SIZE;
  }
-#else
-static inline int _xs_tcp_read_data(struct rpc_xprt *xprt,
-                                       struct xdr_skb_reader *desc)
-{
-       return xs_tcp_read_reply(xprt, desc);
-}
  #endif /* CONFIG_SUNRPC_BACKCHANNEL */
  
-/*
- * Read data off the transport.  This can be either an RPC_CALL or an
- * RPC_REPLY.  Relay the processing to helper functions.
- */
-static void xs_tcp_read_data(struct rpc_xprt *xprt,
-                                   struct xdr_skb_reader *desc)
-{
-       struct sock_xprt *transport =
-                               container_of(xprt, struct sock_xprt, xprt);
-
-       if (_xs_tcp_read_data(xprt, desc) == 0)
-               xs_tcp_check_fraghdr(transport);
-       else {
-               /*
-                * The transport_lock protects the request handling.
-                * There's no need to hold it to update the tcp_flags.
-                */
-               transport->tcp_flags &= ~TCP_RCV_COPY_DATA;
-       }
-}
-
-static inline void xs_tcp_read_discard(struct sock_xprt *transport, struct xdr_skb_reader *desc)
-{
-       size_t len;
-
-       len = transport->tcp_reclen - transport->tcp_offset;
-       if (len > desc->count)
-               len = desc->count;
-       desc->count -= len;
-       desc->offset += len;
-       transport->tcp_offset += len;
-       dprintk("RPC:       discarded %zu bytes\n", len);
-       xs_tcp_check_fraghdr(transport);
-}
-
-static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, unsigned int offset, size_t len)
-{
-       struct rpc_xprt *xprt = rd_desc->arg.data;
-       struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
-       struct xdr_skb_reader desc = {
-               .skb    = skb,
-               .offset = offset,
-               .count  = len,
-       };
-       size_t ret;
-
-       dprintk("RPC:       xs_tcp_data_recv started\n");
-       do {
-               trace_xs_tcp_data_recv(transport);
-               /* Read in a new fragment marker if necessary */
-               /* Can we ever really expect to get completely empty fragments? */
-               if (transport->tcp_flags & TCP_RCV_COPY_FRAGHDR) {
-                       xs_tcp_read_fraghdr(xprt, &desc);
-                       continue;
-               }
-               /* Read in the xid if necessary */
-               if (transport->tcp_flags & TCP_RCV_COPY_XID) {
-                       xs_tcp_read_xid(transport, &desc);
-                       continue;
-               }
-               /* Read in the call/reply flag */
-               if (transport->tcp_flags & TCP_RCV_READ_CALLDIR) {
-                       xs_tcp_read_calldir(transport, &desc);
-                       continue;
-               }
-               /* Read in the request data */
-               if (transport->tcp_flags & TCP_RCV_COPY_DATA) {
-                       xs_tcp_read_data(xprt, &desc);
-                       continue;
-               }
-               /* Skip over any trailing bytes on short reads */
-               xs_tcp_read_discard(transport, &desc);
-       } while (desc.count);
-       ret = len - desc.count;
-       if (ret < rd_desc->count)
-               rd_desc->count -= ret;
-       else
-               rd_desc->count = 0;
-       trace_xs_tcp_data_recv(transport);
-       dprintk("RPC:       xs_tcp_data_recv done\n");
-       return ret;
-}
-
-static void xs_tcp_data_receive(struct sock_xprt *transport)
-{
-       struct rpc_xprt *xprt = &transport->xprt;
-       struct sock *sk;
-       read_descriptor_t rd_desc = {
-               .arg.data = xprt,
-       };
-       unsigned long total = 0;
-       int read = 0;
-
-restart:
-       mutex_lock(&transport->recv_mutex);
-       sk = transport->inet;
-       if (sk == NULL)
-               goto out;
-
-       /* We use rd_desc to pass struct xprt to xs_tcp_data_recv */
-       for (;;) {
-               rd_desc.count = RPC_TCP_READ_CHUNK_SZ;
-               lock_sock(sk);
-               read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv);
-               if (rd_desc.count != 0 || read < 0) {
-                       clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
-                       release_sock(sk);
-                       break;
-               }
-               release_sock(sk);
-               total += read;
-               if (need_resched()) {
-                       mutex_unlock(&transport->recv_mutex);
-                       cond_resched();
-                       goto restart;
-               }
-       }
-       if (test_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
-               queue_work(xprtiod_workqueue, &transport->recv_worker);
-out:
-       mutex_unlock(&transport->recv_mutex);
-       trace_xs_tcp_data_ready(xprt, read, total);
-}
-
-static void xs_tcp_data_receive_workfn(struct work_struct *work)
-{
-       struct sock_xprt *transport =
-               container_of(work, struct sock_xprt, recv_worker);
-       xs_tcp_data_receive(transport);
-}
-
  /**
   * xs_tcp_state_change - callback to handle TCP socket state changes
   * @sk: socket whose state has changed
@@ -1600,17 +1448,13 @@ static void xs_tcp_state_change(struct sock *sk)
         case TCP_ESTABLISHED:
                 spin_lock(&xprt->transport_lock);
                 if (!xprt_test_and_set_connected(xprt)) {
-
-                       /* Reset TCP record info */
-                       transport->tcp_offset = 0;
-                       transport->tcp_reclen = 0;
-                       transport->tcp_copied = 0;
-                       transport->tcp_flags =
-                               TCP_RCV_COPY_FRAGHDR | TCP_RCV_COPY_XID;
                         xprt->connect_cookie++;
                         clear_bit(XPRT_SOCK_CONNECTING, &transport->sock_state);
                         xprt_clear_connecting(xprt);
  
+                       xprt->stat.connect_count++;
+                       xprt->stat.connect_time += (long)jiffies -
+                                                  xprt->stat.connect_start;
                         xprt_wake_pending_tasks(xprt, -EAGAIN);
                 }
                 spin_unlock(&xprt->transport_lock);
@@ -1675,7 +1519,8 @@ static void xs_write_space(struct sock *sk)
         if (!wq || test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags) == 0)
                 goto out;
  
-       xprt_write_space(xprt);
+       if (xprt_write_space(xprt))
+               sk->sk_write_pending--;
  out:
         rcu_read_unlock();
  }
@@ -1773,11 +1618,17 @@ static void xs_udp_timer(struct rpc_xprt *xprt, struct rpc_task *task)
         spin_unlock_bh(&xprt->transport_lock);
  }
  
-static unsigned short xs_get_random_port(void)
+static int xs_get_random_port(void)
  {
-       unsigned short range = xprt_max_resvport - xprt_min_resvport + 1;
-       unsigned short rand = (unsigned short) prandom_u32() % range;
-       return rand + xprt_min_resvport;
+       unsigned short min = xprt_min_resvport, max = xprt_max_resvport;
+       unsigned short range;
+       unsigned short rand;
+
+       if (max < min)
+               return -EADDRINUSE;
+       range = max - min + 1;
+       rand = (unsigned short) prandom_u32() % range;
+       return rand + min;
  }
  
  /**
@@ -1833,9 +1684,9 @@ static void xs_set_srcport(struct sock_xprt *transport, struct socket *sock)
                 transport->srcport = xs_sock_getport(sock);
  }
  
-static unsigned short xs_get_srcport(struct sock_xprt *transport)
+static int xs_get_srcport(struct sock_xprt *transport)
  {
-       unsigned short port = transport->srcport;
+       int port = transport->srcport;
  
         if (port == 0 && transport->xprt.resvport)
                 port = xs_get_random_port();
@@ -1856,7 +1707,7 @@ static int xs_bind(struct sock_xprt *transport, struct socket *sock)
  {
         struct sockaddr_storage myaddr;
         int err, nloop = 0;
-       unsigned short port = xs_get_srcport(transport);
+       int port = xs_get_srcport(transport);
         unsigned short last;
  
         /*
@@ -1874,8 +1725,8 @@ static int xs_bind(struct sock_xprt *transport, struct socket *sock)
          * transport->xprt.resvport == 1) xs_get_srcport above will
          * ensure that port is non-zero and we will bind as needed.
          */
-       if (port == 0)
-               return 0;
+       if (port <= 0)
+               return port;
  
         memcpy(&myaddr, &transport->srcaddr, transport->xprt.addrlen);
         do {
@@ -2028,9 +1879,8 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt,
                 write_unlock_bh(&sk->sk_callback_lock);
         }
  
-       /* Tell the socket layer to start connecting... */
-       xprt->stat.connect_count++;
-       xprt->stat.connect_start = jiffies;
+       xs_stream_reset_connect(transport);
+
         return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, 0);
  }
  
@@ -2062,6 +1912,9 @@ static int xs_local_setup_socket(struct sock_xprt *transport)
         case 0:
                 dprintk("RPC:       xprt %p connected to %s\n",
                                 xprt, xprt->address_strings[RPC_DISPLAY_ADDR]);
+               xprt->stat.connect_count++;
+               xprt->stat.connect_time += (long)jiffies -
+                                          xprt->stat.connect_start;
                 xprt_set_connected(xprt);
         case -ENOBUFS:
                 break;
@@ -2386,9 +2239,10 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
  
         xs_set_memalloc(xprt);
  
+       /* Reset TCP record info */
+       xs_stream_reset_connect(transport);
+
         /* Tell the socket layer to start connecting... */
-       xprt->stat.connect_count++;
-       xprt->stat.connect_start = jiffies;
         set_bit(XPRT_SOCK_CONNECTING, &transport->sock_state);
         ret = kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK);
         switch (ret) {
@@ -2561,7 +2415,7 @@ static void xs_local_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
                         "%llu %llu %lu %llu %llu\n",
                         xprt->stat.bind_count,
                         xprt->stat.connect_count,
-                       xprt->stat.connect_time,
+                       xprt->stat.connect_time / HZ,
                         idle_time,
                         xprt->stat.sends,
                         xprt->stat.recvs,
@@ -2616,7 +2470,7 @@ static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
                         transport->srcport,
                         xprt->stat.bind_count,
                         xprt->stat.connect_count,
-                       xprt->stat.connect_time,
+                       xprt->stat.connect_time / HZ,
                         idle_time,
                         xprt->stat.sends,
                         xprt->stat.recvs,
@@ -2704,9 +2558,8 @@ static int bc_sendto(struct rpc_rqst *req)
  /*
   * The send routine. Borrows from svc_send
   */
-static int bc_send_request(struct rpc_task *task)
+static int bc_send_request(struct rpc_rqst *req)
  {
-       struct rpc_rqst *req = task->tk_rqstp;
         struct svc_xprt *xprt;
         int len;
  
@@ -2720,12 +2573,7 @@ static int bc_send_request(struct rpc_task *task)
          * Grab the mutex to serialize data as the connection is shared
          * with the fore channel
          */
-       if (!mutex_trylock(&xprt->xpt_mutex)) {
-               rpc_sleep_on(&xprt->xpt_bc_pending, task, NULL);
-               if (!mutex_trylock(&xprt->xpt_mutex))
-                       return -EAGAIN;
-               rpc_wake_up_queued_task(&xprt->xpt_bc_pending, task);
-       }
+       mutex_lock(&xprt->xpt_mutex);
         if (test_bit(XPT_DEAD, &xprt->xpt_flags))
                 len = -ENOTCONN;
         else
@@ -2761,7 +2609,7 @@ static void bc_destroy(struct rpc_xprt *xprt)
  
  static const struct rpc_xprt_ops xs_local_ops = {
         .reserve_xprt           = xprt_reserve_xprt,
-       .release_xprt           = xs_tcp_release_xprt,
+       .release_xprt           = xprt_release_xprt,
         .alloc_slot             = xprt_alloc_slot,
         .free_slot              = xprt_free_slot,
         .rpcbind                = xs_local_rpcbind,
@@ -2769,6 +2617,7 @@ static const struct rpc_xprt_ops xs_local_ops = {
         .connect                = xs_local_connect,
         .buf_alloc              = rpc_malloc,
         .buf_free               = rpc_free,
+       .prepare_request        = xs_stream_prepare_request,
         .send_request           = xs_local_send_request,
         .set_retrans_timeout    = xprt_set_retrans_timeout_def,
         .close                  = xs_close,
@@ -2803,14 +2652,15 @@ static const struct rpc_xprt_ops xs_udp_ops = {
  
  static const struct rpc_xprt_ops xs_tcp_ops = {
         .reserve_xprt           = xprt_reserve_xprt,
-       .release_xprt           = xs_tcp_release_xprt,
-       .alloc_slot             = xprt_lock_and_alloc_slot,
+       .release_xprt           = xprt_release_xprt,
+       .alloc_slot             = xprt_alloc_slot,
         .free_slot              = xprt_free_slot,
         .rpcbind                = rpcb_getport_async,
         .set_port               = xs_set_port,
         .connect                = xs_connect,
         .buf_alloc              = rpc_malloc,
         .buf_free               = rpc_free,
+       .prepare_request        = xs_stream_prepare_request,
         .send_request           = xs_tcp_send_request,
         .set_retrans_timeout    = xprt_set_retrans_timeout_def,
         .close                  = xs_tcp_shutdown,
@@ -2952,9 +2802,8 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args)
         xprt->ops = &xs_local_ops;
         xprt->timeout = &xs_local_default_timeout;
  
-       INIT_WORK(&transport->recv_worker, xs_local_data_receive_workfn);
-       INIT_DELAYED_WORK(&transport->connect_worker,
-                       xs_dummy_setup_socket);
+       INIT_WORK(&transport->recv_worker, xs_stream_data_receive_workfn);
+       INIT_DELAYED_WORK(&transport->connect_worker, xs_dummy_setup_socket);
  
         switch (sun->sun_family) {
         case AF_LOCAL:
@@ -3106,7 +2955,7 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
         xprt->connect_timeout = xprt->timeout->to_initval *
                 (xprt->timeout->to_retries + 1);
  
-       INIT_WORK(&transport->recv_worker, xs_tcp_data_receive_workfn);
+       INIT_WORK(&transport->recv_worker, xs_stream_data_receive_workfn);
         INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_setup_socket);
  
         switch (addr->sa_family) {
@@ -3317,12 +3166,8 @@ static int param_set_uint_minmax(const char *val,
  
  static int param_set_portnr(const char *val, const struct kernel_param *kp)
  {
-       if (kp->arg == &xprt_min_resvport)
-               return param_set_uint_minmax(val, kp,
-                       RPC_MIN_RESVPORT,
-                       xprt_max_resvport);
         return param_set_uint_minmax(val, kp,
-                       xprt_min_resvport,
+                       RPC_MIN_RESVPORT,
                         RPC_MAX_RESVPORT);
  }
  
diff --git a/scripts/Makefile b/scripts/Makefile

index 61affa3..ece52ff 100644 (file)
--- a/scripts/Makefile
+++ b/scripts/Makefile
@@ -39,8 +39,7 @@ build_unifdef: $(obj)/unifdef
  subdir-$(CONFIG_MODVERSIONS) += genksyms
  subdir-y                     += mod
  subdir-$(CONFIG_SECURITY_SELINUX) += selinux
-subdir-$(CONFIG_DTC)         += dtc
  subdir-$(CONFIG_GDB_SCRIPTS) += gdb
  
  # Let clean descend into subdirs
-subdir-        += basic kconfig package gcc-plugins
+subdir-        += basic dtc kconfig package gcc-plugins
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib

index 61e5966..8fe4468 100644 (file)
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -283,7 +283,7 @@ $(obj)/%.dtb.S: $(obj)/%.dtb FORCE
  
  quiet_cmd_dtc = DTC     $@
  cmd_dtc = mkdir -p $(dir ${dtc-tmp}) ; \
-       $(CPP) $(dtc_cpp_flags) -x assembler-with-cpp -o $(dtc-tmp) $< ; \
+       $(HOSTCC) -E $(dtc_cpp_flags) -x assembler-with-cpp -o $(dtc-tmp) $< ; \
         $(DTC) -O dtb -o $@ -b 0 \
                 $(addprefix -i,$(dir $<) $(DTC_INCLUDE)) $(DTC_FLAGS) \
                 -d $(depfile).dtc.tmp $(dtc-tmp) ; \
diff --git a/scripts/dtc/Makefile b/scripts/dtc/Makefile

index 1c943e0..056d5da 100644 (file)
--- a/scripts/dtc/Makefile
+++ b/scripts/dtc/Makefile
@@ -1,7 +1,7 @@
  # SPDX-License-Identifier: GPL-2.0
  # scripts/dtc makefile
  
-hostprogs-y    := dtc
+hostprogs-$(CONFIG_DTC) := dtc
  always         := $(hostprogs-y)
  
  dtc-objs       := dtc.o flattree.o fstree.o data.o livetree.o treesource.o \
@@ -11,6 +11,13 @@ dtc-objs     += dtc-lexer.lex.o dtc-parser.tab.o
  # Source files need to get at the userspace version of libfdt_env.h to compile
  HOST_EXTRACFLAGS := -I$(src)/libfdt
  
+ifeq ($(wildcard /usr/include/yaml.h),)
+HOST_EXTRACFLAGS += -DNO_YAML
+else
+dtc-objs       += yamltree.o
+HOSTLDLIBS_dtc := -lyaml
+endif
+
  # Generated files need one more search path to include headers in source tree
  HOSTCFLAGS_dtc-lexer.lex.o := -I$(src)
  HOSTCFLAGS_dtc-parser.tab.o := -I$(src)
diff --git a/scripts/dtc/Makefile.dtc b/scripts/dtc/Makefile.dtc

index bece49b..d437563 100644 (file)
--- a/scripts/dtc/Makefile.dtc
+++ b/scripts/dtc/Makefile.dtc
@@ -14,5 +14,9 @@ DTC_SRCS = \
         treesource.c \
         util.c
  
+ifneq ($(NO_YAML),1)
+DTC_SRCS += yamltree.c
+endif
+
  DTC_GEN_SRCS = dtc-lexer.lex.c dtc-parser.tab.c
  DTC_OBJS = $(DTC_SRCS:%.c=%.o) $(DTC_GEN_SRCS:%.c=%.o)
diff --git a/scripts/dtc/checks.c b/scripts/dtc/checks.c

index a2cc103..9c9b0c3 100644 (file)
--- a/scripts/dtc/checks.c
+++ b/scripts/dtc/checks.c
@@ -962,6 +962,143 @@ static void check_simple_bus_reg(struct check *c, struct dt_info *dti, struct no
  }
  WARNING(simple_bus_reg, check_simple_bus_reg, NULL, &reg_format, &simple_bus_bridge);
  
+static const struct bus_type i2c_bus = {
+       .name = "i2c-bus",
+};
+
+static void check_i2c_bus_bridge(struct check *c, struct dt_info *dti, struct node *node)
+{
+       if (strprefixeq(node->name, node->basenamelen, "i2c-bus") ||
+           strprefixeq(node->name, node->basenamelen, "i2c-arb")) {
+               node->bus = &i2c_bus;
+       } else if (strprefixeq(node->name, node->basenamelen, "i2c")) {
+               struct node *child;
+               for_each_child(node, child) {
+                       if (strprefixeq(child->name, node->basenamelen, "i2c-bus"))
+                               return;
+               }
+               node->bus = &i2c_bus;
+       } else
+               return;
+
+       if (!node->children)
+               return;
+
+       if (node_addr_cells(node) != 1)
+               FAIL(c, dti, node, "incorrect #address-cells for I2C bus");
+       if (node_size_cells(node) != 0)
+               FAIL(c, dti, node, "incorrect #size-cells for I2C bus");
+
+}
+WARNING(i2c_bus_bridge, check_i2c_bus_bridge, NULL, &addr_size_cells);
+
+static void check_i2c_bus_reg(struct check *c, struct dt_info *dti, struct node *node)
+{
+       struct property *prop;
+       const char *unitname = get_unitname(node);
+       char unit_addr[17];
+       uint32_t reg = 0;
+       int len;
+       cell_t *cells = NULL;
+
+       if (!node->parent || (node->parent->bus != &i2c_bus))
+               return;
+
+       prop = get_property(node, "reg");
+       if (prop)
+               cells = (cell_t *)prop->val.val;
+
+       if (!cells) {
+               FAIL(c, dti, node, "missing or empty reg property");
+               return;
+       }
+
+       reg = fdt32_to_cpu(*cells);
+       snprintf(unit_addr, sizeof(unit_addr), "%x", reg);
+       if (!streq(unitname, unit_addr))
+               FAIL(c, dti, node, "I2C bus unit address format error, expected \"%s\"",
+                    unit_addr);
+
+       for (len = prop->val.len; len > 0; len -= 4) {
+               reg = fdt32_to_cpu(*(cells++));
+               if (reg > 0x3ff)
+                       FAIL_PROP(c, dti, node, prop, "I2C address must be less than 10-bits, got \"0x%x\"",
+                                 reg);
+
+       }
+}
+WARNING(i2c_bus_reg, check_i2c_bus_reg, NULL, &reg_format, &i2c_bus_bridge);
+
+static const struct bus_type spi_bus = {
+       .name = "spi-bus",
+};
+
+static void check_spi_bus_bridge(struct check *c, struct dt_info *dti, struct node *node)
+{
+
+       if (strprefixeq(node->name, node->basenamelen, "spi")) {
+               node->bus = &spi_bus;
+       } else {
+               /* Try to detect SPI buses which don't have proper node name */
+               struct node *child;
+
+               if (node_addr_cells(node) != 1 || node_size_cells(node) != 0)
+                       return;
+
+               for_each_child(node, child) {
+                       struct property *prop;
+                       for_each_property(child, prop) {
+                               if (strprefixeq(prop->name, 4, "spi-")) {
+                                       node->bus = &spi_bus;
+                                       break;
+                               }
+                       }
+                       if (node->bus == &spi_bus)
+                               break;
+               }
+
+               if (node->bus == &spi_bus && get_property(node, "reg"))
+                       FAIL(c, dti, node, "node name for SPI buses should be 'spi'");
+       }
+       if (node->bus != &spi_bus || !node->children)
+               return;
+
+       if (node_addr_cells(node) != 1)
+               FAIL(c, dti, node, "incorrect #address-cells for SPI bus");
+       if (node_size_cells(node) != 0)
+               FAIL(c, dti, node, "incorrect #size-cells for SPI bus");
+
+}
+WARNING(spi_bus_bridge, check_spi_bus_bridge, NULL, &addr_size_cells);
+
+static void check_spi_bus_reg(struct check *c, struct dt_info *dti, struct node *node)
+{
+       struct property *prop;
+       const char *unitname = get_unitname(node);
+       char unit_addr[9];
+       uint32_t reg = 0;
+       cell_t *cells = NULL;
+
+       if (!node->parent || (node->parent->bus != &spi_bus))
+               return;
+
+       prop = get_property(node, "reg");
+       if (prop)
+               cells = (cell_t *)prop->val.val;
+
+       if (!cells) {
+               FAIL(c, dti, node, "missing or empty reg property");
+               return;
+       }
+
+       reg = fdt32_to_cpu(*cells);
+       snprintf(unit_addr, sizeof(unit_addr), "%x", reg);
+       if (!streq(unitname, unit_addr))
+               FAIL(c, dti, node, "SPI bus unit address format error, expected \"%s\"",
+                    unit_addr);
+}
+WARNING(spi_bus_reg, check_spi_bus_reg, NULL, &reg_format, &spi_bus_bridge);
+
  static void check_unit_address_format(struct check *c, struct dt_info *dti,
                                       struct node *node)
  {
@@ -1582,6 +1719,12 @@ static struct check *check_table[] = {
         &simple_bus_bridge,
         &simple_bus_reg,
  
+       &i2c_bus_bridge,
+       &i2c_bus_reg,
+
+       &spi_bus_bridge,
+       &spi_bus_reg,
+
         &avoid_default_addr_size,
         &avoid_unnecessary_addr_size,
         &unique_unit_address,
diff --git a/scripts/dtc/data.c b/scripts/dtc/data.c

index aa37a16..4a20414 100644 (file)
--- a/scripts/dtc/data.c
+++ b/scripts/dtc/data.c
@@ -74,7 +74,8 @@ struct data data_copy_escape_string(const char *s, int len)
         struct data d;
         char *q;
  
-       d = data_grow_for(empty_data, len + 1);
+       d = data_add_marker(empty_data, TYPE_STRING, NULL);
+       d = data_grow_for(d, len + 1);
  
         q = d.val;
         while (i < len) {
@@ -94,6 +95,7 @@ struct data data_copy_file(FILE *f, size_t maxlen)
  {
         struct data d = empty_data;
  
+       d = data_add_marker(d, TYPE_NONE, NULL);
         while (!feof(f) && (d.len < maxlen)) {
                 size_t chunksize, ret;
  
diff --git a/scripts/dtc/dtc-parser.y b/scripts/dtc/dtc-parser.y

index 011a5b2..dd70ebf 100644 (file)
--- a/scripts/dtc/dtc-parser.y
+++ b/scripts/dtc/dtc-parser.y
@@ -287,6 +287,7 @@ propdata:
                 }
         | propdataprefix DT_REF
                 {
+                       $1 = data_add_marker($1, TYPE_STRING, $2);
                         $$ = data_add_marker($1, REF_PATH, $2);
                 }
         | propdataprefix DT_INCBIN '(' DT_STRING ',' integer_prim ',' integer_prim ')'
@@ -340,22 +341,27 @@ arrayprefix:
         DT_BITS DT_LITERAL '<'
                 {
                         unsigned long long bits;
+                       enum markertype type = TYPE_UINT32;
  
                         bits = $2;
  
-                       if ((bits !=  8) && (bits != 16) &&
-                           (bits != 32) && (bits != 64)) {
+                       switch (bits) {
+                       case 8: type = TYPE_UINT8; break;
+                       case 16: type = TYPE_UINT16; break;
+                       case 32: type = TYPE_UINT32; break;
+                       case 64: type = TYPE_UINT64; break;
+                       default:
                                 ERROR(&@2, "Array elements must be"
                                       " 8, 16, 32 or 64-bits");
                                 bits = 32;
                         }
  
-                       $$.data = empty_data;
+                       $$.data = data_add_marker(empty_data, type, NULL);
                         $$.bits = bits;
                 }
         | '<'
                 {
-                       $$.data = empty_data;
+                       $$.data = data_add_marker(empty_data, TYPE_UINT32, NULL);
                         $$.bits = 32;
                 }
         | arrayprefix integer_prim
@@ -499,7 +505,7 @@ integer_unary:
  bytestring:
           /* empty */
                 {
-                       $$ = empty_data;
+                       $$ = data_add_marker(empty_data, TYPE_UINT8, NULL);
                 }
         | bytestring DT_BYTE
                 {
diff --git a/scripts/dtc/dtc.c b/scripts/dtc/dtc.c

index c36994e..64134aa 100644 (file)
--- a/scripts/dtc/dtc.c
+++ b/scripts/dtc/dtc.c
@@ -95,6 +95,9 @@ static const char * const usage_opts_help[] = {
         "\n\tOutput formats are:\n"
          "\t\tdts - device tree source text\n"
          "\t\tdtb - device tree blob\n"
+#ifndef NO_YAML
+        "\t\tyaml - device tree encoded as YAML\n"
+#endif
          "\t\tasm - assembler source",
         "\n\tBlob version to produce, defaults to "stringify(DEFAULT_FDT_VERSION)" (for dtb and asm output)",
         "\n\tOutput dependency file",
@@ -128,6 +131,8 @@ static const char *guess_type_by_name(const char *fname, const char *fallback)
                 return fallback;
         if (!strcasecmp(s, ".dts"))
                 return "dts";
+       if (!strcasecmp(s, ".yaml"))
+               return "yaml";
         if (!strcasecmp(s, ".dtb"))
                 return "dtb";
         return fallback;
@@ -350,6 +355,12 @@ int main(int argc, char *argv[])
  
         if (streq(outform, "dts")) {
                 dt_to_source(outf, dti);
+#ifndef NO_YAML
+       } else if (streq(outform, "yaml")) {
+               if (!streq(inform, "dts"))
+                       die("YAML output format requires dts input format\n");
+               dt_to_yaml(outf, dti);
+#endif
         } else if (streq(outform, "dtb")) {
                 dt_to_blob(outf, dti, outversion);
         } else if (streq(outform, "asm")) {
diff --git a/scripts/dtc/dtc.h b/scripts/dtc/dtc.h

index 6d66770..cbe5415 100644 (file)
--- a/scripts/dtc/dtc.h
+++ b/scripts/dtc/dtc.h
@@ -74,10 +74,17 @@ typedef uint32_t cell_t;
  
  /* Data blobs */
  enum markertype {
+       TYPE_NONE,
         REF_PHANDLE,
         REF_PATH,
         LABEL,
+       TYPE_UINT8,
+       TYPE_UINT16,
+       TYPE_UINT32,
+       TYPE_UINT64,
+       TYPE_STRING,
  };
+extern const char *markername(enum markertype markertype);
  
  struct  marker {
         enum markertype type;
@@ -101,6 +108,8 @@ struct data {
         for_each_marker(m) \
                 if ((m)->type == (t))
  
+size_t type_marker_length(struct marker *m);
+
  void data_free(struct data d);
  
  struct data data_grow_for(struct data d, int xlen);
@@ -290,6 +299,10 @@ struct dt_info *dt_from_blob(const char *fname);
  void dt_to_source(FILE *f, struct dt_info *dti);
  struct dt_info *dt_from_source(const char *f);
  
+/* YAML source */
+
+void dt_to_yaml(FILE *f, struct dt_info *dti);
+
  /* FS trees */
  
  struct dt_info *dt_from_fs(const char *dirname);
diff --git a/scripts/dtc/flattree.c b/scripts/dtc/flattree.c

index 8d268fb..851ea87 100644 (file)
--- a/scripts/dtc/flattree.c
+++ b/scripts/dtc/flattree.c
@@ -393,7 +393,7 @@ void dt_to_blob(FILE *f, struct dt_info *dti, int version)
                         padlen = 0;
                         if (quiet < 1)
                                 fprintf(stderr,
-                                       "Warning: blob size %d >= minimum size %d\n",
+                                       "Warning: blob size %"PRIu32" >= minimum size %d\n",
                                         fdt32_to_cpu(fdt.totalsize), minsize);
                 }
         }
diff --git a/scripts/dtc/libfdt/fdt.c b/scripts/dtc/libfdt/fdt.c

index 7855a17..ae03b11 100644 (file)
--- a/scripts/dtc/libfdt/fdt.c
+++ b/scripts/dtc/libfdt/fdt.c
@@ -55,7 +55,12 @@
  
  #include "libfdt_internal.h"
  
-int fdt_check_header(const void *fdt)
+/*
+ * Minimal sanity check for a read-only tree. fdt_ro_probe_() checks
+ * that the given buffer contains what appears to be a flattened
+ * device tree with sane information in its header.
+ */
+int fdt_ro_probe_(const void *fdt)
  {
         if (fdt_magic(fdt) == FDT_MAGIC) {
                 /* Complete tree */
@@ -74,6 +79,78 @@ int fdt_check_header(const void *fdt)
         return 0;
  }
  
+static int check_off_(uint32_t hdrsize, uint32_t totalsize, uint32_t off)
+{
+       return (off >= hdrsize) && (off <= totalsize);
+}
+
+static int check_block_(uint32_t hdrsize, uint32_t totalsize,
+                       uint32_t base, uint32_t size)
+{
+       if (!check_off_(hdrsize, totalsize, base))
+               return 0; /* block start out of bounds */
+       if ((base + size) < base)
+               return 0; /* overflow */
+       if (!check_off_(hdrsize, totalsize, base + size))
+               return 0; /* block end out of bounds */
+       return 1;
+}
+
+size_t fdt_header_size_(uint32_t version)
+{
+       if (version <= 1)
+               return FDT_V1_SIZE;
+       else if (version <= 2)
+               return FDT_V2_SIZE;
+       else if (version <= 3)
+               return FDT_V3_SIZE;
+       else if (version <= 16)
+               return FDT_V16_SIZE;
+       else
+               return FDT_V17_SIZE;
+}
+
+int fdt_check_header(const void *fdt)
+{
+       size_t hdrsize;
+
+       if (fdt_magic(fdt) != FDT_MAGIC)
+               return -FDT_ERR_BADMAGIC;
+       hdrsize = fdt_header_size(fdt);
+       if ((fdt_version(fdt) < FDT_FIRST_SUPPORTED_VERSION)
+           || (fdt_last_comp_version(fdt) > FDT_LAST_SUPPORTED_VERSION))
+               return -FDT_ERR_BADVERSION;
+       if (fdt_version(fdt) < fdt_last_comp_version(fdt))
+               return -FDT_ERR_BADVERSION;
+
+       if ((fdt_totalsize(fdt) < hdrsize)
+           || (fdt_totalsize(fdt) > INT_MAX))
+               return -FDT_ERR_TRUNCATED;
+
+       /* Bounds check memrsv block */
+       if (!check_off_(hdrsize, fdt_totalsize(fdt), fdt_off_mem_rsvmap(fdt)))
+               return -FDT_ERR_TRUNCATED;
+
+       /* Bounds check structure block */
+       if (fdt_version(fdt) < 17) {
+               if (!check_off_(hdrsize, fdt_totalsize(fdt),
+                               fdt_off_dt_struct(fdt)))
+                       return -FDT_ERR_TRUNCATED;
+       } else {
+               if (!check_block_(hdrsize, fdt_totalsize(fdt),
+                                 fdt_off_dt_struct(fdt),
+                                 fdt_size_dt_struct(fdt)))
+                       return -FDT_ERR_TRUNCATED;
+       }
+
+       /* Bounds check strings block */
+       if (!check_block_(hdrsize, fdt_totalsize(fdt),
+                         fdt_off_dt_strings(fdt), fdt_size_dt_strings(fdt)))
+               return -FDT_ERR_TRUNCATED;
+
+       return 0;
+}
+
  const void *fdt_offset_ptr(const void *fdt, int offset, unsigned int len)
  {
         unsigned absoffset = offset + fdt_off_dt_struct(fdt);
@@ -244,7 +321,7 @@ const char *fdt_find_string_(const char *strtab, int tabsize, const char *s)
  
  int fdt_move(const void *fdt, void *buf, int bufsize)
  {
-       FDT_CHECK_HEADER(fdt);
+       FDT_RO_PROBE(fdt);
  
         if (fdt_totalsize(fdt) > bufsize)
                 return -FDT_ERR_NOSPACE;
diff --git a/scripts/dtc/libfdt/fdt_addresses.c b/scripts/dtc/libfdt/fdt_addresses.c

index eff4dbc..49537b5 100644 (file)
--- a/scripts/dtc/libfdt/fdt_addresses.c
+++ b/scripts/dtc/libfdt/fdt_addresses.c
@@ -1,6 +1,7 @@
  /*
   * libfdt - Flat Device Tree manipulation
   * Copyright (C) 2014 David Gibson <david@gibson.dropbear.id.au>
+ * Copyright (C) 2018 embedded brains GmbH
   *
   * libfdt is dual licensed: you can use it either under the terms of
   * the GPL, or the BSD license, at your option.
@@ -55,42 +56,32 @@
  
  #include "libfdt_internal.h"
  
-int fdt_address_cells(const void *fdt, int nodeoffset)
+static int fdt_cells(const void *fdt, int nodeoffset, const char *name)
  {
-       const fdt32_t *ac;
+       const fdt32_t *c;
         int val;
         int len;
  
-       ac = fdt_getprop(fdt, nodeoffset, "#address-cells", &len);
-       if (!ac)
+       c = fdt_getprop(fdt, nodeoffset, name, &len);
+       if (!c)
                 return 2;
  
-       if (len != sizeof(*ac))
+       if (len != sizeof(*c))
                 return -FDT_ERR_BADNCELLS;
  
-       val = fdt32_to_cpu(*ac);
+       val = fdt32_to_cpu(*c);
         if ((val <= 0) || (val > FDT_MAX_NCELLS))
                 return -FDT_ERR_BADNCELLS;
  
         return val;
  }
  
-int fdt_size_cells(const void *fdt, int nodeoffset)
+int fdt_address_cells(const void *fdt, int nodeoffset)
  {
-       const fdt32_t *sc;
-       int val;
-       int len;
-
-       sc = fdt_getprop(fdt, nodeoffset, "#size-cells", &len);
-       if (!sc)
-               return 2;
-
-       if (len != sizeof(*sc))
-               return -FDT_ERR_BADNCELLS;
-
-       val = fdt32_to_cpu(*sc);
-       if ((val < 0) || (val > FDT_MAX_NCELLS))
-               return -FDT_ERR_BADNCELLS;
+       return fdt_cells(fdt, nodeoffset, "#address-cells");
+}
  
-       return val;
+int fdt_size_cells(const void *fdt, int nodeoffset)
+{
+       return fdt_cells(fdt, nodeoffset, "#size-cells");
  }
diff --git a/scripts/dtc/libfdt/fdt_overlay.c b/scripts/dtc/libfdt/fdt_overlay.c

index bf75388..5fdab6c 100644 (file)
--- a/scripts/dtc/libfdt/fdt_overlay.c
+++ b/scripts/dtc/libfdt/fdt_overlay.c
@@ -697,7 +697,7 @@ static int get_path_len(const void *fdt, int nodeoffset)
         int len = 0, namelen;
         const char *name;
  
-       FDT_CHECK_HEADER(fdt);
+       FDT_RO_PROBE(fdt);
  
         for (;;) {
                 name = fdt_get_name(fdt, nodeoffset, &namelen);
@@ -866,8 +866,8 @@ int fdt_overlay_apply(void *fdt, void *fdto)
         uint32_t delta = fdt_get_max_phandle(fdt);
         int ret;
  
-       FDT_CHECK_HEADER(fdt);
-       FDT_CHECK_HEADER(fdto);
+       FDT_RO_PROBE(fdt);
+       FDT_RO_PROBE(fdto);
  
         ret = overlay_adjust_local_phandles(fdto, delta);
         if (ret)
diff --git a/scripts/dtc/libfdt/fdt_ro.c b/scripts/dtc/libfdt/fdt_ro.c

index dfb3236..eafc142 100644 (file)
--- a/scripts/dtc/libfdt/fdt_ro.c
+++ b/scripts/dtc/libfdt/fdt_ro.c
@@ -76,17 +76,72 @@ static int fdt_nodename_eq_(const void *fdt, int offset,
                 return 0;
  }
  
+const char *fdt_get_string(const void *fdt, int stroffset, int *lenp)
+{
+       uint32_t absoffset = stroffset + fdt_off_dt_strings(fdt);
+       size_t len;
+       int err;
+       const char *s, *n;
+
+       err = fdt_ro_probe_(fdt);
+       if (err != 0)
+               goto fail;
+
+       err = -FDT_ERR_BADOFFSET;
+       if (absoffset >= fdt_totalsize(fdt))
+               goto fail;
+       len = fdt_totalsize(fdt) - absoffset;
+
+       if (fdt_magic(fdt) == FDT_MAGIC) {
+               if (stroffset < 0)
+                       goto fail;
+               if (fdt_version(fdt) >= 17) {
+                       if (stroffset >= fdt_size_dt_strings(fdt))
+                               goto fail;
+                       if ((fdt_size_dt_strings(fdt) - stroffset) < len)
+                               len = fdt_size_dt_strings(fdt) - stroffset;
+               }
+       } else if (fdt_magic(fdt) == FDT_SW_MAGIC) {
+               if ((stroffset >= 0)
+                   || (stroffset < -fdt_size_dt_strings(fdt)))
+                       goto fail;
+               if ((-stroffset) < len)
+                       len = -stroffset;
+       } else {
+               err = -FDT_ERR_INTERNAL;
+               goto fail;
+       }
+
+       s = (const char *)fdt + absoffset;
+       n = memchr(s, '\0', len);
+       if (!n) {
+               /* missing terminating NULL */
+               err = -FDT_ERR_TRUNCATED;
+               goto fail;
+       }
+
+       if (lenp)
+               *lenp = n - s;
+       return s;
+
+fail:
+       if (lenp)
+               *lenp = err;
+       return NULL;
+}
+
  const char *fdt_string(const void *fdt, int stroffset)
  {
-       return (const char *)fdt + fdt_off_dt_strings(fdt) + stroffset;
+       return fdt_get_string(fdt, stroffset, NULL);
  }
  
  static int fdt_string_eq_(const void *fdt, int stroffset,
                           const char *s, int len)
  {
-       const char *p = fdt_string(fdt, stroffset);
+       int slen;
+       const char *p = fdt_get_string(fdt, stroffset, &slen);
  
-       return (strlen(p) == len) && (memcmp(p, s, len) == 0);
+       return p && (slen == len) && (memcmp(p, s, len) == 0);
  }
  
  uint32_t fdt_get_max_phandle(const void *fdt)
@@ -115,21 +170,42 @@ uint32_t fdt_get_max_phandle(const void *fdt)
         return 0;
  }
  
+static const struct fdt_reserve_entry *fdt_mem_rsv(const void *fdt, int n)
+{
+       int offset = n * sizeof(struct fdt_reserve_entry);
+       int absoffset = fdt_off_mem_rsvmap(fdt) + offset;
+
+       if (absoffset < fdt_off_mem_rsvmap(fdt))
+               return NULL;
+       if (absoffset > fdt_totalsize(fdt) - sizeof(struct fdt_reserve_entry))
+               return NULL;
+       return fdt_mem_rsv_(fdt, n);
+}
+
  int fdt_get_mem_rsv(const void *fdt, int n, uint64_t *address, uint64_t *size)
  {
-       FDT_CHECK_HEADER(fdt);
-       *address = fdt64_to_cpu(fdt_mem_rsv_(fdt, n)->address);
-       *size = fdt64_to_cpu(fdt_mem_rsv_(fdt, n)->size);
+       const struct fdt_reserve_entry *re;
+
+       FDT_RO_PROBE(fdt);
+       re = fdt_mem_rsv(fdt, n);
+       if (!re)
+               return -FDT_ERR_BADOFFSET;
+
+       *address = fdt64_ld(&re->address);
+       *size = fdt64_ld(&re->size);
         return 0;
  }
  
  int fdt_num_mem_rsv(const void *fdt)
  {
-       int i = 0;
+       int i;
+       const struct fdt_reserve_entry *re;
  
-       while (fdt64_to_cpu(fdt_mem_rsv_(fdt, i)->size) != 0)
-               i++;
-       return i;
+       for (i = 0; (re = fdt_mem_rsv(fdt, i)) != NULL; i++) {
+               if (fdt64_ld(&re->size) == 0)
+                       return i;
+       }
+       return -FDT_ERR_TRUNCATED;
  }
  
  static int nextprop_(const void *fdt, int offset)
@@ -161,7 +237,7 @@ int fdt_subnode_offset_namelen(const void *fdt, int offset,
  {
         int depth;
  
-       FDT_CHECK_HEADER(fdt);
+       FDT_RO_PROBE(fdt);
  
         for (depth = 0;
              (offset >= 0) && (depth >= 0);
@@ -187,7 +263,7 @@ int fdt_path_offset_namelen(const void *fdt, const char *path, int namelen)
         const char *p = path;
         int offset = 0;
  
-       FDT_CHECK_HEADER(fdt);
+       FDT_RO_PROBE(fdt);
  
         /* see if we have an alias */
         if (*path != '/') {
@@ -237,7 +313,7 @@ const char *fdt_get_name(const void *fdt, int nodeoffset, int *len)
         const char *nameptr;
         int err;
  
-       if (((err = fdt_check_header(fdt)) != 0)
+       if (((err = fdt_ro_probe_(fdt)) != 0)
             || ((err = fdt_check_node_offset_(fdt, nodeoffset)) < 0))
                         goto fail;
  
@@ -303,7 +379,7 @@ static const struct fdt_property *fdt_get_property_by_offset_(const void *fdt,
         prop = fdt_offset_ptr_(fdt, offset);
  
         if (lenp)
-               *lenp = fdt32_to_cpu(prop->len);
+               *lenp = fdt32_ld(&prop->len);
  
         return prop;
  }
@@ -340,7 +416,7 @@ static const struct fdt_property *fdt_get_property_namelen_(const void *fdt,
                         offset = -FDT_ERR_INTERNAL;
                         break;
                 }
-               if (fdt_string_eq_(fdt, fdt32_to_cpu(prop->nameoff),
+               if (fdt_string_eq_(fdt, fdt32_ld(&prop->nameoff),
                                    name, namelen)) {
                         if (poffset)
                                 *poffset = offset;
@@ -393,7 +469,7 @@ const void *fdt_getprop_namelen(const void *fdt, int nodeoffset,
  
         /* Handle realignment */
         if (fdt_version(fdt) < 0x10 && (poffset + sizeof(*prop)) % 8 &&
-           fdt32_to_cpu(prop->len) >= 8)
+           fdt32_ld(&prop->len) >= 8)
                 return prop->data + 4;
         return prop->data;
  }
@@ -406,12 +482,22 @@ const void *fdt_getprop_by_offset(const void *fdt, int offset,
         prop = fdt_get_property_by_offset_(fdt, offset, lenp);
         if (!prop)
                 return NULL;
-       if (namep)
-               *namep = fdt_string(fdt, fdt32_to_cpu(prop->nameoff));
+       if (namep) {
+               const char *name;
+               int namelen;
+               name = fdt_get_string(fdt, fdt32_ld(&prop->nameoff),
+                                     &namelen);
+               if (!name) {
+                       if (lenp)
+                               *lenp = namelen;
+                       return NULL;
+               }
+               *namep = name;
+       }
  
         /* Handle realignment */
         if (fdt_version(fdt) < 0x10 && (offset + sizeof(*prop)) % 8 &&
-           fdt32_to_cpu(prop->len) >= 8)
+           fdt32_ld(&prop->len) >= 8)
                 return prop->data + 4;
         return prop->data;
  }
@@ -436,7 +522,7 @@ uint32_t fdt_get_phandle(const void *fdt, int nodeoffset)
                         return 0;
         }
  
-       return fdt32_to_cpu(*php);
+       return fdt32_ld(php);
  }
  
  const char *fdt_get_alias_namelen(const void *fdt,
@@ -462,7 +548,7 @@ int fdt_get_path(const void *fdt, int nodeoffset, char *buf, int buflen)
         int offset, depth, namelen;
         const char *name;
  
-       FDT_CHECK_HEADER(fdt);
+       FDT_RO_PROBE(fdt);
  
         if (buflen < 2)
                 return -FDT_ERR_NOSPACE;
@@ -514,7 +600,7 @@ int fdt_supernode_atdepth_offset(const void *fdt, int nodeoffset,
         int offset, depth;
         int supernodeoffset = -FDT_ERR_INTERNAL;
  
-       FDT_CHECK_HEADER(fdt);
+       FDT_RO_PROBE(fdt);
  
         if (supernodedepth < 0)
                 return -FDT_ERR_NOTFOUND;
@@ -573,7 +659,7 @@ int fdt_node_offset_by_prop_value(const void *fdt, int startoffset,
         const void *val;
         int len;
  
-       FDT_CHECK_HEADER(fdt);
+       FDT_RO_PROBE(fdt);
  
         /* FIXME: The algorithm here is pretty horrible: we scan each
          * property of a node in fdt_getprop(), then if that didn't
@@ -599,7 +685,7 @@ int fdt_node_offset_by_phandle(const void *fdt, uint32_t phandle)
         if ((phandle == 0) || (phandle == -1))
                 return -FDT_ERR_BADPHANDLE;
  
-       FDT_CHECK_HEADER(fdt);
+       FDT_RO_PROBE(fdt);
  
         /* FIXME: The algorithm here is pretty horrible: we
          * potentially scan each property of a node in
@@ -752,7 +838,7 @@ int fdt_node_offset_by_compatible(const void *fdt, int startoffset,
  {
         int offset, err;
  
-       FDT_CHECK_HEADER(fdt);
+       FDT_RO_PROBE(fdt);
  
         /* FIXME: The algorithm here is pretty horrible: we scan each
          * property of a node in fdt_node_check_compatible(), then if
@@ -771,3 +857,66 @@ int fdt_node_offset_by_compatible(const void *fdt, int startoffset,
  
         return offset; /* error from fdt_next_node() */
  }
+
+int fdt_check_full(const void *fdt, size_t bufsize)
+{
+       int err;
+       int num_memrsv;
+       int offset, nextoffset = 0;
+       uint32_t tag;
+       unsigned depth = 0;
+       const void *prop;
+       const char *propname;
+
+       if (bufsize < FDT_V1_SIZE)
+               return -FDT_ERR_TRUNCATED;
+       err = fdt_check_header(fdt);
+       if (err != 0)
+               return err;
+       if (bufsize < fdt_totalsize(fdt))
+               return -FDT_ERR_TRUNCATED;
+
+       num_memrsv = fdt_num_mem_rsv(fdt);
+       if (num_memrsv < 0)
+               return num_memrsv;
+
+       while (1) {
+               offset = nextoffset;
+               tag = fdt_next_tag(fdt, offset, &nextoffset);
+
+               if (nextoffset < 0)
+                       return nextoffset;
+
+               switch (tag) {
+               case FDT_NOP:
+                       break;
+
+               case FDT_END:
+                       if (depth != 0)
+                               return -FDT_ERR_BADSTRUCTURE;
+                       return 0;
+
+               case FDT_BEGIN_NODE:
+                       depth++;
+                       if (depth > INT_MAX)
+                               return -FDT_ERR_BADSTRUCTURE;
+                       break;
+
+               case FDT_END_NODE:
+                       if (depth == 0)
+                               return -FDT_ERR_BADSTRUCTURE;
+                       depth--;
+                       break;
+
+               case FDT_PROP:
+                       prop = fdt_getprop_by_offset(fdt, offset, &propname,
+                                                    &err);
+                       if (!prop)
+                               return err;
+                       break;
+
+               default:
+                       return -FDT_ERR_INTERNAL;
+               }
+       }
+}
diff --git a/scripts/dtc/libfdt/fdt_rw.c b/scripts/dtc/libfdt/fdt_rw.c

index 9b82905..2e49855 100644 (file)
--- a/scripts/dtc/libfdt/fdt_rw.c
+++ b/scripts/dtc/libfdt/fdt_rw.c
@@ -67,9 +67,9 @@ static int fdt_blocks_misordered_(const void *fdt,
                     (fdt_off_dt_strings(fdt) + fdt_size_dt_strings(fdt)));
  }
  
-static int fdt_rw_check_header_(void *fdt)
+static int fdt_rw_probe_(void *fdt)
  {
-       FDT_CHECK_HEADER(fdt);
+       FDT_RO_PROBE(fdt);
  
         if (fdt_version(fdt) < 17)
                 return -FDT_ERR_BADVERSION;
@@ -82,10 +82,10 @@ static int fdt_rw_check_header_(void *fdt)
         return 0;
  }
  
-#define FDT_RW_CHECK_HEADER(fdt) \
+#define FDT_RW_PROBE(fdt) \
         { \
                 int err_; \
-               if ((err_ = fdt_rw_check_header_(fdt)) != 0) \
+               if ((err_ = fdt_rw_probe_(fdt)) != 0) \
                         return err_; \
         }
  
@@ -176,7 +176,7 @@ int fdt_add_mem_rsv(void *fdt, uint64_t address, uint64_t size)
         struct fdt_reserve_entry *re;
         int err;
  
-       FDT_RW_CHECK_HEADER(fdt);
+       FDT_RW_PROBE(fdt);
  
         re = fdt_mem_rsv_w_(fdt, fdt_num_mem_rsv(fdt));
         err = fdt_splice_mem_rsv_(fdt, re, 0, 1);
@@ -192,7 +192,7 @@ int fdt_del_mem_rsv(void *fdt, int n)
  {
         struct fdt_reserve_entry *re = fdt_mem_rsv_w_(fdt, n);
  
-       FDT_RW_CHECK_HEADER(fdt);
+       FDT_RW_PROBE(fdt);
  
         if (n >= fdt_num_mem_rsv(fdt))
                 return -FDT_ERR_NOTFOUND;
@@ -252,7 +252,7 @@ int fdt_set_name(void *fdt, int nodeoffset, const char *name)
         int oldlen, newlen;
         int err;
  
-       FDT_RW_CHECK_HEADER(fdt);
+       FDT_RW_PROBE(fdt);
  
         namep = (char *)(uintptr_t)fdt_get_name(fdt, nodeoffset, &oldlen);
         if (!namep)
@@ -275,7 +275,7 @@ int fdt_setprop_placeholder(void *fdt, int nodeoffset, const char *name,
         struct fdt_property *prop;
         int err;
  
-       FDT_RW_CHECK_HEADER(fdt);
+       FDT_RW_PROBE(fdt);
  
         err = fdt_resize_property_(fdt, nodeoffset, name, len, &prop);
         if (err == -FDT_ERR_NOTFOUND)
@@ -308,7 +308,7 @@ int fdt_appendprop(void *fdt, int nodeoffset, const char *name,
         struct fdt_property *prop;
         int err, oldlen, newlen;
  
-       FDT_RW_CHECK_HEADER(fdt);
+       FDT_RW_PROBE(fdt);
  
         prop = fdt_get_property_w(fdt, nodeoffset, name, &oldlen);
         if (prop) {
@@ -334,7 +334,7 @@ int fdt_delprop(void *fdt, int nodeoffset, const char *name)
         struct fdt_property *prop;
         int len, proplen;
  
-       FDT_RW_CHECK_HEADER(fdt);
+       FDT_RW_PROBE(fdt);
  
         prop = fdt_get_property_w(fdt, nodeoffset, name, &len);
         if (!prop)
@@ -354,7 +354,7 @@ int fdt_add_subnode_namelen(void *fdt, int parentoffset,
         uint32_t tag;
         fdt32_t *endtag;
  
-       FDT_RW_CHECK_HEADER(fdt);
+       FDT_RW_PROBE(fdt);
  
         offset = fdt_subnode_offset_namelen(fdt, parentoffset, name, namelen);
         if (offset >= 0)
@@ -394,7 +394,7 @@ int fdt_del_node(void *fdt, int nodeoffset)
  {
         int endoffset;
  
-       FDT_RW_CHECK_HEADER(fdt);
+       FDT_RW_PROBE(fdt);
  
         endoffset = fdt_node_end_offset_(fdt, nodeoffset);
         if (endoffset < 0)
@@ -435,7 +435,7 @@ int fdt_open_into(const void *fdt, void *buf, int bufsize)
         const char *fdtend = fdtstart + fdt_totalsize(fdt);
         char *tmp;
  
-       FDT_CHECK_HEADER(fdt);
+       FDT_RO_PROBE(fdt);
  
         mem_rsv_size = (fdt_num_mem_rsv(fdt)+1)
                 * sizeof(struct fdt_reserve_entry);
@@ -494,7 +494,7 @@ int fdt_pack(void *fdt)
  {
         int mem_rsv_size;
  
-       FDT_RW_CHECK_HEADER(fdt);
+       FDT_RW_PROBE(fdt);
  
         mem_rsv_size = (fdt_num_mem_rsv(fdt)+1)
                 * sizeof(struct fdt_reserve_entry);
diff --git a/scripts/dtc/libfdt/fdt_sw.c b/scripts/dtc/libfdt/fdt_sw.c

index 6d33cc2..9fa4a94 100644 (file)
--- a/scripts/dtc/libfdt/fdt_sw.c
+++ b/scripts/dtc/libfdt/fdt_sw.c
@@ -55,21 +55,77 @@
  
  #include "libfdt_internal.h"
  
-static int fdt_sw_check_header_(void *fdt)
+static int fdt_sw_probe_(void *fdt)
  {
-       if (fdt_magic(fdt) != FDT_SW_MAGIC)
+       if (fdt_magic(fdt) == FDT_MAGIC)
+               return -FDT_ERR_BADSTATE;
+       else if (fdt_magic(fdt) != FDT_SW_MAGIC)
                 return -FDT_ERR_BADMAGIC;
-       /* FIXME: should check more details about the header state */
         return 0;
  }
  
-#define FDT_SW_CHECK_HEADER(fdt) \
+#define FDT_SW_PROBE(fdt) \
+       { \
+               int err; \
+               if ((err = fdt_sw_probe_(fdt)) != 0) \
+                       return err; \
+       }
+
+/* 'memrsv' state:     Initial state after fdt_create()
+ *
+ * Allowed functions:
+ *     fdt_add_reservemap_entry()
+ *     fdt_finish_reservemap()         [moves to 'struct' state]
+ */
+static int fdt_sw_probe_memrsv_(void *fdt)
+{
+       int err = fdt_sw_probe_(fdt);
+       if (err)
+               return err;
+
+       if (fdt_off_dt_strings(fdt) != 0)
+               return -FDT_ERR_BADSTATE;
+       return 0;
+}
+
+#define FDT_SW_PROBE_MEMRSV(fdt) \
+       { \
+               int err; \
+               if ((err = fdt_sw_probe_memrsv_(fdt)) != 0) \
+                       return err; \
+       }
+
+/* 'struct' state:     Enter this state after fdt_finish_reservemap()
+ *
+ * Allowed functions:
+ *     fdt_begin_node()
+ *     fdt_end_node()
+ *     fdt_property*()
+ *     fdt_finish()                    [moves to 'complete' state]
+ */
+static int fdt_sw_probe_struct_(void *fdt)
+{
+       int err = fdt_sw_probe_(fdt);
+       if (err)
+               return err;
+
+       if (fdt_off_dt_strings(fdt) != fdt_totalsize(fdt))
+               return -FDT_ERR_BADSTATE;
+       return 0;
+}
+
+#define FDT_SW_PROBE_STRUCT(fdt) \
         { \
                 int err; \
-               if ((err = fdt_sw_check_header_(fdt)) != 0) \
+               if ((err = fdt_sw_probe_struct_(fdt)) != 0) \
                         return err; \
         }
  
+/* 'complete' state:   Enter this state after fdt_finish()
+ *
+ * Allowed functions: none
+ */
+
  static void *fdt_grab_space_(void *fdt, size_t len)
  {
         int offset = fdt_size_dt_struct(fdt);
@@ -87,9 +143,11 @@ static void *fdt_grab_space_(void *fdt, size_t len)
  
  int fdt_create(void *buf, int bufsize)
  {
+       const size_t hdrsize = FDT_ALIGN(sizeof(struct fdt_header),
+                                        sizeof(struct fdt_reserve_entry));
         void *fdt = buf;
  
-       if (bufsize < sizeof(struct fdt_header))
+       if (bufsize < hdrsize)
                 return -FDT_ERR_NOSPACE;
  
         memset(buf, 0, bufsize);
@@ -99,10 +157,9 @@ int fdt_create(void *buf, int bufsize)
         fdt_set_last_comp_version(fdt, FDT_FIRST_SUPPORTED_VERSION);
         fdt_set_totalsize(fdt,  bufsize);
  
-       fdt_set_off_mem_rsvmap(fdt, FDT_ALIGN(sizeof(struct fdt_header),
-                                             sizeof(struct fdt_reserve_entry)));
+       fdt_set_off_mem_rsvmap(fdt, hdrsize);
         fdt_set_off_dt_struct(fdt, fdt_off_mem_rsvmap(fdt));
-       fdt_set_off_dt_strings(fdt, bufsize);
+       fdt_set_off_dt_strings(fdt, 0);
  
         return 0;
  }
@@ -112,11 +169,14 @@ int fdt_resize(void *fdt, void *buf, int bufsize)
         size_t headsize, tailsize;
         char *oldtail, *newtail;
  
-       FDT_SW_CHECK_HEADER(fdt);
+       FDT_SW_PROBE(fdt);
  
-       headsize = fdt_off_dt_struct(fdt);
+       headsize = fdt_off_dt_struct(fdt) + fdt_size_dt_struct(fdt);
         tailsize = fdt_size_dt_strings(fdt);
  
+       if ((headsize + tailsize) > fdt_totalsize(fdt))
+               return -FDT_ERR_INTERNAL;
+
         if ((headsize + tailsize) > bufsize)
                 return -FDT_ERR_NOSPACE;
  
@@ -133,8 +193,9 @@ int fdt_resize(void *fdt, void *buf, int bufsize)
                 memmove(buf, fdt, headsize);
         }
  
-       fdt_set_off_dt_strings(buf, bufsize);
         fdt_set_totalsize(buf, bufsize);
+       if (fdt_off_dt_strings(buf))
+               fdt_set_off_dt_strings(buf, bufsize);
  
         return 0;
  }
@@ -144,10 +205,7 @@ int fdt_add_reservemap_entry(void *fdt, uint64_t addr, uint64_t size)
         struct fdt_reserve_entry *re;
         int offset;
  
-       FDT_SW_CHECK_HEADER(fdt);
-
-       if (fdt_size_dt_struct(fdt))
-               return -FDT_ERR_BADSTATE;
+       FDT_SW_PROBE_MEMRSV(fdt);
  
         offset = fdt_off_dt_struct(fdt);
         if ((offset + sizeof(*re)) > fdt_totalsize(fdt))
@@ -164,16 +222,23 @@ int fdt_add_reservemap_entry(void *fdt, uint64_t addr, uint64_t size)
  
  int fdt_finish_reservemap(void *fdt)
  {
-       return fdt_add_reservemap_entry(fdt, 0, 0);
+       int err = fdt_add_reservemap_entry(fdt, 0, 0);
+
+       if (err)
+               return err;
+
+       fdt_set_off_dt_strings(fdt, fdt_totalsize(fdt));
+       return 0;
  }
  
  int fdt_begin_node(void *fdt, const char *name)
  {
         struct fdt_node_header *nh;
-       int namelen = strlen(name) + 1;
+       int namelen;
  
-       FDT_SW_CHECK_HEADER(fdt);
+       FDT_SW_PROBE_STRUCT(fdt);
  
+       namelen = strlen(name) + 1;
         nh = fdt_grab_space_(fdt, sizeof(*nh) + FDT_TAGALIGN(namelen));
         if (! nh)
                 return -FDT_ERR_NOSPACE;
@@ -187,7 +252,7 @@ int fdt_end_node(void *fdt)
  {
         fdt32_t *en;
  
-       FDT_SW_CHECK_HEADER(fdt);
+       FDT_SW_PROBE_STRUCT(fdt);
  
         en = fdt_grab_space_(fdt, FDT_TAGSIZE);
         if (! en)
@@ -225,7 +290,7 @@ int fdt_property_placeholder(void *fdt, const char *name, int len, void **valp)
         struct fdt_property *prop;
         int nameoff;
  
-       FDT_SW_CHECK_HEADER(fdt);
+       FDT_SW_PROBE_STRUCT(fdt);
  
         nameoff = fdt_find_add_string_(fdt, name);
         if (nameoff == 0)
@@ -262,7 +327,7 @@ int fdt_finish(void *fdt)
         uint32_t tag;
         int offset, nextoffset;
  
-       FDT_SW_CHECK_HEADER(fdt);
+       FDT_SW_PROBE_STRUCT(fdt);
  
         /* Add terminator */
         end = fdt_grab_space_(fdt, sizeof(*end));
diff --git a/scripts/dtc/libfdt/libfdt.h b/scripts/dtc/libfdt/libfdt.h

index 1e27780..2bd151d 100644 (file)
--- a/scripts/dtc/libfdt/libfdt.h
+++ b/scripts/dtc/libfdt/libfdt.h
@@ -90,8 +90,9 @@
  
  /* Error codes: codes for bad device tree blobs */
  #define FDT_ERR_TRUNCATED      8
-       /* FDT_ERR_TRUNCATED: Structure block of the given device tree
-        * ends without an FDT_END tag. */
+       /* FDT_ERR_TRUNCATED: FDT or a sub-block is improperly
+        * terminated (overflows, goes outside allowed bounds, or
+        * isn't properly terminated).  */
  #define FDT_ERR_BADMAGIC       9
         /* FDT_ERR_BADMAGIC: Given "device tree" appears not to be a
          * device tree at all - it is missing the flattened device
@@ -153,6 +154,29 @@ static inline void *fdt_offset_ptr_w(void *fdt, int offset, int checklen)
  
  uint32_t fdt_next_tag(const void *fdt, int offset, int *nextoffset);
  
+/*
+ * Alignment helpers:
+ *     These helpers access words from a device tree blob.  They're
+ *     built to work even with unaligned pointers on platforms (like
+ *     ARM) that don't like unaligned loads and stores
+ */
+
+static inline uint32_t fdt32_ld(const fdt32_t *p)
+{
+       fdt32_t v;
+
+       memcpy(&v, p, sizeof(v));
+       return fdt32_to_cpu(v);
+}
+
+static inline uint64_t fdt64_ld(const fdt64_t *p)
+{
+       fdt64_t v;
+
+       memcpy(&v, p, sizeof(v));
+       return fdt64_to_cpu(v);
+}
+
  /**********************************************************************/
  /* Traversal functions                                                */
  /**********************************************************************/
@@ -213,7 +237,7 @@ int fdt_next_subnode(const void *fdt, int offset);
  /* General functions                                                  */
  /**********************************************************************/
  #define fdt_get_header(fdt, field) \
-       (fdt32_to_cpu(((const struct fdt_header *)(fdt))->field))
+       (fdt32_ld(&((const struct fdt_header *)(fdt))->field))
  #define fdt_magic(fdt)                 (fdt_get_header(fdt, magic))
  #define fdt_totalsize(fdt)             (fdt_get_header(fdt, totalsize))
  #define fdt_off_dt_struct(fdt)         (fdt_get_header(fdt, off_dt_struct))
@@ -244,18 +268,31 @@ fdt_set_hdr_(size_dt_struct);
  #undef fdt_set_hdr_
  
  /**
- * fdt_check_header - sanity check a device tree or possible device tree
+ * fdt_header_size - return the size of the tree's header
+ * @fdt: pointer to a flattened device tree
+ */
+size_t fdt_header_size_(uint32_t version);
+static inline size_t fdt_header_size(const void *fdt)
+{
+       return fdt_header_size_(fdt_version(fdt));
+}
+
+/**
+ * fdt_check_header - sanity check a device tree header
+ *
   * @fdt: pointer to data which might be a flattened device tree
   *
   * fdt_check_header() checks that the given buffer contains what
- * appears to be a flattened device tree with sane information in its
- * header.
+ * appears to be a flattened device tree, and that the header contains
+ * valid information (to the extent that can be determined from the
+ * header alone).
   *
   * returns:
   *     0, if the buffer appears to contain a valid device tree
   *     -FDT_ERR_BADMAGIC,
   *     -FDT_ERR_BADVERSION,
- *     -FDT_ERR_BADSTATE, standard meanings, as above
+ *     -FDT_ERR_BADSTATE,
+ *     -FDT_ERR_TRUNCATED, standard meanings, as above
   */
  int fdt_check_header(const void *fdt);
  
@@ -284,6 +321,24 @@ int fdt_move(const void *fdt, void *buf, int bufsize);
  /* Read-only functions                                                */
  /**********************************************************************/
  
+int fdt_check_full(const void *fdt, size_t bufsize);
+
+/**
+ * fdt_get_string - retrieve a string from the strings block of a device tree
+ * @fdt: pointer to the device tree blob
+ * @stroffset: offset of the string within the strings block (native endian)
+ * @lenp: optional pointer to return the string's length
+ *
+ * fdt_get_string() retrieves a pointer to a single string from the
+ * strings block of the device tree blob at fdt, and optionally also
+ * returns the string's length in *lenp.
+ *
+ * returns:
+ *     a pointer to the string, on success
+ *     NULL, if stroffset is out of bounds, or doesn't point to a valid string
+ */
+const char *fdt_get_string(const void *fdt, int stroffset, int *lenp);
+
  /**
   * fdt_string - retrieve a string from the strings block of a device tree
   * @fdt: pointer to the device tree blob
@@ -294,7 +349,7 @@ int fdt_move(const void *fdt, void *buf, int bufsize);
   *
   * returns:
   *     a pointer to the string, on success
- *     NULL, if stroffset is out of bounds
+ *     NULL, if stroffset is out of bounds, or doesn't point to a valid string
   */
  const char *fdt_string(const void *fdt, int stroffset);
  
@@ -1090,7 +1145,7 @@ int fdt_address_cells(const void *fdt, int nodeoffset);
   *
   * returns:
   *     0 <= n < FDT_MAX_NCELLS, on success
- *      2, if the node has no #address-cells property
+ *      2, if the node has no #size-cells property
   *      -FDT_ERR_BADNCELLS, if the node has a badly formatted or invalid
   *             #size-cells property
   *     -FDT_ERR_BADMAGIC,
@@ -1313,10 +1368,13 @@ static inline int fdt_property_u64(void *fdt, const char *name, uint64_t val)
         fdt64_t tmp = cpu_to_fdt64(val);
         return fdt_property(fdt, name, &tmp, sizeof(tmp));
  }
+
+#ifndef SWIG /* Not available in Python */
  static inline int fdt_property_cell(void *fdt, const char *name, uint32_t val)
  {
         return fdt_property_u32(fdt, name, val);
  }
+#endif
  
  /**
   * fdt_property_placeholder - add a new property and return a ptr to its value
diff --git a/scripts/dtc/libfdt/libfdt_env.h b/scripts/dtc/libfdt/libfdt_env.h

index bd24746..eb20538 100644 (file)
--- a/scripts/dtc/libfdt/libfdt_env.h
+++ b/scripts/dtc/libfdt/libfdt_env.h
@@ -56,6 +56,7 @@
  #include <stdint.h>
  #include <stdlib.h>
  #include <string.h>
+#include <limits.h>
  
  #ifdef __CHECKER__
  #define FDT_FORCE __attribute__((force))
diff --git a/scripts/dtc/libfdt/libfdt_internal.h b/scripts/dtc/libfdt/libfdt_internal.h

index 7681e19..4109f89 100644 (file)
--- a/scripts/dtc/libfdt/libfdt_internal.h
+++ b/scripts/dtc/libfdt/libfdt_internal.h
@@ -55,10 +55,11 @@
  #define FDT_ALIGN(x, a)                (((x) + (a) - 1) & ~((a) - 1))
  #define FDT_TAGALIGN(x)                (FDT_ALIGN((x), FDT_TAGSIZE))
  
-#define FDT_CHECK_HEADER(fdt) \
+int fdt_ro_probe_(const void *fdt);
+#define FDT_RO_PROBE(fdt)                      \
         { \
                 int err_; \
-               if ((err_ = fdt_check_header(fdt)) != 0) \
+               if ((err_ = fdt_ro_probe_(fdt)) != 0)   \
                         return err_; \
         }
  
diff --git a/scripts/dtc/livetree.c b/scripts/dtc/livetree.c

index 6e4c367..4ff0679 100644 (file)
--- a/scripts/dtc/livetree.c
+++ b/scripts/dtc/livetree.c
@@ -594,6 +594,7 @@ struct node *get_node_by_ref(struct node *tree, const char *ref)
  cell_t get_node_phandle(struct node *root, struct node *node)
  {
         static cell_t phandle = 1; /* FIXME: ick, static local */
+       struct data d = empty_data;
  
         if ((node->phandle != 0) && (node->phandle != -1))
                 return node->phandle;
@@ -603,17 +604,16 @@ cell_t get_node_phandle(struct node *root, struct node *node)
  
         node->phandle = phandle;
  
+       d = data_add_marker(d, TYPE_UINT32, NULL);
+       d = data_append_cell(d, phandle);
+
         if (!get_property(node, "linux,phandle")
             && (phandle_format & PHANDLE_LEGACY))
-               add_property(node,
-                            build_property("linux,phandle",
-                                           data_append_cell(empty_data, phandle)));
+               add_property(node, build_property("linux,phandle", d));
  
         if (!get_property(node, "phandle")
             && (phandle_format & PHANDLE_EPAPR))
-               add_property(node,
-                            build_property("phandle",
-                                           data_append_cell(empty_data, phandle)));
+               add_property(node, build_property("phandle", d));
  
         /* If the node *does* have a phandle property, we must
          * be dealing with a self-referencing phandle, which will be
diff --git a/scripts/dtc/treesource.c b/scripts/dtc/treesource.c

index 2461a3d..f2874f1 100644 (file)
--- a/scripts/dtc/treesource.c
+++ b/scripts/dtc/treesource.c
@@ -61,24 +61,14 @@ static bool isstring(char c)
                 || strchr("\a\b\t\n\v\f\r", c));
  }
  
-static void write_propval_string(FILE *f, struct data val)
+static void write_propval_string(FILE *f, const char *s, size_t len)
  {
-       const char *str = val.val;
-       int i;
-       struct marker *m = val.markers;
-
-       assert(str[val.len-1] == '\0');
+       const char *end = s + len - 1;
+       assert(*end == '\0');
  
-       while (m && (m->offset == 0)) {
-               if (m->type == LABEL)
-                       fprintf(f, "%s: ", m->ref);
-               m = m->next;
-       }
         fprintf(f, "\"");
-
-       for (i = 0; i < (val.len-1); i++) {
-               char c = str[i];
-
+       while (s < end) {
+               char c = *s++;
                 switch (c) {
                 case '\a':
                         fprintf(f, "\\a");
@@ -108,91 +98,78 @@ static void write_propval_string(FILE *f, struct data val)
                         fprintf(f, "\\\"");
                         break;
                 case '\0':
-                       fprintf(f, "\", ");
-                       while (m && (m->offset <= (i + 1))) {
-                               if (m->type == LABEL) {
-                                       assert(m->offset == (i+1));
-                                       fprintf(f, "%s: ", m->ref);
-                               }
-                               m = m->next;
-                       }
-                       fprintf(f, "\"");
+                       fprintf(f, "\\0");
                         break;
                 default:
                         if (isprint((unsigned char)c))
                                 fprintf(f, "%c", c);
                         else
-                               fprintf(f, "\\x%02hhx", c);
+                               fprintf(f, "\\x%02"PRIx8, c);
                 }
         }
         fprintf(f, "\"");
-
-       /* Wrap up any labels at the end of the value */
-       for_each_marker_of_type(m, LABEL) {
-               assert (m->offset == val.len);
-               fprintf(f, " %s:", m->ref);
-       }
  }
  
-static void write_propval_cells(FILE *f, struct data val)
+static void write_propval_int(FILE *f, const char *p, size_t len, size_t width)
  {
-       void *propend = val.val + val.len;
-       fdt32_t *cp = (fdt32_t *)val.val;
-       struct marker *m = val.markers;
-
-       fprintf(f, "<");
-       for (;;) {
-               while (m && (m->offset <= ((char *)cp - val.val))) {
-                       if (m->type == LABEL) {
-                               assert(m->offset == ((char *)cp - val.val));
-                               fprintf(f, "%s: ", m->ref);
-                       }
-                       m = m->next;
-               }
+       const char *end = p + len;
+       assert(len % width == 0);
  
-               fprintf(f, "0x%x", fdt32_to_cpu(*cp++));
-               if ((void *)cp >= propend)
+       for (; p < end; p += width) {
+               switch (width) {
+               case 1:
+                       fprintf(f, " %02"PRIx8, *(const uint8_t*)p);
+                       break;
+               case 2:
+                       fprintf(f, " 0x%02"PRIx16, fdt16_to_cpu(*(const fdt16_t*)p));
+                       break;
+               case 4:
+                       fprintf(f, " 0x%02"PRIx32, fdt32_to_cpu(*(const fdt32_t*)p));
+                       break;
+               case 8:
+                       fprintf(f, " 0x%02"PRIx64, fdt64_to_cpu(*(const fdt64_t*)p));
                         break;
-               fprintf(f, " ");
+               }
         }
+}
  
-       /* Wrap up any labels at the end of the value */
-       for_each_marker_of_type(m, LABEL) {
-               assert (m->offset == val.len);
-               fprintf(f, " %s:", m->ref);
-       }
-       fprintf(f, ">");
+static bool has_data_type_information(struct marker *m)
+{
+       return m->type >= TYPE_UINT8;
  }
  
-static void write_propval_bytes(FILE *f, struct data val)
+static struct marker *next_type_marker(struct marker *m)
  {
-       void *propend = val.val + val.len;
-       const char *bp = val.val;
-       struct marker *m = val.markers;
-
-       fprintf(f, "[");
-       for (;;) {
-               while (m && (m->offset == (bp-val.val))) {
-                       if (m->type == LABEL)
-                               fprintf(f, "%s: ", m->ref);
-                       m = m->next;
-               }
+       while (m && !has_data_type_information(m))
+               m = m->next;
+       return m;
+}
  
-               fprintf(f, "%02hhx", (unsigned char)(*bp++));
-               if ((const void *)bp >= propend)
-                       break;
-               fprintf(f, " ");
-       }
+size_t type_marker_length(struct marker *m)
+{
+       struct marker *next = next_type_marker(m->next);
  
-       /* Wrap up any labels at the end of the value */
-       for_each_marker_of_type(m, LABEL) {
-               assert (m->offset == val.len);
-               fprintf(f, " %s:", m->ref);
-       }
-       fprintf(f, "]");
+       if (next)
+               return next->offset - m->offset;
+       return 0;
  }
  
-static void write_propval(FILE *f, struct property *prop)
+static const char *delim_start[] = {
+       [TYPE_UINT8] = "[",
+       [TYPE_UINT16] = "/bits/ 16 <",
+       [TYPE_UINT32] = "<",
+       [TYPE_UINT64] = "/bits/ 64 <",
+       [TYPE_STRING] = "",
+};
+static const char *delim_end[] = {
+       [TYPE_UINT8] = " ]",
+       [TYPE_UINT16] = " >",
+       [TYPE_UINT32] = " >",
+       [TYPE_UINT64] = " >",
+       [TYPE_STRING] = "",
+};
+
+static enum markertype guess_value_type(struct property *prop)
  {
         int len = prop->val.len;
         const char *p = prop->val.val;
@@ -201,11 +178,6 @@ static void write_propval(FILE *f, struct property *prop)
         int nnotstringlbl = 0, nnotcelllbl = 0;
         int i;
  
-       if (len == 0) {
-               fprintf(f, ";\n");
-               return;
-       }
-
         for (i = 0; i < len; i++) {
                 if (! isstring(p[i]))
                         nnotstring++;
@@ -220,17 +192,91 @@ static void write_propval(FILE *f, struct property *prop)
                         nnotcelllbl++;
         }
  
-       fprintf(f, " = ");
         if ((p[len-1] == '\0') && (nnotstring == 0) && (nnul < (len-nnul))
             && (nnotstringlbl == 0)) {
-               write_propval_string(f, prop->val);
+               return TYPE_STRING;
         } else if (((len % sizeof(cell_t)) == 0) && (nnotcelllbl == 0)) {
-               write_propval_cells(f, prop->val);
-       } else {
-               write_propval_bytes(f, prop->val);
+               return TYPE_UINT32;
         }
  
-       fprintf(f, ";\n");
+       return TYPE_UINT8;
+}
+
+static void write_propval(FILE *f, struct property *prop)
+{
+       size_t len = prop->val.len;
+       struct marker *m = prop->val.markers;
+       struct marker dummy_marker;
+       enum markertype emit_type = TYPE_NONE;
+
+       if (len == 0) {
+               fprintf(f, ";\n");
+               return;
+       }
+
+       fprintf(f, " = ");
+
+       if (!next_type_marker(m)) {
+               /* data type information missing, need to guess */
+               dummy_marker.type = guess_value_type(prop);
+               dummy_marker.next = prop->val.markers;
+               dummy_marker.offset = 0;
+               dummy_marker.ref = NULL;
+               m = &dummy_marker;
+       }
+
+       struct marker *m_label = prop->val.markers;
+       for_each_marker(m) {
+               size_t chunk_len;
+               const char *p = &prop->val.val[m->offset];
+
+               if (!has_data_type_information(m))
+                       continue;
+
+               chunk_len = type_marker_length(m);
+               if (!chunk_len)
+                       chunk_len = len - m->offset;
+
+               if (emit_type != TYPE_NONE)
+                       fprintf(f, "%s, ", delim_end[emit_type]);
+               emit_type = m->type;
+
+               for_each_marker_of_type(m_label, LABEL) {
+                       if (m_label->offset > m->offset)
+                               break;
+                       fprintf(f, "%s: ", m_label->ref);
+               }
+
+               fprintf(f, "%s", delim_start[emit_type]);
+
+               if (chunk_len <= 0)
+                       continue;
+
+               switch(emit_type) {
+               case TYPE_UINT16:
+                       write_propval_int(f, p, chunk_len, 2);
+                       break;
+               case TYPE_UINT32:
+                       write_propval_int(f, p, chunk_len, 4);
+                       break;
+               case TYPE_UINT64:
+                       write_propval_int(f, p, chunk_len, 8);
+                       break;
+               case TYPE_STRING:
+                       write_propval_string(f, p, chunk_len);
+                       break;
+               default:
+                       write_propval_int(f, p, chunk_len, 1);
+               }
+       }
+
+       /* Wrap up any labels at the end of the value */
+       for_each_marker_of_type(m_label, LABEL) {
+               assert (m_label->offset == len);
+               fprintf(f, " %s:", m_label->ref);
+       }
+
+       fprintf(f, "%s;\n", delim_end[emit_type] ? : "");
  }
  
  static void write_tree_source_node(FILE *f, struct node *tree, int level)
@@ -281,4 +327,3 @@ void dt_to_source(FILE *f, struct dt_info *dti)
  
         write_tree_source_node(f, dti->dt, 0);
  }
-
diff --git a/scripts/dtc/update-dtc-source.sh b/scripts/dtc/update-dtc-source.sh

index 1a009fd..7dd29a0 100755 (executable)
--- a/scripts/dtc/update-dtc-source.sh
+++ b/scripts/dtc/update-dtc-source.sh
@@ -32,7 +32,7 @@ DTC_UPSTREAM_PATH=`pwd`/../dtc
  DTC_LINUX_PATH=`pwd`/scripts/dtc
  
  DTC_SOURCE="checks.c data.c dtc.c dtc.h flattree.c fstree.c livetree.c srcpos.c \
-               srcpos.h treesource.c util.c util.h version_gen.h Makefile.dtc \
+               srcpos.h treesource.c util.c util.h version_gen.h yamltree.c Makefile.dtc \
                 dtc-lexer.l dtc-parser.y"
  LIBFDT_SOURCE="Makefile.libfdt fdt.c fdt.h fdt_addresses.c fdt_empty_tree.c \
                 fdt_overlay.c fdt_ro.c fdt_rw.c fdt_strerror.c fdt_sw.c \
diff --git a/scripts/dtc/util.c b/scripts/dtc/util.c

index 9953c32..a69b7a1 100644 (file)
--- a/scripts/dtc/util.c
+++ b/scripts/dtc/util.c
@@ -227,11 +227,11 @@ char get_escape_char(const char *s, int *i)
         return val;
  }
  
-int utilfdt_read_err_len(const char *filename, char **buffp, off_t *len)
+int utilfdt_read_err(const char *filename, char **buffp, size_t *len)
  {
         int fd = 0;     /* assume stdin */
         char *buf = NULL;
-       off_t bufsize = 1024, offset = 0;
+       size_t bufsize = 1024, offset = 0;
         int ret = 0;
  
         *buffp = NULL;
@@ -264,20 +264,15 @@ int utilfdt_read_err_len(const char *filename, char **buffp, off_t *len)
                 free(buf);
         else
                 *buffp = buf;
-       *len = bufsize;
+       if (len)
+               *len = bufsize;
         return ret;
  }
  
-int utilfdt_read_err(const char *filename, char **buffp)
-{
-       off_t len;
-       return utilfdt_read_err_len(filename, buffp, &len);
-}
-
-char *utilfdt_read_len(const char *filename, off_t *len)
+char *utilfdt_read(const char *filename, size_t *len)
  {
         char *buff;
-       int ret = utilfdt_read_err_len(filename, &buff, len);
+       int ret = utilfdt_read_err(filename, &buff, len);
  
         if (ret) {
                 fprintf(stderr, "Couldn't open blob from '%s': %s\n", filename,
@@ -288,12 +283,6 @@ char *utilfdt_read_len(const char *filename, off_t *len)
         return buff;
  }
  
-char *utilfdt_read(const char *filename)
-{
-       off_t len;
-       return utilfdt_read_len(filename, &len);
-}
-
  int utilfdt_write_err(const char *filename, const void *blob)
  {
         int fd = 1;     /* assume stdout */
diff --git a/scripts/dtc/util.h b/scripts/dtc/util.h

index 66fba8e..f6cea82 100644 (file)
--- a/scripts/dtc/util.h
+++ b/scripts/dtc/util.h
@@ -98,16 +98,10 @@ char get_escape_char(const char *s, int *i);
   * stderr.
   *
   * @param filename     The filename to read, or - for stdin
- * @return Pointer to allocated buffer containing fdt, or NULL on error
- */
-char *utilfdt_read(const char *filename);
-
-/**
- * Like utilfdt_read(), but also passes back the size of the file read.
- *
   * @param len          If non-NULL, the amount of data we managed to read
+ * @return Pointer to allocated buffer containing fdt, or NULL on error
   */
-char *utilfdt_read_len(const char *filename, off_t *len);
+char *utilfdt_read(const char *filename, size_t *len);
  
  /**
   * Read a device tree file into a buffer. Does not report errors, but only
@@ -116,16 +110,10 @@ char *utilfdt_read_len(const char *filename, off_t *len);
   *
   * @param filename     The filename to read, or - for stdin
   * @param buffp                Returns pointer to buffer containing fdt
- * @return 0 if ok, else an errno value representing the error
- */
-int utilfdt_read_err(const char *filename, char **buffp);
-
-/**
- * Like utilfdt_read_err(), but also passes back the size of the file read.
- *
   * @param len          If non-NULL, the amount of data we managed to read
+ * @return 0 if ok, else an errno value representing the error
   */
-int utilfdt_read_err_len(const char *filename, char **buffp, off_t *len);
+int utilfdt_read_err(const char *filename, char **buffp, size_t *len);
  
  /**
   * Write a device tree buffer to a file. This will report any errors on
diff --git a/scripts/dtc/version_gen.h b/scripts/dtc/version_gen.h

index b00f14f..6d23fd0 100644 (file)
--- a/scripts/dtc/version_gen.h
+++ b/scripts/dtc/version_gen.h
@@ -1 +1 @@
-#define DTC_VERSION "DTC 1.4.6-g84e414b0"
+#define DTC_VERSION "DTC 1.4.7-gc86da84d"
diff --git a/scripts/dtc/yamltree.c b/scripts/dtc/yamltree.c

new file mode 100644 (file)

index 0000000..a00285a
--- /dev/null
+++ b/scripts/dtc/yamltree.c
@@ -0,0 +1,247 @@
+/*
+ * (C) Copyright Linaro, Ltd. 2018
+ * (C) Copyright Arm Holdings.  2017
+ * (C) Copyright David Gibson <dwg@au1.ibm.com>, IBM Corporation.  2005.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
+ *                                                                   USA
+ */
+
+#include <stdlib.h>
+#include <yaml.h>
+#include "dtc.h"
+#include "srcpos.h"
+
+char *yaml_error_name[] = {
+       [YAML_NO_ERROR] = "no error",
+       [YAML_MEMORY_ERROR] = "memory error",
+       [YAML_READER_ERROR] = "reader error",
+       [YAML_SCANNER_ERROR] = "scanner error",
+       [YAML_PARSER_ERROR] = "parser error",
+       [YAML_COMPOSER_ERROR] = "composer error",
+       [YAML_WRITER_ERROR] = "writer error",
+       [YAML_EMITTER_ERROR] = "emitter error",
+};
+
+#define yaml_emitter_emit_or_die(emitter, event) (                     \
+{                                                                      \
+       if (!yaml_emitter_emit(emitter, event))                         \
+               die("yaml '%s': %s in %s, line %i\n",                   \
+                   yaml_error_name[(emitter)->error],                  \
+                   (emitter)->problem, __func__, __LINE__);            \
+})
+
+static void yaml_propval_int(yaml_emitter_t *emitter, struct marker *markers, char *data, int len, int width)
+{
+       yaml_event_t event;
+       void *tag;
+       int off, start_offset = markers->offset;
+
+       switch(width) {
+               case 1: tag = "!u8"; break;
+               case 2: tag = "!u16"; break;
+               case 4: tag = "!u32"; break;
+               case 8: tag = "!u64"; break;
+               default:
+                       die("Invalid width %i", width);
+       }
+       assert(len % width == 0);
+
+       yaml_sequence_start_event_initialize(&event, NULL,
+               (yaml_char_t *)tag, width == 4, YAML_FLOW_SEQUENCE_STYLE);
+       yaml_emitter_emit_or_die(emitter, &event);
+
+       for (off = 0; off < len; off += width) {
+               char buf[32];
+               struct marker *m;
+               bool is_phandle = false;
+
+               switch(width) {
+               case 1:
+                       sprintf(buf, "0x%"PRIx8, *(uint8_t*)(data + off));
+                       break;
+               case 2:
+                       sprintf(buf, "0x%"PRIx16, fdt16_to_cpu(*(fdt16_t*)(data + off)));
+                       break;
+               case 4:
+                       sprintf(buf, "0x%"PRIx32, fdt32_to_cpu(*(fdt32_t*)(data + off)));
+                       m = markers;
+                       is_phandle = false;
+                       for_each_marker_of_type(m, REF_PHANDLE) {
+                               if (m->offset == (start_offset + off)) {
+                                       is_phandle = true;
+                                       break;
+                               }
+                       }
+                       break;
+               case 8:
+                       sprintf(buf, "0x%"PRIx64, fdt64_to_cpu(*(fdt64_t*)(data + off)));
+                       break;
+               }
+
+               if (is_phandle)
+                       yaml_scalar_event_initialize(&event, NULL,
+                               (yaml_char_t*)"!phandle", (yaml_char_t *)buf,
+                               strlen(buf), 0, 0, YAML_PLAIN_SCALAR_STYLE);
+               else
+                       yaml_scalar_event_initialize(&event, NULL,
+                               (yaml_char_t*)YAML_INT_TAG, (yaml_char_t *)buf,
+                               strlen(buf), 1, 1, YAML_PLAIN_SCALAR_STYLE);
+               yaml_emitter_emit_or_die(emitter, &event);
+       }
+
+       yaml_sequence_end_event_initialize(&event);
+       yaml_emitter_emit_or_die(emitter, &event);
+}
+
+static void yaml_propval_string(yaml_emitter_t *emitter, char *str, int len)
+{
+       yaml_event_t event;
+       int i;
+
+       assert(str[len-1] == '\0');
+
+       /* Make sure the entire string is in the lower 7-bit ascii range */
+       for (i = 0; i < len; i++)
+               assert(isascii(str[i]));
+
+       yaml_scalar_event_initialize(&event, NULL,
+               (yaml_char_t *)YAML_STR_TAG, (yaml_char_t*)str,
+               len-1, 0, 1, YAML_DOUBLE_QUOTED_SCALAR_STYLE);
+       yaml_emitter_emit_or_die(emitter, &event);
+}
+
+static void yaml_propval(yaml_emitter_t *emitter, struct property *prop)
+{
+       yaml_event_t event;
+       int len = prop->val.len;
+       struct marker *m = prop->val.markers;
+
+       /* Emit the property name */
+       yaml_scalar_event_initialize(&event, NULL,
+               (yaml_char_t *)YAML_STR_TAG, (yaml_char_t*)prop->name,
+               strlen(prop->name), 1, 1, YAML_PLAIN_SCALAR_STYLE);
+       yaml_emitter_emit_or_die(emitter, &event);
+
+       /* Boolean properties are easiest to deal with. Length is zero, so just emit 'true' */
+       if (len == 0) {
+               yaml_scalar_event_initialize(&event, NULL,
+                       (yaml_char_t *)YAML_BOOL_TAG,
+                       (yaml_char_t*)"true",
+                       strlen("true"), 1, 0, YAML_PLAIN_SCALAR_STYLE);
+               yaml_emitter_emit_or_die(emitter, &event);
+               return;
+       }
+
+       if (!m)
+               die("No markers present in property '%s' value\n", prop->name);
+
+       yaml_sequence_start_event_initialize(&event, NULL,
+               (yaml_char_t *)YAML_SEQ_TAG, 1, YAML_FLOW_SEQUENCE_STYLE);
+       yaml_emitter_emit_or_die(emitter, &event);
+
+       for_each_marker(m) {
+               int chunk_len;
+               char *data = &prop->val.val[m->offset];
+
+               if (m->type < TYPE_UINT8)
+                       continue;
+
+               chunk_len = type_marker_length(m) ? : len;
+               assert(chunk_len > 0);
+               len -= chunk_len;
+
+               switch(m->type) {
+               case TYPE_UINT16:
+                       yaml_propval_int(emitter, m, data, chunk_len, 2);
+                       break;
+               case TYPE_UINT32:
+                       yaml_propval_int(emitter, m, data, chunk_len, 4);
+                       break;
+               case TYPE_UINT64:
+                       yaml_propval_int(emitter, m, data, chunk_len, 8);
+                       break;
+               case TYPE_STRING:
+                       yaml_propval_string(emitter, data, chunk_len);
+                       break;
+               default:
+                       yaml_propval_int(emitter, m, data, chunk_len, 1);
+                       break;
+               }
+       }
+
+       yaml_sequence_end_event_initialize(&event);
+       yaml_emitter_emit_or_die(emitter, &event);
+}
+
+
+static void yaml_tree(struct node *tree, yaml_emitter_t *emitter)
+{
+       struct property *prop;
+       struct node *child;
+       yaml_event_t event;
+
+       if (tree->deleted)
+               return;
+
+       yaml_mapping_start_event_initialize(&event, NULL,
+               (yaml_char_t *)YAML_MAP_TAG, 1, YAML_ANY_MAPPING_STYLE);
+       yaml_emitter_emit_or_die(emitter, &event);
+
+       for_each_property(tree, prop)
+               yaml_propval(emitter, prop);
+
+       /* Loop over all the children, emitting them into the map */
+       for_each_child(tree, child) {
+               yaml_scalar_event_initialize(&event, NULL,
+                       (yaml_char_t *)YAML_STR_TAG, (yaml_char_t*)child->name,
+                       strlen(child->name), 1, 0, YAML_PLAIN_SCALAR_STYLE);
+               yaml_emitter_emit_or_die(emitter, &event);
+               yaml_tree(child, emitter);
+       }
+
+       yaml_mapping_end_event_initialize(&event);
+       yaml_emitter_emit_or_die(emitter, &event);
+}
+
+void dt_to_yaml(FILE *f, struct dt_info *dti)
+{
+       yaml_emitter_t emitter;
+       yaml_event_t event;
+
+       yaml_emitter_initialize(&emitter);
+       yaml_emitter_set_output_file(&emitter, f);
+       yaml_stream_start_event_initialize(&event, YAML_UTF8_ENCODING);
+       yaml_emitter_emit_or_die(&emitter, &event);
+
+       yaml_document_start_event_initialize(&event, NULL, NULL, NULL, 0);
+       yaml_emitter_emit_or_die(&emitter, &event);
+
+       yaml_sequence_start_event_initialize(&event, NULL, (yaml_char_t *)YAML_SEQ_TAG, 1, YAML_ANY_SEQUENCE_STYLE);
+       yaml_emitter_emit_or_die(&emitter, &event);
+
+       yaml_tree(dti->dt, &emitter);
+
+       yaml_sequence_end_event_initialize(&event);
+       yaml_emitter_emit_or_die(&emitter, &event);
+
+       yaml_document_end_event_initialize(&event, 0);
+       yaml_emitter_emit_or_die(&emitter, &event);
+
+       yaml_stream_end_event_initialize(&event);
+       yaml_emitter_emit_or_die(&emitter, &event);
+
+       yaml_emitter_delete(&emitter);
+}
diff --git a/tools/testing/selftests/net/fib-onlink-tests.sh b/tools/testing/selftests/net/fib-onlink-tests.sh

index 3991ad1..864f865 100755 (executable)
--- a/tools/testing/selftests/net/fib-onlink-tests.sh
+++ b/tools/testing/selftests/net/fib-onlink-tests.sh
@@ -167,8 +167,8 @@ setup()
         # add vrf table
         ip li add ${VRF} type vrf table ${VRF_TABLE}
         ip li set ${VRF} up
-       ip ro add table ${VRF_TABLE} unreachable default
-       ip -6 ro add table ${VRF_TABLE} unreachable default
+       ip ro add table ${VRF_TABLE} unreachable default metric 8192
+       ip -6 ro add table ${VRF_TABLE} unreachable default metric 8192
  
         # create test interfaces
         ip li add ${NETIFS[p1]} type veth peer name ${NETIFS[p2]}
@@ -185,20 +185,20 @@ setup()
         for n in 1 3 5 7; do
                 ip li set ${NETIFS[p${n}]} up
                 ip addr add ${V4ADDRS[p${n}]}/24 dev ${NETIFS[p${n}]}
-               ip addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]}
+               ip addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]} nodad
         done
  
         # move peer interfaces to namespace and add addresses
         for n in 2 4 6 8; do
                 ip li set ${NETIFS[p${n}]} netns ${PEER_NS} up
                 ip -netns ${PEER_NS} addr add ${V4ADDRS[p${n}]}/24 dev ${NETIFS[p${n}]}
-               ip -netns ${PEER_NS} addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]}
+               ip -netns ${PEER_NS} addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]} nodad
         done
  
-       set +e
+       ip -6 ro add default via ${V6ADDRS[p3]/::[0-9]/::64}
+       ip -6 ro add table ${VRF_TABLE} default via ${V6ADDRS[p7]/::[0-9]/::64}
  
-       # let DAD complete - assume default of 1 probe
-       sleep 1
+       set +e
  }
  
  cleanup()
diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile

index 201b598..b3ad909 100644 (file)
--- a/tools/testing/selftests/powerpc/Makefile
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -28,7 +28,8 @@ SUB_DIRS = alignment          \
            tm                   \
            vphn         \
            math         \
-          ptrace
+          ptrace       \
+          security
  
  endif
  
diff --git a/tools/testing/selftests/powerpc/include/reg.h b/tools/testing/selftests/powerpc/include/reg.h

index 7f348c0..52b4710 100644 (file)
--- a/tools/testing/selftests/powerpc/include/reg.h
+++ b/tools/testing/selftests/powerpc/include/reg.h
@@ -17,6 +17,7 @@
                                     : "memory")
  
  #define mb()           asm volatile("sync" : : : "memory");
+#define barrier()      asm volatile("" : : : "memory");
  
  #define SPRN_MMCR2     769
  #define SPRN_MMCRA     770
diff --git a/tools/testing/selftests/powerpc/include/utils.h b/tools/testing/selftests/powerpc/include/utils.h

index c58c370..4962182 100644 (file)
--- a/tools/testing/selftests/powerpc/include/utils.h
+++ b/tools/testing/selftests/powerpc/include/utils.h
@@ -11,6 +11,7 @@
  #include <stdint.h>
  #include <stdbool.h>
  #include <linux/auxvec.h>
+#include <linux/perf_event.h>
  #include "reg.h"
  
  /* Avoid headaches with PRI?64 - just use %ll? always */
@@ -31,6 +32,15 @@ void *get_auxv_entry(int type);
  
  int pick_online_cpu(void);
  
+int read_debugfs_file(char *debugfs_file, int *result);
+int write_debugfs_file(char *debugfs_file, int result);
+void set_dscr(unsigned long val);
+int perf_event_open_counter(unsigned int type,
+                           unsigned long config, int group_fd);
+int perf_event_enable(int fd);
+int perf_event_disable(int fd);
+int perf_event_reset(int fd);
+
  static inline bool have_hwcap(unsigned long ftr)
  {
         return ((unsigned long)get_auxv_entry(AT_HWCAP) & ftr) == ftr;
@@ -80,4 +90,12 @@ do {                                                         \
  #define PPC_FEATURE2_ARCH_3_00 0x00800000
  #endif
  
+#if defined(__powerpc64__)
+#define UCONTEXT_NIA(UC)       (UC)->uc_mcontext.gp_regs[PT_NIP]
+#elif defined(__powerpc__)
+#define UCONTEXT_NIA(UC)       (UC)->uc_mcontext.uc_regs->gregs[PT_NIP]
+#else
+#error implement UCONTEXT_NIA
+#endif
+
  #endif /* _SELFTESTS_POWERPC_UTILS_H */
diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore

index 7d7c42e..ba91930 100644 (file)
--- a/tools/testing/selftests/powerpc/mm/.gitignore
+++ b/tools/testing/selftests/powerpc/mm/.gitignore
@@ -2,4 +2,5 @@ hugetlb_vs_thp_test
  subpage_prot
  tempfile
  prot_sao
-segv_errors
-\ No newline at end of file
+segv_errors
+wild_bctr
+\ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile

index 33ced6e..43d6842 100644 (file)
--- a/tools/testing/selftests/powerpc/mm/Makefile
+++ b/tools/testing/selftests/powerpc/mm/Makefile
@@ -2,7 +2,7 @@
  noarg:
         $(MAKE) -C ../
  
-TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors
+TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr
  TEST_GEN_FILES := tempfile
  
  top_srcdir = ../../../../..
@@ -12,6 +12,8 @@ $(TEST_GEN_PROGS): ../harness.c
  
  $(OUTPUT)/prot_sao: ../utils.c
  
+$(OUTPUT)/wild_bctr: CFLAGS += -m64
+
  $(OUTPUT)/tempfile:
         dd if=/dev/zero of=$@ bs=64k count=1
  
diff --git a/tools/testing/selftests/powerpc/mm/wild_bctr.c b/tools/testing/selftests/powerpc/mm/wild_bctr.c

new file mode 100644 (file)

index 0000000..1b0e9e9
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/wild_bctr.c
@@ -0,0 +1,155 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright 2018, Michael Ellerman, IBM Corp.
+ *
+ * Test that an out-of-bounds branch to counter behaves as expected.
+ */
+
+#include <setjmp.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <ucontext.h>
+#include <unistd.h>
+
+#include "utils.h"
+
+
+#define BAD_NIP        0x788c545a18000000ull
+
+static struct pt_regs signal_regs;
+static jmp_buf setjmp_env;
+
+static void save_regs(ucontext_t *ctxt)
+{
+       struct pt_regs *regs = ctxt->uc_mcontext.regs;
+
+       memcpy(&signal_regs, regs, sizeof(signal_regs));
+}
+
+static void segv_handler(int signum, siginfo_t *info, void *ctxt_v)
+{
+       save_regs(ctxt_v);
+       longjmp(setjmp_env, 1);
+}
+
+static void usr2_handler(int signum, siginfo_t *info, void *ctxt_v)
+{
+       save_regs(ctxt_v);
+}
+
+static int ok(void)
+{
+       printf("Everything is OK in here.\n");
+       return 0;
+}
+
+#define REG_POISON     0x5a5aUL
+#define POISONED_REG(n)        ((REG_POISON << 48) | ((n) << 32) | (REG_POISON << 16) | (n))
+
+static inline void poison_regs(void)
+{
+       #define POISON_REG(n)   \
+         "lis  " __stringify(n) "," __stringify(REG_POISON) ";" \
+         "addi " __stringify(n) "," __stringify(n) "," __stringify(n) ";" \
+         "sldi " __stringify(n) "," __stringify(n) ", 32 ;" \
+         "oris " __stringify(n) "," __stringify(n) "," __stringify(REG_POISON) ";" \
+         "addi " __stringify(n) "," __stringify(n) "," __stringify(n) ";"
+
+       asm (POISON_REG(15)
+            POISON_REG(16)
+            POISON_REG(17)
+            POISON_REG(18)
+            POISON_REG(19)
+            POISON_REG(20)
+            POISON_REG(21)
+            POISON_REG(22)
+            POISON_REG(23)
+            POISON_REG(24)
+            POISON_REG(25)
+            POISON_REG(26)
+            POISON_REG(27)
+            POISON_REG(28)
+            POISON_REG(29)
+            : // outputs
+            : // inputs
+            : "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25",
+              "26", "27", "28", "29"
+       );
+       #undef POISON_REG
+}
+
+static int check_regs(void)
+{
+       unsigned long i;
+
+       for (i = 15; i <= 29; i++)
+               FAIL_IF(signal_regs.gpr[i] != POISONED_REG(i));
+
+       printf("Regs OK\n");
+       return 0;
+}
+
+static void dump_regs(void)
+{
+       for (int i = 0; i < 32; i += 4) {
+               printf("r%02d 0x%016lx  r%02d 0x%016lx  " \
+                      "r%02d 0x%016lx  r%02d 0x%016lx\n",
+                      i, signal_regs.gpr[i],
+                      i+1, signal_regs.gpr[i+1],
+                      i+2, signal_regs.gpr[i+2],
+                      i+3, signal_regs.gpr[i+3]);
+       }
+}
+
+int test_wild_bctr(void)
+{
+       int (*func_ptr)(void);
+       struct sigaction segv = {
+               .sa_sigaction = segv_handler,
+               .sa_flags = SA_SIGINFO
+       };
+       struct sigaction usr2 = {
+               .sa_sigaction = usr2_handler,
+               .sa_flags = SA_SIGINFO
+       };
+
+       FAIL_IF(sigaction(SIGSEGV, &segv, NULL));
+       FAIL_IF(sigaction(SIGUSR2, &usr2, NULL));
+
+       bzero(&signal_regs, sizeof(signal_regs));
+
+       if (setjmp(setjmp_env) == 0) {
+               func_ptr = ok;
+               func_ptr();
+
+               kill(getpid(), SIGUSR2);
+               printf("Regs before:\n");
+               dump_regs();
+               bzero(&signal_regs, sizeof(signal_regs));
+
+               poison_regs();
+
+               func_ptr = (int (*)(void))BAD_NIP;
+               func_ptr();
+
+               FAIL_IF(1); /* we didn't segv? */
+       }
+
+       FAIL_IF(signal_regs.nip != BAD_NIP);
+
+       printf("All good - took SEGV as expected branching to 0x%llx\n", BAD_NIP);
+
+       dump_regs();
+       FAIL_IF(check_regs());
+
+       return 0;
+}
+
+int main(void)
+{
+       return test_harness(test_wild_bctr, "wild_bctr");
+}
diff --git a/tools/testing/selftests/powerpc/primitives/load_unaligned_zeropad.c b/tools/testing/selftests/powerpc/primitives/load_unaligned_zeropad.c

index ed3239b..ee1e9ca 100644 (file)
--- a/tools/testing/selftests/powerpc/primitives/load_unaligned_zeropad.c
+++ b/tools/testing/selftests/powerpc/primitives/load_unaligned_zeropad.c
@@ -65,14 +65,6 @@ static int unprotect_region(void)
  extern char __start___ex_table[];
  extern char __stop___ex_table[];
  
-#if defined(__powerpc64__)
-#define UCONTEXT_NIA(UC)       (UC)->uc_mcontext.gp_regs[PT_NIP]
-#elif defined(__powerpc__)
-#define UCONTEXT_NIA(UC)       (UC)->uc_mcontext.uc_regs->gregs[PT_NIP]
-#else
-#error implement UCONTEXT_NIA
-#endif
-
  struct extbl_entry {
         int insn;
         int fixup;
diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile b/tools/testing/selftests/powerpc/ptrace/Makefile

index 923d531..9b35ca8 100644 (file)
--- a/tools/testing/selftests/powerpc/ptrace/Makefile
+++ b/tools/testing/selftests/powerpc/ptrace/Makefile
@@ -2,7 +2,7 @@
  TEST_PROGS := ptrace-gpr ptrace-tm-gpr ptrace-tm-spd-gpr \
                ptrace-tar ptrace-tm-tar ptrace-tm-spd-tar ptrace-vsx ptrace-tm-vsx \
                ptrace-tm-spd-vsx ptrace-tm-spr ptrace-hwbreak ptrace-pkey core-pkey \
-              perf-hwbreak
+              perf-hwbreak ptrace-syscall
  
  top_srcdir = ../../../../..
  include ../../lib.mk
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-syscall.c b/tools/testing/selftests/powerpc/ptrace/ptrace-syscall.c

new file mode 100644 (file)

index 0000000..3353210
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-syscall.c
@@ -0,0 +1,228 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * A ptrace test for testing PTRACE_SYSEMU, PTRACE_SETREGS and
+ * PTRACE_GETREGS.  This test basically creates a child process that executes
+ * syscalls and the parent process checks if it is being traced appropriately.
+ *
+ * This test is heavily based on tools/testing/selftests/x86/ptrace_syscall.c
+ * test, and it was adapted to run on Powerpc by
+ * Breno Leitao <leitao@debian.org>
+ */
+#define _GNU_SOURCE
+
+#include <sys/ptrace.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/syscall.h>
+#include <sys/user.h>
+#include <unistd.h>
+#include <errno.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <err.h>
+#include <string.h>
+#include <sys/auxv.h>
+#include "utils.h"
+
+/* Bitness-agnostic defines for user_regs_struct fields. */
+#define user_syscall_nr        gpr[0]
+#define user_arg0              gpr[3]
+#define user_arg1              gpr[4]
+#define user_arg2              gpr[5]
+#define user_arg3              gpr[6]
+#define user_arg4              gpr[7]
+#define user_arg5              gpr[8]
+#define user_ip                nip
+
+#define PTRACE_SYSEMU          0x1d
+
+static int nerrs;
+
+/*
+ * Wait for the next state change of child @chld and insist that it is a
+ * trap stop.  Any waitid() failure, a report about a different pid, or an
+ * event other than CLD_TRAPPED terminates the test through err()/errx().
+ */
+static void wait_trap(pid_t chld)
+{
+       siginfo_t info;
+       int rc = waitid(P_PID, chld, &info, WEXITED|WSTOPPED);
+
+       if (rc != 0)
+               err(1, "waitid");
+
+       if (info.si_pid != chld)
+               errx(1, "got unexpected pid in event\n");
+
+       if (info.si_code != CLD_TRAPPED)
+               errx(1, "got unexpected event type %d\n", info.si_code);
+}
+
+/*
+ * Check that @regs describes syscall number @nr (held in r0) with the six
+ * arguments @arg0, @arg0 + 1, ..., @arg0 + 5 (held in r3-r8).
+ *
+ * On a match an "[OK]" line starting with @ok_what is printed.  Otherwise a
+ * "[FAIL]" line starting with @fail_what is printed together with the
+ * observed register values, and the global failure counter nerrs is bumped.
+ */
+static void check_syscall_regs(struct pt_regs *regs, unsigned long nr,
+                              unsigned long arg0, const char *fail_what,
+                              const char *ok_what)
+{
+       if (regs->user_syscall_nr == nr &&
+           regs->user_arg0 == arg0 && regs->user_arg1 == arg0 + 1 &&
+           regs->user_arg2 == arg0 + 2 && regs->user_arg3 == arg0 + 3 &&
+           regs->user_arg4 == arg0 + 4 && regs->user_arg5 == arg0 + 5) {
+               printf("[OK]\t%s are correct\n", ok_what);
+               return;
+       }
+
+       printf("[FAIL]\t%s are wrong (nr=%lu, args=%lu %lu %lu %lu %lu %lu)\n",
+               fail_what,
+               (unsigned long)regs->user_syscall_nr,
+               (unsigned long)regs->user_arg0,
+               (unsigned long)regs->user_arg1,
+               (unsigned long)regs->user_arg2,
+               (unsigned long)regs->user_arg3,
+               (unsigned long)regs->user_arg4,
+               (unsigned long)regs->user_arg5);
+       nerrs++;
+}
+
+/*
+ * Fork a traced child and drive it with PTRACE_SYSEMU:
+ *
+ *  1) stop at the child's gettid() syscall and check nr/args,
+ *  2) rewind nip by one instruction, resume, and check the same syscall
+ *     is seen again,
+ *  3) rewrite nr/args to getpid(20..25), rewind, resume, and check the
+ *     replacement is what the child stops on,
+ *  4) let the child run to completion and check it exits with status 0.
+ *
+ * Each failed check increments nerrs; unexpected ptrace/wait errors abort
+ * the test via err().
+ */
+static void test_ptrace_syscall_restart(void)
+{
+       int status;
+       struct pt_regs regs;
+       pid_t chld;
+
+       printf("[RUN]\tptrace-induced syscall restart\n");
+
+       chld = fork();
+       if (chld < 0)
+               err(1, "fork");
+
+       /*
+        * Child process is running 4 syscalls after ptrace.
+        *
+        * 1) getpid()
+        * 2) gettid()
+        * 3) tgkill() -> Send SIGSTOP
+        * 4) gettid() -> Where the tests will happen essentially
+        */
+       if (chld == 0) {
+               if (ptrace(PTRACE_TRACEME, 0, 0, 0) != 0)
+                       err(1, "PTRACE_TRACEME");
+
+               pid_t pid = getpid(), tid = syscall(SYS_gettid);
+
+               printf("\tChild will make one syscall\n");
+               syscall(SYS_tgkill, pid, tid, SIGSTOP);
+
+               syscall(SYS_gettid, 10, 11, 12, 13, 14, 15);
+               _exit(0);
+       }
+       /* Parent process below */
+
+       /* Wait for SIGSTOP sent by tgkill above. */
+       if (waitpid(chld, &status, 0) != chld || !WIFSTOPPED(status))
+               err(1, "waitpid");
+
+       printf("[RUN]\tSYSEMU\n");
+       if (ptrace(PTRACE_SYSEMU, chld, 0, 0) != 0)
+               err(1, "PTRACE_SYSEMU");
+       wait_trap(chld);
+
+       if (ptrace(PTRACE_GETREGS, chld, 0, &regs) != 0)
+               err(1, "PTRACE_GETREGS");
+
+       /*
+        * Ptrace trapped prior to executing the syscall, thus r0 still has
+        * the syscall number and r3-r8 still have the arguments, instead of
+        * r3 holding the sys_gettid() result.
+        */
+       check_syscall_regs(&regs, SYS_gettid, 10,
+                          "Initial args", "Initial nr and args");
+
+       printf("[RUN]\tRestart the syscall (ip = 0x%lx)\n",
+              (unsigned long)regs.user_ip);
+
+       /*
+        * Rewind to retry the same syscall again. This will basically test
+        * the rewind process together with PTRACE_SETREGS and PTRACE_GETREGS.
+        */
+       regs.user_ip -= 4;
+       if (ptrace(PTRACE_SETREGS, chld, 0, &regs) != 0)
+               err(1, "PTRACE_SETREGS");
+
+       if (ptrace(PTRACE_SYSEMU, chld, 0, 0) != 0)
+               err(1, "PTRACE_SYSEMU");
+       wait_trap(chld);
+
+       if (ptrace(PTRACE_GETREGS, chld, 0, &regs) != 0)
+               err(1, "PTRACE_GETREGS");
+
+       check_syscall_regs(&regs, SYS_gettid, 10,
+                          "Restart nr or args", "Restarted nr and args");
+
+       printf("[RUN]\tChange nr and args and restart the syscall (ip = 0x%lx)\n",
+              (unsigned long)regs.user_ip);
+
+       /*
+        * Inject a new syscall (getpid) in the same place the previous
+        * syscall (gettid), rewind and re-execute.
+        */
+       regs.user_syscall_nr = SYS_getpid;
+       regs.user_arg0 = 20;
+       regs.user_arg1 = 21;
+       regs.user_arg2 = 22;
+       regs.user_arg3 = 23;
+       regs.user_arg4 = 24;
+       regs.user_arg5 = 25;
+       regs.user_ip -= 4;
+
+       if (ptrace(PTRACE_SETREGS, chld, 0, &regs) != 0)
+               err(1, "PTRACE_SETREGS");
+
+       if (ptrace(PTRACE_SYSEMU, chld, 0, 0) != 0)
+               err(1, "PTRACE_SYSEMU");
+       wait_trap(chld);
+
+       if (ptrace(PTRACE_GETREGS, chld, 0, &regs) != 0)
+               err(1, "PTRACE_GETREGS");
+
+       /*
+        * Check that ptrace stopped at the new syscall that was injected,
+        * and guarantee that it hasn't executed, i.e. the argument registers
+        * contain the arguments and not the syscall return value, for
+        * instance.
+        */
+       check_syscall_regs(&regs, SYS_getpid, 20,
+                          "Replacement nr or args", "Replacement nr and args");
+
+       if (ptrace(PTRACE_CONT, chld, 0, 0) != 0)
+               err(1, "PTRACE_CONT");
+
+       if (waitpid(chld, &status, 0) != chld)
+               err(1, "waitpid");
+
+       /* Guarantee that the process executed properly, returning 0 */
+       if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
+               printf("[FAIL]\tChild failed\n");
+               nerrs++;
+       } else {
+               printf("[OK]\tChild exited cleanly\n");
+       }
+}
+
+/*
+ * Harness callback: run the syscall-restart scenario and report the number
+ * of failed checks accumulated in nerrs (0 means success).
+ */
+int ptrace_syscall(void)
+{
+       test_ptrace_syscall_restart();
+       return nerrs;
+}
+
+/* Program entry: run ptrace_syscall under the selftest harness. */
+int main(void)
+{
+       int rc = test_harness(ptrace_syscall, "ptrace_syscall");
+
+       return rc;
+}
diff --git a/tools/testing/selftests/powerpc/security/Makefile b/tools/testing/selftests/powerpc/security/Makefile

new file mode 100644 (file)

index 0000000..44690f1
--- /dev/null
+++ b/tools/testing/selftests/powerpc/security/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0+
+
+TEST_GEN_PROGS := rfi_flush
+
+CFLAGS += -I../../../../../usr/include
+
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): ../harness.c ../utils.c
diff --git a/tools/testing/selftests/powerpc/security/rfi_flush.c b/tools/testing/selftests/powerpc/security/rfi_flush.c

new file mode 100644 (file)

index 0000000..564ed45
--- /dev/null
+++ b/tools/testing/selftests/powerpc/security/rfi_flush.c
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Copyright 2018 IBM Corporation.
+ */
+
+#define __SANE_USERSPACE_TYPES__
+
+#include <sys/types.h>
+#include <stdint.h>
+#include <malloc.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include "utils.h"
+
+#define CACHELINE_SIZE 128
+
+/*
+ * Buffer filled by read() on the perf event fd in rfi_flush_test().
+ * NOTE(review): the layout presumably mirrors the kernel's
+ * PERF_FORMAT_GROUP read format for a group with a single event -- confirm
+ * against perf_event_open(2).
+ */
+struct perf_event_read {
+       __u64 nr;               /* first word of the read; presumably the event count */
+       __u64 l1d_misses;       /* value of the counter opened as the "L1d miss" raw event */
+};
+
+/* Perform one doubleword load ("ld") from @addr and return the loaded value. */
+static inline __u64 load(void *addr)
+{
+       __u64 value;
+
+       asm volatile("ld %0,0(%1)" : "=r"(value) : "b"(addr));
+       return value;
+}
+
+/*
+ * Repeat @iterations times: load once from every CACHELINE_SIZE-strided
+ * offset in the first @zero_size bytes of @p, then make a getppid() call.
+ */
+static void syscall_loop(char *p, unsigned long iterations,
+                        unsigned long zero_size)
+{
+       unsigned long pass = 0;
+
+       while (pass < iterations) {
+               unsigned long off;
+
+               for (off = 0; off < zero_size; off += CACHELINE_SIZE)
+                       load(p + off);
+
+               getppid();
+               pass++;
+       }
+}
+
+/*
+ * Count L1D misses around syscalls with the powerpc/rfi_flush debugfs knob
+ * in its original state and then in the opposite state.
+ *
+ * For each state the measurement is repeated `repetitions` times.  With the
+ * flush enabled every repetition must see at least one miss per cacheline
+ * per iteration; with it disabled it must see fewer misses than iterations.
+ *
+ * Skips unless run as root with the debugfs file readable.  Returns 0 when
+ * every repetition in both states passes, 1 otherwise.  The original knob
+ * value is written back before returning on the normal path.
+ */
+int rfi_flush_test(void)
+{
+       char *p;
+       int repetitions = 10;
+       int fd, passes = 0, iter, rc = 0;
+       struct perf_event_read v;
+       __u64 l1d_misses_total = 0;
+       unsigned long iterations = 100000, zero_size = 24 * 1024;
+       int rfi_flush_org, rfi_flush;
+
+       SKIP_IF(geteuid() != 0);
+
+       if (read_debugfs_file("powerpc/rfi_flush", &rfi_flush_org)) {
+               perror("Unable to read powerpc/rfi_flush debugfs file");
+               SKIP_IF(1);
+       }
+
+       rfi_flush = rfi_flush_org;
+
+       fd = perf_event_open_counter(PERF_TYPE_RAW, /* L1d miss */ 0x400f0, -1);
+       FAIL_IF(fd < 0);
+
+       /*
+        * memalign() takes (alignment, size): ask for zero_size bytes aligned
+        * to a cacheline.  syscall_loop() reads the whole zero_size range, so
+        * the buffer must really be that large.
+        */
+       p = (char *)memalign(CACHELINE_SIZE, zero_size);
+       FAIL_IF(!p);
+
+       FAIL_IF(perf_event_enable(fd));
+
+       set_dscr(1);
+
+       iter = repetitions;
+
+again:
+       FAIL_IF(perf_event_reset(fd));
+
+       syscall_loop(p, iterations, zero_size);
+
+       FAIL_IF(read(fd, &v, sizeof(v)) != sizeof(v));
+
+       /* Expect at least zero_size/CACHELINE_SIZE misses per iteration */
+       if (v.l1d_misses >= (iterations * zero_size / CACHELINE_SIZE) && rfi_flush)
+               passes++;
+       else if (v.l1d_misses < iterations && !rfi_flush)
+               passes++;
+
+       l1d_misses_total += v.l1d_misses;
+
+       /* Run the remaining repetitions for the current knob state. */
+       if (--iter)
+               goto again;
+
+       if (passes < repetitions) {
+               printf("FAIL (L1D misses with rfi_flush=%d: %llu %c %lu) [%d/%d failures]\n",
+                      rfi_flush, l1d_misses_total, rfi_flush ? '<' : '>',
+                      rfi_flush ? (repetitions * iterations * zero_size / CACHELINE_SIZE) : iterations,
+                      repetitions - passes, repetitions);
+               rc = 1;
+       } else
+               printf("PASS (L1D misses with rfi_flush=%d: %llu %c %lu) [%d/%d pass]\n",
+                      rfi_flush, l1d_misses_total, rfi_flush ? '>' : '<',
+                      rfi_flush ? (repetitions * iterations * zero_size / CACHELINE_SIZE) : iterations,
+                      passes, repetitions);
+
+       /* After the first state, flip the knob and measure once more. */
+       if (rfi_flush == rfi_flush_org) {
+               rfi_flush = !rfi_flush_org;
+               if (write_debugfs_file("powerpc/rfi_flush", rfi_flush) < 0) {
+                       perror("error writing to powerpc/rfi_flush debugfs file");
+                       return 1;
+               }
+               iter = repetitions;
+               l1d_misses_total = 0;
+               passes = 0;
+               goto again;
+       }
+
+       perf_event_disable(fd);
+       close(fd);
+       free(p);
+
+       set_dscr(0);
+
+       if (write_debugfs_file("powerpc/rfi_flush", rfi_flush_org) < 0) {
+               perror("unable to restore original value of powerpc/rfi_flush debugfs file");
+               return 1;
+       }
+
+       return rc;
+}
+
+/* Program entry: run rfi_flush_test under the selftest harness. */
+int main(int argc, char *argv[])
+{
+       int rc = test_harness(rfi_flush_test, "rfi_flush_test");
+
+       return rc;
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-tmspr.c b/tools/testing/selftests/powerpc/tm/tm-tmspr.c

index 2bda81c..df1d7d4 100644 (file)
--- a/tools/testing/selftests/powerpc/tm/tm-tmspr.c
+++ b/tools/testing/selftests/powerpc/tm/tm-tmspr.c
@@ -98,7 +98,7 @@ void texasr(void *in)
  
  int test_tmspr()
  {
-       pthread_t       thread;
+       pthread_t       *thread;
         int             thread_num;
         unsigned long   i;
  
@@ -107,21 +107,28 @@ int test_tmspr()
         /* To cause some context switching */
         thread_num = 10 * sysconf(_SC_NPROCESSORS_ONLN);
  
+       thread = malloc(thread_num * sizeof(pthread_t));
+       if (thread == NULL)
+               return EXIT_FAILURE;
+
         /* Test TFIAR and TFHAR */
-       for (i = 0 ; i < thread_num ; i += 2){
-               if (pthread_create(&thread, NULL, (void*)tfiar_tfhar, (void *)i))
+       for (i = 0; i < thread_num; i += 2) {
+               if (pthread_create(&thread[i], NULL, (void *)tfiar_tfhar,
+                                  (void *)i))
                         return EXIT_FAILURE;
         }
-       if (pthread_join(thread, NULL) != 0)
-               return EXIT_FAILURE;
-
         /* Test TEXASR */
-       for (i = 0 ; i < thread_num ; i++){
-               if (pthread_create(&thread, NULL, (void*)texasr, (void *)i))
+       for (i = 1; i < thread_num; i += 2) {
+               if (pthread_create(&thread[i], NULL, (void *)texasr, (void *)i))
                         return EXIT_FAILURE;
         }
-       if (pthread_join(thread, NULL) != 0)
-               return EXIT_FAILURE;
+
+       for (i = 0; i < thread_num; i++) {
+               if (pthread_join(thread[i], NULL) != 0)
+                       return EXIT_FAILURE;
+       }
+
+       free(thread);
  
         if (passed)
                 return 0;
diff --git a/tools/testing/selftests/powerpc/tm/tm-unavailable.c b/tools/testing/selftests/powerpc/tm/tm-unavailable.c

index 156c8e7..09894f4 100644 (file)
--- a/tools/testing/selftests/powerpc/tm/tm-unavailable.c
+++ b/tools/testing/selftests/powerpc/tm/tm-unavailable.c
@@ -236,7 +236,8 @@ void *tm_una_ping(void *input)
         }
  
         /* Check if we were not expecting a failure and a it occurred. */
-       if (!expecting_failure() && is_failure(cr_)) {
+       if (!expecting_failure() && is_failure(cr_) &&
+           !failure_is_reschedule()) {
                 printf("\n\tUnexpected transaction failure 0x%02lx\n\t",
                         failure_code());
                 return (void *) -1;
@@ -244,9 +245,11 @@ void *tm_una_ping(void *input)
  
         /*
          * Check if TM failed due to the cause we were expecting. 0xda is a
-        * TM_CAUSE_FAC_UNAV cause, otherwise it's an unexpected cause.
+        * TM_CAUSE_FAC_UNAV cause, otherwise it's an unexpected cause, unless
+        * it was caused by a reschedule.
          */
-       if (is_failure(cr_) && !failure_is_unavailable()) {
+       if (is_failure(cr_) && !failure_is_unavailable() &&
+           !failure_is_reschedule()) {
                 printf("\n\tUnexpected failure cause 0x%02lx\n\t",
                         failure_code());
                 return (void *) -1;
diff --git a/tools/testing/selftests/powerpc/tm/tm.h b/tools/testing/selftests/powerpc/tm/tm.h

index df42042..5518b1d 100644 (file)
--- a/tools/testing/selftests/powerpc/tm/tm.h
+++ b/tools/testing/selftests/powerpc/tm/tm.h
@@ -52,6 +52,15 @@ static inline bool failure_is_unavailable(void)
         return (failure_code() & TM_CAUSE_FAC_UNAV) == TM_CAUSE_FAC_UNAV;
  }
  
+/*
+ * True when the current failure code has every bit of TM_CAUSE_RESCHED or
+ * every bit of TM_CAUSE_KVM_RESCHED set.
+ */
+static inline bool failure_is_reschedule(void)
+{
+       return (failure_code() & TM_CAUSE_RESCHED) == TM_CAUSE_RESCHED ||
+              (failure_code() & TM_CAUSE_KVM_RESCHED) == TM_CAUSE_KVM_RESCHED;
+}
+
  static inline bool failure_is_nesting(void)
  {
         return (__builtin_get_texasru() & 0x400000);
diff --git a/tools/testing/selftests/powerpc/utils.c b/tools/testing/selftests/powerpc/utils.c

index aa8fc1e..43c3428 100644 (file)
--- a/tools/testing/selftests/powerpc/utils.c
+++ b/tools/testing/selftests/powerpc/utils.c
@@ -10,16 +10,22 @@
  #include <fcntl.h>
  #include <link.h>
  #include <sched.h>
+#include <signal.h>
  #include <stdio.h>
+#include <stdlib.h>
  #include <string.h>
+#include <sys/ioctl.h>
  #include <sys/stat.h>
  #include <sys/types.h>
  #include <sys/utsname.h>
  #include <unistd.h>
+#include <asm/unistd.h>
+#include <linux/limits.h>
  
  #include "utils.h"
  
  static char auxv[4096];
+extern unsigned int dscr_insn[];
  
  int read_auxv(char *buf, ssize_t buf_size)
  {
@@ -121,3 +127,149 @@ bool is_ppc64le(void)
  
         return strcmp(uts.machine, "ppc64le") == 0;
  }
+
+/*
+ * Read an integer from /sys/kernel/debug/<debugfs_file>.
+ *
+ * @debugfs_file: path relative to the debugfs mount point.
+ * @result:       receives atoi() of the file contents on success; left
+ *                untouched on failure.
+ *
+ * Returns 0 on success, or a negative value if the file cannot be opened
+ * or read.
+ */
+int read_debugfs_file(char *debugfs_file, int *result)
+{
+       int rc = -1, fd;
+       char path[PATH_MAX];
+       char value[16];
+
+       snprintf(path, sizeof(path), "/sys/kernel/debug/%s", debugfs_file);
+
+       if ((fd = open(path, O_RDONLY)) < 0)
+               return rc;
+
+       /* Keep one byte spare so the data can always be NUL-terminated. */
+       rc = read(fd, value, sizeof(value) - 1);
+       if (rc < 0) {
+               /* Do not leak the descriptor on the error path. */
+               close(fd);
+               return rc;
+       }
+
+       /* Terminate right after the bytes read, not at a fixed offset. */
+       value[rc] = 0;
+       *result = atoi(value);
+       close(fd);
+
+       return 0;
+}
+
+/*
+ * Write @result, formatted as a decimal integer, to
+ * /sys/kernel/debug/<debugfs_file>.
+ *
+ * Returns 0 on success, or a negative value if the file cannot be opened
+ * or written.
+ */
+int write_debugfs_file(char *debugfs_file, int result)
+{
+       int rc = -1, fd;
+       char path[PATH_MAX];
+       char value[16];
+
+       snprintf(path, sizeof(path), "/sys/kernel/debug/%s", debugfs_file);
+
+       if ((fd = open(path, O_WRONLY)) < 0)
+               return rc;
+
+       snprintf(value, sizeof(value), "%d", result);
+
+       rc = write(fd, value, strlen(value));
+       if (rc < 0) {
+               /* Do not leak the descriptor on the error path. */
+               close(fd);
+               return rc;
+       }
+
+       close(fd);
+
+       return 0;
+}
+
+/*
+ * Thin wrapper that issues the perf_event_open system call through
+ * syscall() and returns whatever syscall() returns.
+ */
+static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
+               int cpu, int group_fd, unsigned long flags)
+{
+       long ret;
+
+       ret = syscall(__NR_perf_event_open, hw_event, pid, cpu,
+                     group_fd, flags);
+       return ret;
+}
+
+/*
+ * Zero @event_attr and fill it in for an event of the given @type and
+ * @config: group read format, created disabled, and with kernel,
+ * hypervisor and guest excluded.
+ */
+static void perf_event_attr_init(struct perf_event_attr *event_attr,
+                                       unsigned int type,
+                                       unsigned long config)
+{
+       memset(event_attr, 0, sizeof(*event_attr));
+
+       /* What to count. */
+       event_attr->size = sizeof(struct perf_event_attr);
+       event_attr->type = type;
+       event_attr->config = config;
+
+       /* How it is read and when it starts. */
+       event_attr->read_format = PERF_FORMAT_GROUP;
+       event_attr->disabled = 1;
+
+       /* Exclusion flags. */
+       event_attr->exclude_guest = 1;
+       event_attr->exclude_hv = 1;
+       event_attr->exclude_kernel = 1;
+}
+
+/*
+ * Open a perf counter for the calling process on any CPU (pid 0, cpu -1),
+ * described by @type/@config and attached to @group_fd.
+ *
+ * Returns the new file descriptor, or a negative value after reporting the
+ * failure with perror().
+ */
+int perf_event_open_counter(unsigned int type,
+                           unsigned long config, int group_fd)
+{
+       struct perf_event_attr attr;
+       int fd;
+
+       perf_event_attr_init(&attr, type, config);
+       fd = perf_event_open(&attr, 0, -1, group_fd, 0);
+       if (fd >= 0)
+               return fd;
+
+       perror("perf_event_open() failed");
+       return fd;
+}
+
+/*
+ * Issue PERF_EVENT_IOC_ENABLE with PERF_IOC_FLAG_GROUP on @fd.
+ * Returns 0 on success; on failure reports via perror() and returns -1.
+ */
+int perf_event_enable(int fd)
+{
+       int rc = ioctl(fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP);
+
+       if (rc != -1)
+               return 0;
+
+       perror("error while enabling perf events");
+       return -1;
+}
+
+/*
+ * Issue PERF_EVENT_IOC_DISABLE with PERF_IOC_FLAG_GROUP on @fd.
+ * Returns 0 on success; on failure reports via perror() and returns -1.
+ */
+int perf_event_disable(int fd)
+{
+       int rc = ioctl(fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP);
+
+       if (rc != -1)
+               return 0;
+
+       perror("error disabling perf events");
+       return -1;
+}
+
+/*
+ * Issue PERF_EVENT_IOC_RESET with PERF_IOC_FLAG_GROUP on @fd.
+ * Returns 0 on success; on failure reports via perror() and returns -1.
+ */
+int perf_event_reset(int fd)
+{
+       int rc = ioctl(fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP);
+
+       if (rc != -1)
+               return 0;
+
+       perror("error resetting perf events");
+       return -1;
+}
+
+/*
+ * SIGILL handler installed by set_dscr().
+ *
+ * If the faulting instruction is the one labelled dscr_insn, warn once and
+ * step the saved next-instruction address past it (4 bytes) so execution
+ * resumes after the mtspr.  Any other SIGILL is reported and aborts.
+ *
+ * @unused is the ucontext_t pointer passed to SA_SIGINFO handlers.
+ */
+static void sigill_handler(int signr, siginfo_t *info, void *unused)
+{
+       static int warned = 0;
+       ucontext_t *ctx = (ucontext_t *)unused;
+       unsigned long *pc = &UCONTEXT_NIA(ctx);
+
+       if (*pc == (unsigned long)&dscr_insn) {
+               if (!warned++)
+                       printf("WARNING: Skipping over dscr setup. Consider running 'ppc64_cpu --dscr=1' manually.\n");
+               *pc += 4;
+       } else {
+               /* Report the faulting address itself, not where it is saved. */
+               printf("SIGILL at %p\n", (void *)*pc);
+               abort();
+       }
+}
+
+/*
+ * Write @val to the DSCR special purpose register with mtspr.
+ *
+ * On the first call a SIGILL handler is installed, so that a failing mtspr
+ * (labelled dscr_insn) is skipped by sigill_handler() instead of killing
+ * the test.
+ */
+void set_dscr(unsigned long val)
+{
+       static int init = 0;
+
+       if (init == 0) {
+               struct sigaction sa;
+
+               memset(&sa, 0, sizeof(sa));
+               sa.sa_flags = SA_SIGINFO;
+               sa.sa_sigaction = sigill_handler;
+               if (sigaction(SIGILL, &sa, NULL) != 0)
+                       perror("sigill_handler");
+               init = 1;
+       }
+
+       asm volatile("dscr_insn: mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR));
+}
author	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 27 Oct 2018 02:33:41 +0000 (19:33 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 27 Oct 2018 02:33:41 +0000 (19:33 -0700)